From 6cac7ed5f03796b8197399a1590adfa7eddf8bc7 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Wed, 1 Jun 2022 10:32:32 +0200 Subject: [PATCH 01/18] Writing clipkit helper class, removing BMGE and trimal support --- requirements.txt | 1 + setup.py | 5 - tests/test_commands.py | 12 +- treesapp/classy.py | 2 +- treesapp/clipkit_helper.py | 21 + treesapp/commands.py | 4 +- treesapp/extensions/tree_parsermodule.cpp | 814 ---------------------- treesapp/sub_binaries/BMGE.jar | Bin 105670 -> 0 bytes treesapp/training_utils.py | 2 +- treesapp/utilities.py | 2 - treesapp/wrapper.py | 4 +- 11 files changed, 35 insertions(+), 832 deletions(-) create mode 100644 treesapp/clipkit_helper.py delete mode 100644 treesapp/extensions/tree_parsermodule.cpp delete mode 100755 treesapp/sub_binaries/BMGE.jar diff --git a/requirements.txt b/requirements.txt index 2fa0933a..6091d3f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,4 @@ tqdm >=4.50.0 pytest >=6.2.5 pandas >=1.1.0 matplotlib >=3.3.0 +clipkit >= 1.3.0 diff --git a/setup.py b/setup.py index cebfb043..fde44b23 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -from setuptools import Extension from setuptools import setup, find_packages with open("README.md", "r") as readme: @@ -34,10 +33,6 @@ "include_package_data": True, "entry_points": {'console_scripts': ['treesapp = treesapp.__main__:main']}, "classifiers": CLASSIFIERS, - "ext_modules": [Extension("_tree_parser", - sources=["treesapp/extensions/tree_parsermodule.cpp"], - language="c++") - ], "install_requires": open("requirements.txt").read().splitlines(), "setup_requires": [ "setuptools>=50.0.0" diff --git a/tests/test_commands.py b/tests/test_commands.py index db98b7fa..5ca63fc3 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -430,12 +430,12 @@ def test_update(self): self.assertTrue(test_refpkg.svc is not None) return - def test_tmp(self): - from treesapp import commands - os.chdir("/home/connor/Desktop/NrfA") - cmd = "-i nrfA_refseq-protein-212.hits99.fasta -c nrfA --headless --fast -o nrfA_S1 --accession2lin nrfA_refseq-protein-212.hits99.acc_id_lin_map2.tsv --overwrite -n 8" - commands.create(cmd.split()) - return + # def test_tmp(self): + # from treesapp import commands + # os.chdir("") + # cmd = "" + # commands.create(cmd.split()) + # return if __name__ == '__main__': diff --git a/treesapp/classy.py b/treesapp/classy.py index a45badc5..453bf10d 100644 --- a/treesapp/classy.py +++ b/treesapp/classy.py @@ -480,7 +480,7 @@ def find_executables(self, args) -> dict: :return: exec_paths beings the absolute path to each executable """ exec_paths = dict() - dependencies = ["prodigal", "hmmbuild", "hmmalign", "hmmsearch", "epa-ng", "raxml-ng", "BMGE.jar"] + dependencies = ["prodigal", "hmmbuild", "hmmalign", "hmmsearch", "epa-ng", "raxml-ng"] # Extra executables necessary for certain modes of TreeSAPP if self.command == "abundance": diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py new file mode 100644 index 00000000..9d3e7c80 --- /dev/null +++ b/treesapp/clipkit_helper.py @@ -0,0 +1,21 @@ +import logging +from clipkit import clipkit +from clipkit import args_processing + +from treesapp import logger + + +class ClipKitHelper: + CLIPKIT_MODES = {"smart-gap"} + + def __init__(self, fasta_in: str, mfa_out: str): + self.logger = logging.getLogger(logger.logger_name()) + self.input = "" + self.mfa_out = "" + + self.mode = "smart-gap" + return + + def run(self): + # clipkit.execute() + return diff --git a/treesapp/commands.py b/treesapp/commands.py index 34f0b822..41793bcf 100644 --- a/treesapp/commands.py +++ b/treesapp/commands.py @@ -47,6 +47,7 @@ def info(sys_args): import treesapp import Bio + from clipkit.version import __version__ as ck_version import numpy import packaging import pygtrie @@ -62,6 +63,7 @@ def info(sys_args): # Write the version of all python deps py_deps = {"biopython": Bio.__version__, + "clipkit": ck_version, "ete3": ete3.__version__, "joblib": joblib.__version__, "numpy": numpy.__version__, @@ -634,7 +636,7 @@ def create(sys_args): n_threads=args.num_threads, intermediates_dir=ts_create.var_output_dir) ## - # Optionally trim with BMGE and create the Phylip multiple alignment file + # Optionally trim the multiple sequence alignment create Phylip files ## dict_for_phy = dict() if args.trim_align: diff --git a/treesapp/extensions/tree_parsermodule.cpp b/treesapp/extensions/tree_parsermodule.cpp deleted file mode 100644 index 0e0fbe23..00000000 --- a/treesapp/extensions/tree_parsermodule.cpp +++ /dev/null @@ -1,814 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -using namespace std; - -static PyObject *read_the_reference_tree(PyObject *self, PyObject *args); -static PyObject *get_parents_and_children(PyObject *self, PyObject *args); -static PyObject *build_subtrees_newick(PyObject *self, PyObject *args); -static PyObject *lowest_common_ancestor(PyObject *self, PyObject *args); -char *get_node_relationships(char *tree_string); -char *split_tree_string(char *tree_string); - -static char read_the_reference_tree_docstring[] = - "Reads the labelled_tree_file and reformats it for downstream interpretation"; -static char get_parents_and_children_docstring[] = - "Stores the input tree as a binary search tree before recursively finding the children and parent of each node"; -static char build_subtrees_newick_docstring[] = - "Reads the labelled, rooted tree and returns all subtrees in the tree"; -static char lowest_common_ancestor_docstring[] = - "Calculate lowest common ancestor for a set of nodes in a tree"; - -//static PyMethodDef module_methods[] = { -// {"error_out", (PyCFunction)error_out, METH_NOARGS, NULL}, -// {NULL, NULL} -//}; - -static PyMethodDef module_methods[] = { - {"_read_the_reference_tree", - read_the_reference_tree, - METH_VARARGS, - read_the_reference_tree_docstring}, - {"_get_parents_and_children", - get_parents_and_children, - METH_VARARGS, - get_parents_and_children_docstring}, - {"_build_subtrees_newick", - build_subtrees_newick, - METH_VARARGS, - build_subtrees_newick_docstring}, - {"_lowest_common_ancestor", - lowest_common_ancestor, - METH_VARARGS, - lowest_common_ancestor_docstring}, - {NULL, NULL, 0, NULL} -}; - -struct module_state { - PyObject *error; -}; - -#if PY_MAJOR_VERSION >= 3 -#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) -#else -#define GETSTATE(m) (&_state) -static struct module_state _state; -#endif - - -/** - A tree node - */ -struct Link { - long key; - long left; - long right; - Link* next; - Link* previous; -}; - -struct TreeNode { - long key; - TreeNode* left; - TreeNode* right; -}; - -struct CharLink { - char* subtree; - CharLink* next; -}; - - -/** - * Inserts a new Link (with key=newKey) at the head of the linked_list. - */ -void prepend_link(Link*& head, long newKey) { - Link * curr = new Link; - curr->key = newKey; - curr->next = head; - head = curr; -} - -/** - * Recursively searches a subtree for a key. - */ -bool find(long query, TreeNode *& r) { - if (r == NULL) return r; - if (query == r->key) - return true; - - bool lb = find(query, r->left); - bool rb = find(query, r->right); - if (lb || rb ) - return true; - else - return false; -} - - -TreeNode* create_node(long key, TreeNode* l = NULL, TreeNode* r = NULL ) { - TreeNode* curr = new TreeNode; - curr->key = key; - curr->left = l; - curr->right = r; - return curr; -} - - -void add_subtree(CharLink*& head, char*& new_subtree) { - CharLink * curr = new CharLink; - curr->subtree = new_subtree; - curr->next = head; - head = curr; -} - - -void deleteSubtreeList(CharLink*& head) { - if ( head != NULL ) { - deleteSubtreeList( head->next ); - free(head->subtree); - delete head; - head = NULL; - } -} - - -void deleteList(Link*& head) { - if ( head != NULL ) { - deleteList( head->next ); - delete head; - head = NULL; - } -} - - -/** - * Deletes all nodes in the tree rooted at root and sets root to NULL. - */ -void deleteTree( TreeNode*& root ) { - if ( root != NULL ) { - deleteTree( root->left ); - deleteTree( root->right ); - delete root; - root = NULL; - } -} - - -void print_list(Link* head) { - std::cout << std::endl; - for (Link* curr = head; curr != NULL; curr = curr->next){ - printf("key: %ld\tleft: %ld\tright: %ld", - curr->key, curr->left, curr->right); - if (curr->next != NULL) std::cout << "\n"; - } - std::cout << std::endl; -} - - -/** - * Prints out the tree sideways. - */ -void print_tree( TreeNode* root, int d = 0 ) { - if ( root == NULL ) return; - print_tree( root->right, d+1 ); - std::cout << std::setw( 3 * d ) << ""; // output 3 * d spaces - std::cout << root->key << std::endl; - print_tree( root->left, d+1 ); -} - -char const *float_chars = "0.123456789"; -char const *real_number_chars = "-0123456789"; - -/* - Returns 1 if character sub is a substring of super, 0 otherwise - */ -int is_char_substr(char sub, const char * super) { - int x = 0; - while(super[x]) { - if (super[x] == sub) - return 1; - x++; - } - return 0; -} - -/* - Returns the length of the char_array as int - */ -int get_char_array_length(char * char_array) { - int x = 0; - while (char_array[x]) - x++; - return x; -} - -/* - * Append a character array (source) onto another character array (dest) - * start is the position to continue appending on dest - */ -int append_char_array(int start, char * source, char *&dest) { - int i = 0; - while (source[i]) - dest[start++] = source[i++]; - return start; -} - -char reverse_char_array(char * char_array, char *&flipped, int first, int last) { - if (last == -1) - return '\0'; - flipped[first] = reverse_char_array(char_array, flipped, first+1, last-1); - return char_array[last]; -} - -/* - * param comma_separated_string: a character array with commas - * return: a vector of character arrays - */ -std::vector csv_to_list(char * comma_separated_string) { - int i = 0; - int k = 0; - std::vector char_list; - char c_array[10]; - - while (comma_separated_string[i]) { - if (comma_separated_string[i] == ',') { - c_array[k] = '\0'; - char_list.push_back(string (c_array)); - i++; k = 0; - c_array[k] = '\0'; - } - c_array[k] = comma_separated_string[i]; - i++; k++; - } - c_array[k] = '\0'; - char_list.push_back(string (c_array)); - return char_list; -} - - -static PyObject *read_the_reference_tree(PyObject *self, PyObject *args) { - char* reference_tree_file; - if (!PyArg_ParseTuple(args, "s", &reference_tree_file)) { - return NULL; - } - - FILE *reference_tree = fopen(reference_tree_file, "r"); - if (reference_tree == 0) { - printf ("The reference tree file %s could not be opened for reading!\n", reference_tree_file); - exit(0); - } - - int _MAX = 1000; - int tree_len = 0; - int count = 2; - int x = 0; - char *tree_string = (char *) malloc ( _MAX * sizeof(char)); - char count_char[10]; - char c = fgetc(reference_tree); - while ( c != EOF ) { - if (tree_len <= _MAX && tree_len >= _MAX - 100) { - _MAX = _MAX + 1000; - tree_string = (char *) realloc(tree_string, _MAX * sizeof(char)); - } - if (c == ')') { - tree_string[tree_len] = c; - tree_len++; - tree_string[tree_len] = '-'; - tree_len++; - - sprintf(count_char, "%d", count); - count++; - x = 0; - while (count_char[x]) { - c = count_char[x]; - tree_string[tree_len] = c; - tree_len++; - x++; - } - c = fgetc(reference_tree); - } - if (c == ':') { - c = fgetc(reference_tree); - // while c is a substring of float_chars, continue reading in characters - while (is_char_substr(c, float_chars) == 1) - c = fgetc(reference_tree); - } - else { - tree_string[tree_len] = c; - tree_len++; - c = fgetc(reference_tree); - } - - } - fclose(reference_tree); - // Now remove the last node - while ( c != ')') { - tree_string[tree_len + 1] = '\0'; - tree_string[tree_len] = ';'; - tree_len--; - c = tree_string[tree_len]; - } - - return Py_BuildValue("s", tree_string); -}; - - -void get_previous_node(char *&parsed_tree_string, int &end, char *&previous) { - char reversed[10]; - char c = parsed_tree_string[end]; - int i = 0; - // Skip through brackets and commas to the end of the previous node - while (is_char_substr(c, real_number_chars) == 0) { - parsed_tree_string[end--] = '\0'; - c = parsed_tree_string[end]; - } - while (is_char_substr(c, real_number_chars) == 1) { - reversed[i] = c; - parsed_tree_string[end--] = '\0'; - c = parsed_tree_string[end]; - i++; - } - reversed[i] = '\0'; - reverse_char_array(reversed, previous, 0, i); - return; -} - - -void load_linked_list(char * tree_string, Link *&head) { - char c; - int pos = 0; - int i = 0; - int newKey = -1; - int retrace_pos = 0; - int x; - char curr[10]; - char* right = (char*) malloc(20); - char* left = (char*) malloc(20); - - int tree_len = get_char_array_length(tree_string); - char* parsed_tree_string = (char*) malloc(tree_len); - for (x = 0; x < tree_len; x++) - parsed_tree_string[x] = '\0'; - - while (tree_string[pos]) { - c = tree_string[pos]; - parsed_tree_string[retrace_pos++] = c; - if (c == ')') { - // load the next node as curr - c = tree_string[pos+1]; - i = 0; - // Overwrite curr - for (x = 0; x < 10; x++) - curr[x] = '\0'; - while (is_char_substr(c, real_number_chars) == 1) { - curr[i++] = c; - pos++; - c = tree_string[pos+1]; - } - curr[i] = '\0'; - newKey = atoi(curr); - if (newKey == 0) - newKey = -1; - prepend_link(head, newKey); - - // load the previous 2 nodes as children and remove these from the string - get_previous_node(parsed_tree_string, retrace_pos, right); - head->right = atoi(right); - for (x = 0; x < 10; x++) - right[x] = '\0'; - get_previous_node(parsed_tree_string, retrace_pos, left); - head->left = atoi(left); - for (x = 0; x < 10; x++) - left[x] = '\0'; - - // add the current node to the parsed_tree_string - i = 0; - while (curr[i]) - parsed_tree_string[retrace_pos++] = curr[i++]; - } - pos++; - } - free(right); - free(left); - free(parsed_tree_string); -} - - -TreeNode* load_tree_from_list(Link* head, TreeNode*& root, std::stack& merge) { - TreeNode* previous = NULL; - if (head == NULL) { - return previous; - } - previous = load_tree_from_list(head->next, root, merge); - root = create_node(head->key); - - if (previous) { - // If a key is equal to the previous key, then the previous key is a child - if (head->left == previous->key) - root->left = previous; - if (head->right == previous->key) - root->right = previous; - // If neither children are from the previous node, then create new nodes - if (head->left != previous->key) - root->left = create_node(head->left); - if (head->right != previous->key) - root->right = create_node(head->right); - // If a child is equal to a node from a long time ago, in a galaxy far far away... - if (!merge.empty() && head->left == merge.top()->key) { - root->left = merge.top(); - merge.pop(); - } - if (!merge.empty() && head->right == merge.top()->key) { - root->right = merge.top(); - merge.pop(); - } - // If neither the left or right are equal to the previous key, store it as merge - if (head->right != previous->key && head->left != previous->key) { - merge.push(previous); - } - } - else { - root->right = create_node(head->right); - root->left = create_node(head->left); - } - return root; -} - - -int get_children_of_nodes(Link * head, char *&children) { - char * buffer = (char*) malloc (20); - int _MAX = 1000; - children = (char *) malloc (_MAX * sizeof(char)); - int x = 0; - for (Link* curr = head; curr != NULL; curr = curr->next){ - if (x <= _MAX && x >= _MAX - 100) { - _MAX = _MAX + 1000; - children = (char *) realloc (children, _MAX * sizeof(char)); - } - sprintf(buffer, "%ld", curr->key); - x = append_char_array(x, buffer, children); - children[x++] = '='; - sprintf(buffer, "%ld", curr->left); - x = append_char_array(x, buffer, children); - children[x++] = ','; - sprintf(buffer, "%ld", curr->right); - x = append_char_array(x, buffer, children); - - if (curr->next != NULL) children[x++] = ';'; - } - children[x++] = '\n'; - children[x] = '\0'; - - free(buffer); - - return x; -} - - -int get_parents_of_nodes(Link * head, char *&parents) { - char parent_string[100]; - int _MAX = 1000; - parents = (char *) malloc (_MAX * sizeof(char)); - int x; - for (x = 0; x < _MAX; x++) - parents[x] = '\0'; - for (x = 0; x < 100; x++) - parent_string[x] = '\0'; - x = 0; - for (Link* curr = head; curr != NULL; curr = curr->next){ - if (x <= _MAX && x >= _MAX - 100) { - _MAX = _MAX + 1000; - parents = (char *) realloc (parents, _MAX * sizeof(char)); - } - sprintf(parent_string, "%ld:%ld,%ld:%ld", curr->left, curr->key, curr->right, curr->key); - - x = append_char_array(x, parent_string, parents); - - if (curr->next != NULL) parents[x++] = ','; - else parents[x++] = '\n'; - } - parents[x] = '\0'; - - return x; -} - - -/* - Find all of the subtrees in the tree - */ -void get_subtree_of_node(TreeNode* root, CharLink*& head) { - char* buffer; - // Check to see if it is an internal node (key < 0) or a leaf - if (root->left == NULL && root->right == NULL) { - buffer = (char*) malloc(100); - for (int x = 0; x < 100; x++) - buffer[x] = '\0'; - sprintf(buffer, "%ld", root->key); - add_subtree(head, buffer); - return; - } - get_subtree_of_node(root->right, head); - CharLink* right_link = head; - get_subtree_of_node(root->left, head); - CharLink* left_link = head; - - // Join the last two subtrees - int sum_length = get_char_array_length(right_link->subtree) + get_char_array_length(left_link->subtree) + 2; - buffer = (char*) malloc(sum_length); - for (int x = 0; x < sum_length; x++) - buffer[x] = '\0'; - sprintf(buffer, "%s %s", right_link->subtree, left_link->subtree); - add_subtree(head, buffer); - return; -} - -/* - Find all of the subtrees in the tree - */ -void get_newick_subtrees(TreeNode* root, CharLink*& head) { - char* buffer; - // Check to see if it is an internal node (key < 0) or a leaf - if (root->left == NULL && root->right == NULL) { - buffer = (char*) malloc(100); - for (int x = 0; x < 100; x++) - buffer[x] = '\0'; - sprintf(buffer, "%ld", root->key); - add_subtree(head, buffer); - return; - } - get_newick_subtrees(root->right, head); - CharLink* right_link = head; - get_newick_subtrees(root->left, head); - CharLink* left_link = head; - - // Join the last two subtrees - int sum_length = get_char_array_length(right_link->subtree) + get_char_array_length(left_link->subtree) + 20; - buffer = (char*) malloc(sum_length); - for (int x = 0; x < sum_length; x++) - buffer[x] = '\0'; - sprintf(buffer, "(%s,%s)%ld", right_link->subtree, left_link->subtree, root->key); - add_subtree(head, buffer); - return; -} - - -void get_subtree_of_node_helper(TreeNode* root, char *&subtrees, int &len_subtrees, const char delim) { - CharLink * head = NULL; - if (delim == ',') - get_subtree_of_node(root, head); - else - get_newick_subtrees(root, head); - int _MAX = 10000; - subtrees = (char*) malloc(_MAX); - // Parse the CharLink linked-list - while (head) { - if (len_subtrees <= _MAX && len_subtrees >= _MAX - 5000 ) { - _MAX = _MAX + 10000; - subtrees = (char *) realloc (subtrees, _MAX * sizeof(char)); - } - len_subtrees = append_char_array(len_subtrees, head->subtree, subtrees); - if (head->next) subtrees[len_subtrees++] = delim; - head = head->next; - } - subtrees[len_subtrees] = '\0'; - deleteSubtreeList(head); -} - - -int get_node_relationships(char *tree_string, char *&children, char *&parents, char *&subtrees) { - /* - :param tree_string: tree_info['subtree_of_node'][node] - Function loads the whole tree into a tree struct where each node has a child and a parent - Then the tree is queried for its ONE PARENT and potentially MULTIPLE CHILDREN - */ - - // Step 1: Load the tree - Link * linked_list = NULL; - load_linked_list(tree_string, linked_list); -// print_list(linked_list); - - // Step 2: Traverse the linked-list to get parents and children strings for each node - int len_children = get_children_of_nodes(linked_list, children); -// printf("Children:\n%s\n", children); - int len_parents = get_parents_of_nodes(linked_list, parents); -// printf("Parents:\n%s\n", parents); - - // Step 3: Convert the linked-list to a tree structure - TreeNode* root = NULL; - std::stack merge; - load_tree_from_list(linked_list, root, merge); - if (!merge.empty()) { - std::cerr << "ERROR: Stack not empty after merging subtrees!" << std::endl; - print_list(linked_list); - while (!merge.empty()) { - cout << "Not popped: " << merge.top()->key << endl; - merge.pop(); - } - cout << tree_string << endl; - return 0; - } - -// print_tree(root); - - // Step 4: Traverse the tree to get all subtrees - int len_subtrees = 0; - get_subtree_of_node_helper(root, subtrees, len_subtrees, ','); - - //Step 5: Clean up the tree and linked list - deleteTree(root); - deleteList(linked_list); - - return len_children + len_parents + len_subtrees; -} - - -static PyObject *get_parents_and_children(PyObject *self, PyObject *args) { - char* tree_string; - if (!PyArg_ParseTuple(args, "s", &tree_string)) { - return NULL; - } - - char* children; - char* parents; - char* subtrees; - - int length = get_node_relationships(tree_string, children, parents, subtrees); - if (length == 0) - return Py_BuildValue("s", "$"); - - children = (char *) realloc(children, (length + 3)); - - int c_pos = 0; - while (children[c_pos]){ - c_pos++; - } - - int p_pos = 0; - while (parents[p_pos]) - children[c_pos++] = parents[p_pos++]; - - int t_pos = 0; - while (subtrees[t_pos]) - children[c_pos++] = subtrees[t_pos++]; - children[c_pos] = '\0'; - - free(parents); - free(subtrees); - - return Py_BuildValue("s", children); -} - - -TreeNode* lca_helper(TreeNode* root, std::vector node_names, int& acc, long& ancestor) { - if (root == NULL) { - return root; - } - int x = node_names.size();; - int n_contained = 0; - - lca_helper(root->left, node_names, acc, ancestor); - lca_helper(root->right, node_names, acc, ancestor); - - for (int i = 0; i < x; i++) { - long query = atol(node_names[i].c_str()); - // Search through root's subtree for the key: query - if (find(query, root)) - n_contained++; - } - // If n_contained == x, stop accumulating ancestor - if (n_contained == x && ancestor == 0) { - ancestor = acc; - } - acc++; - return root; -} - - -static PyObject *lowest_common_ancestor(PyObject *self, PyObject *args) { - char* tree_string; - char* leaves_strung; - long ancestor = 0; - int acc = 0; - - if (!PyArg_ParseTuple(args, "ss", &tree_string, &leaves_strung)) { - return NULL; - } - - // Get the node numbers to find LCA - std::vector leaves = csv_to_list(leaves_strung); - - // Step 1: Load the tree - Link * linked_list = NULL; - load_linked_list(tree_string, linked_list); - // Step 2: Convert the linked-list to a tree structure - TreeNode* root = NULL; - std::stack merge; - load_tree_from_list(linked_list, root, merge); - // Step 3: lca will return the root node of the LCA node for which all leaves are children - lca_helper(root, leaves, acc, ancestor); - - leaves.clear(); - return Py_BuildValue("i", ancestor); - -} - - -static PyObject *build_subtrees_newick(PyObject *self, PyObject *args) { - /* - Function to parse the rooted, assigned tree and find all subtrees of the inserted node - Algorithm: - 1. Load the tree (load_linked_list and load_tree_from_list) - 2. Recursively build the subtrees from leaves to root in Newick format and load into list (get_newick_subtrees) - 3. Parse the linked list for each subtree, separating them by semicolons - 4. Return string to Python - */ - char* tree_string; - if (!PyArg_ParseTuple(args, "s", &tree_string)) { - return NULL; - } - Link * linked_list = NULL; - load_linked_list(tree_string, linked_list); - - TreeNode* root = NULL; - std::stack merge; - load_tree_from_list(linked_list, root, merge); - if (!merge.empty()) { - std::cerr << "ERROR: Stack not empty after merging subtrees!" << std::endl; - print_list(linked_list); - return 0; - } - char* subtrees; - int len_subtrees = 0; - - get_subtree_of_node_helper(root, subtrees, len_subtrees, ';'); - - deleteTree(root); - deleteList(linked_list); - - return Py_BuildValue("s", subtrees); -} - - -#if PY_MAJOR_VERSION >= 3 - -static int module_traverse(PyObject *m, visitproc visit, void *arg) { - Py_VISIT(GETSTATE(m)->error); - return 0; -} - -static int module_clear(PyObject *m) { - Py_CLEAR(GETSTATE(m)->error); - return 0; -} - -static struct PyModuleDef module_def = { - PyModuleDef_HEAD_INIT, - "_tree_parser", /* m_name */ - "This module provides an interface for parsing Newick formatted trees using C from within TreeSAPP", /* m_doc */ - sizeof(struct module_state), /* m_size */ - module_methods, /* m_methods */ - NULL, /* m_reload */ - module_traverse, /* m_traverse */ - module_clear, /* m_clear */ - NULL, /* m_free */ -}; - -#define INITERROR return NULL - -PyMODINIT_FUNC PyInit__tree_parser(void) - -#else -#define INITERROR return - -PyMODINIT_FUNC -init_tree_parser(void) -#endif -{ -#if PY_MAJOR_VERSION >= 3 - PyObject *m = PyModule_Create(&module_def); -#else - static char module_docstring[] = - "This module provides an interface for parsing Newick formatted trees using C from within TreeSAPP"; - PyObject *m = Py_InitModule3("_tree_parser", module_methods, module_docstring); -#endif - - if (m == NULL) - INITERROR; - struct module_state *st = GETSTATE(m); - - st->error = PyErr_NewException("_tree_parser.Error", NULL, NULL); - if (st->error == NULL) { - Py_DECREF(m); - INITERROR; - } - -#if PY_MAJOR_VERSION >= 3 - return m; -#endif -} \ No newline at end of file diff --git a/treesapp/sub_binaries/BMGE.jar b/treesapp/sub_binaries/BMGE.jar deleted file mode 100755 index 63661ad9bc9601141a3e531d195da3c9fdbc6170..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 105670 zcma&M19UIXwk;eRJLZmUCp)%n+qUz|j-Bk-wr$(CZQIG)|8vgwzBlf8_m0=Ssz=xA zS=CjedUVyCtK_9X!BByq{+`(0^rArj#{mrl1|%b*EI=zED@y-01_T5OBrgRC^$!B% zKQQ_KwlL~n#Q!cVBOogwDx#!JCnI_wuskOP0S(fs5eCkHCvT!)X((YCUZq4TUY?pI&`}%fFKTa*H^k(=`+}0ucd)K{r}&w|BAr<4?@7k!q!f}$ii5_+QQ7%#>Cc{&dA!p$>~zv%UyX8 zjgLHLGksFVu@(9UlpOvaZ4`bHVSYnt0(oGEcz#udx>XtjgVm@}h*-r4IqxNmk{#F^ zq}mb|g8<}#W}>}(bUhUkm6`3-pZbio*`HlstDjx3nfK|x+^!~4w#>0-DG?Z~iq9_D_9VSH)s*syo*=Yn{umHF-$ZF`zSb&}Iw!+H}u2>Hd3?6@Jn zdFA56eh5H(iy`ju#_q`RJYaoFbnwA?(^dK+z4QtB7RG27UdO(*O0ribZgaBxP$HJk zdXxJ3qP#WsY!_bPAZAK`+t2e2mX-KG_)z=d!+ebA*p_bCFk&|8W+3~!dk5zG&<0_VVOFN;Es zG9~RG&7m|n>d&l|Va=hkchFOcKJ3q|mceW9y|-e26dFIC(3JDby=tP^pOObX>a}0I zo^PXJMh`!d@o-B0K<^BH6B{;2OpP5fK&5v@*f@`B0rPfPGd)0Bb~;odtgMD_j-%~^ ze~L_YC;(SbBM`ZVTI($JE?%(Qa57hXzqAq-od7!@S{Nq}iL6}=3R?%@tHH65a*|`= zyc#D@qKOX0DsEyd!9bMn44xO3!i}axw@KREj&gaw%PQ>b2C95gvBb}T85M->AM(gf zi3&YP#$1SwJ}GcCLwJiSW8V3!YmuHyPTO zRZn?lGLKG_9P2f5!7mmB!QEKlSYt(vyUj9sTjGrY*%nv89-5 zZ4DPn7G473{TX+SVl+-7{S-L9uVJ77N<_vIJrtS-O`WVpTR*4=9E2J?4+H~No2o`~ z2ga`)tO}_`b;r{W2Yg9Bp;QjhA-y5mmer!O;pi#PDe_MY)2lh2AKU|-Ei{(J9K@RW zT&6o3#vEIG#0ujpCZ2&KV{%GEqF5?-+Hg;Pi$o?=Zu%)YJHFMOgLj97l|`w9+sMAr z;Vp`LM!KBx^@WXrOk2nms|R;Xd~K%joUG?WfsAvu1eZIAtHyL9Q)$$d(J$k0%WV5<8v6S(>>#Ql?ZzY(IncK<6gdwSnL=)r$}Fnnw^o%h zmDk0@GS=B{&$1@fnt}o_bxlpnI}%h7V3Y-QzyLBA7*1leGBkSA@2~W9V zgOEKKMvz(Y3HN@RKGgzhGAQ6ayJZ&kC#_>2sDGi%Bq34JE(T zC2Rd?74U`%OMQGRx|t^L9&IZGhUqf!r$$M?&x*J-jzW%72T7N?y@aEJVaiR1!O_E( z{R7uX4y~lG;*fS#Fyt(E$ z?NJaeb33gz@7r%dnmHlA5#pqssR|Vc!FKe2gD3)Z%l)<%l-^=@nc(T`%@he{8Gav#AS{d%-WF|TyD^dwIm^CT~O<(y<>h} zK)l!!t=arNDIC?H(O7nOB8xrS^zHC?FhUC{mML?`!x_(Hf$nkY%*upOcyQ4#t!~TD zQ=r69&9VaWIn?_mlaST6N{I=k+48DijHjMQHCAZ3Xh7va_tvXY=0*MI+S#J!j;%cA zDP+N_unhf$fhP`1`c)AElxyp>An)fDg5%ogC?F+z^eV@&pr9aAQdv^+Pmu!7>%1$~ z6m$-}1L?ZCvjBNiD?a^7RMxosl&}T3onI%U3GG<^&T9?wJ;az8XBW;P>`Q9RGRfJC z`AgjaMjszSq4F|4f2yB<=P++VIpf5F$M6wYht2zOzuY_k?Zf#}t#h(1CfKKxpBp zC7}cJ*{AY?P?=ArD|me8XnCZv&{s}5 zgj14;Ur_jgn>l{RrY8uEDuS%C0ZfDrW1tdzV( z3BTe}M@!~B3(b);X}0va+}GS5PM&C(@WbB!yBdb4l9r99GvE%*o$;c% zhU41aklYY5_aGNHsHwBfmgZwx{;4NBP%-!X__RLJYcy6H6L0kYqa&Y|DS3Qeu{Zw~L+NJ}|m+Ue}<^$~O z$UO*eCx#IjYnA~!fyWuY^Ze>DIcXD-$>-zuPP~a1o3WQHMV(;{F*DvKRxbPzH!Bx?J8@}@}dUfUGd zG{ncqc-!b`Lp0lR1NuSXvK9sHz|4F+u|F}f{5(wl`{x1+iNcJ$Fl7|jk)V=lZH{4T zZjV=%6=WBKNjLpaUo99YGuB=fE(hGG&Z)R>450%1-5iE$Nd<3cbQsauz+Uty|Es9pWq6a;|~MU|BS)vU7>}sW@?A z-k3bjoq|)D#i#OGlGi2Yrao$ztiflG!$6CP(=-a65buKd+6ER&jF*tXE1V=x2hn@7 zxzl%s`5Wj@HFg|luUmW*_ zLHD4z&ep-m*i3@2NbM9DiR2wA7IvW~+{Z1#XfNa2-!OQ!VdUX{xQ2Yf6jYAkAjpw~ z_^eZgVI88dBn&Yk=H34c;LMDDv^6jg(8u5O-;*`W|4P`0Yq{zE&qt>S7>D5Dq*Y!p#Gs@d6;0myPwfx$0{LOXj`}OTL z^~wi`-!1$GykP;QKxen-qT)^euDt<^0Hr~w<2VSkwaqq0GMT+5yKTWi=tp9Q>rrU> zX>^Oao^`afUy-GM`*Zk%ImZY0{`vY~nr`?*Z9gf~LF#@eTLpJy^T7SdsB;{8`sT#U zqwW~L6mv^Dp#j4)l|H&~-hN zc`F)Svs6ypCX9wL59Esd4vl_Rv+RgtH}^R_alQ?1;fTV z_|f7oK<9@B7fiTvR<24@K!&N{54AUzXtID+WCqWp^Uy%Z(RVqS`}oC?A_18ExvaFU ziRUhq0w>76e-!O<3H4NU)ufUsDiseSzz2Kh2o7R=s@@Jv*z75zfB6vCWbY*=!LXr? zk|#J_$Uc+O7VV;Zct#Cs<9EvHCvN86IjgWXE9vwQ=pP$Br2^j|?9dW9VUu=TChE9I z2%Zx(J}x$XM1jy8LVQp0Cn(`Gq{Ga0+oW){(8+QIQr5PUa0W4AlnS1zcm$8Zj!rfCXhYP5v+AHDOZJ zYs&qmvT&{1h)RUHv4)ejJ{%#<5*2O3O2d9w7|80>WYdm3BOR%ddVr9Y7R6kvfkp{+ zT|r25F&pHhFXJoC1$2X(ifiCn|CoNX$AKdLDT1@8B}k@Z3(t(@qvb;8-FKdncbPQt zf_vcATlCeGehK#k_>_dKJ~oD;zg-Uxazd{lG-r;WKaPfTZ76>;7*FJq0Jw^}|MLzZ zc}C)n&kMgl{6>A&;PfsAvNx=&WKHa%jJ)5UOkD=(E|_nMUfUJoUOxUxF!tMq?t~q+ z8!Kqrqul12SJe^`OvBYc+M9CWXF1lX3-E9r;kv(GQ zE5q)}npCzs^8G(G$d*R4^)T zDgk_MX|GWl8vl$oXbafJ;I+gNc&IF4{6RU0*fD);4qgQB&cvCldNgIUxyz*f`MIT- zz;>ct{hY+)c=Wg=FQlJ0FMVsUV`URI<2dPWR*~~bN;P$sCMpJFttM8B1WRQC-iQ7q zpxp$_8!A%Ew1rEX*e46F_b@*0Zv|MfvTHlas=UkKrJYmuUUDVz?-#N5TA@{JdSmBt zESO@_@{(=`a3e=P66PccLg1T^iQc_JXs2qL4S`XNBRC|4JT^xfGCmfqz4srAJ$Xg2*A%%fru?>&yhSmxi-UD zI|3f-+2uc5hO=4{cJ(Khe+I?Y7nb7y`6AZ^oNlC#u-;FpP_Hjmmz?=Jb`3a@uKBaJ zOl#?pPp0{WPF0P~n3t8JUg@)2whel@XnRix`Gx6%R;Mjim&=h)borZx4O&r8w)urx zzeiIYbFHEdIv*DGkyoIBGiW(s)JbAE4D65(0zqC}(s>J0Srd~?#Q=4@9fMilxQ#D|Cp9oi(12T-td?lGqL=N}0J_q?6i zK7v21kQit(Wc@&JnR)D>1m(4ED80t>#{6chqQ$?`r|u4&MSuOq{b9&}D-6>E&2@q= z-!P(_C$_r+f)L#Y7@$tR052}-E7xs}{v#8F(dkG_*bYP9;fO^T0;5o^Ejd!!$6MWz z@JA|WQ`cixw~WBW?s*IiuGF_)t;$#&f!*tnDj18(+4Imc*cJ`7x+8S=2U^wB7%Lnp zT9W6XV{j@}g4ZD_91qpRhfP-woz?)%i*)E54k0I^yChlSt^*-c|5}Ka%Zr#koxouU{ zHWrJMQ&?N=rf`_6SK32ywE#zLI=_Y1lZ!!=!@l(~M15Tpa6Do4u3B`ODPt8xoX%Z# z6XwPji$lLw%GGjnC52Cn@6?eJx)bRHo5SSg|h2oDZ^MyQL)-WqEmm=A|rNQ zmAH7Q0p8KqJAi}czRp%|fy!p)tR55xz{c4mifH~*LeWDfZ8 zTL$ZX_FWuz^eZH7`)UTPCfJBw-dspmD7T*GK%W{V4)FHCm>e}+g!aMP8nKGNkO9?` ze5pHwTOaoUHe_Wj#*2EJ-=9rJHz}7w?tOIq!0&<_=G$N)7a_Z$L-dRCr&BvIT&MLw zg#wkY%=dSL+}&h%^z`Uv2WZnMpZfz?d3qCL#FA@|Zia8)veIK1g3^*OjFhJX1qI0Q zjs^n--bjCMf5gYhN=s5PQeucok9{LBQc4UIkRZj!3IEGE`tOXi^jOeeprG{FHovqa zG9%?bew6q)Noh%@e`Wqw<4E|gith&h4g3c$`4|5;=?^@hBF#GqOZyTSzGZ}-#t2Iv zho*c94?O4zBKq4y=G7pA@1lnHiJ7B>&e?oioB?H=V%a@RJEk>wWU9v$Y!i@D0cj*%~| zmkmRYxjB4rx9>!nrpyn0hak+f?KeZxNrAY2fgiesgs%$&&Cd(Y9>*_EcJHP}m{Qtd z3?j=;QOOl_GA%x>%b$4Q$Hy;dy45a=dTx*8(7EME6_z%RS}+klVQ(JKOSBi|P-XamB9z6NKFqTEBv*)CAcdNu9=5 zHg<#<77qgPWS;sgHt7!b*swPYRefp@@-IU>k{{6nxfnzE`d81pJ293 zF>YPoy*DZp232Zas&m6&SMFiRBZ8hB0!xtUhrSfvWg0N> zSO8bl#nK+FLmpne$A_Bm&yRvlF|h-rcT*3q@ezAP==Ri+L+y?HBK%*_4(u0^LJ?z2oUcq2VtqjeZ~OhfLkzRn&@zn&fDS9{4ivW@Y%t-p9nze}{7aEI7_ zg1hl)XEbdv&n0ugV2G_!_qSbuGDJUNx%^~`zqe<#CvS1UP&TMe%K0fQrwZKph~&Ny zx~F7sp@94JAe6O&_-esuEf?-CTryQo+<$9GljTiRFvePhtO-dc#s; zsTYPho&y9c2`ikxBPXgvC6cN5xQ?YwNL0V$D5O-X2~mfq7DW8XTQqAjCoG#*e>KTDIgX<((^-5dRn_sCv z{FgsCW;vnK;s0)J--#O_nm}T$7zd`fI*zS}&o99;E${F!MeIp_H^7U+{Qasq35@~Y z0|Jj>fdFyjz}&!`bi6t{UatGJ+{r+>ky(qeuyQGG>UlKF9Fj%kNj5K}GAV_-zR3_4 zeaTp}ue38>!9eoW|>8{y=vyFx~z}$y;&BSB|rNh zJ9FLbn344yV8#=~|3C>4>6eB`;mMLc3vjLI|52W)|5OlojCISrXL1J;kAcNCC`r%g zky^_TKW}}B23;-5K}=9A3LC_deWc=_#;9D5(@O|kC$+q&->3#1wD1S$PYSte1uO1N z8AuY_10|@W#mY=aVWohIjY5}*j$-j?YTK|WqJG=;dKC-rr9d@V{0ta{uJ z9}s1K1_Kf#1FlI${>c!!tc z_J>AJuk`4uIx%7mvSKYWN>5s5!Y5jItHF$fmTCdBK_=Vg5|2iu1$Ay1e4^jzvMtU7 zzHhN7i_Zw_`$mWw(NmrwFI7Zljd zYJ#9E$YF&Sg_tg$%>H57%txq8GvS~c(n-p~uxOhd;-$>~QQY0TTUVox5<}ohTNDa*G*N$C1@$7VYZ~ftJ@=B$?Ttou_~`d>y@CzUOX9O#NP?o5R%qJ(M7Q znTBG1Ei+>93;IB~rr~qw!41}Egp{qipUux?e5 zW1LYOsiGsK!l8o5^jDOMk-8706_L6ZR`F47o<>D>twhCn=$Nd&4;W%IN^_qu<-Q>E ze&1^*=4+n&Ypr^(HaZhCdNRp?6Lbj8-fH>=z?r7zP8(Q(y>a`+ zt5|6V1f!)cOnf^fruOfob{(aNsZ&=XH}KE5V3&4-Hzy72*bVP45TG|SJx^4)6T zsaf0#S>^?L@&$Z)NA05D=VDmT97#1-#k>!Amt!rk{^W;kp3(~_WDuW&I?Vu*1Af~8 zUCZG7P3UrNc=HVL*KgfhF_3xD9sQ6HF$55h8~pz?+5DZ2{BYq%rz$G9kyUtwM7uNcu%ZSY8{<TX)juZXP83k1nkHlZVjX;&_}^*$GDF~11mGQjfF2kp8QN7W{` z^d($Q;kTf#7FK~|wRH@!R-(2&#&;6Adn zE#(#Ad94MH47eI6Zs4$>V zc@|uIEbMXf3+l+^jxIza=U7iXj9?u|XLDEY5tp+;%9i(q3DgY;Rxa@8-)*6Vzop6% zGu-0aIKS2qOOuUZwZB0B`Oq$$;*n_n?yI~0y9MpP4`^Wh-y>50j%Xxm!16DcH1V04 zzJM|l6GVlIrqF}wQDCRh{}>fSg1_-gO*KFqKT9fll5Z|t)3g*X zq_H_~T6bz+pMPqy(XzQtUQ~6nIQ*J={_Y3b+w9!&035$}IZe5B-G6`I1}Vkh6X$UQ zdL+p?)+KK*N}CTV!2c!YcVXw=N-XNTsUsnFQ4pEvn<)!=W4E=;OlK zm@g=;S)jML4T@12j_~XxH=MbI0=r<*P&N&Bo?AI+BV^lCTa?V~Fm1{+WEz)DyL8Ji z5Xar$9zz)pngRxo{%Uet-}NQFgy!|CDL-HzVQiNAyWelKg!HavV7AmN01bTxnaA-Z zX5!Q@12d=@XSkBr#t<0T6BCyZ^<_>Yo%#_|+Vz7o7aJe(Qw_Y<=FvWjdS<*Q3*IO# z%D4-r#U`g|tNqPKyiFo0ue5Le_2o}dd@X+o4fOMAn%0@A-MTi^Q5X(jiItw`b2e=~ zy!Ru;tzRjRQj!AUm#>3klqyTBvuD$swmsEX*I|FOg~;v;d{mvtY*U?ImmkJuN!Up5P1yTD~FV_Z5$rer;g zg35Y%qZ5#$;$?ZuBQ?x%mz=XQ=_Myi|C#Cf(JmD4*6}e1uPHD%zAKu5s#dYGWvLIh#vMxp2$gm{Z9xsF`gik|mhIL{pEAImddWGL(b(M+aST_Z&)e zk77_l}$`ok8%TCI0lWx@^a#=H;5KQa17>D zHWsL|kOn^qEA_nxc1dT#3rwisGZThBmXGb5xKVV37Bw&IKHcsM4Dva$05;49h;#}V zGG1Jb%^f>oS(KhdFVHBe&e%oZ%ok@xVkKhB;81nSE@#)(j6+!Joeh`){oCa+Ucmv? z&=pUsMHt;eG~^|TMpncHlnOBMmG8(StdAAWDzSkk(C_#L9zyF zqzmv0aD6kIVD_D;2}35cV;Vn5Cx6=luGR&dn*`P+H7~)$DND+>3r)EK=1$lQB(==p z&XH62pu!C9E5`*v_gT-+iXoF{4KE)mr$1Ey4$)*gzxv)4ONZ?H_li>Ab@`{$m>&Z= zl!>x>ArW+Ya4ngHZdhaEr}S5<`I)Lo7ls-HXhVg5u$hmKQ>{_J$-099xiru%{*M%` zp~cJk+OD-}tB9ia=N%rIu!4F-Zv}X7i!Dv^59<9T-+@?k-460g;rd1+hpFd@+P^$? z{BDukGp7whGQZNR!;dRnyV9Bka@ zAHs5`!F&j+d4w_f zn=`L}G=w~_omydiiq1$du9BB~Q}4O?e^HhQL0l#HDG@czPrH)M))K_$NZZDqtmU;Ctcc z9QsYBn0!Q93?74}Kv*5ON#I@zhGSS#cyMO_On&Lx#3-w3V-Fd@f%XoGFV`>QOy~se zi}*r&WIV2@JF&62gFx|tt|w&W!W0c-n>Y8Kekc2gPVb-aWc+|WqiTq_n1?TRdkV_> z+ACcmh~t|%Z{hr@DJXPG+lTW*hM@cb!=pWkS5Vp)*e>sx;JY#XgZBW@a}@t6Zq|Q+ z6CdXRB>-SKGY$9FD$vb9p&6B@hjYgc!DBIF2mh8S(9Qnw*)HJAa1sOYIa>3MAMhnK z+dBu8@F6GUJC^g4GjS`20%yTkzFvS9B8s`hbjCe^43lLxM>Sv#&XV1nFMte-Wi^Mg z?R(A?O@?)kag^CF1(Kf=5-TJhmHiFsmiTVB*W!#>IAJhh1oYQW6n$(rW?oon;UpLKvhX;OVaYvL~DgQjU{EuA;;fGxTIc-<|hbB z-Nh8A45Trqbhx<*s%Q9xUonc7b5&fNSy75a1^v!TbH)|R(nne2KR`Z^j{Y+y!l$^6MV0Mx$1LNs zrWw3{W;bpLlBNgz5wPptWpz=?XL1#=siH}7G_FZG&DE)+YMc>AIck&hgRAJKs+o`q zk!qfLxtrcbbQ}3ln-75sMJ$Y!UD4?u^8w9XBIN}!sO)i`o{0-8*wvY?rJNaWah^$H zu5y0Jhwhu`ZZz~d$My+YgFsl5=J^$>*o*I&qJ-K7to7r^RTHLcZXHoMj7LH*0tz$W znNlW3l+v#9vb2gtRRPliM*f?m(v5GZ>~t%lt-qnqa+tIV-pkP+XeWWXLgPvT=PY=m z@)WU!V9l`;XQ?#B(@zZBlEzL6%W~Mt2IftmvoNJ*JCk!kRc}`tGMFvUj^$b92r;-u zC-LrXJj-@%l*gBqeqIyPH^;{K$^igDXEL{_426m_%Hr@v$l)n;MWXYDns#Nm@I_h{ z`KJPH8xIVheGeQqY&=qGDZ0h9MTjSo^Tp1Ir&4Q{oFZ!J@#g8jGOHL7i^=XO)@v1@=ux({a0Tl${im){XqQIL8-GTED}MM%kHHfiLUr7}Wk zP$OKOeDu6d%!8Lw;fh(x-nvf15oT-~E^5O<3TYIM-zJSr3HW_7~n4gy%%sMaJ@=G4hpbEm9T>Plp=5g_guBPA#7u3m`2R1_?2 zBHBx%ih3X*UTs)hVjf%>j)^@P>N>*%iCR>_Fi9~UZwqMF zKNS0yOk|(;;nq0SYC4H2mzr^&2tqFxi^3QZnkjPMps)mH72t9PhkQGf`Paigu;@rk zm(1*qa36fI2*2#2^x{_i-P6jX+D!f7x4ue-k zp}$z>CP)e+CAg>)p zA?KoA1@8jhO~@}!%5S2|Q=XMf>{*qk*d8Oz&r=&_)MYIB#j*keNxRjVJOx2+wf9>- zmI0 z!KlY@-(6`<#;DKW;$_9rZeXZxF{&r1+J($RC(6l-U3|n>V44@C+68?B{xyv>q9nbad*Q6pB!X!02( zD9+#C{HFoT?-=j|s>6U7cpP39rrQ^){Od?V=GC=Qs4uwdhv5f0Nz|{DI|r+)oCBKE z>Q5%ciub|;e_AZce#AR=EC+b66URe6>dJ_@Hu$rtkUUiXYAgppN8-*};3N;u>kg>n zbAQdSL?9Z*U8%!ED^!wYq+&kj0Nmh-3}{Hm4UPiT)lsw-2gIX4rjYrTXhB=-dSL>R zo;<>czGL2!7n4xYb0!GU5#i3XKJuJHssY3VDDmIaFKor%J0@wIEJF0hC| zB3%i4>y-J-NzS;6WNeGI9(i(*Te=J8CT`&*RXqx7z_}lL?j%B+VE1!?N;JE`p;IXf zo8j`wuE-04cDNcLIWHK_Et30BULd#Kemi=z*#K@BxeDtyTHP4ly;lx!r7!GRo-URAi6fvP%|8NmjXma;U1Uz}@*Z3tcXq zIMbowqtGexz*CV8SPw!ENI&COTW@||COtH;Ki^ib@H;Jd_i=9J0RRKZg_X3 zdL(+L-&lktLL+aCNt56XfQEpJKyKDTi}<G7M}R9>xx88x__J>^eBW z1>|M4m-)NhXb;fSo8nHN^NVZ?@x?{j8)^m_1s}L!Xh7&UG2<;`&uq#I$&S192klO3 z$_vfTGvnZ~KC8 zu^6>s^rlhct$b=XKvf4$9*k#Z(3TFAmiAbGHFCPW!;ar-J&vRR`d!uzn8dcUfE~8l zcy=dNy>Ys&UfZonyscr@jfut&tE&C}i^E|LTD}OZ>H2KFV^3bcRK?zXisDSXMSIYS zBi~OcXzsdmJMuwy{J88|$~BaiJExh5Lltz$PHkPZ;~cfIau&$X+71}_&&InlFvw1Y zElF6f?=TbOxun84c9>}%4-ezIc`bJY&2NlFDS9!V1k<97!1WjfQtx| z)}k;4U)6FK6osOQ>sCZAL0r)`%CsLnHRv%kOg;(L$KTwNRhCUO)H=_Ww(hfzpG~vK zuTx0|z20A)O}8dU?#F*kdrUolOgWK#-y{0oqfOsW$$C3$>^%&)jCY8HohOr(iIK@l zS;Ntb_KUFxvK#GAzW1lg>`q4QP9EJHf$mNw-5gE#r{CTj1?*1#^@sGOhus_9y4{MArh68jLxs#M42$@Dt(^L2lwN9UGU#~PpT2WB$(WfyUCk*zS!l+0?w1Y zM}2qFj*J;RPsLzI^?XUNv;kK!fUh=bDlBa;k!d$wz)5#pFZ^r-WHa~e({1sU1jIGZ zFE628FJBWpVL6>2oiFHbcdy5IUb0LS!q>Xt049-Z7ncWgHDSHdk6}4p?{}B)6gbD& z7wW9?uA}a)+b3grZnN8Iy4HJ@QT=|i7WMEVrJO>j7?7Vm^BwycUAhf@A9~otQ?1_l9NQKJ%Y5jc;-xlMZBk zAD1VZ`L^EgN8|C?fp3)O+xY-{v7CdDAI97VCyLR7giyuX^ zh#Bq%%hk)epY%t~42H5V&xNBqO{F*6K4c_$=BeJ3sY+%(hnU2?p3h$SRUi5b!Rh&Q zE#%=*wBO#_+uf%hX$qL1{_G;%Q<(-F*`6cb@15^_ZwE2F32;MwLptxzqdWZy#2Y_4 z-yxaoJa2}c?HKy92Q zw%;d$z8yB4Mt6=P%Gr6`cGQkKzaGQH?mzt$UXqH)*?rFLNA=4ssu92IhCk4o0q1MY zAApSqeHIA27{DE*5+BFgz`3nBgqv`ucl#?3-&gfFEtWKUHCy#tk;T@&HFe51_1V~Z zUbfSAH+-h!du+r+x+uJh6)K;bSky19!84iJo^7PY51rYY4#u}lI{=5*!z54F>l5O} z$Lsee-$eC&nD}m{!j0V5LrBcVQyA-L4!X~KNZw65HB7-JzNh)H0^dv5soOd^Mp`y5 z_~?87Q74z>M2xVF`}3`8szuugd*^Wi1EscI8avF$yBMGMS?7bv{hs*B5(7b1$A)+m zV5doCccp!>V@UYy_)8yk3x%?SHGCpdA=~!*$;3vk`J*rviR}X=hAco%G+)0w>dh9?lM&H^QmS%$!E-2@i;Z#ghiUl&)TZ_14K^I@1AV7SF|5f;Ht z763_?z0tNG6qD8*YWNuM`y%=}4QO8WPM*w^n$SwwX?+WfmzftyJs?&f*}!Q$t1`J< z=DpcqVh+sjTEUBo=nmC<^b=c-ZQDkV21?X z>6O1~a2Ckmre~;&`|0l{Cggsmx8*r|_|298$BWkaqGX`f@r*Uks{pZrHMpze(=N0q zDymi;)_!=@9Ev1q<|H!x%2fBJYNpU3Aqg3f11Q{r{;y(RCa{OtX> znq{e-?^R$RXCniqowhbq;q=E_C{IvU=Xl1Wr*>>5Y1x@}q9v|H?GW$5Q za{JVXe;sq{-)Z@U#f=JorsHM%x^{}6!R9FmY4;sMpN7|Q>a)yu_xAn8I4?g-&~|(n z0{Gqw=8{fYF)>>3PKLixOP5`JPffGjEVpUq^{#mj+Wp+r&HT&QVy~(&tlh`%RwTJ-`mR&t^<(`dZ*VKq34Qxk z1y--|X0g@VS_*ksF1YB#SOPgm)!A1_a9XIo|S zbpw1{-9MM!8ht*4n3vBl&*}ep*lt!eT|eFZPm$~NR@=sDJ>Bl6X$q<(H9o*M-J;bh;?SiiHF)xy&dI~a!Dy75Wiyf6_a$vNO7u04 zIc>`#YgD<4WxArp`6X@6B>#|Fa{1fhHXQzlTB?f5=7OnO&qGvgz5*ZM?{U_5cf*

_QkSa)1N+^a7A_yo1 zDFPx=1%Yq9cfLDw|F_JXnIt=#EoYzid7t+=XLnWgpC0}0#rJ}iQdHAL@ZGvDk%}HY z*@_X0<-b;km*BpEzGi_9((THs?Rrs)YvGDgOT(>8!zaH8sewDcwfT1Qeu+Fs2Q7~> z{`nsBp|>qV#6L14BPzpOL?>P3Cw%L7hqjVqLi@Ww&xUCaJg@0|dtu-(a(GsvMw&LO zQ_b;sXadUN8HQ(#ZVLT5lMXua7hP=5ntK=h4pj0N7gD6Bs~6rqSeHc;7dYzYJWp5dvv&QoK3&~j9v`{-@t_S2YvoYw8%Kjjnsv%daK zSu9z!t?dt)ZzG+)%Q)zCvRVqNojHxsUA%8Qa+-Todd`?~Y973^Z@7PQHrWyIXS4m% z#Zv%C>sIYUs)t95xgVX+pC*Wb;{M{f9yhFruV7q3;e&*Z2>OlQ}s>7dd!C(7Oj zsKuDm_8FnWAdzPys&2a>RHU$y{C(R4<(((_4R%)FbN^EA9E8xF^|l}GGwkKAX7$C9 zBK}kawYct*dV~hEq6UTJ=r_Cm4lWvh$?Xr^0fQgzOaH3*wfK`ZXh*@OZCqm3&b2N> z=>W|X$-(Hf&ZGFF=Z|E=RO5>J&eZf|Uh=aa0v_B)=e<|$I*yr^4mPR`tA65X`RsgFZM9s$YMh&Iz>0Wn-@Mer@70O{y1Vz&Ykyk}FN{~VEBFSG#A_Gk ztG{Jbt#~W>r?F+Wk+(Ri)YP9!E51=;Y#_$Aj>QgIfVs>JN|`5n9p70P#P<0$WOB1M zr1zmM4P@d5>oV0@Qyjo&l7#F&c@X$MKNxOiK#T>wah$8lY-dSv1c&Jxh{S==U>iw7 zd>^xgLGr(Us{UJ|B}o9b8J~$3bgpKZbm(ie1e;$2S4%o)_U()P`;>~?a0CmnPL^9a zYsZK+WL}SRlXFbhaWn>whb85$<8L$46ElN5ac)lFTuB1Sv0jpp(YIo0kkyxJ0W2=B z&%7K5asoTCra&DN#ulc~eXpxB%_In4V>cYY+mg=Me=Fnr8tXDS<2GEtL98YYU}e@6 zGpWfzM{ym}P*$irt2!AduinA)`IM>NxtaZ^=q-Gj9aF}Hp;xzAY) zJO4XszXrUj#juUw_!fcZ8 zn^xzTw{qk9G$&KJ+go=ZGx(>9!I&*`hku=%vx2oW^_1&#zl_`Krl@jGn#Q))+ks6R zG<7lg$VsnhtOXPaQuHuoX8YF*vm@^Q4fyt3CSg^p8fFc;@nXHh+GWlq+)E&EH+41s z6)OB~N)Bku1n7)L<1lMAqWfVPIKoX_Xo`~qxCaj(TIoDcA@&ejYzEJo-cQMqkFtQj z2fB$sbdR)v->DR9z`vNr*5eh-Ozfc=WvfcJBYY1fETJr5f(nwk6#3XO;lSEtBBUYDFcA1c5ahwWAUPN~ z5gTNhgsF}G?l!HtT@f*6_ri!o4}=A>>83q1x=PL`Tab(azd5u7!ySV~D_%|);SdRZ z1}TE5*YB_Et^BYDD?{HT`B@2p45OctXfj9R(qCpev#5%hf0Qr3hp5Z+VwJ1u<)tvQ zG^iCM7qOV!L|eac#OO@(QiB5OGbIPhhhRi5Gg4z_d90a% zeczS?Kmo%2j@Km#`nb2j$dC4m1`V0EI60PgVjmriW3flkEC#j6$w$+sRd{K$Js9p| zdF*xkLY4EK!AI7nJ=Z+i=%6__KWqbQT0ytX;BiPfP=5KCf}L3yhi?xzu%UGsUEJe0 zTa;&M7Vv~=1LXlFiBGa($?-JekCNh#>hWroZcb23Xu(Z|0V0dpq#hkaS`@~2+! z&*4RJ8zIe7P*6|7wMY?*Vc-Kmx|>Mrr`}&wXOtD5ofQMe4t0@Ptdc`+E!ops&d`v1 zHD&KN>|CH87I3D@S&}=!5>Bs27-+yVnC;QRh^p0iXAL5p-z(U1IMl2m8wen0VBO&q zaj$pC`5O&*VV1sXey{XRzezFY-OvxrykxaXy=1ge)w66Mh|0y@;PLv3LD23G%x*AZ z7{8Y|e{2@)eifnYH-JIl(B`&;-7=r1Os`nN6)W9@ly%|b*bupFY@8WzF|a5uOO zj~{HQCm$MjAvdvtdy^ zh;(uocCU(%zw+7=1uYgFRI-OE<2F^{#07q@&n0EC);UjL7~8(#tCem@BQ+Quq^5$Q zGt2o=mkP+c*?q1GZ}#DtrJLptQ6lF#66ijbzJWft;Eb$Im9xbGGw=Rj5;mn_;P{K# zR4NNp$k+#l!_dUN3lsElgDh&{unZ(wiDq0FR!;JVr5@1)JZe$%f*L7#gd+`DTuN-n z9A2^b{sD;>)+4&|1zLCDu%*54Piiu`Pq@%GPHQWASMK4!NtG47|LL?LCq8J~rlRMC z+Aj%{*OoW!NJw}^cB7oiPsS7@#~jd`hd2yO^7$zx=#ETAS7t@Mq%$y9Yhe)a+6Ajo z1Lq7R-p!$qrw7qrhalp>o%w=iZle|)pSf0e$>Mt5_T^5L!SUcX< z=AQW%ETk)g5yJfJAIs@*i=C@d$IO$l*qxi34t5aKX^?XM(p;NnoXC zT$Ne;dPLl0;+Q&R3I-|LSLXNSE|C}5`v@gV@bz#LV~4P6swD6ySs`i=8wz8B^QGY2 z(!k9gM^&x%3!0mLmYkerZzb3qOtHh%VOVT`pX8$Oglwkct7&YK-_OfGdOiBmnCZV3$>7=@1sVu9n-Nyep;Y3)VGWfz-F|PPgwkYR9BQ({o1;JtT7$iF zGay<-A60@Wp_bDozhDQN^Q}ehNHobHnbpXe1!H1tRAs&l2Tnzna(yQXK#P`7LU-m5 zernL$m}l&^p3dPiFAj1Kac|t&1(fyN?EXXlHNoZ7h?Ku_%?3dh#jRs842l5b3kWI} zFepxnf5nzH4<_w^!l7Imz@Pw{(H0YxoQd(Jdl`K z`fY;s-aBS7mcEDOoke&)vJ2`|b^!y)^M~zf3Ho9M`={65wQK3?7X z;(}m7QlGYatONbN-oe-FgTR3WW-?j!D%n94I&FG$jW+?M0%D!4!O<~7(vfgMK9^|c zhb(%zm-?@OE_51i$UbX}<Z<&aQlBA2LA! zQCmeAn!$PNDDpl()Yf(Rzta zVKp*L@HY8YpVBizX@waaDGTh5I6$;`z~u-%s}hC-G2G+Jvcy?HM629)9=^f5 z+^c!Ypy+XL#MFRz&Ilv6yMGq(@B0G#Vvr1Ez_gR8_6->%ChUOAj5ejFxjechi;wJw z#dCRud(fx)j@BJ}WLjVv9j?#YGm?3TFbA!|Zwp-NP`ZEHm`q z`d^fI?Rh`_%)8R+_YzoGeh&*O`;(Ns{uWyd`}-teQuxWY9Y=Cl5$Ht?;x8-|gjy?J zNIR1yv}g|Av1+1KBST}c9%gV$Q-pF9 zJj+zA$K^@b#4+?%?M8%KY%ZTt@#mOGxm#tJlEIcQLc&}kDBJT- zk3L>8BT?Y69+HH+bPzNiB;7bq4iA$TQk9lt{^y3 z;ML>TE+h@PLH9iV*3CkJmN0b5Q$5(5$uGbaBMcmW{~TO#+cvC+Z>!4;Mmw*tCEr-N zYii_%?B1mn-HLS?Tmqr|jwB#I8{Eag z2pK)Xi=!ER9M-VWeT&&90G<@zuUFGuM_>KSsD}NQp)tDMZcB>|=BcY+(;uJa zFqKn;8}>5ujYf0LGAj?X#82l`416|giip2M1_eZmQh{obvqZ~|ll!>vFww}U$=AmD zBRva%c)1FpTSi+OZwT`Som^qj|6GJOc7Fpa8MK(RLnQB7gjKBJi+{9g;@%Q1hxu@H z3IoTqf?534ziH>sxGQGtd5RnC*5=VUe65{56&i-d)$RtE!#zV3v|7+(P?q36m4q#Y zHMI+(-uEh1x(Q!n`dl2VvcO) zi*0~>CE-E)=Qi|%);BWvD}N84nSS-i{0>Tt$>Nhbymgm|Vz&F5CF$PqwOmjl)dyf) zE2ld)5nwAI5{8$Wb%33CjX?cg zR8m?J-PlJ7MvSr-0zi|j{E~@%7|w13AJ?FBX}m`Vp}Ht+q(JBR#u%Y_v`u(j0WKnEhp|$h=AXD=_g)s40 zK&jB1*3arq$mtk1)#24~Fi){jXNr+{7V@Jv7SO~wFfFw@gT z{L6A4*#0uQ{UHo|cF7hOA-dC_Zsj=MXZAS>=89VFezIySK!foru${6)vo2TWKR zSTeRZ*lnfStkD%wbV2671DqzY4;wTtXy)6BO+~^7R)rC-)2xBR5s_{&A|82GMQYj+d-} zYhXE{`5U!&<;%U1m2cM{(D^GJEGCI=+Y}|o)h){{qo|lc#du8)5&Q+!Kk1s_DVJ0y zZGTsDL?4nq?b(x#4blGi{w%~I-Um*+HQB_jWX($kn^RQ*5gmn-zNwM3ys!N`f zk5*jUK0mIZJ`vOXP0C($ea{Xkz*PLJ0VO%EC-utpuRp7110 zT&&}34iS*R5v@2NutsrWm-UF0IInDB@_``Nqd?-bsn4DipdG;XiN*SA!ukAh*Nw2b z^jon#hBM_Y_b@5lQDN10Q2%){Jy>AN+{M1_8;Q9OA&yYnUyk~8z|FF{Bh2zoz^W(g z?TApvAtWHS0>Zy+jk2>c5Q4%VV|+Tr*)<}+ckc3=ARZWHX<~Fe@BJM^ZVT5pC_Cw99;E zgs!4tXVoFrJO7urs{u$FGpDC_(hY_lctR{}D!E#LtV8N&8q`F$6KI zRSgy5+JqpAO+zS!be_9Y_kOd^5GDsI4#SAl9KbFpX;0j+D=*F9fNtbPb!>uVK!$w- z?7kGWeP=j#4yDidw*@+*v>pS1Zkl|^h0gK3OPnhK>NLYzEcO&>=n$fxg2{N0!ZyvE zn{cMxOIweLLXrhIJRscWtyBhugPF<;b`=9Hmh8-8brN~$WRBAqJhs*m+EY;q+_&fg zY~k%A98l&>&PqcVC+F)m@)L2L7_ZvB_ z0~Ur$21IoqOyBM-^9E%y_)sA^E(0e>zA<>F`tRy>UGr&So{2!WKV=1(_wjN4!Q6Cf zjgOjGbh(Gfv$(uTKQs-*aG(rSm>h<~2IX`K_5%Ky>91mE?hviV^Y2+K247&I=Ktv! z$yxdJ`3ND1?JIx%1t3Hm)5F*ooV?~~oU*Z(sd(9V`}K&qIIM4xIBqbokq?A=+jN&v z2TI6DV>g|2?BXki*jMLWS`#8)>i_-Lj@G9mVoeamDZFml<4!l)0@M`k_{-VcWrVVd z|D?sh0awGbv~ zW8dB-Dhb;1Te=kuwrJOeV^3kw2texJCaV?vt7Y+6?;;}l##>LR>1X4Xd37s(mTk3z z>_#^rpHi5MEK+u_3|6?H@7R1M34>^tv1zsXyn0& z0nzqeEDz~ou|{)kd4d)N?4=PakVNWWQ$o!q7Fts*@A@2zJ@eEw5dR7CfavH@T{sBO zpsPIRIF&01*64XFJD-yPORz^cdk3gEZac}Z-rfEs)FE{(l95f_VcSy?vL>Zh~*ky>0XSOg3jS>N}xIxNp zYFVSiRqD1$BRA@92g0uaOQ`y4q_`d=&lp2N$OMw-XyCo%x~dpbY-=HZ|670fU1EFw zx?y?VWe|N?%ZGVc1l_1b3MPe@Y+ILXk&d3%`pqduWHvK(&Cb5Zm|**rFE)4WSzIi( zAxeV?#b;mx%0%xtR7JmY+cXCvRoSQm5WpET;D$FK?<&`IBJ81#tIZ|6of+ zy`81gog_>wC;Dl2tmET%!eoSQE(Wn>^yKO&J5=LEj5>>=8;jz$xYw8oFPX-2m<~t) zKsgdV5UPxE*GmSgp{f*4k@MJ;9J#%kt+zcDy8&%QfxUo{_*@=DKi-a3>viWgb(aUZ zz~-lV)k$T7s1Eut!a?^!DM0kef~aS= zZlP~{rJs+og5<`PYC+VUSwS<}vLCg^-E?K?fp}GrD55{3uy}zr`3Ly%zy5kef6A?| zGXMB0kQgTDaUl=t!AzoW)MUnC?;o54E|250!HqlieFnXQ>=ZOnixuJ<-`+ovQ9h{l6RAyBnSZ(AsV zMvdS`*ln%SUiRlWK@f_dsqj-|gJ1~Qr%y_3(mByN0ZDa|U%v8)pwOH)Vu>s_I3GQ| zTJ#Sg+uK0Cq~Is}SwK4+S#^mHi@%SO!muAU7WCn^PMmOJafPS&UJ|CV#2B!WLfvKew{za}fJGFzBN%EkFpg*C3zP05#9GqV${; zLVMmLIB3xiM*)?T$b;(>Y-6Xg>*X4_{M47=l0o)adh}mWWmiYzO^KY=mjL_Zk1_>bq5_AVe1+{tO*4x@ z-}u#WCDF)Mcv_XZYzdMz1pxkxm5EIsM^_m#xf~BMhrd%Niv48%;hyp+ApVw79 zfT0osE+i_Pc0lO?dY)A^%$PB+PMCc88sXQU9ch@zue*R(^~H4F6{7RyF(@nF9B}d6 z`Gr8GE61N|t_`q@{OUyb++jesBl>7pBmK=xP*Bvo!X0xoDfC3Wj{df=v7KsV7VjYQ zi7ACD^SR={(t~x;PpUjZ41IEF(!oa4>;{lVFF_Bnrg(=*1lWLQ4fGYwRN^lRW{I?6SMBDOl1)}s^Zb~@mIk=dLTSj7j~Ep zBci(HxUnf1>PiqFH7M+D4*DOwOav>%Rz=AATT{3=EP!mXf7;n~0N|!3YTL2Pcko#H zH+Um669H-v%Aj@N^YchRv$;sd3V}97KcTBOx88L2kltv;i?M;Co|MZWl>nO38jGLr zs#FNS}DcIS(gDx)I?vN$=Z*-c|Ve@jsa@_7%tvAP=?*X0FRqh!+pT~Wd4NauR|GH%4 z@a+x%#v+qfs~lAYbJdg|S@D|3b4D%7gE9m`Y?R11E`M`eLD3)VPR7txeP~mvaP}my z|3itDUsTs=_{g0^G*$xuI;(9KXJ%+i9>yw_0_jpqc)Fzp$O54Ds1$zz1r=xShZf~2 z(!^rFgnM)!-2Fn%+uakqru*EOO6i&LKQ8)p_z;l4VlZNMpVd=Y9pZq}mi7tty%E|6 zCPqdSP@y!;gQL>T!$QIjUh)V#h$jh+J)vCu(7oY2RoQTnZ;i`GYd2P z8k{-(NLV?phsjkyj|ei`(}EE>`=WZ){d8V;DwxUX;l!@Hzi7}}y2!i*JPmwl1Ya=q zYQSRL?k)~nce9`QKvs`+K|e~5c(RAB)P(b97OmP}pSn`l#iIN<8>m~ z!BnXX5DK?%5=PwrUs@D?jO@7eY|f4Y7`Cjf|Ef_*ocRZnnhV{$T?s+d=Jja}57T6f zx(@OCpMe(5ggx2lWy+MgvrX{0ZO;vmBF($x(*UucTeoIw^)Zmc-D~)=YeRMToF8K7 zF6r1W4d-k?I`n4+b&0-G7GgCMqXzK^##mqF`3p{^gN_*y)QLrh$&#U;(Cw-S;vJSu z=~1SYd#thr5%T8m$-w~1cb}G)r@Q*TmsWmeJAehUVafC0`*k|?lEz;eZ?&$9ZzBQk z%M|Q5cnlkduz&xB^%ZnMND-!msHBQ7@*;%EKtW+j0XdIEC7~(+*Y*-cvE!5wQMqdM z7r=wCN=IlPVnqfhQ-Y=;H;V^bs@`2&{Q7Dz&|oWgi_QHm@s5a|lFF+O8#dzP0=K(L z_OV6xM<_3K-3 z;`Y97GPpbPh~%7%@vfSv5q8GNQVy_(Q$SW*cI(_KE?-p{AJ3sYsgFTa9e z#91md2~|-wRiN2Gn0jRXkIpgxI)mb|{mh`mzJV5I5foOesWba~=K6J^q+J;#v+7_= z!9ZaC=RfN=C3as`rRze`8L=En$={@&)m*yWhg51F>S6%7FarUyH6uRIvU1!@0if(` zf#EznZGcTw6xjG%Kza0EbOjQrU0y!mtOK`pM)g*NfPjzXI|h8@MiQ zs5DQZB^rR?o7DahdY-T)lRVL#cyGjHAe0Q0oLztq+X65d2LnlrL0zmL;;lM3$g~vZ zn4`g>{@Le!)VnaOZmQ@N-#9_kL%DAkX3>Lj=R?5UrNW52%E7`sz|4WoT#N{4jf{Tu zkH_lnL$rwO7NTbqSa%3+DPh*3QvvH21R<@KI!G0B}nmJ(qM)Ib2G8RqbjTfVRY@lx8Lp+sW7 z_V>y9bDgIJSplHUkGlI;fa7$?pO{ptpijyb@b+2@iOK$HdO~uXjZDGWx7I*c<^y*3=J*b_6k^hnEb|gG2^3 zSS!0z&Sy$p<~#}x%ju!*3~Tu=;(kNNA?vWtQ068a8*S#qvoAW%?;D5gO?VWO7@ zB_^xUM=LkHp0YQB`r4T)PYCVjKmUPT29~0iYgxoY6fmaui2WWA z1g8`!w&H4j#SQ1D?mJ$%fi%9>?_<^7)DP*7uXby#$ksEEdT>~&?}BFXN*xI*B}MLh zq}*U(J6*b(vDC@2i)=o!`!oB9sLMgpnS7{c_K;#=5sr-git5gB8VNB>I(-sCWP~0t zrV;NHA^etPBT1FdZ0hrb$%6TQ+b!h*i#B1cx^zqBP2bWu#~CdgINwIUJ$7ZTrhuOv zK1qPkP4RM!e&2Rs0ex%=)CqjlJ8d_|jrm1-RkdldhyjQlS7G#7b!z>x| zQd^H%)DXAgH`V;1^nEv5KDa=l zJ2~Fa6g>}6pXHb246-WVl#)+@2M%Ev^=8n(Jh0;6>sj=4z%yn)8ye;m4P+Jv=6?QD z);k(vFJLs|UuLNGhJeCio1%(~Eo#b~2QrUtT?T45j9sGvARFwWK1rUa0VxcKWq}1W zGiWUWABna$!-Rn;7w5Dv5Jb+8SVy36R&2l*@sk~~lc!{~^@CY&Q>Q!s)IT3fq80R< zpQTTQg+69WQHvO*?c`l+5yQ5kPkw4E&tYA$-U`_$TYHxP(6U`>??MRK5RJGS_H8@BYFJz^D1Dq z?R?~+>b~3#Jgv|62Qj5X(J*ff`W3!t*`^q{ss1D`p!ts;%bpx#jK>A=b~);++8O&_ zsTKbFFSqyq?;f)M{Q?6mZ?w19|9I0}H9p{}DQxC=jRdb38FX&Qc|Z%hD@IcobI-(r z)gmcx>#{^q_)XqhjNvb$JC0(d>?w?{7=bejUUkG#hU|O}kr8cQ%V~Xib@wRbB5*J0 z@HAIWHNg32YMU=lkpJ2l{Vmi<;AF!$rO(_i3>nn5jyacT!etmbHIHvCafEv_bZQ-Q zFHwi{FobFzgO=FB?HEF}j=7fT!|yPhYkq$DOR@XTRaFN6La6JdYS?BWTq`?4fx`1Y zsiI-+><9%8&zqifb2sO#8yyuZ@&6;gIb59$q$D1|2uzIRC zo)&v1HdD+cxYFMZ;hjUcsupj*YL=M0I_KruRxEY*3x%g)W5?K`&KK%!4cF76+w7iq z8lwuf6aD|2d+ADD$b~(8&=^(7^~zs#PIS(}73CUL%=OY=a87B?$W^!aHs>~bBWgTB zOO<>3N+Z)aLi1sC^HtBwo{Zb*Mw2lFEL$_=%AA{PqP8m6wvlV1jw*#GcrMHJ{@94- z!&rZXIc3+iA}Otq8*`}f^y1v8=F6T!bAMbDwc7sM)@($&vK8834+9(TkLA7a2W~zb z&kH|9H7D(oZNU`#NngHwSwDh+d;0Qx*?b5XvS^B4;>pb3Obm#om^#f zLn!B*Tx+#MSmr9nZNm<&8mC<|ifvvVsx;0NcU;kcf0XYO>|OQ(%;o5Gbx z`}F2q)VTVqL!-vT!e^Y@Os@Ibr_6Jua`)uRrB8f6NeF>EU|$`n2WZA-Y? z!A@_@&5U=xJcKuT7e3?IR(1UfJLR4`8V`MOsMW|*c*edh;`+AujCtF|^^Wc-<($lT zXvCpMS*6je-F5Od#IiB(J@WV9&rBCPl$H-sV-vU1Hyt+gA{F?vObJo8CpT$1+ z{lflt#q8U)i7OkgZfM+n@rv{!BaJfGq3z1-s{-Drz4iz1KMudTEW`N=kJd0Wk6g)C z(}Sj&%88AseQO$keDV-;O`|+(KYM+*v!E$lszGU5$Jmo{du8ZMXxg@O{r%|G+Tw5j zeRw&aKBH$GJHqInoLW>FZX?8!Y^puCF6Y$f%)xbXZPEfW7pLYR-?)a9 zYu-EjwhUY>)rWXOKlUeNNo!tYXMyBjJ|%+`9op}oZsFBQno z^>R>J+Uo%k5%=;^=PjME!5E~OWR-YL8Q$8uvur8_@tZ9I_a6eA5&pgRv0h+Kv>tyv zQ;J~nNDKpR;G4EJ<0S+ZcO^Kuhl;4rAIF3eOdgKG;I8^Oz83UR0vj>%A(-{}GnlUF zg)4($jBur7+!c&tz2NWnS6q9x-U^V(!64~Y4wW0P(Z(;|zj!ZxaOk+-tVA3`$f9t&rtpFRa% zVz4!9EtlCiOQWBdV#zboghZOLWQ6Uqnlk&|*L^W4`iajEO4PV%v^{q-tJNZNr_3DqVf22bYZqs?E zkBC|E3ueMIykTtLgCXy*+*~R58;*??U66C5M44f$&jqG5($j%FzOD5qLbj&Oji#me zd;z`@p4RtI-c*^gN(UIFTlWNczOg*ooH*!Wvu>yu(rE@~KD{U+9tCCp`4?gxZnX-N(Z?|OX-K)%G|F5B@3}) z#X~KS@|+6JQb|*U5v!QV5O~O8Xlckuf6>iWGVcK~)u?2OXWGwnvM$hA8R`2v4TDeU z>ldR*Gf9OFrTSofGJRTnQjO>0b(n@SA>~tx?xEIuWIWHz-|$1Z!ZQQ+=QupV^`YL3 zqv&Dj(FCVFoV{TiPjwf{A>;`8VY{mF5}k22vWrc85{uiIo6(!l@UtuC`tLsuuj z$>4uC%}qnsvuo9N7<}7|&6B@S&Y3}0C*CnygN6-Vd2ryjE+u_pFcawWocagKFw56d zHiQm+*ILH8%J4(+!Vdi(O?n*)|KpV4{}%e|k1IJ9`oz}X+c;6sQ8rMvR@N`yD=B7} zR*nCWt8=D0-8N|pYce<5Mw*7OO#k1$E+}m-8j3T$^95Q8(C~=$bNIW`l$!V!p#AJ^ zsmW=nN%_})cDB@%wES#o`B~pmbNGL?!0zcPLl&ED%>R3f{~yauO7%CjB5ydbGBaOs zxFmT)lST4+x1*B!)%%np*ANUj!b)0?)4y?k2zo`PWVDL*y{n9+AFTW9M<8zG+JmBt}OX)^a}iPz1bW@ezh3rtvQyxPD>M< z(%#LhMwRHxwp3nVE;u9n4R$S`RZSpJ;p#)7CZzFOJ>=+zJ&iz(AvvU*}qa({$@730?JYBH)2apH1{8YqX?@Os=@vgwP3GsMI4egACmC(zwSa-u14mbBR`FBHy9Aw=P5n zC^m=3*$STn)0cPt2wHZVl7{-)g=m@O<7KK{Ax^tF-cBs}BS)|K;DPXt>Swe(NqZMAcYi_0m*U5bqNQ|Dqbw2(yEKp%6E*;(<& zhb_eIEXt|4*|1=_5Mv%^8I|bS>=Vf8*auQr8Bc=@SGQW`iQMVVCpgZ z)w?RG%F%QvIszX_%T07oHWm^!CcF9|7F;Z!qm0#&MakoobCyXA^0CS|9XYa}k}q?- zLjs|W)Z@nKdn4W@q2w9~BcH6CCkz&Oz~{@)0v0iPWqD7}nE~!B9G$4NBnWpx5SV?h zbA8CspUf0f!3AA`yC8~-Gj%7%NgMKELCh?2iG(Z?$h2FMU`fg{9g-}ZC0WWxS?c>{ zHI!qOqY@lON>Eu&rXy;RtKh_v>(0tZ$lCZ*XG*UAs@IgHKaEL2oJcU+82_rK6{)I- zoE7JK%@>^eUM(*jk}(!TVpuT>F_^I0nzI!3fYI83mTDs^yjBV1X=>EFT zc!^)cI>bu&T}}|L^1j8{=MOv0_nm1TkdU6vP1H33@ma9v@%y3wpL@>7gg@fVnfTZb#x z0^hsbpUl;zZQ222RO7jRZ`Y+O7(5H&i`q80jcBK16^wJ&x#t8cNVdye&hAc5SRJVMe=l zY_rIGgLd3wpRoQU`2Idshg5LNB0~pkCw9@F;x}D~NN~m?TZb-B^kOMJjAu-@h<*b# zuDc(zj+&D`30G(FEFIO(pI9pzU8fme+b>zCInm!2dM0%e+sW_@{x!CiQc}zndC`KD8rdSS8QnESM9EeouaSn^y54GrR(%3_xDA%Z=S)P$N7)l zDe7`7UFVq-n#-crX_ugXF)Z@vjpG1?;iElX_||ugcS=(9klu};n>SrnF2zy=C5aK$ z1{hsdHJX(q=wB^wsSPkt@9_$b42D@_MCrR>O=H@jXU(rm+aJqk=`+2ifn2$C>3a9A zD5$`e4g)IUR?h7^XHhx~`m z(zesMQn}Ko!~5mvP5q7i&HS&?yZ9q@QWsJedce*v>!a#R>oe-#)*A*g?(sU=egCk; zKnfkwNz2>WY(xjFyZ@<3l}Zs8m!> zaTYn1oW_*Ol$MejF~sl0ua=%=iZDYMA&`VrUC~mpL{o%0cZr&nvX*64PPwGfh;h2O zi^Q7fn)t`me$hl^y2P58L~7d5pCNK8O)589yg1oc-fVr*4zKu(z)VbU~!Jju6 zN_BI&eS_yWu9b@9=J^EAZGcK$a+_6dukE)_(ja71Jbi-)H=Ih%b8Mdk4{sbi*4&Sr z%tm-CcWmz`Otv9*lsk6zUrt6Lc$A-Q?Z-^|mm21((-<;5`?(KO5;~dKkSMLq?ZljX z+GzWQ9zR0dQ94`SPnjG=bSa(v*w3EaK|EJF+t|;Tq$};pJ;%&#G~Vx%`6>Rm9+78) zH+g3gY4ZMBE+n@;r#|=9Qh8C%ubW1q)=5>Av4KrVYW@2!-Uw{!)$c;bBc<(u^bJ5oZER52P9s!+kvp zwFEh9A`+4xr&uZRG7_wmWEn>)BM{kzzV1wZvsY~bO8NZ-{3>61xyc#lM9)*exvia) ziYo<`y9~Mf@{K8yhaV&@ODhO=&JU(!nVhBN-3a&J z;|rC)W|i}KBwTHh*NqI$D%LC}VaINmu&59vuOqYi3WZjW$No6$ty}qOu6y}9y2dGA zl<35))*%&-}SG8sz95`RQe}!pGU*u6_XnA!10XNRbmikQb!;5&? zu`gG{+*h7cY})@4pNwJR1Rhl6OR4fAY}Mya$kLdK(xc%T^QNsN+P09Jkl(e}MGGN0r?3@gn{KsMf(TFVZKkEiY@~L1>;)=MXlDW6ilu@5}j$-j^q`et&8d zTXUR8C$^}Bj;D~+Mm3LzYvMzWYFhPNY~AaJorGtsz2-6{NaM~~hmfAn^IXoCkACR| z4(+(`Wn?gH&%vqO&wVDL3&!<>-e+4arkt&=EP0TAD58IE>TDk4v?E~oBHI=tNf1dsXZ*+zj{OwHK?HA1N0=0Q}1YkNi z0u6q8DG5KkP`u4O^QNDCRNa~mJk*QZ+x;_n?ORaSDlm1nUY~AwAis@4So^o)hhF!| z`|edQuRW11$IrmS3fy{llbVUIgHD@Uvq~QWcb;?qVk=d&7ah1G9DK=kKihLhG5de9 z_tw#M#7f>^VrFKB*p8W*Vvd=aVrFKJnVEgf%*@Qp%p5Z_dvD&GcXqy+o!ObQ|L!?D zQdwOpN!7ajYqiQ%CE8W~g6fCa%FP@1P-1J_WUA{HBNjkUTmJ8M$Kzv$Raxt+{>9P{it0v zRMZW9;=LxiuW}4|(ZnhKNC+z3%DX;UdMhKhxI-gbKzwd-U~L6RELfPOx)*$&E921S zQ7>S=v{18fr6!eO&Pd$zwCZY0T$U8Q7Ie*9^+e0*^DoG&JQ&zU?7 z&La=m5@GG3uWCu3CQF!cw}zK`IQb=kFIB+?xnxo}v3*wma@KgiavYbTlhtv_r86F_ zYLLm`b|5apJ0ROrG{Js=eH-q!_-558nC6Dyci?vPOXqip8}dtv6~lBpSl{>tDffnF>1zRi zlgK96jk(*0^Xm{Ep$zBO{yTi*{52)|KI)IcwB9!1MqNZVu7fJJj9b;KJ_po%jViY1 z3e_h>*-cBHFW41x#>0rS5X-NRmy&a{o<^;Js#8L&jg}YH9oDW`#SIk}u`7B!e!CzQ zfOtD7?30)>TMo7ke=m63VeVsQ%lBno)QZP(;hxRg>NcSZ`Ku1+Z@& zGe(kqO1^i%v1@lNf$qCRZ3Z-H3B1<53D>^fmh6Pk`bIKTKokaE1d_UmOMgBhhYwk! zduoHuIY>Tn^Hn*nfr(ugOYfM#@W(`kj&9tVAP?OT`djbFvidaRn~o$g*)&7?CvA;n z5prfsbT0qt@l8)cT{%v_=*uGZ{;>Rr_%l`U78|@MvMzl&yxBq)XyJU_XQPppMYrqN znsp$}Lfa>M`)s1oE&I67@^T!Nj6!kOf-hbmqfINs1r|g z1Su;Svj+(vgL>}CK-qukk$~>?wk-gYe%sc99!5-(h&hD3UC&?*%#(!KfVZ%pm<(uZ zc%kpbZtKDxj#t+L9pFkGU|K74zEO`UYEv5>Ptg1jCU-%Qy-3SgdBsRpETx_tpwhlJ z^I#jnHugC{_)vZaOwD{=8LybNrf3&p(zn8YF^2s_;(;f){A9T$&Ii~K^}Y3&>^uG- z8V+!d0pB3Th4_IaDu{q40A>7xZrq=NCXn2gF1Z!u<`)@DAa@Qn5BS6msj1#vP6517 z=Q(VJ6MsFsmBt0blHsZDm97u1rrUWf52rvo{N)q35r&P%3I-^9AB{KZHssrxS8~Tl zmG8<&n9J_%0-krQYw+!(yG-{7dH58{d$!O!C+D}rK9^^g#fIfcA4sSxZUy}DjZ;Vh zF0-9SZN1_om}UGo%%n)UBp?5gaYRTJzz_HYE7SZsE6%M9(I+sC=C&@b0hpjj=QLy*EpyWir}+Gj932gvcjH=GY&5S=s}Ot*o2D|SM<`? zaC~3vgw}4EtyywB+vQ)nsjJHoqE?z3?m9H$S6yWL zc0{AFku!s=3(5Ek_T+w%3n117MB^3-+IagOThRUXKCtHqP&M|s<$z;YdsaQMz%umq z>p5WO7(eqPp2i3|L1SZfljfz5TcITF8#4u*HW(OaW@aO93;D<#K9wC!PuSAPDn5&* zEIsv)TNvt78R|ckbzsigcKRb5f4K~L2}IjD;49Vr8FPjEZZ)-Pnm@z-VZo~Aqh`{) zlHjY;a0)GR1dB^(NX_2+hLhscHnVZQ7&5-(`Elj2g7=627V?IoFxbwm$H`7`cf%i^MJ&`^-OxscsHQD2 zW+d{o8io-me?bS@=tZuSaoHS7Po~sS*=)3_dwH%}0skCp?(sYFn<0bOn#H$~PheyX zNT^TN;0PAnFJF{c|GSenw111V@sE?Ye;3!~JjucqO&Me8vx#}cF^ITd1?lTXK$6i7 z2{Bc!fM7~YGBkuS6m+g3lUY1;d@9E=m|RS7aq*Md#(hUy2%;9IBpjET{&T6eByE#< z!&~;Ew;FAQ!Wg3Yvp|8w`xbXHkH`P;sR#nSd747Dh!AwdWYemGRbPj%)dA7Xt`{u=6^V$<{@V z?8EEM>b7}~?BY$52i6a}7++fj>SStS>-~) zF4f{giR2b%Up%entc)++QD$8IVx>kzpLQlqegLaRLlCJnNE+HX8UqL;dohoyRDn&I z+23w8wKt9bk#mBR04s)jKpRG;f1&T2{zDv+eMm&?mxKk)4hTEdTaO`9m z!sRx(f&emNmv3?QYa}I|Y9uIG#pbn}Jv01|fpMfI#dApUj~suNI}gNt-_*jg2@m%` zeCH@!OTB6OMQV$C!ZqKQBc1uJ`_8>t{2t->>3ltTB70du*H8oGgXfeh* zSf6J6jE^~5(JH*_^;&-PT+0qdlNI0Y>1%C^MLH4(ysMFE2Rl-C^Ztpp7*S<;&Db47 zazXL3?(!^nB`x9VB*^^XAdWP^qi}dVo>HCPilo+^>I-X@kYJGYW`J(tsFhF3s$8QY zb`e+eoA;^u=#;Z9u?~?stLhXVfh=<4Y?_cEPH7`i;`V|UHc3~<*?|zk(ul5!!J8Ga=Pzc7%fJe zW+{JW3u3EK2vV=!NMfl`TT=U(B`BY(=9+osh6K&kq?}4i^kZIF83C|%!XrJZjCIgc zTGqaR;cJaf{}h&6@s?LKZSaclJ>T)SN2d0|y`g$Uvn^Yl(I?P6f!pI%Y?g2~fW_sB z7VD7i^yco9{;EQTu8>8}{j`dP_vR2EUNw2yQ-E^zE`dc@*rR$N)uvfS;V#{nf*ruA zH4L|cSqZPUuyol*TCrx0OL0KPqBZP%QJsoSomojse?rMFhgEWb>eLb)bdi$^L)}Nt zu2nf-KS#Md*KEQ#VRxO8?oy@*ccjXA zgNvgz-%!@v=6;Hp{QPnIx(PF_peAucQ^P~!;^yM&nudp_g^k6zRYga||FF_6X^_DT z&Kc*BfLrYH=FJe{m84yu^DWuXEz9$nf8_@I_y#@p3g2(OLMKIy9A<{7Fi##WM;z?u z3srqcV6zg9K$)o^z?4Fo+b;D;wC!jJhEZ6-TKK1B4^<5)z9_E>Zp9Hx?-A3Ye&kH} zSR}_H4qYV!+n$60#UggwjMW=9@*eRWRy2wEnOM~|4c?xJmeCum*WlU_`=-$N6Cp%| zZ1v_7irt4Pmq;Zr^VRYhCtqsdVFrj-(tcZV`nIGJXMsD?kg>liV6QE^;T)&M z9;3w$%wx6MepUjxG=ElVidSxYTeAALbpCA#|Jzc=x23F~mB6cQ;+0)7T93dKr}Y@C z^%$>(7^8(4r-c}+sx9%8j*W3)brS~9=2 ze2CkhQ?vm8A3c-21>sH9{@Ot$-ohCI=SuwCcv8aPVs3BC!tLbriio$%)9GES^m_>l zrJ`<}==L&XX0#t@FwnB}fT{j8+?ZihCCJ4`<+ktcO_*TFlzhz{i|h@KmN$fHk7#sA z?&A1jx4R8fSxPA)N05xF-NWa_5zfe|nbWzt?aB=w6k8Z|h8QOPlKV ztumZEPVm65AC&xS_vB$}bnJSg-eyY1r`I+w$`oalqm|zAoqL80w{>NoifUqFeETmu{SxnUqi)vz=Ym^hK6=ZH=$y!Uo38v^y)Hn=JjLgOL^`L z2n*$g5?1j&hex}JsMh5 z=7C#5CMkKQa?5!Z#{g=Ous|h;ma>1KN%U(;X@15;;7hZ82Sht(>2G=4vP`mz;U5x@ z@}CAhx`tjoHI_>h4D|D*nzONtvHiu+sI)z2FhZqv;K2t4mt(c{EffobetGTiK@XzY zEk_)^V=L>A;0t*+y0#&InyWAtY31c-ound&E%fnF$?pfj&TkmY<|r8I-)%B;>f{r# zT5%3@&`2d++)_<$wcPA^&%%#>G7L)2s;<|B^c|S-Tj`*&(T-s}>X0-3=&*>1GOliH z%r>5%m3NObg`c0XL_%);N}BNulC&wZj|%dq$TUJ8%&nj`CvWmoXoJwuHSsy{2p204 zt#SGdesxcq<$d@~_>NgWui@&2HjRWkxMsq|boL}gr|<6ZwVwB=kB?f%e-~2CLvA?i zA|~@^u`AN`)wHu7jG5K|UgZvPWnO?;dpCT0zqBKSHL~mpKK{95Y}H(URm)m&n};om z3qbQHK1N4mX5UL=js;;3zMN^mLS_NFO`wFMskqk*8@{t+a6Umg^q%^i!u)iwgCT4^ zIC9)#Ff1A>RF%))mCSmZ|HT^^eJvmCzgx+u|F)9bdiITj4Qg!`r^fCe3Bvtaj z8x#@nubKh_6d;|83QH)-K3X)9TYQhk#!L>R3xA?l4%59JfZ+gN&pM_uFhB46=e}U< zoHJ z?(3{{$|88mM-dOuWB?y9}XGdDV^U*x35FI6|<42 z#}i^RdXqv!h{B8*d<{jAxMCm(5wITs2g6BcA02!ZfCY476i>{9-an^DGl;MM2RoG) zx4)^ELskDu3v0~q%^8c@@J$A*%+QlB! z^2K#fr!uFMbTgji+%k;`w%t&LV>}@BOnq9fXm2F*wr*oX#r>Hov+`#OcSS%@&>VNk zWWc!a79BwyD@*56&7!vPYQ$<~!w;9$R#AT&4_w+c%C&r>NC(>0fLhC(jgxlHLMc%Pn3QFr20IiFw>8nD7B8ReC7;+Ak_=yND~! zRr@DZNLHwC*6HvrCf5U0HR5(Ik$1_S>Qh6M^m6eY3US8nx(e}Wjer>8@@NA&zRmVG zisb7ijlO_djIup;SH&UCW(Dwm>ON&Gp1A~0mCWcq8H^3$Q?(L0bon1_7wc9cRz(*p zTkpIiy0iOu*luv+oqW_I2%Gy0J-nL=PTGOyxK`YTMR*w8Kl;HJEx~^TV+oQ|7OYtOR^9`Qgt9HNo*!H`xd2<11L$0lLWTXfS2@j{PiU zF_E?uxFiF?VK#vz@{UUZIq$Ht4}_RJArpZRQSkQ6+p)xeu8g~~jyu^`7?;*qp(7%{ zPH(9g{|~&Xsyoxu=s2Ar7N`7V1z#&)gR#-?V`rRu00dI3nAi25ymujbhqYyT zoQI+)Wx}!G9=G*O{YL&sGuI&OTN*u{qFzQ!^tqsEFkK|V*#K`f+&F*p?uEG>A(rgh~Xm^gT| zQ@sXf?$$DXO3~7k&y%h=f`qq$_^a(!0*3ILO^#Fy@QCjaH!RWer(&VTzEZ@%9q-~L zArMWWiclTFROcv_UTl^NN;R0^nincfp+ZKIxYhJCH4~O;2a0YEd&~FUR<)SIHMOvp z?5%;+d<|aUzYF<^mbR=RMdSr~gJ?{(xj)bmXxu(=IkI)*aphtO+hqdz0eaC#n5yfu zd;fubxlONu<+CujgKtImQFz|HwNRj%SNp83Reh#y3EMt)DR!uNMXqx@@$|E^FeAEZ z8ajPU*7sHdFvI_oKP);?l0NV5M&a0p>R2-@-z!hjVpjTLZ|_g5s+_w~i6i(ig$!n> ziikTk-|@G)I0TN_vhN+OO77*j09m_xGuF2At-tSmP~N{jo1njZ;llXu-uvkP>m7`+ znS+Cwm6@fUy_ut%h_$1=+dsE7PIA(cAo6IuvJ_%P;uO?fvrM)*sD>@&rg`9g3xZ~! z^TE8>Sxn3jm8Dtpf_@n9zrB?lv!!xts65$O|nc(&;tx>JHIjGzkoz;FKNXvU$dnm|V=*)E2(}~TsU>TE_`Rf@>0&|nqDB>utkKcUGtHGW3 zJ{)t76&Za6F~rOs=M76nRW?-(50lUDTSHS{+9Pc8I09U7ZILL+&B!tPhJ}&KLn%_U zU<{bTAWUg8VGOune8NMdQ^px+Lnv>rK$b6SmV2OiCLji9)I%(w2 zIg~8S^7@&>nD-rOoa?~p0ZS3d5wjJ4=B@x$#h{?=pbP0HU`JORW9umyJ0WilEwQ4h4JwkyZVf91% za=3#j-NE1qRQWAR|3Sm*E(58mbN0v>z2RdALunSzIY~~GwuZ(V!UbuvQ`tt~LwZ7D z2j}Y@0ulElvBk`SiTIV!gkxYIH^LFG2vU^FJ)wnS0=9XetSv8Mig8B zNOcIj7{vKRLNtW#mn^cd$3?Y990BTIA~!_>4{|kpa|Z=w{nPl9MooQFl4ksBZn0_6 z0OO`R%eJE7{Rk$bS0x`zZ|2z%zhA%vD*WWI^%hh`g%=1&P@1_0&V-&aH--q(`ediG zuhT>gv;B^kaOi23KVOvna7b2tk!HSv9SwV`@pUuccN0JL<(v6cg)x5DYX=|w9j!Ur z_er|*IFn9BBY0YoT@rD?^(!rT#-4C!smIKiV=P4cN5x~QPx-JKAs}JbYHYbv$xG?h z=G6$@mRkheD4t%P`p*rKUW*hC1g>AzJ?A7+CzwIf_s1T3c~Rv=qKo3}cVExP;C$Xg zAEni-yR*l5-cM4N#j{tc<(3&eEv|?vE-;>8nFd+W<%XG@preFC+uwK~EU;5Mw&3)Z zcO0^JcNoN6Y+U*IeZZ34T?$EB!t{)B*Uhma!K_IY59}Rrev(eZnOBWgP^r)n#wsN# z^ijwxFY*Zcrtx&!M}-a18qOo|h|`U)Rx!$c;^#|KDV@RNoUMA}dFFwbtGpFmo`)j0 za`hu$r-`^rcn`!n8~WL)*5F5$`PNqq_={M*)S-shxJU#kcZ9)(gf~$=?DkW3xi~`V_`9fq1-> z4tG*&&4F(2)tFbHUPo^cAOv&xZ}EJ#K}MZI+Ea|Q!ok`7+A$uTxBqy~=Fqwf5rTjD zQVs;h|N1%muTbHpfcrG|wp?Mcb2q!C z11Or^;s6=z^$v4A6Li+j4!3CRvH;NzUd5-q1{0|~ofvZusk{fR`#9K7V1;zvYC&g* zvD)D6VY6E%plf;hZ3V(7uxsTPVSD$Q&i(<}W@FZ4=XhC_?QN;qd=b%Gswh4bS|%cz z^AjxI4bG$Y#@)KSPdxU=B<|+6Hl*vZwDo3fk57+)jynM0((JYb7i`G@vbmS|*z)`v zfY|aJ0*H3I{A$g|sC`6;@A_KQeoVNIcW}VG=^Ox| zvc09vT6J0Xvw>@A8dODp?F*IV$^{LP39$x=?BQp`^V(yK?&O#|5H?zshQBsB11vE7fJ2 zOQ2Uy*tR1H7O`tZ!YVSACWI9Zrdjky)xdM48>uTBDGKeOGutVH4k%bwqViOA zCtKZM+7a}Y*XRjLdr8lMa}c`_cG4K&VkjYpP?wZcrM1>udn+554ZLYshGxtt$+wtF z4Cv=X(U4BBMWSk}%yc#8cD8=P;Wc);yP8U7NlsjWP8Jn**4C6(mUco{ofB{;>5jK% zODwY4k=Ph4D1VK%)2H47khL!aG}!ghB+r)@qZIPQ(l=I{mt64aEz+U}Brr{(ftI(RIHAd7)V1d!cg!AnsE6>xXPRl9lDpnk%MAI>+C8@BmNtc+GSC&*2 zCwgd~xKM>0p$X9MIqEaIb@O)%8uRzhwe&n=f&!KfpB4s$;v#6>ZZ1y3NlE z2&XMu5Ks7YISg7O<95o4@J`{lN%JfVEc3T^+~-)?VDHcksG@MrPoRDw$ddiSYcc8CxWe5fhOG8AjisHfBa+?3_d9Pjz6%TU;o~CCX-^wgEOATQnad{S%}Q93 z(yWqc=2X^PVFYL%wy@~hmQ5ehcxvVhj8^OCfFJP2c}i5L#%q|Lyr2I zNoZ62Ny(W(&Z1l=E|a`SmncENHjjrfw_LPcbVWo>_!!(^_?oY{GN=l?!a{&@UY9wV zhqOZCW7@m2zgmN|xMxoXrOYg5trrd_^)6G0a+&xC$v;0LInD2J1)Z1w#O~;tnUA#|LLOYL~5YlLd z?iMHeqG8+WL;0Wl@Xw(betfM3etYc(wGq(e`1<~n4UPz$AqK90XiuIYPAUL)7zYd7 zg{b#ydAt<3eh~Lp6o8B$Cgv-_%U;sbJoLF!909jp1`E%{mfC& zZajRI9vJwB@coGT0AFr-^|_N}3JecbOqd%3MGr3`=(?8&#n)L7hao)}`y=G@YbR9i zmhW4FAjWavEhi$v6kh0yK0~VH9=tpkp~9oq5n!d5_h?p zM3}cE<8%F1X9G<&rkmZ(qsJBCfHxV(w(a`F#%6*+^ff9P^eX__%0X59yYw)m=j zEPuAakQ4;Y!Qx0ppdWY6!Td-@QsYuQgT+BLqXwR*K9JIb`6W=0(2{TzF%YqlV~c!~Y7T;W)dwNNkq+n8^>FPbjy`oY*I z@q*+m7)XoP*&@!b+#sNAAWaQUVD$a>oO zP+SULwfZhJrh*erLK|NgdBN;7Wij^P3XnqFPGpj?!!IM+&1a848bVkkEqY6RND1VZIG{Z6cTR^1UE~l?9aD9BvQ!}@#&Bi z3D5daPho$V#hx5Lv02=GHN1cfp-k>Ddwov6FGYf+^TA^o+s$6kr67E|twf_p96>>z z%LZy1@t03Td1M!2NBHx?G9T65qBc=DsA|{tYg4tiO#npSBQchcFkEzqi6FyZ2o?dx zsUSlku*78+WH(bem$j{a>ZqWmso5#ARPSnunJ3_yEQy*mfh-lN0H zfSD<6=)5c-1ECAqC(#84qywf4^LZS>fVKg9^Zx_ef0ARTdK2BRy`H+y?T+Rmy|Ml8 zME)nheDx-_P{{YK}T!KeYdwekwcw2Gqbt6;$iWnz)=m zMG%vSPHICNM1;fwN0dUMzYkOjP}6@eiNMYcXhW0Qv5NyoHCPy`j_MaW)31f+R}(xm z+)VupyDqJKSZQTbXQdgvTUmkoU#au|m^-kY*sk^XrtxhuZ&(}S|7P )L)b$L|-b zZ9V^Xqy7)c|M5;nx5fFWU8Dhy6+)X>nKmnLm&Akl5x?EEl=Eoz&x5~0Sv~4=QCdj$ zzLXJpHi^CjTs5Ay8L2WDP0c=8ub_{sumTMt$0~p2mbQ#_X*z>DC{1X8Y}8VxWSU>x zTqbGvjKl41-v6ygDA8LdjA>hA*#PWcFs5DX{keF!x~K*6XoK-+K3TWiC*!t$lcd4b z>AeB_=^q*c9$(FZT_^xO9z-dJpqK+=0lR~8mk||__LWu+W=)Tr2cd2ofBzVhAvmE| z%N}){)MJ~;9*wRj&=x!c{*NQC()6JoA1_4WI6Uk!PqF@K_ZXWHd>bYP&RO*^1D-iN zY|f-D<~;6$kpB*ank9{2Sp;*=OfyCjR#pJ{o}QX{Z8vKni<s z)4{jqO`fkRsj9u^w+PKWo?we=v%e||z`QNg!PzyOd%409t!rDijZeQV|LTCVu9*yu zTm-c(oA}bS64ZU(sIhFg2II1f(3`=Cv5dzI@_|){brELf+h%~!!|RM{*-+H?{$Od^ zSQQL;HYF7Pj%69{)aPGwo6YJ>T0d~%AFMCi-51q)6_SV7$j#Q17uD5^8r8HTKJ;46 zYzj9*QC0d?kF*rPa!;=0-blWBJmz$-(@gL~=MaXq))@^t!3_pRbe@Gav? z-|wrA1Qy6Es`<{JfQl=LUIACu8IUTnsNYMdn0;HQn%j~6LbgOB2sY&K{w{Ha-EHD? zJ94wPS30wduKX+Tgx{w85#!l=#YSGY_x4e*#3gi}SPJkyVr!uhiq3+k6uiN7OVK-} zoVry@sCQ21@vo#$ke}FS;X5U%eJ|%!dS>b~u7uJMKN2tee2QfK4UR2#TwhRancl%U z5#LG`b zcWv=W?bNMz$J4}i>)7G~%DuiZo`Q*wUP#8(jfqPf8S$%cT<44jtAOAzsGL6E2T>t1ng}DQy;0_@87{8w-gcOduaw zC^fjiu7|T8loLa!u%yXyVMG-}i~VMRqJFU?hMc4fOY}3Og#5cqn2g*HrN;dQyIgpF zyRsBT-oLF>LKyy{lnR-(xvccRsbmCkNtEa}U-%qz5BmQ9#{3_}Ms#ZzpvY&mnS}f= zdi;lweyDY2D&&P0kN>~Z|M~{~e^sw?^V(iw2+kHQ^?#k8T<9f+=&korBKI{1E0P4+ zAr=WoTy4}!g?{&J!YdX2hDsDE6+puTO&&AC5Sh?z$pk(+qEt*W_lvj%j4k%qmOk|> zZ9ZCKeCicPD)?m#)05vgae*OSODGTzhK=^Wx1?jlQ6E_<$SlRfyzr#TM#oAVkOKih zVPFxNeoe}Jxx+B0M+O5sC|;vaP{E`*9Eb?<5PhkrpE;d-3@rt|>IE z>cc=ts9jGGQAt^hPb{R}*HDCQnM@b1WS1o>8>b!tVuHQ@GnBf4IH?6K*?v=!^1kSp zOzq?@DZ9G%J`faYd)%>G2Dq{;>j{XOR^j&9){yMmDv~T~t;m3=5XWsmW9Tprbu*MQ zuwR*iVec|dY#+TL+_*Y70)&NGOI=xlYJORbv!UTeHFh@!X9Baxji0OfkFh{phzZ8` z8X=9cuidB#9Tft`c1p>o2d^_&p%4Sl-Cgw<D(g( zMbSWHAo18Miap$|B7NPpA*1i#64#Mph)G5XiUDrK_kqx`pBUi3zaR|jMr#M=#%ss; z?4b|qMtY6J8DmHR#0JvKZ_>n<`Okvj#nSw4s851~SAU?$DGB?IeM);#K(Iyy1WwSUi z8l}?C$GC;yXgizRYyX_6bT@aYD6FZ(E$MTL7&K}rVYVQMoLc>fTKh25UyjN`*+wrt zNmoR<0x&gy@J>teN-I2cHFci+nHJ9+bZ~H%?Zzo)>TQifJ4~As*ivR%wVqM`eM!vY zu%(#W2IoA!DarlDd9LoJGaK|KYR=b<;+XSU$KCqF*j?u%+)?hs&fQyp7+;IQAAz#j z7lGC_9fA5O0f9auj}(SJn-sR}_!EO{SNq9&=aX_$)z^-U1ba`FI~KO*bI7Nj5fb%t z!Ir)d)f*j3O70F3q>JiWFxsKsf#tJAba-!|MvyTOGdxfqjy_3$hF`A6GTl<@&{sE> zV{{LXI+9rh$E2KPQq5}*P4Zvq6t_qT=U)j+?8SGKGsbTKI!2PBC(w~es*uclPfwhdz(4{dx5v)DnUR z-@&Th6mcc~L;J#9L*E-`phYlh{~>ypv+K42ffzU3)$6n_ulMm)^laTt6A>?U*G`op zAO&*PwM2?7G=(b=(Z-_h!%}^X%JLQN)}*?FW?voKBuW}IF1yKd@WbT5H=N3Du_B|& zz^dDTcxvKA&Yyf93&DIoAgxzGI!(@jqgWl)92&C0RIq}nSO}rPT+nJKn@!qOio-O5 ze4YToJQ{AmT?PXb^34f{^uX6KR*BptB*h&# z9uKHj2Scl}nscZFkLDnCZV1D)eN%((Y{XIwP*E?r1}uhS|I&fq>V!(P5Hv5@=8V4< zAeF;)L%PBG&H~L?-LS1Hj;L*`73SNY_u+o>#fRW0c6dRp zLvTn5;=@j;UY2ywL~kqf(D(pa8^_Zphdvyd7VVr+9f>-P_hr@G#|KZ$RZVu86anf# zcICJa$k<8x__k7Ep)3J|yXs-+N+-FX7A^czKZ1sES*`~|T=U2`(U5Is`vIi86KSrJ z!tG>Ko}N&fFNC{RpTv9z{QYaBdlL%NS?GS}?zkR`zZ)_?ByGWnZ|xzSM3ATx2Bw-m zxOmsHAW`^qToOm}NDAqe#Z3G7Nct4Yg|<1XWw^Z)R?kfL#7gF(*t`?c&i|5Ie@Tst zzvO9Dl8;2B%j5M%IAOs<+w0TOo>(EY>fqFt-yUS@9@i!*kO?vRFaPyle%!w#&dpzv z;xD;+WBOb)pXx%m)$t^MGSwCx&F5{{>KOUdlObz!8GD^KUo3{pFVW#%4ZlM^_Qge`b#eS zB_q9njZe=1+1TmnpN+jbfsGSUJq`{D3sm@5fkS%8izF-v`Um;^mu&n8`Ih%5OW}6Q z%kyzp%nc*^AJcXg{Xyzo?g7|yUp&m}K2^=coW|wPc{z$)Dr;ejShFw-83SwrT7!_40uhgd5X6lo|ynm*s{v35cof(Y2 zffE7FnMW4c>h1fKiZCw!r~}5|>Twa^{4=|b_&Pm+my>6_JnybaxvO%J`5!ta3ja+) z9qwONJh8tL=KdkU<*$S~g1^=4Vt*xkJ^rTz+}VFhU`CEhbTmquu$UO7y09*dc!krwz?Hgit@gubENt6| z<^y5&=#n_RG8p;}ks;Fh_sSi{_afY!uuTZq>Ofk z!uRt1HR)r8A)3#4S;_@#GdCL7;23of`c8Zgr**D5xe?c=5Wn#Dh)xl7!^%B{&*&xQ zg3(JTi}n4-W7^p9?E%>x{ei`i?gZZpdpO{t&h;2JiR~qKKuou`_1L>Cxzo}ruS(79 zP8k5`G`^i(vYNP~)#QGsUz@x86#?kP-z&K6ub*)-@VtxJklw2EIDUO#d-2^67xyw< z=cS`AlECTZ2=V&)Zs4?WqsLMl7^eKp_`#4qT-w?o%8)Qz(vI0AEP0@)z9s;TP*TdN zE?!N!hq9^F9~pA>~^e^l(jhsGwW5(&^O_3fvm+-cuWDKXt=X!k z$LOdnjc;Ms)QxuLECuM<%TE3nsXmw+EOdB|YwjDfis&>}7e~@k-PEjZTB|Irjh%Qj z)dXz@t1Dg-F3)cA*BV`bpP4^mI7hyEHMYw#Y0jA7yh>+n)g^lA7%sU(wM{bnX(13_czG2Q zjcx5F|LNms7lSsTV7l~;kZSKBYrF7u_1wa^2uMvmY=+=V0>)yoHvZLzw@gSBZ-vzV z3HOscssa51Q;Y@z6pL|c2-)E|B$Y#-ijXlU+TfN~d+#Cjqgj7?jVOA90E-1N*dhqB zUx1l`Vtu0>pBDKC`DX8h@EXXPa2Knq*#-OBEnK>0BW&&34v#AQBXH+F6_fu^RMu^_pF9;90lt$zf<{CnoZ2?pCsk(1|$|=*D zZz`pJned2}dfp^bd1GR85Qb9R{i4 z*b?^oHMz%lKg!@Y;f20Q?E8Z!@9I_8h!I6u1zSFRN2})VBYMCZ*XQ0AX;f4rV6JZN zS8Of=%i08iO$hzf`GXLgep=3ZC8t*(j+Ipi@)Cb=;wt7(-!q){G@R;9quwqx)~N1} zmQSN$-P|s(UruC%@bAH=w^hQQI~5tHHEikh9?HK~p6`RMMeqn}Rq~XVsBl%kE?aAr z71>r~4mP+$tXm@1SDWleKcrX7U@-I>W%wC++369$&?NK(%lp9^h3o|@K#+EcC0x(A zt%8%T?f6xa`^yoM&BJTIV^U#8JzhC9onfJlW9Aq{ zVDb3_xSv}dVdqKB`&tpO)>hphRYb2H!mr|UpWn=%d&_U$=Lkqh zZq*?pV6$&(v{Jv}@f|EpU;;3dx!JZhp0&!Z+Ud8O{7TTRuP+xtI@8*LXHWQH@8M`_ zP%9GHqkx=bj=DG63^4|36|l2`H(&X7XGU3k2H(qYjs^W7hm2u%(G>by52@Y^v-5EU z%gz1icUt+aE?ouyo|@pN#;8hv;2-nPRvCo59sS>RWj#F}LYI8aUI0+dCU6Kn6_T}) zO6`H2N{mzW+OY;0W@sX<9b@t}>g476pXpYiXE2_R7{hCL-o`wjicctsXMQF4w$~dB zOs+zbDb<;<{pFyA*CPA$Jkvb^jUyEqZGi^}&H4C+u+s=B#24$1d_4%?(aSKO@Y_l~ zBkaxvv$LkQHeys2+c+b)lO0qKnJY5Fc>)}Kugs7UNZ(om973)_2E!$ql3ugfH#I&W zL>m!IiqEY#s8LE(=?K`%xwti zk^B~jPhwMtI0F5E{2b^dwYN-h$MZm$G5H~2Qqxx-}BvNOD(EFK#gst?z%gMh#A zf|01Z!{@L*k zprIcjg(haHPsA88hD0e*(~OIw!6nWip0{MH{$Zi(tQ&`!wBh&|DEO(aX=)}J`+-c` z&`6SoJpm~#RV-aJhsd>e=dXyLVJ8}F=lHfA?CtpSRq`$4QKuGH&6WFc{q}LwVT$)~ zeMPp?>k9d|$5kCt&rdQo1EubiFN)pGze#;3yS|;t7E5K{vfGTf2o&cD7z!2JM>v3x z`?=BtEDXc8AqAid@{)_#OLgxF5@6a2qGcj2Py~8PbkFS$GlO($`2!ztWndhK{y60S<>Naki~lVoS$NI)$cWKfWSH3xA8}V1Jh6n4x2lMSw5r?T zd*P2z`#Y-Qus^lnl2aN&?#8oMqxxuP7{bITsQp5Qa!6Gmm%n7$#SylIgl70;NZs}r z)e~sC66@|F{9t&2j>r*lll)pc?cP&1+00T?8++RI`r*qmF8U{C%B`gj;K5^YS-J33 zM5m$}2a@n?wgiL*_JPOB2S4rBKw<|`?wb?Y>+-lM#6(SVh*Bm`W+FICf8i&Ruf9+y zVw0Mrnh7jnX#x0})rEmFAte9I)Fm;yJo( z^nda7jzPLa&AMRQ*lpXkZQHhO+qP}n-fi2q&E4+l@7#MPW+v{OsHzq9#+$#YB34#r zKA4EWnpAI{C{XrB0;cSZw4v+`^-|fQ%h&k}48%EKLKsO4T4mR+i?aKd7Nw^>jW@Z& zLgS-4^7bG*F!VupHz-$@Dns0XEGEW*w4H{@wI}Z6_*7}k;fJWLW8e-6(MN01 zTeoi^KF+|DF>x2Xi`%BH<3HX~R*z{DRf?t?0c(A(t}q;Np_(J?22BaBm&!V+paGxa zTe^a_BwM-yE^$72!CU-kWS!EW;vy2~a6VGd`}m_E9wrNlU7JZSaV9^TZ(KLuxtx#o zrYUJzFVg}0+I$+RQ2LpbPB>q~zj=gd1;Wjp@Lv*js4~Pp8#lrWb;F3`{;cWZU@gLg z8U@!f#4Y};{OC(BNUG)z1Clfn=nJ^S+9lV-$}P47lr>@XnplL@^Y4%>*8uyLHA(fV zn~CW`WCuAxYzJ9EbO(7se2D2n_=<6R*pwV0C;7DblpYw%P2~}vn7MJL1=IYGB}u|N zBkveX)cP)p!p3%8(HkC1*Lj-V4^&*dLBVeKQ~FCj5Qpr9>_Muk=2*1!?`h~?e2le_ zmdgE2;TGfqFo0MJ5nt?k!H{KJ^dxBVdB)`{Q-%FZQ7ugBmtM>kM6i$M-QhO1e8m)5 zqnw)so3zy5%lqNdT>4B|Ck?)q_ePr!FM70ITCg7bp7oLcIRA2?%mUplX`A4Ziy!1) zD)K!Km(-8OgufQdw{g)L$^=JGwl}>H?&YA?x zLKZq|V0$e5bMVc0G!QeL%-jF8PEfsZenaE<=?$jq292F%pWzbf7v2&+w<)PM!Mqe=!1r(vkv$%sqp^~0A zX$Jj-bo%>2UMvX)$EqXbGuk{q0?o=cJtic4(-oenn;(eG)RgIoDV8&9h(UK8vTqbK z?=Zx7timUnktBf4$E+$cHxBy=#^9l=2XlVw zIps{v!K%5R+oKh11DNy8fwFYwGTC#34`9~%N9$B!i*!m8r`GnCTWPC@cSy~x-I3m| zT&{5j@37ncc}ZlQ?Fn2(2~G;G+vv}hrJ6jIURQY2gF#;4Ez8(H182Uv?; zl%4Z>!p*WdNJzJh2fIs0vwP=|x)63v=7A?oa%1U|cG*ISOvf@x2FIxan3ZKjoc+s1 z#GkbeV$L-b;*$$$z-cIV6o-9Wif{Pz3$*ivQ1*}{^^3}zaz-Dl?!YK}81D}Ads=MB zgO#$1O?Oy7#Tz!-a6@}ubcn9`~pn@ zBn@l~eq9lOL689;evkie2>ySE2$|bin>bl{2%8w$+1T4TSvXtR+5T_Bg^G;4mJ0GW zt_K~R-)E|s&~;UTChVUG0SH)mYLcbEInI8Ep#wR8vv>UQB8mJxK{f~N_KQj-P9>aD z7}^VItrF+R2|=ARMK&BaTkKJXV@b9HH$B7xX`Q2Nw?3yEw;7((+$_BBjvp3(2vMH7 zxq|?-8o}m{`S5tCzPVYbVjb@g9(Ond0#0Rax;?Ya?!TPL)szka-lZ*=kha^ z`Lcsx5DVqN!_PPNzQ#{jDfuaHU*FK(Iw|{?42`Bax&m5u&crF=L8EcO8SM)j50yY} z^J#QMOR?!{2Uh|nlOtUT*%H_+{ZA7_&On;tcqP#Q2FoBCc!oI|yArf>Uw6D`Ubflx zmiR1I*{s0#xjj%So1@dz!;$8cjJ5~U{^rK(rt#roc+PhpcR;dVOJNGUO#890pt}!x z^3=G)`p9;8A@=LRD0ZU$F0%Nw=_WiB>3wI$EGYQ0MN!HO+_4f=M$uOKW@ekID1F3C zDJ>w5OGW*WVg@;OyfNw{=m29%GxX~~o8a}EMiSIU(9RnJ3!0|%H~tk$kcY{PXaRjr zdCuiRgmi`-gk>~mx_#pFD{njvKvr|Mm5vGxqfY#S`A}&pZrh_N;|=e&CEV7KZKnnr z;0|raj2MX4HMIOx`GHR4I_z;*69vq0c+K+=Y1g<4Q#M?9$AI39qr|<)QLQnOQ@WG% zKEv>%%y<0AF6)bxwZIny7{E5r>^Fzm{OCXJ5 zI%V0OBC*;CirDhXig(IX(9#MVp4!axO@wDlT^Svbq}}bKh3*8>3`1Ma6I#!b=N!Eb zWg*Y7Qqj*FWe*~>72~O#J1XC{T^T)*qa&#KVll}XHPwmn?osPV$_(COfw={=8 zvc03zk2!3aVR%nw=^D4X!>Id9^R&;pJIci#Hp?L9gMjtwzb4kKGvcE?it6SQ(e(<& zr^xr0^&9_%%*JnDOVncocW$rmKl>S7b%np)uRuUA*#A_5ApG}cC1c?1XyN|9cBd8% zNO!H_7G9^@l-la>HHYe<*kfTx0taHcHBh0eD+g4vnZO(X*?0U%7bED&#QDUSRY9qZ zMxlV02BgG=`eu^-033RDA8Xj=INIczxKxrqP&COs{8EqKO#dt!uF&0mzIu&CnVd{# z_yf#o$kYBv)X-?%SN%En8?=8^Mi0#&mqY?{bq*g z+F$K*a-HYsa+ovW96!`#-&wvwqspA$Ymy?*ENF8b7Mh;0p}BAKch8^rWHb5YE}sIE zxV3+FB#=73gOa+rjevT_i0B@o)7srUtK&SA+^=CY{Y z$#TB0iVx2Ta&{()+Ro`@D@)wkPjzT_4i9{2dG`*4avu=nzN?aWOXaekD86FQzPbMS zj12QRa~>4G1-AduCf6?4Ht($Guz=(lg7w0M(m{g@8*^H4+c8?|T9 zM0?xP`5VK~N$$K*@f0rKoj{_&nS*d2jJ!vkdzcz0`d@^bnU<2b3x)n#YY*L98buwVi zzK)}TV}8pRXj%tmFovpMPtr-^MZ9}u3FCs5;7$&uxb%RLUq;^}`|^jArmhP}(O!nZ^2F{esM(#3_kkfOt0< zW*De-f_Zh9=)$~tizVo}Hk$~m22Yg1r?DG(Vsy}9F1=drL;vK7_1L&#)Ql|HKzxga zlGr%s@+C|?$Ev?8GZbUYQv3Qc;>DRXb0}-rn!)I?Yl-pWF@IA71FD23L`R0Fn|S^gQ!{&Zgt> z((6fnzLAn)O@X)=BX)QWCIndwL8EB7P&^w+7x|KbFe^Nnd6d{-R{wyxM%{S27Q6^6 z7Q=T5*!nRujLUcsCL|+A13y<7;1gI;IKI(D*n1_gwJ2f%2;NMtI(eW9X>wK>J93Tg zG*8*EASBU>ekg1lOFDfkY2~ze6FM)7IP=(k=Kg6cyI2$k;NP6wj86OxizEc5KpOPF zR|Gfh=2l(vgGYZGZ(d}JdM2@@@;uKGg=8a}ehv6qu%d*pvBffgRjL<7%pmKcILjAB z2xwsj3^EcUOl0=!=Mf-g<6MXf2ohG{m1g0?dK%7B+uOJ>f$KJAoLy(7~5Oc1b9ONwlC& zE{eVaN6frYGX=K;>d%|Qfy6K@wpV%>Fk>$ZH%Ma9Si03~q`XD4_YlbB3v;N=3I$JpitX~ z1Yc{-gR{a#w5d^_?|eB(G9W^#*Tqbv-CtOc~~yDJOMEYh%#0yzs3-dZJWm zUPOs>C%s<1RXgJSsGTB?3)e|h!&7J*B3HF&WF;t)c-PTo2%qgEP*1v5+?Do@@%IL& zQHg~NvMsZ{wDTaOB-H*>MEumN9OlM-cDGO@0~6%@HnF z=?VfSsaUwvfY6RiiA&!mtG^$9Q`Qya(~StwpaTWhrfju}{7z|Xr#C{O$@3YydjM^g z`b2tF?Q!;iRoekI*`CZ3s=ZHo7D%bbmsL($a?JgX1>q@$%&>x6oiw@he>#@U5e;wp z8)i4C058%E^C$O$qM-S``%Q(GJV8g6M`B~U~hOl@ryc*D>{h$QjQk8ZRh z933B(x_G372wb%9+u?3SW>w?a&ftWiyVYYD+(5R3zkALKGfIy)%dWc{YQnZCphcnyzG=tGyv?%}5I_S_EQa?CFU6!C@TeCII zoHixh<{KIHLz)^C$6cPPF4Q|h8d|_DO6ov$1PWJofqyaa|PgA&N@%`ZEQoi z@Ht|y7Xl1S^Zpjt%mX_P9%i`!%x|iQH#{m}mk|?G3fd)`N6id@2_tw?|6>~90_&0h z33$nX6Ilm??ij%XpSbHEYiAgAx~RTEpvJkWX+gJMqO4v^jCQXrCHvz~)*Z%-a(N%S zB>Tf}N#@awly@cLz*hzntXA%lsdadx!yGV9SLI^$&DeHwz9qldC^WbNe=V=TnW~nK$8x&9*x~ce&dL>b?n+ciQ^2!SiSy)WGT8G_`R5*11T#>Q~d*1J@uqGGNwDA3~14W zC$8o%wCLom!HlX#A4Wk-gQ%B8{JoMVzzXUf{-Ol9EoR%9N2=|INdF_;=N~x&#%^0Tz{+=1D#ApqS2q zKf@UQZaz@#6HDCcFsOeKQv6U7YVL!I>I+qJPVau=yz)zz74?Gafvl+e(rccaZ=@^# z;xYHb+u-wmfiL(1{;%86rPcE-1DN7Z^ZJ0JzQ<4$6(U;6`rn&&gz+c*Y=ie z=8B=46{(;WTQLoJFe@BBfykhsBRHJnx_|P%Jv(wmn^2D*<$L^)+Vb)gH6t<%8v{xr zMgF5wB8H49kwzAk>5?i3?3ThkFxSfiVl{=?E0Gw^rFT+WyN&j{KwHu-Scfn0$}mSD za6f-(D^m>$f%-^G4nXSPw_7VWpxSySpwQ^X?eD2ZtS0wrjN3z+lT%Os)(~~Omdj5O z$CZ~^6b6_U==#4XcH5^$aWo4~l`IZC@!i^(ur38P3qWnW;(XB3{>j3q37n~91 z$*?hfz^qBKvOsa5tVt)EmfpvNZTiEXG3pqFY&uT+sC{fOXVN7z#eW5vDZE)VS5M#f zNx}dgunutY%W~oj-~gG*`MU);g`f`y?8HX@xa=#yxt?w+Vp(?l#iYr|mWo6cG~_yg zbeexE*g2N@XoC&2b9#`B5KDLXRiEcYbFj{P?MB>9+dNoOD%m2`gh<+PDdKGw)R(}Ihx(#5_HsU;yI@cLV>B8S5 zy@4@`%%gtGmDe(V{^_zXC1&2C&pCphC{DlR1iPyBUYR z;ksDu0q{Xn! z3sWV(7NssISSI_ z!(JZ1YZ<^d8^8~jzaQA1ANU;+z&FLT%4=p1B`lG-#zQbQI#c5mtFPyV@rT8yqD9M~ zX<>GW#tDcdE=6RL_5?Ylsx~vM3+P6RByVx8?8nS1b&L#=S>`wOACe%ZX`0f{`qA*=5Ir`5jHL$Vk=`}Id{TpVn%*^Ge^LYURpI+w^UcbQj(ujs zA=~Vk;k;_VgIxZV!+PZaR$TqEdOOF=$b-Yb1mPpNb3%PR2kNB%UHHrb@MKYIe@_4F z?;8FF*iUP4zEgmH*SjXzPj4JWwoj|Kh}fxFH~%Z3J12tI4v0@}80R$qyr}+Jk-bw# z@!i+iMnmy*lLr#v`m`g>ArP@#NB z*HguioBPCb{t9HPm~AVaKao0FxMsL5OC`qWncmX}(A05D$sTi<`{sI=$BA{xdI&h` zYw#oU!^MGaT(-UBy-O+&=tU?Fy*BxTmf+4Q)d@#X=MKaBDKvi6RPUPdAm~j>rk`k> zEB5eumI~E4b>^(FXkU@fZ_i*(xve<2%sOA$Kd@+1tWvn9L-UYx^pa2BsxGKsJ<-P~ z>W1z12#iwL8MgTaF8{zp^@&U-w^hJCXH0M{m2^u%9`kGzcFUbo;mxU(*QxLp>Y#VQ zKJY2<_E&bDgP+R*n(m~m$|14eQck}?6;qPi4RhIrw}%Z+Iu45!Yzei&qU4PoBsmU8 z$?wRz!Jx1u{nd8(9&sT1GB0mW8zwuAloNV8d{W4!>a#_Xk;4hjQBL1rQ7oene^Tla zd|cZaX0Lmd@NB41k3@5N^R%EGDtlRQ=wLD}?54_TP0=!k_D=Q9-nST>I*0@E3Y(0Q zDehtBA)`~xlE0Di1IHvyqMd!iF3t^nxJhul#pvMNkpDLG9Fe>%T5EP*nDo9O+oh#! zjHP8HXX@uo4pH-MR0S?B$Fiv2kVR6EQ-K{?_BzwGy0)#JV(=lwn}#e20)7JlnOYLt zZ7_w{mGfoN?;`;o#-nr$Ou{57+YoxYSP$=G$gCboz#J_HyG~k~mfy}&?O9js*(0y~ zqFBaNV&SWDOU&&HYn~?!RU-*43)5$ZNACrM*#!xG3@`xC@$baF_Mr5WI+yw}83vDh z-PA6q%cI=5DV3~=tCZyFQSPA|C03j8_^9zvm^%EsF()_L_N)Nw7uY}SkJ7s!M)tH2 zL6W&5IPVsd=pWozkxq2hLpqpH#{#2zpU;q<~ih z^=!th#3K&9JX!(yO!*9{8P)K7L3buShH;UEtmp?h_4NcW!?#NZe8Q!?^s?y9%LDz2 z(>hldTR_yabf$)}Lde&ywQ>-w0`_ZM0&Ln*a4apJD|n5#3e@1prLS<}lR=G-rgprbXAuU8|e!lqBTDAH;r%<(sBy zB$r6kH`@#Zrj+Y_oQVvytBm?bR)Y@a)5h_T07vei=oh^PdUO zf6aPaoGq;Xhk*~u|10jdk!tSrzxt>q@{^K-{J%=UvWQrVPX#Nq7lQrtD)gPy;rL)7 z2Sx?$c}x0>r4gkSdno8)-0~_#IH_l%odN*|MU(@w(!Cg^??LeG-g@`yMuN{oHb- z7If?|DvR6+8h+g@@oo!&aD#sfLr9pZsfz>_I~|F)B~V3eX^qP%E3n32H+UYw1>(Rq z{YXd(!6ovNwU1oAy|YZ!LCa{CDEuji1~?{E!!a6K@4;Rq0fi#uXQMoLR9~NlW{BF9;z@1ND?GeBA8)RUmOPF zxRX;i8BKSLz+9W^QURAr4RXhv;-*rW;JKsRdA=2^P7cIIu)A5e7veyEg43xmS7FhqNLSZ^)#z$sz(TsYg$Q{%)~z*9 zSDBHB)vCul$~CcqYO5=fbN|=|H&z3Gs4pS(K@OGIMv)G5(KS!aFL^*?(wiv;6vEQ1 zu&OAa5KCh+xIx$qI-%Cw3@c5+{v=RXPOaKSG@GM(O32DGhJ;g~Zt_oc_ES3ijd=(xLq0a;+rg}uD2qu$jBT82EjEweFiA~=Z7?BiB$+6k z`E{h&=3!b?q~eS+TUI1fOBGdTEklDR4oZaNh@~q3fp}pX#m@XjM<_5nsf1BsPgxO@ z*x96KQMej{xmb5n!Bp-v6s4Q+Dk3U;XBpSdOp3YDlFFqjS!af>64EA&hf87F$Qe|x zg?4duM{^W&bOJJ50F872BFh4TlECEo9uW*E3bCpYXB8q9Q<<{6tc4ys&P;{CJCQzd zObfubS%2XNcsKOL3((3fpoK0@%!92wx{6znYFpH+PX0Bpk0MSM`J~b!@VhO2d-(y$ zop@KjF_LM1M91(N@H@L{-5-^d_|jl!xP?R@NDRh9WmYxwUihncF}RG_bM)9SQ=bKg zk$?TH3-s8FXbw~H$Mn`E@Rh;x(f$5LEPvL)cf=P4z61U#I_{p((9`XuThF`wb4AZK zTh0nor{vEJs#p&8e^*au1BW(D_-Rs2ExT&rZ5I-*qWa1o?o1=k0s8(Nj^5M79pGe4;toW7~QwG0^giYsR8)+gvR*UbHO7l;F)@XdjIJ+?$Z8L42w?lb&s+(hM zRng>I8U2AV&zNigMZy@{#L<%;JaY+3QhD?Na;om3CwHc-)d+gJpxA;fN*XAZh7~`t zdHfF8)|Y(O8*24RhGgttNGcPTQtCDa(2L2-G5M(H#=OUy6QB+uDjkm)D#H(th2f81 zT-gm-=jmFFU>CXc=chIX;HbHMG(J}xvJ9U4u!EAO`5^a%Bk2QDDcPPoW;*+r)I?ef zjeCMoJwD|-KDpVX!cZJARgfI=zWTejIw)Z?8@7;zLF#6GGFS7ftoNCEVKs<*2HllQ zwS0gQK7W-Cofw7wvWyce*V&FSpuqR=^NZ=0yv$<^6PWT6HnysU?bP6wIO#giG;xB# z*`ElW>C&WU^r$;1c`zD2l>p!JpJNovGroI@&Eq2LD#eh%Hk_!N(yRTJ2l(4(j+V{8 zR$@vD05m#0R7yOTWmRSjRnlJ%0F0s)P?~%y{l=VPGDqe)*+nTyhy4CH#qlDnB70PU z0w4PJYm@7OBdF{ra5&gXHjr;Pea?8y^X_!!U-Y>P|CDC!w}8~J)p?Pyw-ts)Hw@i& zfmxNFMBe+8o{|QEWApmAp>cVb+woW(enMmm4*TE>qwSwWhBCN3W+{y2mG$wolv8hm z1*FB@1SGMc(HzP`&h@tlZ&t~Q2b>6gi57D_n2%Zm*yPkOAu=*7%xe@dZ%Mw+V0^^i zLYmC@I#4=K#G( zPFn{MOly!TwiK6wa3)J51QAZ#&BOyUdnAtKTNG$ool7V&rQHGgn_CM{!Ox(alEWNM zKr<>Xk19kNEKnpGvSCJM3k8fZw`y;x6csRE-Se*=zDp4=-w&UP5haU=5)H}C*~!Vy zlBTwj-UqX#$+=O!^r*aeTdvNmmM}R2Hn-=>qo9 zNc+@aS+EiU^u5F(e_t8l{)i+MlI7n?6Iwz3~q|^#=<82J6^xC&*Wn7cn zs-*TgLc_RGZBMYaQU1bY?L|Dvv20!F?p2zVaispJjdUkF&@DIE!#1GU>?1V$w8+vx z_rk}uv)n+my15K3kqiL3fe;m$=g1OmzH3%G=?uZ%30qc76{W}9h`<_c`KNalEkIEb zS>)P!JEom1{*lh)9W%;iI1j?!BeRzl0+W_bccs=&V{#LFKB)wI_iT^}}bA}^>lGzJ;I+uH8Yu=Z&(`8j8&MZw$FL6^SG0`b8 zu_-a}{ho`)G%tDR1hIKqNDkI702iYi6+&l4qU>sS7~?O>R!`tw8b3!^Oo}HwYn^#~ zPBdlp0ay^BJWkN#>taitHN3CXCOH<*S#1reKI|m2+vMLOX)AzP~)-ZDlinY2x< zgfl|vA5NIzZh<`Nb7jN+8w7Gs-)Z`naFQh)H#hmLpbn!qpY`aV9!Sb;gfUO49to%(duRYU z6M){@FL@QL(vExJ4n5CR0Q-@TgBJwnop4q)Cj(!G@=B{6syE>IiNGq3TAlWJXEh#D z%#2){ARA;=GA>*;_`KlVE}#j7RT8x?S^;jGSWD>1vqiWKmsN~opY_DOMbZPARYD}Z z8$`2!Rh)aFwDL=<|Sem;4ELmBtrPc3ijT@uqhd=#8zb%xy1EL8u8uN5yg% zs0d$2PT;PR0T!0rw9j&^{TkGW{9Rs6;tASaumxjBxw%*0n04m6;DNjITWYtZ#6{o+ z@lgQZ#ai~~EgKdd-_>eD-c~%{^0p~Cpm$jcA`aND)$?5iYVhGkhP@ybIPiuu z$Tha`wKsOL$b4PRQ2z#rdhvGb@Kc^ocx%p)k9s9fw8=)1dM!_YP>Wf;q8DVCMcp37 zhMRk(twD@#_~5f4c4eATjc$y~yM6U47JYtRK-syGLozR}*kw|KKwc<859sJ64!gi@ zf5f{AyF?!f+2y7~q%WWBGG4#Md(v7tUzp6hL3RONf5y8)wh8_W>xDzC1L+Ji>49(& z!YLNh9k&s0t{wG2$Ox3SarMaX2w3;uc1${E+aZz>Q)^b=C^QWC14Sb?TBeSNA?%bUyelXseR5VPxqD+ z%&J%K2yE`{9n>sTUzyaRUE)gFG*U zg=qVWuF$+jb;D-Y&#&D%LuglAcOqV7xXpNjX;)vbjbEVic6Tat2LyF!dXKWi8{u_c zW_wsW>#wD5w74%bqisD1+=4izxIhO+aM^&EKZhC(cw5lwew9eOwvBn#FM{KT=dHxa z_e7X-ll&n3$A3g9AgG?fD!+n{88F5A&yq<#;D)JHR$Am#zd}bCFv!%D$dn^;Qydc= z>HYytSN-F5zwoRX^1tQdwH0CY&(}5_6j-4iu zp)^_>#NrCy8*fn(^y31x zN)aI|#f3fvubh5*!DOr?4U8xeW8dn0z{y^0N^IDYwS~4FZh=@|f-53yMr2E4vYz%b zk*womD@COT0dtZf!c-Tjn}UEWocW~5#f2l5tsidh*LV6QIPgX(I85@EVqBb8>5yqS zap5^yS2sc#pgFG9BbbmL8U(>EX*gB_V@)Al8{vGYwKj3Wf(CxYB)GC7LJdceIstS3 z;p7IXwxM1=Lt)ch<`o+CmMnA_@Ccw^*U;{U9TQnzHI+kbJ9CMXlM8V2h`}i;y<{8rBcz@$x`K zj>0J%{2YZNna*a!QPLKrxnZc)WcWSsRE`weC`9p;l(YC|=wJjfgY=LVxS=J52TJO^ zZ7_qD=8h3xkXsJU?H4k#NPW^|^xIMv_L&#jy-j1+;#tK>ZgyTcA8>ow?e_Ou4L|f# zJN?#G2MR^Hd1@&vMSP4GJ-wg)SB$*53Z{Pn#e&vV>|8WmHrGi^r#B>5Gh!BxF5#Ke z7ggWiN`|=EF*S0`*-N)CCm-g2=jTGOBU0RLo!vo>oMC!zcnmg(H>aIh5+ElG(=PQ! zypi?(<2vCT>O+ge+&Y1#%!NscBZ}6-(1Pl*Hs(TzRCmnBx-$otXMY`Lb~l%}rz<{U zA2>Vhqmo|GI!mytQ>xG6vDN8Nj1rCpiiH0r%ukBK7tZl#6gZUT;w4C5O6M8v3Z%o7 zEg(AlSa7p|b)%Ek`W*EDWJ5G+y|1BZd2pe@n`YmXDHXKbb6aN#w+5dg9Jw^YURA+P z+wufO!9I(YDa2{sxMIc)Y8=-t%EAb~A0^dWkil`-5Xa^%fYtswp2rnb2;i=wL**k* z1AI7wQzDR^ll@lcmLidm8~w&G`oO=M>Aazi>_Lo~0u~+Ml7yENg6qP`WH$kg(Wj&G z?4GtRKk)Bqm{0Ic`Kym|pw&izxdcyUkUTPHvk33Fg1b_#KDd|nKvX}3xQ9C7oL}x4 zD?9UK;wjjiR6W`qW`)CH3~N`s!T&^e|5l?ZbHupZz*P4|%;(T`jpnhJ_f1|S386VN zr*A!@mWi;@P%&8e*zUWasPX0o8A@uMov$J;rdM#zOR60wM{7k<%vUobdo6@^k=@$P zNXAYD0PN^HzA$inj5C4DhXi3K1DpRffEz+!{mPP_QLyf32<8Bhljn_UyTc6IhuG;S zOFnJy691Td_0OeB*XMCiW3q11{*&zQfvhxRG*K!z3h_Xvnk~>T;6U%9WUvoWs)PKr zOw4X0qsqi>@~8M<0b|$#+YD?L9fw|EISJxeHB??5WT!HTx{3@bGSx6g!PX-Bp)$3B z?0ExnFNw(!e$>P@KBK1@+OhBZ$0f!Cl zVcqSbsOm|$d7UM~0!tua5bow4BiQU;4&A1lBR~cfk3EK&#F?1{q+b0Qy7+}&!VDjo zgLkkh`-m7H41@{x+WN1)VJhNUHwrn0Q=^OAVHQj6(|2IsqE8$)dvLaUc;DnmdnL+Y z*zq8%1?|tNGnI`##b>|&WRw$D?=q-gRO$WyR$Tr+Bjx|^?y0khlk@-3J^jBk&ws@! z{|5g*p{ONZ;NHr^sQVmvtjM6EfR$0$bAyr2~; zBFKc1+N?gXUs9GQr=L={*3=%1Zk3)rDa{HadsrW5l_RU4BiotQE_-y(8w~C4#S;vT z@fAEM3SE0@4-qVlu|3*}m40n(&k-yOUArGL3qyN+k5acMv!4Z~_IUXcU3+#9O!tc{ zhz7>?2-ViGJ|)TaB+drrdw35OtnFdN2FBLR9=vX6e2*5)?eT&Krj@BZt>35Hg)hhz zjLY4HEC?K(d#tMK)rifHdq^KXD9qy-0W6NuJ-Yv}`xzvN41;@k&tk_jw7&@^_i*_X zqx#Qv&_9n$U@$KX@3B21 zun*(c^nNm^4!oDiy9x~6pjnhgq~5(`I+It)&^iq7qWCcVK0(((KlZhj+lIpXQi69y`HmeSODnU_T~zNicj&uOa^q1TenMLiP69Lb^f5zJmJmV7?8L z$B>a9NX2}U6f-0lS;&uc;NIa_9>O9NG89J2GZaD;po>(SMQoxsFkI*_jF*PXC(5&u zc%o778lwce76e6mqFoQKZ=zrHo5|{444c`Ccy+G_&Gke-?k^OgU(B2V>Tql1`DhN} z3`M2TX^w{iplFJYKN-ULDA94I&KR@Q|FO9P%_9EG)Fa%kU0}%58$8l z8H<$AYs{P}ix!PJbpjC6C(mH0&!t0V3Spyjr?8qZc!ri0Sz$`VuO*2{KdZu zG5BGOiew1zV>|7MnHknO>5#KfvsVL=g{XeKKqj>U6W)%MkQ(fbzwK+y zt+oGHY6q9}m4>#N{9I+O7n;7zj#R4Np4V|PtYK2;{Slrv8Faz~DuD$=;)nXo(P|(z zoRo_p!@?4}XOA!9O5lvvb)w%&j5eV5^}9x(Shc(GMX|Dkj)Zya zvX_P&K+;4q#u|obvLis`=l~c0nvpHNu!s`f3sbMBhmRce>i=@q=<7+k$&ajHNl=Be z4opl;B2h~Fotg8N1;QnZ_$NldkYb>{_s0_TrP{|=5hJJM?n>)u*4oLFgChBP@qq;= zF@-^Jo>h|BC@o?u-!agzuC;E*f^Yb)*Bl0fPsM@+%7Em-< z$3&i#pLPT(IVnPrbqi+2;5TKZ+$z^28xcYIizY<)e?4y*3~_b^q=16~`J= zuPjDA$e1dg*pqUlaWW>M0iQ2Is{2M8{ivRZd-k}Q3LPxq#ECv3b^`sYS+OU^IMt%Z zHPH-87>Sp|7GU{fM~pdcdVe1UI_SXrNLP}!U!h!XHPG}aW3UxvL_mvS^J0-@W>hb- z$>T8Rv(GLltQu@MuV`ZFsLS2VKJLVdfd-XHbq_jk3{7i6FqO=%w;NgQ47_b#lBVR~ zz8Ty@PCQB@3#J8k-bo}SiseuB>QShrHCZrdl3k(F^)dP``d;Y+>|-Q3{J^=Qc7Szw z8N~c##uWD8a6ZUoFdKeWw1D7hO#77LOq3gyr15p}0ADQ{VM*KHMH{QA<^cX4l~4oC zLS{K2Ro@1G9Kt0h`)``B;3^psn;8?*VXBJ5R3i6k|M1b3vc}~f3%0HQpK|zFy*oBu z&j(N@*;F$w4;nF=X*AyMv~5nb)vaz>n3DMPP_sHAcoR`!qSw!5cGu?;Lx`%$NF`uR z)m+@kOC}wBWcb@sGClfPS(n%x%WYX7g!q9c?K+ygWS(({K7`XvVRMbb?q7W^TiDj< z)6)A=16*ZRh@TNy%pp2Wcc}{(i1~_1ty2-L1IC_y$sdWfI}Z1vlpWzVLsQ5U`(|&( zKKq&Iko)Smo8t0M-7qR-+{b%bbf{J!g*O@M%Um>>$mw&_?(%sX)pt7I{mVo}yd>mF z2_+x$wIPRPYweh^6uX86t#cc8xuKd>J>}Ta%unwE5Jv4?9FFl-?5NiFOoi1|WZ2oB0Vkg70lR2quzEk5+jCQ7}gLY0$*4y+MAZo_?9gwvd7C}E+;7oF2cpl znkP8VvR_EqEtq39t2!Zcuo$;jYN(c`_ve$R;j{#b2}<}l5?zEFqfH)w;XSU55N4oZ5)wgnRv=ue?pXW(U`L7&{5Uwi+n zCpGLB@20_ok4V2dFU2)ZV+I7C#2&T(iPCpfhQJU}b_ucF58nynm3`UI-6L?8CWiG|O%xxeV>j=d?c&MgG#;oGS zupoY*jRR@Oa&msTLs=WId%n+>rWil7JPJ5x!QV)>zVd&t$0TuLyoDi&^<-zwDdZN! zOF1`EeP`#J;_e5R%u$S^vS~&MBh1cC0A|crx?pIe_NCoPdZNhSwJDjk?>2io4T;d3^Knb2J~vKYd5IoR$BF`kF*dMbgd6<$U!sa z^khdF)Y>T;fQWYatn`Hxa!E3LL5J8U!u;BmFQm9!qm|;ldm-0g90i6bX=z{1Zy!8# zID*4>KyxDRl(2M1xeuN>cS|Rjh=N3!iVCh9(7F5`>2yCI)hIAwE)OJ{nJRDQI&2sITPEx6V%pN~W&@IXe{B z*0M$=ZeuYr*3~a16}b)OiKU!hdJhm;xd>;jG~Jy|=1Di1((_$mn8O=_Pt-0z1PdTG z0W&TO@lFLR3w}Q4`>K{wrIXUwaS3m;Tz6L`f1xhxERcMxw=7hjtYt&C{RDTXRF zG$?Nq!fYkov`|uL2lE$7?qI2N7`X4FXfww5_voOwI)VY>ax3fy($Xg$P88mE)0|1r z+QPvpPIlPYpcP(?i*g|=<~pPrH@%~ZEumsHpgq4biV|Is7d7vEx9a?Vb?H@oks~HL{$) zrV}0*utF#$)en!iYU|?OpI>@`(lXn%nf=${O@<0h$!kTFE$P#)6C`Hikv7O8r z4{$L;O3-$=Inx1>VJcDpCQor>SsdKMSu!Dzi%&KptP{to)N)iJ;2V(8WcAl)D+Ug` zI7hoU%fi;w9Yn|A=06R4<6P@S`{M-`R2K&~@umVA%oZ%~PV7K$kZ@5E`v`1-AtVG{ z3ap$wrg=ryAxHY$c{iS4a3Uk@gM{Gn?l{pkZeat~P|Zz2t}nn`@60=Xg*iG1+ewf7 z9PtoS$01nBq2~JBLl#_HftGdU*LW>`bwGeLrR{G%-hgy{8{Mc1Vz0 zO^v%y(koYzHD8=S9W`ZLXdT}uCRlug=4Mc|ym*S@Oj5{95p|5XrCwRqXHf~lSHqQC%gJ*w+2y?MV z{#=vo(HqxR@TW!>XIu_pmf|vJJy|*6t^*jE?qEXjEx}IQSnk3O+L=!_C-XcPnNFS{ z6&t@5vWjO9Xm>vT>l~J0FOaUDT>L0gUMp%?td>yXlSib3r@7YQ3p_zc1LtntVvx6V z%J;D((Yo<3g~MNMcczL(I@Q#SN@& zpa6S_yTZRgAuu?stAgVpxDnd06%VoDdV)&^&hZ8=afGnjw4k3ZvE%JPCGMG=j%1iB zr<)=;e!lLRY_l@K*MEoum11xW4YkDkeEkbH#!B_hSm&{YMM{L45lskuA(qZC{B|$v z5+W}Nv=ivG3nI?FU)u2VwL_Ol6D8v1p3o!$>`){n*T-7J^5sjHJ)q|Vy2bL!5sx-% zTH{-1dtD!0d5A#I$#TsIPcMm-7oi2aX61ShSkex%3Z2O~d6<4DD#fed&c^r zVVGx9RUW|HAOBWmIJh)zu|d$AWZ+<);hvq>#EmOT3XMc^Cn?S{6DKtZ7m_A2(}^9* z-P1`21`hf#(tmgos?zBNEgVD)!k1uD+POFz*hnR82VKQZYL?#@G>Z@cED}Myj{Hp& zTiAWvxpJfy?f1WxB%s5gCDtjTE)g)HdjaLa*tuAt+h*$C!(MidcF^)-z zd^B$HZC{OrAGgUCaYBK0rqO)c^_h6%=zy=rLpAYRN9nxyJF;}Lv~yYJefSuII(k&e zjB=Fi8}Er@Te3Zl*}6z%PNnf6wQS95=(i0^gAESHQf8If_&5qKQ7Bhd*?{t_&@UxJ?Jjp{{XliN#*+mcx>VZDN0@+nmPhHi?& zCZR1y_D7#NXjUL~j+F!g^YgitTI^&7|5Cu+j)nI5hxTE#c7xeMc2kJUCjZL@A=+k= zf17;Kwce5(z~kg5Bdb+yjZ}{XkGqG^@Q z6;|`-akN=1S}fS3FB!Q+2h`w+0fyl(?%7*LAcRJRj(8CJeWF zkos)zC2@>C;ly5^b*(G4%Ssi!V=q488j9)$qQggWVK*m$JI8|mM>%KcGC+GL7tn43 z?&b>a_J_WHkZYc+Kg?pVU_0*epib++4#iafg1aq%yD5SDC%R~1LNI&H6Sl5w=Ct`W z?~dcqZENj(78$+yyDOnn?Q~s)cJHVqA!TEX%Kxi87y4Lxe>&=7cd8WWn*3U9DntHT zcDpnCX&4hD?NEB!KffEX*V;{S^(h*mc?T+TF#7Lqi%XnK>}|DfXG=gQKE{yN>n{ePFjzj|8NbmMitKPr|@A>A<%vq4oltZJdpRQ1?Wr zr;gg#E8oG5kK7NRq7cSDeG;^~`%N`X!go0OUHFyBVe;;p+l+)K$n`p8v9E_WiO5&_ zB^xk8JX5ny`KS@yYjbw}uZ1V=nXvz}54<|-W9%MiqUQzU%YWROpuHIjLwR?w82fJw zeCcSTbL`7NFTTiBU1Bvw6<7Ls`X+&irH31}b4?fSYTL`7gcpNx8G7t=TVjnfFcMV_ zy8x1cE!B<=JN#BX3teVb%k4RZAQ#6o42oLHe4xo}u!=ohXEt_)0p_Cmav{pn{=8jy zoX~b8@jWsTm^sW++FO;D0Lf!vSHVMg7fLZ%EH@a@r`9}zC4McSBcvKkP-)Uj{qx}J zwt1~Bxl~9AKv}L8TK-muOWZ((7*+{|^4)pNl*K*gdLLD?8k^&S-6xhc+ck#W7&_B< zLLUEfWK;H@kpR~NL?j?H+pmsfyKcK~9RX@q+swp*{VyHc!*cQ!15vK?jT20*Y_$wN z+k>=|XwToMmSWs!sK|Zc@7@=rYuD`I{jPllH1NWvnB#TeO};Tw_ON(T9EO$%d>PZW)uw?b%EU?(J~rRbUGA76V*;llx?<3&nu1X| zvowSBKzxfV@o&Y7Q{x4iRqw^dnMVAH8N&q2<#qetkfQ+y^7brc3Gd^`hZKbp*;HhQ zMrzfuY!j;ImsaG@p&$7lP~%%<*GbQWpTC$GMaonAKPrW6@I`?v88asQ3!|56Av7XC zmMnoc0uPOy!8-vA$b|E*l-OoF07d2=8*1Il4L4IYF1gPs&d$ ztKEa%Ri8iS^?X*pFq(Cmd`!Pn7dd(Xc8(;!M}CPG8DKPza2azL8+hay6dABI)7fEV zMRpR53*5ywWHLiZ3M7d1avcz((5Uog{pLHyER6A>h z99mWL&`$ELL{xWGd#T~cU6w~ImIl&%=K<-9e&;U)96b+mRB zPV=m9omdawK|KpziPm}!d^WxRD!Whg7@|B_RzlR3_OGfmGFyN zL7~6kJu%F_TgFg+M02cMbH^>|dVx#{=f-0HX zN84J}Gp#NJrWu|HSFpfiv7-V;rHAzlE9!zbExEHTnPmR7V3@>fMBR^C4I{K|tqNQ* z(-OL1%96QY&k#Lh&=GA!rNnzgWsH3epNn9HannM;J#}vOX@UfPxRDn558d0lI3OI zlH%<{waPhVy{m(4!CWXfU|PsHa9H5IlvU8Gmip z^saIx(Vg%fkAKL2K=#Uh*T5bA0k7Jlb5M7X%N_R-O|@Hdr*xOWo$^6K2SP-~AD8lb zlA!0dB{xV!A4~JYT8Ah!V~wx5(T7_3{@MURwPeB>Xzs1MfMbu|f)!kKE;WQF zuwjES^3cWbKohB5(M9n<7isXK555ece8N;}xYomYr;9Y=6O6(%NnV3Qp4`$aZbO%1 ze%GyU!zoS$rvK@zpG_6$mAi4@nd`N`{z=u#{tbJi2rzWOkYNzji*|#|G}eZ5n4$tG z-E+*E^5Xj+J<`u~LZ(k@}f_`MsK{nR?@+rSXM?zEYVo_yAWO;m18Q z2~C0LNyiaXw-S`$z)B`Es#PoXDjZ^{6-WA`O5j#1koHL!L1k8F0-2D@s})rGLMJNQ z7Tbem7FC8gjgzvgwjoWcR0lgG(u{6E9aC)VdKz(QmR|TpB&q`R4{=+i-0*4TpZbf& zQ)>BNL0Kt!`b7?1S~rOXj3wDx=jZXQ8(#pFk_?&#Wq}qnOSS<;VATzgA?Gy6v22)M zW@RBX$sDyyNWoUg$~SZqoUK##w43@I_|{F9fg3a$yR;K3tpoM2SB;h-9@HJbpAlBA z+~BU7I)6QDYzI0|@VfTa<2$SI24p3F6I^q-&fWoE)PMZ;sAS~Uf9x+%!ROYy1dsXm zKCr*cf7AGg;X2O=l3lOTx3O$@6WxTj-INV={qMLBd^xCgB|Xq<+2|(E3Dnb&2bs57 z8zx7s#{YBK0-$&|3uB{F%FbbkD$ zp?PDLh;*~@xMkI%rG`jVbCObBRX*AVr+G`3EN-)8srkm=tmcWb31?uZ4eMgiW)D;J z4Vkm1%X}>>_&I?MrpAm*;q|R2#7n*R%%^%UNskThA}=c+xtvS7^%3iQ(Y30^o-OIm zCt63}u2?*)Yte|yp~gYpz~r@%c?Az&xJK0l`*+C3e%?5^X4w+Ki-r@1r-akGCxog- zx_rS)2F}Uu@aWo_yx;HYoSWXfxMn?p(akpb%1;)jYVWX(>)k=obvyhcYu5$)m+%Yf z?*vW?rn9n%M-O1QWklMAS_ z05|h#aQIfXFu4u5hYu*CBs@zDk<)fCB@VdzX0r@_(+-pQgd>*gC#ixNIMR)4b+NRz ze@s{F;+xr37J{N#9ODdlwDwv9X&p4BQ@E^)sBk+@PoqmtC`axcoA!y_gZH`Ghi{>I z7H+Au&Y9<_G9#-UC?70MelJ6xa!gx4aGU73cRLVt^>QRuyJU;eIm#4#+p^9^WForP zIk0s77#6!0-SE05+x)EUt@E+7GENVtu}_nxJ5H6S-%nVkOHP`meHP9$@#oL8bY^L$ zfgePUWgm=>`P~!lRoj=03R~JRwvH4|dNe`R=n7j}+vha2K2P_n_1^+5AiU<4g>&qq zN@iL~=S;MiPN1v7-ZCv%y|$O6zAx2ROJDz6YwPNPsHDeaKs?kq*S2P|BRy&k!u$d$BMR{Tusy=Ob-2TQDsG8=E7Zi5x_1dJ`bgw%Q;45=#mjsBV=S|gG=Ag}SK*6I zeM>4VzaIJ@lDxog9Ze`~{!duzo{cx%^*kGAh2gthe{!oB=br*y`T z{kXa=BzneI2pb_ya)&DN%eF7uWYWYsz@&n`jZC5BCKLN{+tzv4J4PO(oT|+bmCYw4 zM&DN^2D>jy{GK71x1BLIOFT9=3p`dgD?C;)Yb?4jRr}*GsyD%3cTw%2Ox~3S2k_S~Q>NZ&3K=@5@}ZD`e)* z-^E;^+s<5|Tg=>$E6&-bo5$Rt+vO{vFon5>y$%pQ+MsQP9>ej&IvVoG&E?PU${MrhVMAQ-XD*c}mb z>W5fwLJVn4MbTfA3{T)y@K2PG3@mfsT7+OW?5&U8@sD_ofWNiJq(YVi-*aLMD7WV@ zBVmv%K~`25ZkMP4omav=g~Wk~Zsg!WXd2oJ@+{Ah9E9dy%2}Br;Sd6&FHmaPRIp@Y|2-i z@9gckl#CEi4iGxAM(2l08Gwz?um}r@LzGIk%8sr=2Qd=5cT-BqEJ?Z>N7W1y9bLg= zH!iUmAZf`y|8z1*X3QN>o6bsS`+~VuCmy9P-k9I~OAqOc{@b!~57ZWnL!%W@N`{Yk z)=DhYg(z=>gv+TRid%5(M0X*Vr$tgfT^pd}d_4!ZAGa)i(V{oL6@BUq{?wN>$^puO za`b`!pGC7nv@YM*AIcL)`v11N{G(a_FQzMOW@2RR{C{9Ti#W}6m4}v&Guv!@u6cpl z4u(5Z0)1maKy+eHl%Iql{da-+f3pyV5pwqjAcW@X&NZ5rscWhk##M>P|E?gYZnAFG z7gj%yHm%Chs9G!{zsXAT*Z~@<;o~K6Fg~LG*(YN(v35N@DUb#F3se2X56`Z{T42O9 z43FNcwuVjSf!+=t@Hz6iNZvtQvR{vR9Rg_^_a}=yL%hCCjWtE=!tgm)7I&Jj^V|{4 z>rv&nh=+pwR`ElMV+p>}L1o z%fG~t{aZeB;h0&{|0}8iO9PC^ksOz&ytu4*CKXHNX50ZqT+ju3lw+X{7TXgvkR0*vHz<_fcgtAXNYWt)W26Y z96~&JF#)zn-vaI^t3quJ@QmF3ii9A!Yq7ug@Qqo+V#9CEE!Rrm(08G4|GSW1Z2W=0 z$GT$;s;ct|&};-P`J89*p6S$Nksm3y=7owjY|$o17G#}eaDzX993jLj>VG1*v5q{07~ z(?~OXury^JvNmn)Hg^_l=(OQ=7PlAJLEM0iU0+eRQfs7czfT z2IT~O5wmzjsZY=JCuM(y11;dTh#O49JWH`v?6Z8RI~nyt)Lu;47N07={0)*87x{Zu ziNXk^Xn&M=OYBgsTEnh$%wt}viq-5@ z*k~4l(LgB$bQ5Egi26E%&M?to>?i+a1|v_P2e)x zuqJsZNNp3lIk7P=z>FB&$)j&$sV@V|W2Nqim;!w9jI~dvzH8=)QmhU=aKE{)+(7xl zuma#NUxumPXWYcX$Xt}8LI}AkyUy%k!8)e2WgjLH0|0olEfk6IgfE_ArE~MBW`wkD zOUt8`#B~~_s1FrHODo{YQCW&t=^7@lTn_Q6_AT(>>Ku}V)m+3`i*9^Xa$uK6bp+!1 z_!eLQaIC9+C0qIY4PG=u884uY9aJzTF!gXu{Gd7VhV#M!YHpDCJL;*Un4U<~!4UxW zqpqNl3SFp5zX?5Pr2*$sKa>&y+hBUtC?%bQowo zxF=s@Zrr1v0fE0=YRD2jzD6FZ&EA()?XMMvDPx%pm%gP|Tr_||Er!niR}KLEyPK3^gv#jZ=%c6HFm?JCYaN!{)Vq6y3^~9=9xlO8&;!s2ha=(t}J4pk?p_ z7`7@a4e;oLS)xYlYS%@+PXN5@e#UzquS#Ot($2$+&HM-w>;^K63TRV`|AZ?Pd&QG| zQh+zC-8J#cV#YybX;8XOPOG_#qA zDdN?q2lMMA=7zQxTv0c)DE~N3o~5!VP(S&C9;f4`G4K>5OA!-C9w49glhrp>y{Ns{ z0Wr;+qmfE6>1D@@VUAtIHeN)zr4pxdIyN1<4$BuZRcg8{rm$u=3dg922&iJ7jfOY& zjBU2dzbTK4dfu%b;u_i1RyZ)E@j6GqiS!b}(C4+sZYOs;lWAIF&mqPuOpS;pB`?xB z<%|i>e=3bwm~bx{@shlQCP|Z-*l)CDw*107+110xTbaj+@XqYPvuqcTr6PRP`t>0y5YmXhQe~BwV3cZCO1~w)LuA((ZUxq49h9 z*?;l%hLG+AWAwo>c&d288meD(eZ3ienoTdks7{JM~ zgr#hG)tss*r)9PhORfV(rQQds@wQYO@9cEpMNIi3;fI$k38FgR zBAY_r3V=`gp312EmHvrdZE5|{?@|n<*p?>&musunj8heV@}U8M=pJ^^;+Xk5Mn#%v zoO(OCP^a9LvawUlHB*`)E%`V#U?SHyE(@TbJLpP;5+`Cy^Q?Z!w&F6E46_roMRyH5 z<6sy?SO;F3l`;Gy-aNo>>u@r>T`Wp*-kjOB@}jVT)qfd#}e89WZhu;2K{M&%=(lZSumP zJ&4E?N9>FQ_EMJqECs9WzhKKc@G9lNvb@jO33T1- z-TK>PPsOrdiyc}=|Ah?i8#LoqFf&5O0QxKRo;;9d5mJM~U)6q+E>}TQEC3_ASC|I;zJb);AX?L2?5g_Q-_x^N|jk*u)*~GN_SlQ(nNtp0B);V ztUtrwx;sgZ5tM2XXh&VH!r7ukNRlTWuZeLd#@YhGi^|f#xH!?s{GwCYpi?9+*Gw58 zz-qW|E5&15eIB$vQTu0#QXdJ_Kp?DSYEt9hEO5GSC2zfQqY!r@a$OtxC&t>(F1%H) zTLJ3Q(L}beZNONK*M!H|4^^hXQi6Ba*55`_IR>?!?V28n7<(5+9IY&r-M6Zk)<{B@ ze2EyR+c9#FNb?folpm^Zp}ss}_^=0ZL3cm>_p+GrN@fCG zPJAp4b(B6M)eOuD)ygjE!pJWHYL1d+eZIR1?EoanNyDRCPj^0hQZzOP!~8l544YJr zF))vWdLU=#j5Qp(B(fN~5sqDMChC@WV*cK3L>n@BDi`8M2yx55{=XVj(J+NAkD(Q? z8N&6avzL4!9gvD=w4?087STv3nW)%E92^k2hOUDW$IK+hn8Pi9miYpx)9i^z6rZfp zo-XVd3lxg;)&zu`~sb`luC=@qX6oevsW*2OiU?R%kP*Bj8 zghLz)kin&vsdOi$K!drw=BVe|0yn0=Qj2o1h-MCA%3M^1P#CMV3#i4HvPrfoyG6wv zF$^?~fl+xXT=r93x(h<$!B!IC%H$5$4N^z9&MfSOjHIUM|QXR^<`m3H~&# z6WmV}r6pY~61!O~2X?|KxK)S^`3e^7ox5hC2y&XB+D+eycO3*o%v7Zi(H@P2N9-r> z=ti#hs|uShPc>K#-2n&57pk>77uuk9E{HFE=<7Fg*D~>j+>I9INc?YmLp~c~N?d9I0ihD(-9cHcs}A zWVuJWbv`jikiGk zF=d;PRw5>O!DjC5-g^~aWgvqED4#wtjcJg#=c154P!ef9ok(9UZ$Q^C{wvY{dcDM@A#4uOTFTNv336nq)b;#s10i6XbdngeZ2anXiET2?Bdtht>88Yy)RaM+l5 zv5k6KN2N@qV+qJNbVH>A8MKwk&gi0ek?iC}Vx!^`f7)R*tp)l@gSbi+s$?Oif(Qqo zIDCy0Zn_;qktN`dwDDP~qurr}=@hAHNZz5li1xYNQgTepk)B6RnttVgoqz+QkwUvZ z)JPV#IV?|w#!$O*|F(j8;$1g*>JY_7()zYSd9r>4N(RYySbS)FvjT6jiOfwOb`$g6lY(H3>{m9#uSoS$D;LxKWP^xhmwcc$keyZiULJf-&ryt06GWRtN+Z} zSuv@t-DUDB?Q3sIt~_&tnK4B?+x%X+f_4~W!mtKbyoM%O+ameKJQ_Qt6p~|W>f%es zIqlZQgW@UI3{%G#eQ%$U5lCObQH6q{i*MplvOYA1xQRo#G?F5vF2RSJG?RsRgyY{) z9qO2ZmQ3N+R9sv&;lrrZzEeEYBrQ58r%8$|g&{6rYy(yIDSPN4U?-uvPtpslzi;UXDjviq2ZBSmz zkYC%dxHov@y;X9d&;4etxH2QSA8OveNO!OE@bCjkHz6Zz(Kjb=oQJFWM)Y`ie`nmr z*rM~oJ_DF;D0!jZZ#iD5uKa3lLD>W8`dnTZKd`?+MsA!gLf>mKc6#wz@h5I)T(P=? zOaL3~XghtFFSO0O9<3NVBZn`)w?n{f(Y+&OcXw|7eSqi;_v~wbB)0Jm1iWx~N9pW7 zzmT-u;1}+%81C|wL3;~!-6A{=_{jg{^N8%bFyr-OB37oo`01yptnuuhE+S5##@kUU6kibYVMoXTX0J|S>;V=T`KLb zODt$UD`uS|mb>uCsGY@y3Vyc4A# zXG`2~H7%ZH^FwbroKU(z)7XEfqjjV#uw0=w4t7OXJK5x&Z+4VUeWHBMf1=Si;pd)j z`IKO9>de(_?aVVi%RXSoxT4KV%uo(jl{95rHBR4C zRy|)7aPj2Kg4Kv*bb>anSNT5W4JZgu3?*(fGF z|Kd6L40ZP4D-i9>S=iqedhP@)%r@`vrEwbTgZnwr z6Xt!xC;a`?BOLM>aw__b_(mZoCJNMbYMR^Q+v^4dybl6O3Qc2MW(!&##6>UY3e%sS9Na@A?zwI^#rk9Dlynx=$~mCIeZl= zkKl8-+6jp~qPyY)tQYw-1TNa=L+wcgw*Lc=yXG|!*y*8uEw2E~+(>wyJI#Pdwq!V^ zR~|-DOW6GnJJZ-L5IN^UzezHDCVT$@9l1lbmsu9vEB79NV}gtR4SJ#dMUV2Z2^z<6 zxGCZSG_E65@ZkmrWkv?U;=S2`OupriWWkk3{>iW|Gr38(cmAWUPdVL3AUW(hNI6ua zdoANk?vDEx`*zZy>Q%8#7^XUq+jwzDZFi?v%W2D8SU$U zJ54S)R`#{hA6975#&ca*SdA>+V#~!?HFKXUp5s3h%^OX-B}(-zf$#0K3~-SvcJ$V| zC4pc3Ubf{0toMMQ{V}=JVZHb4c#{qi8ru=3RLTlaAl2AmDljPekfXf{fZOw?A7p)6 zy|gUBmFv5qzV*N$#S4TU6r>Nzs+GU+|EXP?Tm#!G7JU-29((@Nc?a>CU8VdHhMbcB zxAXo#Bi?ZTf9KuERnN-yC+qD0f_anvZ)c_TOw0@v?VR-NjfDSu|9`ESNviKID37S$ zo()V~8Fpfc#wYjD9)oYtibR3_z32xN(jP!5~%Vh z4CV{-3Pa5qiH*z6B~>LoFM>0-Az!m2W;3UJvtN{5E~KaPu}_(#WY61e*Vmuh9wv7k zHP&8Gxv)J7hy%X-6RI8Ova+0P!aPXq-76-1%GpTXGqu4bA!0>g2lYmKuF&{0q@Q!sFx z_&$y?Niav0sQbtR_A9@fj(CQ4vGKU8b}><_sI7+}!G3V*iM?YC?wMVbfCVURxw>HG zm0g$sbRO5R1g`UK+dlD`fD|6rp#TS{X{uI&U2PMiO8pdHs`*yYzw4ROsX$FUD#M%F z68Oyb9Z>-kd!uH=X0VX1#9L838lP^Y%O~nchj%K)L)RTRVtmX~CD zq;1JwZy{CkOm!;~D5CmnNN!ZdxMjWlVwD*mTSlY^r5H{o;l}fms+DL$UlDPMtd%em z;K$QKmov$Cf=4P5vW>MVvWq^2h6htL1tOd&j2f#F189~-?l5q}%#H>b!8Uu!ejZos z%oUnSH#D#{04LBCX6|6TJ5z9)^n#(~R1U+UsKsSVUMm!qPQMAPZNnS1YExJ2Yx34| zfR<)PT|4oTGznD_{XLMt$_?Hyb7u@Zis$Id1`w5UmbY_8P<}S9Qnf z{mNy1S0Uo(=haqP5#1u3te(G0s%xudT?*Fzll0_AH(-HbY=df9TSd?6Hyru zqAl(YGQ^tGwm!+!q&%@QV_E!~Bdta)I~nZG2rMjLuP7Gpuyl8bEb|8f-~4REC6;%j z1=je5NQSuh^)w7L^QyK7n6X=iu1x}rx4*_k{D%yn1j~DPvE*X3ZgYuk;_le;0n>Vx zrVPv5PiNk=R6{|eT9@=;OJ#j=cZj3X*Cf(AtAJ>kVD~x*OSE`7{g{ z96n8uS*yBGT<&N&CawXZiLB)TH~n)2T9`>XeGN&rfC)&Y*@-NMtSQ;7&`jiLrmq;2 zCUr>LU{NCF2PI5?jbxxQLxpBmV!rh#W-(Qr%#)YQ7rk3kh)%chot|NhX^8Szz4Fn9 zt>XY+=E@LdrC>^+j$VZ>%fO4$oSuxGKy|Q6=}=o}5Ul1&@|r_PV{+9BV(ZdppKD{2 zlnxR_9$7oAA9>-_XXu7o!-M5AaW5|}k@9m7Vly**YEQH;LqerNd4>Pdl57&=QQGyw z&Y-!Qt}hf=QFM4n6Den8eHvncQ(#4cCnvlT6ewKV&;3n_l$W(UXE`@l?5;}IhPcYm zBw<+F@qXo$)6D4ki$gGFPJGx z?Xxqeh6eig6bC|Zt$-SeO*|+tQGY<35V>8k4UTrk&r6PjR_oy6UQrpOWLVfCHX#97 zqMV8uQdOzm0XhnhTQP9yfczt-@wCp-3|=f_e%-v}U8{NE5F)=)J(v|UJ(xsr5U|rY z$sW?Hik;#(EY4-SVA9G8Q>@(&9I5T64ep34pr0E|o;=COBd@*$JoniOgg5-V`DSbY z&MDAuUFS3sk=9eNjVq4&Wc*o|5k+fu1DO3wHB^j!W|8jwyzR-@KRz69`o#BeA1nY=tMw-RCyz3V*jta>b9m;KZ^EsI<-xOZH;A zd_;YLRlfoC68!v4b*uWO{Xw=K2jcN>euFW)TUt@(#c;GI{Ph zAd96avHeDy27)vMZR4rvvB^pD$VtG9lueFxc@(bVE!ExKk*|}PIwFEfcs-vfzM5qK zMm)@LCFwfz)625~8S4CU8h(7`Jp7E>jAA?AgNLK6X=5&T7Cf=QCs~= zqGa?NE=7_XSHpV*;pP>WXmqpKOfV(#TDH6@eNvurH69L^S*lqUsZZGviPCvAujGPj z>mGuemH16s%UBND$he+=kI^J*fl(E{40Z%+KJFT8U+QF6%7{lb0uPzA&Yk5a-e9t#|zJj>h^tgmyLN!98IQ&NX#x%p4 zOSM5Uv$+b^PSbO-iuA8%u8qgJA?$Av51k%GANO~HK$Uu7Fh_V0y9hdmgnyk!zna!4 ztSJ?UU%xi~zlc|q|1rPye+g-%l>Y}6T9%52C5jQs*Vpvq^p*35{^{Smje6hS<_+kG z`sU5XW&#??2xuh>`L*T=##;1MXl=YmQtYr{vM7FLL`CVd0FlnW@|}a^l%-*GSl6WK z7zM6w^x6e4pG+Ma{=BcXKY55>4&C=t940azN7v}uUHl^iE(k-zvB*SVtNvNyJxs#S@suE%pI`-K?YH2j1K1@#;*gPbB2uu4$LVG zHriX-+^_wlW*tpj&4@D9IfprP_4e;sk$NTAY#N#-^Z(o6z?vB>+-H0NJ(4{RKTzao zpB`oC2-P?CDr%;`0{cm5SKFj3g?Q(IWn*tAV;%Ni`A5pMRmYOdPyl(a(y?&?9MU(S z?PS6@r{O;TZmp-WO0iasl}>z`;VwBz!m{FLoTlp{6OTAn>E-raGU!K{c>Q6oD{ zJ~vmH>8D+BJ>aI7f=IvruMaNo`6*LdtT5s?SweKoIrV=!V1|S9!Vi)Ixc<7)I5ryhF)o( z7{9^h3e(klcuEHBl$>lYuDz1Wsl=#JD-3wS`J>7Js^z}TwOWd{EyG)eiXh2Jv~Cw( z51~FWuSsd3(oz;-2%y#vy%95d7gA|nIb0B4kPU~hYB%@&m~jm;w&b!}R@W%3ka)Ey z#Ys}Wh(ST+{4o^tE}1$guPFqlSwGACJ(mV{B$qmeY6R+^6CsIKR$zIe@1pKw-hl9i zRmhwjuUF3DQSL_cuA|Vp%;!IT)C8ZMXXXvc+Qeg!5AGDfp`y(!sx~1(obU*Cn z_OEitTyv}eeoGVK9B~M&km7zdkPBg0Yt`SQP3B4zM6^{ZyU-I&&^77VMjyn+LD)0$ z+%7o>@biO9*sp_uN*fuLlvsg?OuR}mT&KDj704c0$S&cAv$ZFDL8j>a^z;%JFy04( zMrna{@wPINVyQOQo6ij)?Uqk)kXtI7sAC6bf}6UxnBA+%J@cBgSD15P#iSlyD+)=~ z<(v~}qP8rrFPld>=~um7S!vN|vBAevabn55IHKFgT3t&mQNU81kShC5Tv~s@;_0lR z?j)r0wOQs*6hdp;I7u5wnO&oYrXAv^X!ryMM6H2f1Axjrg3i%%+cEyP)92K=A)ce< z8Z+m|_zoU^QA1O|@*Ti~S(DzkEt?$k1^#}&+HF(k332w|$jpkHx!Q7><={pK zcwALAY|8N6HRIw6Z%PYq4n6Uv4QFZf^I+8Z_PnFt)PQ_N_4YWRehoW4li8%)8(Hpd zfv8Y$Ndnt}D3i%EX>wUWp?4AW%mrXi3{`0ur_?dDx)dlk+obR)PcIL0BT!NXfeD_N zBFIn!KIlqd!X(_QdP$sB?6}Y?6Xq#g$|p6))hqSxRh1h;If7=k!zMx#1d33e23 z%lAhZTymqNb&8T%Ny@Ak8}Iyg4a6W%gRioB>)pyXk>8>TP-$s+)YNg+ZEd&}W?#|p z|5@xyTd499?=RUwe#qH@A{C9nIis{hkrqU397#@$%zmj(FMwWd9<&KsE%0m%$)fAS zRj{WhXM)$H#uVj4{;vC#)gR)d&+s-hgn{-mSV`V<9_M=Xgui?a{}mmo!cDt=gLG(- z4}2Yd`0^Lzmgx{bKj0f1EHD6D+!lSD&xh*!_kTWRh!3zH0Y475j-Sf>|D2HUpR!#* zT2%PIh4p18xTccm3Oo1Zm3A^{vhnXlWhG7s2uf*0YPDM3pC~U?HCt3Qpa>EkhOpD2 zKUmUpsr6$CaFES|4|ogb3ETIDD`Y|3cYG#ST^$qkSohsC&!65q-}n7l96u{?lW9yQ z(}`RSGdP!t4n$hR?bmDS`*bw4VM_JdG+TEoePuJwFpUDp`#hIZ5)wq zYc!M{OZ{Jc){1MFXb?Mo3@+stw9tqb)=66h^)g9LWf$5>TP5|@NtX(1)>4h&Wmn2; zywD(5s%yZh20i0=g9>{YYV$#*apmXcuTXO;b;vVW`WSC##yvfnY; zA1doX|4)_u$3k9Y2hr(OInw47*x*d1KdSU+mHxs{>+S7DJXg6;xuSBD%FWO=Tvd6P z$}MIdZst}CIf+MD$ca1>(?*%O%|cF-?arfB9)o_Y%HxoBJSH1}&_I6W3h932N}7GHr*9a5`DR-LA@Pky`a+>ckHBJl=v8qry4<}TFj8!@FFomCLz zt1+$#on~}eOcP(pmk-Ltji^>kb^_L*P37$>_o%!BBiCT$TGOPmk?-atY~OF1G;H!E z$EN=BD5BS)v);^4w2)qQwV9t}p_HF&=BHRFNB>j{Rrtve$xp+Sr$aQfA?4^O(3SZa zDnAorHejvKLg;LC&cQ4j(K#2L#ps-e5$B_G0Xi3=a}lz8F+!K1b16EPp|c5{%Pss0 zex+&BC6kPf$Twq#EeKtO&Q=U=Q#o*Q4J!Dxh`dha+f}|p<=3lxr^=?O{05cZsIo;E zd6UX-R=HQ@w_xa2mH$KKx2gPgmEVE1?o|0*D!&`SdoV#K)~ie9_agQCRNjsLE|u>_ ze~-%dVgv0{`Tdyq0W;rk<_}tEHFQlnn?HoehY@-N9Vzk0RQ|ZipHTUeDt}7lPpkYH zl|QTU=Tv?`<P`x&Ic<05OwKE419!@`WT&0(D@X*;%C@pKUet|D*uI_FZ5aQKf3NZ%%=|~J@lPuMS>?Z|{NF17Rpq~-O8<_|A1d!r`JXEPkIE0K zyjK-O6%=bd3`hxv4o63rCbJ#O?9y0%CKPl`rb#x#etss*riuT~o_z3!Kc6#A8mpmA zlgz(PK50YYy}*r7Eh0=<&mida>|sUlt#15`0k6@yeU zSQQDXNK{3VDu$?Hs49l3!mf(psu-b)WL1n*#VA#bRz->`QdN5ryl$b>+C9J%h5;~cZhzfU0yT{Su zS>$NyaB{2>CA_k_96<_T#hgk68RVH?ix8*8Gq$D!U`qfdW!OjifTleC<&8E41^hqtDZxNd2#vt>ZEoD4$=utg>nmP~u-e zU1m!Hqml)JgL$*IvXuXZ|H=OZtALv?suO*xZY zo*E|-mC+UMCV<-3&T_190u&#Ru)M9!v3{1T9n*%@0DXt2qYY}37Aj3if1#IQ8Nf)J zyLJ5$1uSg ztigT+<<>h}+NEG4eWID|gche{!eJQck8I76*?=u%%uj5Zqq*5ZNt_Q+jPQ>8CaRtZ)>!LBCM#1+f@aVA$wgTzK^#9i*3sSw8I zaCdmLD256@$QM7kqoKjs=B%IZbksv_p}#}QPze@;y2=DrNYL8hnbV3&3`J={EJy2x zqYhJBgP}4%ovH4&W(SmL(Si%sw*rl@I(NOhrNUj0b%DMqm2{TuM^FPtI~b7_0IUFq zy5L;}ZymhYm+M+Rxtf;;UX!x{h%0Tb#+4onY82XY(SdjhdJ(z8+3s29GYw!Rp#v{# z_q5l!QR}(14cyhT29iUUqmA$a5ei|}(F#PN*F(WkkPd9ZRKIqbT6L{ZbW*Gi^P#P) zLk8+3gRT}#(WG@lldO2I0pwxRYxJDHMhWO@TH~OdSRoCL2NZ{r^uN-#suqv4QR+6( zmpP}jK+TwYHL$k?1kl>%T;psci=ZH_@uOyec&a;^S2)`i zI98zkie?9D@PO11)*drPfDr0d&vvxR^stnK z`r+`T1l;}OamjMdfZweSafAoTF9YhrKhavg(ILo&%)(ooCs2}dL|mjnldFwRPgP636OB&l_GG0ZWS|;i^=qN(9|rKRm>8z5kfnX)rihobkH7UdC+M>ryZSU zD|66at)g1Y!QezI+rn_v%5q!9Trm%!I;)tE^sIsQ5i1Vd(kw~__V4nprh4pgaZndm z3m6vYo*tt{R#77sSlLBlAtkB)tWu=1dFO!TZkUYP-tOxIp=m)4asZ98aJ4}1x1&_; zEg)*UtKHt>_ShR;Yn(0iR704qG%L$zW36J5SWL-q$lKcHhCbMAU+eO$3?@`$hu&?k zcRMv&&UMh>l+4si#CF4$@L6R$&YB=?=%PLLrRC+BFni9N4=+re=TsjD+=S70p(g+e zlv*MwnJjAxlLc4(p$qvdx;FZ|RU9K~DH-OMv1X;T;VeM3nSDg7PZdETI9If{!wk`1 z+S)Xjr04>WF}f-SWoqtd_t;lJY<+!TUMq`670u{V9*ktJ=8k5&H1a{kt$^Z6^x{IK zGm2?!b2?Bqo|TRk` zoV`s-*wN;c?P&1nquK~er3WQ98a+f}vsi8w4tAlH&1V;az3W$ytVJ8NkJH}J=5B@x zc-;0}E3+Wy;mElmaecySuj}w+xf>enazc3+PFDh_;|`bikZfblv4<};G}~se!Yb-S zy_H#6gq5|h)mGsY4OY=8z#5q*&ze)S5Nye@J{>ZaGhi)o8RKW6InB>qb1SSzpr87? z>0vrVnp||whB*MM0<&#`z5ziit3=&fe#nBgWImm$cQ(MdUayr1YUR=LXX;>*|WYkn|aP-kp?90}#5)9zc^K*NIxlx7bK%C+0uou2;6 z;Q&euI<*?OYQmgbUU)10j?G3 zt#CpQ(dI1?K33Z8IS>vH=)A^sH(<-f$_BFpt2ha!k>X@=idCE{PP2;B#Tiy{rr2N= zXMw{iVBBPOoa&B1X9`*uTksYED&yI}RF-?CRh)zKMxIr-(#lrBNWhXY-$ESMmVhHR zf*P`NqVW5G? zi>=}kaj8{YCN?2-xm8>tt~86yRj9{qhfSo0TnN$6MKQbZRi74f$@zPXV{%w}m^b ztO}{!39A}5ki}csAaKN8Vgn`PBaU2DedL7e*}&!9n1IEdRosm$=u+HQrmaxFR&aPL zy0ig;EiL%vrKQEnCgS>y=E93#8S|~I4D&FDgOXw>&DGrMZf}>nZXUO9Yb0n_B*cOj zzZ$yrZKp$%s5``R8CRb70EhK0zy@nuZxx-e5@ks!)yvo>VDvbv=)w;Zbg5O`iyPeZ zWNeB1C{dQ-gV=7MfFLViV?%7HXdjA$HdHhuYSS(&8_y2h|s%gtMi-7(Y$ohrKdN zQc701>(|>?G}gJB+-;>JlBddNgx%v@=Ye2le&yK8u_GX*QqtPy1YhK4k68u`Cy$Yj zm0iLv1?-sYf@RXfNG^-yrKMOOcr7I>%8H7LN^(||;TJauXdvRVv$Oqp3F1n4uT|_5 z_hT_r$Kan;JRtU4#e=vD{}4J4i$|>DQSlh=$2>06$%m4plJ-^ysETKOle2V$tX7fE zD^_EOcDNy>-JY5|p`aiQD%$R`w`OU()K=EYPOyq6u-7leeOYF=o5hn>@sxNPYBwso zu%XW$#+((aoOPgt&xmL70F-~fvqnFvd7&f{%8}I$PqjIkRzK z2G>?b9&A}i~_h4~u%_UFO1_zO58e-RvZ0YWc{r_JJJ zt9V7cY89{H7tq(yc}BbeOS3+Uj}?x()s6BTW|p=NfkKK6h~a7^HpF5DmpX4!ve=F% z5b-bztl8x^=<>-mb7n1EFlAP)y?Ty)arylD<<$#np|%U~yDuO5o!E&SC)eh^!6&-jK$zul&oo)6ySZ(76WmtIE%Tp)q zK)c2n@VUKW&fMCn>S@`Ybsl-RqTUG^n%c7|v1{C>8fR2;vvc#({3j0WP~l0?1X(V7 ziFTN=438o8z79t7%2{!HY8W8gB3)H#gu`>KB z-uG-?s&+B}cY(4p@tZxk^QG%<(@x)ndSn4vI#J&aN+!s!y%m;kt~Ji4^=XvMN4A>n z+F_-V7Oa|Fq1*k1V!gD(ApQ3+inT*I`q4P!$kW154C#k>I`CYG$8CqDtE(B0lLef+ z%4%}0cIwBqkO!xuZlyioD0oP^sV#UsU`(bxzaY~-7JOKhXW9z^D9nSmKu1gf#Ds!k zJnZD?Xz~>4%ogUQp}tBXXh)YyT8i3G49EZ;kN|If$a4~X>n;ye>F2syw8LWUl+=de zSe_xvm1oGtWWw4$O{#m)iK%wnK=L1%(obXAQ`>yB7afxJm_XWpSK!)dxPFiSa7L<6 zgqpx|$BZdLDHipWqNYmiC?ZF@-8jQ;9F2wUt67FNXuVvmGLU&`Rb?g~XgkiQhmNME z;674pchzI>@}CUrS4X3M+9Bh9kH6aOAr!4$@2!pY612k0njP!3%IHs<6Z*A&R7@T= zYi~uf1N{tGkmolY>*V=O-;qhEgnKP+pUeuyjq%_?vj8z`?Ht&m0opqz+o!m|CRpEP zSWr=$d#wbJ-vBn-kBrZDKcmO|6{G(C&=PRYtFL|0>(HkP+L_oxKK=?8X;oZ+Ra|K= z@vLkMZipjp2V=Aa8qW5W?xy-fn@xEBO7I1S;0>-^Ekyy`9f8>O$o zN85UTKghH@jT-hr4*a-K&rm1&}=&*bg}AX z;emZzZ5UUqKQD`D8y?$gB@pXsXR1l(G&1zpF2t|Qvrc)76e#XIejj=Xp$pn;kw zNevZ$h#aX^=mlO6Ti@<+Hv4WP16Om~TzJJ6hPdY90A>TwuuN>lP!q5nAL@D>mINvV zyqlo$98K*(=?spRP-2Ca1ZA#qI@)krdZcx!!W)lx_p*Jl3-6bPWCO@^M?GFLbhNfQ z@wQ>s5nblfFML7m<5ELj^)t!-(k^dK;!VC*M_aqI3cnSH2P`HUORZ;uE$%i3SK@05;i2!_uBmci1Hmt zs0p~AZ%dsVoHh*7YHI;XqEo9XgX5zhe!hEcrSUykN({si1NaJeQ%7@4KvcNLy%5@| ztpbKm;83a#6kJE2aBi7WUUsqN^N$Y3HTfPF#6(yNR*zWq>UZt+djXa2U z`FjFp;8we79S%^9?{=^W`k!30;#E%lR)12cHt{PIhxr4jmB-!xkUu2}QrkjZ0DzqF z9(TyY`iXj`5Rs6}=5Ys~)E}T{=!-|*8j&|N5liG1Wc=hS@BfR`N|djvpXhJb+O@o? zbtTlt!F;Z>^g&p@XPhEynK~0EH!3YqO+`7GywQTE^9KG%e5uuu3cU zXws^{h77NBw81zE!!`EdBfOpr<&8j9*MAqGtq}>-tcQW$)3$yAtiXNDRaBsK!|I)N zZTe4PNE{FdbG7(zraGN1DUS+}hkvvKt--a1cK_Ar*)m4{AcnkQu9*>^a#=MwX%<8p zj#;px)eVQ<0}s_BVmvE_j?qjY=(*6wB<@&&rmUgKjk~i9 z4^e4{vhY+=pKHn5kWnRncnBLZGX!^f!Ts_=@q^m2wdC%>R1`dPl1f*j3#T43N?W%F zN5!M93sIQ?*faY_km2-gdJZ9E4H-n=q3;q(-;=+C^ge_?2nc@&;g15sA4B+)fbgdf z{wyf}=Rx_u2+IGjp!{D3gujCD*8$;gApC7W_&W%H9}xZl!aoL;|5H%;KL?fnOHld$ zroS3t@(YB2qrdy&|A250{nHoz4}^Oe{*crlJWnZo=14C5k5dIy)CT7Op z55u2lkbW5OgT4jQp&zdMRu&=sNIe{-`!*;K{b=2f(fwGRP8^Gu{s7${sQZI-e=tjs zexmLtu_1)Ap_2b$5Vi+|heLQoP&hd#JTfReij9_ZQ*=L7uSZ%yx^xI<1cfsL!dVc` z*6W+2`(t!JSNHSudgrsT(l5~cLN-qNwdNF&(Zz4x<60%=j(or?k~{guu%6G z>HcEfKSuX!b$^NOFV+2Hb^kbay!4mp{&L-SK)*nLh3?nse!cEHS%dT&1LV09!mgn3 zDz;jtYtsE@-EYx-x2}h+>;&nz1?f|JP}rl_zeD%e=z6wR@)Ztg*Rl2Bo(Lhx34eqr z>AMKM2LQ@W!k=W5$P^OBPG+Y7Sx7p^3|YenDVy0z*lxnX-Twd)T}0_5N{neYG4CU4 zE#F1Lx`@ROhNth}Mh12gYfMBpi9|mNe7>95SQm-zB++7dEdc5JchceM5X_5-=_aw; z$Z9PRCj-Z8fp{61rv(PcK&2KKCOD$`qblVQlchO>8*;XZ;RAh4CBb`pyWca!ABog{o88Ce@Z zZx$PLk72#(;bRO3m%YAmFjknEXB^V=;pcSgdAQS@@Qx_@r61$GwO-f`~lTw*4 zcd^pCO`|w5fa1WA6xA*=DTv}EgJQWa_oQnydXod_#cTAUx=2MFsq7+C11WDKSLs2i zXh|{Cy2!^k1Ms2xHV|-o!<8d0}kzHhd9I5Fd3;*Yg&kx1;LWA)|{TW|q zFuvHw5YSlYXIu;U89zo(Cw3DkV+p9(QmjK_sKy)*y7UjQVmU;6a5gd+Ui^L1!M3v9 zvJ6AaYtUGZU6Y;!G`5lf;40fS4jevCWjdQl))Vxk%1hK)T$9ah zXh@5|!CwzW!+bux$?#qYZz{Yy;LU;;6XwC&4KEJL55rpm?{n~$!TSchsSwdj%}z>&%15O26&S#)d08IvJCJNTb==) zWgBOJ>un_l*y0yjovqvekFiZPz}dE$1~}O^*8s=a78+otZHWO6w=FZk0k(Pr3^$4} znqh@h!Wbi1CtG%$-m;nujkZmM7YDjTTb!mHUSct8kdS(!k*JqedMA(&hiTIO>MpHP zl5A?Hm&69DzJ}5?xxq&avsl0$G3nyS=9m_r=?%oXefBjF+p1wr63b!+hw4M&F(-79wjhhxW-#9F-le49+oZ3y!=_VTi>FxaroAF%P=6e7X zAblCNVIMiKHs-8d!knd zbiKW|1QC&yH0c$)MCxa>%mtCrTJ1F5guo04XjC462HOyGrtD3z{h5anF7p!WYz@kM z5K?rLP13!5<&osf;B%5I>;M6-P{Uo4&;v{w};09sqZh_X`N3Mb_ zTOkW*zU&)Lt=ygT)=oqMm(FgoZ8y2vfUf~|xAne)6wcE0#9-@M$rh05CfCJmmjPdv zz;Dacq|20LyZhWa4gb@2CE*AE0NXI7rWM9LSUT?}3!k00Z19VXK6@00uJ| z9aQ37-Q?b#B*iH4zHMmB&XAp{8@kb(av~4eyuAms-qA&N;bKP%$Q93SExbzxz~=1g zB6~Z@UQrJv?7f%^Exw_J@j>>?xmvt8u5PO@|>Sp)M>;O}uc=st&ICW@^p7v=;9 zqPA>Bp+NM3$SvE5Ins-k58B~zuy_Y{ljm1A;;7pDw6}LGn7iFTS!1A4GVlV5x*yKl ztL`H&0^)J_-$h>PA}=4N`1nqYu9E);y2&fv-nYF6pPVJJFjT)PbH5A<`*JsV4FniV0=Y@6t23a7btx!v0y-6&;-Ml3BA{_h1h@m8y27V!B4d^KK2zO4Fc%xx1E^m zGvH_MF7mmi+dfXtkern3`Cz(vpHcjM+k2sPKh<-JdSBikV&2}afZW?q>pp@1Zt?{p z{bW5h_mP|{&vlnI?pGsQ>BKfSFd?{)9gY{kHE3PF$;=V**Y3={DRv#@B z4ln)$%RvDIWd9~ocJ{9k4s4NcZonzYBeK!`Edp)+h!*|dxLfF23^f$pKL^Jx%S}xAPEK6tB*8qz|qO4T<@S#c@Lhf z*y&=TxaMg?klCbFYxKvRx+ZK{ICt>-%n3} z!13z`U(#*8kpGA4dheAL+q;|mh{>=F*)vg#AO_qUNkX8~VZnVBPXs95PE!9DY6msk z+56nz5%tdAYXfBj0-GPX<+?))Imi&qzv|i`A^WEizV&2D(0Vcj-u%CuM}*{HT!4r_ z4K=SpQ*?%0^X;`|b(5c<+rN+Nrl?f`WA4v5zW%~(^>Q+w9`kQGk%C#uul{M($9l*& zty&8`=Qm%S(u3-Rdjlim$nTKGi$=FcCf+RP`{;`Ll}Z!L72fd;9h;=${U<1V5S1PK zX{gH4^E-_UPDrh(-od_zAh9`j_TCSfj3$R+xwm!~C3@BXbl}`cpQdf?-6LVC{JnSh z>V8vD-DkkNKCloRMJcWIO>>P2$+lgT$w8GI-!C~2nOtbebNVG$LMAt9!}!5IY2~=r zC%XiCyQ$eX@E`0Kt?Bn~&=)LR+MhenFrwk}9^8pTT*RQhO-I#`^Xw}+srpwfbigNC z(ikAyBzOyPPYOu*TM7ibi%LRXGPZ9FhZu_%lh8LtftYY@6olDO6Yh`^>r%ZOqa8UR z${ScUYw7=@v&2%XF{OQbH4Xz8Lrg4<@b#lmVq!N1Xx*p%CG>KDB7*5GzzWC@H%t>3 zorVssQ#xtntc?AoylhC)PC6`O$k;+5;>B(nRog{vYzIsq@pb#s9At)_+cj?4ko% z7ai2+6PqzGeJuy3Xib0RbSc`$01T+TFG23XAq~XVK+TtX6i&=c6F{1Ywu#$&U+e@w zYkTig{kz3|eY?eGxt!4RCSl%JI>Cp$&w>wRw)Z~LNe3T7wuz7iTBi;e^}>o0(+vd9 z2Lc2&2f9#qSlD&aL9*UhT^#d6Rw~{Yg`<6=us*OI2syA#_{2ce$MKASwfuluyXNDx|FT__bfL=bt{z!)P2>0?A<;21%^ z3pGZNS3-;t4+faM?YN(KI1_grIX`#=3D(UcH+To+|Nq8}O+qu)8{;LP2ikn{maKdT z6Terl>k-)T^L)d=X@9$d+GwMV9{-JMPLb72_5SDCzFRbOXi#`qC@YTA9@=**>@(`G z_vFqImEV!0o1XMeEuyZF~oMb0c==I68c_`(nA3@dMbL zWJn0h)4n!m6CeZSB?r&`D>_`S{m@C1Z|r@-ztHY|N3s}T0AVEBW~9JDCzQnF`>p&w z1;Vf}r^(*l^A9`xl=U?|AN6mN7y853wUE|f{yY*qm6c4 z8`O??&R~hYM?pv7w}BDz+rTUXH`+@k>H8Le@tFp0wEVs+_c8+Gjr|QQ892PR?`Mpp zfbEU1Ix*kgZkhs`{GKMoz6zh@*IGTbQ~RLup1-_hhc5^i1m~F>X zAx7K8(o_^b#`r%{$#i``QhD)A*C8^+(llQ>(4}3SWS7(>04%rHaQp&E!h zgNTo8zMcfBLg8a2vOq(Icha$FCpBcDAF23|sRSzICt z#18z>XN{%)Q)dNRX{y})mg1sGGF!2htu`=AiIIhlwV8L21VtmMY{#wBGc>V!aY#ax z)rS%?*%^?lo0iHXdt?!1S`pPUULzSLGignAMaZIqX!oV-kaQ<%bQc8ErITc$lD|vW zm)Ey9P0KrJ##YdGnxX6xLOYdBDf&jl1b&ZS&_%pkm%GiNTa@ z&k+&39EjnkahwkMNowaNVrfM;t=vkEmGG2qI#tdBH2hTgfh!usgNH9WN!3m=Tr%d= z&Q;(yFL~AnxqCrwO&PI1l&3$+0nFP>QWrBo-t=EbQVaU%U)O@3ci39c1$r&$%)`_| zW}61t@bey2{h#_NCHXb7n@+c#j8o;_uYI2Y`%atU^dl!SVeh}ZcqAAlSZ$QF9juxx zT{oRtK(~ar@~V=fUq8+?bBZ4{KR=@U+$Rg|#7hv7o&OCY$NA)F#36QCNGurHacuWgt{MuwnQxu5W z5TG0zf>gXmJH7d{q3K`ABz+ZO!{1Yc4M(I1djs2GZ@)I!aA-yN<%sfgU%f-N;oV0> zq<_A3p_-ALclc)HQbvbp%KJKXyNBh=7JUEkyv>xn=}SQ46b@P#ZzrDm(54q7yXdTa zbaw4NS`DkCS-a?*U36|2od?DT<^z!z>+=D;RP!Z~XD4Y7A^Zj{jIid1GD?xvophct zr#&~s0V6se5_HoVt=>3lWB?*ywjVA3+m>q|ly{T&u#M<^oLn7AI*qy>*QKY@Q)s~T zz2|#-XRt21AY^;xcY;;y)A`T7k%R%#OJaiXK*HPma6pAG(JHJ>`Hb3*hcRw+P$H9l zWX_k&i{nUtI+q8~!Fq)fx%F*-&LFE5^NS{R4(#J6aa}62byI-}_Ec+MaBSt2uACA>6{w^(}Y zVP#I11{Qc6lyIaC5Wq{QX4onVKfW(7%lru}-}Y}OeSgXF>(Z4~#?R$##f^J%;?Sj}W%XebWy8tDC=U2VGus*wh`(nFk zJsLwg4;H<_+L`1l$`;;d;YMEt7=QMZ~BG0)K3%r3HNzyjyYwB3)j5#Zo zHteR20jkq>=;SLyB(FGha#x7tas6r-@({pJ{f;3xH^$Lb?yIiB1L+TkjJg(TDi2O; zy4-~2_1=QzT_VA217n*d>U;^eNchxHMUt92q`l=jJQaz#2g)37U(GOQrNp)N!Ho$_ zenOw9c!_K4gQL1S9~|vhBWQO+kF@>%uQZxNI>}e2l`^OwhxQ$c4wB_pH;^(2s;%2_ zCBD~Y0U%oD+G{(xo3?v#SLOn3Hw{Bi>Y-8@+JpMpO+Cm#2(>gC{VZu+P?Ns`9mYO> zme$^}w8K}!khQ|f#P_cR530mYJP66 z-M0+J|EJvkzbW#=aZ4c&yc?)yMf%%mZ5z}UFgE(lV1K(Dpq(K*=z~6iXxrX{HbMA z?B9y}GV|nOq#>4`Vsxx34cVu&6&RiE6WJ3_4VWc0ZDou4jnbzDS^AKpP45v3w!c7~ zJJIlHyI7u}J<1929;0n8Z#E`+n|+(hUG#J>DbqHS`$r2IJ?wDNo%Hl>dPWyLGjR0Y zAouA&U}uX5=sC4q`)F|HE^>d(Zo1KL%X`ZVIsWoj(xLQ!Bb|NzJ-#(Tdwh6)eTkHh zzBjr}-%<^hPl#xre3X#UrvJ5kN(}k@6r@r=_;kz}$<*|ls3DPrRlvY08VNvWbJ1C<0)YWmR}ZyrQS&7;iI%`?IOS*5BVrRs%Y z_l4a@E(QOkuzwN~_Rp}l(T_YX^7_adNoM2+QIS#6Br~ckswJw0WDt9n0z8^Q zOyopjBPWqT*!i?Bh>VIdJ@@5H;^0XMshRVM7;ERatpnc+)D2t zx6%FNcKRf_gMLZwq~DTz=F_%q~J{u=p>zfXSW z-;zK0Z=^?X@~5zngCc?SiX6&C9wj25Qc+BqmT49jzRoDaxBPRr#2vDc{m`<#(E45;W5kMYBu^G}|)O!H{I z=~z0}R8Pm5&ZOf_J7|&VHd<`zq@|{AI??nvon(5BmYZIslTB~ZO4H|bis?r>&8*Pr z=6E{GJcw4A2h$nm;dG{X6sdnpw8lJ>&Na`X^UMqAeDg7Mfw`V8GI!9$=2Pe~ z<}>KA<_qa@=1b}E=9}p<^Zj(W`DMDo{4T9C|4f}Kr7l&_234VrY6M-WM$^^mK-#1Z zqs?kEJwY8q+tl&2U7bWd>Jr+auApnwRdlV|LQhlAq^GMF(lgb|=mvE&Jxjflo~`bo z=crH6jq1zvT=hMAp879(zWNKjAWYDU!s6&9VMFPqVd?a;utK^itejpRHk)1%b}Zc- z=B8JNokXt*+eohs+eEJmyN_NU)=hVYJxp&1dx_o{_A$LF>?eA&C7OCI@$`1fFnWh2 znciucMDMmtr}tRq(@sl0?XsLn@3ovy@3ZWt-IfRGKFgEze#=Yr0n5j9e>kNNhKJFI z!pG5v!^`Ls;Zy08;dALz;VbCV;iuDQ!q278hIiBF!k?rEten1Rwb0kB1L*741p0<` zCVj_x3jNT!k$z;ofPQSfg??hamwsw}h<;{$ntl;sre8-z;9z})O9Q(>JAne^#F^CdYIXwUSZKu->?{4 z1dFv5usGXXHo$f~8)#d@hT6_y!)*K6aN84XgzXuYYqd7~7j$^6O zqgh&XDNB#8XPMDwv8?DVEIayYmJ@v!8x#E!%Z>hq<;B=oeoQ4B8&k~+VwzZC%z12F z%sp&;%yX*60{>*Eiwlj2`tr^J83PL2PKoi;#arw@o@ zXABt4&Kxj~Z5S|}oi$(MOe@J{yl;3wFVgFj+VCCp$?Cp54}5}MdU32p4Tgbwz6!ny2)gstqwgnQXb3H#a0 z2`{i$5`JQ@B}TE=6W6jg5-(>jCT?ZVCvIo|Ox(#nPP~(SlK2q&H1PxWS>hM$^Ta>c z7fH$NUr8zK%cMf~RnkiKP12?8m!w3rzW0-ijylBW*6kf#mZ#M6g<%X5bQ%Et`j zJaQ3*|1OeL_6V=>}9;% zzJwRsm-7ks23~1*@u~Kc`84|$KHa{LSJ@xuGwmPpS;I&1*~82EwBghE)Zw%F+~ITi z!r{yLqT#K4@o*16X81Y0cKDTi$?)BL>F~$-vBO{D#|{63A3yvcUp69wFCVd*H;lNP zyGCr~t47?KC}c?Ul^`7VA+@^k#u~OaBk$yAjoiaGj(mz=I`Tz+*~pLh zrjZBv<)fnbm7|jQ=20WKcT^p}bJSXX*QhJ_J)`dB_l|m%?;7<3-#dC3-#2G z{DINu@cpCj<*$z3&tDt;6Mr*h5dSbGiGP%m%|A|=#6L-y!9Pt|$UjT*@Xu3D=U=2; z$p4jcCI32Q2mdDJe*SIBQ~bM>SNZoT@A98hKH|Tm{KWsAD)_Id1Nd*LDg5`;@%)d} zD&CX2l>eF9#1E#n^4`?51xei|XzE>prS26X^&z389uTI~mxMX>O`)cKDlDnLiik8J zBGbY}RN5dBoi&FZM8^9J53~|og;>%T`q>D-5~5~onmC#gJM+L zqhfU08zMFBJ&~67mB>iz5t->`k(F)}W788wL3*kvOdl`Cr8kNR=^dgd{cKU3ex4{v zzgm>0cZss}2gStnH^ih2MNG~ZB&K9!h^mZYF(YH9n3*w0%*t?x*%|9ab;j9ZPR6xj zZpIyAUdB^me#Tp(CgXdtAak%-n7LXk%Dh|DX8t0M%?cOGvWANlS?QuKD_hiOO%l$m zIifM^c(F38LAbKkiB(w}#p$qo-;t4kuyr1nUgCvw3u`wr!$H$y4o)~kjI56fW@%)%w;)OAvix+bh@ltM%csX~rcqO++ zyqddMyq4<{ujg(SZ{*%2-psvIyr0`GKFEDae3<*2_$V(ze4LjgKFKQtazvpigf8^gLdh+**Kl7g!|H*$-9L)bj^yYuBkg+y}j!jnB*a}6AovtWj z7bvE&>lE`?ucD59T?s2t6idMX#afV{L=U*%aobp7b~;I zAE(S7?^3GAyOp`)+m(6a&sOG--=)-y-={1Xe?VC{{xfCK_+OO869y}_6P7E-PUujM zn{b}8e8MJW#f0mXx(QyTenOYxobZ&=FyT3+al(7b$|6;96`A*=q07S=$}eQ(PzqdhtHxjN%`Z4aI*b=aiU~jV0yExh2z-^GfC_=a+0!E-$%WxuWD_EBBSnP#$Kd62hm_S9{1b-b2nD5j%Kf-efa7%{r=wOd2&{ zFpr6`&oQ&pt+ARH{VQn%KWZih62tq3AdAJqQWii+^pFcJL63RJ5Q~Wq&y1Rk$s^*V z7oIdSK7P1HJKG$aGB9UU)bLR;1Dnn4jKt|?cBYz}9^FHl^taR0Llz844-Z$(>6VPl z3_gk4@_NXWoWy$GOCkt0vkioCZo*1r#HXP5Cn+?uvz!3^K{7^X^pF)117q{UljCeX zWDX2&J>>M1lpeAyWwa@LY;wt{QGl6|5R<*xyYbKA{X)7!;cv zK771hex$XH_Tou!SzAg$D60~bgz=$CPo;m5j>9n0O9mjMMNvJ$pCp%t{XtF&kq;zN zVIBV~L5UN;#8YyIgFJ^=!tH!WlEJ9C%-TcN4jY!Fj#Nj+!do^Hq`Md-I~v}2TeQsv z@1VFiOM0}8U3pZ~#}*zy!d?X_vWP4Ss1FQVXvG(VvM5U^B>@*?O@jO&3rivd5K$2X z6j=?6D`GU%XhjyGs1X&2MFo*fT%sb{Do917TAy_CKnM}E_nb-QnD#21(uL54?Oy(J?=FRc*eVfsKK$ z*`p1rs#ubWFg(8b2QzEA+nhDuDBsdnU+}YMVW9;hETl*qW8wZDTUKq&sZ7fs%^uD4 zNxh`BT%N4ss=11N@Wfc|1Ecr*`f)!#4D5BTxkn!?UB|p_=Ff;Qxu2o)p7EmN{1xeD zUKB&e-Y+J$9cox}x?E^IZVg7#`|NIwKE@XRJlHtk@Q#?6^SZA0$&!t%9fajSh6Ea4 zN#8waw4vMd*?d&i(WTGkXBphwUaQo-%Qj|&{|kR>BVnl;`lqEiv3s+QbxEl!oYaka z6JNttmbs&nbO)DQVfejvz^K1nlIeLWx8A9tfNg{hHEbY;aER#(&Ss9N4R1$Gqk>FE z201l^%|^_2zqWU4I-k?xbvm<9UI{YcqsH^jr}&r8kCPkvYguFhidND@TYRm^Hv3uc zxdy9Zt=BsDb%mjB&pkiLVil;jx9|pSn;``Rsek-x9G>UvY%N&6Sq&s7}Zu+QS+aj$m8m-cVUqn+#B&ussd$}MJd zSnilt-``l}I_%k2zwpK?-(gG4nf1dN)@KOAw=wjT{^MZ*sr_UmAici;c_1ien_;&l zxSFlAOVi$T^K@%Owi0!hUr)}`j`dG6ZVuSBg|RN~H}sO?kAY^t zQG^=x+`P&-!5>b&9&@f-AhAlRHamRL8w$fpGJ;arEo&=pIKv}kY2`WK{rS8$e zTupwGOk68iWSN|%B2<-$y8{+kB{!)E6=dRWgF}|dCsc%5GI5>YkX5n_YN%eSJ_w+- z`R=Hp3aR=Ka7LRSff}llst*QqZN49B=oVo8y^YF53sN;!=$CV|ok|*W=CsFIHdt>b z6=n$5%R5jdV0A*V4+5~o=0Z09_LzqNjSHoGNrL3>$oGXsZG7z z(zdbkfT#J&=cIN;+jc_QGyUgL#wJvAj8_de=d=~CY;)nJH;L`b>jahB>Lbe^?h|}8 zHnTIJ7r$B;Udu}i=KbPhddi2=uF+XcQPSAbw$|^!>To+3A0pvt!$vC(fjX?wdDo+Q znXuS`jjO`Reb?eEd-|G<(!I!Dk4Rp1kIWdU^}_Mg^48-cGPJz#5n1VHxIV;}7_aCS`N$gxFd6s-yfj_f%T=?exp*ru&E&vRi5((c(Nvnf#6*uif zH2f3*Y0(Qb1u$oPGWm*gCODY6db*%LUjN*+PP5;b3BS4%4)sYF(F6;}q$|$B)78Zp zPgsF-5iPC=W){YszVsW25FQ)}#c`+TYC*c3BG&FwB>*04z$@-VOHG=F)VPNO6`|1` zhzQAnf^4Qy$u!2el8+=(Yinc{1^Rq|!(;5!$63^6aqPhv1A=W zA=2aMGy)Uq_Rb7?Z^he|s<{A^83Uj>4fF~W{uO~v(!-G%Q zd2!J=3J#$mR3;T48B9ZH@PA6=S@^t+$x}6j>3s#G3u%ejf8ZT~B0gWBVXnh>BdY=+ z)B`|E%oTuq7_b>4H1lqp*l!-n*#L0xsw4;dl@lE3Br;`Y=P^v$9%~%{2pGwLrqIWo z9qA-G?JKz!F7(99%K_jrPqI=!W{9DDAr_e7GLQrVSq1|Mb0B7Gb0;?OSxS~z%)BT0 zq@b}H1psVe4bK-t0zj=`9P2CFk(CSW@`7b64+){C0Vp1xm5dV-6N69@N^ms7U^&wm z(X5%-9W{$>(vtl)imyboLDUQ+%!Xe~ERBaH!Gz`IJIigNlELV=Nj#wl!}=#IG?9Mjl&yX@0r`{nEJ6KuQOOyb zz+u4l{aIARZ%UoMe(f|rQSX<`ig;+mb>CsRi`jc4sef_HswghA?MI{*KV=N;1fH07 z{Pf48xLT|6lp~xS`OJ4VEqTd2iOc3qkCU6r1fLmIUnOzGe}zo1-fwYIVv>7a?y|5d SM8{1ZltSg%Z}MT;f`0&n+1w}q diff --git a/treesapp/training_utils.py b/treesapp/training_utils.py index 03d54db5..9c644cbb 100644 --- a/treesapp/training_utils.py +++ b/treesapp/training_utils.py @@ -365,7 +365,7 @@ def generate_pquery_data_for_trainer(ref_pkg: ReferencePackage, taxon: str, fasta.write_new_fasta(taxonomy_filtered_query_seqs, fasta_name=query_fasta_file) ## - # Run hmmalign, BMGE and EPA-NG to map sequences from the taxonomic rank onto the tree + # Run hmmalign, ClipKit and EPA-NG to map sequences from the taxonomic rank onto the tree ## aln_stdout = wrapper.profile_aligner(executables, ce_refpkg.f__msa, ce_refpkg.f__profile, query_fasta_file, query_sto_file) diff --git a/treesapp/utilities.py b/treesapp/utilities.py index c645ab17..b3236c9c 100644 --- a/treesapp/utilities.py +++ b/treesapp/utilities.py @@ -192,8 +192,6 @@ def executable_dependency_versions(exe_dict: dict) -> str: versions_dict[exe] = stdout.strip() elif exe == "FastTree": stdout, returncode = eci.launch_write_command([exe_dict[exe], "-expert"]) - elif exe == "BMGE.jar": - stdout, returncode = eci.launch_write_command(["java", "-Xmx10m", "-jar", exe_dict[exe], "-?"]) else: LOGGER.warning("Unknown version command for " + exe + ".\n") continue diff --git a/treesapp/wrapper.py b/treesapp/wrapper.py index 94f0519a..76fdb2a4 100644 --- a/treesapp/wrapper.py +++ b/treesapp/wrapper.py @@ -815,7 +815,7 @@ def get_msa_trim_command(executables, mfa_file, molecule, tool="BMGE"): return trim_command, trimmed_msa_file -def filter_multiple_alignments(executables, concatenated_mfa_files, refpkg_dict, n_proc=1, tool="BMGE", silent=False): +def filter_multiple_alignments(executables, concatenated_mfa_files, refpkg_dict, n_proc=1, tool="ClipKit", silent=False): """ Runs BMGE using the provided lists of the concatenated hmmalign files, and the number of sequences in each file. @@ -824,7 +824,7 @@ def filter_multiple_alignments(executables, concatenated_mfa_files, refpkg_dict, :param refpkg_dict: A dictionary of ReferencePackage instances indexed by their respective denominators :param n_proc: The number of parallel processes to be launched for alignment trimming :param tool: The software to use for alignment trimming - :return: A list of files resulting from BMGE multiple sequence alignment masking. + :return: A list of files resulting from multiple sequence alignment masking. """ start_time = time.time() task_list = list() From 5f347729d94996d6e4cf03b49342970615d74319 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Fri, 3 Jun 2022 09:52:55 +0200 Subject: [PATCH 02/18] Implemented basic helper class for clipkit and tests --- tests/test_clipkit_helper.py | 36 ++++++++++++++++++++++++++++++++++++ treesapp/clipkit_helper.py | 35 ++++++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 tests/test_clipkit_helper.py diff --git a/tests/test_clipkit_helper.py b/tests/test_clipkit_helper.py new file mode 100644 index 00000000..a9a5f0cb --- /dev/null +++ b/tests/test_clipkit_helper.py @@ -0,0 +1,36 @@ +import os +import unittest + +from .testing_utils import get_test_data + + +class MyTestCase(unittest.TestCase): + def setUp(self) -> None: + self.test_fa = get_test_data('PuhA.mfa') + self.output_fa = 'PuhA.clipped.mfa' + + def tearDown(self) -> None: + if os.path.isfile(self.output_fa): + os.remove(self.output_fa) + + def test_run(self): + from treesapp import clipkit_helper + from clipkit import modes as ck_modes + ck = clipkit_helper.ClipKitHelper(fasta_in=self.test_fa, + mfa_out=self.output_fa, + mode="smart-gap") + ck.run() + self.assertTrue(os.path.isfile(self.output_fa)) + + ck.mode = ck_modes.TrimmingMode("kpi-smart-gap") + ck.run() + self.assertTrue(os.path.isfile(self.output_fa)) + + ck.mode = ck_modes.TrimmingMode("kpi") + ck.run() + self.assertTrue(os.path.isfile(self.output_fa)) + return + + +if __name__ == '__main__': + unittest.main() diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index 9d3e7c80..f0de1671 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -1,6 +1,8 @@ import logging -from clipkit import clipkit -from clipkit import args_processing +import os.path + +from clipkit import clipkit as ck +from clipkit import modes as ck_modes from treesapp import logger @@ -8,14 +10,33 @@ class ClipKitHelper: CLIPKIT_MODES = {"smart-gap"} - def __init__(self, fasta_in: str, mfa_out: str): + def __init__(self, fasta_in: str, mfa_out=None, mode="smart-gap", gap_prop=0.9): + self.input = fasta_in + if mfa_out is None: + prefix, ext = os.path.splitext(fasta_in) + self.mfa_out = prefix + ".trim" + ext + else: + self.mfa_out = mfa_out + self.logger = logging.getLogger(logger.logger_name()) - self.input = "" - self.mfa_out = "" + self.mode = ck_modes.TrimmingMode(mode) + self.gap_prop = gap_prop - self.mode = "smart-gap" + self.ff_in = "fasta" + self.ff_out = "fasta" return def run(self): - # clipkit.execute() + + ck.execute(input_file=self.input, + input_file_format=self.ff_in, + output_file=self.mfa_out, + output_file_format=self.ff_out, + gaps=self.gap_prop, + complement=False, + mode=self.mode, + use_log=False) + return + + def summarise_trimming(self): return From 0838ac56374b4f07d4a12bfddb65f399873b4300 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Sun, 5 Jun 2022 15:25:38 +0200 Subject: [PATCH 03/18] Allow kwargs or argument lists to be used with async pools --- tests/test_multiple_alignment.py | 8 ++++++++ treesapp/external_command_interface.py | 7 +++++-- treesapp/multiple_alignment.py | 0 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 tests/test_multiple_alignment.py create mode 100644 treesapp/multiple_alignment.py diff --git a/tests/test_multiple_alignment.py b/tests/test_multiple_alignment.py new file mode 100644 index 00000000..052b47e9 --- /dev/null +++ b/tests/test_multiple_alignment.py @@ -0,0 +1,8 @@ +import unittest + +class MyTestCase(unittest.TestCase): + def test_something(self): + self.assertEqual(True, False) # add assertion here + +if __name__ == '__main__': + unittest.main() diff --git a/treesapp/external_command_interface.py b/treesapp/external_command_interface.py index 1b3f11f5..70bc5b5b 100644 --- a/treesapp/external_command_interface.py +++ b/treesapp/external_command_interface.py @@ -43,7 +43,7 @@ def launch_write_command(cmd_list, collect_all=True): return stdout, proc.returncode -def run_apply_async_multiprocessing(func, arguments_list: list, num_processes: int, pbar_desc: str, +def run_apply_async_multiprocessing(func, arguments_list, num_processes: int, pbar_desc: str, disable=False) -> list: if len(arguments_list) == 0: return [] @@ -57,7 +57,10 @@ def update(*a): pbar.update() for args in arguments_list: - jobs.append(pool.apply_async(func=func, args=(*args,), callback=update)) + if isinstance(args, list): + jobs.append(pool.apply_async(func=func, args=(*args,), callback=update)) + elif isinstance(args, dict): + jobs.append(pool.apply_async(func=func, kwds=args, callback=update)) pool.close() for job in pbar: diff --git a/treesapp/multiple_alignment.py b/treesapp/multiple_alignment.py new file mode 100644 index 00000000..e69de29b From 5075f51fa928c7c7a85aaa613ba8877784386e64 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Mon, 6 Jun 2022 13:08:13 +0200 Subject: [PATCH 04/18] Validation of ClipKit outputs and writing QC'd MSAs --- tests/test_clipkit_helper.py | 4 +- tests/test_file_parsers.py | 39 -------- tests/test_multiple_alignment.py | 29 +++++- treesapp/assign.py | 159 +++---------------------------- treesapp/clipkit_helper.py | 148 ++++++++++++++++++++++++++-- treesapp/file_parsers.py | 117 ----------------------- treesapp/multiple_alignment.py | 119 +++++++++++++++++++++++ treesapp/wrapper.py | 67 ------------- 8 files changed, 303 insertions(+), 379 deletions(-) diff --git a/tests/test_clipkit_helper.py b/tests/test_clipkit_helper.py index a9a5f0cb..88be0c71 100644 --- a/tests/test_clipkit_helper.py +++ b/tests/test_clipkit_helper.py @@ -7,7 +7,7 @@ class MyTestCase(unittest.TestCase): def setUp(self) -> None: self.test_fa = get_test_data('PuhA.mfa') - self.output_fa = 'PuhA.clipped.mfa' + self.output_fa = 'PuhA.trim.mfa' def tearDown(self) -> None: if os.path.isfile(self.output_fa): @@ -17,7 +17,7 @@ def test_run(self): from treesapp import clipkit_helper from clipkit import modes as ck_modes ck = clipkit_helper.ClipKitHelper(fasta_in=self.test_fa, - mfa_out=self.output_fa, + output_dir='./', mode="smart-gap") ck.run() self.assertTrue(os.path.isfile(self.output_fa)) diff --git a/tests/test_file_parsers.py b/tests/test_file_parsers.py index a18ad007..86393570 100644 --- a/tests/test_file_parsers.py +++ b/tests/test_file_parsers.py @@ -134,45 +134,6 @@ def test_read_lineage_ids(self): read_lineage_ids(get_test_data("McrA_lineage_ids - GTDB_map.tsv")) return - def test_check_seq_name_integer_compatibility(self): - from treesapp import file_parsers - msa, n_refs = file_parsers.check_seq_name_integer_compatibility(seq_dict=self.test_fasta_data) - self.assertEqual(-1, n_refs) - self.assertEqual("Bad_header_name", msa.popitem()[0]) - return - - def test_validate_alignment_trimming(self): - from treesapp import file_parsers - from treesapp import fasta - test_msa = get_test_data("PuhA.mfa") - headers = set([str(i) + "_PuhA" for i in range(1, 48)]) - tmp_fasta = fasta.FASTA(file_name=test_msa) - tmp_fasta.load_fasta() - - # Test bad file extension - with pytest.raises(SystemExit): - file_parsers.validate_alignment_trimming(["PuhA.stk"], headers) - - # Fail due to a bad sequence name in a fasta - with pytest.raises(SystemExit): - file_parsers.validate_alignment_trimming([self.test_data_file], set(self.test_fasta_data)) - - # Ensure success - success, fail, msg = file_parsers.validate_alignment_trimming(msa_files=[test_msa], - unique_ref_headers=set(tmp_fasta.get_seq_names())) - self.assertEqual(32, len(success[test_msa])) - self.assertEqual([], fail) - self.assertIsInstance(msg, str) - - # MSA fails due to more sequence names in unique_ref_headers than MSA - success, fail, msg = file_parsers.validate_alignment_trimming(msa_files=[test_msa], - unique_ref_headers=headers) - self.assertEqual({}, success) - self.assertEqual([test_msa], fail) - self.assertIsInstance(msg, str) - - return - if __name__ == '__main__': unittest.main() diff --git a/tests/test_multiple_alignment.py b/tests/test_multiple_alignment.py index 052b47e9..83969cfe 100644 --- a/tests/test_multiple_alignment.py +++ b/tests/test_multiple_alignment.py @@ -1,8 +1,33 @@ +import os import unittest +from .testing_utils import get_test_data + + class MyTestCase(unittest.TestCase): - def test_something(self): - self.assertEqual(True, False) # add assertion here + def test_trim_multiple_alignments(self): + from treesapp import multiple_alignment + from treesapp import refpkg + test_fa = get_test_data('PuhA.mfa') + output_file = os.path.join("tests", "test_data", "PuhA.trim.mfa") + test_rp = refpkg.ReferencePackage(refpkg_name="PuhA") + test_rp.f__pkl = get_test_data(filename=os.path.join("refpkgs", "PuhA_build.pkl")) + test_rp.slurp() + + result = multiple_alignment.trim_multiple_alignment_farmer({"PuhA": [test_fa]}, + min_seq_length=10, + n_proc=1, + ref_pkgs={"PuhA": test_rp}) + self.assertTrue(os.path.isfile(output_file)) + self.assertIsInstance(result, dict) + self.assertTrue("PuhA" in result.keys()) + self.assertEqual(os.path.basename(output_file), + os.path.basename(result["PuhA"].pop())) + + if os.path.isfile(output_file): + os.remove(output_file) + return + if __name__ == '__main__': unittest.main() diff --git a/treesapp/assign.py b/treesapp/assign.py index e7805095..40b6ace7 100755 --- a/treesapp/assign.py +++ b/treesapp/assign.py @@ -15,20 +15,21 @@ from treesapp import abundance from treesapp import classy +from treesapp import entish +from treesapp import external_command_interface as eci +from treesapp import fasta +from treesapp import file_parsers +from treesapp import lca_calculations as ts_lca from treesapp import logger +from treesapp import multiple_alignment from treesapp import phylo_seq from treesapp import refpkg from treesapp import treesapp_args -from treesapp import entish -from treesapp import lca_calculations as ts_lca from treesapp import jplace_utils -from treesapp import file_parsers from treesapp import phylo_dist from treesapp import utilities from treesapp import wrapper -from treesapp import fasta from treesapp import training_utils -from treesapp import external_command_interface as eci from treesapp.hmmer_tbl_parser import HmmMatch LOGGER = logging.getLogger(logger.logger_name()) @@ -363,35 +364,27 @@ def search(self, ref_pkg_dict: dict, hmm_parsing_thresholds, num_threads=2) -> d return file_parsers.parse_domain_tables(hmm_parsing_thresholds, refpkg_hmmer_tables) - def align(self, refpkg_dict: dict, homolog_seq_files: list, min_seq_length: int, n_proc: int, - trim_align=True, verbose=False) -> dict: + def align(self, refpkg_dict: dict, homolog_seq_files: list, min_seq_length: int, n_proc: int, trim_align=True) -> dict: if self.past_last_stage("align"): return {} MSAs = namedtuple("MSAs", "ref query") - ref_alignment_dimensions = get_alignment_dims(refpkg_dict) split_msa_files = self.fetch_multiple_alignments() target_refpkgs = {prefix: rp for prefix, rp in refpkg_dict.items() if prefix not in split_msa_files} if self.stage_status("align") or target_refpkgs: - # create_ref_phy_files(refpkg_dict, align_output_dir, - # homolog_seq_files, ref_alignment_dimensions) concatenated_msa_files = multiple_alignments(self.executables, homolog_seq_files, target_refpkgs, "hmmalign", output_dir=self.stage_lookup(name="align").dir_path, num_proc=n_proc, silent=self.silent) if concatenated_msa_files: combined_msa_files = {} - file_type = utilities.find_msa_type(concatenated_msa_files) - alignment_length_dict = get_sequence_counts(concatenated_msa_files, ref_alignment_dimensions, - verbose, file_type) if trim_align: - tool = "BMGE" - trimmed_mfa_files = wrapper.filter_multiple_alignments(self.executables, concatenated_msa_files, - target_refpkgs, n_proc, tool, self.silent) - qc_ma_dict = check_for_removed_sequences(trimmed_mfa_files, concatenated_msa_files, - target_refpkgs, min_seq_length) - evaluate_trimming_performance(qc_ma_dict, alignment_length_dict, concatenated_msa_files, tool) - combined_msa_files.update(qc_ma_dict) + trimmed_mfa_files = multiple_alignment.trim_multiple_alignment_farmer(concatenated_msa_files, + min_seq_length=min_seq_length, + ref_pkgs=refpkg_dict, + n_proc=n_proc, + silent=self.silent) + combined_msa_files.update(trimmed_mfa_files) else: combined_msa_files.update(concatenated_msa_files) @@ -872,132 +865,6 @@ def gather_split_msa(refpkg_names: list, align_dir: str) -> dict: return split_msa_map -def check_for_removed_sequences(trimmed_msa_files: dict, msa_files: dict, refpkg_dict: dict, min_len=10): - """ - Reads the multiple alignment files (either Phylip or FASTA formatted) and looks for both reference and query - sequences that have been removed. Multiple alignment files are removed from `mfa_files` if: - 1. all query sequences were removed; a DEBUG message is issued - 2. at least one reference sequence was removed - This quality-control function is necessary for placing short query sequences onto reference trees. - - :param trimmed_msa_files: - :param msa_files: A dictionary containing the untrimmed MSA files indexed by reference package code (denominator) - :param refpkg_dict: A dictionary of ReferencePackage objects indexed by their ref_pkg names - :param min_len: The minimum allowable sequence length after trimming (not including gap characters) - :return: A dictionary of denominators, with multiple alignment dictionaries as values. Example: - {M0702: { "McrB_hmm_purified.phy-BMGE.fasta": {'1': seq1, '2': seq2}}} - """ - qc_ma_dict = dict() - num_successful_alignments = 0 - discarded_seqs_string = "" - trimmed_away_seqs = dict() - untrimmed_msa_failed = [] - LOGGER.debug("Validating trimmed multiple sequence alignment files... ") - - for refpkg_name in sorted(trimmed_msa_files.keys()): - ref_pkg = refpkg_dict[refpkg_name] # type: refpkg.ReferencePackage - trimmed_away_seqs[ref_pkg.prefix] = 0 - # Create a set of the reference sequence names - ref_headers = fasta.get_headers(ref_pkg.f__msa) - unique_refs = set([re.sub('_' + re.escape(ref_pkg.prefix), '', x)[1:] for x in ref_headers]) - msa_passed, msa_failed, summary_str = file_parsers.validate_alignment_trimming( - trimmed_msa_files[ref_pkg.prefix], - unique_refs, True, min_len) - - # Report the number of sequences that are removed by BMGE - for trimmed_msa_file in trimmed_msa_files[ref_pkg.prefix]: - try: - prefix = re.search('(' + re.escape(ref_pkg.prefix) + r"_.*_group\d+)-(BMGE|trimAl).fasta$", - os.path.basename(trimmed_msa_file)).group(1) - except TypeError: - LOGGER.error("Unexpected file name format for a trimmed MSA.\n") - sys.exit(3) - # Find the untrimmed query sequence MSA file - the trimmed MSA file's 'pair' - pair = "" - for msa_file in msa_files[ref_pkg.prefix]: - if re.search(re.escape(prefix) + r'\.', msa_file): - pair = msa_file - break - if pair: - if trimmed_msa_file in msa_failed: - untrimmed_msa_failed.append(pair) - trimmed_away_seqs[ref_pkg.prefix] += len( - set(fasta.get_headers(pair)).difference(set(fasta.get_headers(trimmed_msa_file)))) - else: - LOGGER.error("Unable to map trimmed MSA file '" + trimmed_msa_file + "' to its original MSA.\n") - sys.exit(5) - - if len(msa_failed) > 0: - if len(untrimmed_msa_failed) != len(msa_failed): - LOGGER.error("Not all of the failed ({}/{})," - " trimmed MSA files were mapped to their original MSAs." - "\n".format(len(msa_failed), len(trimmed_msa_files[ref_pkg.prefix]))) - sys.exit(3) - untrimmed_msa_passed, _, _ = file_parsers.validate_alignment_trimming(untrimmed_msa_failed, unique_refs, - True, min_len) - msa_passed.update(untrimmed_msa_passed) - num_successful_alignments += len(msa_passed) - qc_ma_dict[ref_pkg.prefix] = msa_passed - discarded_seqs_string += summary_str - untrimmed_msa_failed.clear() - - LOGGER.debug("done.\n") - LOGGER.debug("\tSequences removed during trimming:\n\t\t" + - '\n\t\t'.join([k + ": " + str(trimmed_away_seqs[k]) for k in trimmed_away_seqs.keys()]) + "\n") - - LOGGER.debug("\tSequences <" + str(min_len) + " characters removed after trimming:" + - discarded_seqs_string + "\n") - - if num_successful_alignments == 0: - LOGGER.error("No quality alignment files to analyze after trimming. Exiting now.\n") - sys.exit(0) # Should be 3, but this allows Clade_exclusion_analyzer to continue after exit - - return qc_ma_dict - - -def evaluate_trimming_performance(qc_ma_dict, alignment_length_dict, concatenated_msa_files, tool): - """ - - :param qc_ma_dict: A dictionary mapping denominators to files to multiple alignment dictionaries - :param alignment_length_dict: - :param concatenated_msa_files: Dictionary with markers indexing original (untrimmed) multiple alignment files - :param tool: The name of the tool that was appended to the original, untrimmed or unmasked alignment files - :return: None - """ - trimmed_length_dict = dict() - for denominator in sorted(qc_ma_dict.keys()): - if len(concatenated_msa_files[denominator]) >= 1: - of_ext = concatenated_msa_files[denominator][0].split('.')[-1] - else: - continue - if denominator not in trimmed_length_dict: - trimmed_length_dict[denominator] = list() - for multi_align_file in qc_ma_dict[denominator]: - file_type = multi_align_file.split('.')[-1] - multi_align = qc_ma_dict[denominator][multi_align_file] - num_seqs, trimmed_seq_length = fasta.multiple_alignment_dimensions(multi_align_file, multi_align) - - original_multi_align = re.sub('-' + tool + '.' + file_type, '.' + of_ext, multi_align_file) - raw_align_len = alignment_length_dict[original_multi_align] - diff = raw_align_len - trimmed_seq_length - if diff < 0: - LOGGER.warning("MSA length increased after {} processing for {}\n".format(tool, multi_align_file)) - else: - trimmed_length_dict[denominator].append(diff) - - trimming_performance_string = "\tAverage columns removed:\n" - for denominator in trimmed_length_dict: - trimming_performance_string += "\t\t" + denominator + "\t" - n_trimmed_files = len(trimmed_length_dict[denominator]) - if n_trimmed_files > 0: - trimming_performance_string += str(round(sum(trimmed_length_dict[denominator]) / n_trimmed_files, 1)) + "\n" - else: - trimming_performance_string += str(0.0) + "\n" - - LOGGER.debug(trimming_performance_string + "\n") - return - - def delete_files(clean_up: bool, root_dir: str, section: int) -> None: files_to_be_deleted = [] if clean_up: diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index f0de1671..2e40a0e1 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -1,3 +1,5 @@ +import re +import sys import logging import os.path @@ -5,18 +7,22 @@ from clipkit import modes as ck_modes from treesapp import logger +from treesapp import fasta +from treesapp import refpkg +from treesapp import file_parsers class ClipKitHelper: CLIPKIT_MODES = {"smart-gap"} - def __init__(self, fasta_in: str, mfa_out=None, mode="smart-gap", gap_prop=0.9): + def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0.9): self.input = fasta_in - if mfa_out is None: - prefix, ext = os.path.splitext(fasta_in) - self.mfa_out = prefix + ".trim" + ext - else: - self.mfa_out = mfa_out + if not os.path.isdir(output_dir): + os.mkdir(output_dir) + + prefix, ext = os.path.splitext(os.path.basename(fasta_in)) + self.mfa_out = os.path.join(output_dir, prefix + ".trim" + ext) + self.qc_mfa_out = os.path.join(output_dir, prefix + ".trim.qc" + ext) self.logger = logging.getLogger(logger.logger_name()) self.mode = ck_modes.TrimmingMode(mode) @@ -24,6 +30,24 @@ def __init__(self, fasta_in: str, mfa_out=None, mode="smart-gap", gap_prop=0.9): self.ff_in = "fasta" self.ff_out = "fasta" + self.refpkg_name = '' + self.min_unaligned_seq_length = 1 + + # Attributes used in evaluating trimming performance + self.success = False + self.num_msa_seqs = 0 + self.num_msa_cols = 0 + self.num_trim_seqs = 0 + self.num_trim_cols = 0 + self.trim_qc_seqs = [] # These sequences passed the min_unaligned_seq_length filter + + # Specific to MSAs for phylogenetic placement + self.num_queries_failed_trimming = 0 + self.num_refs_failed_trimming = 0 + self.num_queries_failed_qc = 0 + self.num_refs_failed_qc = 0 + self.num_queries_retained = 0 + self.num_refs_retained = 0 return def run(self): @@ -39,4 +63,116 @@ def run(self): return def summarise_trimming(self): + if self.num_trim_seqs == 0: + self.logger.warning("No sequences were read from {}.\n".format(self.mfa_out)) + + if self.num_trim_cols < self.min_unaligned_seq_length: + # Throw an error if the final trimmed alignment is shorter than min_seq_length, and therefore empty + self.logger.warning( + "Multiple sequence alignment in {} is shorter than minimum sequence length threshold ({}).\n" + "".format(self.mfa_out, self.min_unaligned_seq_length)) + elif self.num_refs_failed_trimming: + # Testing whether there were more sequences in the untrimmed alignment than the trimmed one + self.logger.warning( + "{} reference sequences in {} were removed during alignment trimming " + + "suggesting either truncated sequences or the initial reference alignment was terrible.\n" + "".format(self.num_refs_failed_trimming, self.mfa_out)) + elif self.num_refs_failed_qc: + self.logger.warning("{} reference sequences in {} were shorter than the minimum character length ({})" + " and removed after alignment trimming.\n" + "".format(self.num_refs_failed_qc, self.mfa_out, self.min_unaligned_seq_length)) + + # Ensure that there is at least 1 query sequence retained after trimming the multiple alignment + elif self.num_queries_retained == 0: + self.logger.warning("No query sequences in {} were retained after trimming.\n".format(self.mfa_out)) + + if self.success is False: + self.logger.debug("The untrimmed MSA will be used instead.\n") + return + + def quantify_refs_and_pqueries(self, unique_ref_headers: set, msa_fasta: fasta.FASTA = None): + if not unique_ref_headers: + return + + if not msa_fasta: + msa_fasta = self.read_trimmed_msa() + + for seq_name in msa_fasta.fasta_dict: + if seq_name[0] == '-': # The negative integers indicate this is a query sequence + if seq_name in self.trim_qc_seqs: + self.num_queries_retained += 1 + else: + self.num_queries_failed_qc += 1 + elif seq_name in unique_ref_headers: + if seq_name in self.trim_qc_seqs: + self.num_refs_retained += 1 + else: + self.num_refs_failed_qc += 1 + else: + raise RuntimeError("Unsure what to do with sequence '{}'.\n".format(seq_name)) + return + + def read_trimmed_msa(self) -> fasta.FASTA: + msa_records = fasta.FASTA(file_name=self.mfa_out) + if self.ff_out == "phylip": + msa_records.fasta_dict = file_parsers.read_phylip_to_dict(self.mfa_out) + elif self.ff_out == "fasta": + msa_records.fasta_dict = fasta.read_fasta_to_dict(self.mfa_out) + else: + self.logger.error("Unsupported file format ('{}') of {}.\n".format(self.ff_out, self.mfa_out)) + sys.exit(1) + + return msa_records + + def validate_alignment_trimming(self): + msa_fasta = self.read_trimmed_msa() + if self.num_trim_seqs == 0: + self.success = False + + if self.num_trim_cols < self.min_unaligned_seq_length: + self.success = False + + if self.num_trim_cols > self.num_msa_cols: + self.logger.warning("MSA length increased after trimming {}\n".format(self.input)) + self.success = False + + msa_fasta.unalign() + for seq_name, seq in msa_fasta.fasta_dict.items(): + if len(seq) >= self.min_unaligned_seq_length: + self.trim_qc_seqs.append(seq_name) + + return + + def compare_original_and_trimmed_multiple_alignments(self, min_len: int, ref_pkg=None): + """Summarises the number of character positions trimmed and new dimensions between the input and output MSA.""" + + self.num_trim_seqs, self.num_trim_cols = fasta.multiple_alignment_dimensions(self.mfa_out) + self.num_msa_seqs, self.num_msa_cols = fasta.multiple_alignment_dimensions(self.input) + + self.min_unaligned_seq_length = min_len + self.validate_alignment_trimming() + + if ref_pkg is not None: # type: refpkg.ReferencePackage + # Create a set of the reference sequence names + unique_ref_headers = set([re.sub('_' + re.escape(ref_pkg.prefix), '', x)[1:] + for x in + ref_pkg.msa]) + self.quantify_refs_and_pqueries(unique_ref_headers) + + return + + def get_qc_output(self) -> str: + if self.success: + return self.qc_mfa_out + else: + return self.input + + def write_qc_trimmed_multiple_alignment(self) -> None: + if not self.success: + return + + msa_fasta = self.read_trimmed_msa() + msa_fasta.keep_only(header_subset=self.trim_qc_seqs) + fasta.write_new_fasta(fasta_dict=msa_fasta.fasta_dict, + fasta_name=self.qc_mfa_out) return diff --git a/treesapp/file_parsers.py b/treesapp/file_parsers.py index e9ee3f96..ae5fb9a4 100644 --- a/treesapp/file_parsers.py +++ b/treesapp/file_parsers.py @@ -545,123 +545,6 @@ def read_stockholm_to_dict(sto_file): return seq_dict -def check_seq_name_integer_compatibility(seq_dict: dict) -> (dict, int): - # Parse the MSA dict and ensure headers are integer-compatible - multi_align = {} - n_msa_refs = 0 - for seq_name, seq in seq_dict.items(): - try: - if int(seq_name) > 0: - n_msa_refs += 1 - except ValueError: - if re.match(r"^_\d+", seq_name): - leaf_num = re.sub("^_", '-', seq_name) - # The section of regular expresion after '_' needs to match denominator and refpkg names - elif re.match(r"^\d+_\w{2,10}$", seq_name): - leaf_num = seq_name.split('_')[0] - else: - return {seq_name: ""}, -1 - if int(leaf_num) > 0: - n_msa_refs += 1 - multi_align[seq_name] = seq - return multi_align, n_msa_refs - - -def validate_alignment_trimming(msa_files: list, unique_ref_headers: set, - queries_mapped=False, min_seq_length=30) -> (dict, list, str): - """ - Parse a list of multiple sequence alignment (MSA) files and determine whether the multiple alignment: - 1. is shorter than the min_seq_length (30 by default) - 2. is missing any reference sequences - The number of query sequences discarded - these may have been added by hmmalign or PaPaRa - is returned via a string - - NOTE: Initially designed for sequence records with numeric names (e.g. >488) but accommodates other TreeSAPP formats - - :param msa_files: A list of either Phylip or FASTA formatted MSA files - :param unique_ref_headers: A set of all headers that were in the untrimmed MSA - :param queries_mapped: Boolean indicating whether sequences should be present in addition to reference sequences. - While query sequences _could_ be identified as any that are not in unique_ref_headers, - queries have names that are negative integers for more rapid and scalable identification - :param min_seq_length: Optional minimum unaligned (no '-'s) length a sequence must exceed to be retained - :return: 1. Dictionary indexed by MSA file name mapping to FASTA-dictionaries - 2. A string mapping the number of query sequences removed from each MSA file - 3. A string describing the number of sequences discarded - """ - discarded_seqs_string = "" - successful_multiple_alignments = dict() - failed_multiple_alignments = list() - n_refs = len(unique_ref_headers) - for multi_align_file in msa_files: - filtered_multi_align = dict() - discarded_seqs = list() - num_queries_retained = 0 - n_retained_refs = 0 - f_ext = multi_align_file.split('.')[-1] - - # Read the multiple alignment file - if re.search("phy", f_ext): # File is in Phylip format - seq_dict = read_phylip_to_dict(multi_align_file) - elif re.match("^f", f_ext): # This is meant to match all fasta extensions - seq_dict = fasta.read_fasta_to_dict(multi_align_file) - elif f_ext == "mfa": # This is meant to match a multiple alignment in FASTA format - seq_dict = fasta.read_fasta_to_dict(multi_align_file) - else: - LOGGER.error("Unable to detect file format of " + multi_align_file + ".\n") - sys.exit(13) - - multi_align, n_msa_refs = check_seq_name_integer_compatibility(seq_dict) - if n_msa_refs < 0: - LOGGER.error("Unexpected sequence name ('{}') detected in {}.\n" - "".format(multi_align.popitem()[0], multi_align_file)) - sys.exit(13) - if len(multi_align) == 0: - LOGGER.warning("No sequences were read from {}. " - "The untrimmed alignment will be used instead.\n".format(multi_align_file)) - failed_multiple_alignments.append(multi_align_file) - continue - # The numeric identifiers make it easy to maintain order in the Phylip file by a numerical sort - for seq_name in sorted(multi_align, key=lambda x: int(x.split('_')[0])): - seq_dummy = re.sub('-', '', multi_align[seq_name]) - if len(seq_dummy) < min_seq_length: - discarded_seqs.append(seq_name) - else: - filtered_multi_align[seq_name] = multi_align[seq_name] - # The negative integers indicate this is a query sequence - if seq_name[0] == '-': - num_queries_retained += 1 - else: - n_retained_refs += 1 - discarded_seqs_string += "\n\t\t" + multi_align_file + " = " + str(len(discarded_seqs)) - if len(discarded_seqs) == len(multi_align.keys()): - # Throw an error if the final trimmed alignment is shorter than min_seq_length, and therefore empty - LOGGER.warning("Multiple sequence alignment in {} is shorter than minimum sequence length threshold ({})." - "\nThe untrimmed MSA will be used instead.\n".format(multi_align_file, min_seq_length)) - failed_multiple_alignments.append(multi_align_file) - elif n_refs > n_msa_refs: - # Testing whether there were more sequences in the untrimmed alignment than the trimmed one - LOGGER.warning("Reference sequences in " + multi_align_file + " were removed during alignment trimming " + - "suggesting either truncated sequences or the initial reference alignment was terrible.\n" + - "The untrimmed alignment will be used instead.\n") - failed_multiple_alignments.append(multi_align_file) - elif n_refs > n_retained_refs: - LOGGER.warning("Reference sequences shorter than the minimum character length ({})" - " in {} were removed after alignment trimming.\n".format(min_seq_length, multi_align_file) + - "The untrimmed alignment will be used instead.\n") - failed_multiple_alignments.append(multi_align_file) - # Ensure that there is at least 1 query sequence retained after trimming the multiple alignment - elif queries_mapped and num_queries_retained == 0: - LOGGER.warning("No query sequences in " + multi_align_file + " were retained after trimming.\n") - else: - successful_multiple_alignments[multi_align_file] = filtered_multi_align - - if multi_align_file in successful_multiple_alignments: - discarded_seqs_string += " (retained)" - else: - discarded_seqs_string += " (removed)" - - return successful_multiple_alignments, failed_multiple_alignments, discarded_seqs_string - - def read_annotation_mapping_file(annot_map_file: str) -> dict: """ Used for reading a file mapping the reference package name to all true positive orthologs in the query input diff --git a/treesapp/multiple_alignment.py b/treesapp/multiple_alignment.py index e69de29b..e890a025 100644 --- a/treesapp/multiple_alignment.py +++ b/treesapp/multiple_alignment.py @@ -0,0 +1,119 @@ +import os.path +import time +import logging + +from treesapp import logger +from treesapp import refpkg +from treesapp import external_command_interface as eci +from treesapp import clipkit_helper as ckh + + +LOGGER = logging.getLogger(logger.logger_name()) + + +def trim_multiple_alignment_clipkit(msa_file: str, ref_pkg: refpkg.ReferencePackage, min_seq_length: int) -> ckh.ClipKitHelper: + trimmer = ckh.ClipKitHelper(fasta_in=msa_file, + output_dir=os.path.dirname(msa_file)) + trimmer.refpkg_name = ref_pkg.prefix + trimmer.run() + trimmer.compare_original_and_trimmed_multiple_alignments(min_seq_length, ref_pkg) + trimmer.write_qc_trimmed_multiple_alignment() + return trimmer + + +def summarise_trimming(msa_trimmers: list) -> None: + """Summarises various outcomes of trimming MSAs.""" + num_successful_alignments = 0 + discarded_seqs_string = "" + trimmed_away_seqs = dict() + untrimmed_msa_failed = [] + LOGGER.debug("Validating trimmed multiple sequence alignment files... ") + for trimmer in msa_trimmers: # type: ckh.ClipKitHelper + # TODO: Gather all useful stats for each trimmer instance + if trimmer.success: + num_successful_alignments += 1 + + # TODO: Summarise trimming by reference package + trimming_performance_string = "\tAverage columns removed:\n" + for refpkg_name in trimmed_length_dict: + trimming_performance_string += "\t\t" + refpkg_name + "\t" + n_trimmed_files = len(trimmed_length_dict[denominator]) + if n_trimmed_files > 0: + trimming_performance_string += str( + round(sum(trimmed_length_dict[denominator]) / n_trimmed_files, 1)) + "\n" + else: + trimming_performance_string += str(0.0) + "\n" + + LOGGER.debug(trimming_performance_string + "\n") + + discarded_seqs_string += "\n\t\t" + self.mfa_out + " = " + str(len(discarded_seqs)) + num_successful_alignments += len(msa_passed) + qc_ma_dict[ref_pkg.prefix] = msa_passed + discarded_seqs_string += summary_str + untrimmed_msa_failed.clear() + + LOGGER.debug("done.\n") + LOGGER.debug("\tSequences removed during trimming:\n\t\t" + + '\n\t\t'.join([k + ": " + str(trimmed_away_seqs[k]) for k in trimmed_away_seqs.keys()]) + "\n") + + LOGGER.debug("\tSequences <" + str(min_len) + " characters removed after trimming:" + + discarded_seqs_string + "\n") + + if num_successful_alignments == 0: + LOGGER.error("No quality alignment files to analyze after trimming. Exiting now.\n") + sys.exit(0) # Should be 3, but this allows Clade_exclusion_analyzer to continue after exit + return + + +def gather_multiple_alignments(msa_trimmers: list) -> dict: + """ + Creates a dictionary of MSA files indexed by reference package names. + These files are trimmed outputs if trimming was successful, or the original if not. + """ + trimmed_output_files = {} + for trimmer in msa_trimmers: # type: ckh.ClipKitHelper + try: + trimmed_output_files[trimmer.refpkg_name].append(trimmer.get_qc_output()) + except KeyError: + trimmed_output_files[trimmer.refpkg_name] = [trimmer.get_qc_output()] + return trimmed_output_files + + +def trim_multiple_alignment_farmer(concatenated_mfa_files: dict, min_seq_length: int, ref_pkgs: dict, + n_proc=1, silent=False) -> dict: + """ + Runs ClipKit using the provided lists of the concatenated hmmalign files, and the number of sequences in each file. + + :param concatenated_mfa_files: A dictionary containing f_contig keys mapping to a FASTA or Phylip sequential file + :param min_seq_length: Minimum length for a sequence to be retained in the MSA + :param ref_pkgs: A dictionary of reference package names mapped to ReferencePackage instances + :param n_proc: The number of parallel processes to be launched for alignment trimming + :param silent: A boolean indicating whether the + :return: A list of files resulting from multiple sequence alignment masking. + """ + start_time = time.time() + task_list = list() + + for refpkg_code, mfa_files in sorted(concatenated_mfa_files.items()): + for msa in mfa_files: + task_list.append({"msa_file": msa, + "ref_pkg": ref_pkgs[refpkg_code], + "min_seq_length": min_seq_length}) + + msa_trimmers = eci.run_apply_async_multiprocessing(func=trim_multiple_alignment_clipkit, + arguments_list=task_list, + num_processes=n_proc, + pbar_desc="Multiple alignment trimming", + disable=silent) + + end_time = time.time() + hours, remainder = divmod(end_time - start_time, 3600) + minutes, seconds = divmod(remainder, 60) + LOGGER.debug("\tMultiple alignment trimming time required: " + + ':'.join([str(hours), str(minutes), str(round(seconds, 2))]) + "\n") + + summarise_trimming(msa_trimmers) + # Collect the trimmed (or untrimmed if reference sequences were removed) output files + trimmed_output_files = gather_multiple_alignments(msa_trimmers) + + return trimmed_output_files diff --git a/treesapp/wrapper.py b/treesapp/wrapper.py index 76fdb2a4..0422d7de 100644 --- a/treesapp/wrapper.py +++ b/treesapp/wrapper.py @@ -1,6 +1,5 @@ import sys import os -import time import re import glob import logging @@ -787,69 +786,3 @@ def run_odseq(odseq_exe: str, fasta_in: str, outliers_fa: str, num_threads: int) return - -def get_msa_trim_command(executables, mfa_file, molecule, tool="BMGE"): - """ - Trims/masks/filters the multiple sequence alignment using either BMGE or trimAl - - :param executables: A dictionary mapping software to a path of their respective executable - :param mfa_file: Name of a MSA file - :param molecule: prot | dna - :param tool: Name of the software to use for trimming [BMGE|trimAl] - Returns file name of the trimmed multiple alignment file in FASTA format - """ - f_ext = mfa_file.split('.')[-1] - if not re.match("mfa|fasta|phy|fa", f_ext): - LOGGER.error("Unsupported file format: '" + f_ext + "'\n") - sys.exit(5) - - trimmed_msa_file = '.'.join(mfa_file.split('.')[:-1]) + '-' + re.escape(tool) + ".fasta" - if tool == "trimAl": - trim_command = trimal_command(executables["trimal"], mfa_file, trimmed_msa_file) - elif tool == "BMGE": - trim_command = bmge_command(executables["BMGE.jar"], mfa_file, trimmed_msa_file, molecule) - else: - LOGGER.error("Unsupported trimming software requested: '" + tool + "'") - sys.exit(5) - - return trim_command, trimmed_msa_file - - -def filter_multiple_alignments(executables, concatenated_mfa_files, refpkg_dict, n_proc=1, tool="ClipKit", silent=False): - """ - Runs BMGE using the provided lists of the concatenated hmmalign files, and the number of sequences in each file. - - :param executables: A dictionary mapping software to a path of their respective executable - :param concatenated_mfa_files: A dictionary containing f_contig keys mapping to a FASTA or Phylip sequential file - :param refpkg_dict: A dictionary of ReferencePackage instances indexed by their respective denominators - :param n_proc: The number of parallel processes to be launched for alignment trimming - :param tool: The software to use for alignment trimming - :return: A list of files resulting from multiple sequence alignment masking. - """ - start_time = time.time() - task_list = list() - trimmed_output_files = {} - - for refpkg_code in sorted(concatenated_mfa_files.keys()): - if refpkg_code not in trimmed_output_files: - trimmed_output_files[refpkg_code] = [] - mfa_files = concatenated_mfa_files[refpkg_code] - for concatenated_mfa_file in mfa_files: - trim_command, trimmed_msa_file = get_msa_trim_command(executables, concatenated_mfa_file, - refpkg_dict[refpkg_code].molecule, tool) - trimmed_output_files[refpkg_code].append(trimmed_msa_file) - task_list.append([trim_command]) - - eci.run_apply_async_multiprocessing(func=eci.launch_write_command, - arguments_list=task_list, - num_processes=n_proc, - pbar_desc="Multiple alignment trimming", - disable=silent) - - end_time = time.time() - hours, remainder = divmod(end_time - start_time, 3600) - minutes, seconds = divmod(remainder, 60) - LOGGER.debug("\t" + tool + " time required: " + - ':'.join([str(hours), str(minutes), str(round(seconds, 2))]) + "\n") - return trimmed_output_files - From 684328f4c1722cbd256e1786eaf66bfafb552be3 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Tue, 7 Jun 2022 13:15:42 +0200 Subject: [PATCH 05/18] summarise trimming stats --- tests/test_multiple_alignment.py | 12 ++++--- treesapp/assign.py | 10 +++--- treesapp/clipkit_helper.py | 9 +++-- treesapp/multiple_alignment.py | 60 +++++++++++++++++--------------- 4 files changed, 47 insertions(+), 44 deletions(-) diff --git a/tests/test_multiple_alignment.py b/tests/test_multiple_alignment.py index 83969cfe..a59d1b8f 100644 --- a/tests/test_multiple_alignment.py +++ b/tests/test_multiple_alignment.py @@ -9,7 +9,8 @@ def test_trim_multiple_alignments(self): from treesapp import multiple_alignment from treesapp import refpkg test_fa = get_test_data('PuhA.mfa') - output_file = os.path.join("tests", "test_data", "PuhA.trim.mfa") + trim_file = os.path.join("tests", "test_data", "PuhA.trim.mfa") + qc_file = os.path.join("tests", "test_data", "PuhA.trim.qc.mfa") test_rp = refpkg.ReferencePackage(refpkg_name="PuhA") test_rp.f__pkl = get_test_data(filename=os.path.join("refpkgs", "PuhA_build.pkl")) test_rp.slurp() @@ -18,14 +19,15 @@ def test_trim_multiple_alignments(self): min_seq_length=10, n_proc=1, ref_pkgs={"PuhA": test_rp}) - self.assertTrue(os.path.isfile(output_file)) + self.assertTrue(os.path.isfile(trim_file)) self.assertIsInstance(result, dict) self.assertTrue("PuhA" in result.keys()) - self.assertEqual(os.path.basename(output_file), + self.assertEqual(os.path.basename(qc_file), os.path.basename(result["PuhA"].pop())) - if os.path.isfile(output_file): - os.remove(output_file) + for f_path in [trim_file, qc_file]: + if os.path.isfile(f_path): + os.remove(f_path) return diff --git a/treesapp/assign.py b/treesapp/assign.py index 40b6ace7..15c02021 100755 --- a/treesapp/assign.py +++ b/treesapp/assign.py @@ -440,7 +440,7 @@ def write_classified_orfs(self, pqueries: dict, extracted_seqs: dict) -> None: molecule="dna", subset=classified_seq_names, full_name=self.fasta_full_name) - nuc_orfs.header_registry = fasta.register_headers(nuc_orfs.fasta_dict.keys()) + nuc_orfs.header_registry = fasta.register_headers(header_list=list(nuc_orfs.fasta_dict.keys())) nuc_orfs.change_dict_keys() if not os.path.isfile(self.classified_nuc_seqs): self.ts_logger.info("Creating nucleotide FASTA file of classified sequences '{}'... " @@ -1338,13 +1338,13 @@ def assign(sys_args): ts_assign.increment_stage_dir(checkpoint="search") ## - # STAGE 4: Run hmmalign, and optionally BMGE, to produce the MSAs for phylogenetic placement + # STAGE 4: Run hmmalign, and optionally trim, to produce the MSAs for phylogenetic placement ## - split_msa_files = ts_assign.align(refpkg_dict, homolog_seq_files, + split_msa_files = ts_assign.align(refpkg_dict=refpkg_dict, + homolog_seq_files=homolog_seq_files, n_proc=n_proc, trim_align=args.trim_align, - min_seq_length=args.min_seq_length, - verbose=args.verbose) + min_seq_length=args.min_seq_length) delete_files(args.delete, ts_assign.stage_lookup("search").dir_path, 2) ts_assign.increment_stage_dir(checkpoint="align") diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index 2e40a0e1..321457a6 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -1,4 +1,3 @@ -import re import sys import logging import os.path @@ -34,7 +33,7 @@ def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0. self.min_unaligned_seq_length = 1 # Attributes used in evaluating trimming performance - self.success = False + self.success = True self.num_msa_seqs = 0 self.num_msa_cols = 0 self.num_trim_seqs = 0 @@ -122,6 +121,8 @@ def read_trimmed_msa(self) -> fasta.FASTA: self.logger.error("Unsupported file format ('{}') of {}.\n".format(self.ff_out, self.mfa_out)) sys.exit(1) + msa_records.header_registry = fasta.register_headers(list(msa_records.fasta_dict.keys()), + drop=True) return msa_records def validate_alignment_trimming(self): @@ -154,9 +155,7 @@ def compare_original_and_trimmed_multiple_alignments(self, min_len: int, ref_pkg if ref_pkg is not None: # type: refpkg.ReferencePackage # Create a set of the reference sequence names - unique_ref_headers = set([re.sub('_' + re.escape(ref_pkg.prefix), '', x)[1:] - for x in - ref_pkg.msa]) + unique_ref_headers = set(ref_pkg.get_fasta().get_seq_names()) self.quantify_refs_and_pqueries(unique_ref_headers) return diff --git a/treesapp/multiple_alignment.py b/treesapp/multiple_alignment.py index e890a025..0185aa7e 100644 --- a/treesapp/multiple_alignment.py +++ b/treesapp/multiple_alignment.py @@ -1,3 +1,4 @@ +import sys import os.path import time import logging @@ -7,11 +8,11 @@ from treesapp import external_command_interface as eci from treesapp import clipkit_helper as ckh - LOGGER = logging.getLogger(logger.logger_name()) -def trim_multiple_alignment_clipkit(msa_file: str, ref_pkg: refpkg.ReferencePackage, min_seq_length: int) -> ckh.ClipKitHelper: +def trim_multiple_alignment_clipkit(msa_file: str, ref_pkg: refpkg.ReferencePackage, + min_seq_length: int) -> ckh.ClipKitHelper: trimmer = ckh.ClipKitHelper(fasta_in=msa_file, output_dir=os.path.dirname(msa_file)) trimmer.refpkg_name = ref_pkg.prefix @@ -23,41 +24,42 @@ def trim_multiple_alignment_clipkit(msa_file: str, ref_pkg: refpkg.ReferencePack def summarise_trimming(msa_trimmers: list) -> None: """Summarises various outcomes of trimming MSAs.""" + refpkg_trimming_stats = {trimmer.refpkg_name: { + "msa_files": 0, + "cols_removed": [], + "seqs_removed": [], + "successes": 0, + } + for trimmer in msa_trimmers} num_successful_alignments = 0 - discarded_seqs_string = "" - trimmed_away_seqs = dict() - untrimmed_msa_failed = [] + LOGGER.debug("Validating trimmed multiple sequence alignment files... ") for trimmer in msa_trimmers: # type: ckh.ClipKitHelper - # TODO: Gather all useful stats for each trimmer instance + # Gather all useful stats for each trimmer instance + refpkg_trimming_stats[trimmer.refpkg_name]["msa_files"] += 1 if trimmer.success: + refpkg_trimming_stats[trimmer.refpkg_name]["successes"] += 1 num_successful_alignments += 1 - - # TODO: Summarise trimming by reference package - trimming_performance_string = "\tAverage columns removed:\n" - for refpkg_name in trimmed_length_dict: - trimming_performance_string += "\t\t" + refpkg_name + "\t" - n_trimmed_files = len(trimmed_length_dict[denominator]) - if n_trimmed_files > 0: - trimming_performance_string += str( - round(sum(trimmed_length_dict[denominator]) / n_trimmed_files, 1)) + "\n" else: - trimming_performance_string += str(0.0) + "\n" - - LOGGER.debug(trimming_performance_string + "\n") - - discarded_seqs_string += "\n\t\t" + self.mfa_out + " = " + str(len(discarded_seqs)) - num_successful_alignments += len(msa_passed) - qc_ma_dict[ref_pkg.prefix] = msa_passed - discarded_seqs_string += summary_str - untrimmed_msa_failed.clear() + continue + refpkg_trimming_stats[trimmer.refpkg_name]["cols_removed"].append(trimmer.num_msa_cols - trimmer.num_trim_cols) + refpkg_trimming_stats[trimmer.refpkg_name]["seqs_removed"].append(trimmer.num_msa_seqs - trimmer.num_trim_seqs) + + # Summarise trimming by reference package + for refpkg_name, stats in refpkg_trimming_stats.items(): + trim_summary = "\t\t{} trimming stats:\n".format(refpkg_name) + if stats["msa_files"] == 0: + continue + trim_summary += "Multiple alignment files = {}\n".format(stats["msa_files"]) + trim_summary += "Files successfully trimmed = {}\n".format(stats["successes"]) + trim_summary += "Average columns removed = {}\n".format(round(sum(stats["cols_removed"]) / + len(stats["cols_removed"]))) + trim_summary += "Average sequences removed = {}\n".format(round(sum(stats["seqs_removed"]) / + len(stats["seqs_removed"]))) + + LOGGER.debug(trim_summary + "\n") LOGGER.debug("done.\n") - LOGGER.debug("\tSequences removed during trimming:\n\t\t" + - '\n\t\t'.join([k + ": " + str(trimmed_away_seqs[k]) for k in trimmed_away_seqs.keys()]) + "\n") - - LOGGER.debug("\tSequences <" + str(min_len) + " characters removed after trimming:" + - discarded_seqs_string + "\n") if num_successful_alignments == 0: LOGGER.error("No quality alignment files to analyze after trimming. Exiting now.\n") From 1bce80fcecf6a4172743e34b079a0e7fdb17dbb0 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Tue, 7 Jun 2022 13:55:52 +0200 Subject: [PATCH 06/18] Switch to clipkit in train and create modules --- treesapp/clipkit_helper.py | 7 +++++-- treesapp/commands.py | 26 +++++++------------------- treesapp/training_utils.py | 24 ++++++++---------------- 3 files changed, 20 insertions(+), 37 deletions(-) diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index 321457a6..b1225f29 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -50,7 +50,6 @@ def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0. return def run(self): - ck.execute(input_file=self.input, input_file_format=self.ff_in, output_file=self.mfa_out, @@ -166,12 +165,16 @@ def get_qc_output(self) -> str: else: return self.input - def write_qc_trimmed_multiple_alignment(self) -> None: + def get_qc_trimmed_fasta(self) -> fasta.FASTA: if not self.success: return msa_fasta = self.read_trimmed_msa() msa_fasta.keep_only(header_subset=self.trim_qc_seqs) + return msa_fasta + + def write_qc_trimmed_multiple_alignment(self) -> None: + msa_fasta = self.get_qc_trimmed_fasta() fasta.write_new_fasta(fasta_dict=msa_fasta.fasta_dict, fasta_name=self.qc_mfa_out) return diff --git a/treesapp/commands.py b/treesapp/commands.py index 1c3d3051..846bb8e1 100644 --- a/treesapp/commands.py +++ b/treesapp/commands.py @@ -31,6 +31,7 @@ from treesapp import create_refpkg as ts_create_mod from treesapp import update_refpkg as ts_update_mod from treesapp import hmmer_tbl_parser +from treesapp import multiple_alignment LOGGER = logging.getLogger(logger.logger_name()) @@ -640,29 +641,16 @@ def create(sys_args): ## dict_for_phy = dict() if args.trim_align: - trimmed_mfa_files = wrapper.filter_multiple_alignments(ts_create.executables, - {ts_create.ref_pkg.refpkg_code: - [ts_create.ref_pkg.f__msa]}, - {ts_create.ref_pkg.refpkg_code: - ts_create.ref_pkg}) - trimmed_mfa_file = trimmed_mfa_files[ts_create.ref_pkg.refpkg_code] - unique_ref_headers = set(ref_seqs.fasta_dict.keys()) - qc_ma_dict, failed_trimmed_msa, summary_str = file_parsers.validate_alignment_trimming(trimmed_mfa_file, - unique_ref_headers) - LOGGER.debug("Number of sequences discarded: " + summary_str + "\n") - if len(qc_ma_dict.keys()) == 0: + trimmer = multiple_alignment.trim_multiple_alignment_clipkit(msa_file=ts_create.ref_pkg.f__msa, + ref_pkg=ts_create.ref_pkg, + min_seq_length=args.min_seq_length) + trimmer.summarise_trimming() + if trimmer.num_refs_retained != trimmer.num_msa_seqs: # At least one of the reference sequences were discarded and therefore this package is invalid. LOGGER.error("Trimming removed reference sequences. This could indicate non-homologous sequences.\n" + "Please improve sequence quality-control and/or rerun without the '--trim_align' flag.\n") sys.exit(13) - elif len(qc_ma_dict.keys()) > 1: - LOGGER.error("Multiple trimmed alignment files are found when only one is expected:\n" + - "\n".join([str(k) + ": " + str(qc_ma_dict[k]) for k in qc_ma_dict])) - sys.exit(13) - # NOTE: only a single trimmed-MSA file in the dictionary - for trimmed_msa_file in qc_ma_dict: - dict_for_phy = qc_ma_dict[trimmed_msa_file] - os.remove(trimmed_msa_file) + dict_for_phy.update(trimmer.get_qc_trimmed_fasta().fasta_dict) else: dict_for_phy.update(ref_seqs.fasta_dict) diff --git a/treesapp/training_utils.py b/treesapp/training_utils.py index 9c644cbb..7f7a395f 100644 --- a/treesapp/training_utils.py +++ b/treesapp/training_utils.py @@ -21,10 +21,10 @@ from treesapp import classy from treesapp import phylo_seq from treesapp import logger -from treesapp import external_command_interface as eci from treesapp.jplace_utils import jplace_parser, demultiplex_pqueries, calc_pquery_mean_tip_distances from treesapp.entish import map_internal_nodes_leaves from treesapp.refpkg import ReferencePackage +from treesapp import multiple_alignment LOGGER = logging.getLogger(logger.logger_name()) @@ -375,27 +375,19 @@ def generate_pquery_data_for_trainer(ref_pkg: ReferencePackage, taxon: str, LOGGER.debug(str(aln_stdout) + "\n") - trim_command, combined_msa = wrapper.get_msa_trim_command(executables, all_msa, ce_refpkg.molecule) - eci.launch_write_command(trim_command) - intermediate_files += glob(combined_msa + "*") - - # Ensure reference sequences haven't been removed during MSA trimming - msa_dict, failed_msa_files, summary_str = file_parsers.validate_alignment_trimming([combined_msa], - set(ce_fasta.fasta_dict), - True) - nrow, ncolumn = fasta.multiple_alignment_dimensions(mfa_file=combined_msa, - seq_dict=fasta.read_fasta_to_dict(combined_msa)) - LOGGER.debug("Columns = " + str(ncolumn) + "\n") - if combined_msa not in msa_dict.keys(): + trimmer = multiple_alignment.trim_multiple_alignment_clipkit(msa_file=all_msa, + ref_pkg=ref_pkg, + min_seq_length=int(0.1*ref_pkg.hmm_length())) + trimmer.summarise_trimming() + if not trimmer.success: LOGGER.debug("Placements for '{}' are being skipped after failing MSA validation.\n".format(taxon)) for old_file in intermediate_files: os.remove(old_file) - intermediate_files.clear() + intermediate_files.clear() return pqueries - LOGGER.debug("Number of sequences discarded: " + summary_str + "\n") # Create the query-only FASTA file required by EPA-ng - fasta.split_combined_ref_query_fasta(combined_msa, query_msa, ref_msa) + fasta.split_combined_ref_query_fasta(trimmer.get_qc_output(), query_msa, ref_msa) raxml_files = wrapper.raxml_evolutionary_placement(epa_exe=executables["epa-ng"], refpkg_tree=ce_refpkg.f__tree, From a6c4c68c201be0b25187f886c478e2ef1de9a402 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Fri, 10 Jun 2022 08:40:15 -0400 Subject: [PATCH 07/18] Silence ClipKit messages --- setup.py | 2 +- tests/test_classy.py | 5 +++-- tests/test_graftm_utils.py | 2 +- tests/test_placement_trainer.py | 2 +- tests/test_refpkg.py | 2 +- tests/test_training_utils.py | 7 +++---- treesapp/__init__.py | 2 +- treesapp/clipkit_helper.py | 28 ++++++++++++++++++++-------- treesapp/utilities.py | 13 +++++++++++++ treesapp/wrapper.py | 21 --------------------- 10 files changed, 44 insertions(+), 40 deletions(-) diff --git a/setup.py b/setup.py index fde44b23..590f1c2d 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ SETUP_METADATA = \ { "name": "treesapp", - "version": "0.11.4", + "version": "0.11.5", "description": "TreeSAPP is a functional and taxonomic annotation tool for genomes and metagenomes.", "long_description": LONG_DESCRIPTION, "long_description_content_type": "text/markdown", diff --git a/tests/test_classy.py b/tests/test_classy.py index 75b38d84..b487673d 100644 --- a/tests/test_classy.py +++ b/tests/test_classy.py @@ -69,11 +69,12 @@ def test_furnish_with_arguments(self): args.input = [self.fasta] args.output = self.output_dir args.molecule = "prot" - args.executables = {'prodigal': '/home/connor/bin/prodigal', 'BMGE.jar': '/usr/local/bin/BMGE.jar', + args.executables = {'prodigal': '/home/connor/bin/prodigal', 'hmmbuild': '/usr/local/bin/hmmbuild', 'hmmalign': '/usr/local/bin/hmmalign', 'hmmsearch': '/usr/local/bin/hmmsearch', - 'epa-ng': '/usr/local/bin/epa-ng', 'raxml-ng': '/usr/local/bin/raxml-ng'} + 'epa-ng': '/usr/local/bin/epa-ng', + 'raxml-ng': '/usr/local/bin/raxml-ng'} self.db.furnish_with_arguments(args) self.assertEqual(len(args.executables), len(self.db.executables)) self.assertEqual(self.fasta, self.db.input_sequences) diff --git a/tests/test_graftm_utils.py b/tests/test_graftm_utils.py index f2bb3fe0..86536c79 100644 --- a/tests/test_graftm_utils.py +++ b/tests/test_graftm_utils.py @@ -71,7 +71,7 @@ def test_prep_graftm_ref_files(self): from treesapp import utilities # Find the executables exe_map = {} - for dep in ["hmmbuild", "hmmalign", "raxml-ng", "mafft", "BMGE.jar"]: + for dep in ["hmmbuild", "hmmalign", "raxml-ng", "mafft"]: exe_map[dep] = utilities.fetch_executable_path(dep, self.ts_dir) taxon_str = 'd__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales' diff --git a/tests/test_placement_trainer.py b/tests/test_placement_trainer.py index 8428db72..066bb656 100644 --- a/tests/test_placement_trainer.py +++ b/tests/test_placement_trainer.py @@ -27,7 +27,7 @@ def setUp(self) -> None: # Executables dictionary self.exes = {} self.treesapp_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + os.sep - for dep in ["hmmbuild", "hmmalign", "raxml-ng", "mafft", "BMGE.jar"]: + for dep in ["hmmbuild", "hmmalign", "raxml-ng", "mafft"]: self.exes[dep] = fetch_executable_path(dep, get_treesapp_root()) return diff --git a/tests/test_refpkg.py b/tests/test_refpkg.py index 61077eb8..b7560826 100644 --- a/tests/test_refpkg.py +++ b/tests/test_refpkg.py @@ -40,7 +40,7 @@ def setUp(self) -> None: # Find the executables self.exe_map = {} self.treesapp_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + os.sep - for dep in ["hmmbuild", "hmmalign", "raxml-ng", "mafft", "BMGE.jar", "FastTree"]: + for dep in ["hmmbuild", "hmmalign", "raxml-ng", "mafft", "FastTree"]: self.exe_map[dep] = fetch_executable_path(dep, utils.get_treesapp_root()) return diff --git a/tests/test_training_utils.py b/tests/test_training_utils.py index dad1766a..245f8ddd 100644 --- a/tests/test_training_utils.py +++ b/tests/test_training_utils.py @@ -53,7 +53,7 @@ def test_generate_pquery_data_for_trainer(self): treesapp_dir = get_treesapp_root() executables = {} - for dep in ["hmmbuild", "hmmalign", "hmmsearch", "epa-ng", "raxml-ng", "FastTree", "mafft", "BMGE.jar"]: + for dep in ["hmmbuild", "hmmalign", "hmmsearch", "epa-ng", "raxml-ng", "FastTree", "mafft"]: executables[dep] = fetch_executable_path(dep, treesapp_dir) pbar = tqdm() test_taxon_one = "f__Bradyrhizobiaceae; g__Bradyrhizobium; s__Bradyrhizobium 'sp.' BTAi1" @@ -68,10 +68,9 @@ def test_generate_pquery_data_for_trainer(self): def test_fetch_executable_path(self): from treesapp.utilities import fetch_executable_path - from re import sub treesapp_dir = get_treesapp_root() - exe_path = fetch_executable_path("BMGE.jar", treesapp_dir) - self.assertEqual("/sub_binaries/BMGE.jar", sub(treesapp_dir, '', exe_path)) + exe_path = fetch_executable_path("epa-ng", treesapp_dir) + self.assertEqual("epa-ng", os.path.basename(exe_path)) return def test_load_training_data_frame(self): diff --git a/treesapp/__init__.py b/treesapp/__init__.py index 9d007770..e08f3305 100644 --- a/treesapp/__init__.py +++ b/treesapp/__init__.py @@ -27,7 +27,7 @@ __status__ = "Production/Stable" __title__ = "TreeSAPP" __url__ = "https://github.com/hallamlab/TreeSAPP" -__version__ = "0.11.4" +__version__ = "0.11.5" __all__ = ['abundance', 'annotate_extra', 'assign', 'clade_annotation', 'classy', 'commands', 'create_refpkg', 'entish', 'entrez_utils', diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index b1225f29..1de302e4 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -1,4 +1,5 @@ import sys +import time import logging import os.path @@ -9,6 +10,7 @@ from treesapp import fasta from treesapp import refpkg from treesapp import file_parsers +from treesapp import utilities class ClipKitHelper: @@ -34,6 +36,7 @@ def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0. # Attributes used in evaluating trimming performance self.success = True + self.exec_time = 0 self.num_msa_seqs = 0 self.num_msa_cols = 0 self.num_trim_seqs = 0 @@ -50,17 +53,26 @@ def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0. return def run(self): - ck.execute(input_file=self.input, - input_file_format=self.ff_in, - output_file=self.mfa_out, - output_file_format=self.ff_out, - gaps=self.gap_prop, - complement=False, - mode=self.mode, - use_log=False) + start_time = time.time() + + # Capture all output from print statements within ClipKit + with utilities.Capturing() as output: + ck.execute(input_file=self.input, + input_file_format=self.ff_in, + output_file=self.mfa_out, + output_file_format=self.ff_out, + gaps=self.gap_prop, + complement=False, + mode=self.mode, + use_log=False) + + self.exec_time = time.time() - start_time return def summarise_trimming(self): + self.logger.debug("Trimming required {}s".format(round(self.exec_time, 3))) + self.logger.debug("Percentage of alignment trimmed = {}%".format(round((100*self.num_trim_cols) / + self.num_msa_cols), 2)) if self.num_trim_seqs == 0: self.logger.warning("No sequences were read from {}.\n".format(self.mfa_out)) diff --git a/treesapp/utilities.py b/treesapp/utilities.py index b3236c9c..6ef248b2 100644 --- a/treesapp/utilities.py +++ b/treesapp/utilities.py @@ -5,6 +5,7 @@ import shutil from glob import glob from csv import Sniffer +from io import StringIO from pygtrie import StringTrie import multiprocessing @@ -15,6 +16,18 @@ LOGGER = logging.getLogger(logger.logger_name()) +class Capturing(list): + def __enter__(self): + self._stdout = sys.stdout + sys.stdout = self._stringio = StringIO() + return self + + def __exit__(self, *args): + self.extend(self._stringio.getvalue().splitlines()) + del self._stringio # free up some memory + sys.stdout = self._stdout + + def base_file_prefix(file_path: str) -> str: return os.path.splitext(os.path.basename(file_path))[0] diff --git a/treesapp/wrapper.py b/treesapp/wrapper.py index 0422d7de..7fa71ad6 100644 --- a/treesapp/wrapper.py +++ b/treesapp/wrapper.py @@ -338,27 +338,6 @@ def raxml_evolutionary_placement(epa_exe: str, refpkg_tree: str, refpkg_msa: str return epa_files -def trimal_command(executable, mfa_file, trimmed_msa_file): - trim_command = [executable, - '-in', mfa_file, - '-out', trimmed_msa_file, - '-gt', str(0.02)] - return trim_command - - -def bmge_command(executable, mfa_file, trimmed_msa_file, molecule): - if molecule == "prot": - bmge_settings = ["-t", "AA", "-m", "BLOSUM30"] - else: - bmge_settings = ["-t", "DNA", "-m", "DNAPAM100:2"] - trim_command = ["java", "-Xmx512m", "-jar", executable] - trim_command += bmge_settings - trim_command += ["-g", "0.99:0.33"] # Specifying the gap rate per_sequence:per_character - trim_command += ['-i', mfa_file, - '-of', trimmed_msa_file] - return trim_command - - def hmmalign_command(executable, ref_aln, ref_profile, input_fasta, output_multiple_alignment): malign_command = [executable, '--mapali', ref_aln, From 448fcb71d544a4fad7937938cd0ea55d827b2e75 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Fri, 10 Jun 2022 09:00:27 -0400 Subject: [PATCH 08/18] Silence tqdm progress bars from ClipKit --- CHANGELOG.md | 6 ++++++ treesapp/clipkit_helper.py | 5 ++++- treesapp/utilities.py | 5 +++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 088cf507..4ddb20d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## [0.11.5] - 2022-06 +### Added +### Fixed +### Changed +- Switched BMGE for ClipKit (#67) + ## [0.11.4] - 2022-05-22 ### Added - Centroid inference for pOTUs based on the midpoint, or balance point, of all cluster members. diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index 1de302e4..6f9d0f07 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -52,7 +52,7 @@ def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0. self.num_refs_retained = 0 return - def run(self): + def run(self, verbose=False): start_time = time.time() # Capture all output from print statements within ClipKit @@ -67,6 +67,9 @@ def run(self): use_log=False) self.exec_time = time.time() - start_time + + if verbose: + self.logger.debug('\n'.join(output)) return def summarise_trimming(self): diff --git a/treesapp/utilities.py b/treesapp/utilities.py index 6ef248b2..be8a4702 100644 --- a/treesapp/utilities.py +++ b/treesapp/utilities.py @@ -6,9 +6,11 @@ from glob import glob from csv import Sniffer from io import StringIO +from functools import partialmethod from pygtrie import StringTrie import multiprocessing +from tqdm import tqdm from treesapp import external_command_interface as eci from treesapp import logger @@ -18,6 +20,7 @@ class Capturing(list): def __enter__(self): + tqdm.__init__ = partialmethod(tqdm.__init__, disable=True) self._stdout = sys.stdout sys.stdout = self._stringio = StringIO() return self @@ -26,6 +29,8 @@ def __exit__(self, *args): self.extend(self._stringio.getvalue().splitlines()) del self._stringio # free up some memory sys.stdout = self._stdout + tqdm.__init__ = partialmethod(tqdm.__init__, disable=False) + return def base_file_prefix(file_path: str) -> str: From 04c30a07881f4e0f939c56cbc2881f8d256c0ecf Mon Sep 17 00:00:00 2001 From: cmorganl Date: Fri, 10 Jun 2022 11:13:47 -0400 Subject: [PATCH 09/18] Check trimming mode names --- tests/test_clipkit_helper.py | 8 +++++++- treesapp/clipkit_helper.py | 7 +++++-- treesapp/multiple_alignment.py | 4 +++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/test_clipkit_helper.py b/tests/test_clipkit_helper.py index 88be0c71..36c0dd54 100644 --- a/tests/test_clipkit_helper.py +++ b/tests/test_clipkit_helper.py @@ -20,14 +20,20 @@ def test_run(self): output_dir='./', mode="smart-gap") ck.run() + ck.compare_original_and_trimmed_multiple_alignments(min_len=200) + ck.summarise_trimming() self.assertTrue(os.path.isfile(self.output_fa)) ck.mode = ck_modes.TrimmingMode("kpi-smart-gap") ck.run() + ck.compare_original_and_trimmed_multiple_alignments(min_len=200) + ck.summarise_trimming() self.assertTrue(os.path.isfile(self.output_fa)) - ck.mode = ck_modes.TrimmingMode("kpi") + ck.mode = ck_modes.TrimmingMode("gappy") ck.run() + ck.compare_original_and_trimmed_multiple_alignments(min_len=200) + ck.summarise_trimming() self.assertTrue(os.path.isfile(self.output_fa)) return diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index 6f9d0f07..3c8b21c7 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -14,10 +14,14 @@ class ClipKitHelper: - CLIPKIT_MODES = {"smart-gap"} + CLIPKIT_MODES = set([v.value for _k, v in ck_modes.TrimmingMode._member_map_.items()]) def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0.9): + self.logger = logging.getLogger(logger.logger_name()) self.input = fasta_in + if mode not in self.CLIPKIT_MODES: + self.logger.error("'{}' is not a valid TrimmingMode.\n".format(mode)) + sys.exit(1) if not os.path.isdir(output_dir): os.mkdir(output_dir) @@ -25,7 +29,6 @@ def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0. self.mfa_out = os.path.join(output_dir, prefix + ".trim" + ext) self.qc_mfa_out = os.path.join(output_dir, prefix + ".trim.qc" + ext) - self.logger = logging.getLogger(logger.logger_name()) self.mode = ck_modes.TrimmingMode(mode) self.gap_prop = gap_prop diff --git a/treesapp/multiple_alignment.py b/treesapp/multiple_alignment.py index 0185aa7e..cdadaf3c 100644 --- a/treesapp/multiple_alignment.py +++ b/treesapp/multiple_alignment.py @@ -13,8 +13,10 @@ def trim_multiple_alignment_clipkit(msa_file: str, ref_pkg: refpkg.ReferencePackage, min_seq_length: int) -> ckh.ClipKitHelper: + # Modes can be one of 'smart-gap', 'kpi', 'kpic', 'gappy' trimmer = ckh.ClipKitHelper(fasta_in=msa_file, - output_dir=os.path.dirname(msa_file)) + output_dir=os.path.dirname(msa_file), + mode="gappy") trimmer.refpkg_name = ref_pkg.prefix trimmer.run() trimmer.compare_original_and_trimmed_multiple_alignments(min_seq_length, ref_pkg) From 9cd04d29039c18c7b8aed17adfa3b9a783a738dd Mon Sep 17 00:00:00 2001 From: cmorganl Date: Thu, 16 Jun 2022 07:49:12 -0400 Subject: [PATCH 10/18] Parameter tuning --- treesapp/clipkit_helper.py | 2 +- treesapp/multiple_alignment.py | 5 +++-- treesapp/wrapper.py | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index 3c8b21c7..fc41f3ca 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -16,7 +16,7 @@ class ClipKitHelper: CLIPKIT_MODES = set([v.value for _k, v in ck_modes.TrimmingMode._member_map_.items()]) - def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0.9): + def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0.95): self.logger = logging.getLogger(logger.logger_name()) self.input = fasta_in if mode not in self.CLIPKIT_MODES: diff --git a/treesapp/multiple_alignment.py b/treesapp/multiple_alignment.py index cdadaf3c..51c9a9e2 100644 --- a/treesapp/multiple_alignment.py +++ b/treesapp/multiple_alignment.py @@ -13,10 +13,11 @@ def trim_multiple_alignment_clipkit(msa_file: str, ref_pkg: refpkg.ReferencePackage, min_seq_length: int) -> ckh.ClipKitHelper: - # Modes can be one of 'smart-gap', 'kpi', 'kpic', 'gappy' + # Modes can be one of 'smart-gap', 'kpi', 'kpic', 'gappy', 'kpi-smart-gap', 'kpi-gappy' trimmer = ckh.ClipKitHelper(fasta_in=msa_file, output_dir=os.path.dirname(msa_file), - mode="gappy") + mode="gappy", + gap_prop=0.25) trimmer.refpkg_name = ref_pkg.prefix trimmer.run() trimmer.compare_original_and_trimmed_multiple_alignments(min_seq_length, ref_pkg) diff --git a/treesapp/wrapper.py b/treesapp/wrapper.py index 7fa71ad6..6e92ea4b 100644 --- a/treesapp/wrapper.py +++ b/treesapp/wrapper.py @@ -313,8 +313,9 @@ def raxml_evolutionary_placement(epa_exe: str, refpkg_tree: str, refpkg_msa: str '-t', refpkg_tree, '-q', query_msa, "--model", refpkg_model, - "--no-pre-mask", + # "--no-pre-mask", "--dyn-heur", str(0.9), + # "--baseball-heur", str(0.9), # "--fix-heur", str(0.2), "--preserve-rooting", "on", "--filter-min-lwr", str(0.01), From 966afdd5a342431cc74adf287c26678b0790d4db Mon Sep 17 00:00:00 2001 From: cmorganl Date: Sat, 18 Jun 2022 10:52:35 -0400 Subject: [PATCH 11/18] Distinguish trimming for placement --- .gitignore | 1 + setup.py | 2 +- tests/test_clipkit_helper.py | 9 ++-- tests/test_commands.py | 2 +- tests/test_placement_trainer.py | 4 +- tox.ini | 6 +-- treesapp/clipkit_helper.py | 73 ++++++++++++++++++++++----------- treesapp/commands.py | 5 ++- treesapp/multiple_alignment.py | 32 ++++++++++----- treesapp/training_utils.py | 8 ++-- 10 files changed, 92 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index da94a5be..f298e2c8 100644 --- a/.gitignore +++ b/.gitignore @@ -38,6 +38,7 @@ nosetests.xml TreeSAPP.Rproj .Rhistory .RData +.python-version* # Data diff --git a/setup.py b/setup.py index 590f1c2d..f80cd53d 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ "setuptools>=50.0.0" ], "extras_require": { - 'test': ['pytest', 'pytest-cov'], + 'tests': ['pytest', 'pytest-cov'], } } diff --git a/tests/test_clipkit_helper.py b/tests/test_clipkit_helper.py index 36c0dd54..ea7f8de8 100644 --- a/tests/test_clipkit_helper.py +++ b/tests/test_clipkit_helper.py @@ -18,21 +18,22 @@ def test_run(self): from clipkit import modes as ck_modes ck = clipkit_helper.ClipKitHelper(fasta_in=self.test_fa, output_dir='./', - mode="smart-gap") + mode="smart-gap", + min_len=200) ck.run() - ck.compare_original_and_trimmed_multiple_alignments(min_len=200) + ck.compare_original_and_trimmed_multiple_alignments() ck.summarise_trimming() self.assertTrue(os.path.isfile(self.output_fa)) ck.mode = ck_modes.TrimmingMode("kpi-smart-gap") ck.run() - ck.compare_original_and_trimmed_multiple_alignments(min_len=200) + ck.compare_original_and_trimmed_multiple_alignments() ck.summarise_trimming() self.assertTrue(os.path.isfile(self.output_fa)) ck.mode = ck_modes.TrimmingMode("gappy") ck.run() - ck.compare_original_and_trimmed_multiple_alignments(min_len=200) + ck.compare_original_and_trimmed_multiple_alignments() ck.summarise_trimming() self.assertTrue(os.path.isfile(self.output_fa)) return diff --git a/tests/test_commands.py b/tests/test_commands.py index 5ca63fc3..db11fb72 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -106,7 +106,7 @@ def test_assign(self): "--placement_summary", "aelw", "--trim_align", "--svm"] assign.assign(assign_commands_list) - self.assertEqual(13, len(read_classification_table(assignments_tbl))) + self.assertEqual(14, len(read_classification_table(assignments_tbl))) self.assertTrue(os.path.isfile(classified_seqs_faa)) assign.assign(assign_commands_list + ["--targets", "McrA,McrB,XmoA"]) self.assertEqual(18, len(read_classification_table(assignments_tbl))) diff --git a/tests/test_placement_trainer.py b/tests/test_placement_trainer.py index 066bb656..e6117324 100644 --- a/tests/test_placement_trainer.py +++ b/tests/test_placement_trainer.py @@ -27,7 +27,7 @@ def setUp(self) -> None: # Executables dictionary self.exes = {} self.treesapp_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + os.sep - for dep in ["hmmbuild", "hmmalign", "raxml-ng", "mafft"]: + for dep in ["hmmbuild", "hmmalign", "raxml-ng", "mafft", "epa-ng"]: self.exes[dep] = fetch_executable_path(dep, get_treesapp_root()) return @@ -80,6 +80,8 @@ def test_clade_exclusion_phylo_placement(self): '634498.mru_1924'], 'd__Bacteria; p__Proteobacteria; c__Gammaproteobacteria': ['523846.Mfer_0784', '79929.MTBMA_c15480']}} + + # This will fail since query sequences (McrA) are unrelated to reference package (PuhA) pqueries = clade_exclusion_phylo_placement(rank_training_seqs=train_seqs, test_fasta=self.bad_fasta, ref_pkg=self.test_refpkg, executables=self.exes, min_seqs=3, output_dir=self.output_dir) diff --git a/tox.ini b/tox.ini index 642b9b4c..84ebd69c 100644 --- a/tox.ini +++ b/tox.ini @@ -40,7 +40,7 @@ passenv = deps= .[tests] codecov>=2.0.0 - setuptools>=50.3.1 + setuptools>=60.4.0 setenv = {[default]setenv} @@ -76,8 +76,8 @@ skip_install = True deps = codecov>=2.0.0 - coverage==5.0.3 - setuptools>=50.3.1 + coverage>=5.0.3 + setuptools>=60.4.0 setenv = {[default]setenv} diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index fc41f3ca..90d8cd63 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -8,7 +8,6 @@ from treesapp import logger from treesapp import fasta -from treesapp import refpkg from treesapp import file_parsers from treesapp import utilities @@ -16,7 +15,8 @@ class ClipKitHelper: CLIPKIT_MODES = set([v.value for _k, v in ck_modes.TrimmingMode._member_map_.items()]) - def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0.95): + def __init__(self, fasta_in: str, output_dir: str, + mode="smart-gap", gap_prop=0.95, min_len=None, for_placement=False): self.logger = logging.getLogger(logger.logger_name()) self.input = fasta_in if mode not in self.CLIPKIT_MODES: @@ -35,7 +35,7 @@ def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0. self.ff_in = "fasta" self.ff_out = "fasta" self.refpkg_name = '' - self.min_unaligned_seq_length = 1 + self.min_unaligned_seq_length = min_len # Attributes used in evaluating trimming performance self.success = True @@ -44,9 +44,11 @@ def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0. self.num_msa_cols = 0 self.num_trim_seqs = 0 self.num_trim_cols = 0 + self.num_qc_seqs = 0 self.trim_qc_seqs = [] # These sequences passed the min_unaligned_seq_length filter # Specific to MSAs for phylogenetic placement + self.placement = for_placement # Boolean indicating MSA contained ref and query sequences self.num_queries_failed_trimming = 0 self.num_refs_failed_trimming = 0 self.num_queries_failed_qc = 0 @@ -106,6 +108,20 @@ def summarise_trimming(self): self.logger.debug("The untrimmed MSA will be used instead.\n") return + def read_trimmed_msa(self) -> fasta.FASTA: + msa_records = fasta.FASTA(file_name=self.mfa_out) + if self.ff_out == "phylip": + msa_records.fasta_dict = file_parsers.read_phylip_to_dict(self.mfa_out) + elif self.ff_out == "fasta": + msa_records.fasta_dict = fasta.read_fasta_to_dict(self.mfa_out) + else: + self.logger.error("Unsupported file format ('{}') of {}.\n".format(self.ff_out, self.mfa_out)) + sys.exit(1) + + msa_records.header_registry = fasta.register_headers(list(msa_records.fasta_dict.keys()), + drop=True) + return msa_records + def quantify_refs_and_pqueries(self, unique_ref_headers: set, msa_fasta: fasta.FASTA = None): if not unique_ref_headers: return @@ -128,22 +144,7 @@ def quantify_refs_and_pqueries(self, unique_ref_headers: set, msa_fasta: fasta.F raise RuntimeError("Unsure what to do with sequence '{}'.\n".format(seq_name)) return - def read_trimmed_msa(self) -> fasta.FASTA: - msa_records = fasta.FASTA(file_name=self.mfa_out) - if self.ff_out == "phylip": - msa_records.fasta_dict = file_parsers.read_phylip_to_dict(self.mfa_out) - elif self.ff_out == "fasta": - msa_records.fasta_dict = fasta.read_fasta_to_dict(self.mfa_out) - else: - self.logger.error("Unsupported file format ('{}') of {}.\n".format(self.ff_out, self.mfa_out)) - sys.exit(1) - - msa_records.header_registry = fasta.register_headers(list(msa_records.fasta_dict.keys()), - drop=True) - return msa_records - def validate_alignment_trimming(self): - msa_fasta = self.read_trimmed_msa() if self.num_trim_seqs == 0: self.success = False @@ -154,30 +155,52 @@ def validate_alignment_trimming(self): self.logger.warning("MSA length increased after trimming {}\n".format(self.input)) self.success = False + if self.placement: + self.validate_placement_trimming() + elif self.num_trim_seqs != self.num_msa_seqs: + self.success = False + elif self.num_qc_seqs != self.num_msa_seqs: + self.success = False + return + + def validate_placement_trimming(self) -> None: + if self.num_queries_retained == 0: + self.success = False + if self.num_refs_failed_trimming > 0: + self.success = False + if self.num_refs_failed_qc > 0: + self.success = False + return + + def quality_control_trimmed_seqs(self) -> None: + """ + Quality control trimmed sequences according to their unaligned lengths. + Those passing this filter are appended to the list self.trim_qc_seqs. + """ + msa_fasta = self.read_trimmed_msa() msa_fasta.unalign() for seq_name, seq in msa_fasta.fasta_dict.items(): if len(seq) >= self.min_unaligned_seq_length: self.trim_qc_seqs.append(seq_name) - + self.num_qc_seqs = len(self.trim_qc_seqs) return - def compare_original_and_trimmed_multiple_alignments(self, min_len: int, ref_pkg=None): + def compare_original_and_trimmed_multiple_alignments(self, ref_pkg=None): """Summarises the number of character positions trimmed and new dimensions between the input and output MSA.""" self.num_trim_seqs, self.num_trim_cols = fasta.multiple_alignment_dimensions(self.mfa_out) self.num_msa_seqs, self.num_msa_cols = fasta.multiple_alignment_dimensions(self.input) - self.min_unaligned_seq_length = min_len - self.validate_alignment_trimming() + self.quality_control_trimmed_seqs() - if ref_pkg is not None: # type: refpkg.ReferencePackage + if self.placement: # Create a set of the reference sequence names unique_ref_headers = set(ref_pkg.get_fasta().get_seq_names()) self.quantify_refs_and_pqueries(unique_ref_headers) return - def get_qc_output(self) -> str: + def get_qc_output_file(self) -> str: if self.success: return self.qc_mfa_out else: @@ -193,6 +216,8 @@ def get_qc_trimmed_fasta(self) -> fasta.FASTA: def write_qc_trimmed_multiple_alignment(self) -> None: msa_fasta = self.get_qc_trimmed_fasta() + if not msa_fasta: + return fasta.write_new_fasta(fasta_dict=msa_fasta.fasta_dict, fasta_name=self.qc_mfa_out) return diff --git a/treesapp/commands.py b/treesapp/commands.py index 846bb8e1..77ecff8a 100644 --- a/treesapp/commands.py +++ b/treesapp/commands.py @@ -643,9 +643,10 @@ def create(sys_args): if args.trim_align: trimmer = multiple_alignment.trim_multiple_alignment_clipkit(msa_file=ts_create.ref_pkg.f__msa, ref_pkg=ts_create.ref_pkg, - min_seq_length=args.min_seq_length) + min_seq_length=args.min_seq_length, + for_placement=False) trimmer.summarise_trimming() - if trimmer.num_refs_retained != trimmer.num_msa_seqs: + if not trimmer.success: # At least one of the reference sequences were discarded and therefore this package is invalid. LOGGER.error("Trimming removed reference sequences. This could indicate non-homologous sequences.\n" + "Please improve sequence quality-control and/or rerun without the '--trim_align' flag.\n") diff --git a/treesapp/multiple_alignment.py b/treesapp/multiple_alignment.py index 51c9a9e2..594abab3 100644 --- a/treesapp/multiple_alignment.py +++ b/treesapp/multiple_alignment.py @@ -12,15 +12,18 @@ def trim_multiple_alignment_clipkit(msa_file: str, ref_pkg: refpkg.ReferencePackage, - min_seq_length: int) -> ckh.ClipKitHelper: + min_seq_length: int, for_placement=False) -> ckh.ClipKitHelper: # Modes can be one of 'smart-gap', 'kpi', 'kpic', 'gappy', 'kpi-smart-gap', 'kpi-gappy' trimmer = ckh.ClipKitHelper(fasta_in=msa_file, output_dir=os.path.dirname(msa_file), mode="gappy", - gap_prop=0.25) + gap_prop=0.25, + min_len=min_seq_length, + for_placement=for_placement) trimmer.refpkg_name = ref_pkg.prefix trimmer.run() - trimmer.compare_original_and_trimmed_multiple_alignments(min_seq_length, ref_pkg) + trimmer.compare_original_and_trimmed_multiple_alignments(ref_pkg) + trimmer.validate_alignment_trimming() trimmer.write_qc_trimmed_multiple_alignment() return trimmer @@ -53,12 +56,18 @@ def summarise_trimming(msa_trimmers: list) -> None: trim_summary = "\t\t{} trimming stats:\n".format(refpkg_name) if stats["msa_files"] == 0: continue + # To avoid ZeroDivisionError + if stats["successes"] > 0: + avg_cols_removed = round(sum(stats["cols_removed"]) / len(stats["cols_removed"])) + avg_seqs_removed = round(sum(stats["seqs_removed"]) / len(stats["seqs_removed"])) + else: + avg_cols_removed = 0 + avg_seqs_removed = 0 + trim_summary += "Multiple alignment files = {}\n".format(stats["msa_files"]) trim_summary += "Files successfully trimmed = {}\n".format(stats["successes"]) - trim_summary += "Average columns removed = {}\n".format(round(sum(stats["cols_removed"]) / - len(stats["cols_removed"]))) - trim_summary += "Average sequences removed = {}\n".format(round(sum(stats["seqs_removed"]) / - len(stats["seqs_removed"]))) + trim_summary += "Average columns removed = {}\n".format(avg_cols_removed) + trim_summary += "Average sequences removed = {}\n".format(avg_seqs_removed) LOGGER.debug(trim_summary + "\n") @@ -66,7 +75,7 @@ def summarise_trimming(msa_trimmers: list) -> None: if num_successful_alignments == 0: LOGGER.error("No quality alignment files to analyze after trimming. Exiting now.\n") - sys.exit(0) # Should be 3, but this allows Clade_exclusion_analyzer to continue after exit + sys.exit(0) # This allows Clade_exclusion_analyzer to continue after exit return @@ -78,9 +87,9 @@ def gather_multiple_alignments(msa_trimmers: list) -> dict: trimmed_output_files = {} for trimmer in msa_trimmers: # type: ckh.ClipKitHelper try: - trimmed_output_files[trimmer.refpkg_name].append(trimmer.get_qc_output()) + trimmed_output_files[trimmer.refpkg_name].append(trimmer.get_qc_output_file()) except KeyError: - trimmed_output_files[trimmer.refpkg_name] = [trimmer.get_qc_output()] + trimmed_output_files[trimmer.refpkg_name] = [trimmer.get_qc_output_file()] return trimmed_output_files @@ -103,7 +112,8 @@ def trim_multiple_alignment_farmer(concatenated_mfa_files: dict, min_seq_length: for msa in mfa_files: task_list.append({"msa_file": msa, "ref_pkg": ref_pkgs[refpkg_code], - "min_seq_length": min_seq_length}) + "min_seq_length": min_seq_length, + "for_placement": True}) msa_trimmers = eci.run_apply_async_multiprocessing(func=trim_multiple_alignment_clipkit, arguments_list=task_list, diff --git a/treesapp/training_utils.py b/treesapp/training_utils.py index 7f7a395f..4acc7da7 100644 --- a/treesapp/training_utils.py +++ b/treesapp/training_utils.py @@ -377,17 +377,19 @@ def generate_pquery_data_for_trainer(ref_pkg: ReferencePackage, taxon: str, trimmer = multiple_alignment.trim_multiple_alignment_clipkit(msa_file=all_msa, ref_pkg=ref_pkg, - min_seq_length=int(0.1*ref_pkg.hmm_length())) + min_seq_length=int(0.1*ref_pkg.hmm_length()), + for_placement=True) trimmer.summarise_trimming() if not trimmer.success: LOGGER.debug("Placements for '{}' are being skipped after failing MSA validation.\n".format(taxon)) for old_file in intermediate_files: - os.remove(old_file) + if os.path.isfile(old_file): + os.remove(old_file) intermediate_files.clear() return pqueries # Create the query-only FASTA file required by EPA-ng - fasta.split_combined_ref_query_fasta(trimmer.get_qc_output(), query_msa, ref_msa) + fasta.split_combined_ref_query_fasta(trimmer.get_qc_output_file(), query_msa, ref_msa) raxml_files = wrapper.raxml_evolutionary_placement(epa_exe=executables["epa-ng"], refpkg_tree=ce_refpkg.f__tree, From 1e45401dfcd7ab4c91f1fdbad3d13edd3cc753f3 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Sat, 18 Jun 2022 11:13:16 -0400 Subject: [PATCH 12/18] Fix tests --- tests/test_multiple_alignment.py | 3 ++- treesapp/multiple_alignment.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_multiple_alignment.py b/tests/test_multiple_alignment.py index a59d1b8f..326aa57d 100644 --- a/tests/test_multiple_alignment.py +++ b/tests/test_multiple_alignment.py @@ -18,7 +18,8 @@ def test_trim_multiple_alignments(self): result = multiple_alignment.trim_multiple_alignment_farmer({"PuhA": [test_fa]}, min_seq_length=10, n_proc=1, - ref_pkgs={"PuhA": test_rp}) + ref_pkgs={"PuhA": test_rp}, + for_placement=False) self.assertTrue(os.path.isfile(trim_file)) self.assertIsInstance(result, dict) self.assertTrue("PuhA" in result.keys()) diff --git a/treesapp/multiple_alignment.py b/treesapp/multiple_alignment.py index 594abab3..24b4bd9f 100644 --- a/treesapp/multiple_alignment.py +++ b/treesapp/multiple_alignment.py @@ -94,7 +94,7 @@ def gather_multiple_alignments(msa_trimmers: list) -> dict: def trim_multiple_alignment_farmer(concatenated_mfa_files: dict, min_seq_length: int, ref_pkgs: dict, - n_proc=1, silent=False) -> dict: + n_proc=1, for_placement=True, silent=False) -> dict: """ Runs ClipKit using the provided lists of the concatenated hmmalign files, and the number of sequences in each file. @@ -113,7 +113,7 @@ def trim_multiple_alignment_farmer(concatenated_mfa_files: dict, min_seq_length: task_list.append({"msa_file": msa, "ref_pkg": ref_pkgs[refpkg_code], "min_seq_length": min_seq_length, - "for_placement": True}) + "for_placement": for_placement}) msa_trimmers = eci.run_apply_async_multiprocessing(func=trim_multiple_alignment_clipkit, arguments_list=task_list, From 318909df3db0fa2e7e4b1682f9c2190054d3b135 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Sat, 18 Jun 2022 14:34:32 -0400 Subject: [PATCH 13/18] Squashed commit of the following: commit 5e1336a9fc796e2743727498a59842657082576b Author: cmorganl Date: Sat Jun 18 14:32:19 2022 -0400 Finish binning by sequence identity commit 40d9165d8280aee0f3009fd03b8aaca757d8d779 Author: cmorganl Date: Sat Jun 18 13:46:09 2022 -0400 Implement pairwise alignment against refpkg commit 02a8471ef9b20bd0f6626c1952daefa2e57e65ff Merge: a739274 1e45401 Author: cmorganl Date: Sat Jun 18 11:24:18 2022 -0400 Merge branch 'clipkit' into seq_clumping commit a739274b4abe7f005133ccb5ad8dd5b3768d192e Author: cmorganl Date: Thu Jun 16 07:50:31 2022 -0400 refactor HMM match binning --- tests/test_assign.py | 7 +++ tests/test_refpkg.py | 13 +++++ treesapp/assign.py | 108 ++++++++++++++++++++++------------- treesapp/hmmer_tbl_parser.py | 10 ++++ treesapp/refpkg.py | 40 +++++++++++++ 5 files changed, 139 insertions(+), 39 deletions(-) diff --git a/tests/test_assign.py b/tests/test_assign.py index 8db92a2a..afc6f3f1 100644 --- a/tests/test_assign.py +++ b/tests/test_assign.py @@ -243,6 +243,13 @@ def test_load_refpkg_classifiers(self): return + def test_bin_hmm_matches_by_region(self): + + return + + def test_bin_hmm_matches_by_identity(self): + return + if __name__ == '__main__': unittest.main() diff --git a/tests/test_refpkg.py b/tests/test_refpkg.py index b7560826..df8b317b 100644 --- a/tests/test_refpkg.py +++ b/tests/test_refpkg.py @@ -64,6 +64,19 @@ def test_band(self): self.db.band() return + def test_blast(self): + import Bio.Align + qseq = "AMQIGMSFISXYKVCAGEAAVADLAFAAKHAGVIQMADILPARRARGPNEPGGIKFGHFC" \ + "DMIQGDRKYPNDPVRANLEVVAAGAMLFDQIWLGSYMSGGVGFTQYATAAYTDNILDDYC" \ + "EYGVDYIKKKHGGIAKAKSTQEVVSDIATEVNLYGMEQYESYPTALESHFGGSQRASVLA" \ + "AASGLTCSLATANSNAGLNGWYLSMLMHKEGWSRLGFFGYDLQDQCGSANSMSIRPDEGL" \ + "LGELRGPNYPNYAI" + aln, seq_id, g_seq_id = self.db.blast(qseq) # type: Bio.Align.PairwiseAlignment + self.assertEqual(100, aln.score) + self.assertEqual(62, round(seq_id, 0)) + self.assertEqual(89, round(g_seq_id, 0)) + return + def test_disband(self): # Basic disband self.db.disband(output_dir="./tests/") diff --git a/treesapp/assign.py b/treesapp/assign.py index 15c02021..60690be6 100755 --- a/treesapp/assign.py +++ b/treesapp/assign.py @@ -561,7 +561,49 @@ def load_homologs(hmm_matches: dict, hmmsearch_query_fasta: str, query_seq_fasta return -def bin_hmm_matches(hmm_matches: dict, fasta_dict: dict) -> (dict, dict): +def bin_hmm_matches_by_region(ref_pkg_hmm_matches: list) -> dict: + """ + Algorithm for binning sequences: + 1. Sort HmmMatches by the proportion of the HMM profile they covered in increasing order (full-length last) + 2. For HmmMatch in sorted matches, determine overlap between HmmMatch and each bin's representative HmmMatch + 3. If overlap exceeds 80% of representative's aligned length add it to the bin, else continue + 4. When bins are exhausted create new bin with HmmMatch + """ + bins = dict() + for hmm_match in sorted(ref_pkg_hmm_matches, key=lambda x: x.end - x.start): # type: HmmMatch + # Add the FASTA record of the trimmed sequence - this one moves on for placement + binned = False + for bin_num in sorted(bins): + bin_rep = bins[bin_num][0] + overlap = min(hmm_match.pend, bin_rep.pend) - max(hmm_match.pstart, bin_rep.pstart) + if (100 * overlap) / (bin_rep.pend - bin_rep.pstart) > 80: # 80 refers to overlap proportion with seed + bins[bin_num].append(hmm_match) + binned = True + break + + if not binned: + bin_num = len(bins) + bins[bin_num] = list() + bins[bin_num].append(hmm_match) + + return bins + + +def bin_hmm_matches_by_identity(ref_pkg_hmm_matches: list, fasta_dict: dict, ref_pkg: refpkg.ReferencePackage) -> dict: + bins = dict() + for hmm_match in sorted(ref_pkg_hmm_matches, key=lambda x: x.end - x.start): # type: HmmMatch + match_sequence = fasta_dict[hmm_match.sequence_name()][hmm_match.start - 1:hmm_match.end] + _aln, _seq_id, g_seq_id = ref_pkg.blast(qseq=match_sequence) + hmm_match.aln_pident = round(g_seq_id, 2) + # Round using -1 to group into bins of width 10 + try: + bins[round(g_seq_id, -1)].append(hmm_match) + except KeyError: + bins[round(g_seq_id, -1)] = [hmm_match] + return bins + + +def bin_hmm_matches(hmm_matches: dict, fasta_dict: dict, refpkg_dict: dict, method="region") -> (dict, dict): """ Used for extracting query sequences that mapped to reference package HMM profiles. These are binned into groups based on the location on the HMM profile they mapped to such that MSAs downstream will have more conserved positions @@ -574,57 +616,41 @@ def bin_hmm_matches(hmm_matches: dict, fasta_dict: dict) -> (dict, dict): :param hmm_matches: Contains lists of HmmMatch objects mapped to the marker they matched :param fasta_dict: Stores either the original or ORF-predicted input FASTA. Headers are keys, sequences are values + :param method: How should the sequences be binned? Options are 'region' or 'identity'. :return: List of files that go on to placement stage, dictionary mapping marker-specific numbers to contig names """ LOGGER.info("Extracting and grouping the quality-controlled sequences... ") extracted_seq_dict = dict() # Keys are markers -> bin_num -> negative integers -> extracted sequences numeric_contig_index = dict() # Keys are markers -> negative integers -> headers - bins = dict() for marker in hmm_matches: if len(hmm_matches[marker]) == 0: continue if marker not in numeric_contig_index.keys(): numeric_contig_index[marker] = dict() - numeric_decrementor = -1 if marker not in extracted_seq_dict: extracted_seq_dict[marker] = dict() - # Algorithm for binning sequences: - # 1. Sort HmmMatches by the proportion of the HMM profile they covered in increasing order (full-length last) - # 2. For HmmMatch in sorted matches, determine overlap between HmmMatch and each bin's representative HmmMatch - # 3. If overlap exceeds 80% of representative's aligned length add it to the bin, else continue - # 4. When bins are exhausted create new bin with HmmMatch - for hmm_match in sorted(hmm_matches[marker], key=lambda x: x.end - x.start): # type: HmmMatch - if hmm_match.desc != '-': - contig_name = hmm_match.orf + ' ' + hmm_match.desc - else: - contig_name = hmm_match.orf - # Add the query sequence to the index map - orf_coordinates = str(hmm_match.start) + '_' + str(hmm_match.end) - numeric_contig_index[marker][numeric_decrementor] = contig_name + '|' + marker + '|' + orf_coordinates - # Add the FASTA record of the trimmed sequence - this one moves on for placement - full_sequence = fasta_dict[contig_name] - binned = False - for bin_num in sorted(bins): - bin_rep = bins[bin_num][0] - overlap = min(hmm_match.pend, bin_rep.pend) - max(hmm_match.pstart, bin_rep.pstart) - if (100 * overlap) / (bin_rep.pend - bin_rep.pstart) > 80: # 80 refers to overlap proportion with seed - bins[bin_num].append(hmm_match) - extracted_seq_dict[marker][bin_num][numeric_decrementor] = full_sequence[ - hmm_match.start - 1:hmm_match.end] - binned = True - break - if not binned: - bin_num = len(bins) - bins[bin_num] = list() - extracted_seq_dict[marker][bin_num] = dict() - bins[bin_num].append(hmm_match) - extracted_seq_dict[marker][bin_num][numeric_decrementor] = full_sequence[ - hmm_match.start - 1:hmm_match.end] - numeric_decrementor -= 1 + if method == "region": + binned_matches = bin_hmm_matches_by_region(ref_pkg_hmm_matches=hmm_matches[marker]) + else: + binned_matches = bin_hmm_matches_by_identity(ref_pkg_hmm_matches=hmm_matches[marker], + fasta_dict=fasta_dict, + ref_pkg=refpkg_dict[marker]) + + numeric_decrementor = -1 + for bin_num in binned_matches: + for hmm_match in binned_matches[bin_num]: + match_sequence = fasta_dict[hmm_match.sequence_name()][hmm_match.start - 1:hmm_match.end] + # Add the query sequence to the index map + numeric_contig_index[marker][ + numeric_decrementor] = hmm_match.sequence_name() + '|' + marker + '|' + hmm_match.coord_string() + try: + extracted_seq_dict[marker][bin_num][numeric_decrementor] = match_sequence + except KeyError: + extracted_seq_dict[marker][bin_num] = {numeric_decrementor: match_sequence} + numeric_decrementor -= 1 - bins.clear() LOGGER.info("done.\n") return extracted_seq_dict, numeric_contig_index @@ -1328,7 +1354,10 @@ def assign(sys_args): load_homologs(hmm_matches, ts_assign.formatted_input, query_seqs) pqueries = load_pqueries(hmm_matches, query_seqs) query_seqs.change_dict_keys("num_id") - extracted_seq_dict, numeric_contig_index = bin_hmm_matches(hmm_matches, query_seqs.fasta_dict) + extracted_seq_dict, numeric_contig_index = bin_hmm_matches(hmm_matches, + query_seqs.fasta_dict, + refpkg_dict=refpkg_dict, + method="identity") numeric_contig_index = replace_contig_names(numeric_contig_index, query_seqs) homolog_seq_files = write_grouped_fastas(extracted_seq_dict, numeric_contig_index, refpkg_dict, ts_assign.stage_lookup("search").dir_path) @@ -1344,7 +1373,8 @@ def assign(sys_args): homolog_seq_files=homolog_seq_files, n_proc=n_proc, trim_align=args.trim_align, - min_seq_length=args.min_seq_length) + min_seq_length=args.min_seq_length, + ) delete_files(args.delete, ts_assign.stage_lookup("search").dir_path, 2) ts_assign.increment_stage_dir(checkpoint="align") diff --git a/treesapp/hmmer_tbl_parser.py b/treesapp/hmmer_tbl_parser.py index ea745cd9..4d8908ac 100755 --- a/treesapp/hmmer_tbl_parser.py +++ b/treesapp/hmmer_tbl_parser.py @@ -71,6 +71,7 @@ def __init__(self): self.eval = 0.0 # Full-sequence E-value (in the case a sequence alignment is split) self.full_score = 0 self.next_domain = None # The next domain aligned by hmmsearch + self.aln_pident = 0.0 def get_info(self): info_string = "Info for query " + str(self.orf) + ":\n" @@ -85,6 +86,15 @@ def get_info(self): info_string += "\tfull score = " + str(self.full_score) + "\n" return info_string + def coord_string(self, sep='_') -> str: + return str(self.start) + sep + str(self.end) + + def sequence_name(self, sep=' ') -> str: + if self.desc != '-': + return self.orf + sep + self.desc + else: + return self.orf + def subsequent_matches(self): if not self.next_domain: return [self] diff --git a/treesapp/refpkg.py b/treesapp/refpkg.py index 006506b0..398098a7 100644 --- a/treesapp/refpkg.py +++ b/treesapp/refpkg.py @@ -7,6 +7,9 @@ from shutil import copy from datetime import datetime as dt +from Bio import Align +from Bio.SubsMat import MatrixInfo as matlist +from Bio import pairwise2 from packaging import version from ete3 import Tree from pandas import DataFrame @@ -1272,6 +1275,43 @@ def deduplicate_annotation_members(self) -> None: return + def blast(self, qseq: str, **kwargs) -> (Align.PairwiseAlignment, float, float): + """Find the percent pairwise identity between a query sequence and its closest match in a reference package.""" + aligner = Align.PairwiseAligner(mode="global") + aligner.match_score = kwargs.get('match', 1) + aligner.mismatch_score = kwargs.get('mismatch', 0) + aligner.gap_score = kwargs.get('gap', -10) + aligner.extend_gap_score = kwargs.get('extend_gap', -1) + + def _calculate_identity(sequenceA, sequenceB): + """ + Returns the percentage of identical characters between two sequences. + Assumes the sequences are aligned. + """ + + sa, sb, sl = sequenceA, sequenceB, len(sequenceA) + matches = [sa[i] == sb[i] for i in range(sl)] + seq_id = (100 * sum(matches)) / sl + + gapless_sl = sum([1 for i in range(sl) if (sa[i] != '-' and sb[i] != '-')]) + gap_id = (100 * sum(matches)) / gapless_sl + return (seq_id, gap_id) + + ref_seqs = self.get_fasta() + ref_seqs.unalign() + top_aln = None + for sname, sseq in ref_seqs.fasta_dict.items(): + aln = aligner.align(sseq, qseq)[0] + if not top_aln: + top_aln = aln + elif aln.score > top_aln.score: + top_aln = aln + + # Calculate sequence identity + aligned_A , _aln, aligned_B = top_aln.format().split("\n")[:3] + seq_id, g_seq_id = _calculate_identity(aligned_A, aligned_B) + return top_aln, seq_id, g_seq_id + def write_edited_pkl(ref_pkg: ReferencePackage, output_dir: str, overwrite: bool) -> int: if output_dir: From f2e499e34788e71cbe5907e6876691b3a90c1fc6 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Thu, 23 Jun 2022 10:49:42 -0400 Subject: [PATCH 14/18] Test writing grouped fasta in assign --- tests/test_assign.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/test_assign.py b/tests/test_assign.py index afc6f3f1..572540f0 100644 --- a/tests/test_assign.py +++ b/tests/test_assign.py @@ -250,6 +250,43 @@ def test_bin_hmm_matches_by_region(self): def test_bin_hmm_matches_by_identity(self): return + def test_write_grouped_fastas(self): + from treesapp import assign + from treesapp import fasta + import random + from string import ascii_uppercase + seq_dict = {} + seq_name_idx = {} + rp_name = "PuhA" + scaler = 3 + n_seqs = self.refpkg_dict[rp_name].num_seqs * scaler + + # Test with empty inputs + fasta_files = assign.write_grouped_fastas(extracted_seq_dict=seq_dict, + numeric_contig_index=seq_name_idx, + refpkg_dict=self.refpkg_dict, + output_dir=self.output_dir) + self.assertEqual([], fasta_files) + + # Test real condition + seq_dict.update({rp_name: {"99": {-1*n: ''.join(random.choice(ascii_uppercase) for _ in range(50)) + for n in range(n_seqs)}}}) + seq_name_idx.update({rp_name: {-1*x: "seq_{}".format(x) for x in range(n_seqs)}}) + fasta_files = assign.write_grouped_fastas(extracted_seq_dict=seq_dict, + numeric_contig_index=seq_name_idx, + refpkg_dict=self.refpkg_dict, + output_dir=self.output_dir) + self.assertEqual(scaler, len(fasta_files)) + self.assertEqual([os.path.join(self.output_dir, + "{}_hmm_purified_group{}.faa".format(rp_name, n)) for n in range(scaler)], + fasta_files) + # Ensure there are the right number of sequences in each file + for file_path in fasta_files: + self.assertEqual(self.refpkg_dict[rp_name].num_seqs, + len(fasta.get_headers(file_path))) + + return + if __name__ == '__main__': unittest.main() From cb82d67a219d372b33a73c4ebd32037d7cea0ccc Mon Sep 17 00:00:00 2001 From: cmorganl Date: Thu, 23 Jun 2022 16:41:12 -0400 Subject: [PATCH 15/18] Group and trim queries according to identity to reference sequences --- tests/test_assign.py | 23 ++-- tests/test_commands.py | 18 +++- tests/test_multiple_alignment.py | 5 +- treesapp/assign.py | 178 ++++++++++++++++++------------- treesapp/clipkit_helper.py | 8 +- treesapp/multiple_alignment.py | 28 +++-- 6 files changed, 159 insertions(+), 101 deletions(-) diff --git a/tests/test_assign.py b/tests/test_assign.py index 572540f0..cd0cbc29 100644 --- a/tests/test_assign.py +++ b/tests/test_assign.py @@ -262,20 +262,21 @@ def test_write_grouped_fastas(self): n_seqs = self.refpkg_dict[rp_name].num_seqs * scaler # Test with empty inputs - fasta_files = assign.write_grouped_fastas(extracted_seq_dict=seq_dict, - numeric_contig_index=seq_name_idx, - refpkg_dict=self.refpkg_dict, - output_dir=self.output_dir) - self.assertEqual([], fasta_files) + fasta_file_group_map = assign.write_grouped_fastas(extracted_seq_dict=seq_dict, + seq_name_index=seq_name_idx, + refpkg_dict=self.refpkg_dict, + output_dir=self.output_dir) + self.assertEqual({}, fasta_file_group_map) # Test real condition seq_dict.update({rp_name: {"99": {-1*n: ''.join(random.choice(ascii_uppercase) for _ in range(50)) for n in range(n_seqs)}}}) seq_name_idx.update({rp_name: {-1*x: "seq_{}".format(x) for x in range(n_seqs)}}) - fasta_files = assign.write_grouped_fastas(extracted_seq_dict=seq_dict, - numeric_contig_index=seq_name_idx, - refpkg_dict=self.refpkg_dict, - output_dir=self.output_dir) + fasta_file_group_map = assign.write_grouped_fastas(extracted_seq_dict=seq_dict, + seq_name_index=seq_name_idx, + refpkg_dict=self.refpkg_dict, + output_dir=self.output_dir) + fasta_files = list(fasta_file_group_map[rp_name]) self.assertEqual(scaler, len(fasta_files)) self.assertEqual([os.path.join(self.output_dir, "{}_hmm_purified_group{}.faa".format(rp_name, n)) for n in range(scaler)], @@ -287,6 +288,10 @@ def test_write_grouped_fastas(self): return + def test_bin_hmm_matches(self): + self.assertTrue(False) + return + if __name__ == '__main__': unittest.main() diff --git a/tests/test_commands.py b/tests/test_commands.py index db11fb72..4b9b1376 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -106,11 +106,17 @@ def test_assign(self): "--placement_summary", "aelw", "--trim_align", "--svm"] assign.assign(assign_commands_list) - self.assertEqual(14, len(read_classification_table(assignments_tbl))) + assigned_queries = 15 + self.assertEqual(assigned_queries, + len(read_classification_table(assignments_tbl))) self.assertTrue(os.path.isfile(classified_seqs_faa)) - assign.assign(assign_commands_list + ["--targets", "McrA,McrB,XmoA"]) - self.assertEqual(18, len(read_classification_table(assignments_tbl))) - self.assertEqual(18, len(fasta.get_headers(classified_seqs_faa))) + assign.assign(assign_commands_list + ["--targets", "McrA,McrB,XmoA", + "--min_seq_length", str(30)]) + assigned_queries = 17 + self.assertEqual(assigned_queries, + len(read_classification_table(assignments_tbl))) + self.assertEqual(assigned_queries, + len(fasta.get_headers(classified_seqs_faa))) # Test nucleotide sequence input WITHOUT targets listed assign_commands_list = ["--fastx_input", self.nt_test_fa, @@ -124,8 +130,10 @@ def test_assign(self): "--reads", get_test_data("SRR3669912_1.fastq"), "--reverse", get_test_data("SRR3669912_2.fastq")] assign.assign(assign_commands_list) + assigned_queries = 8 lines = read_classification_table(assignments_tbl) - self.assertEqual(8, len(lines)) + self.assertEqual(assigned_queries, + len(lines)) classified_seqs = set() pqueries = assignments_to_pqueries(lines) for rp_pqs in pqueries.values(): diff --git a/tests/test_multiple_alignment.py b/tests/test_multiple_alignment.py index 326aa57d..7e030d80 100644 --- a/tests/test_multiple_alignment.py +++ b/tests/test_multiple_alignment.py @@ -15,7 +15,10 @@ def test_trim_multiple_alignments(self): test_rp.f__pkl = get_test_data(filename=os.path.join("refpkgs", "PuhA_build.pkl")) test_rp.slurp() - result = multiple_alignment.trim_multiple_alignment_farmer({"PuhA": [test_fa]}, + result = multiple_alignment.trim_multiple_alignment_farmer([{"qry_ref_mfa": test_fa, + "refpkg_name": "PuhA", + "gap_tuned": True, + "avg_id": 88}], min_seq_length=10, n_proc=1, ref_pkgs={"PuhA": test_rp}, diff --git a/treesapp/assign.py b/treesapp/assign.py index 60690be6..40383ca1 100755 --- a/treesapp/assign.py +++ b/treesapp/assign.py @@ -364,7 +364,9 @@ def search(self, ref_pkg_dict: dict, hmm_parsing_thresholds, num_threads=2) -> d return file_parsers.parse_domain_tables(hmm_parsing_thresholds, refpkg_hmmer_tables) - def align(self, refpkg_dict: dict, homolog_seq_files: list, min_seq_length: int, n_proc: int, trim_align=True) -> dict: + def align(self, refpkg_dict: dict, pquery_groups: list, + min_seq_length: int, n_proc: int, + trim_align=True) -> dict: if self.past_last_stage("align"): return {} MSAs = namedtuple("MSAs", "ref query") @@ -372,21 +374,21 @@ def align(self, refpkg_dict: dict, homolog_seq_files: list, min_seq_length: int, split_msa_files = self.fetch_multiple_alignments() target_refpkgs = {prefix: rp for prefix, rp in refpkg_dict.items() if prefix not in split_msa_files} if self.stage_status("align") or target_refpkgs: - concatenated_msa_files = multiple_alignments(self.executables, homolog_seq_files, - target_refpkgs, "hmmalign", - output_dir=self.stage_lookup(name="align").dir_path, - num_proc=n_proc, silent=self.silent) - if concatenated_msa_files: + msa_files = multiple_alignments(self.executables, pquery_groups, + target_refpkgs, "hmmalign", + output_dir=self.stage_lookup(name="align").dir_path, + num_proc=n_proc, silent=self.silent) + if msa_files: combined_msa_files = {} if trim_align: - trimmed_mfa_files = multiple_alignment.trim_multiple_alignment_farmer(concatenated_msa_files, + trimmed_mfa_files = multiple_alignment.trim_multiple_alignment_farmer(pquery_groups, min_seq_length=min_seq_length, ref_pkgs=refpkg_dict, n_proc=n_proc, silent=self.silent) combined_msa_files.update(trimmed_mfa_files) else: - combined_msa_files.update(concatenated_msa_files) + combined_msa_files.update(msa_files) # Subset the multiple alignment of reference sequences and queries to just contain query sequences for refpkg_name in combined_msa_files: @@ -590,6 +592,9 @@ def bin_hmm_matches_by_region(ref_pkg_hmm_matches: list) -> dict: def bin_hmm_matches_by_identity(ref_pkg_hmm_matches: list, fasta_dict: dict, ref_pkg: refpkg.ReferencePackage) -> dict: + """ + Group query sequences based on their identity to the closest sequence in the reference package. + """ bins = dict() for hmm_match in sorted(ref_pkg_hmm_matches, key=lambda x: x.end - x.start): # type: HmmMatch match_sequence = fasta_dict[hmm_match.sequence_name()][hmm_match.start - 1:hmm_match.end] @@ -597,9 +602,9 @@ def bin_hmm_matches_by_identity(ref_pkg_hmm_matches: list, fasta_dict: dict, ref hmm_match.aln_pident = round(g_seq_id, 2) # Round using -1 to group into bins of width 10 try: - bins[round(g_seq_id, -1)].append(hmm_match) + bins[int(round(g_seq_id, -1))].append(hmm_match) except KeyError: - bins[round(g_seq_id, -1)] = [hmm_match] + bins[int(round(g_seq_id, -1))] = [hmm_match] return bins @@ -611,17 +616,19 @@ def bin_hmm_matches(hmm_matches: dict, fasta_dict: dict, refpkg_dict: dict, meth The first nested dictionary returned "extracted_seq_dict" contains marker (i.e. ref_pkg names) strings mapped to bin numbers mapped to query sequence negative integer code names mapped to their extracted, or sliced, sequence. - The second dictionary returned "numeric_contig_index" is used for mapping query sequence negative integer code names + The second dictionary returned "seq_name_index" is used for mapping query sequence negative integer code names mapped to their original header names with the alignment coordinates appended at the end for each marker. :param hmm_matches: Contains lists of HmmMatch objects mapped to the marker they matched :param fasta_dict: Stores either the original or ORF-predicted input FASTA. Headers are keys, sequences are values + :param refpkg_dict: Dictionary of reference packages indexed by their prefix attibutes. :param method: How should the sequences be binned? Options are 'region' or 'identity'. :return: List of files that go on to placement stage, dictionary mapping marker-specific numbers to contig names """ LOGGER.info("Extracting and grouping the quality-controlled sequences... ") extracted_seq_dict = dict() # Keys are markers -> bin_num -> negative integers -> extracted sequences numeric_contig_index = dict() # Keys are markers -> negative integers -> headers + bin_identities = {} for marker in hmm_matches: if len(hmm_matches[marker]) == 0: @@ -640,6 +647,9 @@ def bin_hmm_matches(hmm_matches: dict, fasta_dict: dict, refpkg_dict: dict, meth numeric_decrementor = -1 for bin_num in binned_matches: + if marker not in bin_identities: + bin_identities[marker] = {} + bin_identities[marker][bin_num] = [hmm_match.aln_pident for hmm_match in binned_matches[bin_num]] for hmm_match in binned_matches[bin_num]: match_sequence = fasta_dict[hmm_match.sequence_name()][hmm_match.start - 1:hmm_match.end] # Add the query sequence to the index map @@ -653,72 +663,66 @@ def bin_hmm_matches(hmm_matches: dict, fasta_dict: dict, refpkg_dict: dict, meth LOGGER.info("done.\n") - return extracted_seq_dict, numeric_contig_index + return extracted_seq_dict, numeric_contig_index, bin_identities -def write_grouped_fastas(extracted_seq_dict: dict, numeric_contig_index: dict, refpkg_dict: dict, output_dir: str): - hmmalign_input_fastas = list() - bulk_marker_fasta = dict() - bin_fasta = dict() - +def summarise_hits_to_groups(extracted_seq_dict): group_size_string = "Number of query sequences in each marker's group:\n" for marker in extracted_seq_dict: for group in sorted(extracted_seq_dict[marker]): if extracted_seq_dict[marker][group]: group_size_string += "\t".join([marker, str(group), str(len(extracted_seq_dict[marker][group]))]) + "\n" LOGGER.debug(group_size_string + "\n") + return + + +def write_grouped_fastas(extracted_seq_dict: dict, seq_name_index: dict, refpkg_dict: dict, output_dir: str) -> dict: + hmmalign_input_fastas = dict() + bulk_marker_fasta = dict() + bin_fasta = dict() + + summarise_hits_to_groups(extracted_seq_dict) LOGGER.info("Writing the grouped sequences to FASTA files... ") for marker in extracted_seq_dict: ref_pkg = refpkg_dict[marker] # type: refpkg.ReferencePackage f_acc = 0 # For counting the number of files for a marker. Will exceed groups if len(queries) > len(references) + f_path = os.path.join(output_dir, "{}_hmm_purified_group{}.faa".format(marker, f_acc)) + hmmalign_input_fastas[marker] = {} for group in sorted(extracted_seq_dict[marker]): if extracted_seq_dict[marker][group]: group_sequences = extracted_seq_dict[marker][group] for num in group_sequences: # Add the query sequence to the master marker FASTA with the full sequence name - bulk_marker_fasta[numeric_contig_index[marker][num]] = group_sequences[num] + bulk_marker_fasta[seq_name_index[marker][num]] = group_sequences[num] # Add the query sequence to this bin's FASTA file bin_fasta[str(num)] = group_sequences[num] # Ensuring the number of query sequences doesn't exceed the number of reference sequences if len(bin_fasta) >= ref_pkg.num_seqs: - fasta.write_new_fasta(bin_fasta, - output_dir + marker + "_hmm_purified_group" + str(f_acc) + ".faa") - hmmalign_input_fastas.append(output_dir + marker + "_hmm_purified_group" + str(f_acc) + ".faa") + fasta.write_new_fasta(bin_fasta, f_path) + hmmalign_input_fastas[marker][f_path] = group f_acc += 1 + f_path = os.path.join(output_dir, "{}_hmm_purified_group{}.faa".format(marker, f_acc)) bin_fasta.clear() if len(bin_fasta) >= 1: - fasta.write_new_fasta(bin_fasta, output_dir + marker + "_hmm_purified_group" + str(f_acc) + ".faa") - hmmalign_input_fastas.append(output_dir + marker + "_hmm_purified_group" + str(f_acc) + ".faa") + fasta.write_new_fasta(bin_fasta, f_path) + hmmalign_input_fastas[marker][f_path] = group + f_acc += 1 + f_path = os.path.join(output_dir, "{}_hmm_purified_group{}.faa".format(marker, f_acc)) f_acc += 1 bin_fasta.clear() # Now write a single FASTA file with all identified markers if len(bulk_marker_fasta) >= 1: - trimmed_hits_fasta = output_dir + marker + "_hmm_purified.faa" + trimmed_hits_fasta = os.path.join(output_dir, marker + "_hmm_purified.faa") fasta.write_new_fasta(bulk_marker_fasta, trimmed_hits_fasta) bulk_marker_fasta.clear() LOGGER.info("done.\n") return hmmalign_input_fastas -def subsequence(fasta_dictionary, contig_name, start, end): - """ - Extracts a sub-sequence from `start` to `end` of `contig_name` in `fasta_dictionary` - with headers for keys and sequences as values. `contig_name` does not contain the '>' character - - :param fasta_dictionary: - :param contig_name: - :param start: - :param end: - :return: A string representing the sub-sequence of interest - """ - subseq = fasta_dictionary['>' + contig_name][start:end] - return subseq - - def get_sequence_counts(concatenated_mfa_files: dict, ref_alignment_dimensions: dict, verbosity: bool, file_type: str): alignment_length_dict = dict() for refpkg_name in concatenated_mfa_files: @@ -800,13 +804,13 @@ def multiple_alignments(executables: dict, query_sequence_files: list, refpkg_di return concatenated_msa_files -def prepare_and_run_hmmalign(execs: dict, single_query_fasta_files: list, refpkg_dict: dict, +def prepare_and_run_hmmalign(execs: dict, pquery_groups_manifest: list, refpkg_dict: dict, output_dir="", n_proc=2, silent=False) -> dict: """ Runs `hmmalign` to add the query sequences into the reference FASTA multiple alignments :param execs: Dictionary of executable file paths indexed by the software names - :param single_query_fasta_files: List of unaligned query sequences in FASTA format + :param pquery_groups_manifest: List of dictionaries :param refpkg_dict: A dictionary of ReferencePackage instances indexed by their respective prefix attributes :param output_dir: Where to write the multiple alignment files containing reference and query sequences :param n_proc: The number of alignment jobs to run in parallel @@ -819,32 +823,27 @@ def prepare_and_run_hmmalign(execs: dict, single_query_fasta_files: list, refpkg task_list = list() # Run hmmalign on each fasta file - for query_fa_in in sorted(single_query_fasta_files): - file_name_info = re.match(r"(.*)_hmm_purified.*\.(f.*)$", os.path.basename(query_fa_in)) - if file_name_info: - refpkg_name, extension = file_name_info.groups() - else: - LOGGER.error("Unable to parse information from file name:" + "\n" + str(query_fa_in) + "\n") - sys.exit(3) + for pquery_group in pquery_groups_manifest: + # Add to the manifest to ensure all files are available to ClipKitHelper + file_prefix, _ext = os.path.splitext(os.path.basename(pquery_group["qry_fa"])) + query_mfa_out = os.path.join(output_dir, file_prefix + ".sto") + pquery_group["qry_ref_mfa"] = os.path.join(output_dir, file_prefix + ".mfa") try: - ref_pkg = refpkg_dict[refpkg_name] # type: refpkg.ReferencePackage + ref_pkg = refpkg_dict[pquery_group["refpkg_name"]] # type: refpkg.ReferencePackage except KeyError: + # Reference packages are provided only for MSAs that need to be processed continue - if ref_pkg.prefix not in hmmalign_singlehit_files: - hmmalign_singlehit_files[ref_pkg.prefix] = [] - - query_mfa_out = os.path.join(output_dir, - re.sub('.' + re.escape(extension) + r"$", ".sto", os.path.basename(query_fa_in))) - try: - mfa_out_dict[ref_pkg.prefix].append(query_mfa_out) - except KeyError: - mfa_out_dict[ref_pkg.prefix] = [query_mfa_out] + # Stash file name in dictionary for quick look-up + mfa_out_dict[query_mfa_out] = pquery_group # Get the paths to either the HMM or CM profile files task_list.append([wrapper.hmmalign_command(execs["hmmalign"], - ref_pkg.f__msa, ref_pkg.f__profile, query_fa_in, query_mfa_out)]) + ref_pkg.f__msa, + ref_pkg.f__profile, + pquery_group["qry_fa"], + query_mfa_out)]) eci.run_apply_async_multiprocessing(func=eci.launch_write_command, arguments_list=task_list, @@ -852,19 +851,21 @@ def prepare_and_run_hmmalign(execs: dict, single_query_fasta_files: list, refpkg pbar_desc="Profile alignment", disable=silent) - for prefix in mfa_out_dict: - for query_mfa_out in mfa_out_dict[prefix]: - mfa_file = re.sub(r"\.sto$", ".mfa", query_mfa_out) - seq_dict = file_parsers.read_stockholm_to_dict(query_mfa_out) - fasta.write_new_fasta(seq_dict, mfa_file) - hmmalign_singlehit_files[prefix].append(mfa_file) - end_time = time.time() hours, remainder = divmod(end_time - start_time, 3600) minutes, seconds = divmod(remainder, 60) LOGGER.debug("\thmmalign time required: " + ':'.join([str(hours), str(minutes), str(round(seconds, 2))]) + "\n") + # Convert from Stockholm to FASTA format + for query_mfa_out, pquery_group in mfa_out_dict.items(): + seq_dict = file_parsers.read_stockholm_to_dict(query_mfa_out) + fasta.write_new_fasta(seq_dict, pquery_group["qry_ref_mfa"]) + try: + hmmalign_singlehit_files[pquery_group["refpkg_name"]].append(pquery_group["qry_ref_mfa"]) + except KeyError: + hmmalign_singlehit_files[pquery_group["refpkg_name"]] = [pquery_group["qry_ref_mfa"]] + return hmmalign_singlehit_files @@ -1306,6 +1307,30 @@ def alert_for_refpkg_feature_annotations(pqueries: dict, refpkg_dict: dict) -> N return +def build_pquery_group_manifest(bin_identities: dict, file_group_map: dict) -> list: + pquery_group_manifest = [] + for marker in file_group_map: + for file_name, group_name in file_group_map[marker].items(): + manifest = {"group": group_name, + "refpkg_name": marker, + "qry_fa": file_name, + "qry_ref_mfa": '', + "gap_tuned": False, + "avg_id": 0.0} + + try: + id_vals = bin_identities[marker][group_name] + manifest["avg_id"] = round(sum(id_vals)/len(id_vals), 2) + except (KeyError, ZeroDivisionError): + manifest["avg_id"] = 0.0 + + if manifest["avg_id"] >= 0: + manifest["gap_tuned"] = True + + pquery_group_manifest.append(manifest) + return pquery_group_manifest + + def assign(sys_args): # STAGE 1: Prompt the user and prepare files and lists for the pipeline parser = treesapp_args.TreeSAPPArgumentParser(description='Classify sequences through evolutionary placement.') @@ -1354,13 +1379,17 @@ def assign(sys_args): load_homologs(hmm_matches, ts_assign.formatted_input, query_seqs) pqueries = load_pqueries(hmm_matches, query_seqs) query_seqs.change_dict_keys("num_id") - extracted_seq_dict, numeric_contig_index = bin_hmm_matches(hmm_matches, - query_seqs.fasta_dict, - refpkg_dict=refpkg_dict, - method="identity") + extracted_seq_dict, numeric_contig_index, bin_identities = bin_hmm_matches(hmm_matches, + query_seqs.fasta_dict, + refpkg_dict=refpkg_dict, + method="identity") numeric_contig_index = replace_contig_names(numeric_contig_index, query_seqs) - homolog_seq_files = write_grouped_fastas(extracted_seq_dict, numeric_contig_index, - refpkg_dict, ts_assign.stage_lookup("search").dir_path) + homolog_seq_files = write_grouped_fastas(extracted_seq_dict, + seq_name_index=numeric_contig_index, + refpkg_dict=refpkg_dict, + output_dir=ts_assign.stage_lookup("search").dir_path) + pquery_group_manifest = build_pquery_group_manifest(bin_identities, + homolog_seq_files) # TODO: Replace this merge_fasta_dicts_by_index with FASTA - only necessary for writing the classified sequences extracted_seq_dict = fasta.merge_fasta_dicts_by_index(extracted_seq_dict, numeric_contig_index) delete_files(args.delete, ts_assign.stage_lookup("search").dir_path, 1) @@ -1370,11 +1399,10 @@ def assign(sys_args): # STAGE 4: Run hmmalign, and optionally trim, to produce the MSAs for phylogenetic placement ## split_msa_files = ts_assign.align(refpkg_dict=refpkg_dict, - homolog_seq_files=homolog_seq_files, + pquery_groups=pquery_group_manifest, n_proc=n_proc, trim_align=args.trim_align, - min_seq_length=args.min_seq_length, - ) + min_seq_length=args.min_seq_length) delete_files(args.delete, ts_assign.stage_lookup("search").dir_path, 2) ts_assign.increment_stage_dir(checkpoint="align") diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index 90d8cd63..6c7b91d9 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -19,6 +19,10 @@ def __init__(self, fasta_in: str, output_dir: str, mode="smart-gap", gap_prop=0.95, min_len=None, for_placement=False): self.logger = logging.getLogger(logger.logger_name()) self.input = fasta_in + if not os.path.isfile(self.input): + self.logger.error("ClipKit input file '{}' doesn't exist.".format(self.input)) + sys.exit(1) + if mode not in self.CLIPKIT_MODES: self.logger.error("'{}' is not a valid TrimmingMode.\n".format(mode)) sys.exit(1) @@ -57,7 +61,9 @@ def __init__(self, fasta_in: str, output_dir: str, self.num_refs_retained = 0 return - def run(self, verbose=False): + def run(self, verbose=False, force=False) -> None: + if os.path.isfile(self.ff_out) and not force: + return start_time = time.time() # Capture all output from print statements within ClipKit diff --git a/treesapp/multiple_alignment.py b/treesapp/multiple_alignment.py index 24b4bd9f..d3b4f7b0 100644 --- a/treesapp/multiple_alignment.py +++ b/treesapp/multiple_alignment.py @@ -12,12 +12,12 @@ def trim_multiple_alignment_clipkit(msa_file: str, ref_pkg: refpkg.ReferencePackage, - min_seq_length: int, for_placement=False) -> ckh.ClipKitHelper: + min_seq_length: int, for_placement=False, gap_prop=0.8) -> ckh.ClipKitHelper: # Modes can be one of 'smart-gap', 'kpi', 'kpic', 'gappy', 'kpi-smart-gap', 'kpi-gappy' trimmer = ckh.ClipKitHelper(fasta_in=msa_file, output_dir=os.path.dirname(msa_file), mode="gappy", - gap_prop=0.25, + gap_prop=gap_prop, min_len=min_seq_length, for_placement=for_placement) trimmer.refpkg_name = ref_pkg.prefix @@ -93,27 +93,35 @@ def gather_multiple_alignments(msa_trimmers: list) -> dict: return trimmed_output_files -def trim_multiple_alignment_farmer(concatenated_mfa_files: dict, min_seq_length: int, ref_pkgs: dict, +def trim_multiple_alignment_farmer(pquery_groups_manifest: list, min_seq_length: int, ref_pkgs: dict, n_proc=1, for_placement=True, silent=False) -> dict: """ Runs ClipKit using the provided lists of the concatenated hmmalign files, and the number of sequences in each file. - :param concatenated_mfa_files: A dictionary containing f_contig keys mapping to a FASTA or Phylip sequential file + :param pquery_groups_manifest: A list of dictionaries with the keys '', ... :param min_seq_length: Minimum length for a sequence to be retained in the MSA :param ref_pkgs: A dictionary of reference package names mapped to ReferencePackage instances :param n_proc: The number of parallel processes to be launched for alignment trimming + :param for_placement: A flag indicating the MSA contains both reference and query sequences :param silent: A boolean indicating whether the :return: A list of files resulting from multiple sequence alignment masking. """ start_time = time.time() task_list = list() + hmm_perc = 1.0 - for refpkg_code, mfa_files in sorted(concatenated_mfa_files.items()): - for msa in mfa_files: - task_list.append({"msa_file": msa, - "ref_pkg": ref_pkgs[refpkg_code], - "min_seq_length": min_seq_length, - "for_placement": for_placement}) + for pquery_group in pquery_groups_manifest: + trim_args = {"msa_file": pquery_group["qry_ref_mfa"], + "ref_pkg": ref_pkgs[pquery_group["refpkg_name"]], + "min_seq_length": min_seq_length, + "for_placement": for_placement} + + if pquery_group["gap_tuned"]: + trim_args["gap_prop"] = pquery_group["avg_id"]/100 + if trim_args["min_seq_length"] == 0: + trim_args["min_seq_length"] = int(ref_pkgs[pquery_group["refpkg_name"]].hmm_length() * (hmm_perc/100)) + + task_list.append(trim_args) msa_trimmers = eci.run_apply_async_multiprocessing(func=trim_multiple_alignment_clipkit, arguments_list=task_list, From 756fcead7e313d345046b6e42f0c0c355a6f441a Mon Sep 17 00:00:00 2001 From: cmorganl Date: Thu, 23 Jun 2022 18:37:25 -0400 Subject: [PATCH 16/18] Parallelize refpkg blast --- tests/test_refpkg.py | 2 +- treesapp/refpkg.py | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/test_refpkg.py b/tests/test_refpkg.py index df8b317b..2cc58a70 100644 --- a/tests/test_refpkg.py +++ b/tests/test_refpkg.py @@ -71,7 +71,7 @@ def test_blast(self): "EYGVDYIKKKHGGIAKAKSTQEVVSDIATEVNLYGMEQYESYPTALESHFGGSQRASVLA" \ "AASGLTCSLATANSNAGLNGWYLSMLMHKEGWSRLGFFGYDLQDQCGSANSMSIRPDEGL" \ "LGELRGPNYPNYAI" - aln, seq_id, g_seq_id = self.db.blast(qseq) # type: Bio.Align.PairwiseAlignment + aln, seq_id, g_seq_id = self.db.blast(qseq, n_proc=1) # type: Bio.Align.PairwiseAlignment self.assertEqual(100, aln.score) self.assertEqual(62, round(seq_id, 0)) self.assertEqual(89, round(g_seq_id, 0)) diff --git a/treesapp/refpkg.py b/treesapp/refpkg.py index 398098a7..c0aebcc2 100644 --- a/treesapp/refpkg.py +++ b/treesapp/refpkg.py @@ -7,9 +7,8 @@ from shutil import copy from datetime import datetime as dt +import Bio.Align from Bio import Align -from Bio.SubsMat import MatrixInfo as matlist -from Bio import pairwise2 from packaging import version from ete3 import Tree from pandas import DataFrame @@ -1275,6 +1274,11 @@ def deduplicate_annotation_members(self) -> None: return + @staticmethod + def bio_aligner_helper(pw_aligner: Bio.Align.PairwiseAligner, seqA: str, seqB: str): + aln = pw_aligner.align(seqA, seqB)[0] + return aln + def blast(self, qseq: str, **kwargs) -> (Align.PairwiseAlignment, float, float): """Find the percent pairwise identity between a query sequence and its closest match in a reference package.""" aligner = Align.PairwiseAligner(mode="global") @@ -1300,8 +1304,20 @@ def _calculate_identity(sequenceA, sequenceB): ref_seqs = self.get_fasta() ref_seqs.unalign() top_aln = None + + task_list = [] for sname, sseq in ref_seqs.fasta_dict.items(): - aln = aligner.align(sseq, qseq)[0] + task_list.append({"seqA": sseq, + "seqB": qseq, + "pw_aligner": aligner}) + + results = eci.run_apply_async_multiprocessing(func=self.bio_aligner_helper, + arguments_list=task_list, + num_processes=kwargs.get('n_proc', 1), + pbar_desc="BLAST-ing refpkg", + disable=True) + + for aln in results: if not top_aln: top_aln = aln elif aln.score > top_aln.score: From 50f99715b46585db41a89630ffbfd650d84f643e Mon Sep 17 00:00:00 2001 From: cmorganl Date: Thu, 23 Jun 2022 18:38:24 -0400 Subject: [PATCH 17/18] Define str method for ClipKitHelper --- treesapp/clipkit_helper.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/treesapp/clipkit_helper.py b/treesapp/clipkit_helper.py index 6c7b91d9..938f44b4 100644 --- a/treesapp/clipkit_helper.py +++ b/treesapp/clipkit_helper.py @@ -61,6 +61,16 @@ def __init__(self, fasta_in: str, output_dir: str, self.num_refs_retained = 0 return + def __str__(self) -> str: + return "ClipKitHelper instance for MSA '{}':\n" \ + "Mode = {}\n" \ + "Gap-proportion = {}\n" \ + "Placement = {}\n" \ + "Execution time = {}s\n" \ + "Success = {}\n".format(os.path.basename(self.input), self.mode, + self.gap_prop, self.placement, + round(self.exec_time, 3), self.success) + def run(self, verbose=False, force=False) -> None: if os.path.isfile(self.ff_out) and not force: return From c29cbc70f0419848e3c418ad0f92473c8b5134c9 Mon Sep 17 00:00:00 2001 From: cmorganl Date: Fri, 16 Sep 2022 14:05:08 +0200 Subject: [PATCH 18/18] Test timing of refpkg blast --- tests/test_refpkg.py | 15 ++++++++++++++- tests/testing_utils.py | 3 +++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/test_refpkg.py b/tests/test_refpkg.py index 2cc58a70..7ac1fdc5 100644 --- a/tests/test_refpkg.py +++ b/tests/test_refpkg.py @@ -42,6 +42,7 @@ def setUp(self) -> None: self.treesapp_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__))) + os.sep for dep in ["hmmbuild", "hmmalign", "raxml-ng", "mafft", "FastTree"]: self.exe_map[dep] = fetch_executable_path(dep, utils.get_treesapp_root()) + self.n_cpu = utils.NUM_THREADS return def tearDown(self) -> None: @@ -66,12 +67,24 @@ def test_band(self): def test_blast(self): import Bio.Align + import timeit + qseq = "AMQIGMSFISXYKVCAGEAAVADLAFAAKHAGVIQMADILPARRARGPNEPGGIKFGHFC" \ "DMIQGDRKYPNDPVRANLEVVAAGAMLFDQIWLGSYMSGGVGFTQYATAAYTDNILDDYC" \ "EYGVDYIKKKHGGIAKAKSTQEVVSDIATEVNLYGMEQYESYPTALESHFGGSQRASVLA" \ "AASGLTCSLATANSNAGLNGWYLSMLMHKEGWSRLGFFGYDLQDQCGSANSMSIRPDEGL" \ "LGELRGPNYPNYAI" - aln, seq_id, g_seq_id = self.db.blast(qseq, n_proc=1) # type: Bio.Align.PairwiseAlignment + ref_pkg = self.db + + exec_time = timeit.timeit(stmt="ref_pkg.blast(qseq=qseq, n_proc=n_proc)", + globals={'ref_pkg': ref_pkg, + 'qseq': qseq, + 'n_proc': self.n_cpu}, + number=10) + self.assertTrue(0 < exec_time < 10) + + aln, seq_id, g_seq_id = ref_pkg.blast(qseq, + n_proc=self.n_cpu) # type: Bio.Align.PairwiseAlignment self.assertEqual(100, aln.score) self.assertEqual(62, round(seq_id, 0)) self.assertEqual(89, round(g_seq_id, 0)) diff --git a/tests/testing_utils.py b/tests/testing_utils.py index 4a61d371..40c40767 100644 --- a/tests/testing_utils.py +++ b/tests/testing_utils.py @@ -6,6 +6,9 @@ import ete3 +NUM_THREADS = os.cpu_count() + + def random_ete_tree(leaf_names: list, branch_len_dist=None) -> ete3.Tree: if not branch_len_dist: branch_len_dist = (0, 1)