From 69ea588692ffb2154630ebf04ab0466ede7e96ab Mon Sep 17 00:00:00 2001 From: Julia Date: Sun, 29 Dec 2024 21:02:08 +0100 Subject: [PATCH 01/36] wip: gh actions for docs --- .github/workflows/dev-docs.yml | 28 ++++++++++++++++++++++++++ .github/workflows/docs.yml | 28 -------------------------- .github/workflows/release-docs.yml | 32 ++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 28 deletions(-) create mode 100644 .github/workflows/dev-docs.yml delete mode 100644 .github/workflows/docs.yml create mode 100644 .github/workflows/release-docs.yml diff --git a/.github/workflows/dev-docs.yml b/.github/workflows/dev-docs.yml new file mode 100644 index 0000000..8119a3a --- /dev/null +++ b/.github/workflows/dev-docs.yml @@ -0,0 +1,28 @@ +name: Dev Docs + +on: + push: + branches: + - develop + +jobs: + dev-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Configure Git Credentials + run: | + git config user.name github-actions[bot] + git config user.email 41898282+github-actions[bot]@users.noreply.github.com + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + - uses: actions/cache@v4 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- + - run: pip install mkdocs-material mkdocstrings-python mike markdown-callouts + - run: mike deploy --push develop diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index 39fdeb0..0000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: docs -on: - push: - branches: - - master -permissions: - contents: write -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Configure Git Credentials - run: | - git config user.name github-actions[bot] - git config user.email 41898282+github-actions[bot]@users.noreply.github.com - - uses: actions/setup-python@v5 - with: - python-version: 
3.x - - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV - - uses: actions/cache@v4 - with: - key: mkdocs-material-${{ env.cache_id }} - path: .cache - restore-keys: | - mkdocs-material- - - run: pip install mkdocs-material mkdocstrings-python mike markdown-callouts - - run: mkdocs gh-deploy --force diff --git a/.github/workflows/release-docs.yml b/.github/workflows/release-docs.yml new file mode 100644 index 0000000..fc997ed --- /dev/null +++ b/.github/workflows/release-docs.yml @@ -0,0 +1,32 @@ +name: Release Docs + +on: + release + +jobs: + dev-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Configure Git Credentials + run: | + git config user.name github-actions[bot] + git config user.email 41898282+github-actions[bot]@users.noreply.github.com + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - name: Set release notes tag + run: | + export RELEASE_TAG_VERSION=${{ github.event.release.tag_name }} + echo "RELEASE_TAG_VERSION=${RELEASE_TAG_VERSION:1}" >> $GITHUB_ENV + - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + - uses: actions/cache@v4 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- + - run: pip install mkdocs-material mkdocstrings-python mike markdown-callouts + - run: | + mike deploy --push --update-aliases ${RELEASE_TAG_VERSION} latest + mike set-default --push latest From bdb8913e210997420a23a419481aa5ad4ae0420e Mon Sep 17 00:00:00 2001 From: Julia Date: Sun, 29 Dec 2024 21:02:44 +0100 Subject: [PATCH 02/36] wrong branch name... 
--- .github/workflows/dev-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dev-docs.yml b/.github/workflows/dev-docs.yml index 8119a3a..5b2a9b5 100644 --- a/.github/workflows/dev-docs.yml +++ b/.github/workflows/dev-docs.yml @@ -3,7 +3,7 @@ name: Dev Docs on: push: branches: - - develop + - dev jobs: dev-docs: From a912ba0a9f16d18921f7b1601198e12ed59c7b33 Mon Sep 17 00:00:00 2001 From: Julia Date: Sun, 29 Dec 2024 21:07:15 +0100 Subject: [PATCH 03/36] wrong docs version name --- .github/workflows/dev-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dev-docs.yml b/.github/workflows/dev-docs.yml index 5b2a9b5..2ac4c6d 100644 --- a/.github/workflows/dev-docs.yml +++ b/.github/workflows/dev-docs.yml @@ -25,4 +25,4 @@ jobs: restore-keys: | mkdocs-material- - run: pip install mkdocs-material mkdocstrings-python mike markdown-callouts - - run: mike deploy --push develop + - run: mike deploy --push dev From 83c51aca2034650740611b1b67deddd5a98ffd40 Mon Sep 17 00:00:00 2001 From: Julia Date: Sun, 29 Dec 2024 21:15:54 +0100 Subject: [PATCH 04/36] please work, I'm hungry --- .github/workflows/dev-docs.yml | 2 ++ .github/workflows/release-docs.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/dev-docs.yml b/.github/workflows/dev-docs.yml index 2ac4c6d..5f219c5 100644 --- a/.github/workflows/dev-docs.yml +++ b/.github/workflows/dev-docs.yml @@ -10,6 +10,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Configure Git Credentials run: | git config user.name github-actions[bot] diff --git a/.github/workflows/release-docs.yml b/.github/workflows/release-docs.yml index fc997ed..8e95dee 100644 --- a/.github/workflows/release-docs.yml +++ b/.github/workflows/release-docs.yml @@ -8,6 +8,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Configure Git Credentials run: | git 
config user.name github-actions[bot] From f16bd5221ff6abf59b673ae8c7f7446b6aaca399 Mon Sep 17 00:00:00 2001 From: Julia Date: Sun, 29 Dec 2024 22:00:07 +0100 Subject: [PATCH 05/36] wip: docs for v1 --- docs/install.md | 2 - docs/usage.md | 118 ++++++++++++++++++++++++------------------------ mkdocs.yml | 8 +--- 3 files changed, 59 insertions(+), 69 deletions(-) diff --git a/docs/install.md b/docs/install.md index 961916b..8019117 100644 --- a/docs/install.md +++ b/docs/install.md @@ -1,5 +1,3 @@ -# Installing PyPythia - ## Requirements In order to use this difficulty prediction, you need RAxML-NG installed somewhere on your system. You can find the install instructions [here](https://github.com/amkozlov/raxml-ng). diff --git a/docs/usage.md b/docs/usage.md index dc7e758..7eda6eb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,94 +1,96 @@ -# Using PyPythia - This library can be used in two ways: either directly as command line tool, or the prediction can be called from other python code. -## Command Line Interface - +## Command Line Tool If you only want to predict the difficulty for a single MSA, you can query the predictor using the command line interface, for example like this: ```commandline pythia --msa examples/example.phy --raxmlng /path/to/raxml-ng ``` Note that when you installed PyPythia using conda, you will have to download the `example.phy` and adjust the path accordingly. -The output will be something like `The predicted difficulty for MSA examples/example.phy is: 0.02.`, telling us that example.phy is an easy dataset. In fact, this dataset exhibits a single likelihood peak. Depending on the predictor version you are using, the actual value might slightly differ. This is expected and nothing to worry about 🙂 +The output will be something like `The predicted difficulty for MSA examples/example.phy is: 0.16.`, telling us that example.phy is an easy dataset. In fact, this dataset exhibits a single likelihood peak. 
Depending on the predictor version you are using, the actual value might slightly differ. This is expected and nothing to worry about 🙂 *Note that Pythia can also handle FASTA input files, see section Input Data below.* The following options are available: ```commandline -PyPythia version 2.0.0 released by The Exelixis Lab +PyPythia version 1.2.0 released by The Exelixis Lab Developed by: Julia Haag Latest version: https://github.com/tschuelia/PyPythia Questions/problems/suggestions? Please open an issue on GitHub. -usage: pythia [-h] -m MSA -r RAXMLNG [-t THREADS] [-s SEED] [-p PREFIX] - [--predictor PREDICTOR] [-prec PRECISION] [-sT] [--forceDuplicates] - [--forceFullGaps] [--shap] [-v] +usage: pythia [-h] -m MSA -r RAXMLNG [-t THREADS] [-p PREDICTOR] [-o OUTPUT] [-prec PRECISION] [-sT] [--removeDuplicates] [--forceDuplicates] + [--shap] [-v] [-b] [-q] Parser for Pythia command line options. options: -h, --help show this help message and exit - -m MSA, --msa MSA Multiple Sequence Alignment to predict the difficulty for. - Must be in either phylip or fasta format. + -m MSA, --msa MSA Multiple Sequence Alignment to predict the difficulty for. Must be in either phylip or fasta format. -r RAXMLNG, --raxmlng RAXMLNG - Path to the binary of RAxML-NG. For install instructions - see https://github.com/amkozlov/raxml-ng.(default: 'raxml- - ng' if in $PATH, otherwise this option is mandatory). + Path to the binary of RAxML-NG. For install instructions see https://github.com/amkozlov/raxml-ng. -t THREADS, --threads THREADS - Number of threads to use for parallel parsimony tree - inference (default: RAxML-NG autoconfig). - -s SEED, --seed SEED Seed for the RAxML-NG parsimony tree inference (default: - 0). - -p PREFIX, --prefix PREFIX - Prefix of the PyPythia log and result file (default: MSA - file name). - --predictor PREDICTOR - Filepath of the alternative predictor to use (default: - latest Pythia). + Number of threads to use for parallel parsimony tree inference. 
If none is set, Pythia uses the parallelization scheme + of RAxML-NG that automatically detects the optimal number of threads for your machine. + -p PREDICTOR, --predictor PREDICTOR + Filepath of the predictor to use. If not set, assume it is 'predictors/latest.pckl' in the project directory. + -o OUTPUT, --output OUTPUT + Option to specify a filepath where the result will be written to. The file will contain a single line with only the + difficulty. -prec PRECISION, --precision PRECISION - Set the number of decimals the difficulty should be rounded - to (default: 2). - -sT, --storeTrees If set, stores the parsimony trees as - '{prefix}.pythia.trees' file (default: False). - --forceDuplicates Per default, Pythia refuses to predict the difficulty for - MSAs containing duplicate sequences. Only set this option - if you are absolutely sure that you want to predict the - difficulty for this MSA (default: False). - --forceFullGaps Per default, Pythia refuses to predict the difficulty for - MSAs containing sequences with only gaps. Only set this - option if you are absolutely sure that you want to predict - the difficulty for this MSA (default: False). - --shap If set, computes the shapley values of the prediction as - waterfall plot in '{prefix}.shap.pdf'. When using this - option, make sure you understand what shapley values are - and how to interpret this plot.For details on shapley - values refer to the wiki: - https://github.com/tschuelia/PyPythia/wiki/Usage#shapley- - values (default: False). - -v, --verbose If set, additionally prints the MSA features (default: - False). + Set the number of decimals the difficulty should be rounded to. Recommended and default is 2. + -sT, --storeTrees If set, stores the parsimony trees as '{msa_name}.parsimony.trees' file. + --removeDuplicates Pythia refuses to predict the difficulty for MSAs containing duplicate sequences. 
If this option is set, PyPythia + removes the duplicate sequences, stores the reduced MSA as '{msa_name}.{phy/fasta}.pythia.reduced' and predicts the + difficulty for the reduced alignment. + --forceDuplicates Per default, Pythia refuses to predict the difficulty for MSAs containing duplicate sequences. Set this option if you + are absolutely sure that you want to predict the difficulty for this MSA. + --shap If set, computes the shapley values of the prediction as waterfall plot in '{msa_name}.shap.pdf'. When using this + option, make sure you understand what shapley values are and how to interpret this plot.For details on shapley values + refer to the wiki: https://github.com/tschuelia/PyPythia/wiki/Usage#shapley-values. + -v, --verbose If set, additionally prints the MSA features. + -b, --benchmark If set, time the runtime of the prediction. + -q, --quiet If set, Pythia does not print progress updates and only prints the predicted difficulty. ``` ## From Code - You can also use the library as a regular python library by installing it in your current environment. 
-The following code snippet shows how to predict the difficulty for an MSA using PyPythia: +Then you can query the prediction like this: ```python -from pypythia.prediction import predict_difficulty -import pathlib - -msa = pathlib.Path("examples/example.phy") -difficulty = predict_difficulty(msa) -print(f"The predicted difficulty for MSA {msa} is: {round(difficulty, 2)}.") +from pypythia.predictor import DifficultyPredictor +from pypythia.prediction import get_all_features +from pypythia.raxmlng import RAxMLNG +from pypythia.msa import MSA + +predictor = DifficultyPredictor(open("pypythia/predictors/latest.pckl", "rb")) +raxmlng = RAxMLNG("/path/to/raxml-ng") +msa = MSA("examples/example.phy") + +msa_features = get_all_features(raxmlng, msa) +difficulty = predictor.predict(msa_features) +print(difficulty) ``` +*Note that Pythia can also handle FASTA input files, see section Input Data below.* -And the output will be the same as for the CLI: `The predicted difficulty for MSA examples/example.phy is: 0.02.`. +#### Using Python multiprocessing +There are reported issues with multiprocessing in Python and LightGBM based predictors (see for example the [LightGBM FAQ](https://lightgbm.readthedocs.io/en/latest/FAQ.html#lightgbm-hangs-when-multithreading-openmp-and-using-forking-in-linux-at-the-same-time)). +We added a type check in the `predictor.py` prediction code that sets the number of threads to 1 for the prediction (`num_threads=1`) if the predictor is a LightGBM predictor. +This should not affect the previous Pythia versions using the scikit-learn predictors. Since the multithreading issues do not occur consistently, this issue is hard to debug. +If you encounter any issues with Python multiprocessing and Pythia please open a GitHub issue. -If you want to get all features, or do more specific analyses of your MSA, see the API Reference for further details on all available classes and methods. 
+## Usage Without Installation +As of version 1.0.1, PyPythia includes a script `prediction_no_install.py` in the root directory. This script contains the single function `predict_difficulty`. +Provided a path to an MSA, a path to a trained difficulty predictor (e.g. `pypythia/predictors/latest.pckl`), and a path to an executable of RAxML-NG, this fucntion +returns the predicted difficulty without requiring an installation of PyPythia. Note that this script can only be called from PyPythia's root directory. +To use this script, open it using your favorite text editor / python IDE and add the following at the end: +```python +msa_file = "path/to/your/msa" # the file path of your MSA, can be either relative or absolute +raxmlng_exe_path = "path/to/raxml-ng/bin/raxml-ng" # path pointing to the RAxML-NG executable on your system +predictor_path = "pypythia/predictors/latest.pckl" +predict_difficulty(msa_file, predictor_path, raxmlng_exe_path) +``` # Input data ### Supported file types @@ -106,10 +108,6 @@ Make sure that the MSA only contains RAxML-NG compatible taxon names. In particular, taxon labels with spaces, tabs, newlines, commas, colons, semicolons and parenthesis are invalid. ### MSAs with duplicate sequences -Pythia refuses to predict the difficulty for MSAs containing duplicate sequences or MSAs containing sequences containing only gaps. -As of version 2.0.0, Pythia removes duplicates and full-gap sequences per default and predicts the difficulty for this reduced MSA. -If you absolutely want to predict the difficulty for the original MSA, set the command line flags `--forceDuplicates` and `--forceFullGaps`. - As of version 1.0.0 Pythia refuses to predict the difficulty for MSAs containing multiple exactly identical sequences (duplicate sequences). The reason for this is that duplicate sequences can have a substantial impact on the resulting topologies during the maximum parsimony tree inference and therefore on the topological distance measures. 
@@ -145,7 +143,7 @@ The following figure shows an exemplary waterfall plot output for the MSA `examp The x-axis depicts the difficulty and the y-axis the features alongside the respective feature value. The features are sorted by their Shapley value with the highest contribution on top. You can read the plot as follows. The base line difficulty that Pythia v1.1.0 learned is 0.35, as indicated by the `E[f(x)] = 0.35` on the x-axis. The `proportion_invariant` feature contributed to the overall prediction with a shift towards `1.0` (more difficult) of `0.01`, so *in combination with the other features*, a `proportion_invariant` of `0.341` indicates that the MSA is slightly more difficult than the average difficulty in the training set. We emphasize that the *combination with the other features* part, since the same value for `proportion_invariant` with a different MSA and different feature values for the remaining features might lead to a substantially different contribution to the overall prediction. The feature with the highest impact for this example is the patterns-over-taxa ratio (`num_patterns/num_taxa`). The overall contribution is 0.23 towards `0.0`, meaning it shifts the overall prediction towards `easy`. - + ## More Details For further information please refer to [this great book on interpretable ML](https://christophm.github.io/interpretable-ml-book/shapley.html), the [documentation of the `shap` package](https://shap.readthedocs.io/en/latest/index.html), especially [their notes on the interpretability of Shapley values](https://shap.readthedocs.io/en/latest/example_notebooks/overviews/Be%20careful%20when%20interpreting%20predictive%20models%20in%20search%20of%20causal%C2%A0insights.html#Be-careful-when-interpreting-predictive-models-in-search-of-causal%C2%A0insights). 
diff --git a/mkdocs.yml b/mkdocs.yml index 764df18..e36e551 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,13 +13,7 @@ nav: - Home: index.md - Install: install.md - User Guide: usage.md -- API Reference: - - msa: api/msa.md - - raxmlng: api/raxmlng.md - - prediction: api/prediction.md - - predictor: api/predictor.md - - custom_types: api/custom_types.md - - config: api/config.md + plugins: - search - mkdocstrings From 4e9e755be7de586eaa2461b4803947a72a1cec5b Mon Sep 17 00:00:00 2001 From: Julia Date: Sun, 29 Dec 2024 22:02:49 +0100 Subject: [PATCH 06/36] reset docs to current state --- docs/install.md | 2 + docs/usage.md | 118 ++++++++++++++++++++++++------------------------ mkdocs.yml | 8 +++- 3 files changed, 69 insertions(+), 59 deletions(-) diff --git a/docs/install.md b/docs/install.md index 8019117..961916b 100644 --- a/docs/install.md +++ b/docs/install.md @@ -1,3 +1,5 @@ +# Installing PyPythia + ## Requirements In order to use this difficulty prediction, you need RAxML-NG installed somewhere on your system. You can find the install instructions [here](https://github.com/amkozlov/raxml-ng). diff --git a/docs/usage.md b/docs/usage.md index 7eda6eb..dc7e758 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,96 +1,94 @@ +# Using PyPythia + This library can be used in two ways: either directly as command line tool, or the prediction can be called from other python code. -## Command Line Tool +## Command Line Interface + If you only want to predict the difficulty for a single MSA, you can query the predictor using the command line interface, for example like this: ```commandline pythia --msa examples/example.phy --raxmlng /path/to/raxml-ng ``` Note that when you installed PyPythia using conda, you will have to download the `example.phy` and adjust the path accordingly. -The output will be something like `The predicted difficulty for MSA examples/example.phy is: 0.16.`, telling us that example.phy is an easy dataset. 
In fact, this dataset exhibits a single likelihood peak. Depending on the predictor version you are using, the actual value might slightly differ. This is expected and nothing to worry about 🙂 +The output will be something like `The predicted difficulty for MSA examples/example.phy is: 0.02.`, telling us that example.phy is an easy dataset. In fact, this dataset exhibits a single likelihood peak. Depending on the predictor version you are using, the actual value might slightly differ. This is expected and nothing to worry about 🙂 *Note that Pythia can also handle FASTA input files, see section Input Data below.* The following options are available: ```commandline -PyPythia version 1.2.0 released by The Exelixis Lab +PyPythia version 2.0.0 released by The Exelixis Lab Developed by: Julia Haag Latest version: https://github.com/tschuelia/PyPythia Questions/problems/suggestions? Please open an issue on GitHub. -usage: pythia [-h] -m MSA -r RAXMLNG [-t THREADS] [-p PREDICTOR] [-o OUTPUT] [-prec PRECISION] [-sT] [--removeDuplicates] [--forceDuplicates] - [--shap] [-v] [-b] [-q] +usage: pythia [-h] -m MSA -r RAXMLNG [-t THREADS] [-s SEED] [-p PREFIX] + [--predictor PREDICTOR] [-prec PRECISION] [-sT] [--forceDuplicates] + [--forceFullGaps] [--shap] [-v] Parser for Pythia command line options. options: -h, --help show this help message and exit - -m MSA, --msa MSA Multiple Sequence Alignment to predict the difficulty for. Must be in either phylip or fasta format. + -m MSA, --msa MSA Multiple Sequence Alignment to predict the difficulty for. + Must be in either phylip or fasta format. -r RAXMLNG, --raxmlng RAXMLNG - Path to the binary of RAxML-NG. For install instructions see https://github.com/amkozlov/raxml-ng. + Path to the binary of RAxML-NG. For install instructions + see https://github.com/amkozlov/raxml-ng.(default: 'raxml- + ng' if in $PATH, otherwise this option is mandatory). 
-t THREADS, --threads THREADS - Number of threads to use for parallel parsimony tree inference. If none is set, Pythia uses the parallelization scheme - of RAxML-NG that automatically detects the optimal number of threads for your machine. - -p PREDICTOR, --predictor PREDICTOR - Filepath of the predictor to use. If not set, assume it is 'predictors/latest.pckl' in the project directory. - -o OUTPUT, --output OUTPUT - Option to specify a filepath where the result will be written to. The file will contain a single line with only the - difficulty. + Number of threads to use for parallel parsimony tree + inference (default: RAxML-NG autoconfig). + -s SEED, --seed SEED Seed for the RAxML-NG parsimony tree inference (default: + 0). + -p PREFIX, --prefix PREFIX + Prefix of the PyPythia log and result file (default: MSA + file name). + --predictor PREDICTOR + Filepath of the alternative predictor to use (default: + latest Pythia). -prec PRECISION, --precision PRECISION - Set the number of decimals the difficulty should be rounded to. Recommended and default is 2. - -sT, --storeTrees If set, stores the parsimony trees as '{msa_name}.parsimony.trees' file. - --removeDuplicates Pythia refuses to predict the difficulty for MSAs containing duplicate sequences. If this option is set, PyPythia - removes the duplicate sequences, stores the reduced MSA as '{msa_name}.{phy/fasta}.pythia.reduced' and predicts the - difficulty for the reduced alignment. - --forceDuplicates Per default, Pythia refuses to predict the difficulty for MSAs containing duplicate sequences. Set this option if you - are absolutely sure that you want to predict the difficulty for this MSA. - --shap If set, computes the shapley values of the prediction as waterfall plot in '{msa_name}.shap.pdf'. 
When using this - option, make sure you understand what shapley values are and how to interpret this plot.For details on shapley values - refer to the wiki: https://github.com/tschuelia/PyPythia/wiki/Usage#shapley-values. - -v, --verbose If set, additionally prints the MSA features. - -b, --benchmark If set, time the runtime of the prediction. - -q, --quiet If set, Pythia does not print progress updates and only prints the predicted difficulty. + Set the number of decimals the difficulty should be rounded + to (default: 2). + -sT, --storeTrees If set, stores the parsimony trees as + '{prefix}.pythia.trees' file (default: False). + --forceDuplicates Per default, Pythia refuses to predict the difficulty for + MSAs containing duplicate sequences. Only set this option + if you are absolutely sure that you want to predict the + difficulty for this MSA (default: False). + --forceFullGaps Per default, Pythia refuses to predict the difficulty for + MSAs containing sequences with only gaps. Only set this + option if you are absolutely sure that you want to predict + the difficulty for this MSA (default: False). + --shap If set, computes the shapley values of the prediction as + waterfall plot in '{prefix}.shap.pdf'. When using this + option, make sure you understand what shapley values are + and how to interpret this plot.For details on shapley + values refer to the wiki: + https://github.com/tschuelia/PyPythia/wiki/Usage#shapley- + values (default: False). + -v, --verbose If set, additionally prints the MSA features (default: + False). ``` ## From Code + You can also use the library as a regular python library by installing it in your current environment. 
-Then you can query the prediction like this: +The following code snippet shows how to predict the difficulty for an MSA using PyPythia: ```python -from pypythia.predictor import DifficultyPredictor -from pypythia.prediction import get_all_features -from pypythia.raxmlng import RAxMLNG -from pypythia.msa import MSA - -predictor = DifficultyPredictor(open("pypythia/predictors/latest.pckl", "rb")) -raxmlng = RAxMLNG("/path/to/raxml-ng") -msa = MSA("examples/example.phy") - -msa_features = get_all_features(raxmlng, msa) -difficulty = predictor.predict(msa_features) -print(difficulty) +from pypythia.prediction import predict_difficulty +import pathlib + +msa = pathlib.Path("examples/example.phy") +difficulty = predict_difficulty(msa) +print(f"The predicted difficulty for MSA {msa} is: {round(difficulty, 2)}.") ``` -*Note that Pythia can also handle FASTA input files, see section Input Data below.* -#### Using Python multiprocessing -There are reported issues with multiprocessing in Python and LightGBM based predictors (see for example the [LightGBM FAQ](https://lightgbm.readthedocs.io/en/latest/FAQ.html#lightgbm-hangs-when-multithreading-openmp-and-using-forking-in-linux-at-the-same-time)). -We added a type check in the `predictor.py` prediction code that sets the number of threads to 1 for the prediction (`num_threads=1`) if the predictor is a LightGBM predictor. -This should not affect the previous Pythia versions using the scikit-learn predictors. Since the multithreading issues do not occur consistently, this issue is hard to debug. -If you encounter any issues with Python multiprocessing and Pythia please open a GitHub issue. +And the output will be the same as for the CLI: `The predicted difficulty for MSA examples/example.phy is: 0.02.`. -## Usage Without Installation -As of version 1.0.1, PyPythia includes a script `prediction_no_install.py` in the root directory. This script contains the single function `predict_difficulty`. 
-Provided a path to an MSA, a path to a trained difficulty predictor (e.g. `pypythia/predictors/latest.pckl`), and a path to an executable of RAxML-NG, this fucntion -returns the predicted difficulty without requiring an installation of PyPythia. Note that this script can only be called from PyPythia's root directory. +If you want to get all features, or do more specific analyses of your MSA, see the API Reference for further details on all available classes and methods. -To use this script, open it using your favorite text editor / python IDE and add the following at the end: -```python -msa_file = "path/to/your/msa" # the file path of your MSA, can be either relative or absolute -raxmlng_exe_path = "path/to/raxml-ng/bin/raxml-ng" # path pointing to the RAxML-NG executable on your system -predictor_path = "pypythia/predictors/latest.pckl" -predict_difficulty(msa_file, predictor_path, raxmlng_exe_path) -``` # Input data ### Supported file types @@ -108,6 +106,10 @@ Make sure that the MSA only contains RAxML-NG compatible taxon names. In particular, taxon labels with spaces, tabs, newlines, commas, colons, semicolons and parenthesis are invalid. ### MSAs with duplicate sequences +Pythia refuses to predict the difficulty for MSAs containing duplicate sequences or MSAs containing sequences containing only gaps. +As of version 2.0.0, Pythia removes duplicates and full-gap sequences per default and predicts the difficulty for this reduced MSA. +If you absolutely want to predict the difficulty for the original MSA, set the command line flags `--forceDuplicates` and `--forceFullGaps`. + As of version 1.0.0 Pythia refuses to predict the difficulty for MSAs containing multiple exactly identical sequences (duplicate sequences). The reason for this is that duplicate sequences can have a substantial impact on the resulting topologies during the maximum parsimony tree inference and therefore on the topological distance measures. 
@@ -143,7 +145,7 @@ The following figure shows an exemplary waterfall plot output for the MSA `examp The x-axis depicts the difficulty and the y-axis the features alongside the respective feature value. The features are sorted by their Shapley value with the highest contribution on top. You can read the plot as follows. The base line difficulty that Pythia v1.1.0 learned is 0.35, as indicated by the `E[f(x)] = 0.35` on the x-axis. The `proportion_invariant` feature contributed to the overall prediction with a shift towards `1.0` (more difficult) of `0.01`, so *in combination with the other features*, a `proportion_invariant` of `0.341` indicates that the MSA is slightly more difficult than the average difficulty in the training set. We emphasize that the *combination with the other features* part, since the same value for `proportion_invariant` with a different MSA and different feature values for the remaining features might lead to a substantially different contribution to the overall prediction. The feature with the highest impact for this example is the patterns-over-taxa ratio (`num_patterns/num_taxa`). The overall contribution is 0.23 towards `0.0`, meaning it shifts the overall prediction towards `easy`. - + ## More Details For further information please refer to [this great book on interpretable ML](https://christophm.github.io/interpretable-ml-book/shapley.html), the [documentation of the `shap` package](https://shap.readthedocs.io/en/latest/index.html), especially [their notes on the interpretability of Shapley values](https://shap.readthedocs.io/en/latest/example_notebooks/overviews/Be%20careful%20when%20interpreting%20predictive%20models%20in%20search%20of%20causal%C2%A0insights.html#Be-careful-when-interpreting-predictive-models-in-search-of-causal%C2%A0insights). 
diff --git a/mkdocs.yml b/mkdocs.yml index e36e551..764df18 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,7 +13,13 @@ nav: - Home: index.md - Install: install.md - User Guide: usage.md - +- API Reference: + - msa: api/msa.md + - raxmlng: api/raxmlng.md + - prediction: api/prediction.md + - predictor: api/predictor.md + - custom_types: api/custom_types.md + - config: api/config.md plugins: - search - mkdocstrings From 68f8b82fbb9aa54111cb820ec89edc73f2501da3 Mon Sep 17 00:00:00 2001 From: Julia Date: Sun, 29 Dec 2024 22:30:09 +0100 Subject: [PATCH 07/36] more docs --- docs/api/config.md | 2 + docs/api/custom_types.md | 2 + docs/api/msa.md | 4 ++ docs/api/prediction.md | 2 + docs/api/predictor.md | 1 + docs/api/raxmlng.md | 2 + docs/conf.py | 36 ------------ docs/index.md | 56 +++++++++++-------- docs/install.md | 60 +++++++++++++++----- docs/usage.md | 115 ++++++++++++++++++++++++++++----------- generate_api_docs.py | 4 ++ pypythia/predictor.py | 4 +- 12 files changed, 181 insertions(+), 107 deletions(-) delete mode 100644 docs/conf.py diff --git a/docs/api/config.md b/docs/api/config.md index b7ff8a0..58a21fa 100644 --- a/docs/api/config.md +++ b/docs/api/config.md @@ -3,8 +3,10 @@ options: show_root_heading: true + modernize_annotations: true ::: pypythia.config.DEFAULT_RAXMLNG_EXE options: show_root_heading: true + modernize_annotations: true diff --git a/docs/api/custom_types.md b/docs/api/custom_types.md index 31bd892..cde8acd 100644 --- a/docs/api/custom_types.md +++ b/docs/api/custom_types.md @@ -5,6 +5,7 @@ show_root_heading: true merge_init_into_class: false group_by_category: true + modernize_annotations: true ::: pypythia.custom_types.FileFormat @@ -12,3 +13,4 @@ show_root_heading: true merge_init_into_class: false group_by_category: true + modernize_annotations: true diff --git a/docs/api/msa.md b/docs/api/msa.md index 25ceb81..4e95b8d 100644 --- a/docs/api/msa.md +++ b/docs/api/msa.md @@ -5,18 +5,22 @@ show_root_heading: true merge_init_into_class: 
false group_by_category: true + modernize_annotations: true ::: pypythia.msa.parse options: show_root_heading: true + modernize_annotations: true ::: pypythia.msa.remove_full_gap_sequences options: show_root_heading: true + modernize_annotations: true ::: pypythia.msa.deduplicate_sequences options: show_root_heading: true + modernize_annotations: true diff --git a/docs/api/prediction.md b/docs/api/prediction.md index ae7ddfd..4545e8d 100644 --- a/docs/api/prediction.md +++ b/docs/api/prediction.md @@ -3,8 +3,10 @@ options: show_root_heading: true + modernize_annotations: true ::: pypythia.prediction.collect_features options: show_root_heading: true + modernize_annotations: true diff --git a/docs/api/predictor.md b/docs/api/predictor.md index 646bc8a..528a696 100644 --- a/docs/api/predictor.md +++ b/docs/api/predictor.md @@ -5,3 +5,4 @@ show_root_heading: true merge_init_into_class: false group_by_category: true + modernize_annotations: true diff --git a/docs/api/raxmlng.md b/docs/api/raxmlng.md index 10045a0..78eebba 100644 --- a/docs/api/raxmlng.md +++ b/docs/api/raxmlng.md @@ -5,8 +5,10 @@ show_root_heading: true merge_init_into_class: false group_by_category: true + modernize_annotations: true ::: pypythia.raxmlng.run_raxmlng_command options: show_root_heading: true + modernize_annotations: true diff --git a/docs/conf.py b/docs/conf.py deleted file mode 100644 index 7dd60aa..0000000 --- a/docs/conf.py +++ /dev/null @@ -1,36 +0,0 @@ -# Configuration file for the Sphinx documentation builder. 
-import datetime - -# -- Project information - -project = "Pandora" -copyright = f"{datetime.date.today().year}, Julia Haag" -author = "Julia Haag" - -release = "0.1" -version = "0.1.0" - -# -- General configuration - -extensions = [ - "sphinx.ext.duration", - "sphinx.ext.doctest", - "sphinx.ext.autodoc", - "sphinx.ext.autosummary", - "sphinx.ext.intersphinx", -] - -intersphinx_mapping = { - "python": ("https://docs.python.org/3/", None), - "sphinx": ("https://www.sphinx-doc.org/en/master/", None), -} -intersphinx_disabled_domains = ["std"] - -templates_path = ["_templates"] - -# -- Options for HTML output - -html_theme = "sphinx_rtd_theme" - -# -- Options for EPUB output -epub_show_urls = "footnote" diff --git a/docs/index.md b/docs/index.md index a9b4705..084813f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,43 +1,53 @@ # Home -Welcome to the PyPythia documentation. Pythia is a lightweight python library to predict the difficulty of Multiple Sequence Alignments (MSAs). +Welcome to the PyPythia documentation. - -### C Library - -The same functionality is also available as C library [here](https://github.com/tschuelia/difficulty_prediction). -Since the C library depends on [Coraxlib](https://codeberg.org/Exelixis-Lab/coraxlib) it is not as easy and fast to use as this python library. -If you are only interested in the difficulty of your MSA, we recommend using this Python library. -If you want to incorporate the difficulty prediction in a phylogenetic tool, we recommend using the faster C library. - +Pythia is a lightweight python library to predict the difficulty of Multiple Sequence Alignments (MSA). Phylogenetic +analyzes under the Maximum-Likelihood (ML) model are time and resource intensive. To adequately capture the vastness of +tree space, one needs to infer multiple independent trees. 
On some datasets, multiple tree inferences converge to +similar tree topologies, on others to multiple, topologically highly distinct yet statistically indistinguishable +topologies. Pythia predicts the degree of difficulty of analyzing a dataset prior to initiating ML-based tree +inferences. Predicting the difficulty using Pythia is substantially faster than inferring multiple ML trees using +RAxML-NG. Pythia can be used to increase user awareness with respect to the amount of signal and uncertainty to be +expected in phylogenetic analyzes, and hence inform an appropriate (post-)analysis setup. Further, it can be used to +select appropriate search algorithms for easy-, intermediate-, and hard-to-analyze datasets. Pythia supports DNA, AA, +and morphological data in Phylip and FASTA format. ### Support -If you encounter any trouble using Pythia, have a question, or you find a bug, please feel free to open an issue here. - +If you encounter any trouble using Pythia, have a question, or you find a bug, please feel free to open an +issue [here](https://github.com/tschuelia/PyPythia/issues). ### Publication The paper explaining the details of Pythia is published in MBE: -Haag, J., Höhler, D., Bettisworth, B., & Stamatakis, A. (2022). **From Easy to Hopeless - Predicting the Difficulty of Phylogenetic Analyses.** *Molecular Biology and Evolution*, 39(12). [https://doi.org/10.1093/molbev/msac254](https://doi.org/10.1093/molbev/msac254) +Haag, J., Höhler, D., Bettisworth, B., & Stamatakis, A. (2022). **From Easy to Hopeless - Predicting the Difficulty of +Phylogenetic Analyses.** *Molecular Biology and Evolution*, 39( +12). [https://doi.org/10.1093/molbev/msac254](https://doi.org/10.1093/molbev/msac254) > [!WARNING] > Since this publication, we made some considerable changes to Pythia. -> The most important change is that we switched from using a Random Forest Regressor to using a LightGBM Gradient Boosted Tree Regressor. -> This affects all Pythia versions >= 1. 
If you use Pythia in your work, please state the correct learning algorithm. If you are unsure, feel free to reach out to me 🙂 - - +> The most important change is that we switched from using a Random Forest Regressor to using a LightGBM Gradient +> Boosted Tree Regressor. +> This affects all Pythia versions >= 1. If you use Pythia in your work, please state the correct learning algorithm. If +> you are unsure, feel free to reach out to me 🙂 ### References * A. M. Kozlov, D. Darriba, T. Flouri, B. Morel, and A. Stamatakis (2019) -**RAxML-NG: a fast, scalable and user-friendly tool for maximum likelihood phylogenetic inference** -*Bioinformatics*, 35(21): 4453–4455. -[https://doi.org/10.1093/bioinformatics/btz305](https://doi.org/10.1093/bioinformatics/btz305) + **RAxML-NG: a fast, scalable and user-friendly tool for maximum likelihood phylogenetic inference** + *Bioinformatics*, 35(21): 4453–4455. + [https://doi.org/10.1093/bioinformatics/btz305](https://doi.org/10.1093/bioinformatics/btz305) * D. Höhler, W. Pfeiffer, V. Ioannidis, H. Stockinger, A. Stamatakis (2022) -**RAxML Grove: an empirical phylogenetic tree database** -*Bioinformatics*, 38(6):1741–1742. -[https://doi.org/10.1093/bioinformatics/btab863](https://doi.org/10.1093/bioinformatics/btab863) + **RAxML Grove: an empirical phylogenetic tree database** + *Bioinformatics*, 38(6):1741–1742. + [https://doi.org/10.1093/bioinformatics/btab863](https://doi.org/10.1093/bioinformatics/btab863) + +### CPythia -For full documentation visit [mkdocs.org](https://www.mkdocs.org). +The same functionality is also available as C library [here](https://github.com/tschuelia/difficulty_prediction). +Since the C library depends on [Coraxlib](https://codeberg.org/Exelixis-Lab/coraxlib) it is not as easy and fast to use +as this python library. +If you are only interested in the difficulty of your MSA, we recommend using this Python library. 
+If you want to incorporate the difficulty prediction in a phylogenetic tool, we recommend using the faster C library. diff --git a/docs/install.md b/docs/install.md index 961916b..9c55814 100644 --- a/docs/install.md +++ b/docs/install.md @@ -1,33 +1,41 @@ # Installing PyPythia ## Requirements -In order to use this difficulty prediction, you need RAxML-NG installed somewhere on your system. You can find the install instructions [here](https://github.com/amkozlov/raxml-ng). +In order to use this difficulty prediction, you need RAxML-NG installed somewhere on your system. You can find the +install instructions [here](https://github.com/amkozlov/raxml-ng). ## Install using conda (recommended) + The easiest (and recommended) way to install PyPythia is by using conda: + ``` conda install pypythia -c conda-forge -c nodefaults ``` - ## Install using pip + You can also install Pythia using the python package manager pip: + ``` pip install git+https://github.com/tschuelia/PyPythia.git ``` -Please note that this can lead to issues with package versions and dependencies when installing in an existing (conda) environment. +Please note that this can lead to issues with package versions and dependencies when installing in an existing (conda) +environment. Verify the correct installation by running `pythia -h`. ### Installing a specific tag/version + You can again use pip for this and simply specify the tag you wish to install, e.g. for version `0.0.1` run: + ``` pip install git+https://github.com/tschuelia/PyPythia.git@0.0.1 ``` ## Installation from source + You can install Pythia from source if you want to explore the code or get the lastest development version. To do so run the following steps: @@ -40,39 +48,65 @@ pip install . Verify the correct installation by running `pythia -h`. ## Troubleshooting -Most issues when installing Pythia seem to arise from broken or non-working LightGBM installations. 
If you encounter any such problem, and none of the following options help, please refer to the LightGBM [installation instructions](https://github.com/microsoft/LightGBM/tree/master/python-package) for your operating system and install LightGBM manually _before_ repeating the Pythia installation as described above. + +Most issues when installing Pythia seem to arise from broken or non-working LightGBM installations. If you encounter any +such problem, and none of the following options help, please refer to the +LightGBM [installation instructions](https://github.com/microsoft/LightGBM/tree/master/python-package) for your +operating system and install LightGBM manually _before_ repeating the Pythia installation as described above. ### Python version -Since Pythia version 1.1.0 we provide the option to output the Shapley values for your prediction. Currently, the `shap` package does not support Python Version 3.11. The requirements should take care of the correct Python version, but if you encounter any issues, please first check that the Python version is <3.11. You can do so by typing `python --version` in your terminal and checking the output. + +Since Pythia version 1.1.0 we provide the option to output the Shapley values for your prediction. Currently, the `shap` +package does not support Python Version 3.11. The requirements should take care of the correct Python version, but if +you encounter any issues, please first check that the Python version is <3.11. You can do so by typing +`python --version` in your terminal and checking the output. ### Installing on M1 chips -Installing on MacBooks with M1 chips caused some trouble for some users that seem to be caused by LightGBM's multiprocessing support. 
If you encounter any errors with the log pointing to LightGBM, the first thing you could try is to install LightGBM using [homebrew](https://brew.sh/index): + +Installing on MacBooks with M1 chips caused some trouble for some users that seem to be caused by LightGBM's +multiprocessing support. If you encounter any errors with the log pointing to LightGBM, the first thing you could try is +to install LightGBM using [homebrew](https://brew.sh/index): + ``` brew install lightgbm ``` + This might take a few minutes to finish. Once this ran successfully you can try to rerun the install instructions above. -If this does not solve your problem, you can try to install LightGBM manually using pip and disabling the multiprocessing: +If this does not solve your problem, you can try to install LightGBM manually using pip and disabling the +multiprocessing: + ``` pip install lightgbm --install-option=--nomp ``` -and then rerun the installation of PyPythia. Thanks [@willbour](https://github.com/willbour) for finding the fix for this! + +and then rerun the installation of PyPythia. Thanks [@willbour](https://github.com/willbour) for finding the fix for +this! ### Using a clean conda environment -When using conda and installing PyPythia using pip in an existing environment, you might encounter dependency or version related issues. To check whether this is the case or you have a general issue with Pythia please try to create a new, clean conda environment: + +When using conda and installing PyPythia using pip in an existing environment, you might encounter dependency or version +related issues. To check whether this is the case or you have a general issue with Pythia please try to create a new, +clean conda environment: 1. Use the provided environment file `etc/environment.yml` and create a new conda environment: + ``` conda env create --file etc/environment.yml ``` -If you want to install a different version of Pythia, you can add the git tag by appending `@[version]` (e.g. 
for version 1.1.0 append `@1.1.0`) after `.git` in the `etc/environment.yml` file. + +If you want to install a different version of Pythia, you can add the git tag by appending `@[version]` (e.g. for +version 1.1.0 append `@1.1.0`) after `.git` in the `etc/environment.yml` file. 2. Activate the conda environment: `conda activate pythia` 3. Try to (re)run Pythia. - ### Running Pythia -If you are having trouble running pythia, you can also replace `pythia` with `python pypythia/prediction.py`. For this to work you need to install Pythia from source and you need to be in the PyPythia directory (which you should be after the installation). -If this does not work either, as of version 1.0.1, PyPythia includes a Python script that allows predictions from code without installing Pythia. See the Usage wiki page for more details. +If you are having trouble running pythia, you can also replace `pythia` with `python pypythia/prediction.py`. For this +to work you need to install Pythia from source and you need to be in the PyPythia directory (which you should be after +the installation). + +If this does not work either, as of version 1.0.1, PyPythia includes a Python script that allows predictions from code +without installing Pythia. See the Usage wiki page for more details. diff --git a/docs/usage.md b/docs/usage.md index dc7e758..990d8b8 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,20 +1,28 @@ # Using PyPythia -This library can be used in two ways: either directly as command line tool, or the prediction can be called from other python code. +This library can be used in two ways: either directly as command line tool, or the prediction can be called from other +python code. 
## Command Line Interface -If you only want to predict the difficulty for a single MSA, you can query the predictor using the command line interface, for example like this: +If you only want to predict the difficulty for a single MSA, you can query the predictor using the command line +interface, for example like this: + ```commandline pythia --msa examples/example.phy --raxmlng /path/to/raxml-ng ``` -Note that when you installed PyPythia using conda, you will have to download the `example.phy` and adjust the path accordingly. -The output will be something like `The predicted difficulty for MSA examples/example.phy is: 0.02.`, telling us that example.phy is an easy dataset. In fact, this dataset exhibits a single likelihood peak. Depending on the predictor version you are using, the actual value might slightly differ. This is expected and nothing to worry about 🙂 +Note that when you installed PyPythia using conda, you will have to download the `example.phy` and adjust the path +accordingly. + +The output will be something like `The predicted difficulty for MSA examples/example.phy is: 0.02.`, telling us that +example.phy is an easy dataset. In fact, this dataset exhibits a single likelihood peak. Depending on the predictor +version you are using, the actual value might slightly differ. This is expected and nothing to worry about 🙂 *Note that Pythia can also handle FASTA input files, see section Input Data below.* The following options are available: + ```commandline PyPythia version 2.0.0 released by The Exelixis Lab Developed by: Julia Haag @@ -70,7 +78,6 @@ options: False). ``` - ## From Code You can also use the library as a regular python library by installing it in your current environment. @@ -87,65 +94,107 @@ print(f"The predicted difficulty for MSA {msa} is: {round(difficulty, 2)}.") And the output will be the same as for the CLI: `The predicted difficulty for MSA examples/example.phy is: 0.02.`. 
-If you want to get all features, or do more specific analyses of your MSA, see the API Reference for further details on all available classes and methods. +If you want to get all features, or do more specific analyses of your MSA, see the API Reference for further details on +all available classes and methods. +## Input data -# Input data ### Supported file types + The input for Pythia is an MSA file in either Phylip or FASTA format. ### Supported data types + Pythia supports DNA, AA, and morphological data. -Please note that by morphological data we refer to biological data. According to our analyses, the attributes of biological morphological data are similar to the attributes of DNA and AA data. -However, when analyzing language data (cognate, sound-class, and morphosyntactic data) we observed substantially distinct attributes and concluded that morphological language data is not -comparable to DNA, AA, or biological morphological data. Thus, at the moment Pythia is not able to reliably predict the difficulty for language alignments. +Please note that by morphological data we refer to biological data. According to our analyses, the attributes of +biological morphological data are similar to the attributes of DNA and AA data. +However, when analyzing language data (cognate, sound-class, and morphosyntactic data) we observed substantially +distinct attributes and concluded that morphological language data is not +comparable to DNA, AA, or biological morphological data. Thus, at the moment Pythia is not able to reliably predict the +difficulty for language alignments. ### Taxon names + Make sure that the MSA only contains RAxML-NG compatible taxon names. In particular, taxon labels with spaces, tabs, newlines, commas, colons, semicolons and parenthesis are invalid. ### MSAs with duplicate sequences -Pythia refuses to predict the difficulty for MSAs containing duplicate sequences or MSAs containing sequences containing only gaps. 
-As of version 2.0.0, Pythia removes duplicates and full-gap sequences per default and predicts the difficulty for this reduced MSA. -If you absolutely want to predict the difficulty for the original MSA, set the command line flags `--forceDuplicates` and `--forceFullGaps`. -As of version 1.0.0 Pythia refuses to predict the difficulty for MSAs containing multiple exactly identical sequences (duplicate sequences). -The reason for this is that duplicate sequences can have a substantial impact on the resulting topologies during the maximum parsimony tree inference +Pythia refuses to predict the difficulty for MSAs containing duplicate sequences or MSAs containing sequences containing +only gaps. +As of version 2.0.0, Pythia removes duplicates and full-gap sequences per default and predicts the difficulty for this +reduced MSA. +If you absolutely want to predict the difficulty for the original MSA, set the command line flags `--forceDuplicates` +and `--forceFullGaps`. + +As of version 1.0.0 Pythia refuses to predict the difficulty for MSAs containing multiple exactly identical sequences ( +duplicate sequences). +The reason for this is that duplicate sequences can have a substantial impact on the resulting topologies during the +maximum parsimony tree inference and therefore on the topological distance measures. -If you set the command line option `--removeDuplicates`, Pythia will create a reduced alignment with all duplicates removed and predict the difficulty for this reduced alignment. +If you set the command line option `--removeDuplicates`, Pythia will create a reduced alignment with all duplicates +removed and predict the difficulty for this reduced alignment. For duplicate sequences, the first occurrence of the sequence is kept. -WARNING: The resulting predicted difficulty is only applicable to the reduced MSA! We recommend to only use the created reduced alignment for your subsequent analyses. 
+WARNING: The resulting predicted difficulty is only applicable to the reduced MSA! We recommend to only use the created +reduced alignment for your subsequent analyses. +## Predictors -# Predictors -To continuously and automatically improve the prediction accuracy of Pythia, we regularly extend the training data set and subsequently retrain the predictor. -We extend the training data using the anonymized MSAs that we continuously obtain during our RAxML Grove database updates. +To continuously and automatically improve the prediction accuracy of Pythia, we regularly extend the training data set +and subsequently retrain the predictor. +We extend the training data using the anonymized MSAs that we continuously obtain during our RAxML Grove database +updates. Note that these MSAs are only available internally in RAxML Grove and are not publicly available. -As per default, PyPythia uses the lastest predictor `predictors/latest.pckl`. Older versions of the trained predictors are available in the `predictors` directory and can be passed to Pythia (see Usage instructions above). All predictors of versions >= 1.0.0 are trained using DNA, AA, and morphological MSAs. +As per default, PyPythia uses the lastest predictor `predictors/latest.pckl`. Older versions of the trained predictors +are available in the `predictors` directory and can be passed to Pythia (see Usage instructions above). All predictors +of versions >= 1.0.0 are trained using DNA, AA, and morphological MSAs. Note that the predictions for the same MSA can be different when using different versions of Pythia. +## SHAP Waterfall Plot -# Shapley Values -As of version 1.1.0, Pythia includes an option to plot Shapley values for a prediction. The interpretation of Shapley values is not straight-forward and we emphasize the importance of learning about these values before drawing conclusions based on the resulting plot! +As of version 1.1.0, Pythia includes an option to plot Shapley values for a prediction. 
The interpretation of Shapley +values is not straight-forward, and we emphasize the importance of learning about these values before drawing conclusions +based on the resulting plot! We provide the Shapley values as waterfall plot. -In the following, we briefly describe what Shapley values are, what a waterfall plot is, and how you can interpret this plot. -It is important to note that Shapley values are not the same as feature importances. Predicting the difficulty of two distinct MSAs will lead to two distinct waterfall plots. +In the following, we briefly describe what Shapley values are, what a waterfall plot is, and how you can interpret this +plot. +It is important to note that Shapley values are not the same as feature importances. Predicting the difficulty of two +distinct MSAs will lead to two distinct waterfall plots. -## Shapley Values -Based on the training data, our difficulty predictor Pythia has learned a base line difficulty. This base line difficulty is the expected value for every new prediction. Starting off this base line, Pythia adjusts its prediction using the features of the MSA. To determine how much each feature contributes to this change, ultimately leading to the final prediction is estimated by Shapley values. Since Pythia is a tree-based regressor, computing the Shapley values requires some advanced mathematics that I won't go into detail about here. If you are interested in this check out the links in the More Details section below. Due to the calculation of Shapley values, the value for one feature is NOT the difference in prediction when removing this feature. The Shapley value for one feature can only be interpreted considering all feature values together for a specific set of feature values. +### Shapley Values -## Waterfall plot -The following figure shows an exemplary waterfall plot output for the MSA `example/example.py` and Pythia version 1.1.0. 
+Based on the training data, our difficulty predictor Pythia has learned a base line difficulty. This base line +difficulty is the expected value for every new prediction. Starting off this base line, Pythia adjusts its prediction +using the features of the MSA. To determine how much each feature contributes to this change, ultimately leading to the +final prediction is estimated by Shapley values. Since Pythia is a tree-based regressor, computing the Shapley values +requires some advanced mathematics that I won't go into detail about here. If you are interested in this check out the +links in the More Details section below. Due to the calculation of Shapley values, the value for one feature is NOT the +difference in prediction when removing this feature. The Shapley value for one feature can only be interpreted +considering all feature values together for a specific set of feature values. +### Waterfall plot +The following figure shows an exemplary waterfall plot output for the MSA `example/example.py` and Pythia version 1.1.0. -The x-axis depicts the difficulty and the y-axis the features alongside the respective feature value. The features are sorted by their Shapley value with the highest contribution on top. You can read the plot as follows. The base line difficulty that Pythia v1.1.0 learned is 0.35, as indicated by the `E[f(x)] = 0.35` on the x-axis. The `proportion_invariant` feature contributed to the overall prediction with a shift towards `1.0` (more difficult) of `0.01`, so *in combination with the other features*, a `proportion_invariant` of `0.341` indicates that the MSA is slightly more difficult than the average difficulty in the training set. We emphasize that the *combination with the other features* part, since the same value for `proportion_invariant` with a different MSA and different feature values for the remaining features might lead to a substantially different contribution to the overall prediction. 
-The feature with the highest impact for this example is the patterns-over-taxa ratio (`num_patterns/num_taxa`). The overall contribution is 0.23 towards `0.0`, meaning it shifts the overall prediction towards `easy`. +The x-axis depicts the difficulty and the y-axis the features alongside the respective feature value. The features are +sorted by their Shapley value with the highest contribution on top. You can read the plot as follows. The base line +difficulty that Pythia v1.1.0 learned is 0.35, as indicated by the `E[f(x)] = 0.35` on the x-axis. The +`proportion_invariant` feature contributed to the overall prediction with a shift towards `1.0` (more difficult) of +`0.01`, so *in combination with the other features*, a `proportion_invariant` of `0.341` indicates that the MSA is +slightly more difficult than the average difficulty in the training set. We emphasize that the *combination with the +other features* part, since the same value for `proportion_invariant` with a different MSA and different feature values +for the remaining features might lead to a substantially different contribution to the overall prediction. +The feature with the highest impact for this example is the patterns-over-taxa ratio (`num_patterns/num_taxa`). The +overall contribution is 0.23 towards `0.0`, meaning it shifts the overall prediction towards `easy`. -## More Details -For further information please refer to [this great book on interpretable ML](https://christophm.github.io/interpretable-ml-book/shapley.html), the [documentation of the `shap` package](https://shap.readthedocs.io/en/latest/index.html), especially [their notes on the interpretability of Shapley values](https://shap.readthedocs.io/en/latest/example_notebooks/overviews/Be%20careful%20when%20interpreting%20predictive%20models%20in%20search%20of%20causal%C2%A0insights.html#Be-careful-when-interpreting-predictive-models-in-search-of-causal%C2%A0insights). 
+### More Details + +For further information please refer +to [this great book on interpretable ML](https://christophm.github.io/interpretable-ml-book/shapley.html), +the [documentation of the `shap` package](https://shap.readthedocs.io/en/latest/index.html), +especially [their notes on the interpretability of Shapley values](https://shap.readthedocs.io/en/latest/example_notebooks/overviews/Be%20careful%20when%20interpreting%20predictive%20models%20in%20search%20of%20causal%C2%A0insights.html#Be-careful-when-interpreting-predictive-models-in-search-of-causal%C2%A0insights). diff --git a/generate_api_docs.py b/generate_api_docs.py index 1fb65bc..969be4c 100644 --- a/generate_api_docs.py +++ b/generate_api_docs.py @@ -47,6 +47,7 @@ show_root_heading: true merge_init_into_class: false group_by_category: true + modernize_annotations: true """) ) @@ -56,6 +57,7 @@ ::: pypythia.{file.stem}.{mtd}\n options: show_root_heading: true + modernize_annotations: true """) ) @@ -66,10 +68,12 @@ ::: pypythia.{file.stem}.DEFAULT_MODEL_FILE\n options: show_root_heading: true + modernize_annotations: true ::: pypythia.{file.stem}.DEFAULT_RAXMLNG_EXE\n options: show_root_heading: true + modernize_annotations: true """) ) diff --git a/pypythia/predictor.py b/pypythia/predictor.py index d540cb2..3f529be 100644 --- a/pypythia/predictor.py +++ b/pypythia/predictor.py @@ -25,8 +25,8 @@ class DifficultyPredictor: Defaults to None. In this case, the features are inferred from the model file. Attributes: - predictor: Loaded trained predictor. - features: Names of the features the predictor was trained with. + predictor (lgb.Booster): The trained LightGBM model used for predicting the difficulty. + features (list[str]): Names of the features the predictor was trained with. 
""" def __init__( From 2d7cc877b6c9597dd32f4dd0938a03df9f421e8a Mon Sep 17 00:00:00 2001 From: Julia Date: Sun, 29 Dec 2024 22:54:18 +0100 Subject: [PATCH 08/36] add version flag --- pypythia/logger.py | 1 - pypythia/main.py | 8 +++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pypythia/logger.py b/pypythia/logger.py index 29dfc31..a77824b 100644 --- a/pypythia/logger.py +++ b/pypythia/logger.py @@ -1,7 +1,6 @@ import sys import textwrap import time -from syslog import LOG_INFO import loguru diff --git a/pypythia/main.py b/pypythia/main.py index 9a519e7..2532267 100644 --- a/pypythia/main.py +++ b/pypythia/main.py @@ -3,6 +3,7 @@ import sys import time +from pypythia import __version__ from pypythia.config import DEFAULT_MODEL_FILE, DEFAULT_RAXMLNG_EXE from pypythia.logger import get_header, log_runtime_information, logger from pypythia.msa import MSA, deduplicate_sequences, parse, remove_full_gap_sequences @@ -115,6 +116,8 @@ def _parse_cli() -> argparse.Namespace: action="store_true", ) + parser.add_argument("-V", "--version", action="version", version=__version__) + return parser.parse_args() @@ -153,8 +156,11 @@ def _handle_full_gap_sequences(msa: MSA, force_full_gaps: bool) -> MSA: def main(): - logger.info(get_header()) args = _parse_cli() + if args.version: + return + + logger.info(get_header()) # Format all paths to pathlib.Path objects and set a default value if not provided msa_file = pathlib.Path(args.msa) From 29b912456db4bd40722023633cba3c6684d78764 Mon Sep 17 00:00:00 2001 From: Julia Date: Tue, 25 Feb 2025 13:52:14 +0100 Subject: [PATCH 09/36] wip --- pypythia/main.py | 3 --- pypythia/msa.py | 4 ++-- pypythia/raxmlng.py | 17 ++++++++++++++--- tests/test_raxmlng.py | 4 ++-- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/pypythia/main.py b/pypythia/main.py index 2532267..d20ac57 100644 --- a/pypythia/main.py +++ b/pypythia/main.py @@ -157,9 +157,6 @@ def _handle_full_gap_sequences(msa: MSA, force_full_gaps: bool) 
-> MSA: def main(): args = _parse_cli() - if args.version: - return - logger.info(get_header()) # Format all paths to pathlib.Path objects and set a default value if not provided diff --git a/pypythia/msa.py b/pypythia/msa.py index 1149b98..60f9c96 100644 --- a/pypythia/msa.py +++ b/pypythia/msa.py @@ -283,7 +283,7 @@ def _site_entropy(site): return np.mean([_site_entropy(site) for site in self.sequences.T]) def pattern_entropy(self) -> float: - """Returns an entropy-like metric based on the number of occurrences of all patterns of the MSA. + r"""Returns an entropy-like metric based on the number of occurrences of all patterns of the MSA. The pattern entropy is calculated as $$ @@ -300,7 +300,7 @@ def pattern_entropy(self) -> float: return np.sum(pattern_counts * np.log(pattern_counts)) def bollback_multinomial(self) -> float: - """ + r""" Returns the Bollback multinomial metric for the MSA. The Bollback multinomial metric is calculated as diff --git a/pypythia/raxmlng.py b/pypythia/raxmlng.py index f8f0fd6..2463997 100644 --- a/pypythia/raxmlng.py +++ b/pypythia/raxmlng.py @@ -36,7 +36,18 @@ def _get_value_from_line(line: str, search_string: str) -> float: ) -def _get_raxmlng_rfdist_results(log_file: pathlib.Path) -> tuple[float, float, float]: +def get_raxmlng_rfdist_results(log_file: pathlib.Path) -> tuple[int, float, float]: + """ + Method to parse the RAxML-NG log file and extract the number of unique topologies, relative RF-Distance, and absolute RF-Distance. + Args: + log_file (pathlib.Path): Filepath pointing to the RAxML-NG log file. + + Returns: + num_topos (int): Number of unique topologies of the given set of trees. + rel_rfdist (float): Relative RF-Distance of the given set of trees. Computed as average over all pairwise RF-Distances. Value between 0.0 and 1.0. + abs_rfdist (float): Absolute RF-Distance of the given set of trees. 
+ + """ abs_rfdist = None rel_rfdist = None num_topos = None @@ -60,7 +71,7 @@ def _get_raxmlng_rfdist_results(log_file: pathlib.Path) -> tuple[float, float, f if abs_rfdist is None or rel_rfdist is None or num_topos is None: raise ValueError("Error parsing raxml-ng log.") - return num_topos, rel_rfdist, abs_rfdist + return int(num_topos), rel_rfdist, abs_rfdist class RAxMLNG: @@ -176,4 +187,4 @@ def get_rfdistance_results( prefix = tmpdir / "rfdist" self._run_rfdist(trees_file, prefix, **kwargs) log_file = pathlib.Path(f"{prefix}.raxml.log") - return _get_raxmlng_rfdist_results(log_file) + return get_raxmlng_rfdist_results(log_file) diff --git a/tests/test_raxmlng.py b/tests/test_raxmlng.py index 97dbe9e..8454ea1 100644 --- a/tests/test_raxmlng.py +++ b/tests/test_raxmlng.py @@ -5,11 +5,11 @@ import pytest from pypythia.custom_errors import RAxMLNGError -from pypythia.raxmlng import _get_raxmlng_rfdist_results, run_raxmlng_command +from pypythia.raxmlng import get_raxmlng_rfdist_results, run_raxmlng_command def test_get_raxmlng_rfdist_results(raxmlng_rfdistance_log): - num_topos, rel_rfdist, abs_rfdist = _get_raxmlng_rfdist_results( + num_topos, rel_rfdist, abs_rfdist = get_raxmlng_rfdist_results( raxmlng_rfdistance_log ) From b0217a1d6c8d83b72e4c860f60d916d01f157dcf Mon Sep 17 00:00:00 2001 From: Julia Date: Tue, 25 Feb 2025 13:58:44 +0100 Subject: [PATCH 10/36] add licence-files --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 0f2b262..b41935f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,6 +7,7 @@ url = https://github.com/tschuelia/PyPythia author = Julia Haag author_email = julia.haag@h-its.org version = 2.0.0 +license_files = LICENSE classifiers = Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 From 32e41ec515285a8211756f4b8d18c7f962bf0070 Mon Sep 17 00:00:00 2001 From: Julia Date: Tue, 25 Feb 2025 15:09:17 +0100 Subject: [PATCH 11/36] switch to pyproject.toml --- pyproject.toml | 45 
+++++++++++++++++++++++++++++++++++++++++++-- setup.cfg | 46 ---------------------------------------------- 2 files changed, 43 insertions(+), 48 deletions(-) delete mode 100644 setup.cfg diff --git a/pyproject.toml b/pyproject.toml index 2f2d3d0..13b89b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,44 @@ +[project] +name = "PyPythiaPhylo" +description = "Lightweight python library for predicting the difficulty of alignments in phylogenetics" +readme = {file = "README.md", content-type = "text/markdown"} +authors = [{name = "Julia Haag", email = "info@juliaschmid.com"}] +version = "2.0.0" +license = "GPL-3.0-or-later" +classifiers = [ + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12" +] +requires-python = ">= 3.9, <= 3.12" + +dependencies = [ + "pandas", + "biopython", + "lightgbm>=4.4", + "numpy>=2.0", + "shap>=0.41", + "matplotlib", + "loguru" +] + +[project.optional-dependencies] +test = [ + "pytest", + "pyarrow" +] + +[project.urls] +Homepage = "https://github.com/tschuelia/PyPythia" +Documentation = "https://tschuelia.github.io/PyPythia/latest/" + +[project.scripts] +pythia = "pypythia.main:main" + [build-system] -requires = ['setuptools', 'wheel'] +requires = ["hatchling"] +build-backend = "hatchling.build" [tool.black] exclude = ''' @@ -28,5 +67,7 @@ no_implicit_optional = true check_untyped_defs = true [tool.pytest.ini_options] -# This will be pytest's future default. 
addopts = "--import-mode=importlib" + +[tool.hatch.build.targets.wheel] +packages = ["pypythia"] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index b41935f..0000000 --- a/setup.cfg +++ /dev/null @@ -1,46 +0,0 @@ -[metadata] -name = PyPythia -description = Lightweight python library for predicting the difficulty of alignments in phylogenetics -long_description = file: README.md -long_description_content_type = text/markdown -url = https://github.com/tschuelia/PyPythia -author = Julia Haag -author_email = julia.haag@h-its.org -version = 2.0.0 -license_files = LICENSE -classifiers = - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: 3.11 - Programming Language :: Python :: 3.12 - -[options] -include_package_data = true -install_requires = - pandas - biopython - lightgbm >= 4.4 - numpy >= 2.0 - shap >= 0.41 - matplotlib - loguru - -python_requires = >=3.9 -package_dir= - =. -packages = find: - -[options.extras_require] -test = - pytest - pyarrow - -[options.package_data] -* = predictors/*.txt - -[options.packages.find] -where = . 
- -[options.entry_points] -console_scripts = - pythia = pypythia.main:main From 0ade8a3961b5b6941ab39e633cb00cbec7467306 Mon Sep 17 00:00:00 2001 From: Julia Date: Tue, 25 Feb 2025 15:10:16 +0100 Subject: [PATCH 12/36] fix python version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 13b89b0..6a4210f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12" ] -requires-python = ">= 3.9, <= 3.12" +requires-python = ">= 3.9, < 3.13" dependencies = [ "pandas", From dd28a3158ad07bd12dbd0ab2c136fdb2347e6863 Mon Sep 17 00:00:00 2001 From: Julia Date: Thu, 27 Feb 2025 11:11:44 +0100 Subject: [PATCH 13/36] add repo link to docs --- mkdocs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mkdocs.yml b/mkdocs.yml index 764df18..a2c5458 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -27,6 +27,8 @@ plugins: canonical_version: latest site_name: PyPythia site_url: https://tschuelia.github.io/PyPythia/ +repo_url: https://github.com/tschuelia/PyPythia +repo_name: tschuelia/PyPythia theme: name: material palette: From c21ff90193a2519731ed47de2648f5e3cd4da8f1 Mon Sep 17 00:00:00 2001 From: Julia Date: Thu, 27 Feb 2025 13:15:27 +0100 Subject: [PATCH 14/36] update pre-commit; test coverage --- .pre-commit-config.yaml | 4 +- pypythia/custom_types.py | 1 - pypythia/main.py | 4 -- pypythia/msa.py | 4 +- pypythia/raxmlng.py | 6 --- tests/conftest.py | 1 - tests/test_config.py | 2 +- tests/test_msa.py | 108 ++++++++++++++++++++++++++++++++++++++- tests/test_predictor.py | 12 ++++- 9 files changed, 122 insertions(+), 20 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 23bfde5..1d85544 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,10 +7,10 @@ repos: - id: trailing-whitespace - id: detect-private-key - repo: 
https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.4 + rev: v0.9.7 hooks: # Run the linter. - id: ruff - args: [--select, I, --fix ] + args: [--select, F, E, I, --fix ] # Run the formatter. - id: ruff-format diff --git a/pypythia/custom_types.py b/pypythia/custom_types.py index 1bdac6f..5ee269f 100644 --- a/pypythia/custom_types.py +++ b/pypythia/custom_types.py @@ -1,4 +1,3 @@ -import pathlib from enum import Enum diff --git a/pypythia/main.py b/pypythia/main.py index d20ac57..71ec5a8 100644 --- a/pypythia/main.py +++ b/pypythia/main.py @@ -235,7 +235,6 @@ def main(): log_runtime=True, ) - features_start = time.perf_counter() msa_features = collect_features( msa=msa, msa_file=msa_file, @@ -245,13 +244,10 @@ def main(): threads=args.threads, seed=args.seed, ) - features_end = time.perf_counter() log_runtime_information("Predicting the difficulty", log_runtime=True) - prediction_start = time.perf_counter() difficulty = predictor.predict(msa_features)[0] - prediction_end = time.perf_counter() script_end = time.perf_counter() diff --git a/pypythia/msa.py b/pypythia/msa.py index 60f9c96..26c995c 100644 --- a/pypythia/msa.py +++ b/pypythia/msa.py @@ -174,7 +174,7 @@ def __init__( self.n_taxa, self.n_sites = self.sequences.shape def __str__(self): - return f"MSA(name={self.name}, n_taxa={self.n_taxa}, n_sites={self.n_sites}, data_type={self.data_type})" + return f"MSA(name={self.name}, n_taxa={self.n_taxa}, n_sites={self.n_sites}, data_type={self.data_type.name})" def __repr__(self): return str(self) @@ -450,7 +450,7 @@ def deduplicate_sequences(msa: MSA, msa_name: Optional[str] = None) -> MSA: PyPythiaException: If the MSA does not contain any duplicate sequences. 
""" if not msa.contains_duplicate_sequences(): - raise PyPythiaException("No duplicates found in MSA.") + raise PyPythiaException("No duplicate sequences found in MSA.") unique_sequences, unique_indices = np.unique( msa.sequences, axis=0, return_index=True diff --git a/pypythia/raxmlng.py b/pypythia/raxmlng.py index 2463997..d673755 100644 --- a/pypythia/raxmlng.py +++ b/pypythia/raxmlng.py @@ -108,12 +108,6 @@ def _base_cmd( *additional_settings, ] - def _run_alignment_parse( - self, msa_file: pathlib.Path, model: str, prefix: pathlib.Path, **kwargs - ) -> None: - cmd = self._base_cmd(msa_file, model, prefix, parse=None, **kwargs) - run_raxmlng_command(cmd) - def _run_rfdist( self, trees_file: pathlib.Path, prefix: pathlib.Path, **kwargs ) -> None: diff --git a/tests/conftest.py b/tests/conftest.py index ea481ee..52ad49b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,6 @@ import pandas as pd import pytest -from pypythia.msa import parse from pypythia.predictor import DifficultyPredictor from pypythia.raxmlng import RAxMLNG diff --git a/tests/test_config.py b/tests/test_config.py index d3a373d..5047713 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1 +1 @@ -RAXMLNG_COMMAND = "/Users/julia/Software/raxml-ng/bin/raxml-ng" +RAXMLNG_COMMAND = "/Users/julia/Software/raxml-ng_v1.2.0/raxml-ng" diff --git a/tests/test_msa.py b/tests/test_msa.py index 236c134..fb546eb 100644 --- a/tests/test_msa.py +++ b/tests/test_msa.py @@ -1,11 +1,19 @@ import pathlib +import tempfile import numpy as np import pytest from pypythia.custom_errors import PyPythiaException -from pypythia.custom_types import DataType -from pypythia.msa import MSA, _get_file_format, _guess_dtype, parse +from pypythia.custom_types import DataType, FileFormat +from pypythia.msa import ( + MSA, + _get_file_format, + _guess_dtype, + deduplicate_sequences, + parse, + remove_full_gap_sequences, +) def test_parse(msa_test_data): @@ -76,6 +84,22 @@ def test_msa_init_wrong_taxa(): 
MSA(taxa, sequences, DataType.DNA, "test") +def test_msa_str_and_repr(): + taxa = np.array(["TAXON1", "TAXON2", "TAXON3"]) + sequences = np.array( + [ + [b"A", b"C", b"G", b"T"], + [b"A", b"C", b"G", b"T"], + [b"A", b"C", b"G", b"T"], + ] + ) + + msa = MSA(taxa, sequences, DataType.DNA, "test") + expected_str = "MSA(name=test, n_taxa=3, n_sites=4, data_type=DNA)" + assert str(msa) == expected_str + assert repr(msa) == expected_str + + def test_contains_duplicate_sequences(msa_test_data): for idx, row in msa_test_data.iterrows(): msa_file = pathlib.Path(row.msa_file) @@ -138,6 +162,46 @@ def test_get_raxmlng_model(msa_test_data): assert msa.get_raxmlng_model() == row.raxmlng_model +def test_get_raxmlng_model_fails_for_invalid_dtype(): + with pytest.raises(PyPythiaException, match="Unsupported data type:"): + MSA( + np.array(["TAXON1", "TAXON2", "TAXON3"]), + np.array( + [ + [b"A", b"C", b"G", b"T"], + [b"A", b"C", b"G", b"T"], + [b"A", b"C", b"G", b"T"], + ] + ), + "INVALID_DTYPE", + "test", + ).get_raxmlng_model() + + +def test_write(phylip_msa_file): + msa = parse(phylip_msa_file) + with tempfile.NamedTemporaryFile() as tmpfile: + tmpfile = pathlib.Path(tmpfile.name) + msa.write(tmpfile, file_format=FileFormat.PHYLIP) + + # File format is correct + assert _get_file_format(tmpfile) == FileFormat.PHYLIP + + # Number of taxa and sites is identical + msa_reread = parse(tmpfile) + assert msa_reread.n_taxa == msa.n_taxa + assert msa_reread.n_sites == msa.n_sites + + # Names of taxa are identical + np.testing.assert_array_equal(msa_reread.taxa, msa.taxa) + + # Sequences are identical + np.testing.assert_array_equal(msa_reread.sequences, msa.sequences) + + # Data type is identical + assert msa_reread.data_type == msa.data_type + + class TestMSAFeatures: def test_n_taxa(self, msa_test_data): for idx, row in msa_test_data.iterrows(): @@ -186,3 +250,43 @@ def test_bollback_multinomial(self, msa_test_data): msa_file = pathlib.Path(row.msa_file) msa = parse(msa_file) assert 
msa.bollback_multinomial() == row.bollback_multinomial + + +def test_remove_full_gap_sequences(msa_test_data): + for idx, row in msa_test_data.iterrows(): + msa_file = pathlib.Path(row.msa_file) + msa = parse(msa_file) + + if row.contains_full_gap_sequences: + # If the MSA contains full-gap sequences: expect these sequences to be removed + msa_no_full_gaps = remove_full_gap_sequences(msa) + assert not msa_no_full_gaps.contains_full_gap_sequences() + assert msa_no_full_gaps.n_taxa < msa.n_taxa + # Number of sites should not be affected + assert msa_no_full_gaps.n_sites == msa.n_sites + else: + # Otherwise, expect a PyPythiaException + with pytest.raises( + PyPythiaException, match="No full-gap sequences found in MSA." + ): + remove_full_gap_sequences(msa) + + +def test_deduplicate_sequences(msa_test_data): + for idx, row in msa_test_data.iterrows(): + msa_file = pathlib.Path(row.msa_file) + msa = parse(msa_file) + + if row.contains_duplicates: + # If the MSA contains duplicate sequences: expect these sequences to be removed + msa_no_duplicates = deduplicate_sequences(msa) + assert not msa_no_duplicates.contains_duplicate_sequences() + assert msa_no_duplicates.n_taxa < msa.n_taxa + # Number of sites should not be affected + assert msa_no_duplicates.n_sites == msa.n_sites + else: + # Otherwise, expect a PyPythiaException + with pytest.raises( + PyPythiaException, match="No duplicate sequences found in MSA." 
+ ): + deduplicate_sequences(msa) diff --git a/tests/test_predictor.py b/tests/test_predictor.py index f6f8c88..d4b0bc5 100644 --- a/tests/test_predictor.py +++ b/tests/test_predictor.py @@ -2,12 +2,22 @@ import numpy as np import pandas as pd import pytest -from numpy import typing as npt from pypythia.custom_errors import PyPythiaException class TestPredictor: + def test_str_and_repr(self, predictor): + expected_str = ( + "DifficultyPredictor(" + "model_file=pypythia/predictors/latest.txt, features=['num_patterns/num_taxa', " + "'num_sites/num_taxa', 'proportion_gaps', 'proportion_invariant', 'entropy', " + "'bollback', 'num_patterns/num_sites', 'pattern_entropy', 'avg_rfdist_parsimony', " + "'proportion_unique_topos_parsimony'])" + ) + assert str(predictor) == expected_str + assert repr(predictor) == expected_str + def test_predict(self, predictor): query = pd.DataFrame( { From 29b7cd6a78f61c51e10e2f7773de635350d62e50 Mon Sep 17 00:00:00 2001 From: Julia Date: Thu, 27 Feb 2025 16:58:33 +0100 Subject: [PATCH 15/36] wip: refactoring --- pypythia/main.py | 205 ++++------------------------- pypythia/prediction.py | 283 +++++++++++++++++++++++++++++++---------- 2 files changed, 242 insertions(+), 246 deletions(-) diff --git a/pypythia/main.py b/pypythia/main.py index 71ec5a8..6618abc 100644 --- a/pypythia/main.py +++ b/pypythia/main.py @@ -5,11 +5,8 @@ from pypythia import __version__ from pypythia.config import DEFAULT_MODEL_FILE, DEFAULT_RAXMLNG_EXE -from pypythia.logger import get_header, log_runtime_information, logger -from pypythia.msa import MSA, deduplicate_sequences, parse, remove_full_gap_sequences -from pypythia.prediction import collect_features -from pypythia.predictor import DifficultyPredictor -from pypythia.raxmlng import RAxMLNG +from pypythia.logger import get_header, logger +from pypythia.prediction import predict_difficulty def _parse_cli() -> argparse.Namespace: @@ -68,25 +65,10 @@ def _parse_cli() -> argparse.Namespace: help="Filepath of the 
alternative predictor to use (default: latest Pythia).", ) - parser.add_argument( - "-prec", - "--precision", - type=int, - default=2, - required=False, - help="Set the number of decimals the difficulty should be rounded to (default: 2).", - ) - - parser.add_argument( - "-sT", - "--storeTrees", - help="If set, stores the parsimony trees as '{prefix}.pythia.trees' file (default: False).", - action="store_true", - ) - parser.add_argument( "--forceDuplicates", - help="Per default, Pythia refuses to predict the difficulty for MSAs containing duplicate sequences. " + help="Per default, Pythia refuses to predict the difficulty for MSAs containing duplicate sequences," + "and removes duplicate sequences prior to predicting the difficulty. " "Only set this option if you are absolutely sure that you want to predict the difficulty " "for this MSA (default: False). ", action="store_true", @@ -94,25 +76,17 @@ def _parse_cli() -> argparse.Namespace: parser.add_argument( "--forceFullGaps", - help="Per default, Pythia refuses to predict the difficulty for MSAs containing sequences with only gaps. " + help="Per default, Pythia refuses to predict the difficulty for MSAs containing sequences with only gaps," + "and removes full-gap sequences prior to predicting the difficulty. " "Only set this option if you are absolutely sure that you want to predict the difficulty " "for this MSA (default: False). ", action="store_true", ) parser.add_argument( - "--shap", - help="If set, computes the shapley values of the prediction as waterfall plot in '{prefix}.shap.pdf'. " - "When using this option, make sure you understand what shapley values are and how to interpret this plot." 
- "For details on shapley values refer to the wiki: " - "https://github.com/tschuelia/PyPythia/wiki/Usage#shapley-values (default: False).", - action="store_true", - ) - - parser.add_argument( - "-v", - "--verbose", - help="If set, additionally prints the MSA features (default: False).", + "--nofiles", + help="Prevent Pythia from writing any files and only print logs/results to the terminal (default: False)." + "WARNING: in this case and if your MSA contains duplicate/full-gap sequences the reduced MSA will not be stored.", action="store_true", ) @@ -121,62 +95,21 @@ def _parse_cli() -> argparse.Namespace: return parser.parse_args() -def _handle_duplicates(msa: MSA, force_duplicates: bool) -> MSA: - if msa.contains_duplicate_sequences() and force_duplicates: - logger.warning( - "WARNING: The provided MSA contains duplicate sequences. " - "The setting 'forceDuplicates' is set, Pythia will predict the difficulty for the MSA with duplicates." - ) - return msa - - if msa.contains_duplicate_sequences(): - log_runtime_information( - "The input MSA contains duplicate sequences. Removing duplicates before predicting the difficulty." - ) - return deduplicate_sequences(msa) - else: - return msa - - -def _handle_full_gap_sequences(msa: MSA, force_full_gaps: bool) -> MSA: - if msa.contains_full_gap_sequences() and force_full_gaps: - log_runtime_information( - "WARNING: The provided MSA contains sequences with only gaps. " - "The setting 'forceFullGaps' is set, Pythia will predict the difficulty for the MSA with full gap sequences." - ) - return msa - - if msa.contains_full_gap_sequences(): - log_runtime_information( - "The input MSA contains sequences with only gaps. Removing full gap sequences before predicting the difficulty." 
- ) - return remove_full_gap_sequences(msa) - else: - return msa - - def main(): args = _parse_cli() logger.info(get_header()) # Format all paths to pathlib.Path objects and set a default value if not provided msa_file = pathlib.Path(args.msa) - raxmlng_executable = pathlib.Path(args.raxmlng) - prefix = pathlib.Path(args.prefix) if args.prefix else msa_file - predictor_file = pathlib.Path(args.predictor) + prefix = pathlib.Path(args.prefix) or msa_file - # Setup all result files, log the Pythia header - log_file = pathlib.Path(f"{prefix}.pythia.log") - logger.add(log_file, format="{message}") - log_file.write_text(get_header() + "\n") + store_results = not args.nofiles - results_file = pathlib.Path(f"{prefix}.pythia.csv") - pars_trees_file = pathlib.Path(f"{prefix}.pythia.trees") - shap_file = pathlib.Path(f"{prefix}.shap.pdf") - reduced_msa_file = pathlib.Path(f"{prefix}.reduced.phy") - - # Start the actual prediction - SCRIPT_START = time.perf_counter() + if store_results: + # Setup the logfile and log the Pythia header + log_file = pathlib.Path(f"{prefix}.pythia.log") + logger.add(log_file, format="{message}") + log_file.write_text(get_header() + "\n") logger.info( f"Pythia was called at {time.strftime('%d-%b-%Y %H:%M:%S')} as follows:\n" @@ -184,108 +117,26 @@ def main(): logger.info(" ".join(sys.argv)) logger.info("") - log_runtime_information( - message=f"Starting prediction for MSA {msa_file}.", log_runtime=True - ) - - raxmlng = RAxMLNG(raxmlng_executable) - - log_runtime_information( - message=f"Loading predictor {predictor_file.name}", log_runtime=True - ) - predictor = DifficultyPredictor(predictor_file) - - log_runtime_information(message="Loading MSA", log_runtime=True) - - msa = parse(msa_file) - - # First, deduplicate the MSA if necessary - reduced_msa = _handle_duplicates(msa, args.forceDuplicates) - - # Second, remove full gap sequences if necessary - reduced_msa = _handle_full_gap_sequences(reduced_msa, args.forceFullGaps) - - # check if the 
reduced MSA is different from the original MSA - is_reduced = msa != reduced_msa - if is_reduced: - reduced_msa.write(reduced_msa_file) - msa = reduced_msa - msa_file = reduced_msa_file - - log_runtime_information( - "The input MSA contained duplicate sequences and/or sequences containing only gaps. " - f"Saving a reduced alignment as {reduced_msa_file}.\n" - "WARNING: This predicted difficulty is only applicable to the reduced MSA (duplicate sequences removed). " - f"We recommend to only use the reduced alignment {reduced_msa_file} for your subsequent analyses.\n", - log_runtime=True, - ) - - log_runtime_information( - f"Starting to compute MSA features for MSA {msa_file}", log_runtime=True - ) - - if args.threads is None: - log_runtime_information( - "Number of threads not specified, using RAxML-NG autoconfig.", - log_runtime=True, - ) - else: - log_runtime_information( - f"Using {args.threads} threads for parallel parsimony tree computation.", - log_runtime=True, - ) + # Start the actual prediction + script_start = time.perf_counter() - msa_features = collect_features( - msa=msa, + difficulty = predict_difficulty( msa_file=msa_file, - raxmlng=raxmlng, - pars_trees_file=pars_trees_file if args.storeTrees else None, - log_info=True, + model_file=pathlib.Path(args.predictor), + raxmlng=pathlib.Path(args.raxmlng), threads=args.threads, seed=args.seed, + deduplicate=not args.forceDuplicates, + remove_full_gaps=not args.forceFullGaps, + result_prefix=prefix, + store_results=store_results, + log_info=True, ) - log_runtime_information("Predicting the difficulty", log_runtime=True) - - difficulty = predictor.predict(msa_features)[0] - script_end = time.perf_counter() - log_runtime_information("Done") - - if args.shap: - fig = predictor.plot_shapley_values(msa_features) - fig.tight_layout() - fig.savefig(fname=shap_file) - - if args.verbose: - logger.info("โ”€" * 20) - logger.info("FEATURES: ") - for feat, val in msa_features.items(): - logger.info(f"{feat}: 
{round(val[0], args.precision)}") - - msa_features["difficulty"] = difficulty - msa_features["msa_file"] = str(msa_file) - - msa_features.to_csv(results_file, index=False) - logger.info("") - logger.info(f"Results: {results_file}.") - - if is_reduced: - logger.info(f"Reduced MSA: {reduced_msa_file}.") - - if args.storeTrees: - logger.info(f"Inferred parsimony trees: {pars_trees_file}.") - - if args.shap: - logger.info(f"SHAP waterfall plot: {shap_file}.") - logger.warning( - "WARNING: When using shap plots, make sure you understand what shapley values are and how you can interpret" - " this plot. For details refer to the wiki: https://github.com/tschuelia/PyPythia/wiki/Usage#shapley-values" - ) - logger.info("") - total_runtime = script_end - SCRIPT_START + total_runtime = script_end - script_start hours, remainder = divmod(total_runtime, 3600) minutes, seconds = divmod(remainder, 60) @@ -301,7 +152,7 @@ def main(): logger.info(f"Total runtime: {seconds:.2f} seconds.") logger.info( - f"\nThe predicted difficulty for MSA {msa_file} is: {round(difficulty, args.precision)}\n" + f"\nThe predicted difficulty for MSA {msa_file} is: {round(difficulty, 2)}\n" ) diff --git a/pypythia/prediction.py b/pypythia/prediction.py index 27ff7d9..6d100a0 100644 --- a/pypythia/prediction.py +++ b/pypythia/prediction.py @@ -1,5 +1,6 @@ import pathlib import shutil +import tempfile from tempfile import TemporaryDirectory from typing import Optional @@ -15,67 +16,40 @@ from pypythia.raxmlng import RAxMLNG -def predict_difficulty( - msa_file: pathlib.Path, - model_file: Optional[pathlib.Path] = DEFAULT_MODEL_FILE, - raxmlng: Optional[pathlib.Path] = DEFAULT_RAXMLNG_EXE, - threads: int = None, - seed: int = 0, - file_format: Optional[FileFormat] = None, - data_type: Optional[DataType] = None, - deduplicate: bool = True, - remove_full_gaps: bool = True, - reduced_msa_file: Optional[pathlib.Path] = None, -) -> np.float64: - """Predict the difficulty of an MSA using the PyPythia difficulty 
predictor. - - Per default, the MSA is deduplicated and full gap sequences are removed before the difficulty is predicted. - - Args: - msa_file (pathlib.Path): Path to the MSA file. Note that the MSA file must be in either FASTA or PHYLIP format. - model_file (pathlib.Path, optional): Path to the trained difficulty predictor model. - Defaults to the latest model shipped with PyPythia. - raxmlng (pathlib.Path, optional): Path to the RAxML-NG executable. - If not set, uses the RAxML-NG binary found in the PATH environment variable. - threads (int, optional): Number of threads to use for parallel parsimony tree inference. If not set, uses the - RAxML-NG auto parallelization scheme. - seed (int, optional): Random seed to use for the parsimony tree inference. Defaults to 0. - file_format (FileFormat, optional): File format of the MSA file. Defaults to None. In this case, the file format - is inferred based on the file content. See `pypythia.msa.parse` for information on when this is required. - data_type (DataType, optional): Data type of the MSA sequences. Defaults to None. In this case, the data type - is inferred based on the file content. See `pypythia.msa.parse` for information on when this is required. - deduplicate (bool, optional): If True, remove duplicate sequences from the MSA. Defaults to True. - remove_full_gaps (bool, optional): If True, remove full gap sequences from the MSA. Defaults to True. - reduced_msa_file (pathlib.Path, optional): Path to store the reduced MSA after deduplication and removal of full gap sequences. - - Returns: - np.float64: Predicted difficulty of the MSA. - """ - - predictor = DifficultyPredictor(model_file=model_file) - - if raxmlng is None: - raise PyPythiaException( - "Path to the RAxML-NG executable is required if 'raxml-ng' is not in $PATH." 
+def _handle_duplicates(msa: MSA, deduplicate: bool, log_info: bool = False) -> MSA: + contains_duplicates = msa.contains_duplicate_sequences() + if contains_duplicates and deduplicate: + log_info and log_runtime_information( + "The input MSA contains duplicate sequences. Removing duplicates before predicting the difficulty." ) + return deduplicate_sequences(msa) + elif contains_duplicates: + log_info and logger.warning( + "WARNING: The provided MSA contains duplicate sequences, but deduplication is disabled. " + "Pythia will predict the difficulty for the MSA with duplicates." + ) + return msa + else: + return msa - raxmlng = RAxMLNG(**{"exe_path": raxmlng} if raxmlng else {}) - msa = parse(msa_file, file_format=file_format, data_type=data_type) - - if deduplicate and msa.contains_duplicate_sequences(): - msa = deduplicate_sequences(msa) - if remove_full_gaps and msa.contains_full_gap_sequences(): - msa = remove_full_gap_sequences(msa) - - if reduced_msa_file: - msa.write(reduced_msa_file) - - msa_features = collect_features( - msa, msa_file, raxmlng, log_info=False, threads=threads, seed=seed - ) - difficulty = predictor.predict(msa_features) - return difficulty[0] +def _handle_full_gap_sequences( + msa: MSA, remove_full_gaps: bool, log_info: bool = False +) -> MSA: + contains_full_gaps = msa.contains_full_gap_sequences() + if contains_full_gaps and remove_full_gaps: + log_info and log_runtime_information( + "The input MSA contains sequences with only gaps. Removing full gap sequences before predicting the difficulty." + ) + return remove_full_gap_sequences(msa) + elif contains_full_gaps: + log_info and log_runtime_information( + "WARNING: The provided MSA contains sequences with only gaps, but gap removal is disabled. " + "Pythia will predict the difficulty for the MSA with full gap sequences." 
+ ) + return msa + else: + return msa def collect_features( @@ -83,7 +57,7 @@ def collect_features( msa_file: pathlib.Path, raxmlng: RAxMLNG, pars_trees_file: Optional[pathlib.Path] = None, - log_info: bool = True, + log_info: bool = False, threads: int = None, seed: int = 0, ) -> pd.DataFrame: @@ -94,7 +68,7 @@ def collect_features( raxmlng (RAxMLNG): Initialized RAxMLNG object. pars_trees_file (pathlib.Path, optional): Path to store the inferred parsimony trees. Defaults to None. In this case, the trees are not stored. - log_info (bool, optional): If True, log intermediate progress information using the default logger. + log_info (bool, optional): If True, log intermediate progress information using the default logger. Defaults to False. threads (int, optional): The number of threads to use for parallel parsimony tree inference. Defaults to None. Uses the RAxML-NG auto parallelization scheme if none is set. seed (int, optional): Random seed to use for the parsimony tree inference. Defaults to 0. @@ -102,19 +76,15 @@ def collect_features( Dataframe containing a single row with all features required for predicting the difficulty of the MSA. The columns correspond to the feature names the predictor was trained with. 
""" - if not log_info: - logger.remove() - with TemporaryDirectory() as tmpdir: msa_file = msa_file model = msa.get_raxmlng_model() - log_runtime_information("Retrieving num_taxa, num_sites.", log_runtime=True) + log_info and log_runtime_information("Retrieving num_taxa, num_sites.") n_pars_trees = 24 - log_runtime_information( + log_info and log_runtime_information( f"Inferring {n_pars_trees} parsimony trees with random seed {seed}.", - log_runtime=True, ) trees = raxmlng.infer_parsimony_trees( msa_file, @@ -126,13 +96,13 @@ def collect_features( **dict(threads=threads) if threads else {}, ) if pars_trees_file is not None: - log_runtime_information( + log_info and log_runtime_information( f"Storing the inferred parsimony trees in the file {pars_trees_file}." ) shutil.copy(trees, pars_trees_file) - log_runtime_information( - "Computing the RF-Distance for the parsimony trees.", log_runtime=True + log_info and log_runtime_information( + "Computing the RF-Distance for the parsimony trees." ) num_topos, rel_rfdist, _ = raxmlng.get_rfdistance_results(trees, redo=None) @@ -152,3 +122,178 @@ def collect_features( "proportion_unique_topos_parsimony": num_topos / n_pars_trees, } return pd.DataFrame(features, index=[0]) + + +def predict_difficulty( + msa_file: pathlib.Path, + model_file: Optional[pathlib.Path] = DEFAULT_MODEL_FILE, + raxmlng: Optional[pathlib.Path] = DEFAULT_RAXMLNG_EXE, + threads: int = None, + seed: int = 0, + file_format: Optional[FileFormat] = None, + data_type: Optional[DataType] = None, + deduplicate: bool = True, + remove_full_gaps: bool = True, + result_prefix: Optional[pathlib.Path] = None, + store_results: bool = True, + log_info: bool = False, +) -> np.float64: + """Predict the difficulty of an MSA using the PyPythia difficulty predictor. + + Per default, the MSA is deduplicated and full gap sequences are removed before the difficulty is predicted. + + Args: + msa_file (pathlib.Path): Path to the MSA file. 
Note that the MSA file must be in either FASTA or PHYLIP format. + model_file (pathlib.Path, optional): Path to the trained difficulty predictor model. + Defaults to the latest model shipped with PyPythia. + raxmlng (pathlib.Path, optional): Path to the RAxML-NG executable. + If not set, uses the RAxML-NG binary found in the PATH environment variable. + threads (int, optional): Number of threads to use for parallel parsimony tree inference. If not set, uses the + RAxML-NG auto parallelization scheme. + seed (int, optional): Random seed to use for the parsimony tree inference. Defaults to 0. + file_format (FileFormat, optional): File format of the MSA file. Defaults to None. In this case, the file format + is inferred based on the file content. See `pypythia.msa.parse` for information on when this is required. + data_type (DataType, optional): Data type of the MSA sequences. Defaults to None. In this case, the data type + is inferred based on the file content. See `pypythia.msa.parse` for information on when this is required. + deduplicate (bool, optional): If True, remove duplicate sequences from the MSA. Defaults to True. + remove_full_gaps (bool, optional): If True, remove full gap sequences from the MSA. Defaults to True. + result_prefix (pathlib.Path, optional): Prefix for the result files. Defaults to None. In this case, the prefix + is set to the MSA file name. + store_results (bool, optional): If True, store intermediate results as file. Defaults to True. 
+ In this case, the following files are stored: + - The reduced MSA in PHYLIP format (if duplicates or full gap sequences were removed) in `{result_prefix}.reduced.phy` + - The inferred parsimony trees in Newick format in `{result_prefix}.pythia.trees` + - The shapley values as waterfall plot in `{result_prefix}.shap.pdf` + - The features and predicted difficulty as CSV file in `{result_prefix}.pythia.csv` + + log_info (bool, optional): If True, log intermediate progress information using the default logger. Defaults to False. + + Returns: + np.float64: Predicted difficulty of the MSA. + """ + result_prefix = pathlib.Path(result_prefix) if result_prefix else msa_file + + pars_trees_file = pathlib.Path(f"{result_prefix}.pythia.trees") + shap_file = pathlib.Path(f"{result_prefix}.shap.pdf") + results_file = pathlib.Path(f"{result_prefix}.pythia.csv") + + # We definitely need to store the reduced MSA somewhere for RAxML-NG + if store_results: + # If the user wants to keep the results, use the result_prefix + reduced_msa_file = pathlib.Path(f"{result_prefix}.reduced.phy") + else: + # Else, use a temporary file + reduced_msa_file = pathlib.Path( + tempfile.NamedTemporaryFile(mode="w", suffix=".phy").name + ) + + log_info and log_runtime_information( + message=f"Starting prediction for MSA {msa_file}." + ) + + # Init RAxML-NG + if raxmlng is None: + raise PyPythiaException( + "Path to the RAxML-NG executable is required if 'raxml-ng' is not in $PATH." 
+ ) + + raxmlng = RAxMLNG(**{"exe_path": raxmlng} if raxmlng else {}) + + # Init the prediction model + log_info and log_runtime_information(message=f"Loading predictor {model_file.name}") + + predictor = DifficultyPredictor(model_file=model_file) + + # Load the MSA + log_info and log_runtime_information(message="Loading MSA") + + msa = parse(msa_file, file_format=file_format, data_type=data_type) + + # Deduplicate the MSA if necessary + reduced_msa = _handle_duplicates(msa, deduplicate) + + # Remove full gap sequences if necessary + reduced_msa = _handle_full_gap_sequences(reduced_msa, remove_full_gaps) + + # Check if the reduced MSA is different from the original MSA + is_reduced = msa != reduced_msa + if is_reduced: + reduced_msa_file = reduced_msa_file or pathlib.Path(f"{msa_file}.reduced.phy") + + # If the reduced MSA is different from the original MSA, proceed with the reduced MSA + msa = reduced_msa + + log_info and log_runtime_information( + "The input MSA contained duplicate sequences and/or sequences containing only gaps. " + "WARNING: This predicted difficulty is only applicable to the reduced MSA (duplicate sequences removed). ", + ) + + # Save the reduced MSA + msa_file = reduced_msa_file + msa.write(msa_file) + + log_info and log_runtime_information( + f"Saving a reduced alignment as {reduced_msa_file}.\n" + f"We recommend to only use the reduced alignment {reduced_msa_file} for your subsequent analyses.\n", + ) + + # Compute the MSA Features + log_info and log_runtime_information( + f"Starting to compute MSA features for MSA {msa_file}" + ) + + log_info and log_runtime_information( + "Number of threads not specified, using RAxML-NG autoconfig." + if threads is None + else f"Using {threads} threads for parallel parsimony tree computation." 
+ ) + + msa_features = collect_features( + msa=msa, + msa_file=msa_file, + raxmlng=raxmlng, + pars_trees_file=pars_trees_file if store_results else None, + log_info=log_info, + threads=threads, + seed=seed, + ) + + # Predict the difficulty + log_info and log_runtime_information("Predicting the difficulty") + difficulty = predictor.predict(msa_features) + + if store_results: + # Plot shapley values + # this only makes sense if store_results=True, otherwise the figure would be lost + fig = predictor.plot_shapley_values(msa_features) + fig.tight_layout() + fig.savefig(fname=shap_file) + + log_info and log_runtime_information("Done") + + # Log the feature values + if log_info: + logger.info("โ”€" * 20) + logger.info("FEATURES: ") + for feat, val in msa_features.items(): + logger.info(f"{feat}: {round(val[0], 2)}") + + if store_results: + # Write the features + difficulty + msa_features["difficulty"] = difficulty + msa_features["msa_file"] = str(msa_file) + + msa_features.to_csv(results_file, index=False) + + if log_info: + logger.info("") + logger.info(f"Results: {results_file}.") + is_reduced and logger.info(f"Reduced MSA: {reduced_msa_file}.") + logger.info(f"Inferred parsimony trees: {pars_trees_file}.") + logger.info(f"SHAP waterfall plot: {shap_file}.") + logger.warning( + "WARNING: When using shap plots, make sure you understand what shapley values are and how you can interpret" + " this plot. 
For details refer to the wiki: https://github.com/tschuelia/PyPythia/wiki/Usage#shapley-values" + ) + + return difficulty[0] From 402df36184c439f91e35e3e420625a4447079103 Mon Sep 17 00:00:00 2001 From: Julia Date: Thu, 27 Feb 2025 17:36:38 +0100 Subject: [PATCH 16/36] more test cases: reduced MSAs --- .gitignore | 1 - tests/data/DNA/0.reduced.phy | 912 +++++++ tests/data/DNA/1.reduced.phy | 264 ++ tests/data/DNA/4.reduced.phy | 4182 ++++++++++++++++++++++++++++++ tests/data/DNA/5.reduced.phy | 22 + tests/data/DNA/small.reduced.phy | 22 + tests/data/MORPH/0.reduced.phy | 5 + tests/data/MORPH/1.reduced.phy | 6 + tests/data/msa_test_data.parquet | Bin 11245 -> 18662 bytes tests/test_msa.py | 12 +- 10 files changed, 5419 insertions(+), 7 deletions(-) create mode 100644 tests/data/DNA/0.reduced.phy create mode 100644 tests/data/DNA/1.reduced.phy create mode 100644 tests/data/DNA/4.reduced.phy create mode 100644 tests/data/DNA/5.reduced.phy create mode 100644 tests/data/DNA/small.reduced.phy create mode 100644 tests/data/MORPH/0.reduced.phy create mode 100644 tests/data/MORPH/1.reduced.phy diff --git a/.gitignore b/.gitignore index 394954c..25c3eb2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -**/*.reduced.* **/*.log **/*.csv diff --git a/tests/data/DNA/0.reduced.phy b/tests/data/DNA/0.reduced.phy new file mode 100644 index 0000000..cdc3b12 --- /dev/null +++ b/tests/data/DNA/0.reduced.phy @@ -0,0 +1,912 @@ + 56 766 +COLLETOTRICHUM_MUSAE_CBS116870_HQ596280 ---------- ---------- ---------- ---------- ---------- +COLLETOTRICHUM_FIORINIAE_EHS58_EF593325 ---------- ---------- ---------- ---------- ---------- +COLLETOTRICHUM_ACUTATUM_A6_PT250_AJ748624 ---------- ---------- ---------- ---------- ---------- +COLLETOTRICHUM_ACUTATUM_A1_CA546_AJ748631 ---------- ---------- ---------- ---------- ---------- +COLLETOTRICHUM_SIMMONDSII_PT135_AJ748607 ---------- ---------- ---------- ---------- ---------- +COLLETOTRICHUM_CLAVATUM_CBS19332_AJ748612 ---------- ---------- 
---------- ---------- ---------- +COLLETOTRICHUM_CLAVATUM_JG05_AJ409302 ---------- ---------- ---------- ---------- ---------- +COLLETOTRICHUM_MUSAE_B15_DQ454038 ---------- ---------- ---------- ---------- ---------- +COLLETOTRICHUM_FIORINIAE_STEU5287_AY376557 ---------- ---------- ---------- ---------- ---------- +COLLETOTRICHUM_SIMMONDSII_STEU4452_AY376551 ---------- ---------- ---------- ---------- ---------- +COLLETOTRICHUM_GLOEOSPORIOIDES_STEU4295_AY376580 ---------- ---------- ---------- ---------- ---------- +COLLETOTRICHUM_MUSAE_MUSAE_JN121299 ---------- ---------- ----ATTGTA AGTTGCAGTC CATCACCACA +COLLETOTRICHUM_GLOEOSPORIOIDES_1765_JN121297 ---------- ----AGTTGC AGTCCATCAC CACAAATCAC AACAACGCCT +COLLETOTRICHUM_ACUTATUM_IMI117620_FJ788419 ---------- --CACCCTCT CCCGTCAC-A ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_GLOEOSPORIOIDES_8_JN121296 ---------- -GTAAGTTGC AGTCCATCAC CACAAATCAC AACAACGCCT +COLLETOTRICHUM_ACUTATUM_UWS14_JN121270 ---------- GTCACCCTCT CCCGTCAC-A ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_ACUTATUM_67_JN121275 ---------- GTCACCCTCT CCCGTCAC-A ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_CLAVATUM_CAMP30_JN121236 ---------- GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_FIORINIAE_ACUVA_JN121278 ---------- GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_SIMMONDSII_1570_JN121294 ---------- GTCGCTCTCT CCCATCACCA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_ACUTATUM_UWS149_JN121273 ---------A GTCACCCTCT CCCGTCAC-A ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_SIMMONDSII_UWS137_JN121287 ---------A GTCGCTCTCT CCCATCAACA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_CLAVATUM_OLP2_JN121220 ---------A GTCGCTCTCT CCCATCAC-A ACAATT-TGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_64_JN121255 ---------A GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_OLF22_JN121247 ---------A GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_IMI398854_JN121213 ---------A GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_OL5_JN121215 
---------A GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_SIMMONDSII_CBS23149_JN121288 ---------A GTCGCTCTCT CCCATCACCA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_SIMMONDSII_FRA_JN121284 ---------A GTCGCTCTCT CCCATCACCA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_SIMMONDSII_1572_JN121293 --------AA GTCGCTCTCT CCCATCACCA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_GLOEOSPORIOIDES_C2_JN121298 --------AT TGTAAGTTGC AGTCCATCAC CACAAATCAC AACAACGCCT +COLLETOTRICHUM_CLAVATUM_CIRRI_JN121251 -------TAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_OLF48_JN121249 -------TAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_OLP12_JN121224 ------GAAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_SIMMONDSII_1036_JN121295 ------GTAA GTCGCTCTCT CCCATCAACA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_CLAVATUM_OLF21_JN121246 ------GTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_OL11_JN121217 ------GTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_MELA_JN121267 ------GTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_FIORINIAE_1491_JN121280 ------GTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_SIMMONDSII_1567_JN121290 ------GTAA GTCGCTCTCT CCCATCACCA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_SIMMONDSII_725_JN121281 ------GTAA GTCGCTCTCT CCCATCACCA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_ACUTATUM_OLE_JN121276 -----TGTAA GTCACCCTCT CCCGTCAC-A ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_SIMMONDSII_UWS68_JN121286 -----TGTAA GTCGCTCTCT CCCATCAACA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_CLAVATUM_OL17_JN121242 -----TGTAA GTCGCTCTCT CCCATCAC-A ACAATT-TGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_OLP10_JN121222 -----TGTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_SIMMONDSII_SPL100_JN121282 -----TGTAA GTCGCTCTCT CCCATCACCA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_ACUTATUM_UWS147_JN121272 ----ATGTAA GTCACCCTCT CCCGTCAC-A ACAATTCTGC GACGCGTCGG 
+COLLETOTRICHUM_CLAVATUM_ORA1_JN121261 ----ATGTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_SIMMONDSII_1568B_JN121292 ---ATTGTAA GTCGCTCTCT CCCATCACCA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_SIMMONDSII_CAF_JN121285 ---ATTGTAA GTCGCTCTCT CCCATCACCA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_CLAVATUM_OL14_JN121240 ---GATGTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_CGMUL_JN121266 ---GTAGTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_SIMMONDSII_BRIP28519_GU183289 -AGATTGTAA GTCGCTCTCT CCCATCAACA ACAATTCTGC GACGCGTCGG +COLLETOTRICHUM_CLAVATUM_8689_JN121269 GAGATTGTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_CLAVATUM_ITRANA1_JN121219 GAGATTGTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCCC +COLLETOTRICHUM_FIORINIAE_1409_JN121279 GAGATTGTAA GTCGCTCTCT CCCATCAC-A ACAATTCTGC GACGCGTCGG + + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + --------CT ATTTCGGGAT AGCCCCTGAG CGTACCCCGC CGATATTCCC + --------CT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + --------GT TTATCCGCCC TGCCCCTGAG CGTACCCCGC CGACATTTTT + ATCACAACAA CGCTTGCGAC GCGTTTATCC GCCTTGCCCC TGAGGGTACC + TGCGACGCGT TTATCCGCCC TGCCCCTGAG CGTACCCCGC CGACATTTTT + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + TGCGACGCGT TTATCCGCCC TGCCCCTGAG CGTACCCCGC CGACATTTTT + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGAT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC 
CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATAGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + TGCGACGCGT TTATCCGCCC TGCCCCTGAG CGTACCCCGC CGACATTTTT + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATAGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGAT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATAGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + AATAGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC CGATATTCCC + GACGGCCACT ATTTCGGGGT AGCCCCTGAG CGTACCCCGC 
CGATATTCCC + AATGGCCACT ATTTCGGGAT AGCCCCTGAG CGTACCCCGC CGATATTCCC + + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ACCCAAAACG ACATTTACCA AACATGGCCC GACTTGCATC TCCAACGCTT + ACCCAAG-CC ATGCTCGCCA AACATGGCCC GACTTGCATC TCCAACGCTC + ACCCGAC-CT CTTTGCTCAA CAAACCCGCG ACGCCTGTCA ATCATCGACC + CCGCCGA-CA TTATGCACAA CAAACCCGCG ACGCCTGTCA ATCATCGACG + ACCCGAC-CT CTTTGCTCAA CAAACCCGCG ACGCCTGTCA ATCATCGACC + ACCCAAA-CG ATACTCGCCA GACATGGCCC GACTTGCATC TCCAACGCTT + ACCCGAC-CT CTTTGCTCAA CAAACCCGCG ACGCCTGTCA ATCATCGACC + ACCCAAA-CG ATACTCGCCA GACATGGCCC GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATACTCGCCA GACATGGCCC GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAAACG ACATTTACCA AACATGGCCC GACTTGCATC TCCAACGCTT + ACCCAAG-CC ATGCTCGCCA AACATGGCCC GACTTGCATC TCCAACGCTC + ACCCAAA-CG ATACTCGCCA GACATGGCCC GACTTGCATC TCCAACGCTT + ACCCAAG-CC ATGCTCGCCA GACATGACCC GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAG-CC ATGCTCGCCA AACATGGCCC GACTTGCATC TCCAACGCTC + ACCCAAG-CC ATGCTCGCCA AACATGGCCC GACTTGCATC TCCAACGCTC + ACCCAAG-CC ATGCTCGCCA AACATGGCCC GACTTGCATC TCCAACGCTC + ACCCGAC-CT CTTTGCTCAA CAAACCCGCG ACGCCTGTCA ATCATCGACC + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC 
TCCAACGCTT + ACCCAAG-CC ATGCTCGCCA GACATGACCC GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAAACG ACATTTACCA AACATGGCCC GACTTGCATC TCCAACGCTT + ACCCAAG-CC ATGCTCGCCA AACATGGCCC GACTTGCATC TCCAACGCTC + ACCCAAG-CC ATGCTCGCCA AACATGGCCC GACTTGCATC TCCAACGCTC + ACCCAAA-CG ATACTCGCCA GACATGGCCC GACTTGCATC TCCAACGCTT + ACCCAAG-CC ATGCTCGCCA GACATGACCC GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAG-CC ATGCTCGCCA AACATGGCCC GACTTGCATC TCCAACGCTC + ACCCAAA-CG ATACTCGCCA GACATGGCCC GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAG-CC ATGCTCGCCA AACATGGCCC GACTTGCATC TCCAACGCTC + ACCCAAG-CC ATGCTCGCCA AACATGGCCC GACTTGCATC TCCAACGCTC + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAG-CC ATGCTCGCCA GACATGACCC GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAA-CG ATGTTCGCCA AAGACAGCCG GACTTGCATC TCCAACGCTT + ACCCAAAACG ACATTTACCA AACATGGCCC GACTTGCATC TCCAACGCTT + + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + CTAGCATGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTGTCGTGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + TCCTAGTCTG GAATGTTTTG CTGACTGCTG CTTTTCT--- GTCTACAGGT + CCCAACTCTG GAATGTTTTG CTGACTGCTG CTTTTTTT-- GTCTACAGGT + TCCTAGTCTG GAATGTTTTG CTGACTGCTG CTTTTCT--- 
GTCTACAGGT + CTACCATGGA AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + TCCTAGTCTG GAATGTTTTG CTGACTGCTG CTTTTCT--- GTCTACAGGT + CTACCATGGA AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTACCATGGA AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTAGCATGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTGTCGTGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTACCATGGA AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTGTCATGGG AAACATTCCG CTGACCATTG ATTGTTTTCT TGCAATAGGT + TCAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + TCAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + TCAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTGTCGTGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTGTCGTGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTGTCGTGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + TCCTAGTCTG GAATGTTTTG CTGACTGCTG CTTTTCT--- GTCTACAGGT + CTAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTGTCATGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + TCAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + TCAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTAGCATGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTGTCGTGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTGTCGTGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTACCATGGA AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTGTCATGGG AAACATTCCG CTGACCATTG ATTGTTTTCT TGCAATAGGT + CTAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTGTCGTGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTACCATGGA AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + TCAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT 
CGCAACAGGT + CTGTCGTGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTGTCGTGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + CTAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + TCAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTGTCATGGG AAACATTCCG CTGACCATTG ATTGTTTTCT TGCAATAGGT + TCAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + TCAGTATGGA AAACATCCCG CTGACCATTG ATTGTTTTCT CGCAACAGGT + CTAGCATGGG AAACATTCCG CTGACCATTG ATTGTTTTCT CGCAATAGGT + + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACACAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTCCAG ACCGGCCAGT GCGTAAGTCT TCCT---AAG CCAAATCCAA + TCATCTCCAG ACCGGCCAGT GCGTAAGTCT TCTTCCCAAG CCAAATCCCA + TCACCTCCAG ACCGGCCAGT GCGTAAGTCT TCCT---AAG CCAAATCCAA + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTCCAG ACCGGCCAGT GCGTAAGTCT TCCT---AAG CCAAATCCAA + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACACAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-AAC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC 
TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTCCAG ACCGGCCAGT GCGTAAGTCT TCCT---AAG CCAAATCCAA + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-AAC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACACAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-AAC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-AAC TCAACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCGACCCAAC + TCACCTTCAG ACCGGCCAGT GCGTAAGTAT CTCCTG-ATC TCAACACAAC + + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---GGTAACC AGATTGGTGC + ---------- ---------- ---------- ---GGTAACC AGATTGGTGC + ---------- ---------- ---------- ---GGTAACC AGATTGGTGC + ---------- ---------- ---------- ---GGTAACC 
AGATTGGTGC + ---------- ---------- ---------- ---GGTAACC AGATTGGTGC + ---------- ---------- ---------- -TTGGTAACC AAATCGGTGC + GAGCCGGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + CCGCCTGATT GCGGGGCTAA CCTCCTTGTA CAGGGTAACC AGATTGGTGC + CCGCCTGATT GCGGGGCTAA CCTCCTTGTA CAGGGTAACC AGATTGGTGC + CCGCCTGATT GCGGGGCTAA CCTCCTTGTA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + CCGCCTGATT GCGGGGCTAA CCTCCTTGTA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + GAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + CCGCCTGATT GCGGGGCTAA CCTCCTTGTA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + GAGCCGGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC 
AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + AAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + GAGCCAGAGT GCGGGGCTAA CTTCTTTGAA CAGGGTAACC AGATTGGTGC + + -------GGG TAAGTGAAGA GACGTCCGAC G-ACACGGCA CTAT-ATACG + ------CTGG TGCGTAGCCA A-CCGCTCAC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAAC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAAC G-ACGCGGCG ATTTCG-ATA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG ATTCCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA AACCGCCATC GGACGCGGCG ATTTCG-ACA + TGCTTTTTGG TATGTGACGA GACCGCCGAC G-ACCCGGCA ATAT-ATACT + TGCCTTCTGG TGCGTAGCCA A-CCGCTCAC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG ATTCCG-ACA + TGCCTTCTGG TACGTGACGA GACCGCCGAC G-ACCCGGCA ATATCATACT + TGCCTTCTGG TATGTGACGA GACCGCCGAC G-ACCCGGCA ATAT-ATACT + TGCCTTCTGG TACGTGACGA GACCGCCGAC G-ACCCGGCA ATATCATACT + TGCCTTCTGG TGCGTAGCCA A-CCGCCAAC G-ACGCGGCG ATCTCG-ATA + TGCCTTCTGG TACGTGACGA GACCGCCGAC G-ACCCGGCA ATATCATACT + TGCCTTCTGG TGCGTAGCCA A-CCGCCAAC G-ACGCGGCG ATCTCG-ATA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAAC G-ACGCGGCG ATCTCG-ATA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + CGCCTTCTGG TGCGTAGCCA A-CCGCTCAC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG 
ATTCCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAAC G-ACGCGGCG ATCTCG-ATA + TGCCTTCTGG TGCGTAGCCA A-CCGCCGAC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG ATTCCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG ATTCCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG ATTCCG-ACA + TGCCTTCTGG TACGTGACGA GACCGCCGAC G-ACCCGGCA ATATCATACT + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCGAC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCTCAC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG ATTCCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG ATTCCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAAC G-ACGCGGCG ATCTCG-ATA + TGCCTTCTGG TGCGTAGCCA A-CCGCCGAC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG ATTCCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAAC G-ACGCGGCG ATCTCG-ATA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG ATTCCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCAGC G-ACGCGGCG ATTCCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCGAC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG ATTTCG-ACA + TGCCTTCTGG TGCGTAGCCA A-CCGCCATC G-ACGCGGCG 
ATTTCG-ACA + CGCCTTCTGG TGCGTAGCCA A-CCGCTCAC G-ACGCGGCG ATTTCG-ACA + + AAGTCGAGGA CGGCAGATGT TGACGATGG- AGTAGGCAAA ACATTTCTGG + TT--TGATAC GATTTCGTAC TGACCTTGG- TACAG-CAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGA- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGAAC TGACCTTGA- TACAGGCAGA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TG--CGAGGA CGGCAGATGT TGACGATGG- AGTAGGCAAA ACATTTCTGG + TT--TGATAC GATTTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TG--CGAGGA CGGCAGATGT TGACGATGG- AATAGGCAAA ACATTTCTGG + TG--CGAGGA CGGCAGATGT TGACGATGG- AGTAGGCAAA ACATTTCTGG + TG--CGAGGA CGGCAGATGT TGACGATGG- AATAGGCAAA ACATTTCTGG + TT--TGACAC GATCTCGTAC TGATCTTGGG CACAGGCAAA ACATCTCTGG + TG--CGAGGA CGGCAGATGT TGACGATGG- AATAGGCAAA ACATTTCTGG + TT--TGACAC GATCTCGTAC TGATCTTGGG CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGATCTTGGG CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGATAC GATTTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGATCTTGGG CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TG--CGAGGA CGGCAGATGT TGACGATGG- AATAGGCAAA ACATTTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA 
ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGATAC GATTTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGATCTTGGG CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGATCTTGGG CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGACAC GATCTCATAC TGACCTCGA- CACAGGCAAA ACATCTCTGG + TT--TGATAC GATTTCGTAC TGACCTTGG- TACAGGCAAA ACATCTCTGG + + CGAGCACGGC CTCGACAGCA ATGGAGTGTA TGTCATGC-- --CCCCTATC + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTTTG TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGC CTCGACAGCA ATGGAGTGTA TGTCATGC-- --CCCCTATC + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTTTG TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGC CTAGACAGCA ATGGAGTGTA TGTCATGC-- --CCCTTATC + CGAGCACGGC CTCGACAGCA ATGGAGTGTA TGTCATGC-- --CCCCTATC + CGAGCACGGC CTAGACAGCA ATGGAGTGTA TGTCATGC-- 
--CCCTTATC + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGCG + CGAGCACGGC CTAGACAGCA ATGGAGTGTA TGTCATGC-- --CCCTTATC + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGCG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGCG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTTTG TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGCG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGC CTAGACAGCA ATGGAGTGTA TGTCATGC-- --CCCTTATC + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTTTG TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGCG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGCG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G 
TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTC-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCTCTT-G TCCTCCAGTG + CGAGCACGGT CTCGACAGCA ATGGCGTGTA TGTCACTTTG TCCTCCAGTG + + TGGCCACATT G-GTGGTAGA CCGCTAAACT CGA-ACAGCT ACAACGGCAC + CGGCTGCCCC ATGGACCTAG TAGCTAATTA TACCACAGGT ACAACGGCAC + CAGCCAGCT- ATGGACCCAA CAGCTAATCA TACCACAGGT ACAACGGCAC + CGGCATCCTC ATGGACCCAG CAGCTAATCA CACCACAGGT ACAACGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + TGGCCACATT G-GTGGTAGA CCGCTAAACT CGA-ACAGCT ACAACGGCAC + CGGCTGCCCC ATGGACCTAG TAGCTAATTA TACCACAGGT ACAACGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + TGTCCACATT G-GTGGTTGA CCGCTAAACT CGA-ACAGCT ACAACGGCAC + TGGCCACATT G-GTGGTAGA CCGCTAAACT CGA-ACAGCT ACAACGGCAC + TGTCCACATT G-GTGGTTGA CCGCTAAACT CGA-ACAGCT ACAACGGCAC + CGGCCGCCCC ATGGACCCAA CAGCTAATCA TGCCACAGGT ACAACGGCAC + TGTCCACATT G-GTGGTTGA CCGCTAAACT CGA-ACAGCT ACAACGGCAC + CGGCCGCCCC ATGGACCCAA CAGCTAATCA TGCCACAGGT ACAACGGCAC + CGGCCGCCCC ATGGACCCAA CAGCTAATCA TGCCACAGGT ACAACGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CGGCTGCCCC ATGGACCTAG TAGCTAATTA TACCACAGGT ACAACGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + CGGCCGCCCC ATGGACCCAA CAGCTAATCA TGCCACAGGT ACAACGGCAC + TGGCTTCCTC TTGGACCCAA TAGCTAATCA TGTCACAGGT ACAACGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT 
ACAATGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + TGTCCACATT G-GTGGTTGA CCGCTAAACT CGA-ACAGCT ACAACGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + TGGCTTCCTC TTGGACCCAA TAGCTAATCA TGTCACAGGT ACAACGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CGGCTGCCCC ATGGACCTAG TAGCTAATTA TACCACAGGT ACAACGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + CGGCCGCCCC ATGGACCCAA CAGCTAATCA TGCCACAGGT ACAACGGCAC + TGGCTTCCTC TTGGACCCAA TAGCTAATCA TGTCACAGGT ACAACGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + CGGCCGCCCC ATGGACCCAA CAGCTAATCA TGCCACAGGT ACAACGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + TGGCTTCCCC GTGGACCCAG CAGCTAATCA TACCATAGGT ACAACGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + TGGCTTCCTC TTGGACCCAA TAGCTAATCA TGTCACAGGT ACAACGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CAGCTACCTC GTGGACCCAG CAGCTAATCA TACCACAGGT ACAATGGCAC + CGGCTGCCCC ATGGACCTAG TAGCTAATTA TACCACAGGT ACAACGGCAC + + CTCTGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GTATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAACTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTATTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC 
GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + CTCTGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GTATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + CTCTGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + CTCTGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + CTCTGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + CTCTGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GTATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + CTCTGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GTATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC 
GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GCATGAGCGT CTACTTCAAC GAAGTTTGTT + TTCCGAGCTC CAGCTCGAGC GTATGAGCGT CTACTTCAAC GAAGTTTGTT + + ACCTTATAGC CCCCAGAGTG CCAGATAAAC ATATTGACGA GTACTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCGAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GCC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ACCTTATAGC CCCCAGAGTG CCAGATAAAC ATATTGACGA GTACTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ACCTTATAGC CCCAAGAGTG CAAGATAAAC ATATTGACGA GTACTGACCT + ACCTTATAGC CCCCAGAGTG CCAGATAAAC ATATTGACGA GTACTGACCT + ACCTTATAGC CCCAAGAGTG CAAGATAAAC ATATTGACGA GTACTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAACC CTATTGACGA ATGCTGACCT + ACCTTATAGC CCCAAGAGTG CAAGATAAAC ATATTGACGA GTACTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAACC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAACC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GCC CCCCAGTGTG C-AGGCAATC CTATTGACGA 
ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAACC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GCC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GCC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ACCTTATAGC CCCAAGAGTG CAAGATAAAC ATATTGACGA GTACTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GCC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAACC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAACC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GCC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA AAGCTGACCT + ATCCTA-GTC CTCCAGTGTG C-AGGCAATC CTATTGACGA 
AAGCTGACCT + ATCCTA-GTC CCCCAGTGTG C-AGGCAATC CTATTGACGA ATGCTGACCT + + TCACTCCTAC CCAGGCTTCC GGCAACAAGT ACGTGCCCCG TGCCGTCCTC + TCTCACCCAA TCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCGA CCAGGCCTCC GGCAATAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCTCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCACTCCTAC CCAGGCTTCC GGCAACAAGT ACGTGCCCCG TGCCGTCCTC + TCTCACCCAA TCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCGCTCCTAC CCAGGCTTCC GGCAACAAGT ACGTGCCCCG TGCCGTCCTC + TCACTCCTAC CCAGGCTTCC GGCAACAAGT ACGTGCCCCG TGCCGTCCTC + TCGCTCCTAC CCAGGCTTCC GGCAACAAGT ACGTGCCCCG TGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCGCTCCTAC CCAGGCTTCC GGCAACAAGT ACGTGCCCCG TGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCGCTCCTAC CCAGGCTTCC GGCAACAAGT ACGTGCCCCG TGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG 
CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA TCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + TCTCACCCAA CCAGGCCTCC GGCAACAAGT ACGTTCCCCG CGCCGTCCTC + + GTCGATTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCTTTCGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTCGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGATTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCTTTCGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGATTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCTTTCGG + GTCGATTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCTTTCGG + GTCGATTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG 
GTCCTTTCGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGATTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCTTTCGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTCGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGATTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCTTTCGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTCGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTCGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG 
GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTCGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCTG GTCCCTTTGG + GTCGACTTGG AGCCCGGTAC CATGGACGCC GTCCGTGCCG GTCCCTTTGG + + CCAGCTCTTC CGCCCCGACA ACTTCGTCTT CGGCCAGTCT ---------- + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAG------ ---------- ---------- ---------- ---------- + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + TCAGCTCTTC CGCCCCGACA ACTTCGTCTT CGGCCAGTCT GGTGCCGGCA + CCAGCTCTTC CGCCCCGACA ACTTCGTCTT CGGCCAGTCT GGTGCTGGCA + TCAGCTCTTC CGCCCCGACA ACTTCGTCTT CGGCCAGTCT GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + TCAGCTCTTC CGCCCCGACA ACTTCGTCTT CGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC 
GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + TCAGCTCTTC CGCCCCGACA ACTTCGTCTT CGGCCAGTCT GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTGTT TGGCCAGTCC GGTGCCGGCA + CCAGCTTTTC CGCCCCGACA ACTTCGTCTT TGGCCAGTCC GGTGCCGGCA + + ---------- ------ + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGGG + ---------- ------ + ACAACTG--- ------ + ACAACTG--- ------ + ACAACTG--- ------ + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGG- + ACAACTGG-- ------ + ACAACTGGGC CAAGG- + ACAACTGGGC CAAG-- + ACAACTGGGC 
CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CA---- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGG- + ACAACTGGGC CAAG-- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGG- + ACAACTGGGC CAAGGG + ACAACTGGGC CAAGGG diff --git a/tests/data/DNA/1.reduced.phy b/tests/data/DNA/1.reduced.phy new file mode 100644 index 0000000..ededfc0 --- /dev/null +++ b/tests/data/DNA/1.reduced.phy @@ -0,0 +1,264 @@ + 23 501 +COLLETOTRICHUM_ACUTATUM_IMI117617_AF411700 ---------- ---------- ----GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_ACUTATUM_A6_PT250_AJ749700 GTTACCGCT- -CTACAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_ACUTATUM_A6_S2_AF411719 GTTACCGCT- -CTACAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_ACUTATUM_OLE_JN121189 GTTACCGCT- -CTACAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_ACUTATUM_67_JN121188 GTTACCGCT- -CTACAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_ACUTATUM_A7_IMI345581_AJ536212 GTTACCGCT- -CTACAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_ACUTATUM_A7_BBA65797_AJ301925 GTTACCGCT- -CTACAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_CLAVATUM_CBS19332_AJ749688 GTTACCGCT- -CTACAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_CLAVATUM_64_JN121168 GTTACCGCT- -CTACAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_SIMMONDSII_1036_JN121208 GTTACCGCT- -CTATAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_SIMMONDSII_PT135_AJ749683 GTTACCGCT- 
-CTATAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_SIMMONDSII_1567_JN121203 GTTACCGCT- -CTATAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_SIMMONDSII_BRIP28519_GU183331 GTTACCGCT- -CTATAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_ACUTATUM_A8_TOM9_AF521205 GTTACCGCT- -CTATAACCC TTT-GTGAAC GTACCTA--A CCGTTGCTTC +COLLETOTRICHUM_ACUTATUM_A1_CA546_AJ749674 GTTACCGCT- -CTATAACCC TTT-GTGAAC GTACCTA--A CCGTTGCTTC +COLLETOTRICHUM_FIORINIAE_ACUVA_JN121191 GTTACCGCT- -CTATAACCC TTT-GTGAAC GTACCTA--A CCGTTGCTTC +COLLETOTRICHUM_FIORINIAE_1409_JN121192 GTTACCGCT- -CTATAACCC TTT-GTGAAC GTACCTA--A CCGTTGCTTC +COLLETOTRICHUM_ACUTATUM_A8_TOM21_AF521196 GTTACCGCT- -CTATAACCC TTT-GTGAAC GTACCTA--A CCGTTGCTTC +COLLETOTRICHUM_CLAVATUM_JG05_AJ300557 GTTACCGCT- -TTACAACCC TTT-GTGAAC ATACCTA--A CCGTTGCTTC +COLLETOTRICHUM_GLOEOSPORIOIDES_STEU4295_AY376532 GTTTACGCT- --GTGAACAT ACCTACAACT GTTGCTTCGG CGGGTAGGG- +COLLETOTRICHUM_GLOEOSPORIOIDES_1765_JN121210 GTTTACGCT- -CTACAACCC TTT-GTGAAC ATACCTACAA CTGTTGCTTC +COLLETOTRICHUM_MUSAE_B15_DQ453986 GTTTACGCT- -CTGCAACCC TTT-GTGAAC ATACCTATAA CTGTTGCTTC +COLLETOTRICHUM_MUSAE_CBS116870_HQ596292 GTTTACGCTT CCTGCAACCC TTTTGTGAAC ATACCTATAA CTGTTGCTTC + + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCCTT CGCGGGCGA- ACCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCCTT CGCGGGCGA- ACCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCGA- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCGA- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGTGGGCGG- ACCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGTGGGCGG- ACCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCTT CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG 
CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGCGGGCCT- CCCCTCCCGG CGCCGGCCCC + GGCGGGCAGG GGAAGCCTCT CGTGGGCGG- ACCCTCCCGG CGCCGGCCCC + ----TCTCCG CGA------C CCTCCCGGCC TCCCGCCT-- CCGGGCGGGT + GGCGGGTAGG G-----TCTC CGCGA----- -CCCTCCCGG CCTCCCGCCT + GGCGGGTAGG G-----TCCC CGTGA----- -CCCTCCCGG CCCCCCGCCC + GGCGGGTAGG G-----TCCC CGTGA----- -CCCTCCCGG CCCCCCGCCC + + -ACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + -ATCACGGGG GCGGAGCGCC CGCCGGAGGA -AATCAAACT CTATTTACAC + -ATCACGGGG GCGGAGCGCC CGCCGGAGGA -AATCAAACT CTATTTACAC + -ACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + -ACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + -ATCACGGGG GCGGAGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + -ATCACGGGG GCGGAGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + -GTCACGGGG GCGGAGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + -GTCACGGGG GCGGAGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + -ACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + CACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + CACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + CACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + CACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + CACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + CACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + CACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + CACCACGGGG ACGGGGCGCC CGCCGGAGGA -AACCAAACT CTATTTTCAC + -GTCACGGGG GCGGAGCGCC CGCCGGAGGA -AACCAAACT CTATTTACAC + CG-GCGCCCG CCGGAGGATA ACCAAACTCT GATTTAACGA CGTTTCTTCT + --CCGGGCGG GTCG-GCGCC CGCCGGAGGA TAACCAAACT CTGATTTAAC + --CCGGGCGG GTCG-GCGCC CGCCGGAGGA TAACCAAACT CTGATTTAAC + --CCGGGCGG GTCG-GCGCC CGCCGGAGGA TAACCAAACT CTGATTTAAC + + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT 
TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GACGTCTCTT CTGAGTGGCA CAAGCAAATA ATTAAAACTT TTAACAACGG + GAGTGGTACA AGCAAATAAT CAAAACTTTT AACAACGGAT CTCTTGGTTC + GACGTTTCTT CTGAGTGGTA CAAGCAAATA ATCAAAACTT TTAACAACGG + GACGTTTCTT CTGAGTGGTA CAAGCAAATA ATCAAAACTT TTAACAACGG + GACGTTTCTT CTGAGTGGTA CAAGCAAATA ATCAAAACTT TTAACAACGG + + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG 
CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + TGGCATCGAT GAAGAACGCA GCGAAATGCG ATAAGTAATG TGAATTGCAG + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + ATCTCTTGGT TCTGGCATCG ATGAAGAACG CAGCGAAATG CGATAAGTAA + + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + AATTCAGTGA ATCATCGAAT CTTTGAACGC ACATTGCGCC CGCCAGCATT + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + TGTGAATTGC AGAATTCAGT GAATCATCGA ATCTTTGAAC GCACATTGCG + + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT 
TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TACTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTCGCCAGCA TTCTGGCGAG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CTGGCGGGCA TGCCTGTTCG AGCGTCATTT CAACCCTCAA GCTCTGCTTG + CCCGCCAGCA TTCTGGCGGG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CCCGCCAGCA TTCTGGCGGG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + CCCGCCAGCA TTCTGGCGGG CATGCCTGTT CGAGCGTCAT TTCAACCCTC + + AAGCACCGCT TGGTTTTGGG GCCCCACGG- AGACGTGGGC CCTTAAAGG- + AAGCACCGCT TGGTTTTGGG GCCCCACGGT CGACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGT CGACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC AGACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC AGACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC CGACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC CGACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC CGACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC CGACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC ACACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC ACACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC ACACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC ACACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGCTTTGGG GCCCCACGGC ACACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC ACACGTGGGC CCTTGAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC CGACGTGGGC 
CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC CGACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTATTGGG GCCCCACGGC ACACGTGGGC CCTTAAAGGT + AAGCACCGCT TGGTTTTGGG GCCCCACGGC CGACGTGGGC CCTTAAAGGT + GTGTTGGGGC CCTACAGCCG ATGTAGGCCC TCAAAGGTAG TGGCGGACCC + AAGCTCTGCT TGGTGTTGGG GCCCTACAGC CGATGTAGGC CCTCAAAGGT + AAGCTCTGCT TGGTGTTGGG GCCCTACAGC AGATGTAGGC CCTCAAAGGT + AAGCTCTGCT TGGTGTTGGG GCCCTACAGC AGATGTAGGC CCTCAAAGGT + + ---------- ---------- ---------- ---------- ---------- + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTA -ACGTCTCGC + TCCCGGAGCC TCCTTTGCGT AGTAACTTTA CGTCTCGCAC TGGGATCCGG + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTT TACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTT TACGTCTCGC + AGTGGCGGAC CCTCCCGGAG CCTCCTTTGC GTAGTAACTT TACGTCTCGC + + ---------- ---------- ---------- ---------- ---------- + ACTGGGATCC GG-AGGGACT CTTGCCGTTA AACCCCCAAA TTTTTT--CA + ACTGGGATCC GG-AGGGACT CTTGCCGTTA AACCCCCAAA TTTTTT--CA + ACTGGGATCC GG-AGGGACT CTTGCCGTAA AACCCC---- ---------- + ACTGGGATCC GG-AGGGACT CTTGCCGTAA AACCCCCAAA 
TTCTTT-ACA + ACTGGGATCC GG-AGGGACT CTTGCCGTTA AACCCCCAAA TTTTTT---- + ACTGGGATCC GG-AGGGACT CTTGCCGTTA AACCCCCAAA TTTTTT--CA + ACTGGGATCC GG-AGGGACT CTTGCCGTTA AACCCCCAAA TTTTTT--CA + ACTGGGATCC GG-AGGGACT CTTGCCGTTA AACCCCCAAA TTTTTT--CA + ACTGGGATCC GG-AGGGACT CTTGCCGTAA AACCCCCCAA TTTTTT-ACA + ACTGGGATTC GG-AGGGACT CTTGCCGTAA AACCCCCAAA TTTTTT-AC- + ACTGGGATTC GG-AGGGACT CTTGCCGTAA AACCCCCAAA TTTTTT-ACA + ACTGGGATTC GG-AGGGACT CTTGCCGTAA AACCCCCAAA TTTTTTTACA + ACTGGGATCC GGGAGGGACT CTTGCCGTAA ACCCCCCCAA TTCTTT-ACA + ACTGGGATCC GG-AGGGACT CTTGCCGTAA AACCCCCCAA TTCTTT-ACA + ACTGGGATCC GG-AGGGACT CTTGCCGTTA AACCCCCAAA TTCTT--ACA + ACTGGGATCC GG-AGGGACT CTTGCCGTTA AACCCCCAAA TTCTTT-ACA + ACTGGGATCC GG-AGGGACT CTTGCCGTAA ACCCCCCCAA TTCTTT-ACA + ACTGGGATCC GG-AGGGACT CTTGCCGTTA AACCCCCAAA TTTTTT--CA + -AGGGACTCT TGCCGTAAAA CCCCCCAATT TTCCAAAG-- ---------- + ACTGGGATCC GG-AGGGACT CTTGCCGTAA AACCCCCCAA TTTTCCAAAG + ACTGGGATCC GG-AGGGACT CTTGCCGTAA AACCCCCCAA TTTTCCAAAG + ACTGGGATCC GG-AGGGACT CTTGCCGTAA AACCCCCCAA TTTTCCAAAG + + - + - + G + - + G + - + G + - + G + G + - + G + G + G + G + G + G + G + G + - + - + - + G diff --git a/tests/data/DNA/4.reduced.phy b/tests/data/DNA/4.reduced.phy new file mode 100644 index 0000000..2202b73 --- /dev/null +++ b/tests/data/DNA/4.reduced.phy @@ -0,0 +1,4182 @@ + 122 1695 +CERCOSPORA_BETICOLA_CBS_116456 ----CCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CONYZAE-CANADENSIS_CBS_135978 -CA-AD--S- S-CBTGCGCC ACGAGCTGTC TTCCGTAAGT ACCGCCACAA +CERCOSPORA_ZEAE-MAYDIS_CBS_117757 -MAYD-S-CB S-CC---GCC ACGAGCTGTC TTCCGTAAGT ACCTCCACAA +CERCOSPORA_CAMPI-SILII_CBS_132625 -S-----CBS -CTG-GCGCC ACGAGCTGTC TTCCGTAAGT ACCTCCACAA +CERCOSPORA_EUPHORBIAE-SIEBOLDIANAE_CBS_113306 -S--B--D-A -CGATGCGCC ACGAGCTGTC TTCCGTAAGT ACCTCCACAA +CERCOSPORA_LACTUCAE-SATIVAE_CBS_132604 -SAT-VA--C BS-ATGCGCC ACGAGCTGTC TTCCGTAAGT ACCTCCACAA +CERCOSPORA_SENECIONIS-WALKERI_CBS_132636 -WA-K-R--C BCGATGCGCC ACGAGCTGTC TTCCGTAAGT ACCTCCACAA 
+CERCOSPORA_CF._BRUNKII_CBS_132657 GTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_APIICOLA_CBS_116457 TTCGCCGGCG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT ACTGCCACAA +CERCOSPORA_SP._J_MUCC_541 TTCGCCGGCG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._A_CBS_132631 TTCGCCGGTG ACGATGCGCC ACGAGCGGTC TTTCGTAAGT GATGCCACGG +SEPTORIA_PROVENCIALIS_CPC_12226 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT ACCTCCACAA +CERCOSPORA_VIGNIGENA_CBS_132611 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT ACTGCCACAA +CERCOSPORA_ZEINA_CPC_11995 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GATACCACAA +CERCOSPORA_CONIOGRAMMES_CBS_132634 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GATGCCACAA +CERCOSPORA_SP._P_CPC_10526 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCC-CCACAA +CERCOSPORA_SP._C_CBS_132629 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTCCCACAA +CERCOSPORA_POLYGONACEA_CBS_132614 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTCCCACAA +CERCOSPORA_SOJINA_CBS_132615 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTCCCACAA +CERCOSPORA_PUNCTIFORMIS_CBS_132626 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTCCCACAA +CERCOSPORA_ACHYRANTHIS_CBS_132613 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTCCCACAA +CERCOSPORA_AGAVICOLA_CBS_117292 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCA--- +CERCOSPORA_SOLANI_CCTU_1043 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._E_CBS_132628 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CF._MODIOLAE_CPC_5115 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_MERCURIALIS_CBS_550.71 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_RODMANII_CBS_113123 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CF._SIGESBECKIAE_CBS_132601 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_FAGOPYRI_CBS_132623 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_OLIVASCENS_CBS_253.67 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT 
GCTGCCACAA +CERCOSPORA_GAMSIANA_CBS_144962 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_UWEBRAUNIANA_CBS_138581 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CHINENSIS_CBS_132612 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_APII_CBS_116455 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CF._RESEDAE_CBS_118793 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_P_MUCC_771 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_CML_3396 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_CML_2985 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_CML_3398 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_CML_3342 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_CML_3391 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_CML_2988 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_CML_2984 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_CML_3412 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_CML_2990 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_COFFEICOLA_CML_3394 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CF._MALLOTI_MUCC_575 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_KIKUCHII_CBS_128.27 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._T_CCTU_1148 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_IRANICA_CBS_136124 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._N_CBS_132619 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CF._RICHARDIICOLA_CBS_132627 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._O_CBS_132635 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC 
TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CF._ERYSIMI_CBS_115059 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._F_CPC_12062 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._Q_CBS_132679 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._Q_CBS_132682 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_DISPORI_CBS_132608 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CF._CITRULINA_CBS_119395 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_PILEICOLA_CBS_132607 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_PLANTAGINIS_CBS_252.67 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_AFF._CANESCENS_CBS_111133 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CAPSICI_CBS_118712 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CYLINDRACEA_CBS_138580 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_ZEBRINA_CBS_118790 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._L_CBS_115477 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_DELAIREAE_CBS_132595 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_RUMICIS_CPC_5439 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_ARMORACIAE_CBS_250.67 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_BIZZOZERIANA_CBS_258.67 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CONVOLVULICOLA_CBS_136126 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._S_CBS_132599 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._D_CBS_132630 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAG +CERCOSPORA_ALTHAEINA_CBS_248.67 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_SP._K_CBS_132603 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_GOSSYPII_CBS_136137 TTCGCCGGTG ACGATGCGCC 
ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_CF._FLAGELLARIS_CCTU_1154 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_CF._FLAGELLARIS_CCTU_1198 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_CF._FLAGELLARIS_CBS_132653 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_CELOSIAE_CBS_132600 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_SP._G_CBS_115518 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_SP._P_CBS_132680 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_CF._NICOTIANAE_CBS_131.32 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_ALCHEMILLICOLA_CPC_5259 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_SP._I_CBS_114815 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_CF._PHYSALIDIS_CBS_765.79 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAT +CERCOSPORA_PSEUDOCHENOPODII_CBS_136022 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACCA +CERCOSPORA_CF._CHENOPODII_CBS_132594 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACCA +CERCOSPORA_SP._P_CBS_112728 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCATAT +CERCOSPORA_SP._P_CBS_112730 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCATAT +CERCOSPORA_SP._P_CBS_112894 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCATAT +CERCOSPORA_SP._P_CBS_115413 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCATAT +CERCOSPORA_SP._P_CPC_4001 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCATAT +CERCOSPORA_SP._P_CPC_4002 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCATAT +CERCOSPORA_SP._P_CBS_113996 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCATAT +CERCOSPORA_SP._P_CPC_5327 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCATAT +CERCOSPORA_SP._B_CBS_132602 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCCCAT +CERCOSPORA_CHENOPODII_CBS_132620 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCTCGA +CERCOSPORA_SP._H_CBS_115205 TTCGCCGGTG 
ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCTACAT +CERCOSPORA_RICINELLA_CBS_132605 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGTCACAA +CERCOSPORA_CF._HELIANTHICOLA_MUC_716 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CPC_10527 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CPC_10552 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CBS_132664 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CPC_5262 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CBS_112649 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CBS_115609 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CPC_11631 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CPC_11633 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CBS_132665 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CPC_11632 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CPC_11630 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CBS_132662 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._P_CBS_132660 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_CF._ZINNIAE_CCTU_1003 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTTCCACAA +CERCOSPORA_SP._M_CPC_10553 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTCCGTACGT GCTGCCACAA +CERCOSPORA_SORGHICOLA_CCTU_1173 TTCGCCGGTG ACGATGCGCC ACGAGCTGTC TTTCGTAAGT GATGCCACGG +CERCOSPORA_CORCHORI_MUCC_585 TTCGCCGGTG ACGATGCGCC ACGAGCTGTG TTCCGTAAGT GCTGCCACAA +CERCOSPORA_VIOLAE_CBS_251.67 TTCGCCGGTG ACGATGCGCC ACGGGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_CF._IPOMOEAE_CBS_132639 TTCGCCGGTG ATGATGCGCC ACGAGCTGTC TTCCGTAAGT GCTGCCACAA +CERCOSPORA_SP._R_CBS_114644 TTCGCCGGTG ATGATGCGCC ACGAGCTGTC TTCCGTGAGT ACTTCCGCTA +CERCOSPORA_CF._COREOPSIDIS_CBS_132598 TTCGCTGGTG ACGATGCGCC ACGAGCTGTC TTCCGTAAGT 
GCTGCCACAA + + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAGG-TGCG A-AGAAGC-- -CTGC-TAA- C-TG------ A-G--CA-AD + TCAG--CGAT G-AGCAGC-G G---GTTC-- A----GG--G ---A-G--MA + TCAG--GAGC A-AGCAGC-- ---GTAC--A AGC-TG---- C-A-G--S-- + TGATC-CACG AGAGC----T GC-GAATG-G G--G---A-G --S--B--D- + TCAG-TGACG A-AGCAG--- GTACAAAA-- -TG-----CA -G--SAT-VA + TGATG-CGCG A-AGC----T GC--GATG-G GC-G---A-C --WA-K-R-- + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ACA C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC TTTGGG--AT + GCAG---CCG C--AAATCGG GCAACAAGC- AAGAGCTGAC ATCGGGA-AC + TCAG--CACG A-AGCAGC-- GCAGGATG-- AAGAGCTGAC ATTGAG--AT + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGGGCTGAC ATTGGG--AC + TCCA---GTG C--AAAGCTG GTGGGAAGGA AAGAGCTGAC GCCGGG--AC + TCAG---ATA C--ACGGCGG GCGGGAAGG- AAGAGCTGAC AATGAG--AC + TCAG---ATG C-AAAACCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ACG T-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ATG C-AAAACCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAACCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ACTGGG--AC + TCAG---ATG C-AAAGCCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + ---------- C-ACAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + ACAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGA--AC + ACAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGA--AC + ACAG---ATG C-AAAGGCTG GCAGGAAGG- GGGAGCTGAC ATTGGA--AC + TCAA---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AC + TCAA---TTG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAA---TTG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAA---TTG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCGG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC 
ATTGGG--AC + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC AATGGG--AC + TCAG---ATG C-AAACGCTG GCAGGAAGG- AGGAGCTGAC ATTGAG--AC + TCAG---ATG C-AAACGCTG GCAGGAAGG- AGGAGCTGAC ATTGAG--AC + TCAG---ATG C-AAACGCTG GCAGGAAGG- AGGAGCTGAC ATTGAG--AC + TCAG---ATG C-AGAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAT---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCCG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATGGAG--AC + TCGG---ACG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ACG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATA C-AAAAGCCG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATA C-AAAAGCTG GCAGGGAGC- AAGAGCTGAC ATTGGG--AT + TGAG---ATA C-AGAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATA C-AGAAGCTG GCAGGACGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATC C-AAAAGCTG GCAGAAAGC- AGGAGCTGAC ATTGGA--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC 
ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATGGGG--AC + TCAG---GTG C-AAAAGCTG GCAGGAAAG- AGGGGCTGAC ATTGGG--AC + TGAG---ATA C-AAAAGCTG GCAGGGAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATGGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-CAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-CAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ATG C-CAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + CCAG---GTG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AA + CCAG---GTG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AA + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ATG C-AAAAGCTG GCAGGGAGG- AGAAGCTGAC ATTGGG--AC + TCCG---ATG C-AAAAGCTG GCGGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ATG C-CAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TGAG---ACA C-AAAAGCTG GCAGCGAGC- AGGAGCTGAC ATTGGG--AT + ACAC---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + TCAG---ATG C-AAAACCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGAAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC 
ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TCAA---TTG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + GCAG---CCG C--AAATCGG GCAACAAGC- AAGAGCTGAC ATCGGGA-AC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AC + TGAG---ACA C-AGAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + TCAA---TTG C-AAAAGCTG GCAGGAAGC- AGGAGCTGAC ATTGGG--AT + CC--TAC--- ---GCTG--- GCCAGCGCTA GGGAGCTGAC ATTGAACTCC + TCAG---ATG C-AAAAGCTG GCAGGAAGG- AGGAGCTGAC ATTGGG--AT + + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + --S-S-CBTG GTTGGACGAC CGGA--ACCA TGGGTATCCG CTCC-GCC-A + YD-S-CBS-C C---GACGAC CGCGCCGCGA --GCTATGCG ATCC-GCC-A + ---CBS-CTG -GTGGACGAC CGCGGCGC-- TCGGTATGCG ACCCCGCC-A + A-CGTCCATT GTTGGAGG-- CCCGCCACCA TCGGCATGCG ATCC-GCC-A + --CBS-CGTT GTTGGACGGC GG--CCACCA TGGGTCTGCG ATCC-GCC-A + CBCGTCCATT GTTGGAGG-- CCCGCCACCA TCGGCATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG AATC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCGTCCATT GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-G + AGCATCCATT GTTGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCT-GCC-A + AGCGTCCATT GTCGGACGAC CGCGCCACCA TGGGTATGCG ACCC-GCC-A + AGCATCCATT GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTTGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG 
ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-G + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCGTCCATC GTCGGACGAC CGCGTCACCA TGGGTATGCG ATCC-GCC-A + AGCGTCCATC GTCGGACGAC CGCGTCACCA TGGGTATGCG ATCC-GCC-A + AGCGTCCATC GTCGGACGAC CGCGTCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTGGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGACACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG 
ATCC-GCC-A + AGCATCCATC GTTGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATT GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATT GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATT GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CACGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ACCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ACCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ACCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCC ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-ACC-A + AGCATCCATC GTTGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTTGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTTGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG 
ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ACCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ACCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ACCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATTC-GCC-T + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTTGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATT GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-G + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCA-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + AGCATCCATT GTCGGCCGAC CACGTCACCA TGGGTATGCG ATCCTGTC-- + AGCATCCATC GTCGGACGAC CGCGCCACCA TGGGTATGCG ATCC-GCC-A + + TCTCCGCCGC TGCAAATTCC T-CCTAACAA GAGCACAGTA 
TCATGATTGG + TCTTCGATTA ---------- ---------- TC--CCGC-C A-AD--S-S- + TCCTCGATTA ---------- ---------- --A---TC-- CCGC-MAYD- + TCCTCGATTA ---------- ---------- -G--TC--CC GC-S-----C + TCCTCGATTA -------A-- ----TC--CC GC-S--B--D -A-TGAAGGA + TCCTCGATTA -----C---- --------TC --CCGC-SAT -VA--CBS-G + TCCTCGATTA ---C------ -CA-TC--CG GC-WA-K-R- -CBTGAAGGA + TCTCCGCCAC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCCCCGCCGC TGCAGAATCA C-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGCAGCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCCTCGATTA TGCC-ATTGA TATCTGACAA GAGCATAGTA TCATGATTGG + TCTCCGCCGC TGCAGAACCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCAGCGC TGCAGAAGCA T-CCTGACAA ACGCACAGTA TCATGATTGG + TCTCCGCCGC TGTGGAAATG T-CCTAACAA GAGCGCAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCAC TGCAGAAACA T-CCTAACAA GAGCATAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCCCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCTGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + CCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + T---CGCCGC TGCAGAATCG A-CCTAACAA GAGCACAGTA TCATGATTGG + CCTCCGCCGC TGCAGAATCG A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAGACG T-CCTAACAA GAGCATAGTA TCATGATTGG + TCTCCGCCGC TGCAGAGACG T-CCTAACAA GAGCATAGTA TCATGATTGG + CCTCCGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAAATTCC T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAAATTCC T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAAATTCC T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAAATTCC T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAAATTCC T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA 
TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TTTCCGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + CCTCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + CCTCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TTTCCGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TTTCCGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGTCGC TGCAGAAACA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAAAGGCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAAACA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAAATTCC T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAACC- T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-TCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCAC TGCAAAATCG T-CCTAACAA GAGCACAGTA 
TCATGATTGG + TCTCCGCCGC TGCAGAATCC ATCCTAACAA GAGCACAGTA TCATGATTGG + TCTCTGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCC A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCAC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCAC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCAC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCAC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGCATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC CGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA 
TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCCCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGCAGCG T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + TCTCCGCCGC TGCAGAATCA A-CCTAACAA GAGCACAGTA TCATGATTGG + -CTGCGCTGA AGCCGGGAGA TGTCTGACAA GAGCATAGTA TCATGATTGG + TCTCCGCCGC TGCAGATTCA T-CCTAACAA GAGCACAGTA TCATGATTGG + + TATGCTCTCT CTTCGTACGT A--------- CAGTTTTCTG AAACAATCAG + CBTGCAGCAT CTA-CTAAGT ATCTTCCCTC CTCTCTTCT- ---------- + S-CBS-CC-- -TTCAGAAGT AA-TTCACTC CTCCCGCCT- ---------- + BS-CTG-GCT CAGCATCGG- CTCATCCCTC CTCCCTTCT- ---------- + TCGG-CCCAT CTTCGTACCT AGCTTCCCTC CTCCCTTCT- ---------- + TAAGCACCCA TCTCATACGT ATCTCCCCTC CTCCCTTCT- ---------- + TAGG-CCCAT CTTCGTACCT ATCTTCCCTC CTCCCTTCT- ---------- + TATGCTCCCT TTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CAATTTTCTG GAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + CATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AGACACCCAG + TATGCTCCCT CTTCGTACGT ATCTTCCCTC CTCCCTTCT- ---TGACCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACCTTTCTG AAACGGCCAG + TATGCTCCCT CTTCGTACGT A--------- CCCTTTTCCG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTGCGT A--------- CACTTTTCGG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CATTTTTCTG GAACAATCAG + TATGCTCCCT CTTTGTACGT A--------- CATTTTTCTG 
AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CATTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CATTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATG------ ---------- ---------- --------TG AAACCATCAG + TATGCTCCCT CTTTGTACGT A--------- CACTTTTCTG AAACCATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACCATCAG + TATGCTCCCT CTTCGTGCGT A--------- CACTTTTTTG GAACGACCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATG------ ---------- ---------- --------TG AAACAATCAG + TATG------ ---------- ---------- --------TG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- -------CAG AAACAAGCAG + TATGCTCCCT CTTCGTACGT A--------- CAGTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CAGTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATG------ ---------- ---------- --------TG AAACAATCAG + TATG------ ---------- ---------- --------TG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + CATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG 
AAACAACCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- -------CAG AAACAAGCAG + CATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACACCCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAACCAG + TATGCTCCCT CTTCGTACGT A--------- CAGTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + AATG------ ---------- ---------- --------TG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACGATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATG------ ---------- ---------- --------TG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTTGTACGT A--------- CACTTTTCTG AAACAATCAG + AATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG GAATAATCAG + TATG------ ---------- ---------- --------TG AAACAATCAG + TATG------ ---------- ---------- --------TG AAACAATCAG + TATG------ ---------- ---------- --------TG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCC- -TTCGTACGT A--------- CAGTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + CATG------ ---------- ---------- --------TG AAACAATCAG + CATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG 
AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTTTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + AATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAACCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATG------ ---------- ---------- --------TG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + CATG------ ---------- ---------- --------TG AAACACCCAG + TATGCTCCCT CTTTGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + AATGCTCCCT CTTTGTACGT A--------- CAC-TTTCTG AAACAATCAG + TATGCTCCCT CTTCGTACGT A--------- CACTTTTCTG AAACAATCAG + + GCAGCCGCGA -GGCAGAGCT AACGACAGCA ACACAGGACA AGGACGGCGA + ---------- ---------- ---------- -G-CA-AD-- 
S-S-CBAAAC + -A-------- ---------- ---------- --A--T--GC G-MAYD-S-C + ---------- ---------- ---------A TTT----G-S -----CBS-C + ---------- ----A-TT-- --G-S--B-- D-A-AAGCAA TGGACGGGCA + --T------- ---------- A-TT----G- SAT-VA--CB S-AAACACTA + ---------- ----A-TT-- --A-WA-K-R --CBAAACAA TGGACGGGCA + GCAGCCGCGA -GGCAGAGCT AACGAGAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGGGA -GGCAGGGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCAA -GACAGAGCT GACCAAAGCA CCACAGGACA AGGACGGCGA + GCACTCGCAA TGGCAAGGCT AACGA-GGTG ACACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCAA -GGCCGAGCT AATTGCACCA CCACAGGACA AGGACGGCGA + GCAGCCGCAA -GGCAAAGCT AACGGAAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA TTACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACCACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACCACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACCACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACAACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCAA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCAA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACCACAACA CCACAGGACA AGGACGGCGA + GCAGCGGCGA -GGCAGAGCT AACGACAGCA TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA ACACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGAGAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACGGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGAGAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGAGAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA 
AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT GACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACCACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -TGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCAA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCAA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACCACAGCA CCGCAGGACA AGGACGGCGA + GCAGCCGCGA -GACAGAGCT AACGACAGCA TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACGGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACGGCA TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGAGAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGAGAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACGGCA TTACAGGACA 
AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGATAGCA CCGCAGGACA AGGACGGCGA + GCAGCTGCGA -GGCAGAGCT AACGATAGCA CCGCAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCACCCGCGA -GGCAGAGCT AACGACAGCG TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCG TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCG TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGTGCT AACGACAGCA TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCG CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAAAGCT AACGACAGCG CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAAAGCT AACGACAGCA TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCG CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCG TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA 
AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAACA CCACAGGACA AGGACGGCGA + GCAGCCGCAA -GACAGAGCT GACCAAAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA TCACAGGACA AGGACGGCGA + GCAGCCGCGA -GGCAGAGCT AACGACAGCA CCACAGGACA AGGACGGCGA + + TGGTATGATG TGCGCCCACC CTC---TGCG AATGTACTGA ACTAACCTCG + CGTTACGACG CCGAGT-TCT -GA------C -CA-CAC--- ---------- + BS-CC---GA ACAATTCTCT -GC----G-C C-CGA----- -CGC--CAC- + TG-GAAACAG TCGATTCGCT --G------G A--------C A-CAC----- + TGGT-CGACG ACGATTCTCC A-CAC----- ---------- ---------T + TGGTAGCACG GC-ATTCGAT -------C-- CATT------ ---------- + TGGT-CGACG ACGATTCTGG A-CACT---- ---------- ---------- + TGGTATGAAG TGCGACCACC CTC---TGCA ACTGCGCTGA ACTAATGTCA + TGGTATGATG TACGCCCACT TTC---TGTG ACTGCACTGA GCTAACCTCA + TGGTATGATG TGCGGCCAAC CTC---TGCG ACTGTACTGC ACTAACCTCA + TGGTACGATG CGCGCCTCCT CTC---TGCG ACTGCTCTGA GCTGACCCCC + TGGTACGACG ACGATTCTCT CCCCAATGCG ACCTGACTAA CACATCATC- + TGGTATGATG TACGCCCACT TTC---TGTG ACTGCACTGA GCTAACCTCA + TGGTACGATG CGGGCGCGGT CTT---TGCG ACTTCACTGA GCTAATCTCA + TGGTACGATG CGCGCACATG CTC---TGCG ACTGCGCTGA ACTAACTTCC + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCACA CTC---TGCG ATTTCAGTGA GCTGACCCCA + TGGTATGATG TACGCCCACT TTC---TGTG ACTGCACTGA GCTAACCTCA + TGGTATGATG TACGCCCACT TTC---TGTG ACTGCACTGA GCTAACCTCA + TGGTATGATG TACGCCCACT TCC---TGTG ACTGCACTGA 
GCTAACCTCA + TGGTATGATG TACGCCCACT TTC---TGTG ACTGCACTGA GCTAACCTCA + TGGTATGATG TGCGCCCACC CTC---TGCG AATGTACTGA ATTAACCTCA + TGGTACGATG CGCACCCACT CTA---TACG ACTTCAATGA GCTGACCTCA + TGGTACGATG CGCACCCACT CTA---TACG GCTGCAATGA GCTGACCTCA + TGGTACGATG CGCACCCACT CTA---TACG GCTGCAATGA GCTGACCTCA + TGGTATGATG TGCGCCCAAT CTC---TGCG ACTGCCCTGA ACTAACCTCA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCT CTC---TGCG ATTGCACTGA ACTAATCTCA + TGGTATGATG TACGCCCACC TTC---TGCG ACGGCACTGA ACTGATCTCA + TGGTATGATG TGCGCCCACC CTC---TGCG AATGTACTGA ACTAACCTCG + TGGTATGAAG TGCGACCACC CTC---TGCA ACTGCGCTGA ACTAATGTCA + TGGTATGAAG CGCGACCGTC CTC---TGCG ACTACACTAA GCTAATTTCA + TGGTATGAAG TGCGACCACC CTC---TGCA ACTGCGCTGA ACTAATCTCA + TGGTATGAAG TGCGACCACC CTC---TGCA ACTGCGCTGA ACTAATCTCA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCT CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCT CTC---TGCG ATTGCACTGA ACTAATCTCA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCT CTC---TGCG ATTGCACTGA ACTAATCTCA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCACT CTC---TACG ACTTCAATGA GCTGACATCA + TGGTATAATG TGCGCCCACC TTC---TGCG ACGGCACTGA 
ACTGACCTCA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTATGAAG CGCGACCGTC CTC---TGCG ACTACACTAA GCTAATTTCA + TGGTATGAAG TGCGACCACC CTC---TGCG ACTGCACTGA GCTAACCTCA + TGGTATGATG GGCGCCCACT CTC---TGCG ACTGCACTGA ACTAATCTCA + TGGTATGAAG TGCGACCACC CTC---TGCA ACTGCGCTGA ACTAATGTCA + TGGTACGATG TGTGCCCAAT CTC---CGCG ATTGCTCTGA GCTGACCTCA + TGGTATGACG TTCGCCCGCC CTC---TGCG ACTGTACTGA ACTAACCCCA + TGGTATGACG TTCGCCCGCC CTC---TGCG ACTGTACTGA ACTAACATCG + TGGTATGACG TTCGCCCGCC CTC---TGCG ACTGTACTGA ACTAACATCG + TGGTATGACG TTCGCCCGCC CTC---TGCG ACTGTACTGA ACTAACATCG + TGGTATGATG TGCGCCCAAC CTC---TGCG ACTGTACTGA ACTAACCTCA + TGGTATGACG TTCGCCCGCC CTC---TGCG ACTGTACTGA ACTAACATCG + TGGTATGACG TTCGCCCGCC CTC---TGCG ACTGTACTGA ACTAACCCCA + TGGTATGACG TTCGCCCGCC CTC---TGCG ACTGTACTGA ACTAACCCCA + TGGTATGAAG TGCGACCACC CTC---TGCA ACTGCGCTGA ACTAATCTCA + TGGTATGAAG TGCGACCGCC CTC---TGCG ACTACACTGA GCTGACCTGA + TGGTACCATG CGCACCCACT CTT---CGCG ATTTCAATGA GCTGACCTCG + TGGTATGACG TTCGCCCGCC CTC---TGCG ACTGTACTGA ACTAACATCG + TGGTATGACG TGCGCCTACC CTC---TGCG ACTGTGCTGA ACTAACCTCG + TGGTATGATG TGCGCCCACC CTC---TGCG AATGTACTGA ATTAACCTCG + TGGTATGATG TGCGCCCACC CTC---TGCG AATGTACTGA ATTAACCTCG + TGGTATGATG TGCGCCCACC CTC---TGCG AATGTACTGA ATTAACCTCG + TGGTATGATG TGCGCCCACC CTC---TGCG AATGTACTGA ATTAACCTCG + TGGTATGATG TGCGCCCACC TTC---TGCG ACGGCACTGA ACTGACCTCA + TGGTATGATG TGCGCCCACC TTC---TGCG ACGGCACTGA ACTGACCTCA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTATGAAG TGCGACCACC CTC---TGCA ACTGCGCTGA ACTGACCTCA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTATGATG TGCGCCCACC TTC---TGCG ACGGCACTGA ACTGACCTCA + TGGTATGATG TGCGCCCACC TTC---TGCG ACGGCACTGA ACTGACCTCA + TGGTATGATG TGCGCCCACC CTC---TGCG ACTGTACTGA ACTAACCTCA + TGGTATGATG TGCGCCCACC CTC---TGCG ACTGTACTGA ACTAACCTCA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA 
ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTATGATG TGCGCCCACT CTC---TGCG ACTGCACTGA GCTAACCTCA + TGGTATGATG TGCGCCCACC CTC---TGCG ACTGTACTGA ACTAACCTCA + TGGTATGATG TGCGCCCACC TTC---TGCG ACGGCACTGA ACTGACCTCA + TGGTATGATG TGCGCCCAAC CTC---TGCG ACTGTACTGA ACTAACCTCA + TGGTATGAAG TGCGACCGCC CTC---TGCG ACTGCACTGA GCTAACCTCA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCACCCGCT CTC---TGCG ATTGCACTGA ACTAATCTCT + TGGTACGATG CGCACCCGCC CTC---TGCG ATTGCACTGA ACTAATCATA + TGGTACGATG CGCGCCTCCT GTC---TGCG ACTGCTCTGA GCTGACCCCC + TGGTATGAAG TGCGACCGCC CTC---TGTG ACTACACTGA GCTGACCTGA + TGGTATGACG CTCGCCCGCC CTC---TGCG ACTGTACTGA ACTAACATCG + TGGTACGATG CGCACCCGCT CTC---TGCG ATTGCACTGA ACTAATCACA + TGGTATGAAG TGCGACCGCC CTC---TGCG ACTACACTGA GCTGACCTGA + TGGTATGATG TGCGCCCATC CTC---TGCG ACTGTACTGA ACTAACCTCG + + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ---------- ----T-CGC- CC-A---CA- AD--S-S-CB TTA---CACT + ---------- ---------- ------T-CG -GCG-GG--M 
AYD-S-CBS- + ---------- ---------T --T-TACGCC -A---S---- -CBS-CTG-G + --T-TACGCC -A---S--B- -D-A-TTC-- -GCTCTGTGC TGCGCGCTCA + -------T-- A-TGCGAC-G -C-SAT-VA- -CBS-TTT-- -GCGCGCTAC + A---TGCGCA TA---WA-K- R--CBTTC-- -GCACTGTCC TGCGCGCTCA + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCTCT + CCCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + CTTCAAGGAC AAATCACCAC CAAGGAGCTC GGCACGGTCA TGCGCTCCCT + -----AGGAC AAATCACCAC AAAAGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + AGCCAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCCAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCCAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + AACTAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + AACCAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + AACCAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTTA TGCGCTCCCT + AGCAAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA 
TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCCAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCAAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCAAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ATCTAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCCAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA 
TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACTAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACTAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACTAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACTAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCAAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCAAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCAAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCAAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGTACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGTACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCAAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + GCCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA 
TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + CTTCAAGGAC AAATCACCAC CAAGGAGCTC GGCACGGTCA TGCGCTCCCT + ATCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACCGTTA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAACTC GGCACTGTCA TGCGCTCCCT + ACCGAAGGAC AAATCACCAC CAAGGAGCTC GGCACTGTCA TGCGCTCCCT + + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAAGTCG + --T-C-A-G- GCTGA-C--A AACCA----- ---------- C--G------ + CC----TT-- -------TCA ---G-GCT-A -C--ATACCA ---------- + TT------T- --TGC---G- GCT-A-C--A ACCGA----- ---------- + -C--AACCGA ---------- -----C--G- ---------- C--------- + ---G-ACT-A TC--GACCGA ---------- -----C--G- ---------- + -C--AATCCA ---------- -----C--G- -------T-- C--------- + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AATGAGGTCG + CGGCCAGAAC CCCAGCGAAT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAAT CTGAGCTGCA GGACATGATC AACGAAGTGG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAAGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC 
AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGTCAGAAC CCCAGCGAGT CCGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + AGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAAGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AATGAGGTCG + CGGTCAGAAC CCCAGCGAGT CGGAGCTGCA GGACATGATC AACGAAGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AATGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AATGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + AGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC 
AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGTCAGAAC CCCAGCGAGT CGGAGCTGCA GGACATGATC AACGAAGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGATATGATC AATGAAGTCG + CGGTCAGAAC CCCAGCGAGT CCGAGCTGCA GGATATGATC AACGAAGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AATGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AATGAGGTCG + CGGTCAGAAC CCCAGCGAGT CCGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTTCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTTCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTTCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTTCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CGGAGCTGCA GGATATGATT AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CGGAGCTGCA GGATATGATT AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGATATGATC AAGGAAGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCACAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC 
AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + TGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT TTGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGTCAGAAC CCCAGCGAGT CTGAGCTGCA GGATATGATC AATGAAGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGTCAGAAC CCCAGCGAGT CCGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CTGAGCTGCA GGACATGATC AACGAGGTCG + CGGCCAGAAC CCTAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + CGGTCAGAAC CCCAGCGAGT CCGAGCTGCA GGATATGATC AACGAGGTCG + CGGCCAGAAC CCCAGCGAGT CCGAGCTGCA GGACATGATC AACGAGGTCG + + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + -----C---- ---------- -CA-AD--S- S-CB------ ---------- + -----C--G- ---------- C--------- ------MAYD -S-CBS-CC- + C--G------ -----C---- ---------- -S-----CBS 
-CTG-G---- + ------S--B --D-A-CACT ATCGATTTCC ----G--C-- ---------- + ---------- ------SAT- VA--CBS-CC -------T-- ------G--- + ------WA-K -R--CBCACT ATCGATTTTC ----G--C-- ---------- + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACTATGATG + ATGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAGTTCCT CACCATGATG + ACGCCGACAA CAACGGCACC ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGATAA CAATGGCACT ATCGATTTCC CGGAGTTCCT CACCATGATG + ATGCCGACAA TAACGGCACA ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ATGCCGACAA CAACGGCACG ATCGATTTCC CCGAGTTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTTC CCGAATTCCT TACCATGATG + ATGCCGACAA TAACGGCACA ATTGATTTCC CCGAATTCCT CACCATGATG + ATGCCGACAA TAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ATGCCGACAA TAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ATGCCGACAA TAACGGCACA ATCGATTTCC CTGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTTCT ---------- + ATGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTTCT CACCATGATG + ATGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTTCT CACC------ + ATGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ATGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTTCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACTATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT 
CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCTGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ATGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTTCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT TACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAATGGCACA ATCGATTTCC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT AACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTTCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACA ATCGATTTCC CGGAATTCCT CA-------- + ACGCCGACAA CAACGGCACA ATCGATTTCC CGGAATTCCT CACC------ + ACGCCGACAA CAACGGCACT ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT 
CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACT ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACT ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACT ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACT ATCGATTTCC CCGAATTCCT CACCATGATG + ATGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTTCT CACCATGATG + ATGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTTCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGATAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ATGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTTCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTTCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGGACA ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAGTTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ATGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCC- ---------- + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTTCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT TACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT 
CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACG ATCGATTTTC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACC ATCGATTTCC CCGAATTCCT ---------- + ACGCCGACAA CAACGGCACG ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACC------ + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + ACGCCGACAA CAACGGCACG ATTGATTTCC CCGAATTTCT CACCATGATG + ACGCCGACAA CAACGGCACA ATCGATTTCC CCGAATTCCT CACCATGATG + + G--------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + --G--C---- ---------- ---------G --C--T---- -C-----C-- + -----T---- ---------- G--C------ ---------- -------G-- + ---------- ----G--C-- ---------- ---------- -G--C--T-- + ---------- -G--C--T-- ---C--T--C -----A---- -------S-- + ---------- -T-------- ------C--- -----C---- -C-----G-- + ---------- -G--C--T-- T--C-----C -----G---- -C-----WA- + GCC------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GGCAGA---- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAG- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCAGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TG-------- ---------- ------TCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCAGCTCG + GCCA------ --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA 
AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCTGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAA--- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCTA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA T-AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCCGCAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA T-AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA 
AGGCCGCTCG + GCCAGA---- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GGCAGA---- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA T-AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAA--- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCTA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAAA---- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCA------ --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TG-------- ---------G CTCGCCTCCA AGGCCGCTCG + G--------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + G--------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCGTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA 
AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTTGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCAGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGA---- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCAAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCC------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCC------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCC------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCC------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCC------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCC------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCC------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCC------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + ---------- ---------A ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCGTCCA AGGCCGCTCG + ---------- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAAAGA TGAAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + GCCAGAA--- --AAGGCCCC ACGTAAGCAG CTCGCCTCCA AGGCCGCTCG + + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + ---A------ -----CA-AD --S-S-CB-C --A--T---- ---------- + C--------C -----C---- -G-------- ---MAYD-S- CBS-CC---- + ---C--T--C -----A---- -------S-- ---CBS-CTG -G-C--A--- + B--D-A-GCA -A-------- ---------- ---------- 
---------- + ---------S AT-VA--CBS --C--A---- -G-------- ---------- + K-R--CBGCA -A-------- ---------- ---------- ---------- + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + TAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGATACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTATAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCC CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC 
CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAATTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCCACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC 
CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC 
CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCCACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + CAAGTCCGCA CCATCTACCG GTGGTGTCAA GAAGCCTCAC CGCTACAAGC + + CAGGTAAGCA TCGAGTCG-C CTCGACTT-- CAC--ATCCA C-AACACCAT + ---------- -------A-- ---------- ---------- ---------- + C--A------ ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- --------S- -B--D-A--- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- --------WA -K-R--CB-- + CAGGTAAGCA TCGAGTTA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTACGCA TCGAGTC--C TTCGACTA-- CTCAGATTCA C-AGCACGTT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CTGGTAAGCA TACAGTCG-C GCCCACTTGA C----TTCTG C-AACATGAT + CAGGTAAGCT TCACATCGCA GCTCACTC-- C-C--ACTCA C-AACATTGC + CAGGTAAGCA TCGAGTCA-T TTCGACTT-- C-C--ATCCA C-AACACCAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACAAT + CAGGTAAGCA TCGAGTCG-C ATCCGCGT-- CAC--ACTCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C CTCGACTG-- C-C--ATCCA C-AACGCGAC + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACATGCC + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATGCA C-AACATGCT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-------AT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACATGCC + CTGGTAAGCA TGCAGTCA-C TTCGACTT-- C-C--ACCCA C-AACACGAT + CAGGTAAGCA TCGAGCCA-C CCCGACTT-- CTC--ATCCA 
C-AACGCCAT + CAGGTAAGCA TCGAGTCG-C CTCGACTT-- CCC--ACCCA C-AACACGAT + CAGGTAAGCA TCGAGTCG-C CTCGACTT-- CCC--ACCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATACA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTTA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCG-C CCCGACTT-- CAC--ATCCA C-AACACCAT + CAGGTAAGCA TTGAGTCA-C TTCGACTT-- C-C--ACCCA C-AACACGAT + CAGGTAAGCA TCGAGTCG-C CTCGACGT-- CAC--ATCCA C-AACACCAT + CAGGTAAGCA TCGAGTCA-C TTCGACTG-- C-C--ATCCA C-ACCACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTTA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGGGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGGGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTTA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCG-C CTCGACTT-- CTC--ATACA C-AACGCGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCC C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TTGAGTCA-C TTCGACTT-- C-C--ACCCA 
C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-T--ATCCA C-AACACAAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACTAT + CAGGCAAGCA TCGAGTCG-C CTCGACTT-- CAC--ATCCA C-AACACCAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA CCGAGTCA-T TCCCACTG-- C-C--ATCCA C-AACACGAC + CAGGTAAGCA TCGAGTCA-C CTCGACTT-- C-C--ACCCA C-AACACGAT + CAGGTAAGCA TCGAGTGGCC CTCACCGT-- A-C--ATCGA C-AACACAAC + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ACCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ACCCA T-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- A-C--ACCCA C-AACACGAT + CAGGTAAGCG TCGAGTCA-C TTCGACTT-- C-C--ATCCA G-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGCCTT-- C----ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACCAT + CAGGTAAGCA TCGAGTCA-T TTCGACTG-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ACCCA C-AACACGAT + CAGGTAAGCA TCAAGGCA-C TTCGACAT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCG-C CTCGACTT-- CAC--ATCCA C-AACACCAT + CAGGTAAGCA TCGAGTCA-C TTCGACTG-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTG-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTGA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCC C-AACACGAT + CAGGTAAGCA TCGAGTCC-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCC C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCC C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTTGACTT-- C-C--GTCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCG C-AACACGAT + CAGGTAAGCA TCGAGTCA-C CTCGACTT-- C-C--ATCCG C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA 
C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGGCA-C TTCGACTG-- G-C--ATCCA C-AACAAGAT + CAGGTAAGCA TCGAGTCA-C GTCGACGT-- C-C--ACCCT CAAACGCGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- G-C--ATCCA C-AGCACGAT + CAGGTAAGCA TCAAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAC + CAGGTAAGCA TCGTGTGA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA CCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA CCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTTA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTTA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAAGCA CCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CTGGTAAGCA TAGAGTCG-C GCCCACTTGA C----TTCCG C-AACATGAT + CAGGTAAGCA TCGAGTCG-C CTCGACTTAA C----ATCCA C-AACACGAC + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACCAT + CAGGTAAGCA TCGAGTCA-C TTCGACTT-- C-C--ATCCA C-AACACGAT + CAGGTAATCA CCGAGTCA-C TTCGACTT-- C-C--ATCCA C-GACACGAT + CAGGTAAGCA TCCAGTCA-C TTCGACTT-- C-C--ACCCA C-AACACGAT + + CTAAC-ATTC ATTCCC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + ----T----C A-AD--S-S- CB-------- ---------- ---------- + ---------- ---A------ ---------- MAYD-S-CBS -CC------- + ---------- ---------- ---S-----C BS-CTG-G-- ---------- + ---------- ---------- ---------- C--------- -------A-- + --SAT-VA-- CBS------- ---------- ---------- 
------C--- + ---------- ---------- ---------- C--------- -------A-- + CTAAC-ATTC TTTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CATTTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TCTCAC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATGC ---CTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ACCT CCTC----AG GTACCGTCGC TCTCCGTGAG ATCCGTCGTT + CTAAC-ATTC CATCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATGC TCTCAC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-CCTC CATCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-CCTC CATCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-CCTC CATCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-CCTC CATCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATCC TCTCTCTTAG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATCC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATCC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGTT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TTTCTC--AG GTACCGTCGC TCTTCGTGAG ATCCGTCGCT + CTAAC-ATTT CCTTCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC ATTCCC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAACTATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC ATTCCC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG 
ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATGC --TCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TTTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTT CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTT CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TTTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-AGTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTG CATCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CATCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC ATTCCC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTT CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATAC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC GCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-GTTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATCC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-AGCC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC ATTCCC--AG GTACCGTCGC TCTCCGTGAG 
ATTCGTCGCT + CTAAC-ATTA CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTA CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAT-ATTC CCTCTC--AG GTACCGTCGC TCTGCGCGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-AATC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-GTTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGTT + CTAAC-ATTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGTT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATGC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TTTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TTTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG 
ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATGC ---CTC--AG GTACCGTCGC TCTCCGTGAG ATTCGTCGCT + CTAAC-ATTC CCTCCC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ACTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATAC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC CCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + CTAAC-ATTC TCTCTC--AG GTACCGTCGC TCTCCGTGAG ATCCGTCGCT + + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ----C----- ---------- -A--GAGC-A -CCTCG---T ----AAC-TC + ---------- ---G------ --------C- ---------- -----A--GA + ---------- ---------- C--------- -------A-- GAG--A-CTT + GAG--A-CTT CG---T---- ----TC---S --B--D-A-- -----CGAT- + ---------- ---A--GAG- -A-CTTCG-- -T-------- TC---SAT-V + GAG--A-CTT CG--ATGT-- ---TTC---W A-K-R--CB- -----CCAT- + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACTGAGCTC CTCATCCGCA AGCTCCCATT CCAGCGTCTC + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGTTGCCATT CCAGCGTCTT + ACCAAAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAACTC CTCATCCGCA AGCTCCCATT CCAGCGTCTC + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT 
CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTCCCATT CCAGCGTCTG + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTACCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGTTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT 
CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATTCGCA AGCTTCCGTT CCAGCGTCTG + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATTCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAATC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAATC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT 
CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTT CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTT CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTT CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACTGAGCTC CTCATCCGCA AGCTCCCATT CCAGCGTCTC + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCGTT CCAGCGTCTT + ACCAGAAGTC GACCGAGCTC CTCATCCGCA AGCTGCCATT CCAGCGTCTT + + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + ---CA-AD-- S-S-CB---- ---CAT---- ---TTC---- TC-------- + C---TCTT-- ---TGA---- --T----MAY D-S-CBS-CC ---------C + CG---T---- ----TC---S -----CBS-C TG-G------ -GCT------ + -----C-TC- A--TC----- ---------- ---------- -------C-- + A--CBS---- ---CGAT--- ----TTC--- -TC------- ---------- + -----CGTC- ---TC----- ---------- ---------- 
-------C-- + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TTGCTCAAGA CTTCAAGTCC GACCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGCGAAA TCGCTCAAGA TTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCCCAAGA CTTCAAGTCC GATCTTCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAAA TCGCTCAAGA TTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTTGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCCCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAATCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT 
TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGAT TCCAGAGCTC + GTTCGTGAGA TTGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA TTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAAA TCGCCCAAGA TTTCAAGTCC GACCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GACCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT 
TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCGCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCCCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGTTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT 
TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGTTC + GTTCGCGAAA TCGCTCAAGA TTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTCCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + GTTCGTGAGA TCGCTCAAGA CTTCAAGTCC GATCTCCGCT TCCAGAGCTC + + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + ---T------ ---------- ----C----- ---------- -------CA- + AA-------- TACT---TC- ---------- ---------- ---------- + C-TC-A--TC ---------- ---------- ---------- --C------- + ---------- ---------- S--B--D-A- ---------- --C------- + ---------- -----C---- ---------- --------SA T-VA--CBS- + ---------- ---------- WA-K-R--CB ---------- --C-C----- + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAGG AGTCCGTCGA GGCTTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCTTACCTC GTCTCCCTCT + TGCCATCGGC GCCCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCCCTTCAAG AGTCCGTCGA GGCTTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCTTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCGCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC 
GTCTCCCTCT + TGCCATCGGC GCCCTTCAAG AGTCCGTCGA GGCTTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC 
GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCTTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCTTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCTGTCGA GGCTTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTGCAAG AGTCCGTCGA GGCCTATCTG GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCCCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCATACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AATCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AATCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC 
GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATTGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCTTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCCCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTCCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCATACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + TGCCATCGGC GCTCTTCAAG AGTCCGTCGA GGCCTACCTC GTCTCCCTCT + + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + AD--S-S-CB ---------- --C------- ---------- --T------- + -C-------- ---------- ----MAYD-S -CBS-CC--- --T------- + ---------- -----S---- -CBS-CTG-G ---------- --C----C-- + ---------- --T------- ---------- ---------- ---------- + ---------- --C------- ---------- --T------- ---------- + ---------C --T-----A- -------G-- T--------- ---------- + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA 
GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAAGACAC CAACCTCTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTTTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TTGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TTGAGGACAC CAACCTGTGC GCCATCCACG CCAATACCGA GTGAGGGCCT + TCGAAGACAC CAACCTGTGC GCTATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAA------ ---------- + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAAGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAAGACAC CAACCTTTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAAGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA 
GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGG GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCCATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAATCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TTGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATTCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAAGACAC CAACCTGTGC GCCATCCACG CCAA------ ---------- + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGATAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAAGACAC CAACTTGTGC GCAATCCACG CCAATACTGA GTGAGGGCCT + TTGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TTGAGGACAC CAACCTGTGC GCCATCCACG CTAATACTGA GTGAGGGCCT + TTGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA 
GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAA------ ---------- + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTCTGC GCCATTCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TTGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAA------ ---------- + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAA------ ---------- + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCATG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAA------ ---------- + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA 
GTGAGGGCCT + TCGAGGACAC CAACCTGTGT GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAAGATAC CAACCTCTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAAGATAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TTGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + TCGAAGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGC-T + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGC-- + TCGAGGACAC CAACCTGTGC GCCATCCACG CCAATACTGA GTGAGGGCCT + + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + ---------- ---------- ---------- ---------- -CA-AD--S- + --C----T-- ---------G --------A- ----C----- ---------- + ---------- --T------- ---------- ---------- ---------- + ---------- -S--B--D-A ---------- ---G------ ---------- + ---------- --C------- ---------- -SAT-VA--C BS-------- + ---------- -WA-K-R--C B--------- ---C--T--- ---------- + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTCGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + ----GCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + CCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACCACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT 
GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCTCCG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT 
GCTTCGGGGG + ----GCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + ----GCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT 
GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + ----GCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + ----GCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + ----GCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + TCACGCC-CG ACCTCCAACC CTTTGTGAAC CAAACTTGTT GCTTCGGGGG + TCGGGCT-CG ACCTCCAACC CTTTGTGAAC ACAACTTGTT GCTTCGGGGG + + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + S-CB------ ---------T ---------- -------A-- ---------- + --C--G---- ---------- -MAYD-S-CB S-CC------ ---T-----C + ---------- -S-----CBS -CTG-G---- ---------- ---------- + ---------- ---------- ---------- -------G-- ---------- + --------T- ---------- ------T--- ---------- ---------- + ----T----- ---------- -----A---- -------G-- ---------- + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG 
GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG ACCGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GG----TTCG A-CGGCGAGC GCTCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG G-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG 
GCCTTCAAAC + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTTCCAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-TAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG 
GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG 
GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + CGACCCTGCC GGCGAACTCG T-CGCCGGGC GCCCCCGGAG GTCTTCTGAA + CGACCCTGCC GT----TTCG A-CGGCGAGC GCCCCCGGAG GCCTT-CAAA + + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + --------A- ---------- G--------- -----CA-AD --S-S-CB-- + --T------- ---------- ---------- --G--T---- -A-----A-- + ---------- ---------- ---------- ---------- --G------- + --S--B--D- A--------- ---------- ---------- ---------- + ---------G ---------- ----SAT-VA --CBS----- ---------- + --WA-K-R-- CB-------- ---------- ---------- ---------- + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCC TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CCCTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTGAACAA 
AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CCCTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CCCTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA 
AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA 
AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTTAAAACA AATTAAACAA AACTTTCAAC + CACTGCATCT TTGCGTCGGA GTTT-AAGTA AATTAAACAA AACTTTCAAC + + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + ---------- ---------- ---------- ---------- ---------- + -T-G------ --------MA YD-S-CBS-C C--------- ---------- + -------S-- ---CBS-CTG -G-------- ---------- ---------- + ---------- ---------- ---------- ---------- ---S--B--D + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---WA-K-R- + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG 
AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG 
AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG 
AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG 
AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + AACGGATCTC TTGGTTCTGG CATCGATGAA GAACGCAGCG AAATGCGATA + + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + ---------- ---------- ---------C A-AD--S-S- CB-------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + -A-------- ---------- ---------- ---------- ---------- + ---------- -------SAT -VA--CBS-- ---------- ---------- + -CB------- ---------- ---------- ---------- ---------- + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT 
TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT 
TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT 
TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + AGTAATGTGA ATTGCAGAAT TCAGTGAATC ATCGAATCTT TGAACGCACA + + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + ---------- ---------- ---------- ---------- ---------- + ---------- -----MAYD- S-CBS-CC-- --------C- ---------- + ---S-----C BS-CTG-G-- ---------- ---------- ---------- + ---------- ---------- ---------- ----S--B-- D-A------- + ---------- ------C--- ---------- ---------- ---------- + ---------- ---------- ---------- ----WA-K-R --CB------ + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCTT TGGTATTCCG AAGGGCATGC CTGTTCGAGC 
GTCATTTCAC + TTGCGCCCTC TGGTATTCCG GAGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCTT TGGTATTCCG AAGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCTT TGGTATTCCG AAGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC 
GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC 
GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC 
GTCATTTCAC + TTGCGCCCTT TGGTATTCCG AAGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCTT TGGTATTCCG AAGGGCATGC CTGTTCGAGC GTCATTTCAC + TTGCGCCCCT TGGTATTCCG AGGGGCATGC CTGTTCGAGC GTCATTTCAC + + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + ---------- ---------- ---CA-AD-- S-S-CB---- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------S + ---------- ---------- ---------- ---------- ---------- + --C------- SAT-VA--CB S--------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TAGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TGGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TAGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCGA + CACTCAAGCC TAGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG 
CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG 
CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTC TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG 
CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TAGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + CACTCAAGCC TGGCTTGGTA TTGGGCGTCG CGGTGTTCCG CGCGCCTTAA + CACTCAAGCC TCGCTTGGTA TTGGGCGCCG CGGTGTTCCG CGCGCCTCAA + + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + ---------- ---------- ---------- ---------- ---------- + ---------- --MAYD-S-C BS-CC----- ---------- ---------- + -----CBS-C TG-G------ ---------- ---------- ---------- + ---------- ---------- -----S--B- -D-A------ ---------- + ---------- ---------- ---------- ---------- ---------G + ---------- ---------- -----WA-K- R--CB----- ---------- + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCCAAG CGTTGTGATT 
TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCCAAG CGCTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGGGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCCAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGACT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT 
TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT 
TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGCCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT 
TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + AGTCTTCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGGAT TTTTCAATTC + AGTC-TCCGG CTGAGCTGTC CGTCTCTAAG CGTTGTGATT TCATTAA-TC + + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + ---------- -------CA- AD--S-S-CB ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------- ---------- ---------- ---------- -----S---- + ---------- ---------- ---CT----- -----AG--- ---------- + ---SAT-VA- -CBS------ -------A-- ---------- ---------- + ---------- ---------- ----T----- -----A---- ---------- + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGT GCGGGTGGCC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTTGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT 
ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC 
ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC 
ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TT ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ATAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + GCTTCGGAGT GCGGGTGGCC GCGGCCGTTA AATCTTTATT CAAAGGGAGA + GCTTCGGAGC GCGGGCGGTC GCGGCCGTTA AATCTT--TC ACAAGGGAGA + + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -T-TTTGTCG CACAAA---- + ---------- --------CT ---------- AG-------- ---------- + ---------M AYD-S-CBS- CC-------- ---------- ---------- + -CBS-CTG-G ---------- ---------- ---------- --------CT + ---------- ------S--B --D-A----- -------C-- ---------- + ---CT----- -----AG--- ---------- ---------- ------SAT- + ---------- ------WA-K -R--CB---- -------A-- ---------- + AGGTGAGCAA CTACCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC GCGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAC CCGCCAAAGC GCGCCTGCA- -C-TTCGTCG CACAAA---- + AGGTTAGTCG CCGCCATCAC ACACACGCAC AC-TCCATCG CACGAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -T-TTTGTCG 
CACAAA---- + AGGTGAGCAA CT-CCCGCAC GCGAACGCA- -C-TTCGGCG CACAAA---- + AGGTGAGCAA CTGTCAGCAC GCGAACGCA- -C-TTCGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-CGTTTCG CACAAA---- + AGGTGAGCAA CTGCCCGCAC ACGAATGCA- -C-TTTGCCG CACAAA---- + AGGTGAGCAA CTGCCCGCAC ACGAATGCA- -C-TTTGCCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGCCG CACAAA---- + AGGTGAGCAA CTGCCCGCAC ACGAATGCA- -C-TTTGCCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-CTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-CTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-CTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -CTTTTATCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGGCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -T-TTTGTCG CACAAA---- + AGGTAAGCAA CTGCCAGCAC ACAAATGCA- -T-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ATAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCG- -T-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -T-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG 
CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATCGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-CTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -T-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCGA GTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -T-TTTGTCG CACAAA---- + AGGTGAGCAA -CGCCATCCC ACAGACGCAC TC-TCCATCG CACTAA--G- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCTAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA ---------- ---CACGCAC CC-TCGATCG TATGAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAG TTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTACCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-CGTTTCG CACAAA---- + AGGTGAGCAA CTGCTAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTACCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTACCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTACCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTACCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGGCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG 
CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGGCG CACAA----- + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAG TTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAATTGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAC CCGCCAGAGC GCGCCTGCA- -C-TTCGTCG CACAAA---- + AGGTGAGCCA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG 
CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTGAGCAA CTGCCAGCAC ACAAATGCA- -C-TTTGTCG CACAAA---- + AGGTAAGATA TCGCCATCAC TCTCTGGCGG -C---CGCCG CTCGACTAAA + AGGTGAGCAA CTGCCAGCAC ACGAATGCA- -C-TTTGTCG CACAAA---- + + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------- -CA-AD--S- S-CB------ -----C---- ---------- + ---T------ ----T----- -----A---- ---------- ---------- + ---------- AG-------- ---------- ---------- -S-----CBS + ---------- ---------- ---------- ---------- ---------- + VA--CBS--- ---------C ---------- ---------- ---------- + ---------- ---------- ---------- ---------- ---------- + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCTGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCTCCGCT TATCGCCTTC GCGCTGGTGC CCC---TCCA + ---------T TTTCGCTGCT TATCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGCTGC CCC---TCCA + ---------T TTTCGCCGCT TATCGCCTTT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTTT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTTT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTTT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTTT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC MAYD-S-CBS + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTTGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCCGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC 
-TC--GTCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---GCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---GCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T CTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT CGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + -----A-T-T T--C-CTTGT TATCGACT-- ---------- --------C- + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + C----T---T GTTCGCTACT TAT--C-TTT GCGCTGGAGC ---------- + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC 
CCC---TCCC + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT CGTCGCCTCT GCGCTGCTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC S-----CBS- + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC -----CTCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------- ---------- ---------- ---------- ---------- + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGTCGCT CGTCGCCTCT GCGCTGGTGC 
CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCTCCGCT TATCGCCTTC GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC -TC--ATCCA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + ACACCATCTT TTTCGCT-CT TATCATCGTT GCGCTGGCGA CGAGGGGCAA + ---------T TTTCGCCGCT TGTCGCCTCT GCGCTGGTGC CCC---TCCA + + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + ---------- ---------- ---------- ---------- ---------- + ------MAYD -S-CBS-CC- ---------- ---A------ ---------- + -CTG-G---- -------C-- ---------- ---------- ---------- + --T----S-- B--D-A---- ---------- ---------- ------C--- + ---------- ---------- ---------- ----T----S AT-VA--CBS + --T----WA- K-R--CB--- ---------- ---------- ------C--- + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCATCGC + GAT-----TT GGTGGGGTGC GAG----AAA TTCGGCGCTT GGG------- + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----GT GGTGGGGTGC GAG----AAT TTCGGCGCTT 
TGGGCTTCGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTTCGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGTGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + -CC-----T- --TGGGATAA CTGGC--AAA TTCGGCGCT- -----T---- + AAG-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + CTT-----TT ---GGGGAGA -----C-AAA TTCGGCGCTT ------T--- + AAA-----TT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT 
TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAT-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAGAATTAAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + ---G------ -C-------- --G-----AA TTAGA----- CGG------- + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGATT TGGGCTCTGC + ---------- C----G---- ---------- ----GCG--- GGG-A-A--- + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGATT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + CTG-G----- -GTGGAGA-- ---C---AAA TTCGGCGC-- --G-T----- + AAT-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAT-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT 
TGGGCTCTGC + AAT-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + GAT-----TT ----GGTTGC GAG----TAA GCTCTGCCGC -TC--G---- + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + ---------- ---------- -------AAT TTCGGCGCTT TGGGCTCTGC + AAT-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCATCGC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + CTT-----TT ---GGGGAGA -----C-AAA TTCGGCGCTT 
------T--- + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + AAT-----TT GGTGGGGTGC GAG----AAT TTCGACTC-- --GGCTCCAC + AAA-----CT GGTGGGGTGC GAG----AAT TTCGGCGCTT TGGGCTCTGC + + CGC-TTGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + T----CA-AD --S-S-CB-- ---------- ---------- -------C-- + ---------- ---------T ---------- ---------- --------T- + ---------- ---------- ---------- --T----S-- ---CBS-CTG + --C--T---- ---------- ---------- ---------- -G--CAA-S- + ---------- ---------- ---------- C-----C--T ---------- + --C--T---- ---------- ---------- ---------- -G--CG--WA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCGATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TTGCAA TGACTTCAT- ---------- ---------- -CGGCTATGA + ----CAGCCA TGATCTCAT- ---------- ---------- -CCGCGATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + GGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCCATGA + CGC-TTCCAA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CGGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + ----CATCCG CTTTGCGGC- TC--G----C T--------- ------A-G- + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTGTGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCGATGA + ----CAGTCA GCTCTGCCGC -TT--G---- CT-----CA- AD--S-S-CB + CGC-TTGCGA TGACTTCAT- ---------- ---------- 
-CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TTGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TTGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- A-A-----C- ---------- + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TTGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- -------A-- ---------- ---------A + CGC-TTGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + ----C----A TTATCTCAT- --T--GCTCT GCCGC-TC-- G----CT--- + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGCTTCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + --C-CAGCCA TGATCTC--- ---T------ ----T--GCT CTGCCGCTTC + CGCTTCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- 
-CCGCTATGA + CGCTTCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGCTTCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGCTTCGCGA TGACTTCAT- ------CCGC TATGACTTCA TCCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- WA-K-R--CB ---------- ---------- + CGCTTCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + ----CTGCGC TCTGCCGC-T C--G----CT ---------- -----A---- + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CT--CA-CCA ---TCTCAT- -----C---- ---------- ------T--- + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGTGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- SAT-VA--CB S--------- ---------- + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- 
-CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + CGC-TCGCGA TGACTTCAT- MAYD-S-CBS -CC------- ---------- + CGC-TTGCAA TGACTTCAT- ---------- ---------- -CGGCTATGA + CGC-TCGCGA TGACTTCAT- S-----CBS- CTG-G----- ---------- + ----CAGTCA GCTCTGCCGC TTC--G---- CT-------- ---------- + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + AGC----CAA TGACTTGATC TCAAGCCCAG TATCCATTCC GCCTCCATCA + CGC-TCGCGA TGACTTCAT- ---------- ---------- -CCGCTATGA + + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + ---C--T--- ---------- ---------- ---------- --G--CAA-C + ---MAYD-S- CBS-CC---- ---------- ---------- --------C- + -G-------- ---------- ---------- -C-----C-- T--------- + -B--D-A--T ----G----- GA-T------ --TTGC---- -A-------- + ---------- ---------- -----G--CA A-SAT-VA-- CBS--T---- + -K-R--CBAT ----G---G- -A--A----G -CTTG----- -A-------- + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGGA-TT-- -GGGCATACC GC---CTGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCCCA CCACT-TA-- -GAGCATATC GC---CAGCA + CTCTTCCT-C CTACCGCCAG ACGC--TTGA GGGGTACACC GATAGCAGCG + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCCCA ACGTGTTT-- -GGGCATTCC GC---CGGCG + TTCCTCG--C CCACCGCCCA ACGCA-TC-- -GGGGACACC 
GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--T CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--T CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--T CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--T CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + ----C----- ---------- -----T---- ------T--G CTCTGCCGC- + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -TGGCATACC GT---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + ---------- ---------- -CGC-TGTGA GGGCTAG--C GCTAGCATCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCCCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGGA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--A CCACCGTTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC 
GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + ---------T ---------- T--GCTCTGC GGC-TC--G- ---CT----- + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCTTCCT-C CCACG--CAC ACGC-TCAGA GGA-TA---C GACATCAGCC + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + ---------- ---------- ---C--TTGA GGGGTCCAG- -ATCGCAGCT + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + --G----CT- ---------- ---------- -----ACACT GATAGCACCG + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCC-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ATGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + -ACTTTCT-C CTACCGG--G ACGC--TTCA GGGGA-CC-- -CTACCTGCG + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + ---C------ ---------- ----T----- -----T--GC TCTGCCGC-T + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC 
GC---CAGCA + -------T-- GCTCTGCCGC -TC--G---- CT-------- ---------- + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + ---TTCCT-C CTACCCCCG- -CGC--TTGT CAGGTAA-CC ---AGCCGTG + CTCTTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TC-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATGCC GC---CAGCA + CTCCTCG--C CCACCACTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + -------T-C CTACCGCCAC GCG---TCGA GGGGCACACT A-TA---GCG + CTCCTCG--C CCACCGCCCA CCACT-TA-- -GAGCATATC GC---CAGCA + ------CT-C TTACCGCCCG AG--T-CTGA GGTCAACACA -AT---AGCC + ---------C CTTCCGCCAG CCGG--TTCA GGGGTCAACC GA-AG---CG + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC 
GC---CAGCA + CTTCTGGCAC CGACAGCGAT GTTCATTT-- -GTGAACAAT GG---AAGC- + CTCCTCG--C CCACCGCTCA ACGCA-TT-- -GGGCATACC GC---CAGCA + + CACCACACAT CTG---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + A-AD--S-S- CB-T----G- ----GA-T-- -----CTTG- -----A---- + ----C--T-- ---------- ---------- ---------- ---G--CAA- + ---------- ---------- ------G--C AA-S-----C BS-CTG-G-T + ---------- ----C----G ------T--- ---------S --B--D-A-- + G-----GA-T --------TT G------A-- ---------- ---------- + ---------- ----C----G ------T--- ----A----W A-K-R--CB- + CACCACATAT CTC---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + CACCACTCAC TCGCACCACG ACCACAC--- AT-GACGTCA AGAACAGAC- + CACCACACTC TTG---CACG ACCACAC--- AT-CACATCG AGAATAGAC- + CACAACACA- ------TACG TCCAGACATG ATTCACGTCG AAGATGGAC- + AACCACACTT CG----CACA ACATCTC--- CTTCGTATGC AAGGCATGCC + ---CACGCTC AGA----TC- ACCTCCA-GA ATTGAT--GG AAGGCATGCG + -TC-AACCTC AGGAATCGAA --AGCTC--G CTTGCTTTGC AAGGCATGCA + CACAACACAT ACA---CCCA CACACGC--- TT-CACAACA CGGATAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + SAT-VA--CB S-C---CACA ACATG----- CTTCCAAAG- --TG-AACCC + CACCACACAC TTACACCACG AGCACAC--- AT-CACATCA AGAATACAC- + CACCACATAC TTGCACCACG ACCACAC--- AT-CACATCA AGAATAGAC- + CACCACATAC TTGCACCACG ACCACAC--- AT-CACATCA AGAATGGAC- + CACCACATAC TTGCACCACG ACCACAC--- AT-CACATCA AGAATAGAC- + CACCACACAT CGG---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + CAACACACAT CTG---CACG ACCACAC--- AT-CACATCA AGAATAGAC- + TC--G----C T--------- ---------- ---------- -TG------C + ---CACACTC AGA----TC- AACTCCA-GA ATTGAA--GG AAGGCGTGCG + CACCATACAC TTG---CACG ACCACAG--- AT-CACATCA AGAATAGAC- + S--B--D-A- CG----CAAC TTGCACC--G CTCAGAA--- AA-GACTGCA + ---CACGCTC AGA----TC- ACCTCCA-GA ATTGAT--GG AAGGCATGCG + CACCACACAT CTG---CACG ACCACAC--- TT-CACATCA AGAATAGAC- + CACCACACAT TTG---CACA ACCACAC--- TT-CACATCG AGAATAGAC- + ----A----- ----C-T--- -C-------A ---------- ---------- + CACCACACAT CTG---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + CACCACACAC TTG---CACG ACCACAC--- AT-CACATCA 
AGAACAGAT- + CACCACACAT CTG---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + CACCACACAT CTG---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + ---------- ---------- -----T---- ---C--G--- -C-----TCA + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + TA-CACACCA CGAAT-GAC- -CGTCTC-G- CTGCTTATGC AAGGCATGAC + AACGACCATA C---A--AAC ACCACGA-T- GAT--TATGC AAGGCAAGTT + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + ---------- ---------- -----T---- ---C--G--- -C-----TCA + ---TA-AACT CGA-GAAACG AC--CGC--- CGTCGGCTGC AAGGCATGCC + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAT CTG---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + CACCACACTT CTG---CACA ACCACAC--- TT-CACATCA AGAATAGAC- + -------ACA -AD--S-S-C BCAACTC--A CTTG---TGC GAGCAAAG-- + CACCACACAT CTG---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + CA----A--- ------C-T- ---C------ -ACCTTATGC AAGTG---CC + CACCACACTC TTG---CACG ACCACAC--- AT-CACATCG AGAATAGAC- + CACCCCACAC TTG---CACG ACCACAC--- TT-CACATCG AGAGAAGAC- + G----C---- -TCA----A- --------C- T----C---- ---ACATGCC + CACCACACAC TTG---CACG ACCACAC--- TT-CACATCG AGAATAGAC- + CACCACACAT CTG---CACA ACCACAC--- TT-CACATCA AGAATAGAC- + CACCACACAC TTG---CACG ACCACAC--- TT-CACATCG 
AGAATAGAC- + CACCACACAC TTG---CACA ACCACAC--- TT-CACATCG AGAATAGAC- + CACCACACAC TTG---CACG ACCACAC--- TT-CACATCG AGAATAGAC- + CACCACATAT CTC---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + CACCACACAT CTG---CACA ACCACAC--- TT-CACATCA AGAATAGAT- + -C-----A-A ---------- ---------- -----T---- ---C--G--- + CACCCCACAC TTG---CACG ACCACAC--- TT-CACATCG AGAGAAGAC- + CACCACACAT CTG---CACA ACCACAC--- TT-CACATCA AGAATAGAC- + CACCACATAT CTC---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + CACCACATAT CTC---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + ---------- ---------- ---------- ---------- ---------- + S-----CBS- CTG-GCCACA ACTACTTGCA CTTCGTACAC AA---AT-CA + C--G----CT ---------- ---------- ---------- T-------C- + CACCACACAT TTG---CACA ACCACAC--- TT-CACATCA AGAATAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAT CTG---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + CACCACACAT TTG---CACA ACCACAC--- TT-CACACCA AGAATAGAC- + -AC-AACCTC AGGAATCGAA --ATCTC--G CTTACTTTGC AAGGCATGCA + ---------- --T------- C--G----C- ----TCA--- -A-------- + CACCACACAT CTG---CACA ACCACAC--- TT-CACATCA AGAATAGAC- + CACCACACAT CTG---CACA ACCACAC--- TT-CACATCA AGAATAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + -AACACCATG AA--GAC--A GCATCTG--- GCTCGTATGC AAGGCAAGCC + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + ---CACACTC AGA----TC- AACTCCA-GA ATTGAT--GG AAGGCGTGCG + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + ---CACACTC AGA----TC- AACTCCA-GA ATTGAT--GG AAGGCATGCG + -TC-ACCCTC AGGAATCGAA --AGCTC--- CTTGCTATGC AAGGCATGCA + CACCACACAT CTG---CACA ACCACAC--- TT-CACATCA AGAATAGAC- + CACCACACCG TTG---CACG ACCACAC--- TT-CACGTCA AGAATAGTC- + CACCACACAT TTG---CACA ACCACAC--- TT-CACACCA AGAATAGAC- + ---C------ -A-------- ---------- -------T-- -----C--G- + ---CACACTC AGA----TC- AACTCCA-GA ATTGAT--GG AAGGCGTGCG + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + ---CACGCTC AGA----AC- AACTCCA-GA ATTGAT--GG 
AAGGCGTGCG + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + -TC-AACCTC AGGAA-CGAA --AGCTC--G CTTGCTATGC AAGGCATGCA + --CGACACTT CG-GCTTACA ATATCTC--- CCTCGTATGA AATG---GCC + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + WA-K-R--CB CG-A--CAAA GCACA-C--- CTTAGAA--- TA-TCCTCCA + CACCACACAA CTG---CACA ACCACAC--- TT-CACATCA AGAACAGAC- + CACCACACAT CTG---CACG ACCACAC--- TT-CACATCA AGAATAGAC- + C-T----C-- -----A---- ---------- ---------- -T-------C + AACGACCATA C---T--ACC ACCACGAAT- GAT--TGTGC AAGGCAGCTC + -T----C--- A---A----- ---------- ---------- T-------C- + -C-T----C- ------ACCA ACATCACTTG ---CGTCTGC AAAG---TC- + CACCACACAT CTG---CACA ACCACAC--- TT-CACATCA CGAATAGAC- + MAYD-S-CBS -CC---CACA ACATA----- --TGGTCTCA AAGGCCTGCC + CACCACACAT CTG---CACG ACCACAC--- TT-CGCATCA AGAATAGAC- + + -TGCTGACAA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + ---------- --------C- ---G------ ---------- ---CA + MAYD-S-CBS -CC----T-- --G---G-GA ---------- T-G-- + ---CG----- GA-T------ -CTTGC---- -A-------- ----- + ---------A -A-----C-- ---------- --------T- ----- + C----G---- ---------- -----SAT-V A--CBS---- ----- + ---------A -A-----C-- ---------- --------T- ----- + -TGCTGACAA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGTTTCCCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACAA TGGCTTCTCA TAGGAAGCTG -T-G-----C ---CC + TTACTGACAA TGTTGCCTCA CAGGAAGCTG ---G-----C ---CC + CTACTGACAA TGTTGACCCA CAGGAAGCTG ------G--C A-A-- + TCACAGACAA --CG---TCA CATCAAACCC TTCATACG-C G-T-A + -TGCTGACGA TGGCTTCTCA CAGGAAGCTG -T-G-----C ---CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -TCTAT-TGA A--GT + CAAGAATCGA T--TGCCTCA GAGGGCTCTG ---------- A-CCC + -TGCTGACGA 
TGGCTTCTCA CAGGAAGCAG ---G-----G ---GC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG ---G-----G ---GC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG ---GT----C ---CC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG ---G-----G ---AC + -TGCTGACAA CGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + -TACTGACGA TGACTTCTCA CAGGAAGCTG -T-G-----C ----- + --G----C-- ---TCA---- A--------- C-T----C-- ---A- + CTACTGACAA TGTTGACCCA CAGGTAGCTG ------G--C A-A-- + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG T--GAC--CA -GAAC + CGAATGGAA- -GGTGCCTGA CAGCTTGCTG --------A- C---- + CTACTGACAA TGTTGACCCA CAGGAAACTG ---------C A-A-- + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG --TG-----C ---CC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + -----T---- ---C--G--- -C-----TCA ----A----- ----C + -TGCTGACAA TGGCTCCTCA CAGGAAGCAG -T-G-----C ----- + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG ---G-----G ---GC + -TGCTGACAA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACAA TGGCTCCTCA CAGGAAGCAG ------T--C ---CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG ---G-----G ---GC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG ---G-----G ---CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG ---G--G--C A-AC- + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -A---AC--T G-GAA + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -A-A-AC--C ACGAA + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -A---AC--C A-GAA + ----A----- ----C-TG-- -C-------A C-------AA -TG-- + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C ----- + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C ----- + CTACTGACAA TTG---CTCA CACAAAG--- T--GAC--CA -GAAC + TTACTGACAA TGAACCCTCA CAAAATG--- ------CA-A ----T + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG ---------- ---GC + ----A----- ----C-T--- -C-------A C-A-----A- -TG-- + TAACTGACAA TATTTG---A CAGGACACAG ---T---ACC CCACG + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG 
------T--C ---CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C ---CC + -TGCTGACAA TGGCTCCTCA CAGGAAGCAG ---G-----G ---GC + -TGCTAACAA TGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + -AA-TACCAC AGGAATCGAA --GGAAGCTG ---ACTT--- ----- + -TGCTGACAA TGGCCCCTCA CAGGAAGCAG -T---AC--C A-GAA + TGACCAAAA- --TT-CCCCA CAGGAATCGA ----G----- ----G + -TGCTGACAA TGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG CA-AD--S-S -CBCT + TTACTTG--- TGTGGCCACA C---TA-CAC --CG-GAATC GA--- + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T---AC--C A-GAA + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG ---G---CAC A---- + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG ---G-----G ---GC + -TGCTGACAA TGGCTCCTCA CAGGAAGCAG -T-G-----C ----- + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + -C-----TCA ----A--C-- ----C-T--- -C-----A-A C--CC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG ---G-----G ---GC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACAA TGGCTCCTCA CAGGAAGCAG -T-G-----C ----- + -TGCTGACAA TGGCTCCTCA CAGGAAGCAG C------TA- -TC-- + ---------- ---------- ---------- ---------- ----- + CTACAGGAAA TGAT--CGCA CAGGAAGGCT T--------- ---A- + -G----C--- --TCA----A ---------C -T----C--- ----A + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG ---G-----G ---GC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG WA-K-R--CB ---CC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG --GG----GC --GCT + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + TCACTGACAA TTTG---TCA CAGCAAAC-- -T---AC-CC A-GAA + -C-T----C- ------ACCA CAGGAAGTTG ---------C A-A-- + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T-G-----C ----- + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T---AC--C A-GAA + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + TTACTGAAAT G---GCCTCA CAGAA---TG --AC--CA-G AA-CG + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + CTTCTGACAA TGTTGACCCA CAGGAAACTG ---G-----C A-AC- + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + 
CTACTGACAA TGTTGACCCA CAGGAAACTG ---G-----C A-AC- + TCACTGACAA AGTG---TCA CAGCAAAC-- -T---AC--C A-GAA + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + -TACTGACGA TGGCTTCTCA CAGGAAGCAG ---G-----C A-AC- + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG -T-G-----C T--CC + ---C-----T CA----A--- ------C-T- ---C------ -AC-- + CTTCTGACAA TGTTGACCCA CAGGAAGCTG ---------C A-A-- + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG --CG--A--G ----C + CTTCTGACAA TGTTGAACCA CAGGAAACTG ---G-----C A-AC- + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + TCACTGACAA AGTG---TCA CAGCAAAC-- -T---AC--C A-GAA + TTACAGAC-- -TT-GACTCC AAGAAAGGAG ---G-----G ---GC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + TGACTGGAT- -GGTGCCTGA CAGCTAGCTG ---------- C--CC + -TGCTGACGA TGGCTCCTCA CAGGAAGCAG -T-G-----C T--CC + -TGCTGACGA TGGCTTCTCA CAGGAAGCAG SAT-VA--CB S--TC + --G----C-- ---TCA---- A--------- C-TG---C-- ----- + TTACTGACAA TGATCCCACA CAA------- T--GT-CAGA -ATGA + -G----C--- --TCA----A ---------C -T--T-C--- ----A + TACCTCGCGA ATTGAC--CG CAGGAGGCTG CTT------- ----- + -TGCTGACGA TGGTTTCTCA CAGGAAGCAG --GG----GC --GCT + ATGCCGAGAA TGGAG--TGA CAGGCAGCCA CT-------- ----- + -TGCTGACAA TGGCTCCTCA CAGGAAGCAG --GG----GC --GCT diff --git a/tests/data/DNA/5.reduced.phy b/tests/data/DNA/5.reduced.phy new file mode 100644 index 0000000..a1a1ae5 --- /dev/null +++ b/tests/data/DNA/5.reduced.phy @@ -0,0 +1,22 @@ + 1 522 +TAXON0 TTTTCAAAGC AATGTGTGAC AGGTACAGGG ACAAATCCCG TTAATAAGTA + + AGAGGATTTG TGCTTGGCTC TGTCACATGC CACTTTGAAA A--------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- 
---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- -- diff --git a/tests/data/DNA/small.reduced.phy b/tests/data/DNA/small.reduced.phy new file mode 100644 index 0000000..a1a1ae5 --- /dev/null +++ b/tests/data/DNA/small.reduced.phy @@ -0,0 +1,22 @@ + 1 522 +TAXON0 TTTTCAAAGC AATGTGTGAC AGGTACAGGG ACAAATCCCG TTAATAAGTA + + AGAGGATTTG TGCTTGGCTC TGTCACATGC CACTTTGAAA A--------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- ---------- ---------- ---------- + + ---------- ---------- -- diff --git a/tests/data/MORPH/0.reduced.phy b/tests/data/MORPH/0.reduced.phy new file mode 100644 index 0000000..e6c5649 --- /dev/null +++ b/tests/data/MORPH/0.reduced.phy @@ -0,0 +1,5 @@ + 4 8 +AMYTORNIS 00000000 +STIPITURUS_MALACHURUS 11211111 +STIPITURUS_MALLEE 12102021 +STIPITURUS_RUFICEPS 22102021 diff --git a/tests/data/MORPH/1.reduced.phy b/tests/data/MORPH/1.reduced.phy new file mode 100644 index 0000000..0b247f6 --- /dev/null +++ b/tests/data/MORPH/1.reduced.phy @@ -0,0 +1,6 @@ + 5 38 +LARIX_OCCIDENTALIS 0-01----1- ---------- ---1------ -----0-- +PSEUDOTSUGA_JAPONICA 0011110001 0001111110 0001100001 10100110 +PSEUDOTSUGA_MACROCARPA 0100-01011 1100000101 1111000110 01000001 +PSEUDOTSUGA_MENZIESII 0100101010 1100000001 1111000110 01100101 +PSEUDOTSUGA_SINENSIS 1011010101 0011111110 0000111001 10111110 diff --git a/tests/data/msa_test_data.parquet 
b/tests/data/msa_test_data.parquet index 20f52f89080ecc41d3a8ab9f7e1cc868159e77f8..9dfe46891d9fadc46e71cdcbbffb31224fc6782f 100644 GIT binary patch literal 18662 zcmc&+3w%@6{lDo05@?}76H=}IYlXW`N}HrDwun2qNokterfr~U64a7co0OXe&7%)= zLt%njWxj{b*A$%(J}3?q`Kzl;_eT*_<{Tn2+!Pri)TuZ()cwxAxyeoLO~ubj|Cijy zdHsIB^F6=wJHPY0x3p1awLC|DB1isYROi5(LrGYHn++R9A$&gGJK` zm1KIc6mlu5viiI{LIY>jd3m`gUzNM4PzD9{ru<81T7w-MDQIYFr5qEc1F zD_}9?guFtoDF-nNzxL9APnsb&F}NjjscH57jhUVpp8O)PD+oarB+quG8Li0)H1dw zl5>dcY;tr`Qu?V_@~-Y+pWhK-Ixr$p?2?e*)8sFb{m!J0=$R64u7tywN*q?=zo1xwe_yws@aGAuRArx8_>Y1$mfY&Og@V9_CW zo+)A0tR7#P0)3I;S*95sOFDZ3?pJTvJ2YR~vLAQryF#fAVaZp0OG?m;x z1!F8iQ=|uke5PE1ks}xdR!SsF384V>#CJGkB`kDn>VW^WVmVPXcM?&goaG7zA`V|5 z+~tmjXrIdw@q`&6mMW~|!Td#Obc&!M{KKT+c4iz22|;cwNN_wi!Ld6^(_KA|P*>Qq zGU^Gqc$`P3kHtAhepn)3FKGs4!P*qcnMBAWM6PUd4slMtlrX_5hjT5QQ{XIuvlY&I zICJ0}%9kzzbHE(6@jLXUOkSSUJf(DPyhJI*1CI|tQzRD(3Y%GMb0!y*>X*i@@hNs3 z1T%1LVX-`iGXsIBzboPxa4_KBpRL%NMIM|Uj*kSIN#2f=G;3;R+cj*OD4@}oeCFKo=i%7kzcmx8{7jpSY=^GHw_O}Uc81B z8*6-EJcPMWz{5kcG`4ZaaQ@2OfXDsHBpx!1N7%<{8vwFDi#)9$UzGzOLZTU1WddZF z2S{FAH>89U1M(XHViBC3aIOJBhI3?P`Jgx#=F5oZWwUdL5NOAzO#cXCPy8;_j^Vgc zq-uvSH!~V96erQh!)SyYk%*@+z)n8^WM3A!@m%tF5db2jivbbl5~6_ImnFXsmf$(e zWVumNLYF=M>*uc9iHLm3=ugyNJvw7~?*VlE)7u?4Ox=TQPpS`p+rAyWTfW4yf88py zX!a{dUJP$VYu~efI=bX}bmtxTcozNni*=_o(!J=~gNv(a@7?I7pWgP$k(RsA{OQg^ z8&%s-+vF(x_M@j+Zuf%&x$PMQM zz(VR7c!~o2hD5##H0~S*U@rl*(p-x=$Qy-$q1k?WR#=Rd3jJ*ViAQIagQSHTuMH^v5q>pL_mIZ=pF0 z-h5V3`Zx5cybFHsV9?TLC#$HVC>1neZi+q<7wA3f9$A&Qd*W(!^6j9PBm>(<$*A7yzH;3%%Lz2&`cl&~ zKl&V%pZo0a@TFg%16N%BdD+P?k^eyZhNtwW(Wkjrz54k5Cs3ei_r8+*PN3+A(%mQj zu?JzslI+pea_0@PV97qTa%9i$nXYHhdyh+Es~s<*qDN1ke42g*+5d0*`Gwbhgvzh@ z{Y{N0KS4WY{`>m>IrJGi@YIxlDVq?qA>NDkf=f-LihTs>q{_ zZdiWddDY!o)834pp1tKJ z?WMO5K4iM)SK3e1&#!&yT8~z!s$2`Dl;ef_Bg_TtGAtz9)Jfo`$IWIJA3(64o3qIc zDsp`V04OA#fuZQ+P6u$x>63e2X*G|mt@x#C&e0!?^zQgz^fUDXBbJ?>ANO=TH1gPa zt)-W3y=$cKwf`kAU48#Z>Gp;{v{pYj^7fbKUv%=Kmq#{!_+(U*0o9 zENq@NF#C5Sx2|6EJGqR7Gg-oTsR;DbsCt!%P)6m6Y#3H;`1TaUkF 
zI$p2RHm<3LUxBtGSJJG~zWcNvej4oqKd;zv^!^L9cith9l)Zzt7~UT$tXQn=mPpEv z=(JbP59~a>vMuZYF!D|FBP>+jxwy-G_+By(14v@d+P41NW&f_kD_tGvzuzXjS#0QcuxekiNW zD{9hKNhCR+T`ntAtzM&j&VF$1S638i*RPX+>n>B~5sS3l0PXqt1#Uun$${ruSH3b& zJKuf5RNJl^?K60=mdz7E77!x{pvNKm6@#oW7jiNi$zdvq?97DID1dceHhCD4J1zxS zg$yvP!rYAZ@=(%VmRzYH%>S_FEm)r=-Ve|#*F1PjvGo|z@BHkPYR+-AhoEly;>nY! zG*>c89YZ%pe)9ej+c9+Qon1u_fAt}H>fQS`7r1_ccD~iHvfcM*ghsRB_c(g2W!FZ> z-LIj^Q-1OGi=}TNR^dMqS=KoQN-0MR_eGcsy~|)LhZ~akDFm9C;(>3^CbLG=Fj5hdl?Cq|5MhJ?95H-nPC-Ii3+LPxBptFPPb3rAo}1UUJF0oE;oRsG>? z@`+Y*OA8FGkUkz-VXpWz56Laf@@b5U))HVkrBC50PK@MB^G4BQkAmSlmS2!_9OeF? ztm8q(u_TDYxbLNMk{>02KEZ+g?hfX7Flpmba-q@;EgL1sp1gijyLoQWO*s(1TAJ+m z1w5xa8t_3#q$>jZmT(#h4`(TE&LSUPLOyLyDEJxh7uBSZEcpnmA#8B=KFRV&j3d&2 zZG909zo$Ai`sMG@%bIrO(CItaonl^eXV&^pwxf$z%sl+wk8efuW`^&6C*nq@$S)5C zuJ{{zbmhXmORGLcH`?~Ur@7{*=()>xovY8;fDRhJey#nUk5Sj_1KGA$kD`JnfBC_} z$3I3p_ilT;>h^VL-#~Ho(SMGhW1l&9d|v)X^yb{ys`q4nMQgk6sjr=N1YJdpvHRE8=l?xqaQ;K39~Eu)4QCM=_j3-fts! zS^#w+3#@y>Tru5Cn8EJ|)Df$vlh<952%Amug}dUBXvS>d_}rGu$BdE{omSQm%1L3D z*W-7{D+qbs+^l3gdM?q7Z8MX}5A*W#lOa`pZfeMqD*sSGI`TWO2>efD5;CU~?1hFq;&2=GZb3cZ4m-u+GLi z!e6pj_|ujJ4pH1kQESL@`EGl`))Q4~c2syuMJUROii=LqK6@`O&d!U4@p=VtHiGiw z6KEgb;Q2&Er6(lPOHNP#bpk_)6CHz_fCv;haj_b50-_q^L~r!T3Elsa6TVxVsNEbn z&f73@g10RHuR8^Df_>!w%I{TdLW(j{syMZ6@JrIz`2vvz)j|*w*<+!A$ufaeagK?iac0SV zKOoMPUpkJAc`+;eYF)@%q(U0|ZaU}?1)Ene7Ti1ziFegw%8^oQoP~+Mg%tAV6^t{> zv=^X4)*=7&HUTd6%NyglTBBYk3G3K15 z)RE$H(}#)KL-ctZkzgO=+S5K~pQCROd-kD0M_*sCAG43YG{m{;i}8!dQgF99jpY?; zDl`>UrB@TmJj!ft?Oe#l|CNW;mH1ddEuko?9Zq~)Mp4z2u7_%%s2)n+!{q67Ddqh* zb8(_v-)>^laAl}4dIp>N9Gx{QjJ?4gW1z|F^t)-JufNCWukkuNJ8+$-yK^AitfzdgDzn!WXoozi zKc0@c^u`6odLx&n*>pplbqS^s=SA0roPIOyF<6E$pU$eV&*^W7*q1--`nBCCf!?Hl#h^vwA zb~n=f3|)Ow%;pc#wyJivywjqxb(+0y!(!Ygz{3)?Y3f!$-vdnFX-kLMr>Zsv)Ty#3 zaZ1Q#^s9$#mO7AkAdZziWbYhUVbyS$fjm}|*Q&9HoN@VFh6cdDVaQr#u5m#gVCZvq z)`YMOATRDe;H>WVSa^N(*ZCZNORrnclt=7VvlnQnYzf>3>YDYqFG;?3KOMG%Y|g5t zh|S9O9r9?W!Q7p|oWXj42KizxzuFt8-){pwuvtxf4Ao9Q-RsntIh{~Dt-2b1ADo(| 
zmG(}Piq#o4?KU(34YM~fevPWIv5`04`X;8Fqk+dxL_R^CiNeq2Z>V;54n*zhx(M#K zlf_=3n|Q8x4C*kT{v#j1p3SC;~C8_Y-d1sVeA((xQXa4 z#;O~}rW)ocp4VbB!ML?+OssB+U`t2c^|tRbrx>0t4V@UzOmgw-i0LZmhsx9&x0ynl z%9s0l;HC4r8$mbyHACz;h}cU8*R*iq}ihs`Oicz1G+(Hl`iHAm_trY z4WpA8^)Pc?u`%{L4RzkJWVQPTVql{=JvbvgU5!mKm!WRZZJ=XLysif9UYDQtf<9%^ z$9|W;t{>I|m7}vRYSjRK{)RzEXG3%hyMI<>w}WnBSv>5#5V28~s2lgMaYaU(l!lqe z+_8t9VAm2ld}cbp>1X$M$_P3c*X8uO6Q%=Tv%*e8lh>vRXBu;`c>`gazoFN`+SCb? zjkS*njE0&uFh{|sv01&|GoAZ_HllVQZJdR+Douya$T#8JQCKg}%+|+ks-V7@eLXAs z>t`_ThxH%qe^b;o2)+TVL3~@EaXkQk(rvH|!hRqwzbOQ1)=p0x?t`Eo5vQRhfc*h` zXGU8lT2Bn;lyyL;8{c~!gS~~yVW7R%Dm(28m|(4_&NTP>JyvrN>}$8%ZyAI!h&g>3 z{eZLKUwhr)le+x&6%MOOWv$Y|o(<#$+n?F)HfX@#9klz+A=u+}1D;rJ@PBJ+m^nLf zdXTzSRly$5+MtI0jEY&K)pelbO+$`OHRvk4Mu_P3_ogAHKWAm0g8yLeG>2?eW*YXn zu=ljX8fa2G8_g`vexY9k>pg7;-yY^9>@UqBJJhjNS;Fp#UL*Z9>|H^YMoaG)^D_i{ zblgX^O=DS+S$BAQ2jSf0`RigfTkXbsMjOZi_Ach~K@0)!FAea=)8N;DjxWGIx2F?y z*k4!0%8;D1c;DT!)D>;fSN9tOus7xV=gIjBG>n7Iy~b$EU{f7LQ4Bbg!s!V-T5;My zbx@S42Vyh&Ada=TaX_ZNAJX-;5RRc@6og)k?GTF58F5;V(=9!Ec(UOzjSYuu^!<>o z596j`9Oh|c!Zs>4wgYKs0_|1!iUU7v&_~xX4?;ysTo2TNKNECf9&wV=;h>TZ=8y&S)1-p2><0bw!#wMS zIFQQ2#GG`#%5H|go@yRSrB}~XNYc~eM@*0BgN{9~p}u})fir3N zi|aXh4Q&5`9TQ(;?r^DwQzx|H0F$Y0sIsyu(VpoU3@Sd7{lx=K>C)FJrqar`s>(_x zTnFJkI96hMINjdDNXZNuJMc_-10EV2AG2V)C%U$Se)WMW;PEt4HEHyk7%B{D=4o+~ z-jIWd6|jB+vsZ;`{V^CyEKd*RisPlD2AHNneC6qv1*gaI^>BG*f}YNnMlW4oVIshM zNcy!nNi~DIvhwJSY3&)M<;l~A>%;tHeHprrhFAyQ*VRylW+1}i6XWoY<88o=czEk@ z-wla92-Tb5Umr~4pN`m~tUZw3^;ey_k1@cm0j*S7!>+#d>? zL?+GkkCg{E;_DAH-aP*r7vre}a`wRN!~KgN3_Z#ir_b5DFfQedzZ0iZVq!RU+)#%x z13%a@;O@lnUJKxd`&+j-jb6HZO9k}!F}8jRY)ECLrwuO5{Ow>S87p*hzA>h-Q4lA@ zl815om1*r8F;kvC>M)sd1EmAPm9@WtOU z(EBvUC^QyAs?w-s!HD1*^ap$l#p>883HyXXyD22&(&ssa8BH*)fN+=8;ylw! z6Ryd8LPDZPDew)cQR>($BzuiSPbKK21Ot1eYKFt9LLHT7QIsbvsFco+&Xl36vbjXk zyh2oPMPuPaCdWzk6p8Mqq9duWKff5JDC?}FKdHj3IY*O!{IedNNrwVji(}s**;^#K zEJruf;4zg4n`uU>Vsgd!MR{taj7!A-z=}>O;ZwRurenL=UUX9d>m{6wzGN$mLyjMXWRlgw}{lN&EsCv1`EcdhNfM2lBL;zleilUnv+az`yoNlGkHy2w+! 
zIj?SdThI7=ekr=1%`uW>-J~H`RM;-Wt9FZGCn>aYDhr)dajy^r?rQVFm$F%!Xd?I_ zrK)CLxOkiFQ=&WUT)5PbE8>s(BJp@fX!Zt$t}tkSF8r8M4((hvj7yyvFZ84ugw!0E zk!sd1;`&kkoNIZ=6&9jV-?q?m@7F1GLxU#O+^my4c%aRO^QvrRp!DpCOS`YGCzRxj z3%*EwC+#`QV3buc3xjb=^h+VUD=Wawf%85}qYFB8k%Kd`uNCw2g}Z6xdGlE(ifz6< zr*|sQxe}-@&|~wxG`pKdhnAtKVvOc9ekYlGcxJhH3xvTWZzhkzPI>9PjDD6c%Ix`- z93xK_GSwG~ZW|59W|{q*Mjw}=g9vWR%Rwn;;F>}OpQJ}-i=|S${F1m>$131$Me-#l zY4q-D^rQ^-D88RH>rz%VEaV289j=_qB-cF}2)G5$7FTdA5cP*fgMJ|}n-=!@#DxAf zz<0{GGBr;#ErePc@eKQd0>i<9yw3`f4M{MP<&=81T@so+1RK@fwnhCDtWLve9=_U{ z^x~1Er1ZS((3=j#low&XQNFK6YO3{wuZ#8j-K zmumQR%h7A}Ev9aXmRx6kqpn+*&+c2s>X3}RP?Sp$nsT)?EQGv5L_Mj3gL*q&uJugMMGY>k0`$pT4$8zt|AK z)(ShUGWg`j3k~8`!?(f_Lmk~T;egu)eFkr|@jPFT|Gcfs0dk|8YBWuUj1MGa0QS`u z;`K^x5nQq;;9j+j3R>W?r8u>}vOVmz_rkZ;BN;)ee*w>fbmq}+rKk`y}}gPBfRL5vO$oA$}qL3u8Ervj6}9 diff --git a/tests/test_msa.py b/tests/test_msa.py index fb546eb..50238f4 100644 --- a/tests/test_msa.py +++ b/tests/test_msa.py @@ -20,8 +20,8 @@ def test_parse(msa_test_data): for idx, row in msa_test_data.iterrows(): msa_file = pathlib.Path(row.msa_file) msa = parse(msa_file) - assert msa.n_taxa == row.n_taxa - assert msa.n_sites == row.n_sites + assert msa.n_taxa == row.num_taxa + assert msa.n_sites == row.num_sites assert msa.data_type.value == row.data_type assert msa.name == msa_file.name @@ -207,19 +207,19 @@ def test_n_taxa(self, msa_test_data): for idx, row in msa_test_data.iterrows(): msa_file = pathlib.Path(row.msa_file) msa = parse(msa_file) - assert msa.n_taxa == row.n_taxa + assert msa.n_taxa == row.num_taxa def test_n_sites(self, msa_test_data): for idx, row in msa_test_data.iterrows(): msa_file = pathlib.Path(row.msa_file) msa = parse(msa_file) - assert msa.n_sites == row.n_sites + assert msa.n_sites == row.num_sites def test_n_patterns(self, msa_test_data): for idx, row in msa_test_data.iterrows(): msa_file = pathlib.Path(row.msa_file) msa = parse(msa_file) - assert msa.n_patterns == row.n_patterns + assert msa.n_patterns == row.num_patterns def 
test_percentage_gaps(self, msa_test_data): for idx, row in msa_test_data.iterrows(): @@ -249,7 +249,7 @@ def test_bollback_multinomial(self, msa_test_data): for idx, row in msa_test_data.iterrows(): msa_file = pathlib.Path(row.msa_file) msa = parse(msa_file) - assert msa.bollback_multinomial() == row.bollback_multinomial + assert msa.bollback_multinomial() == row.bollback def test_remove_full_gap_sequences(msa_test_data): From a850fb4ba66790db3fc02f4e50d69bc52fac3d13 Mon Sep 17 00:00:00 2001 From: Julia Date: Thu, 27 Feb 2025 18:00:33 +0100 Subject: [PATCH 17/36] fix test data --- tests/data/msa_test_data.parquet | Bin 18662 -> 18707 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/data/msa_test_data.parquet b/tests/data/msa_test_data.parquet index 9dfe46891d9fadc46e71cdcbbffb31224fc6782f..89daa9d193c022f49cccdce83a362e445de7cfe7 100644 GIT binary patch delta 276 zcmaDhk#X`Q#tl)z{2Q1=w=wyMs);fPF)++voSY#X&h){4@&RE9y@#``f5opmYrmbN z=PU1-r}hbvnL%8;9^234H9C1B^0xir7^(dqPCd03Wf0X9HJW@+*nD!In9yW<0S=}O zfs-F9sBrvX7HeWrYYCXltLQj+wn!J#qR`2Kisr0MA!-wXC-*AaZC)tK&d9VPdh$6% zE!IDgYIDLTCy41x=H(KeoX@N=*+HCz@xf$UB{`;!*va-RvYVU4V>z)H?u2fzJvK9i anADIAljaipWiO_o#vyjWdUKOfiW&epQ(fl( delta 261 zcmbO{iSgM)#tl)zd=D5!A2Irf%7`)uF)$QN&J+%pkCE8=JG3GHrG2`_yNb#8pW8P& zURoad<)XbPgQ$k6&SVY|^T{qe;!M{9Ci5w(a4%vOTfwZh!C!5i?_@y{oyojhqLWXH zv@@LunOvu6&blI4ZClXh38JoyOy{B||5nsuT@ Date: Fri, 28 Feb 2025 11:00:31 +0100 Subject: [PATCH 18/36] better parametrize test cases --- .gitignore | 2 + pyproject.toml | 1 + tests/conftest.py | 17 ++-- tests/test_msa.py | 217 ++++++++++++++++++++++------------------------ 4 files changed, 115 insertions(+), 122 deletions(-) diff --git a/.gitignore b/.gitignore index 25c3eb2..5817cad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ **/*.log **/*.csv +**/*.shap.pdf +**/*.pythia.trees # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/pyproject.toml b/pyproject.toml index 6a4210f..3321838 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ [project.optional-dependencies] test = [ "pytest", + "pytest-lazy-fixtures", "pyarrow" ] diff --git a/tests/conftest.py b/tests/conftest.py index 52ad49b..c3bd50d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,13 +9,16 @@ from .test_config import RAXMLNG_COMMAND -@pytest.fixture -def msa_test_data(): - df = pd.read_parquet("tests/data/msa_test_data.parquet") - df["msa_file"] = df["msa_file"].apply( - lambda x: pathlib.Path.cwd() / "tests" / "data" / x - ) - return df +df = pd.read_parquet("tests/data/msa_test_data.parquet") +df["msa_file"] = df["msa_file"].apply( + lambda x: pathlib.Path.cwd() / "tests" / "data" / x +) + + +@pytest.fixture(params=df.iterrows(), ids=lambda x: str(x[1].msa_file)) +def msa_test_data_row(request): + idx, row = request.param + return row @pytest.fixture diff --git a/tests/test_msa.py b/tests/test_msa.py index 50238f4..cf2a464 100644 --- a/tests/test_msa.py +++ b/tests/test_msa.py @@ -16,14 +16,13 @@ ) -def test_parse(msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.n_taxa == row.num_taxa - assert msa.n_sites == row.num_sites - assert msa.data_type.value == row.data_type - assert msa.name == msa_file.name +def test_parse(msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert msa.n_taxa == msa_test_data_row.num_taxa + assert msa.n_sites == msa_test_data_row.num_sites + assert msa.data_type.value == msa_test_data_row.data_type + assert msa.name == msa_file.name def test_parse_large_phylip(phylip_msa_file): @@ -100,24 +99,24 @@ def test_msa_str_and_repr(): assert repr(msa) == expected_str -def test_contains_duplicate_sequences(msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.contains_duplicate_sequences() == row.contains_duplicates +def 
test_contains_duplicate_sequences(msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert msa.contains_duplicate_sequences() == msa_test_data_row.contains_duplicates -def test_contains_full_gap_sequences(msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.contains_full_gap_sequences() == row.contains_full_gap_sequences +def test_contains_full_gap_sequences(msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert ( + msa.contains_full_gap_sequences() + == msa_test_data_row.contains_full_gap_sequences + ) -def test_get_file_format(msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - assert _get_file_format(msa_file).value == row.file_format +def test_get_file_format(msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + assert _get_file_format(msa_file).value == msa_test_data_row.file_format def test_get_msa_file_format_raises_value_error(raxmlng_inference_log): @@ -128,11 +127,10 @@ def test_get_msa_file_format_raises_value_error(raxmlng_inference_log): _get_file_format(raxmlng_inference_log) -def test_guess_dtype(msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert _guess_dtype(msa.sequences).value == row.data_type +def test_guess_dtype(msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert _guess_dtype(msa.sequences).value == msa_test_data_row.data_type def test_guess_dtype_fails(): @@ -155,11 +153,10 @@ def test_guess_dtype_fails(): _guess_dtype(sequences) -def test_get_raxmlng_model(msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.get_raxmlng_model() == row.raxmlng_model +def 
test_get_raxmlng_model(msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert msa.get_raxmlng_model() == msa_test_data_row.raxmlng_model def test_get_raxmlng_model_fails_for_invalid_dtype(): @@ -203,90 +200,80 @@ def test_write(phylip_msa_file): class TestMSAFeatures: - def test_n_taxa(self, msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.n_taxa == row.num_taxa - - def test_n_sites(self, msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.n_sites == row.num_sites - - def test_n_patterns(self, msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.n_patterns == row.num_patterns - - def test_percentage_gaps(self, msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.proportion_gaps == row.proportion_gaps - - def test_percentage_invariant(self, msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.proportion_invariant == row.proportion_invariant - - def test_entropy(self, msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.entropy() == row.entropy - - def test_pattern_entropy(self, msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.pattern_entropy() == row.pattern_entropy - - def test_bollback_multinomial(self, msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) - msa = parse(msa_file) - assert msa.bollback_multinomial() == row.bollback - - -def 
test_remove_full_gap_sequences(msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) + def test_n_taxa(self, msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert msa.n_taxa == msa_test_data_row.num_taxa + + def test_n_sites(self, msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) msa = parse(msa_file) + assert msa.n_sites == msa_test_data_row.num_sites - if row.contains_full_gap_sequences: - # If the MSA contains full-gap sequences: expect these sequences to be removed - msa_no_full_gaps = remove_full_gap_sequences(msa) - assert not msa_no_full_gaps.contains_full_gap_sequences() - assert msa_no_full_gaps.n_taxa < msa.n_taxa - # Number of sites should not be affected - assert msa_no_full_gaps.n_sites == msa.n_sites - else: - # Otherwise, expect a PyPythiaException - with pytest.raises( - PyPythiaException, match="No full-gap sequences found in MSA." - ): - remove_full_gap_sequences(msa) - - -def test_deduplicate_sequences(msa_test_data): - for idx, row in msa_test_data.iterrows(): - msa_file = pathlib.Path(row.msa_file) + def test_n_patterns(self, msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) msa = parse(msa_file) + assert msa.n_patterns == msa_test_data_row.num_patterns - if row.contains_duplicates: - # If the MSA contains duplicate sequences: expect these sequences to be removed - msa_no_duplicates = deduplicate_sequences(msa) - assert not msa_no_duplicates.contains_duplicate_sequences() - assert msa_no_duplicates.n_taxa < msa.n_taxa - # Number of sites should not be affected - assert msa_no_duplicates.n_sites == msa.n_sites - else: - # Otherwise, expect a PyPythiaException - with pytest.raises( - PyPythiaException, match="No duplicate sequences found in MSA." 
- ): - deduplicate_sequences(msa) + def test_percentage_gaps(self, msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert msa.proportion_gaps == msa_test_data_row.proportion_gaps + + def test_percentage_invariant(self, msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert msa.proportion_invariant == msa_test_data_row.proportion_invariant + + def test_entropy(self, msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert msa.entropy() == msa_test_data_row.entropy + + def test_pattern_entropy(self, msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert msa.pattern_entropy() == msa_test_data_row.pattern_entropy + + def test_bollback_multinomial(self, msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + assert msa.bollback_multinomial() == msa_test_data_row.bollback + + +def test_remove_full_gap_sequences(msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + + if msa_test_data_row.contains_full_gap_sequences: + # If the MSA contains full-gap sequences: expect these sequences to be removed + msa_no_full_gaps = remove_full_gap_sequences(msa) + assert not msa_no_full_gaps.contains_full_gap_sequences() + assert msa_no_full_gaps.n_taxa < msa.n_taxa + # Number of sites should not be affected + assert msa_no_full_gaps.n_sites == msa.n_sites + else: + # Otherwise, expect a PyPythiaException + with pytest.raises( + PyPythiaException, match="No full-gap sequences found in MSA." 
+ ): + remove_full_gap_sequences(msa) + + +def test_deduplicate_sequences(msa_test_data_row): + msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa = parse(msa_file) + + if msa_test_data_row.contains_duplicates: + # If the MSA contains duplicate sequences: expect these sequences to be removed + msa_no_duplicates = deduplicate_sequences(msa) + assert not msa_no_duplicates.contains_duplicate_sequences() + assert msa_no_duplicates.n_taxa < msa.n_taxa + # Number of sites should not be affected + assert msa_no_duplicates.n_sites == msa.n_sites + else: + # Otherwise, expect a PyPythiaException + with pytest.raises( + PyPythiaException, match="No duplicate sequences found in MSA." + ): + deduplicate_sequences(msa) From 06eb6b925a66031901c4bb4d538b8ea2f473c8cd Mon Sep 17 00:00:00 2001 From: Julia Date: Fri, 28 Feb 2025 14:54:13 +0100 Subject: [PATCH 19/36] correctly setup ruff pre-commit; some test refactoring --- .pre-commit-config.yaml | 2 +- generate_api_docs.py | 18 ++++++++++----- pyproject.toml | 29 +++--------------------- tests/conftest.py | 11 +++++++-- tests/test_msa.py | 50 ++++++++++++++++------------------------- 5 files changed, 44 insertions(+), 66 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1d85544..bbe44f0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,6 +11,6 @@ repos: hooks: # Run the linter. - id: ruff - args: [--select, F, E, I, --fix ] + args: [--fix, --exit-non-zero-on-fix] # Run the formatter. 
- id: ruff-format diff --git a/generate_api_docs.py b/generate_api_docs.py index 969be4c..66c30d6 100644 --- a/generate_api_docs.py +++ b/generate_api_docs.py @@ -41,30 +41,35 @@ with api_file.open("w") as f: for cls in classes: f.write( - textwrap.dedent(f""" + textwrap.dedent( + f""" ::: pypythia.{file.stem}.{cls}\n options: show_root_heading: true merge_init_into_class: false group_by_category: true modernize_annotations: true - """) + """ + ) ) for mtd in methods: f.write( - textwrap.dedent(f""" + textwrap.dedent( + f""" ::: pypythia.{file.stem}.{mtd}\n options: show_root_heading: true modernize_annotations: true - """) + """ + ) ) if file_name == "config": with api_file.open("a") as f: f.write( - textwrap.dedent(f""" + textwrap.dedent( + f""" ::: pypythia.{file.stem}.DEFAULT_MODEL_FILE\n options: show_root_heading: true @@ -74,7 +79,8 @@ options: show_root_heading: true modernize_annotations: true - """) + """ + ) ) mkdocs_cfg_file = pathlib.Path("mkdocs.yml") diff --git a/pyproject.toml b/pyproject.toml index 3321838..e5e9137 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ dependencies = [ [project.optional-dependencies] test = [ "pytest", - "pytest-lazy-fixtures", "pyarrow" ] @@ -41,31 +40,9 @@ pythia = "pypythia.main:main" requires = ["hatchling"] build-backend = "hatchling.build" -[tool.black] -exclude = ''' -/( - \.eggs - | \.git - | \.venv - | build - | dist -)/ -''' - -[tool.isort] -multi_line_output = 3 -include_trailing_comma = true -ensure_newline_before_comments = true -line_length = 88 -known_first_party = "pypythia" -skip_glob = '\.eggs/*,\.git/*,\.venv/*,build/*,dist/*' -default_section = 'THIRDPARTY' - -[tool.mypy] -python_version = 3.8 -ignore_missing_imports = true -no_implicit_optional = true -check_untyped_defs = true +[tool.ruff.lint] +select = ["F", "I", "UP"] +ignore = ["E501"] [tool.pytest.ini_options] addopts = "--import-mode=importlib" diff --git a/tests/conftest.py b/tests/conftest.py index c3bd50d..292ed2c 100644 
--- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,14 +8,21 @@ from .test_config import RAXMLNG_COMMAND - df = pd.read_parquet("tests/data/msa_test_data.parquet") df["msa_file"] = df["msa_file"].apply( lambda x: pathlib.Path.cwd() / "tests" / "data" / x ) -@pytest.fixture(params=df.iterrows(), ids=lambda x: str(x[1].msa_file)) +@pytest.fixture +def msa_test_data(): + return df + + +@pytest.fixture( + params=df.iterrows(), + ids=lambda x: f"{x[1].msa_file.parent.name}/{x[1].msa_file.name}", +) def msa_test_data_row(request): idx, row = request.param return row diff --git a/tests/test_msa.py b/tests/test_msa.py index cf2a464..4d29365 100644 --- a/tests/test_msa.py +++ b/tests/test_msa.py @@ -17,7 +17,7 @@ def test_parse(msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) + msa_file = msa_test_data_row.msa_file msa = parse(msa_file) assert msa.n_taxa == msa_test_data_row.num_taxa assert msa.n_sites == msa_test_data_row.num_sites @@ -100,14 +100,12 @@ def test_msa_str_and_repr(): def test_contains_duplicate_sequences(msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert msa.contains_duplicate_sequences() == msa_test_data_row.contains_duplicates def test_contains_full_gap_sequences(msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert ( msa.contains_full_gap_sequences() == msa_test_data_row.contains_full_gap_sequences @@ -115,8 +113,10 @@ def test_contains_full_gap_sequences(msa_test_data_row): def test_get_file_format(msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - assert _get_file_format(msa_file).value == msa_test_data_row.file_format + assert ( + _get_file_format(msa_test_data_row.msa_file).value + == msa_test_data_row.file_format + ) def test_get_msa_file_format_raises_value_error(raxmlng_inference_log): @@ -128,8 +128,7 @@ def 
test_get_msa_file_format_raises_value_error(raxmlng_inference_log): def test_guess_dtype(msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert _guess_dtype(msa.sequences).value == msa_test_data_row.data_type @@ -154,8 +153,7 @@ def test_guess_dtype_fails(): def test_get_raxmlng_model(msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert msa.get_raxmlng_model() == msa_test_data_row.raxmlng_model @@ -201,49 +199,40 @@ def test_write(phylip_msa_file): class TestMSAFeatures: def test_n_taxa(self, msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert msa.n_taxa == msa_test_data_row.num_taxa def test_n_sites(self, msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert msa.n_sites == msa_test_data_row.num_sites def test_n_patterns(self, msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert msa.n_patterns == msa_test_data_row.num_patterns def test_percentage_gaps(self, msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert msa.proportion_gaps == msa_test_data_row.proportion_gaps def test_percentage_invariant(self, msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert msa.proportion_invariant == msa_test_data_row.proportion_invariant def test_entropy(self, msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert msa.entropy() == 
msa_test_data_row.entropy def test_pattern_entropy(self, msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert msa.pattern_entropy() == msa_test_data_row.pattern_entropy def test_bollback_multinomial(self, msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) assert msa.bollback_multinomial() == msa_test_data_row.bollback def test_remove_full_gap_sequences(msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) if msa_test_data_row.contains_full_gap_sequences: # If the MSA contains full-gap sequences: expect these sequences to be removed @@ -261,8 +250,7 @@ def test_remove_full_gap_sequences(msa_test_data_row): def test_deduplicate_sequences(msa_test_data_row): - msa_file = pathlib.Path(msa_test_data_row.msa_file) - msa = parse(msa_file) + msa = parse(msa_test_data_row.msa_file) if msa_test_data_row.contains_duplicates: # If the MSA contains duplicate sequences: expect these sequences to be removed From c57ccaf9d948a2087fd495bcd408beba70b94ac9 Mon Sep 17 00:00:00 2001 From: Julia Date: Fri, 28 Feb 2025 14:54:28 +0100 Subject: [PATCH 20/36] wip: tests --- pypythia/prediction.py | 17 +++- tests/test_prediction.py | 213 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 228 insertions(+), 2 deletions(-) create mode 100644 tests/test_prediction.py diff --git a/pypythia/prediction.py b/pypythia/prediction.py index 6d100a0..6e6569a 100644 --- a/pypythia/prediction.py +++ b/pypythia/prediction.py @@ -218,8 +218,6 @@ def predict_difficulty( # Check if the reduced MSA is different from the original MSA is_reduced = msa != reduced_msa if is_reduced: - reduced_msa_file = reduced_msa_file or pathlib.Path(f"{msa_file}.reduced.phy") - # If the reduced MSA is different from the original MSA, proceed with the reduced 
MSA msa = reduced_msa @@ -248,6 +246,21 @@ def predict_difficulty( else f"Using {threads} threads for parallel parsimony tree computation." ) + # If the MSA/reduced MSA contains less than 4 sequences, RAxML-NG will fail as there is only a single possible + # tree topology for this MSA. In this case, any phylogenetic inference is meaningless and we raise a + # PyPythia exception to inform the user. + if msa.n_taxa < 4: + error_msg = ( + "The MSA contains less than 4 sequences. " + "Phylogenetic inference is not meaningful for such small MSAs as there exists only a single possible tree topology. " + ) + if is_reduced: + error_msg += ( + "Note that during preprocessing, Pythia reduced the input MSA by removing duplicate sequences and/or " + "sequences containing only gaps leading to an MSA with less than 4 sequences. " + "You can rerun the prediction and disable deduplication and gap removal to use the original MSA. " + ) + msa_features = collect_features( msa=msa, msa_file=msa_file, diff --git a/tests/test_prediction.py b/tests/test_prediction.py new file mode 100644 index 0000000..6147554 --- /dev/null +++ b/tests/test_prediction.py @@ -0,0 +1,213 @@ +import pathlib +import tempfile +import warnings + +import pandas as pd +import pytest + +from pypythia import __version__ +from pypythia.msa import parse +from pypythia.prediction import ( + _handle_duplicates, + _handle_full_gap_sequences, + collect_features, + predict_difficulty, +) + + +def test_handle_duplicates(msa_test_data_row): + msa = parse(msa_test_data_row.msa_file) + reduced_msa = _handle_duplicates(msa, deduplicate=True, log_info=False) + + if msa_test_data_row.contains_duplicates: + assert reduced_msa != msa + assert reduced_msa.n_taxa < msa.n_taxa + else: + assert reduced_msa == msa + + +def test_handle_duplicates_dont_deduplicate(msa_test_data_row): + msa = parse(msa_test_data_row.msa_file) + reduced_msa = _handle_duplicates(msa, deduplicate=False, log_info=False) + + assert reduced_msa == msa + + +def 
test_handle_full_gap_sequences(msa_test_data_row): + msa = parse(msa_test_data_row.msa_file) + reduced_msa = _handle_full_gap_sequences(msa, remove_full_gaps=True, log_info=False) + + if msa_test_data_row.contains_full_gap_sequences: + assert reduced_msa != msa + assert reduced_msa.n_taxa < msa.n_taxa + else: + assert reduced_msa == msa + + +def test_handle_full_gap_sequences_dont_remove_full_gaps(msa_test_data_row): + msa = parse(msa_test_data_row.msa_file) + reduced_msa = _handle_full_gap_sequences( + msa, remove_full_gaps=False, log_info=False + ) + + assert reduced_msa == msa + + +def test_collect_features(msa_test_data_row, raxmlng): + msa = parse(msa_test_data_row.msa_file) + features = collect_features( + msa=msa, msa_file=msa_test_data_row.msa_file, raxmlng=raxmlng + ) + assert features.shape[0] == 1 + + pd.testing.assert_series_equal( + features.loc[0], + msa_test_data_row[features.columns], + check_dtype=False, + check_names=False, + ) + + +def test_collect_features_stores_trees(phylip_msa_file, raxmlng): + msa = parse(phylip_msa_file) + with tempfile.NamedTemporaryFile("w") as pars_trees_file: + pars_trees_file = pathlib.Path(pars_trees_file.name) + collect_features( + msa=msa, + msa_file=phylip_msa_file, + raxmlng=raxmlng, + pars_trees_file=pars_trees_file, + ) + assert pars_trees_file.exists() + + # Should contain 24 parsimony trees + assert sum(1 for _ in pars_trees_file.open()) == 24 + + +@pytest.mark.parametrize("store_results", [True, False]) +def test_predict_difficulty(msa_test_data_row, raxmlng_command, store_results): + # Check if the Pythia version is identical, if not the expected difficulty parquet file might be outdated + # In this case, raise a warning + if msa_test_data_row.pythia_version != __version__: + warnings.warn( + f"The Pythia version in the test data is {msa_test_data_row.pythia_version}, but the current " + f"Pythia version is {__version__}. The expected difficulty parquet file might be outdated." 
+ ) + + with tempfile.TemporaryDirectory() as tmpdir: + prefix = pathlib.Path(tmpdir) / "test" + + predicted_difficulty = predict_difficulty( + msa_file=msa_test_data_row.msa_file, + raxmlng=raxmlng_command, + seed=msa_test_data_row.raxmlng_seed, + deduplicate=False, + remove_full_gaps=False, + result_prefix=prefix, + store_results=store_results, + ) + + # 1. Check if the predicted difficulty matches the "ground-truth" in our test data + assert predicted_difficulty == pytest.approx( + msa_test_data_row.predicted_difficulty, abs=0.01 + ) + + # 2. Check if the results exist if store_results=True, else check if they don't exist + pars_trees_file = pathlib.Path(f"{prefix}.pythia.trees") + shap_file = pathlib.Path(f"{prefix}.shap.pdf") + results_file = pathlib.Path(f"{prefix}.pythia.csv") + reduced_msa_file = pathlib.Path(f"{prefix}.reduced.phy") + + if store_results: + assert pars_trees_file.exists() + assert shap_file.exists() + assert results_file.exists() + else: + assert not pars_trees_file.exists() + assert not shap_file.exists() + assert not results_file.exists() + + # Since we set deduplicate=False and remove_full_gaps=False, the reduced MSA should be identical to the original + # and no reduced MSA should be saved in either case + assert not reduced_msa_file.exists() + + +def test_predict_difficulty_with_deduplication_and_gap_removal( + msa_test_data, raxmlng_command +): + """ + In this test case, we check if the deduplication and gap removal works as expected. For this, we use the test MSAs + that contain duplicate and/or full-gap sequences and compare the results to the respective data in msa_test_data + for the reduced MSA. 
+ + If, for instance, the MSA `DNA/test.phy` contains duplicates/full-gap sequences, the respective reduced MSA + `DNA/test.reduced.phy` should be in `msa_test_data`, and running predict_difficulty with `DNA/test.phy` and + `deduplicate=True` and `remove_full_gaps=True` should yield the same difficulty as the difficulty for + `DNA/test.reduced.phy` in `msa_test_data`. + """ + + # Check if the Pythia version is identical, if not the expected difficulty parquet file might be outdated + # In this case, raise a warning + pythia_version_test_data = msa_test_data.pythia_version.unique()[0] + if pythia_version_test_data != __version__: + warnings.warn( + f"The Pythia version in the test data is {pythia_version_test_data}, but the current " + f"Pythia version is {__version__}. The expected difficulty parquet file might be outdated." + ) + + data_with_duplicates_or_full_gaps = msa_test_data.loc[ + lambda x: x.contains_duplicates | x.contains_full_gap_sequences + ] + + # Sanity check that we actually ran all tests + expected_n_test_cases = 5 + actual_n_test_cases = 0 + + for idx, row in data_with_duplicates_or_full_gaps.iterrows(): + # First, we check if the respective reduced alignment is also in `msa_test_data` + reduced_msa_file_name = f"{row.data_type}/{row.msa_file.stem}.reduced.phy" + reduced_row = msa_test_data.loc[ + msa_test_data.msa_file.astype(str).str.endswith(reduced_msa_file_name) + ] + if reduced_row.empty: + continue + elif reduced_row.shape[0] > 1: + raise ValueError( + f"Multiple rows found for reduced MSA {reduced_msa_file_name}" + ) + + with tempfile.TemporaryDirectory() as tmpdir: + prefix = pathlib.Path(tmpdir) / "test" + + predicted_difficulty = predict_difficulty( + msa_file=row.msa_file, + raxmlng=raxmlng_command, + deduplicate=True, + remove_full_gaps=True, + result_prefix=prefix, + store_results=True, + ) + + # 1. 
We expect a reduced MSA to be saved + reduced_msa_file = pathlib.Path(f"{prefix}.reduced.phy") + assert reduced_msa_file.exists() + + # 2. Check if the predicted difficulty matches the "ground-truth" in our test data + # Note that we check against the difficulty for the reduced MSA in the test data + assert predicted_difficulty == pytest.approx( + reduced_row.predicted_difficulty.mean(), abs=0.01 + ) + + # 3. Check if the computed features match the features of the reduced MSA in the test data + results_file = pathlib.Path(f"{prefix}.pythia.csv") + results = pd.read_csv(results_file) + results.drop( + columns=["difficulty", "msa_file"], inplace=True + ) # difficulty already with tolerance + pd.testing.assert_frame_equal( + results.reset_index(drop=True), + reduced_row[results.columns].reset_index(drop=True), + ) + actual_n_test_cases += 1 + + assert actual_n_test_cases == expected_n_test_cases, "Not all test cases were run." From db7bde191a42081cba0dc4868982c8111221cbbc Mon Sep 17 00:00:00 2001 From: Julia Date: Fri, 28 Feb 2025 15:57:16 +0100 Subject: [PATCH 21/36] more error handling + test coverage --- pypythia/prediction.py | 9 +++- pypythia/raxmlng.py | 20 ++++++++ tests/conftest.py | 5 ++ tests/data/DNA/3_taxa_msa.fasta | 30 ++++++++++++ tests/test_prediction.py | 82 +++++++++++++++++++++++++++++++++ tests/test_raxmlng.py | 38 ++++++++++++++- 6 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 tests/data/DNA/3_taxa_msa.fasta diff --git a/pypythia/prediction.py b/pypythia/prediction.py index 6e6569a..02d129d 100644 --- a/pypythia/prediction.py +++ b/pypythia/prediction.py @@ -171,6 +171,9 @@ def predict_difficulty( Returns: np.float64: Predicted difficulty of the MSA. 
""" + if not msa_file.exists(): + raise PyPythiaException(f"The given MSA {msa_file} file does not exist.") + result_prefix = pathlib.Path(result_prefix) if result_prefix else msa_file pars_trees_file = pathlib.Path(f"{result_prefix}.pythia.trees") @@ -197,7 +200,10 @@ def predict_difficulty( "Path to the RAxML-NG executable is required if 'raxml-ng' is not in $PATH." ) - raxmlng = RAxMLNG(**{"exe_path": raxmlng} if raxmlng else {}) + try: + raxmlng = RAxMLNG(**{"exe_path": raxmlng} if raxmlng else {}) + except Exception as e: + raise PyPythiaException("Initializing RAxML-NG failed.") from e # Init the prediction model log_info and log_runtime_information(message=f"Loading predictor {model_file.name}") @@ -260,6 +266,7 @@ def predict_difficulty( "sequences containing only gaps leading to an MSA with less than 4 sequences. " "You can rerun the prediction and disable deduplication and gap removal to use the original MSA. " ) + raise PyPythiaException(error_msg) msa_features = collect_features( msa=msa, diff --git a/pypythia/raxmlng.py b/pypythia/raxmlng.py index d673755..e7864c9 100644 --- a/pypythia/raxmlng.py +++ b/pypythia/raxmlng.py @@ -82,9 +82,29 @@ class RAxMLNG: Attributes: exe_path (pathlib.Path): Path to the RAxML-NG executable. + + Raises: + FileNotFoundError: If the RAxML-NG executable is not found. + RuntimeError: If the RAxML-NG executable is not working or is not a RAxML-NG executable. + """ def __init__(self, exe_path: Optional[pathlib.Path] = DEFAULT_RAXMLNG_EXE): + if not exe_path.exists(): + raise FileNotFoundError("RAxML-NG executable not found.") + + try: + out = subprocess.check_output([str(exe_path.absolute())], encoding="utf-8") + except Exception as e: + raise RuntimeError( + f"Your RAxML-NG executable does not seem to work. Running `{exe_path}` failed: {e}" + ) from e + + if not "RAxML-NG" in out: + raise RuntimeError( + f"The given executable `{exe_path}` does not seem to be a RAxML-NG executable." 
+ ) + self.exe_path = exe_path def _base_cmd( diff --git a/tests/conftest.py b/tests/conftest.py index 292ed2c..b283b0a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -53,6 +53,11 @@ def raxmlng(raxmlng_command): return RAxMLNG(raxmlng_command) +@pytest.fixture +def data_dir(): + return pathlib.Path.cwd() / "tests" / "data" + + @pytest.fixture def multiple_trees_path(): return pathlib.Path.cwd() / "tests" / "data" / "trees" / "many.trees" diff --git a/tests/data/DNA/3_taxa_msa.fasta b/tests/data/DNA/3_taxa_msa.fasta new file mode 100644 index 0000000..a81fbf9 --- /dev/null +++ b/tests/data/DNA/3_taxa_msa.fasta @@ -0,0 +1,30 @@ +>Taxon0 +TTTTCAAAGCAATGTGTGACAGGTACAGGGACAAATCCCGTTAATAAGTAAGAGGATTTG +TGCTTGGCTCTGTCACATGCCACTTTGAAAANNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNN--NNNNNNNNNNNNNNNNNNNNNNNNN----NNNNNNNN---- +--------NNNNNNNNNNNNNN---------NNN-N------NN-NNNNNNNNNNNNNNN +-NNNNNNNNNNNNNNNN--NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN---NNNNN +NNNNNNNNNNNN--NNN-----------N------------- +>Taxon1 +TTTTCAAAGCAATGTGTGACAGGTACAGGGACAAATCCCGTTAATAAGTAAGAGGATTTG +TGCTTGGCTCTGTCACATGCCACTTTGAAAANNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNN--NN-NNNNNNNNNNNNNNNNNNNNN---N-NNNNNN------ +----------NNNNNNNN----N--NNNNNNNNN-N------NNNNNNNNNNNNNNNNNN +-NNNNNNNNNNNNNNNNN--NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN---NNNNN +NNNNNNNNNNNNNNNNN------------------------- +>Taxon2 +TTTTCAAAGCAATGTGTGACAGGTACAGGGACAAATCCCGTTAATAAGTAAGAGGATTTG +TGCTTGGCTCTGTCACATGCCACTTTGAAAANNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN 
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNN--NNNNNNNNNNNNNNNNNNNNNNNN--NN-NNNNNN------ +----------NNNNNNNN----NNNNNN--NNNN-N------NNNNNNNNNNNNNNNNNN +-NNNNNNNNNNNNNNNNN--NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN---NNNNN +NNNNNNNNNNNNNNN--------------------------- diff --git a/tests/test_prediction.py b/tests/test_prediction.py index 6147554..bb34f9e 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -1,4 +1,5 @@ import pathlib +import re import tempfile import warnings @@ -6,6 +7,7 @@ import pytest from pypythia import __version__ +from pypythia.custom_errors import PyPythiaException from pypythia.msa import parse from pypythia.prediction import ( _handle_duplicates, @@ -211,3 +213,83 @@ def test_predict_difficulty_with_deduplication_and_gap_removal( actual_n_test_cases += 1 assert actual_n_test_cases == expected_n_test_cases, "Not all test cases were run." + + +def test_predict_difficulty_fewer_than_four_taxa(data_dir, raxmlng_command): + with pytest.raises( + PyPythiaException, match="The MSA contains less than 4 sequences." 
+ ): + with tempfile.TemporaryDirectory() as tmpdir: + prefix = pathlib.Path(tmpdir) / "test" + predict_difficulty( + msa_file=data_dir / "DNA" / "3_taxa_msa.fasta", + raxmlng=raxmlng_command, + deduplicate=True, + remove_full_gaps=True, + result_prefix=prefix, + store_results=True, + ) + + +def test_predict_difficulty_with_deduplication_and_gap_removal_if_reduced_msa_has_fewer_than_four_taxa( + data_dir, raxmlng_command +): + # DNA/5.phy contains 10 taxa, but after deduplication and gap removal, only 3 taxa remain + # in this case, we expect the general error, but also the hint about the reduced MSA + with pytest.raises( + PyPythiaException, + match=re.compile( + r"The MSA contains less than 4 sequences.+reduced the input MSA", re.DOTALL + ), + ): + with tempfile.TemporaryDirectory() as tmpdir: + prefix = pathlib.Path(tmpdir) / "test" + predict_difficulty( + msa_file=data_dir / "DNA" / "5.phy", + raxmlng=raxmlng_command, + deduplicate=True, + remove_full_gaps=True, + result_prefix=prefix, + store_results=True, + ) + + +def test_predict_difficulty_msa_files_does_not_exist(raxmlng_command): + msa_file = pathlib.Path("this_does_not_exist.fasta") + with pytest.raises( + PyPythiaException, match=f"The given MSA {msa_file} file does not exist." 
+ ): + predict_difficulty( + msa_file=msa_file, + raxmlng=raxmlng_command, + deduplicate=True, + remove_full_gaps=True, + result_prefix=None, + store_results=False, + ) + + +def test_predict_difficulty_raxmlng_executable_none(small_msa_file): + with pytest.raises( + PyPythiaException, match="Path to the RAxML-NG executable is required" + ): + predict_difficulty( + msa_file=small_msa_file, + raxmlng=None, + deduplicate=False, + remove_full_gaps=False, + result_prefix=None, + store_results=False, + ) + + +def test_predict_difficulty_raxmlng_init_fails(small_msa_file): + with pytest.raises(PyPythiaException, match="Initializing RAxML-NG failed"): + predict_difficulty( + msa_file=small_msa_file, + raxmlng=pathlib.Path("/path/to/non/existing/raxml-ng"), + deduplicate=False, + remove_full_gaps=False, + result_prefix=None, + store_results=False, + ) diff --git a/tests/test_raxmlng.py b/tests/test_raxmlng.py index 8454ea1..b42ff0e 100644 --- a/tests/test_raxmlng.py +++ b/tests/test_raxmlng.py @@ -1,3 +1,4 @@ +import os import pathlib import tempfile from tempfile import TemporaryDirectory @@ -5,7 +6,42 @@ import pytest from pypythia.custom_errors import RAxMLNGError -from pypythia.raxmlng import get_raxmlng_rfdist_results, run_raxmlng_command +from pypythia.raxmlng import RAxMLNG, get_raxmlng_rfdist_results, run_raxmlng_command + + +def test_raxmlng_init(raxmlng_command): + assert raxmlng_command.exists() + # Should work without any problems + RAxMLNG(raxmlng_command) + + +def test_raxmlng_init_fails_non_existing_exe(): + with pytest.raises(FileNotFoundError, match="RAxML-NG executable not found."): + RAxMLNG(pathlib.Path("this_does_not_exist")) + + +def test_raxmlng_init_fails_wrong_exe(raxmlng_command): + with pytest.raises( + RuntimeError, match="Your RAxML-NG executable does not seem to work." 
+ ): + with tempfile.NamedTemporaryFile("wb") as tmpfile: + # Manually break the RAxML-NG file to trigger the executable-broken error + tmpfile.write(b"NonSense" + raxmlng_command.read_bytes()) + tmpfile.flush() + RAxMLNG(pathlib.Path(tmpfile.name)) + + +def test_raxmlng_init_fails_non_raxmlng_exe(): + with pytest.raises( + RuntimeError, + match="The given executable `.*` does not seem to be a RAxML-NG executable.", + ): + with tempfile.NamedTemporaryFile("w", suffix=".sh") as tmpfile: + tmpfile.write("#!/bin/bash\n") + tmpfile.write("echo test\n") + tmpfile.flush() + os.chmod(tmpfile.name, 0o777) + RAxMLNG(pathlib.Path(tmpfile.name)) def test_get_raxmlng_rfdist_results(raxmlng_rfdistance_log): From 8b1bd592d9a9e474bdf1a0787cc0950ebef8566e Mon Sep 17 00:00:00 2001 From: Julia Date: Fri, 28 Feb 2025 16:00:12 +0100 Subject: [PATCH 22/36] handle none prefix --- pypythia/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypythia/main.py b/pypythia/main.py index 6618abc..7045a1c 100644 --- a/pypythia/main.py +++ b/pypythia/main.py @@ -101,7 +101,7 @@ def main(): # Format all paths to pathlib.Path objects and set a default value if not provided msa_file = pathlib.Path(args.msa) - prefix = pathlib.Path(args.prefix) or msa_file + prefix = pathlib.Path(args.prefix) if args.prefix else msa_file store_results = not args.nofiles From 2e19061acda460f807f2c8708459ad940ccbd46d Mon Sep 17 00:00:00 2001 From: Julia Date: Fri, 28 Feb 2025 16:08:03 +0100 Subject: [PATCH 23/36] wip: fix gh actions --- .github/workflows/test-pythia.yml | 2 +- tests/test_raxmlng.py | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test-pythia.yml b/.github/workflows/test-pythia.yml index 8dd683c..b2a6f62 100644 --- a/.github/workflows/test-pythia.yml +++ b/.github/workflows/test-pythia.yml @@ -30,7 +30,7 @@ jobs: cat tests/test_config.py - name: Run Pythia tests run: | - PYTHONPATH=. pytest + PYTHONPATH=. 
pytest -svx --color=yes Install-using-conda: runs-on: ${{ matrix.os }} diff --git a/tests/test_raxmlng.py b/tests/test_raxmlng.py index b42ff0e..24488b6 100644 --- a/tests/test_raxmlng.py +++ b/tests/test_raxmlng.py @@ -24,11 +24,13 @@ def test_raxmlng_init_fails_wrong_exe(raxmlng_command): with pytest.raises( RuntimeError, match="Your RAxML-NG executable does not seem to work." ): - with tempfile.NamedTemporaryFile("wb") as tmpfile: - # Manually break the RAxML-NG file to trigger the executable-broken error - tmpfile.write(b"NonSense" + raxmlng_command.read_bytes()) - tmpfile.flush() - RAxMLNG(pathlib.Path(tmpfile.name)) + tmpfile = tempfile.NamedTemporaryFile("wb", delete=False) + # Manually break the RAxML-NG file to trigger the executable-broken error + tmpfile.write(b"NonSense" + raxmlng_command.read_bytes()) + tmpfile.close() + os.chmod(tmpfile.name, 0o777) + RAxMLNG(pathlib.Path(tmpfile.name)) + os.unlink(tmpfile.name) def test_raxmlng_init_fails_non_raxmlng_exe(): @@ -36,12 +38,13 @@ def test_raxmlng_init_fails_non_raxmlng_exe(): RuntimeError, match="The given executable `.*` does not seem to be a RAxML-NG executable.", ): - with tempfile.NamedTemporaryFile("w", suffix=".sh") as tmpfile: - tmpfile.write("#!/bin/bash\n") - tmpfile.write("echo test\n") - tmpfile.flush() - os.chmod(tmpfile.name, 0o777) - RAxMLNG(pathlib.Path(tmpfile.name)) + tmpfile = tempfile.NamedTemporaryFile("w", delete=False) + tmpfile.write("#!/bin/bash\n") + tmpfile.write("echo test\n") + tmpfile.close() + os.chmod(tmpfile.name, 0o777) + RAxMLNG(pathlib.Path(tmpfile.name)) + os.unlink(tmpfile.name) def test_get_raxmlng_rfdist_results(raxmlng_rfdistance_log): From f25caf3cf36970e1d1d8be28c0e56decfac5cd7d Mon Sep 17 00:00:00 2001 From: Julia Date: Mon, 3 Mar 2025 08:56:56 +0100 Subject: [PATCH 24/36] don't extract unused abs rfdist --- pypythia/prediction.py | 2 +- pypythia/raxmlng.py | 21 +++++++-------------- tests/test_raxmlng.py | 10 ++-------- 3 files changed, 10 insertions(+), 23 
deletions(-) diff --git a/pypythia/prediction.py b/pypythia/prediction.py index 02d129d..db4c204 100644 --- a/pypythia/prediction.py +++ b/pypythia/prediction.py @@ -104,7 +104,7 @@ def collect_features( log_info and log_runtime_information( "Computing the RF-Distance for the parsimony trees." ) - num_topos, rel_rfdist, _ = raxmlng.get_rfdistance_results(trees, redo=None) + num_topos, rel_rfdist = raxmlng.get_rfdistance_results(trees, redo=None) features = { "num_taxa": msa.n_taxa, diff --git a/pypythia/raxmlng.py b/pypythia/raxmlng.py index e7864c9..9c364cd 100644 --- a/pypythia/raxmlng.py +++ b/pypythia/raxmlng.py @@ -36,30 +36,24 @@ def _get_value_from_line(line: str, search_string: str) -> float: ) -def get_raxmlng_rfdist_results(log_file: pathlib.Path) -> tuple[int, float, float]: +def get_raxmlng_rfdist_results(log_file: pathlib.Path) -> tuple[int, float]: """ - Method to parse the RAxML-NG log file and extract the number of unique topologies, relative RF-Distance, and absolute RF-Distance. + Method to parse the RAxML-NG log file and extract the number of unique topologies and relative RF-Distance. Args: log_file (pathlib.Path): Filepath pointing to the RAxML-NG log file. Returns: num_topos (float): Number of unique topologies of the given set of trees. rel_rfdist (float): Relative RF-Distance of the given set of trees. Computed as average over all pairwise RF-Distances. Value between 0.0 and 1.0. - abs_rfdist (float): Absolute RF-Distance of the given set of trees. 
""" - abs_rfdist = None rel_rfdist = None num_topos = None for line in log_file.open().readlines(): line = line.strip() - if "Average absolute RF distance in this tree set:" in line: - abs_rfdist = _get_value_from_line( - line, "Average absolute RF distance in this tree set:" - ) - elif "Average relative RF distance in this tree set:" in line: + if "Average relative RF distance in this tree set:" in line: rel_rfdist = _get_value_from_line( line, "Average relative RF distance in this tree set:" ) @@ -68,10 +62,10 @@ def get_raxmlng_rfdist_results(log_file: pathlib.Path) -> tuple[int, float, floa line, "Number of unique topologies in this tree set:" ) - if abs_rfdist is None or rel_rfdist is None or num_topos is None: + if rel_rfdist is None or num_topos is None: raise ValueError("Error parsing raxml-ng log.") - return int(num_topos), rel_rfdist, abs_rfdist + return int(num_topos), rel_rfdist class RAxMLNG: @@ -179,8 +173,8 @@ def infer_parsimony_trees( def get_rfdistance_results( self, trees_file: pathlib.Path, prefix: pathlib.Path = None, **kwargs - ) -> tuple[float, float, float]: - """Method that computes the number of unique topologies, relative RF-Distance, and absolute RF-Distance for the given set of trees. + ) -> tuple[float, float]: + """Method that computes the number of unique topologies and the relative RF-Distance for the given set of trees. Args: trees_file (pathlib.Path): Filepath pointing to the file containing the trees. @@ -193,7 +187,6 @@ def get_rfdistance_results( Returns: num_topos (float): Number of unique topologies of the given set of trees. rel_rfdist (float): Relative RF-Distance of the given set of trees. Computed as average over all pairwise RF-Distances. Value between 0.0 and 1.0. - abs_rfdist (float): Absolute RF-Distance of the given set of trees. 
""" with TemporaryDirectory() as tmpdir: tmpdir = pathlib.Path(tmpdir) diff --git a/tests/test_raxmlng.py b/tests/test_raxmlng.py index 24488b6..42acf3f 100644 --- a/tests/test_raxmlng.py +++ b/tests/test_raxmlng.py @@ -48,13 +48,10 @@ def test_raxmlng_init_fails_non_raxmlng_exe(): def test_get_raxmlng_rfdist_results(raxmlng_rfdistance_log): - num_topos, rel_rfdist, abs_rfdist = get_raxmlng_rfdist_results( - raxmlng_rfdistance_log - ) + num_topos, rel_rfdist = get_raxmlng_rfdist_results(raxmlng_rfdistance_log) assert num_topos == 2 assert rel_rfdist == pytest.approx(1 / 3, abs=0.01) - assert abs_rfdist == pytest.approx(2) def test_get_raxmlng_rfdist_results_raises_value_error(raxmlng_inference_log): @@ -78,12 +75,9 @@ def test_infer_parsimony_trees(raxmlng, phylip_msa_file): def test_get_rfdistance_results(raxmlng, multiple_trees_path): - num_topos, rel_rfdist, abs_rfdist = raxmlng.get_rfdistance_results( - multiple_trees_path - ) + num_topos, rel_rfdist = raxmlng.get_rfdistance_results(multiple_trees_path) assert num_topos == 6 assert rel_rfdist == pytest.approx(0.114, abs=0.01) - assert abs_rfdist == pytest.approx(22.269, abs=0.1) def test_run_raxmlng_command(raxmlng_command, phylip_msa_file): From d41e15e7e908aa92cb68873dd3bb71e153f28b9c Mon Sep 17 00:00:00 2001 From: Julia Date: Mon, 3 Mar 2025 10:13:26 +0100 Subject: [PATCH 25/36] separate test data for macOS and linux (RAxML-NG yields slightly different results) --- tests/conftest.py | 11 ++++++++++- tests/data/msa_test_data_linux.parquet | Bin 0 -> 16454 bytes ...data.parquet => msa_test_data_macOS.parquet} | Bin 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 tests/data/msa_test_data_linux.parquet rename tests/data/{msa_test_data.parquet => msa_test_data_macOS.parquet} (100%) diff --git a/tests/conftest.py b/tests/conftest.py index b283b0a..d81830d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import pathlib +import platform import pandas as pd import pytest @@ 
-8,7 +9,15 @@ from .test_config import RAXMLNG_COMMAND -df = pd.read_parquet("tests/data/msa_test_data.parquet") +# RAxML-NG results (RF-Distance, number of unique topologies) and consequently the predicted difficulties +# are slightly different on MacOS versus on Linux (due to the RNG in RAxML-NG). Therefore, we need to +# load different test data depending on the platform. Note that the test data are identical except for the +# RF-Distance, number of unique topologies, and predicted difficulties. +if platform.system() == "Darwin": + df = pd.read_parquet("tests/data/msa_test_data_macOS.parquet") +else: + df = pd.read_parquet("tests/data/msa_test_data_linux.parquet") + df["msa_file"] = df["msa_file"].apply( lambda x: pathlib.Path.cwd() / "tests" / "data" / x ) diff --git a/tests/data/msa_test_data_linux.parquet b/tests/data/msa_test_data_linux.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e182bd5df1c15434cdd1ed527ceaddc90176bfc5 GIT binary patch literal 16454 zcmcg!3w#q*)}OQ~O`(MXO-L2BR*Y3jo1`sE0QY8+(zLZr+eq_bwdB<%W%8hT^uhXx z!iS(Nu8&<6AIqocw<lekxP?U)1w>sI5!qc`0bS%#U$`pne&^oFq?1fi>TfIa z%S`6J&iS8n&pG$rlR_(PQc1v@uQpK;!dpz!_%i|XLi>Fb_ifI%zxuBJ*HcBZ<1rMdN zBHjVxkW$hLsk$6+7D~rS$4#G2l}+Vf^18_-fKrJE0Iq4)EO?fQa&k*n)3>egi(G;gf-gXxrWP8Cnpio+xrL?rg|S4Kzc6*USfv4B0aj&xyoUsP8on@A zVJ$T9u&Niv?%%q$VCfzX>pC9R{J2UbloMn9U%)sYZVTKi0P9+DURePMa)mgrklKbd z9|Em@N_bP$zWBEg>Dst5leH?W9SP}%Ea?V_IWx%Q|6EkUls)@DFR$8xr~=W@&8n}T zn!LFCFk18C7RS#f>_fKSsNVl~+ZOa*`Ss>Qw=F~Sr@eN3d-!>@@-OyJhpyj-)<1x+ zm(V+3-gZ_kc>~>gWI;9KeHiWh?_a!jy!jzCYm)P?_bY#kT63fDIfPztDQ9Xuk&xx(g`{@e^L{5q{atVh>&dtqoJUPNz5Lp;@Eq3@2O88=lvH1ym_^rx@(UwP?W2hfb!Z@nZd{SbXB z?SRj_SdtW%U}tp*B}*c#O`1IFa=quaCzs_f-b%!HE=#%vCQm`cxL-7+x4Lhqr-czEqYbHAMFTipJsl2Kazd)KJ~!qJ!vMALy|1M zuoeIbC$cp>sg7McASfIkcC?}3=?^}d|MrhtwL4n}A6&k5iZ&n3n;Ck;sogAQiqHOY zk@k${n}6vl#%b?;X7$>tVvjbuZt)eDRCj8Po6f$n=iUdkx$phoob^vWq5bz81-|d@ ze?>d6_Rjq0x4o&|xMGpxrF-tuUi-`bCyci|s6D0HwsPmK9<6X*xe;;+#|vAiuoeUx 
zDSRYJHU8Y0eXq5c23J`r3=~x?&~}PM<;Qi}8)pS}oO|Xhvge4X{Jh2a%d53j@S2@h=%v0ulQ%uK@7tm) zv~#xoqd7P*RU5!WXHUlSml9z(vT_L<&E7>=Qeo|R3=@Q1#TOPt$uDN1e~lhFwC3SM zYm{0_B$}~Yt$pR#BKQ>M71mPKTKOsmd}eDaLDoB-xjJu7esQC=N+c40{*$~i9$0dtzQ5pD%>f9HBJWZ3+AWXYGu?Uu>34j7RypG& z+DFlMefgU+s5D13M4v!EkKFvh^|lk}*7Y65zyA6ddf~kdn+jbwqa6q8mbUrcL1-u& zKF^}3n|I&ucz7?$9e?M+?WG40YY`u%iK-j}xrC#IXHQrQLS^*6LTOQxG>gPk$#kL* zcz~hA_h0O$Hxaj};Wx~R>pxrzHQe4zRE8HHpu^G6E5RE>b%nA*naW@`RhB~<UG)AzJ=c2IdAYD*%4%PEt~c6ws+8v zKe=dD?!cSqs%fspm;CW9q`$1(rQduMy}b9iwu8Ylh`IZ+PiG$d6J88*vWL(!Gdovq zzU?3AKv%~Bv}a|Y~oBNKXiTxa-gpSA}F02IrBU#>Zk@UtUID_DAtCj)>le~b&y z>4HM>5X!ks+4}J@$ZqjpzMAbVJ{U4}W*+7IgFY zmn@IcYfyf4N7m|7ThO26-@NcGy#k5nYdrrL|1_$)w{z9p#UG-lmd<@+Vbv#S`|QvD zc2aRCdif{2FV>6Kp(737?QMJH6V$Q4FWdI|$Eff(_Z^-4>?dUH#xm5sXq@~=@pfdo z@2hw2*!32=sdmM#O_%*0t$5NJTlMg9bb0aY#n(+I&XW@QI7-R$BCG`_!g=A&a|u5= z&pCx!K1nJrJ}0lC0>-sQcc+ZNaX}v(a{;~@jD{? z`aHKI;^>Izb44A1u==LyNe+r0Cbp-U5-#k*t0b13Ak_%F$NZh`a2?yJ_lRO!^NKR2AE){#^zaRwR2>2NJ!J4yL$D~BOs z{!>`Mq5g>1=je!edcq(Kr%}wK$Pw%6=;?I(!V!=jn*G5* z9RKA!L8Ad5D6=C1`Ut1?1BH0pK3Bx!?r{4$JAJMw69E!u3n9r2_BB`YhQUVLtv zf|@SPFRr1+6&4TMiZn}BCfhhpb~;;eBugO~MN*=;9K4p2X5r9Y2wxP%C(O4uLVHjo z+bvS;6)VnWW8xVy42(}vz#T9 zp^s+E-pEosGC^@RZ~FI0C!`meV=?T&9FId;in9fZo%z5+c;Ls&yReoTnMYDk;Mg3< zQoLTMcxV!^5uVmD8)0n*Hsj;kN)0rG)5qDeli7;riWEbWfs^o%j5!Hw&%-Hg@FMK) z&X%pqR_vUnI3fjh;7@Rk;(VK4f1F@kKqMUgnJrr>R_wS~aY7Cpgoi~ekg)bV0!@mC zY%a)hle8&Y@x`Tz)6;>I@brc`32V>8DG|7NEH;!X)|UW_Q6I2IR#*DPfK%RK5a~+A zt}B3_@IZq_64staq%;c*$L#HF#jdLr8?FLo#1uwz)EHT~42u@0&5hZLp>oA%*8oG| zh965Otj!=D7a;Qa3n5{%MI;*(DRxyTep$iE1;TOG&ZY{9v`MuY*l>$j%Hm2GtrE%B zi4?1qii4G8ZAdx_jI5@#3o_WKLU2bJuDVgSerrA_yWh1A(0)QS4076`M8iH;C6)Nr;C@B`T}lGUxzEmME!wq>D-b<%Sx9(9lVwH zI09Xs((;nhfWz;BiU90Kp#(qL^hBVjB=5lxysFuE4}MqObOSW71Um?IPn>QtnNFXF zad#xwvAD=K8Yq>*Ud;s?X96MW5A`P+g3fM_D}w9&o`?f)y`lTEQrII0-M&B<9^1aK zZq}UAn|NT7QIvAKdrTz>OH#{!ga|9_rg8ulmlF6S4i7@MDUD;xIdMD@@=aDWa!(wjhL2c63>iA5AaMqR3?2)cL7o|MwLBvfOnHL7@H969nDm{;3>3Gg 
z6(Zobq_Fb^BD1T7N=Q_XqyotdoA|6P_^0r)N72v#FGX>{=9}UW|^{;7P3y$VT>Vg$q^xMe#;m$#csw~ zbt2OuM3AxD4*-X__Ia=f`}_bvlJt@YILRx&KrxAaV+@E}`EX`SI6tYaDa;cQ7vS<% zqeS(Hks&F6CS;wQU&HMw%)j5BOt2iLOVSBZWa^cIg%=HzRl#c*Q5nNo(e!CSAu?WM z%{*kxVG6U7>5!%GHO63u^PoZq$!#f&xy@4YOmVfT(?re?Jsw9S*h9AVjL+HQ=;_Bh z`%u55rzhBp*~insxK^csU$0c9u-)b?me;7RP*+ry-bBgsX_KkNGM7#C%imX3;%hd2 zJx$YXaO3Mbny#jGU348ychUMTQm4};wfEx61&MZjn~^QUlTab;4K&?L!#McHL~7^M z!V}6g-BR5QZ$E1AGf}(2Ty0ewW6pYW#2IMzwfJ;SPk&>N!&0-Pp*z^s5NPx|{cfhg z*W11m&<%H(wg+Mtf^jhACCeZ%aA+6J!7u;n_9^?FPr&Wq86 zoPHDIF_;H1A4^r(=k(V_?AOCsLygL5=nXgM8~qN8Ic%@fL~RxogZnuH=CD%_bOJiS z1-w-nrN^oxZ4Ci))L}8k?1pw9#sYmKu6m}^UC;Cqy86bL%^zZHRc&m$#jLbhOkTHP z0hS5$VUF6=nk686fXL36+f6=YbwfavjD1*730w_+)qu^c0la;2UD-o+OWzW!n$sEJ zu^PQrwLRpF<8vA6K>u|E)+$qt3+h0JKDVVNgfReKEI;U3)$1|yWb|u%4!^nEttagf zyVc|c8cJJ2Z+)63J(gvduiejt?SRc$)fll^S=phEaT-jW37r#N_c4Gk=JKn&ar(VB zumhXb$k(CD>1Vo~Y7=J@DyLOf!O)8h64{WbaX}30K#BK9yVsZJ? z*k)tQsb*z%Ci@nN7oFh$5Le2)tIh-1^)u+OR1 zgyS(yV81*WN9A8Q&VcR0+%F({OJloOSDi37)!?VtuhU_Id23S}S=&mZTPEtRwS6C- zB0ODcCNZCxaPj-3vsJJUrLj96GlemgkNbP*OXqdhgKhe22H1H>6EBHgQ~ITg`>>7; z>ju`2lz5bce>8R((Dgg3bTL=J6mqI-h)rg+!_0lt&9UEU(0E6}YWMfWAVza`Fe-hz z>KkJ&gQnkYU}8?Zt_JL0m!I*1J!P`TUYB3f3+sW>VbMgbYS5p*uHRv)i;fWYe-P|; zuq}+m!}>*<7-f#Sv3&JQGRCA7ou%;|d)NtaEn&l>vjNUNJI{*|Y%*@kseLb)4M5Ba zI}MFqn>w6n&LQUYg>C-2ZU-AvFBmp9J|=WD(4+<*g*}bU>h+G+_XT6p#(|W17RIU+ z8y?l(gday?y&PSvkH=I&|8(*72eDs2(Qz-V{}BHhqpp6~8^9XGkM$YX1K3Zx4d#A0 z55(~sLr`Yp^o7%VKiEgaX{ZU{{eaz)F_xw6mrl<~>wwTUzV|u?XA7mnz<8}ycE%Mj z!dg+C$@h9aR#On-Yp2_9?uR*uIei)T0YAk4+UtfrsmpI);;*W= zP7V9re!Jfkf-_zx=o90H{clYT@!1P!2gz$y6`b*`bt*W|D9IYF(twRO4md0-uvK=A zNMqOEn?^eO`9b^?_78T8DP*fMF>ua>v!@-_K%>f8Z({ZA7w(H-y=UyOw+Eku^Q9?d zhd#C{bJ%^M*GNADXIH>dZ|)w!KSOXv$1B(f2~RHjIacahj)|q-~UJZU@RR1jZ}z6DNMyq>rwB zCZv*}pwoFtr3*hvzK0ZXUWul`{~->Tm$)quQ=x2Xja61w*Z!!oat@Rh)B<(jPm)gL z6&Gn8PAciZhs6YS4%n!)6?TuOpmt* z9oes;zkX?gKD}_xDDY;Ec?3?aKS6InEl018l^@uV{2KX&M>X6!VGIW~8CwS`E2|RY ziDWRT_!^cMPc*GdU8hK=rL9$!l_XsU={~rYVtTmT)=W^6Nn 
zcCfD=FanhUpDBNUnh0Cy=u$bnA`5RAM|`v@5PJ9oLmkP4$E#xoI&XB1pnG#3jb7nKp+7Ana^gpneI=}qnA{| zJ5*)TFDp+ny@1WC8^ClRUkv)P;cxcp(sd>1vGE{PKO~Bz&KDWn*d5mo>-ltJcif&9 zu=Y)q!nf}w7y*BO3FX>mAcEzgaY!U(PCgb79>ljFGT*%YHEzbP1US3E`>=fRi_oJR z;`F(A7sjo;`FG-SQceu#j_Wj7XRr@8_qi=N-)jc_VRiOjQWF1Rq=fy+1pbm8{&mBD{-F#iifV=bLE!%Zx?0}2 literal 0 HcmV?d00001 diff --git a/tests/data/msa_test_data.parquet b/tests/data/msa_test_data_macOS.parquet similarity index 100% rename from tests/data/msa_test_data.parquet rename to tests/data/msa_test_data_macOS.parquet From 670c8f10c259fc2940472a794d2d11bb3043ace6 Mon Sep 17 00:00:00 2001 From: Julia Date: Mon, 3 Mar 2025 10:33:08 +0100 Subject: [PATCH 26/36] rename parse function --- pypythia/msa.py | 5 ++--- pypythia/prediction.py | 9 +++++++-- tests/test_msa.py | 40 ++++++++++++++++++++-------------------- tests/test_prediction.py | 14 +++++++------- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/pypythia/msa.py b/pypythia/msa.py index 26c995c..892bbed 100644 --- a/pypythia/msa.py +++ b/pypythia/msa.py @@ -355,15 +355,14 @@ def write( SeqIO.write(_biopython_sequences, output_file, file_format.value) -def parse( +def parse_msa( msa_file: pathlib.Path, file_format: Optional[FileFormat] = None, data_type: Optional[DataType] = None, ) -> MSA: """Parse a multiple sequence alignment file. Note that the file needs to be in FASTA or PHYLIP format. - Note that per default, the file format and data type are inferred from the file content. - + Per default, the file format and data type are inferred from the file content. If the file format cannot be determined, a PyPythiaException is raised. In this case, make sure the file is in proper FASTA or PHYLIP format. If you are absolutely sure it is, you can provide the file format manually. 
diff --git a/pypythia/prediction.py b/pypythia/prediction.py index db4c204..3508fc3 100644 --- a/pypythia/prediction.py +++ b/pypythia/prediction.py @@ -11,7 +11,12 @@ from pypythia.custom_errors import PyPythiaException from pypythia.custom_types import DataType, FileFormat from pypythia.logger import log_runtime_information, logger -from pypythia.msa import MSA, deduplicate_sequences, parse, remove_full_gap_sequences +from pypythia.msa import ( + MSA, + deduplicate_sequences, + parse_msa, + remove_full_gap_sequences, +) from pypythia.predictor import DifficultyPredictor from pypythia.raxmlng import RAxMLNG @@ -213,7 +218,7 @@ def predict_difficulty( # Load the MSA log_info and log_runtime_information(message="Loading MSA") - msa = parse(msa_file, file_format=file_format, data_type=data_type) + msa = parse_msa(msa_file, file_format=file_format, data_type=data_type) # Deduplicate the MSA if necessary reduced_msa = _handle_duplicates(msa, deduplicate) diff --git a/tests/test_msa.py b/tests/test_msa.py index 4d29365..63254aa 100644 --- a/tests/test_msa.py +++ b/tests/test_msa.py @@ -11,14 +11,14 @@ _get_file_format, _guess_dtype, deduplicate_sequences, - parse, + parse_msa, remove_full_gap_sequences, ) def test_parse(msa_test_data_row): msa_file = msa_test_data_row.msa_file - msa = parse(msa_file) + msa = parse_msa(msa_file) assert msa.n_taxa == msa_test_data_row.num_taxa assert msa.n_sites == msa_test_data_row.num_sites assert msa.data_type.value == msa_test_data_row.data_type @@ -26,7 +26,7 @@ def test_parse(msa_test_data_row): def test_parse_large_phylip(phylip_msa_file): - msa = parse(phylip_msa_file) + msa = parse_msa(phylip_msa_file) assert msa.n_taxa == 68 assert msa.n_sites == 766 assert msa.data_type == DataType.DNA @@ -34,7 +34,7 @@ def test_parse_large_phylip(phylip_msa_file): def test_parse_small_fasta(small_msa_file): - msa = parse(small_msa_file) + msa = parse_msa(small_msa_file) assert msa.n_taxa == 10 assert msa.n_sites == 522 assert msa.data_type == 
DataType.DNA @@ -100,12 +100,12 @@ def test_msa_str_and_repr(): def test_contains_duplicate_sequences(msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert msa.contains_duplicate_sequences() == msa_test_data_row.contains_duplicates def test_contains_full_gap_sequences(msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert ( msa.contains_full_gap_sequences() == msa_test_data_row.contains_full_gap_sequences @@ -128,7 +128,7 @@ def test_get_msa_file_format_raises_value_error(raxmlng_inference_log): def test_guess_dtype(msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert _guess_dtype(msa.sequences).value == msa_test_data_row.data_type @@ -153,7 +153,7 @@ def test_guess_dtype_fails(): def test_get_raxmlng_model(msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert msa.get_raxmlng_model() == msa_test_data_row.raxmlng_model @@ -174,7 +174,7 @@ def test_get_raxmlng_model_fails_for_invalid_dtype(): def test_write(phylip_msa_file): - msa = parse(phylip_msa_file) + msa = parse_msa(phylip_msa_file) with tempfile.NamedTemporaryFile() as tmpfile: tmpfile = pathlib.Path(tmpfile.name) msa.write(tmpfile, file_format=FileFormat.PHYLIP) @@ -183,7 +183,7 @@ def test_write(phylip_msa_file): assert _get_file_format(tmpfile) == FileFormat.PHYLIP # Number of taxa and sites is identical - msa_reread = parse(tmpfile) + msa_reread = parse_msa(tmpfile) assert msa_reread.n_taxa == msa.n_taxa assert msa_reread.n_sites == msa.n_sites @@ -199,40 +199,40 @@ def test_write(phylip_msa_file): class TestMSAFeatures: def test_n_taxa(self, msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert msa.n_taxa == msa_test_data_row.num_taxa def test_n_sites(self, msa_test_data_row): - msa = 
parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert msa.n_sites == msa_test_data_row.num_sites def test_n_patterns(self, msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert msa.n_patterns == msa_test_data_row.num_patterns def test_percentage_gaps(self, msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert msa.proportion_gaps == msa_test_data_row.proportion_gaps def test_percentage_invariant(self, msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert msa.proportion_invariant == msa_test_data_row.proportion_invariant def test_entropy(self, msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert msa.entropy() == msa_test_data_row.entropy def test_pattern_entropy(self, msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert msa.pattern_entropy() == msa_test_data_row.pattern_entropy def test_bollback_multinomial(self, msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) assert msa.bollback_multinomial() == msa_test_data_row.bollback def test_remove_full_gap_sequences(msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) if msa_test_data_row.contains_full_gap_sequences: # If the MSA contains full-gap sequences: expect these sequences to be removed @@ -250,7 +250,7 @@ def test_remove_full_gap_sequences(msa_test_data_row): def test_deduplicate_sequences(msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) if msa_test_data_row.contains_duplicates: # If the MSA contains duplicate sequences: expect these sequences to be removed diff --git a/tests/test_prediction.py 
b/tests/test_prediction.py index bb34f9e..2fc5dfe 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -8,7 +8,7 @@ from pypythia import __version__ from pypythia.custom_errors import PyPythiaException -from pypythia.msa import parse +from pypythia.msa import parse_msa from pypythia.prediction import ( _handle_duplicates, _handle_full_gap_sequences, @@ -18,7 +18,7 @@ def test_handle_duplicates(msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) reduced_msa = _handle_duplicates(msa, deduplicate=True, log_info=False) if msa_test_data_row.contains_duplicates: @@ -29,14 +29,14 @@ def test_handle_duplicates(msa_test_data_row): def test_handle_duplicates_dont_deduplicate(msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) reduced_msa = _handle_duplicates(msa, deduplicate=False, log_info=False) assert reduced_msa == msa def test_handle_full_gap_sequences(msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) reduced_msa = _handle_full_gap_sequences(msa, remove_full_gaps=True, log_info=False) if msa_test_data_row.contains_full_gap_sequences: @@ -47,7 +47,7 @@ def test_handle_full_gap_sequences(msa_test_data_row): def test_handle_full_gap_sequences_dont_remove_full_gaps(msa_test_data_row): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) reduced_msa = _handle_full_gap_sequences( msa, remove_full_gaps=False, log_info=False ) @@ -56,7 +56,7 @@ def test_handle_full_gap_sequences_dont_remove_full_gaps(msa_test_data_row): def test_collect_features(msa_test_data_row, raxmlng): - msa = parse(msa_test_data_row.msa_file) + msa = parse_msa(msa_test_data_row.msa_file) features = collect_features( msa=msa, msa_file=msa_test_data_row.msa_file, raxmlng=raxmlng ) @@ -71,7 +71,7 @@ def test_collect_features(msa_test_data_row, raxmlng): def 
test_collect_features_stores_trees(phylip_msa_file, raxmlng): - msa = parse(phylip_msa_file) + msa = parse_msa(phylip_msa_file) with tempfile.NamedTemporaryFile("w") as pars_trees_file: pars_trees_file = pathlib.Path(pars_trees_file.name) collect_features( From ced7e59f196d7fca3258c943b2e0342f64385049 Mon Sep 17 00:00:00 2001 From: Julia Date: Tue, 4 Mar 2025 10:32:39 +0100 Subject: [PATCH 27/36] update gitignore --- .gitignore | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.gitignore b/.gitignore index 5817cad..6dfc535 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,21 @@ **/*.shap.pdf **/*.pythia.trees +**/*.iqtree.ckp.gz +**/*.iqtree.iqtree +**/*.iqtree.trees +**/*.iqtree.treefile + +**/*.raxml.bestModel +**/*.raxml.bestTree +**/*.raxml.bestTreeCollapsed +**/*.raxml.mlTrees +**/*.raxml.plausibleTrees +**/*.raxml.rba +**/*.raxml.startTree +**/*.rfDistances + + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] From 287a23890f3a262514cdaad5c4c8becd1e20abae Mon Sep 17 00:00:00 2001 From: Julia Date: Tue, 4 Mar 2025 10:38:48 +0100 Subject: [PATCH 28/36] fix doc references --- docs/api/custom_errors.md | 2 ++ docs/api/msa.md | 2 +- docs/api/prediction.md | 4 ++-- docs/api/raxmlng.md | 6 ++++++ generate_api_docs.py | 1 + mkdocs.yml | 5 +++-- pypythia/prediction.py | 4 ++-- 7 files changed, 17 insertions(+), 7 deletions(-) diff --git a/docs/api/custom_errors.md b/docs/api/custom_errors.md index ee1ee83..507ca20 100644 --- a/docs/api/custom_errors.md +++ b/docs/api/custom_errors.md @@ -5,6 +5,7 @@ show_root_heading: true merge_init_into_class: false group_by_category: true + modernize_annotations: true ::: pypythia.custom_errors.RAxMLNGError @@ -12,3 +13,4 @@ show_root_heading: true merge_init_into_class: false group_by_category: true + modernize_annotations: true diff --git a/docs/api/msa.md b/docs/api/msa.md index 4e95b8d..d1de69e 100644 --- a/docs/api/msa.md +++ b/docs/api/msa.md @@ -7,7 +7,7 @@ group_by_category: true 
modernize_annotations: true -::: pypythia.msa.parse +::: pypythia.msa.parse_msa options: show_root_heading: true diff --git a/docs/api/prediction.md b/docs/api/prediction.md index 4545e8d..704d4d7 100644 --- a/docs/api/prediction.md +++ b/docs/api/prediction.md @@ -1,11 +1,11 @@ -::: pypythia.prediction.predict_difficulty +::: pypythia.prediction.collect_features options: show_root_heading: true modernize_annotations: true -::: pypythia.prediction.collect_features +::: pypythia.prediction.predict_difficulty options: show_root_heading: true diff --git a/docs/api/raxmlng.md b/docs/api/raxmlng.md index 78eebba..8bd29ad 100644 --- a/docs/api/raxmlng.md +++ b/docs/api/raxmlng.md @@ -12,3 +12,9 @@ options: show_root_heading: true modernize_annotations: true + +::: pypythia.raxmlng.get_raxmlng_rfdist_results + + options: + show_root_heading: true + modernize_annotations: true diff --git a/generate_api_docs.py b/generate_api_docs.py index 66c30d6..02e160b 100644 --- a/generate_api_docs.py +++ b/generate_api_docs.py @@ -15,6 +15,7 @@ "prediction": None, "predictor": None, "custom_types": None, + "custom_errors": None, "config": None, } diff --git a/mkdocs.yml b/mkdocs.yml index a2c5458..f9fc2d8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -19,16 +19,17 @@ nav: - prediction: api/prediction.md - predictor: api/predictor.md - custom_types: api/custom_types.md + - custom_errors: api/custom_errors.md - config: api/config.md plugins: - search - mkdocstrings - mike: canonical_version: latest +repo_name: tschuelia/PyPythia +repo_url: https://github.com/tschuelia/PyPythia site_name: PyPythia site_url: https://tschuelia.github.io/PyPythia/ -repo_url: https://github.com/tschuelia/PyPythia -repo_name: tschuelia/PyPythia theme: name: material palette: diff --git a/pypythia/prediction.py b/pypythia/prediction.py index 3508fc3..091604d 100644 --- a/pypythia/prediction.py +++ b/pypythia/prediction.py @@ -157,9 +157,9 @@ def predict_difficulty( RAxML-NG auto parallelization scheme. 
seed (int, optional): Random seed to use for the parsimony tree inference. Defaults to 0. file_format (FileFormat, optional): File format of the MSA file. Defaults to None. In this case, the file format - is inferred based on the file content. See `pypythia.msa.parse` for information on when this is required. + is inferred based on the file content. See `pypythia.msa.parse_msa` for information on when this is required. data_type (DataType, optional): Data type of the MSA sequences. Defaults to None. In this case, the data type - is inferred based on the file content. See `pypythia.msa.parse` for information on when this is required. + is inferred based on the file content. See `pypythia.msa.parse_msa` for information on when this is required. deduplicate (bool, optional): If True, remove duplicate sequences from the MSA. Defaults to True. remove_full_gaps (bool, optional): If True, remove full gap sequences from the MSA. Defaults to True. result_prefix (pathlib.Path, optional): Prefix for the result files. Defaults to None. In this case, the prefix From 90b6e9f01c07bb7222951ff431c168e2734cd5f4 Mon Sep 17 00:00:00 2001 From: Julia Date: Tue, 4 Mar 2025 16:58:23 +0100 Subject: [PATCH 29/36] reintroduce shap command line flag (runtime impact) --- pypythia/main.py | 10 ++++++++++ pypythia/prediction.py | 7 ++++--- tests/test_prediction.py | 11 +++++++++-- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/pypythia/main.py b/pypythia/main.py index 7045a1c..d3877c7 100644 --- a/pypythia/main.py +++ b/pypythia/main.py @@ -65,6 +65,15 @@ def _parse_cli() -> argparse.Namespace: help="Filepath of the alternative predictor to use (default: latest Pythia).", ) + parser.add_argument( + "--shap", + help="If set, computes the shapley values of the prediction as waterfall plot in '{prefix}.shap.pdf'. " + "When using this option, make sure you understand what shapley values are and how to interpret this plot." 
+ "For details on shapley values refer to the documentation: " + "https://tschuelia.github.io/PyPythia/latest/usage/#shap-waterfall-plot (default: False).", + action="store_true", + ) + parser.add_argument( "--forceDuplicates", help="Per default, Pythia refuses to predict the difficulty for MSAs containing duplicate sequences," @@ -130,6 +139,7 @@ def main(): remove_full_gaps=not args.forceFullGaps, result_prefix=prefix, store_results=store_results, + plot_shap=args.shap, log_info=True, ) diff --git a/pypythia/prediction.py b/pypythia/prediction.py index 091604d..afb3fbf 100644 --- a/pypythia/prediction.py +++ b/pypythia/prediction.py @@ -141,6 +141,7 @@ def predict_difficulty( remove_full_gaps: bool = True, result_prefix: Optional[pathlib.Path] = None, store_results: bool = True, + plot_shap: bool = False, log_info: bool = False, ) -> np.float64: """Predict the difficulty of an MSA using the PyPythia difficulty predictor. @@ -168,9 +169,9 @@ def predict_difficulty( In this case, the following files are stored: - The reduced MSA in PHYLIP format (if duplicates or full gap sequences were removed) in `{result_prefix}.reduced.phy` - The inferred parsimony trees in Newick format in `{result_prefix}.pythia.trees` - - The shapley values as waterfall plot in `{result_prefix}.shap.pdf` + - The shapley values as waterfall plot in `{result_prefix}.shap.pdf` (if plot_shap=True) - The features and predicted difficulty as CSV file in `{result_prefix}.pythia.csv` - + plot_shap (bool, optional): If True, plot the shapley values as waterfall plot. Defaults to False. log_info (bool, optional): If True, log intermediate progress information using the default logger. Defaults to False. 
Returns: @@ -287,7 +288,7 @@ def predict_difficulty( log_info and log_runtime_information("Predicting the difficulty") difficulty = predictor.predict(msa_features) - if store_results: + if plot_shap and store_results: # Plot shapley values # this only makes sense if store_results=True, otherwise the figure would be lost fig = predictor.plot_shapley_values(msa_features) diff --git a/tests/test_prediction.py b/tests/test_prediction.py index 2fc5dfe..c478ee9 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -87,7 +87,10 @@ def test_collect_features_stores_trees(phylip_msa_file, raxmlng): @pytest.mark.parametrize("store_results", [True, False]) -def test_predict_difficulty(msa_test_data_row, raxmlng_command, store_results): +@pytest.mark.parametrize("plot_shap", [True, False]) +def test_predict_difficulty( + msa_test_data_row, raxmlng_command, store_results, plot_shap +): # Check if the Pythia version is identical, if not the expected difficulty parquet file might be outdated # In this case, raise a warning if msa_test_data_row.pythia_version != __version__: @@ -107,6 +110,7 @@ def test_predict_difficulty(msa_test_data_row, raxmlng_command, store_results): remove_full_gaps=False, result_prefix=prefix, store_results=store_results, + plot_shap=plot_shap, ) # 1. 
Check if the predicted difficulty matches the "ground-truth" in our test data @@ -122,8 +126,11 @@ def test_predict_difficulty(msa_test_data_row, raxmlng_command, store_results): if store_results: assert pars_trees_file.exists() - assert shap_file.exists() assert results_file.exists() + if plot_shap: + assert shap_file.exists() + else: + assert not shap_file.exists() else: assert not pars_trees_file.exists() assert not shap_file.exists() From a0fbb02588884ff0943fc65e9e5bcb86b9209c74 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 5 Mar 2025 09:25:12 +0100 Subject: [PATCH 30/36] update usage docs --- docs/usage.md | 83 +++++++++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 990d8b8..9426f46 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -17,7 +17,8 @@ accordingly. The output will be something like `The predicted difficulty for MSA examples/example.phy is: 0.02.`, telling us that example.phy is an easy dataset. In fact, this dataset exhibits a single likelihood peak. Depending on the predictor -version you are using, the actual value might slightly differ. This is expected and nothing to worry about 🙂 +version or operating system you are using, the actual value might slightly differ. +This is expected and nothing to worry about 🙂 *Note that Pythia can also handle FASTA input files, see section Input Data below.* @@ -30,52 +31,49 @@ Latest version: https://github.com/tschuelia/PyPythia Questions/problems/suggestions? Please open an issue on GitHub. usage: pythia [-h] -m MSA -r RAXMLNG [-t THREADS] [-s SEED] [-p PREFIX] - [--predictor PREDICTOR] [-prec PRECISION] [-sT] [--forceDuplicates] - [--forceFullGaps] [--shap] [-v] + [--predictor PREDICTOR] [--shap] [--forceDuplicates] [--forceFullGaps] + [--nofiles] [-V] Parser for Pythia command line options. 
options: -h, --help show this help message and exit - -m MSA, --msa MSA Multiple Sequence Alignment to predict the difficulty for. - Must be in either phylip or fasta format. + -m MSA, --msa MSA Multiple Sequence Alignment to predict the difficulty for. Must be + in either phylip or fasta format. -r RAXMLNG, --raxmlng RAXMLNG - Path to the binary of RAxML-NG. For install instructions - see https://github.com/amkozlov/raxml-ng.(default: 'raxml- - ng' if in $PATH, otherwise this option is mandatory). + Path to the binary of RAxML-NG. For install instructions see + https://github.com/amkozlov/raxml-ng.(default: 'raxml-ng' if in + $PATH, otherwise this option is mandatory). -t THREADS, --threads THREADS - Number of threads to use for parallel parsimony tree - inference (default: RAxML-NG autoconfig). - -s SEED, --seed SEED Seed for the RAxML-NG parsimony tree inference (default: - 0). + Number of threads to use for parallel parsimony tree inference + (default: RAxML-NG autoconfig). + -s SEED, --seed SEED Seed for the RAxML-NG parsimony tree inference (default: 0). -p PREFIX, --prefix PREFIX - Prefix of the PyPythia log and result file (default: MSA - file name). + Prefix of the PyPythia log and result file (default: MSA file name). --predictor PREDICTOR - Filepath of the alternative predictor to use (default: - latest Pythia). - -prec PRECISION, --precision PRECISION - Set the number of decimals the difficulty should be rounded - to (default: 2). - -sT, --storeTrees If set, stores the parsimony trees as - '{prefix}.pythia.trees' file (default: False). - --forceDuplicates Per default, Pythia refuses to predict the difficulty for - MSAs containing duplicate sequences. Only set this option - if you are absolutely sure that you want to predict the - difficulty for this MSA (default: False). - --forceFullGaps Per default, Pythia refuses to predict the difficulty for - MSAs containing sequences with only gaps. 
Only set this - option if you are absolutely sure that you want to predict - the difficulty for this MSA (default: False). - --shap If set, computes the shapley values of the prediction as - waterfall plot in '{prefix}.shap.pdf'. When using this - option, make sure you understand what shapley values are - and how to interpret this plot.For details on shapley - values refer to the wiki: - https://github.com/tschuelia/PyPythia/wiki/Usage#shapley- - values (default: False). - -v, --verbose If set, additionally prints the MSA features (default: - False). + Filepath of the alternative predictor to use (default: latest + Pythia). + --shap If set, computes the shapley values of the prediction as waterfall + plot in '{prefix}.shap.pdf'. When using this option, make sure you + understand what shapley values are and how to interpret this + plot.For details on shapley values refer to the documentation: + https://tschuelia.github.io/PyPythia/latest/usage/#shap-waterfall- + plot (default: False). + --forceDuplicates Per default, Pythia refuses to predict the difficulty for MSAs + containing duplicate sequences,and removes duplicate sequences prior + to predicting the difficulty. Only set this option if you are + absolutely sure that you want to predict the difficulty for this MSA + (default: False). + --forceFullGaps Per default, Pythia refuses to predict the difficulty for MSAs + containing sequences with only gaps,and removes full-gap sequences + prior to predicting the difficulty. Only set this option if you are + absolutely sure that you want to predict the difficulty for this MSA + (default: False). + --nofiles Prevent Pythia from writing any files and only print logs/results to + the terminal (default: False). WARNING: in this case and if your MSA + contains duplicate/full-gap sequences the reduced MSA will not be + stored. + -V, --version Print the version number and exit. ``` ## From Code @@ -147,9 +145,10 @@ and subsequently retrain the predictor. 
We extend the training data using the anonymized MSAs that we continuously obtain during our RAxML Grove database updates. Note that these MSAs are only available internally in RAxML Grove and are not publicly available. -As per default, PyPythia uses the lastest predictor `predictors/latest.pckl`. Older versions of the trained predictors -are available in the `predictors` directory and can be passed to Pythia (see Usage instructions above). All predictors -of versions >= 1.0.0 are trained using DNA, AA, and morphological MSAs. +As per default, PyPythia uses the latest predictor `predictors/latest.txt`. If you want to use an older version of Pythia, +please install the respective PyPythia version. +You can also pass a custom predictor file using the `--predictor` option. However, this will only work if the passed +file contains a LightGBM Booster model. Note that the predictions for the same MSA can be different when using different versions of Pythia. @@ -180,7 +179,7 @@ considering all feature values together for a specific set of feature values. The following figure shows an exemplary waterfall plot output for the MSA `example/example.py` and Pythia version 1.1.0. The x-axis depicts the difficulty and the y-axis the features alongside the respective feature value. The features are -sorted by their Shapley value with the highest contribution on top. You can read the plot as follows. The base line +sorted by their Shapley value with the most important feature on top. You can read the plot as follows. The baseline difficulty that Pythia v1.1.0 learned is 0.35, as indicated by the `E[f(x)] = 0.35` on the x-axis. 
The `proportion_invariant` feature contributed to the overall prediction with a shift towards `1.0` (more difficult) of `0.01`, so *in combination with the other features*, a `proportion_invariant` of `0.341` indicates that the MSA is From 0b96375186ca2f7c63b69da3c8df2f6ea8c783dd Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 5 Mar 2025 09:43:08 +0100 Subject: [PATCH 31/36] refactoring --- pypythia/msa.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/pypythia/msa.py b/pypythia/msa.py index 892bbed..28c5fea 100644 --- a/pypythia/msa.py +++ b/pypythia/msa.py @@ -1,6 +1,7 @@ import math import pathlib from collections import Counter +from functools import cached_property from io import StringIO from typing import Optional @@ -35,7 +36,14 @@ DNA_AMBIGUITY_CHARS = list(DNA_AMBIGUITY_CODES.keys()) -DNA_AMBIGUITY_MAP = { +# This dict provides a set of characters that can all represent the respective nucleotide. +# The values are provided in ASCII ordinals. +# This map is needed for the computation of proportion of invariant sites since ambiguous or gap characters are +# considered invariant if they can be resolved to a single character. +# An exemplary entry of this map is: +# A: {45, 65, 68, 72, 77, 82, 86, 87} +# which corresponds to the characters '-', 'A', 'D', 'H', 'M', 'R', 'V', 'W' +DNA_AMBIGUITY_MAP: dict[bytes, set[int]] = { nt: set( map( ord, @@ -45,7 +53,7 @@ for nt in NUCLEOTIDES } -DNA_CHARS = NUCLEOTIDES + list(DNA_AMBIGUITY_CODES.keys()) + DNA_GAP_CHARS +DNA_CHARS = NUCLEOTIDES + DNA_AMBIGUITY_CHARS + DNA_GAP_CHARS AMINO_ACIDS = [ @@ -77,7 +85,14 @@ AA_AMBIGUITY_CHARS = list(AA_AMBIGUITY_CODES.keys()) -AA_AMBIGUITY_MAP = { +# This dict provides a set of characters that can all represent the respective amino acids. +# The values are provided in ASCII ordinals. 
+# This map is needed for the computation of proportion of invariant sites since ambiguous or gap characters are +# considered invariant if they can be resolved to a single character. +# An exemplary entry of this map is: +# D: {45, 66, 68} +# which corresponds to the characters '-', 'B', 'D' +AA_AMBIGUITY_MAP: dict[bytes, set[int]] = { aa: set( map( ord, [GAP, aa] + [c for c, vals in AA_AMBIGUITY_CODES.items() if aa in vals] @@ -86,7 +101,7 @@ for aa in AMINO_ACIDS } -AA_CHARS = AMINO_ACIDS + list(AA_AMBIGUITY_CODES.keys()) + GAP_CHARS +AA_CHARS = AMINO_ACIDS + AA_AMBIGUITY_CHARS + GAP_CHARS def _get_file_format(msa_file: pathlib.Path) -> FileFormat: @@ -198,7 +213,7 @@ def contains_duplicate_sequences(self) -> bool: unique_sequences = np.unique(self.sequences, axis=0) return unique_sequences.shape[0] < self.sequences.shape[0] - @property + @cached_property def n_patterns(self) -> int: """Returns the number of unique patterns in the MSA. @@ -211,7 +226,7 @@ def n_patterns(self) -> int: un = set([c.tobytes() for c in self.sequences.T]) return len(un) - ((GAP * self.n_taxa) in un) - @property + @cached_property def proportion_gaps(self) -> float: """Returns the proportion of gap characters in the MSA. Note that prior to calculating the percentage, full-gap sites are removed. @@ -224,7 +239,7 @@ def proportion_gaps(self) -> float: ] return np.sum(full_gap_sites_removed == GAP) / full_gap_sites_removed.size - @property + @cached_property def proportion_invariant(self) -> float: """Returns the proportion of invariant sites in the MSA. A site is considered invariant if all sequences have the same character at that site. 
@@ -247,9 +262,11 @@ def proportion_invariant(self) -> float: non_gap_site_count = 0 invariant_count = 0 + gap_ord_set = {GAP_ORD} + for site in self.sequences.T: site = set(site.tobytes()) - if site == {GAP_ORD}: + if site == gap_ord_set: # full-gap sites are not counted as invariant continue From 20e115c52d4bdda52fdd8f142692a2f800678a79 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 5 Mar 2025 09:53:44 +0100 Subject: [PATCH 32/36] refactoring --- pypythia/config.py | 2 +- pypythia/main.py | 12 +++++-- pypythia/prediction.py | 75 ++++++++++++++++++++-------------------- pypythia/raxmlng.py | 2 +- tests/test_prediction.py | 7 ++-- 5 files changed, 53 insertions(+), 45 deletions(-) diff --git a/pypythia/config.py b/pypythia/config.py index ecd6905..eca6eab 100644 --- a/pypythia/config.py +++ b/pypythia/config.py @@ -4,4 +4,4 @@ DEFAULT_RAXMLNG_EXE = ( pathlib.Path(shutil.which("raxml-ng")) if shutil.which("raxml-ng") else None ) -DEFAULT_MODEL_FILE = pathlib.Path(__file__).parent / "predictors/latest.txt" +DEFAULT_MODEL_FILE = pathlib.Path(__file__).parent / "predictors" / "latest.txt" diff --git a/pypythia/main.py b/pypythia/main.py index d3877c7..94c8b8a 100644 --- a/pypythia/main.py +++ b/pypythia/main.py @@ -94,19 +94,25 @@ def _parse_cli() -> argparse.Namespace: parser.add_argument( "--nofiles", - help="Prevent Pythia from writing any files and only print logs/results to the terminal (default: False)." + help="Prevent Pythia from writing any files and only print logs/results to the terminal (default: False). 
" "WARNING: in this case and if your MSA contains duplicate/full-gap sequences the reduced MSA will not be stored.", action="store_true", ) - parser.add_argument("-V", "--version", action="version", version=__version__) + parser.add_argument( + "-V", + "--version", + action="version", + version=__version__, + help="Print the version number and exit.", + ) return parser.parse_args() def main(): - args = _parse_cli() logger.info(get_header()) + args = _parse_cli() # Format all paths to pathlib.Path objects and set a default value if not provided msa_file = pathlib.Path(args.msa) diff --git a/pypythia/prediction.py b/pypythia/prediction.py index afb3fbf..c28db90 100644 --- a/pypythia/prediction.py +++ b/pypythia/prediction.py @@ -81,19 +81,23 @@ def collect_features( Dataframe containing a single row with all features required for predicting the difficulty of the MSA. The columns correspond to the feature names the predictor was trained with. """ - with TemporaryDirectory() as tmpdir: - msa_file = msa_file - model = msa.get_raxmlng_model() - - log_info and log_runtime_information("Retrieving num_taxa, num_sites.") + # If the MSA contains less than 4 sequences, RAxML-NG will fail as there is only a single possible + # tree topology for this MSA. In this case, any phylogenetic inference is meaningless and we raise a + # PyPythia exception to inform the user. + if msa.n_taxa < 4: + raise PyPythiaException( + "The MSA contains less than 4 sequences. " + "Phylogenetic inference is not meaningful for such small MSAs as there exists only a single possible tree topology. 
" + ) + with TemporaryDirectory() as tmpdir: n_pars_trees = 24 log_info and log_runtime_information( f"Inferring {n_pars_trees} parsimony trees with random seed {seed}.", ) trees = raxmlng.infer_parsimony_trees( msa_file, - model, + msa.get_raxmlng_model(), pathlib.Path(tmpdir) / "pars", redo=None, seed=seed, @@ -131,7 +135,6 @@ def collect_features( def predict_difficulty( msa_file: pathlib.Path, - model_file: Optional[pathlib.Path] = DEFAULT_MODEL_FILE, raxmlng: Optional[pathlib.Path] = DEFAULT_RAXMLNG_EXE, threads: int = None, seed: int = 0, @@ -142,6 +145,7 @@ def predict_difficulty( result_prefix: Optional[pathlib.Path] = None, store_results: bool = True, plot_shap: bool = False, + model_file: pathlib.Path = DEFAULT_MODEL_FILE, log_info: bool = False, ) -> np.float64: """Predict the difficulty of an MSA using the PyPythia difficulty predictor. @@ -150,8 +154,6 @@ def predict_difficulty( Args: msa_file (pathlib.Path): Path to the MSA file. Note that the MSA file must be in either FASTA or PHYLIP format. - model_file (pathlib.Path, optional): Path to the trained difficulty predictor model. - Defaults to the latest model shipped with PyPythia. raxmlng (pathlib.Path, optional): Path to the RAxML-NG executable. If not set, uses the RAxML-NG binary found in the PATH environment variable. threads (int, optional): Number of threads to use for parallel parsimony tree inference. If not set, uses the @@ -172,6 +174,8 @@ def predict_difficulty( - The shapley values as waterfall plot in `{result_prefix}.shap.pdf` (if plot_shap=True) - The features and predicted difficulty as CSV file in `{result_prefix}.pythia.csv` plot_shap (bool, optional): If True, plot the shapley values as waterfall plot. Defaults to False. + model_file (pathlib.Path): Path to the trained difficulty predictor model. + Defaults to the latest model shipped with PyPythia. log_info (bool, optional): If True, log intermediate progress information using the default logger. Defaults to False. 
Returns: @@ -180,6 +184,11 @@ def predict_difficulty( if not msa_file.exists(): raise PyPythiaException(f"The given MSA {msa_file} file does not exist.") + if raxmlng is None: + raise PyPythiaException( + "Path to the RAxML-NG executable is required if 'raxml-ng' is not in $PATH." + ) + result_prefix = pathlib.Path(result_prefix) if result_prefix else msa_file pars_trees_file = pathlib.Path(f"{result_prefix}.pythia.trees") @@ -190,22 +199,17 @@ def predict_difficulty( if store_results: # If the user wants to keep the results, use the result_prefix reduced_msa_file = pathlib.Path(f"{result_prefix}.reduced.phy") + _tmpfile = None else: # Else, use a temporary file - reduced_msa_file = pathlib.Path( - tempfile.NamedTemporaryFile(mode="w", suffix=".phy").name - ) + _tmpfile = tempfile.NamedTemporaryFile(mode="w", suffix=".phy") + reduced_msa_file = pathlib.Path(_tmpfile.name) log_info and log_runtime_information( message=f"Starting prediction for MSA {msa_file}." ) # Init RAxML-NG - if raxmlng is None: - raise PyPythiaException( - "Path to the RAxML-NG executable is required if 'raxml-ng' is not in $PATH." 
- ) - try: raxmlng = RAxMLNG(**{"exe_path": raxmlng} if raxmlng else {}) except Exception as e: @@ -213,12 +217,13 @@ def predict_difficulty( # Init the prediction model log_info and log_runtime_information(message=f"Loading predictor {model_file.name}") - - predictor = DifficultyPredictor(model_file=model_file) + try: + predictor = DifficultyPredictor(model_file=model_file) + except Exception as e: + raise PyPythiaException("Initializing the difficulty predictor failed.") from e # Load the MSA log_info and log_runtime_information(message="Loading MSA") - msa = parse_msa(msa_file, file_format=file_format, data_type=data_type) # Deduplicate the MSA if necessary @@ -230,6 +235,14 @@ def predict_difficulty( # Check if the reduced MSA is different from the original MSA is_reduced = msa != reduced_msa if is_reduced: + if reduced_msa.n_taxa < 4: + raise PyPythiaException( + "During preprocessing, Pythia reduced the input MSA by removing duplicate sequences and/or " + "sequences containing only gaps leading to an MSA with less than 4 sequences. " + "RAxML-NG refuses to infer trees for such small MSAs as there exists only a single possible tree topology. " + "You can rerun the prediction and disable deduplication and gap removal to use the original MSA. " + ) + # If the reduced MSA is different from the original MSA, proceed with the reduced MSA msa = reduced_msa @@ -255,25 +268,9 @@ def predict_difficulty( log_info and log_runtime_information( "Number of threads not specified, using RAxML-NG autoconfig." if threads is None - else f"Using {threads} threads for parallel parsimony tree computation." + else f"Using {threads} threads for parallel parsimony tree inference." ) - # If the MSA/reduced MSA contains less than 4 sequences, RAxML-NG will fail as there is only a single possible - # tree topology for this MSA. In this case, any phylogenetic inference is meaningless and we raise a - # PyPythia exception to inform the user. 
- if msa.n_taxa < 4: - error_msg = ( - "The MSA contains less than 4 sequences. " - "Phylogenetic inference is not meaningful for such small MSAs as there exists only a single possible tree topology. " - ) - if is_reduced: - error_msg += ( - "Note that during preprocessing, Pythia reduced the input MSA by removing duplicate sequences and/or " - "sequences containing only gaps leading to an MSA with less than 4 sequences. " - "You can rerun the prediction and disable deduplication and gap removal to use the original MSA. " - ) - raise PyPythiaException(error_msg) - msa_features = collect_features( msa=msa, msa_file=msa_file, @@ -322,4 +319,8 @@ def predict_difficulty( " this plot. For details refer to the wiki: https://github.com/tschuelia/PyPythia/wiki/Usage#shapley-values" ) + if _tmpfile is not None: + # store_results was false, so we stored the reduced MSA in a temporary file, which we need to clean up + _tmpfile.close() + return difficulty[0] diff --git a/pypythia/raxmlng.py b/pypythia/raxmlng.py index 9c364cd..cf2706c 100644 --- a/pypythia/raxmlng.py +++ b/pypythia/raxmlng.py @@ -84,7 +84,7 @@ class RAxMLNG: """ def __init__(self, exe_path: Optional[pathlib.Path] = DEFAULT_RAXMLNG_EXE): - if not exe_path.exists(): + if exe_path is None or not exe_path.exists(): raise FileNotFoundError("RAxML-NG executable not found.") try: diff --git a/tests/test_prediction.py b/tests/test_prediction.py index c478ee9..65dd62e 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -231,8 +231,8 @@ def test_predict_difficulty_fewer_than_four_taxa(data_dir, raxmlng_command): predict_difficulty( msa_file=data_dir / "DNA" / "3_taxa_msa.fasta", raxmlng=raxmlng_command, - deduplicate=True, - remove_full_gaps=True, + deduplicate=False, + remove_full_gaps=False, result_prefix=prefix, store_results=True, ) @@ -246,7 +246,8 @@ def test_predict_difficulty_with_deduplication_and_gap_removal_if_reduced_msa_ha with pytest.raises( PyPythiaException, match=re.compile( - r"The 
MSA contains less than 4 sequences.+reduced the input MSA", re.DOTALL + r"During preprocessing.+leading to an MSA with less than 4 sequences", + re.DOTALL, ), ): with tempfile.TemporaryDirectory() as tmpdir: From 72e05ee63863f7fa9452959264c86dd4ea6edbd0 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 5 Mar 2025 09:58:28 +0100 Subject: [PATCH 33/36] refactoring --- tests/test_prediction.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_prediction.py b/tests/test_prediction.py index 65dd62e..f4e64fb 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -301,3 +301,20 @@ def test_predict_difficulty_raxmlng_init_fails(small_msa_file): result_prefix=None, store_results=False, ) + + +def test_predict_difficulty_predictor_init_fails(small_msa_file, raxmlng_command): + with pytest.raises( + PyPythiaException, match="Initializing the difficulty predictor failed" + ): + with tempfile.NamedTemporaryFile("w") as f: + f.write("This is not a valid predictor file.") + predict_difficulty( + msa_file=small_msa_file, + raxmlng=raxmlng_command, + deduplicate=False, + remove_full_gaps=False, + result_prefix=None, + store_results=False, + model_file=pathlib.Path(f.name), + ) From be52c49566f8ce68b56928a14326188f86653473 Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 5 Mar 2025 10:02:57 +0100 Subject: [PATCH 34/36] update docs --- docs/index.md | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/docs/index.md b/docs/index.md index 084813f..c9c26bd 100644 --- a/docs/index.md +++ b/docs/index.md @@ -31,18 +31,9 @@ Phylogenetic Analyses.** *Molecular Biology and Evolution*, 39( > Boosted Tree Regressor. > This affects all Pythia versions >= 1. If you use Pythia in your work, please state the correct learning algorithm. If > you are unsure, feel free to reach out to me ๐Ÿ™‚ +> +> There will soon be a new pre-print that explains the changes in detail, stay tuned! -### References - -* A. M. Kozlov, D. Darriba, T. 
Flouri, B. Morel, and A. Stamatakis (2019) - **RAxML-NG: a fast, scalable and user-friendly tool for maximum likelihood phylogenetic inference** - *Bioinformatics*, 35(21): 4453โ€“4455. - [https://doi.org/10.1093/bioinformatics/btz305](https://doi.org/10.1093/bioinformatics/btz305) - -* D. Hรถhler, W. Pfeiffer, V. Ioannidis, H. Stockinger, A. Stamatakis (2022) - **RAxML Grove: an empirical phylogenetic tree database** - *Bioinformatics*, 38(6):1741โ€“1742. - [https://doi.org/10.1093/bioinformatics/btab863](https://doi.org/10.1093/bioinformatics/btab863) ### CPythia From c99aaf75b834150c6672403171c5cb0bf09bed8b Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 5 Mar 2025 10:38:15 +0100 Subject: [PATCH 35/36] update docs --- docs/index.md | 3 +-- docs/usage.md | 11 +++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index c9c26bd..b69ae33 100644 --- a/docs/index.md +++ b/docs/index.md @@ -22,8 +22,7 @@ issue [here](https://github.com/tschuelia/PyPythia/issues). The paper explaining the details of Pythia is published in MBE: Haag, J., Hรถhler, D., Bettisworth, B., & Stamatakis, A. (2022). **From Easy to Hopeless - Predicting the Difficulty of -Phylogenetic Analyses.** *Molecular Biology and Evolution*, 39( -12). [https://doi.org/10.1093/molbev/msac254](https://doi.org/10.1093/molbev/msac254) +Phylogenetic Analyses.** *Molecular Biology and Evolution*, 39(12). [https://doi.org/10.1093/molbev/msac254](https://doi.org/10.1093/molbev/msac254) > [!WARNING] > Since this publication, we made some considerable changes to Pythia. diff --git a/docs/usage.md b/docs/usage.md index 9426f46..5aaa927 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -76,6 +76,17 @@ options: -V, --version Print the version number and exit. 
``` +### Result files + +Pythia will write the following files: +- A logfile containing the same information as printed to the terminal: `{result_prefix}.pythia.log` +- The reduced MSA file in case the input MSA contained duplicate/full-gap sequences (and the reduction was not disabled): `{result_prefix}.reduced.phy` +- The inferred parsimony trees in Newick format: `{result_prefix}.pythia.trees` +- The shapley values as waterfall plot (if --shap is set): `{result_prefix}.shap.pdf` +- The features and predicted difficulty as CSV file: `{result_prefix}.pythia.csv` + +The result_prefix can be set using the `--prefix` command line option. If not set, Pythia uses the MSA file as prefix. You can prevent Pythia from writing any files via the flag `--nofiles`. + ## From Code You can also use the library as a regular python library by installing it in your current environment. From 6d8fe77203e26a786ac50b953fc839d50800fdcc Mon Sep 17 00:00:00 2001 From: Julia Date: Wed, 5 Mar 2025 11:57:04 +0100 Subject: [PATCH 36/36] rename package and adjust version retrieval for pypi --- pyproject.toml | 2 +- pypythia/__init__.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e5e9137..f735281 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "PyPythiaPhylo" +name = "PythiaPhyloPredictor" description = "Lightweight python library for predicting the difficulty of alignments in phylogenetics" readme = {file = "README.md", content-type = "text/markdown"} authors = [{name = "Julia Haag", email = "info@juliaschmid.com"}] diff --git a/pypythia/__init__.py b/pypythia/__init__.py index bec46b9..27dc1b3 100644 --- a/pypythia/__init__.py +++ b/pypythia/__init__.py @@ -4,3 +4,12 @@ __version__ = importlib.metadata.distribution(__name__).version except Exception: __version__ = "unknown" + +# Required if the package was installed via PyPi... 
+if __version__ == "unknown": + try: + from importlib.metadata import version + + __version__ = version("PythiaPhyloPredictor") + except Exception: + pass