Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions coretracker/FisherExact/Fisher.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import scipy.stats as ss
from scipy.special import gammaln as lgamma
import statlib.fexact as f
from statlib.fexact import fisher_exact as f_exact
from statlib.asa159 import rcont2
from statlib.asa205 import enum as rcont
# `from . import statlib.fexact as f` is a SyntaxError: the name after
# `import` in a from-import cannot be dotted. Import the submodule from its
# parent package instead; `f` still refers to the `fexact` module.
from .statlib import fexact as f
from .statlib.fexact import fisher_exact as f_exact
from .statlib.asa159 import rcont2
from .statlib.asa205 import enum as rcont
import numpy as np
import logging
import os
Expand Down Expand Up @@ -220,19 +220,19 @@ def _fisher_sim(c, replicate, seed=None):
results = np.zeros(replicate)

fact = np.zeros(n + 1)
for i in xrange(2, n + 1):
for i in range(2, n + 1):
fact[i] = fact[i - 1] + np.log(i)

observed = np.zeros((nr, nc), dtype="int32", order='F')
for it in xrange(replicate):
for it in range(replicate):
rcont2(nrow=nr, ncol=nc, nrowt=sr, ncolt=sc, key=key,
seed=seed, matrix=observed, ierror=ierror)
# if we do not have an error, take special action
ans = 0.
tmp_observed = observed.ravel()
if ierror[0] != 0:
raise ValueError("Fortran subroutine rcont2 return an error !")
for j in xrange(nc):
for j in range(nc):
i = 0
ii = j * nr
while(i < nr):
Expand Down Expand Up @@ -268,7 +268,7 @@ def _midp(c):
global result
result = []
logfact = np.zeros(n + 1)
for i in xrange(2, n + 1):
for i in range(2, n + 1):
logfact[i] = logfact[i - 1] + np.log(i)

def callback(iflag, table, m, n, rowsum, colsum, prob, mult):
Expand Down
2 changes: 1 addition & 1 deletion coretracker/FisherExact/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from Fisher import fisher_exact
from .Fisher import fisher_exact
# __all__ must list exported names as strings. Holding the function object
# itself makes `from coretracker.FisherExact import *` raise TypeError.
__all__ = ['fisher_exact']
2 changes: 1 addition & 1 deletion coretracker/classifier/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from classifier import Classifier, getDataFromFeatures, read_from_json
from .classifier import Classifier, getDataFromFeatures, read_from_json
import os
this_dir, this_filename = os.path.split(__file__)
MODELPATH = os.path.join(this_dir, "models", '%s/classifier.pkl.z')
Expand Down
84 changes: 42 additions & 42 deletions coretracker/classifier/classifier.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from __future__ import division


import itertools
import json
Expand Down Expand Up @@ -55,7 +55,7 @@ def load_from_file(clc, loadfile):
clf = joblib.load(loadfile)
return clf
except IOError:
print('Problem with file %s, can not open it' % loadfile)
print(('Problem with file %s, can not open it' % loadfile))
except Exception as e:
raise e
return None
Expand Down Expand Up @@ -112,11 +112,11 @@ def feature_importance(self, outfile="importance.png", features_list=[]):
[tree.feature_importances_ for tree in self.clf.estimators_], axis=0)
plt.figure()
plt.title("Feature importances")
plt.bar(range(n_feats), importances[
plt.bar(list(range(n_feats)), importances[
indices], width=0.5, color="b", yerr=std[indices], align="center")
if len(features_list) > 0:
features_list = np.asarray(features_list)[indices]
plt.xticks(range(n_feats), features_list, rotation='vertical')
plt.xticks(list(range(n_feats)), features_list, rotation='vertical')
plt.xlim([-1, n_feats])
plt.margins(0.2)

Expand Down Expand Up @@ -176,21 +176,21 @@ def get_stat(self, X_test, y_test):
(prob_pos.max() - prob_pos.min())

clf_score = brier_score_loss(y_test, prob_pos)
print("%s:" % self.method)
print("\tBrier: %1.3f" % (clf_score))
print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
print("\tF1: %1.3f" % f1_score(y_test, y_pred))
print("\tROC AUC score: %1.3f\n" % roc_auc_score(y_test, prob_pos))
print(("%s:" % self.method))
print(("\tBrier: %1.3f" % (clf_score)))
print(("\tPrecision: %1.3f" % precision_score(y_test, y_pred)))
print(("\tRecall: %1.3f" % recall_score(y_test, y_pred)))
print(("\tF1: %1.3f" % f1_score(y_test, y_pred)))
print(("\tROC AUC score: %1.3f\n" % roc_auc_score(y_test, prob_pos)))


def read_from_json(data, labels=None, use_global=True, use_pvalue=True):
"""Parse X array from data"""
if isinstance(data, basestring):
if isinstance(data, str):
with open(data) as jfile:
data = json.load(jfile)

if labels and isinstance(labels, basestring):
if labels and isinstance(labels, str):
with open(labels) as jfile2:
labels = json.load(jfile2)
# matrice format
Expand All @@ -209,9 +209,9 @@ def read_from_json(data, labels=None, use_global=True, use_pvalue=True):
# each entry format :
# [fitch, suspected, gene_frac, rea_frac, used_frac, subs_count, codon_lik_for_rea_aa]

for aa2, val in data['aa'].items():
for aa1, glist in val.items():
for genome, gdata in glist.items():
for aa2, val in list(data['aa'].items()):
for aa1, glist in list(val.items()):
for genome, gdata in list(glist.items()):
type_check = gdata[dtype]
codon_total = gdata['codons'][dtype]
fitch = gdata['fitch']
Expand All @@ -221,10 +221,10 @@ def read_from_json(data, labels=None, use_global=True, use_pvalue=True):
used_codon = type_check['used_codon']
# gene_in_genome = data['genes'][genome]
was_lost = gdata['lost'][fisher_type]
total_aa = np.sum(codon_total.values())
total_aa = np.sum(list(codon_total.values()))
# mixte_codon = type_check['mixte_codon']
subs_count = type_check['count']
for codon in codon_total.keys():
for codon in list(codon_total.keys()):
gene_count = 0
total_gene_count = 0
try:
Expand Down Expand Up @@ -320,7 +320,7 @@ def get_2D_distinct(Xdata, Xlabel, y, etiquette, outfile="2Dcompare.png", featur
ncomp = len(features)
if ncomp == 0 or ncomp > len(etiquette):
ncomp = len(etiquette)
features = range(len(etiquette))
features = list(range(len(etiquette)))
else:
Xdata = Xdata[:, features]

Expand All @@ -331,8 +331,8 @@ def get_2D_distinct(Xdata, Xlabel, y, etiquette, outfile="2Dcompare.png", featur

plt.close('all')
f, axarr = plt.subplots(i, j)
for xax in xrange(ncomp):
for yax in xrange(xax + 1, ncomp):
for xax in range(ncomp):
for yax in range(xax + 1, ncomp):
total_size -= 1
i, j = np.unravel_index(total_size, axarr.shape)
axarr[i, j].scatter(Xdata[:, xax], Xdata[:, yax], c=color)
Expand Down Expand Up @@ -385,8 +385,8 @@ def draw_pca_data(X_features, Xlabel, y, outfile="PCA.png"):
plt.close('all')
f, axarr = plt.subplots(i, j)
if total_size > 1:
for xax in xrange(ncomp):
for yax in xrange(xax + 1, ncomp):
for xax in range(ncomp):
for yax in range(xax + 1, ncomp):
total_size -= 1
i, j = np.unravel_index(total_size, axarr.shape)
axarr[i, j].scatter(X_features[:, xax],
Expand All @@ -413,11 +413,11 @@ def print_data(X, X_label, Y, etiquette=None):
"N. used", "Cod. count", "Sub. count", "G. len", "codon_lik", "N. mixte", "id"]
etiquette = list(etiquette)

print("\n" + "\t".join(["genome", "codon",
"ori_aa", "rea_aa"] + etiquette))
for i in xrange(len(X_label)):
print(("\n" + "\t".join(["genome", "codon",
"ori_aa", "rea_aa"] + etiquette)))
for i in range(len(X_label)):
if Y[i] == 1:
print("\t".join(list(X_label[i]) + [str(x) for x in X[i]]))
print(("\t".join(list(X_label[i]) + [str(x) for x in X[i]])))


def getDataFromFeatures(Xdata, etiquette, feats=[]):
Expand All @@ -434,7 +434,7 @@ def get_sensibility_and_precision(pred_y, true_y, X_labels=None, X=None, log=Tru
assert nel == len(pred_y), 'Vector should be the same size\n'
true_pos, true_neg, false_pos, false_neg = 0.0, 0.0, 0.0, 0.0
false_neg_list, false_pos_list = [], []
for i in xrange(len(pred_y)):
for i in range(len(pred_y)):
if pred_y[i] == 0 and true_y[i] == 1:
false_neg += 1
false_neg_list.append(i)
Expand All @@ -446,30 +446,30 @@ def get_sensibility_and_precision(pred_y, true_y, X_labels=None, X=None, log=Tru
elif pred_y[i] == 0 and true_y[i] == 0:
true_neg += 1

print("Test size is: %d\nTrue Positive is: %d\nTrue negative is: \
print(("Test size is: %d\nTrue Positive is: %d\nTrue negative is: \
%d\nFalse positive is: %d\nFalse negative is:%d" % (
nel, true_pos, true_neg, false_pos, false_neg))
nel, true_pos, true_neg, false_pos, false_neg)))
print('-------------------------------------------')
print("Sensibility is %f" % (true_pos / (true_pos + false_neg)
if (true_pos + false_neg) > 0 else 1))
print("Specificity is %f" % (true_neg / (true_neg + false_pos)
if (true_neg + false_pos) > 0 else 1))
print("Accuracy is %f" % ((true_neg + true_pos) / nel))
print("Precision is %f\n\n" %
(true_pos / (true_pos + false_pos) if (true_pos + false_pos) > 0 else 1))
print(("Sensibility is %f" % (true_pos / (true_pos + false_neg)
if (true_pos + false_neg) > 0 else 1)))
print(("Specificity is %f" % (true_neg / (true_neg + false_pos)
if (true_neg + false_pos) > 0 else 1)))
print(("Accuracy is %f" % ((true_neg + true_pos) / nel)))
print(("Precision is %f\n\n" %
(true_pos / (true_pos + false_pos) if (true_pos + false_pos) > 0 else 1)))
if log:
if X_labels is not None and X is not None:
if len(false_neg_list) > 0:
print("List of false negatives")
for i in false_neg_list:
print("\t".join(X_labels[i]))
print("\t".join([str(x) for x in X[i]]))
print(("\t".join(X_labels[i])))
print(("\t".join([str(x) for x in X[i]])))

if len(false_pos_list) > 0:
print("\nList of False positives")
for i in false_pos_list:
print("\t".join(X_labels[i]))
print("\t".join([str(x) for x in X[i]]))
print(("\t".join(X_labels[i])))
print(("\t".join([str(x) for x in X[i]])))


def split_zeros_pos(L, X, Y, split_size=300):
Expand All @@ -493,7 +493,7 @@ def get_aa_cross_val(L, X, Y, AA, tsize=None, rstate=-1):
"""Get test data from dataset"""
test_position = []
aa_y = np.zeros(Y.shape)
for i in xrange(len(Y)):
for i in range(len(Y)):
if L[i][-1] == AA:
aa_y[i] = 1
test_position.append(i)
Expand All @@ -510,7 +510,7 @@ def get_aa_cross_val(L, X, Y, AA, tsize=None, rstate=-1):
test_position = np.random.permutation(test_position)
mask = np.ones(Y.shape, dtype=bool)
mask[test_position] = False
train_position = np.array(range(len(mask)))[mask]
train_position = np.array(list(range(len(mask))))[mask]

if rstate > 0:
return shuffle(train_position, random_state=rstate), shuffle(test_position, random_state=rstate)
Expand Down
4 changes: 2 additions & 2 deletions coretracker/classifier/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ class ModelType(object):
default_sfeat = {'1': mod1, '2': mod2, '3': mod3}

def __init__(self, m, etiquette, sfeat=[], encode=False):
if m not in self.default_sfeat.keys():
if m not in list(self.default_sfeat.keys()):
raise ValueError('Selected model do not exist')
self.model = str(m)
self.etiquette = etiquette
if not sfeat:
try:
self.sfeat = self.default_sfeat[self.model]
except:
self.sfeat = range(len(etiquette))
self.sfeat = list(range(len(etiquette)))
else:
self.sfeat = sfeat
self.encode = encode
Expand Down
24 changes: 11 additions & 13 deletions coretracker/coreutils/AncestralRecon.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
from abc import ABCMeta, abstractmethod
import utils
from . import utils
from collections import defaultdict, Counter
import numpy as np
import operator
from letterconfig import *
from .letterconfig import *


def init_back_table(dct):
    """Build the reverse lookup (amino acid -> codons) for a genetic code.

    Args:
        dct: object exposing a ``forward_table`` mapping of codon to
            amino acid.

    Returns:
        A ``defaultdict(list)`` mapping each amino acid to the list of codons
        that encode it, in the iteration order of ``forward_table``.
    """
    back_table = defaultdict(list)
    # items() yields each (codon, aa) pair directly, so there is no need to
    # zip separately materialised values() and keys() lists.
    for codon, aa in dct.forward_table.items():
        back_table[aa].append(codon)
    return back_table


class AbsAncest:
__metaclass__ = ABCMeta

class AbsAncest(metaclass=ABCMeta):
def __init__(self, tree, nodestates):
self.tree = tree
self.nodestates = nodestates
Expand Down Expand Up @@ -55,8 +53,8 @@ def flip_rea_forward(clc, nodestates):
"""Flip rea data dict"""
new_dt = utils.makehash(1, set)
state_map = defaultdict(set)
for (genome, aarea) in nodestates.items():
nl = [(c, aa) for aa, codons in aarea.items() for c in codons]
for (genome, aarea) in list(nodestates.items()):
nl = [(c, aa) for aa, codons in list(aarea.items()) for c in codons]
for (c, aa) in nl:
new_dt[genome][c].add(aa)
state_map[c].add(aa)
Expand Down Expand Up @@ -149,13 +147,13 @@ def __const_term_state(self, term_list, smat, char_states, nullstate):

# sort, whether by total number
if self.enforce_order:
return [(x, is_valid_for_c[x]) for x in char_states if x in is_valid_for_c.keys()]
return [(x, is_valid_for_c[x]) for x in char_states if x in list(is_valid_for_c.keys())]
if self.sort_by_size:
possible_order = sorted([(x, np.count_nonzero(y)) for x, y in is_valid_for_c.items(
)], key=operator.itemgetter(1), reverse=True)
possible_order = sorted([(x, np.count_nonzero(y)) for x, y in list(is_valid_for_c.items(
))], key=operator.itemgetter(1), reverse=True)
else:
# here we sort by subtree weight, ignoring excluded subgroup
possible_order = sorted([(x, y[-1]) for x, y in is_valid_for_c.items()],
possible_order = sorted([(x, y[-1]) for x, y in list(is_valid_for_c.items())],
key=operator.itemgetter(1), reverse=True)
return possible_order

Expand Down Expand Up @@ -263,7 +261,7 @@ def __init__(self, tree, reassigned, ori_aa, dest_aa, dct, codon_rea=(None, None
self.dct = dct
self.back_table = init_back_table(dct)
codon_list = self.back_table[aa_letters_3to1[ori_aa]]
self.colors = dict(zip(codon_list, colors))
self.colors = dict(list(zip(codon_list, colors)))
for leaf in tree:
if leaf.name in reassigned:
leaf.add_features(reassigned={1})
Expand Down
12 changes: 6 additions & 6 deletions coretracker/coreutils/Faces.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,12 @@

def _get_codon_fgcolors(codontable, cible_aa):
    """Get the foreground color of every codon.

    Args:
        codontable: mapping of codon to amino acid.
        cible_aa: the target amino acid to single out.

    Returns:
        dict mapping each codon to a color string: '#FFFFFF' when the codon
        codes for ``cible_aa``, otherwise the entry for its amino acid in
        ``_aafgcolors`` (a lookup table defined elsewhere in this module).
    """
    # The _aafgcolors lookup only happens for non-target amino acids, so the
    # target itself does not need an entry in that table.
    return {
        codon: ('#FFFFFF' if aa == cible_aa else _aafgcolors[aa])
        for codon, aa in codontable.items()
    }


def _get_codon_bgcolors(codontable, cible_aa, spec_codon_col):
"""Get colon background color"""
return dict((k, spec_codon_col.get(k, ("#FFFFFF" if v != cible_aa else '#000000'))) for (k, v) in codontable.items())
return dict((k, spec_codon_col.get(k, ("#FFFFFF" if v != cible_aa else '#000000'))) for (k, v) in list(codontable.items()))


class PPieChartFace(faces.StaticItemFace):
Expand All @@ -120,7 +120,7 @@ def __init__(self, percents, width, height, colors=None, line_color=None, label_

if not is_percent:
s = sum(percents)
percents = map(lambda x: x * 100. / s, percents)
percents = [x * 100. / s for x in percents]

if round(sum(percents)) > 100:
raise ValueError("PPieChartItem: percentage values > 100")
Expand Down Expand Up @@ -271,7 +271,7 @@ def __init__(self, seq, cible_aa, seqtype="aa", fsize=10,
self.col_w *= 3
if not isinstance(self.seq, list):
# only consider the position where 3 nuc can be obtained
self.seq = [self.seq[i:i + 3] for i in xrange(0,
self.seq = [self.seq[i:i + 3] for i in range(0,
len(self.seq) - len(self.seq) % 3, 3)]
if not fg_colors:
fg_colors = _get_codon_fgcolors(codontable, cible_aa)
Expand Down Expand Up @@ -421,7 +421,7 @@ def __init__(self, aalist, readict, is_leaf=True, spacer=1, height=12,

def _init_colors(bgtype=True, fgcolor='#000000'):
    """Build one brush per amino acid key of the enclosing ``readict``.

    Args:
        bgtype: when true, each brush takes the color stored for that
            amino acid in ``_aabgcolors``; otherwise every brush uses
            ``fgcolor``.
        fgcolor: color used for all brushes when ``bgtype`` is false.

    Returns:
        dict mapping each upper-cased amino acid key to a ``QBrush``.
    """
    color = {}
    # Iterate the mapping directly; a throwaway list of keys is not needed.
    for aa in readict:
        key = aa.upper()
        color[key] = QBrush(QColor(_aabgcolors[key] if bgtype else fgcolor))
    return color
Expand All @@ -431,7 +431,7 @@ def _init_colors(bgtype=True, fgcolor='#000000'):
def update_items(self):
try:
max_codons = math.ceil(
max([len(x) for x in self.readict.values()]) / 2.0) * 2
max([len(x) for x in list(self.readict.values())]) / 2.0) * 2
except:
max_codons = 1
if self.maxcodon:
Expand Down
Loading