From 213935235aea7540021831335849b1b83a4414eb Mon Sep 17 00:00:00 2001
From: "mh595@uni-freiburg.de" <fr_mh595@login01.binac.uni-tuebingen.de>
Date: Mon, 22 Mar 2021 15:34:47 +0100
Subject: [PATCH 1/7] Added a print for the total average time for chemical
 compounds in the results section

---
 reconstruct.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/reconstruct.py b/reconstruct.py
index 6f2f760..88f11cc 100644
--- a/reconstruct.py
+++ b/reconstruct.py
@@ -117,6 +117,7 @@ def maketasks(params):
 # Pareto Option "default": (3*5 best graphs for each category + 15 pareto front)
 # Pareto Option "random":  (No pareto front and no 3*5 best graphs. Just take 30 random graphs total)
 # Pareto Option "greedy":  (Instead of using the pareto front, take graphs with the lowest direct distance to the target)
+# Pareto Option "paretogreed":  (Greedy approach for pareto front)
 # Pareto Option "pareto_only": (Instead of using the 3*5 best graphs it takes double the graphs from the pareto front.
 # Pareto Option: "all": (Takes EVERY graph from the pareto front)
 parser = argparse.ArgumentParser()
@@ -127,11 +128,13 @@ def maketasks(params):
 parser.add_argument('--cipselector_option', nargs=1, type=int, default=[1], ## Change this back
                     choices=[0, 1, 2],
                     help='1: Take k best from all, 2: Take k best from each current cip')
-parser.add_argument('--cipselector_k', nargs=1, type=int, default=[100],
+parser.add_argument('--cipselector_k', nargs=1, type=int, default=[10],
                     help='k for Cipselector')
 parser.add_argument('--pareto_option', nargs=1, type=str, default=['greedy'],
-                    choices=['default', 'random', 'greedy', 'pareto_only', 'all'],
+                    choices=['default', 'random', 'greedy', 'paretogreed', 'pareto_only', 'all'],
                     help='Pareto option for optimization')
+parser.add_argument('--keepgraphs', nargs=1, type=int, default=[30],
+                    help='Number of graphs kept from the pareto part')
 parser.add_argument('--use_normalization', nargs=1, type=int, default=[1], choices=[1,0],
                     help='If 1, normalization will be applied for cipselection')
 parser.add_argument('--min_count', nargs=1, type=int, default=[2], 
@@ -339,10 +342,12 @@ def report(folder = '.res', tasklist=None):
     lsuccess = [int(succ) for data in dat.values() for v in data.values() for succ,steps,times,avg in v]
     avg_productions = np.array([int(avg) for data in dat.values() for v in data.values() for succ,steps,times,avg in v])
     rnd = [int(steps) for data in dat.values() for v in data.values() for succ,steps,times,avg in v]
+    time = np.array([int(times) for data in dat.values() for v in data.values() for succ,steps,times,avg in v])
     print ("nores",nores)
     print ('nosucc',nosucc)
     print ("sumsuccess:", sum(lsuccess), lsuccess)
     print ("Average productions:", avg_productions.mean(), avg_productions)
+    print ("Average times:", np.average(time), time)
 #    print ("maxrnd:", max([int(b) for c in dat.values() for a,b,_ in c.values()]))
     print("maxrnd:", max(rnd))
     

From 2f73c50b2ed3593b6ac8dbb2fd2d4a811cb8e163 Mon Sep 17 00:00:00 2001
From: "mh595@uni-freiburg.de" <fr_mh595@login01.binac.uni-tuebingen.de>
Date: Wed, 24 Mar 2021 18:54:35 +0100
Subject: [PATCH 2/7] Extend run script with keepgraphs and cipselector sweeps

---
 run_in_order.sh | 110 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 85 insertions(+), 25 deletions(-)

diff --git a/run_in_order.sh b/run_in_order.sh
index 0094a60..3d0a735 100644
--- a/run_in_order.sh
+++ b/run_in_order.sh
@@ -5,7 +5,8 @@ conda activate binenv
 
 REPEATS=50 ### Change to 100 for Normal or 250 for Chem
 
-execute () {    #### Make sure to change filename in first sed command to 'chem_runall_binac.sh' or just 'runall_binac.sh'
+execute () {    ######## Make sure to change filename in first sed command to 'chem_runall_binac.sh' or just 'runall_binac.sh'
+
     sed '/reconstruct.py/s/$/'"$STRING"'/' runall_binac.sh > .run_$RESPREFIX.sh
     JOBID=$(qsub -q short -t 1-$REPEATS .run_$RESPREFIX.sh | sed -r 's/^[^0-9]*([0-9]+).*$/\1/')
     echo "Current Task: $JOBID : $STRING"
@@ -41,7 +42,7 @@ for PARETO in 'random' 'greedy' 'pareto_only' 'all' 'default'; do
                         for SIZELIMITER in 1; do
                             RESPREFIX="$CIPSELECTOR-$CIPK-$CONTEXTSIZE-$MINCOUNT-$SIZELIMITER-$NORMALIZATION-$DECOMPRADIUS-$PARETO"
                             STRING=" --cipselector_option $CIPSELECTOR --pareto_option $PARETO --cipselector_k $CIPK --context_size $CONTEXTSIZE --min_count $MINCOUNT --graph_size_limiter $SIZELIMITER --use_normalization $NORMALIZATION --max_decompose_radius $DECOMPRADIUS --resprefix $RESPREFIX"
-                            report ##  Replace this with report/execute/pass
+                            ##report ##  Replace this with report/execute/pass
                         done
                     done
                 done
@@ -50,37 +51,64 @@ for PARETO in 'random' 'greedy' 'pareto_only' 'all' 'default'; do
     done
 done
 ## Chemset Comparison
-#for CONTEXTSIZE in 1 2; do
-#    for CIPK in 200 300 400; do
-#        RESPREFIX="cipK-$CIPK-contextsize-$CONTEXTSIZE"
-#        STRING=" --context_size $CONTEXTSIZE --cipselector_k $CIPK --resprefix $RESPREFIX"
-#        pass
-#    done
-#done
+for CONTEXTSIZE in 1 2; do
+    for CIPK in 100 200 300 400; do
+        RESPREFIX="cipK-$CIPK-contextsize-$CONTEXTSIZE"
+        STRING=" --pareto_option 'default' --core_sizes 0 1 2 3 --context_size $CONTEXTSIZE --cipselector_k $CIPK --resprefix $RESPREFIX"
+#        reportchem
+    done
+done
 
 ## Artificial Comparison
-#for CONTEXTSIZE in 1 2; do
-#    RESPREFIX="coresizes-012-contextsize-$CONTEXTSIZE"
-#    STRING=" --core_sizes 0 1 2 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
+CIPSELECTOR=2
+for CONTEXTSIZE in 1 2; do
+    RESPREFIX="coresizes-012-contextsize-$CONTEXTSIZE"
+    STRING=" --cipselector_option $CIPSELECTOR --core_sizes 0 1 2 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
 #    report
-#    RESPREFIX="coresizes-01-contextsize-$CONTEXTSIZE"
-#    STRING=" --core_sizes 0 1 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
+    RESPREFIX="coresizes-01-contextsize-$CONTEXTSIZE"
+    STRING=" --cipselector_option $CIPSELECTOR --core_sizes 0 1 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
 #    report
-#    RESPREFIX="coresizes-0-contextsize-$CONTEXTSIZE"
-#    STRING=" --core_sizes 0 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
+    RESPREFIX="coresizes-0-contextsize-$CONTEXTSIZE"
+    STRING=" --cipselector_option $CIPSELECTOR --core_sizes 0 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
 #    report
-#done
+done
 
+## Comparison of Average Productions ##
+    CIPSELECTOR=0
+    CIPK=1000
+    RESPREFIX="cipsel0_$CIPK"
+    STRING=" --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --resprefix $RESPREFIX"
+#    report
+    CIPSELECTOR=1
+    CIPK=100
+    RESPREFIX="cipsel1_$CIPK"
+    STRING=" --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --resprefix $RESPREFIX"
+#    report
+    CIPSELECTOR=2
+    CIPK=10
+    RESPREFIX="cipsel2_$CIPK"
+    STRING=" --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --resprefix $RESPREFIX"
+#    report
+##########################
 
 ## Cipselector 1: 100 200 400 800
-#for CIPK in 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200; do
-for CIPK in 10 30 50 70 90; do
+for CIPK in 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200; do
+#for CIPK in 10 30 50 70 90; do
     RESPREFIX="cipsel1_$CIPK"
     STRING=" --cipselector_option 1 --cipselector_k $CIPK --resprefix $RESPREFIX"
-#    pass
+#    report
+done
+
+for KEEP in 12 30 60; do
+    for CIPK in 50 100 250; do
+        RESPREFIX="keep-$KEEP-cipk-$CIPK"
+        STRING=" --keepgraphs $KEEP --cipselector_option 1 --cipselector_k $CIPK --resprefix $RESPREFIX"
+#        report
+    done
 done
 
-## Cipselector 2: (Default) 1 5 10 15 20 #### REMOVED 100 FOR CHEMSETS
+
+## Cipselector 2: 1 5 10 15 20 #### REMOVED 100 FOR CHEMSETS
 for CIPK in 1 5 10 15 20; do
     RESPREFIX="cipsel2_$CIPK"
     STRING=" --cipselector_option 2 --cipselector_k $CIPK --resprefix $RESPREFIX"
@@ -95,10 +123,11 @@ for NORM in 0; do
 done
 
 ## Pareto Options: ###### REMOVED 'all' FOR CHEMSETS
-for PARETO in 'random' 'greedy' 'pareto_only' 'all' 'default'; do
-    RESPREFIX="pareto_$PARETO"
-    STRING=" --pareto_option $PARETO --resprefix $RESPREFIX"
-#    pass
+for PARETO in 'random' 'greedy' 'paretogreed' 'pareto_only' 'all' 'default'; do
+    RESPREFIX="res_pareto_$PARETO"
+#    STRING=" --pareto_option $PARETO --resprefix $RESPREFIX"
+    STRING=" --core_sizes 0 1 2 3 --pareto_option $PARETO --resprefix $RESPREFIX"
+#    reportchem
 done
 
 ## Contextsizes/Thickness: 
@@ -121,3 +150,34 @@ for SIZELIMITER in 0 1; do
     STRING=" --graph_size_limiter $SIZELIMITER --resprefix $RESPREFIX"
 #    pass
 done
+
+
+## Cipselector Comparison Graphs
+KEEP=60
+#CIP0
+###for CIPK in 300 1500 3000 4500 6000 7500 9000; do # KEEP 30
+###for CIPK in 100 900 2250 3750 5250 6750 8250; do
+###for CIPK in 150 750 1500 2250 3000 3750 4500; do # KEEP 60
+for CIPK in 50 450 1125 1875 2625 3375 4125; do  
+    RESPREFIX="res_keep-$KEEP-cipsel0-$CIPK"
+    STRING=" --keepgraphs $KEEP --cipselector_option 0 --cipselector_k $CIPK --pareto_option 'greedy' --resprefix $RESPREFIX"
+    report
+done
+#CIP1
+###for CIPK in 10 50 100 150 200 250 300; do # KEEP 30
+###for CIPK in 50 30 75 125 175 225 275; do
+###for CIPK in 5 25 50 75 100 125 150; do # KEEP 60
+for CIPK in 1 15 38 63 88 113 138; do
+    RESPREFIX="res_keep-$KEEP-cipsel1-$CIPK"
+    STRING=" --keepgraphs $KEEP --cipselector_option 1 --cipselector_k $CIPK --pareto_option 'greedy' --resprefix $RESPREFIX"
+    report
+done
+#CIP2
+###for CIPK in 10 20 30 40 50 60 70 80 90 100; do # KEEP 30
+###for CIPK in 1 2 3 4 5 6 7 8 9; do
+###for CIPK in 5 10 15 20 25 30 35 40 45 50; do # KEEP 60
+for CIPK in 1 2 3 4 6 7 8 9; do
+    RESPREFIX="res_keep-$KEEP-cipsel2-$CIPK"
+    STRING=" --keepgraphs $KEEP --cipselector_option 2 --cipselector_k $CIPK --pareto_option 'greedy' --resprefix $RESPREFIX"
+    report
+done

From fb2d33943a63ebd2d99d8173441df79c2a381d56 Mon Sep 17 00:00:00 2001
From: MatthiasHerrmann <herrmannmatthias@gmx.net>
Date: Thu, 25 Mar 2021 14:37:49 +0100
Subject: [PATCH 3/7] Cleaned up pareto.py

---
 exploration/pareto.py         | 89 ++++++++++++++---------------------
 exploration/pareto_options.py | 46 ++++++++++++++++++
 2 files changed, 81 insertions(+), 54 deletions(-)
 create mode 100644 exploration/pareto_options.py

diff --git a/exploration/pareto.py b/exploration/pareto.py
index 0d06c72..e4deb0e 100644
--- a/exploration/pareto.py
+++ b/exploration/pareto.py
@@ -5,13 +5,12 @@
 import heapq
 import numpy as np
 from eden.util import timeit
-### from graphlearn.local_substitution_graph_grammar import LocalSubstitutionGraphGrammar as lsggold ########## TMP
 from graphlearn.cipcorevector import LsggCoreVec as lsgg
 from toolz.curried import compose, map, concat
 from exploration.pareto_funcs import _manage_int_or_float
 logger = logging.getLogger(__name__)
 import structout as so
-from exploration import pareto_funcs as paretof, cost_estimator as costs
+from exploration import pareto_options, cost_estimator as costs
 from extensions import lsggscramble as lsggs
 from sklearn.metrics.pairwise import euclidean_distances
 
@@ -223,33 +222,34 @@ def optimize(self, graphs):
 
     def optimize_step(self, graphs):
         # filter, expand, chk duplicates
-        costs = self.get_costs(graphs)
-        status = self.checkstatus(costs, graphs)
+        step_start_time = time.time()
+        graphlen_start = len(graphs)
+        graphs, status = self.filter_by_cost(graphs)
         if status: return [],True,None
-        graphs = self.filter_by_cost(costs, graphs)
+        graphlen_filter = len(graphs)
+        logger.log(10, f"cost_filter: Got {graphlen_start} graphs, reduced to {graphlen_filter} ({time.time()-step_start_time})")
         num_graphs = len(graphs)
         if self.grammar.cipselector == new_cipselector0:  ### SPECIAL CASE
-            logger.log(10, "USING CIPSELECTOR 0")
             graphs = self._expand_neighbors2(graphs)
         else:
             graphs = self._expand_neighbors(graphs)
-        avg_productions = len(graphs)/num_graphs
-        logger.log(10, f"Average productions per graph: {avg_productions}")
+        graphlen_expand = len(graphs)
+        avg_productions = graphlen_expand/graphlen_filter
+        logger.log(10, f"graph generation: Got {avg_productions} per graph. ({time.time()-timenow})")
         graphs = self.duplicate_rm(graphs)
+        logger.log(10, "duplicate_rm: {graphlen_expand} -> {len(graphs} graphs. ({time.time()-timenow})")
         return graphs, status, avg_productions
 
    
 
-    def filter_by_cost(self,costs,graphs):
+    def filter_by_cost(self,graphs):
         """expand "keepgraphs" graphs, divided between top graphs in everything
         and pareto front, discard rest"""
-        timenow=time.time()
-        in_count = len(graphs)
         keepgraphs = self.keepgraphs
                     
         if in_count <= self.keepgraphs:
             # Only few graphs remaining so just return all of them.
-            logger.debug('cost_filter: keep all %d graphs' % in_count)
+            logger.log(10, "cost_filter: keep all graphs")
             return graphs
         
         elif self.pareto_option == "random":
@@ -267,42 +267,35 @@ def filter_by_cost(self,costs,graphs):
         
         elif self.pareto_option == 'greedy':
             # Return graphs with the lowest euclidean distance to the target vector
-            distances = []
-            for g in graphs:
-                distances.append(euclidean_distances(self.target_graph_vector, vertex_vec(g, self.decomposer).sum(axis=0))[0][0])
-            ranked_distances = np.argsort(distances)[:keepgraphs]
-            res =  [graphs[i] for i in ranked_distances]
+            return pareto_option.greedy(graphs, self.target_graph_vector, self.decomposer, keepgraphs)
+
+        costs = self.get_costs(graphs)
+        status = self.checkstatus(costs, graphs)
+        if status:
+            # Some graph has distance == 0
+            return graphs, True
+
         elif self.pareto_option == "default":
             # Take best graphs from estimators and pareto front
-           costs_ranked = np.argsort(costs,axis=0)[:int(keepgraphs/6),[0,1,3]]
-           want , counts = np.unique(costs_ranked,return_counts=True)
-           res = [graphs[idd] for idd,count in zip( want,counts) if count > 0 ]
-           dontwant = [i for i in range(len(graphs)) if i not in want]
-           restgraphs = [graphs[i] for i in dontwant]
-           restcosts = costs[dontwant][:,[0,1,2]]
-           paretoselectedgraphs = paretof._pareto_set(restgraphs, restcosts)
-           random.shuffle(paretoselectedgraphs)
-           res += paretoselectedgraphs[:int(keepgraphs/2)]
+            return pareto_option.default(graphs, costs, keepgraphs), False
         
         elif self.pareto_option == "paretogreed":
             # 1. choose pareto graphs 
-            # 2. new score is the average rank over all costs 
+            # 2. new score is the average rank over all costs
             # 3. choose k best of those 
-           graphs, costs = paretof._pareto_set(graphs, costs,return_costs=True)
-           costs_ranked = np.argsort(costs,axis=0).sum(axis=1)
-           choosegr = np.argsort(costs_ranked) 
-           res = [graphs[x] for x in choosegr[:keepgraphs]]
+           return pareto_option.paretogreed(graphs, costs, keepgraphs), False
+        
+        paretoselectedgraphs = paretof._pareto_set(graphs, costs)
+        random.shuffle(paretoselectedgraphs)
+        if self.pareto_option == "pareto_only":
+            # Return only graphs from the pareto front
+            return paretoselectedgraphs[:keepgraphs], False
+        
+        elif self.pareto_option == "all":
+            # Return ALL graphs from the pareto front
+            return paretoselectedgraphs, False
         else:
-            paretoselectedgraphs = paretof._pareto_set(graphs, costs)
-            random.shuffle(paretoselectedgraphs)
-
-            if self.pareto_option == "pareto_only":
-                # Return only graphs from the pareto front
-                res = paretoselectedgraphs[:keepgraphs]
-            
-            elif self.pareto_option == "all":
-                # Return ALL graphs from the pareto front
-                res = paretoselectedgraphs
+            raise ValueError("Invalid Pareto Option")
 
 ##        # DEBUG TO SHOW THE REAL DISTANCE
 ##        if self.cheat:
@@ -320,16 +313,10 @@ def filter_by_cost(self,costs,graphs):
 ##                from util import util
 ##                util.dumpfile(graphs[costs_ranked[stuff][0]],"gr")
 ##                print ("graph dumped")
-        logger.log(10, f"cost_filter: got {in_count} graphs, reduced to {len(res)} (%.2fs)"%(time.time()-timenow))
-
-        return res
 
    
     def duplicate_rm(self,graphs):
-        timenow=time.time()
-        count = len(graphs)
         graphs  = list(self._duplicate_rm(graphs))
-        logger.debug("duplicate_rm: %d -> %d graphs (%.2fs)" % (count, len(graphs), time.time()-timenow))
         return graphs
 
     def _duplicate_rm(self,graphs):
@@ -360,7 +347,7 @@ def get_costs(self, graphs):
         for i,e in enumerate(costs[:,2]):
             nucol[i,2] = resdic[e]
         costs = np.hstack((costs, np.sum(nucol,axis =1).reshape(-1,1)))
-        logger.debug("costs: best dist: %f (%.2fs)" %  (np.min(costs[:,0]) ,time.time()-timenow))
+        logger.log(10, f"costs: best dist: {np.min(costs[:,0])} ({time.time()-timenow})")
         return costs
 
     def _get_neighbors(self, graph):
@@ -373,17 +360,14 @@ def _get_neighbors(self, graph):
         return neighs
 
     def _expand_neighbors(self, graphs):
-        timenow = time.time()
         global _decomposer ##### Stupid hack but I dont know how else to allow lambda functions in multiprocessing
         _decomposer = self.decomposer #####
         if self.multiproc>1:
             with multiprocessing.Pool(self.multiproc) as p:
                 res = list(concat(p.map(self._get_neighbors,graphs)))
-                logger.debug("graph generation: %.2fs" %  (time.time()-timenow))
                 return res
         else:
             res = list(concat(map(self._get_neighbors,graphs)))
-            logger.debug("graph generation: %.2fs" %  (time.time()-timenow))
             return res
 
 
@@ -398,14 +382,11 @@ def _get_score_substitution(self, graph): #
 
     def _expand_neighbors2(self, graphs): #
         """Only used with Cipselector Option 0. Replaces _expand_neighbors"""
-        timenow = time.time()
         if self.multiproc>1:
             with multiprocessing.Pool(self.multiproc) as p:
                 res = list(concat(p.map(self._get_score_substitution,graphs)))
-                logger.debug("graph generation: %.2fs" %  (time.time()-timenow))
         else:
             res = list(concat(map(self._get_score_substitution,graphs)))
-            logger.debug("graph generation: %.2fs" %  (time.time()-timenow))
         res.sort(reverse=True, key=lambda a: a[0])
         counter = 0
         grlist = []
diff --git a/exploration/pareto_options.py b/exploration/pareto_options.py
new file mode 100644
index 0000000..7efc605
--- /dev/null
+++ b/exploration/pareto_options.py
@@ -0,0 +1,46 @@
+import pareto_funcs as paretof
+import numpy as np
+
+def greedy(graphs, target, decomposer, keepgraphs):
+    """
+    Return graphs with the lowest euclidean distance to the target vector.
+    Also returns if one of the distances equals 0.
+    """
+    distances = []
+    for g in graphs:
+        distances.append(euclidean_distances(target, vertex_vec(g, decomposer).sum(axis=0))[0][0])
+    ranked_distances = np.argsort(distances)[:keepgraphs]
+    res =  [graphs[i] for i in ranked_distances]
+    if distances[ranked_distances[0]] == 0:
+        ## => At least 1 distance is 0 => Successful reconstruction
+        return res, True
+    return res, False
+    
+
+def default(graphs, costs, keepgraphs):
+    """Take the int(keepgraphs/6) lowest-cost graphs per cost column 0, 1 and 3,
+    plus up to int(keepgraphs/2) shuffled pareto-front graphs (columns 0-2) of the rest.
+    """
+    costs_ranked = np.argsort(costs,axis=0)[:int(keepgraphs/6),[0,1,3]]
+    want , counts = np.unique(costs_ranked,return_counts=True)
+    res = [graphs[idd] for idd,count in zip( want,counts) if count > 0 ]  # np.unique counts are >= 1, so every index in `want` is kept once
+    dontwant = [i for i in range(len(graphs)) if i not in want]
+    restgraphs = [graphs[i] for i in dontwant]
+    restcosts = costs[dontwant][:,[0,1,2]]
+    paretoselectedgraphs = paretof._pareto_set(restgraphs, restcosts)
+    random.shuffle(paretoselectedgraphs)  # random pick among the pareto-front graphs
+    res += paretoselectedgraphs[:int(keepgraphs/2)]
+    return res
+
+
+def paretogreed(graphs, costs, keepgraphs):
+    """
+    Reduce graphs to their pareto front, score each front graph by the sum of
+    its per-cost ranks (same ordering as the average rank; lower is better)
+    and return the keepgraphs best-scored graphs.
+    """
+    graphs, costs = paretof._pareto_set(graphs, costs, return_costs=True)
+    # argsort alone yields sorting indices; argsort of argsort yields each row's rank.
+    rank_sums = np.argsort(np.argsort(costs, axis=0), axis=0).sum(axis=1)
+    best_first = np.argsort(rank_sums)
+    return [graphs[i] for i in best_first[:keepgraphs]]

From dea95f7c0395d0dea4894e318f1eca3491ec0bf2 Mon Sep 17 00:00:00 2001
From: "mh595@uni-freiburg.de" <fr_mh595@login01.binac.uni-tuebingen.de>
Date: Thu, 25 Mar 2021 14:53:35 +0100
Subject: [PATCH 4/7] Fix undefined names and missing imports in pareto code

---
 exploration/pareto.py         | 12 ++++++------
 exploration/pareto_options.py |  4 +++-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/exploration/pareto.py b/exploration/pareto.py
index e4deb0e..573dd14 100644
--- a/exploration/pareto.py
+++ b/exploration/pareto.py
@@ -235,9 +235,9 @@ def optimize_step(self, graphs):
             graphs = self._expand_neighbors(graphs)
         graphlen_expand = len(graphs)
         avg_productions = graphlen_expand/graphlen_filter
-        logger.log(10, f"graph generation: Got {avg_productions} per graph. ({time.time()-timenow})")
+        logger.log(10, f"graph generation: Got {avg_productions} per graph. ({time.time()-step_start_time})")
         graphs = self.duplicate_rm(graphs)
-        logger.log(10, "duplicate_rm: {graphlen_expand} -> {len(graphs} graphs. ({time.time()-timenow})")
+        logger.log(10, f"duplicate_rm: {graphlen_expand} -> {len(graphs)} graphs. ({time.time()-step_start_time})")
         return graphs, status, avg_productions
 
    
@@ -247,7 +247,7 @@ def filter_by_cost(self,graphs):
         and pareto front, discard rest"""
         keepgraphs = self.keepgraphs
                     
-        if in_count <= self.keepgraphs:
+        if len(graphs) <= self.keepgraphs:
             # Only few graphs remaining so just return all of them.
             logger.log(10, "cost_filter: keep all graphs")
             return graphs
@@ -267,7 +267,7 @@ def filter_by_cost(self,graphs):
         
         elif self.pareto_option == 'greedy':
             # Return graphs with the lowest euclidean distance to the target vector
-            return pareto_option.greedy(graphs, self.target_graph_vector, self.decomposer, keepgraphs)
+            return pareto_options.greedy(graphs, self.target_graph_vector, self.decomposer, keepgraphs)
 
         costs = self.get_costs(graphs)
         status = self.checkstatus(costs, graphs)
@@ -277,13 +277,13 @@ def filter_by_cost(self,graphs):
 
         elif self.pareto_option == "default":
             # Take best graphs from estimators and pareto front
-            return pareto_option.default(graphs, costs, keepgraphs), False
+            return pareto_options.default(graphs, costs, keepgraphs), False
         
         elif self.pareto_option == "paretogreed":
             # 1. choose pareto graphs 
             # 2. new score is the average rank over all costs
             # 3. choose k best of those 
-           return pareto_option.paretogreed(graphs, costs, keepgraphs), False
+           return pareto_options.paretogreed(graphs, costs, keepgraphs), False
         
         paretoselectedgraphs = paretof._pareto_set(graphs, costs)
         random.shuffle(paretoselectedgraphs)
diff --git a/exploration/pareto_options.py b/exploration/pareto_options.py
index 7efc605..4bcf89a 100644
--- a/exploration/pareto_options.py
+++ b/exploration/pareto_options.py
@@ -1,5 +1,7 @@
-import pareto_funcs as paretof
+import exploration.pareto_funcs as paretof
+from graphlearn.cipcorevector import vertex_vec
 import numpy as np
+from sklearn.metrics.pairwise import euclidean_distances
 
 def greedy(graphs, target, decomposer, keepgraphs):
     """

From d770c96743020897cd74d2d38f1e59a8dcefcd36 Mon Sep 17 00:00:00 2001
From: "mh595@uni-freiburg.de" <fr_mh595@login01.binac.uni-tuebingen.de>
Date: Sat, 27 Mar 2021 14:03:36 +0100
Subject: [PATCH 5/7] Fixed Pareto options. Greedy still problematic

---
 exploration/pareto.py         | 25 +++++++++++++------------
 exploration/pareto_options.py |  4 +++-
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/exploration/pareto.py b/exploration/pareto.py
index 573dd14..214024f 100644
--- a/exploration/pareto.py
+++ b/exploration/pareto.py
@@ -10,7 +10,7 @@
 from exploration.pareto_funcs import _manage_int_or_float
 logger = logging.getLogger(__name__)
 import structout as so
-from exploration import pareto_options, cost_estimator as costs
+from exploration import pareto_options, pareto_funcs as paretof, cost_estimator as costs
 from extensions import lsggscramble as lsggs
 from sklearn.metrics.pairwise import euclidean_distances
 
@@ -246,16 +246,18 @@ def filter_by_cost(self,graphs):
         """expand "keepgraphs" graphs, divided between top graphs in everything
         and pareto front, discard rest"""
         keepgraphs = self.keepgraphs
+
+        costs = self.get_costs(graphs)
+        status = self.checkstatus(costs, graphs)
+        if status:
+            # Some graph has distance == 0
+            return graphs, True
                     
         if len(graphs) <= self.keepgraphs:
             # Only few graphs remaining so just return all of them.
             logger.log(10, "cost_filter: keep all graphs")
-            return graphs
+            return graphs, False
         
-        elif self.pareto_option == "random":
-            # Return randomly selected graphs without any application of pareto.
-            res =  random.sample(graphs, keepgraphs)
-
 ##        elif self.prefilter_kick!=0:
 ##            # DELETE THE 25% worst in each category
 ##            assert False
@@ -264,16 +266,14 @@ def filter_by_cost(self,graphs):
 ##            keep =  [i for i in range(len(graphs)) if i not in trash]
 ##            graphs = [graphs[i] for i in keep]
 ##            costs = costs[keep]
-        
+
         elif self.pareto_option == 'greedy':
             # Return graphs with the lowest euclidean distance to the target vector
             return pareto_options.greedy(graphs, self.target_graph_vector, self.decomposer, keepgraphs)
 
-        costs = self.get_costs(graphs)
-        status = self.checkstatus(costs, graphs)
-        if status:
-            # Some graph has distance == 0
-            return graphs, True
+        elif self.pareto_option == "random":
+            # Return randomly selected graphs without any application of pareto.
+            return random.sample(graphs, keepgraphs), False
 
         elif self.pareto_option == "default":
             # Take best graphs from estimators and pareto front
@@ -287,6 +287,7 @@ def filter_by_cost(self,graphs):
         
         paretoselectedgraphs = paretof._pareto_set(graphs, costs)
         random.shuffle(paretoselectedgraphs)
+
         if self.pareto_option == "pareto_only":
             # Return only graphs from the pareto front
             return paretoselectedgraphs[:keepgraphs], False
diff --git a/exploration/pareto_options.py b/exploration/pareto_options.py
index 4bcf89a..aee7793 100644
--- a/exploration/pareto_options.py
+++ b/exploration/pareto_options.py
@@ -2,6 +2,8 @@
 from graphlearn.cipcorevector import vertex_vec
 import numpy as np
 from sklearn.metrics.pairwise import euclidean_distances
+import random
+
 
 def greedy(graphs, target, decomposer, keepgraphs):
     """
@@ -13,7 +15,7 @@ def greedy(graphs, target, decomposer, keepgraphs):
         distances.append(euclidean_distances(target, vertex_vec(g, decomposer).sum(axis=0))[0][0])
     ranked_distances = np.argsort(distances)[:keepgraphs]
     res =  [graphs[i] for i in ranked_distances]
-    if distances[ranked_distances[0]] == 0:
+    if distances[ranked_distances[0]] <= 0:
         ## => At least 1 distance is 0 => Successful reconstruction
         return res, True
     return res, False

From 0a9e6e648509f294b0d68e7558a17259d5ab2a42 Mon Sep 17 00:00:00 2001
From: "mh595@uni-freiburg.de" <fr_mh595@login01.binac.uni-tuebingen.de>
Date: Thu, 1 Apr 2021 14:48:52 +0200
Subject: [PATCH 6/7] Probably last commit from the cluster

---
 exploration/pareto.py         |  11 +--
 exploration/pareto_options.py |   5 +-
 reconstruct.py                |   2 +-
 run_in_order.sh               | 134 +++++++++++++---------------------
 4 files changed, 61 insertions(+), 91 deletions(-)

diff --git a/exploration/pareto.py b/exploration/pareto.py
index 214024f..589eae3 100644
--- a/exploration/pareto.py
+++ b/exploration/pareto.py
@@ -247,11 +247,12 @@ def filter_by_cost(self,graphs):
         and pareto front, discard rest"""
         keepgraphs = self.keepgraphs
 
-        costs = self.get_costs(graphs)
-        status = self.checkstatus(costs, graphs)
-        if status:
-            # Some graph has distance == 0
-            return graphs, True
+        if True:  # Greedy Cost calculation doesnt work at the moment.. self.pareto_option != 'greedy':
+            costs = self.get_costs(graphs)
+            status = self.checkstatus(costs, graphs)
+            if status:
+                # Some graph has distance == 0
+                return graphs, True
                     
         if len(graphs) <= self.keepgraphs:
             # Only few graphs remaining so just return all of them.
diff --git a/exploration/pareto_options.py b/exploration/pareto_options.py
index aee7793..3acdf5b 100644
--- a/exploration/pareto_options.py
+++ b/exploration/pareto_options.py
@@ -11,11 +11,14 @@ def greedy(graphs, target, decomposer, keepgraphs):
     Also returns if one of the distances equals 0.
     """
     distances = []
+####    distances = euclidean_distances(target, vertex_vec(graphs, decomposer))
     for g in graphs:
         distances.append(euclidean_distances(target, vertex_vec(g, decomposer).sum(axis=0))[0][0])
+    if min(distances) == 0:
+        return graphs, True
     ranked_distances = np.argsort(distances)[:keepgraphs]
     res =  [graphs[i] for i in ranked_distances]
-    if distances[ranked_distances[0]] <= 0:
+    if distances[ranked_distances[0]] < 0.000001:
         ## => At least 1 distance is 0 => Successful reconstruction
         return res, True
     return res, False
diff --git a/reconstruct.py b/reconstruct.py
index 88f11cc..8d01828 100644
--- a/reconstruct.py
+++ b/reconstruct.py
@@ -45,7 +45,7 @@ def maketasks(params):
 ##  OPTIONS FOR GRAPHS
 ##########################################
 
-EXPERIMENT_REPEATS = 50 #### CHANGE THIS BACK TO 100! 50 only for chemsets
+EXPERIMENT_REPEATS = 20 #### CHANGE THIS BACK TO 100! Temporarily lowered to 20 (was 50 for chemsets)
 # 1. param dict
 
 params_graphs = {
diff --git a/run_in_order.sh b/run_in_order.sh
index 3d0a735..64dd189 100644
--- a/run_in_order.sh
+++ b/run_in_order.sh
@@ -3,11 +3,11 @@
 source /beegfs/work/workspace/ws/fr_mh595-conda-0/conda/etc/profile.d/conda.sh
 conda activate binenv
 
-REPEATS=50 ### Change to 100 for Normal or 250 for Chem
+REPEATS=100 ### Change to 100 for Normal or 250 for Chem
 
 execute () {    ######## Make sure to change filename in first sed command to 'chem_runall_binac.sh' or just 'runall_binac.sh'
 
-    sed '/reconstruct.py/s/$/'"$STRING"'/' runall_binac.sh > .run_$RESPREFIX.sh
+    sed '/reconstruct.py/s/$/'"$STRING"'/' chem_runall_binac.sh > .run_$RESPREFIX.sh
     JOBID=$(qsub -q short -t 1-$REPEATS .run_$RESPREFIX.sh | sed -r 's/^[^0-9]*([0-9]+).*$/\1/')
     echo "Current Task: $JOBID : $STRING"
     echo "$JOBID : $STRING" >> results.txt
@@ -32,17 +32,17 @@ pass () {
 
 echo "Start: $(date)"
 ## Parameter Optimization
-CIPSELECTOR=1
-for PARETO in 'random' 'greedy' 'pareto_only' 'all' 'default'; do
-    for NORMALIZATION in 1; do
-        for CIPK in 500 1000; do
+CIPSELECTOR=2
+for PARETO in 'greedy' 'default'; do
+    for NORMALIZATION in 0 1; do
+        for CIPK in 10; do
             for DECOMPRADIUS in 1 2; do
                 for CONTEXTSIZE in 1 2; do
-                    for MINCOUNT in 1; do
-                        for SIZELIMITER in 1; do
+                    for MINCOUNT in 1 2; do
+                        for SIZELIMITER in 0 1; do
                             RESPREFIX="$CIPSELECTOR-$CIPK-$CONTEXTSIZE-$MINCOUNT-$SIZELIMITER-$NORMALIZATION-$DECOMPRADIUS-$PARETO"
                             STRING=" --cipselector_option $CIPSELECTOR --pareto_option $PARETO --cipselector_k $CIPK --context_size $CONTEXTSIZE --min_count $MINCOUNT --graph_size_limiter $SIZELIMITER --use_normalization $NORMALIZATION --max_decompose_radius $DECOMPRADIUS --resprefix $RESPREFIX"
-                            ##report ##  Replace this with report/execute/pass
+#####                            report ##  Replace this with report/execute... HANDLE WITH CARE
                         done
                     done
                 done
@@ -51,53 +51,36 @@ for PARETO in 'random' 'greedy' 'pareto_only' 'all' 'default'; do
     done
 done
 ## Chemset Comparison
+CIPSELECTOR=2
+for PARETOOPTION in 'default' 'greedy'; do
 for CONTEXTSIZE in 1 2; do
-    for CIPK in 100 200 300 400; do
-        RESPREFIX="cipK-$CIPK-contextsize-$CONTEXTSIZE"
-        STRING=" --pareto_option 'default' --core_sizes 0 1 2 3 --context_size $CONTEXTSIZE --cipselector_k $CIPK --resprefix $RESPREFIX"
+    for CIPK in 5 10 15 20; do
+        RESPREFIX="res_CHEMCOMPARE_pareto-$PARETOOPTION-cipsel-$CIPSELECTOR-cipK-$CIPK-contextsize-$CONTEXTSIZE"
+        STRING=" --pareto_option $PARETOOPTION --core_sizes 0 1 2 3 --context_size $CONTEXTSIZE --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --resprefix $RESPREFIX"
 #        reportchem
     done
 done
+done
 
 ## Artificial Comparison
 CIPSELECTOR=2
+CIPK=10
+for PARETOOPTION in 'default' 'greedy'; do
 for CONTEXTSIZE in 1 2; do
-    RESPREFIX="coresizes-012-contextsize-$CONTEXTSIZE"
-    STRING=" --cipselector_option $CIPSELECTOR --core_sizes 0 1 2 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
+    RESPREFIX="res_cipsel-$CIPSELECTOR-cipk-$CIPK-pareto-$PARETOOPTION-coresizes-012-contextsize-$CONTEXTSIZE"
+    STRING=" --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --pareto_option $PARETOOPTION --core_sizes 0 1 2 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
 #    report
-    RESPREFIX="coresizes-01-contextsize-$CONTEXTSIZE"
-    STRING=" --cipselector_option $CIPSELECTOR --core_sizes 0 1 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
+    RESPREFIX="res_cipsel-$CIPSELECTOR-cipk-$CIPK-pareto-$PARETOOPTION-coresizes-01-contextsize-$CONTEXTSIZE"
+    STRING=" --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --pareto_option $PARETOOPTION --core_sizes 0 1 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
 #    report
-    RESPREFIX="coresizes-0-contextsize-$CONTEXTSIZE"
-    STRING=" --cipselector_option $CIPSELECTOR --core_sizes 0 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
+    RESPREFIX="res_cipsel-$CIPSELECTOR-cipk-$CIPK-pareto-$PARETOOPTION-coresizes-0-contextsize-$CONTEXTSIZE"
+    STRING=" --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --pareto_option $PARETOOPTION --core_sizes 0 --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
 #    report
 done
+done
 
-## Comparison of Average Productions ##
-    CIPSELECTOR=0
-    CIPK=1000
-    RESPREFIX="cipsel0_$CIPK"
-    STRING=" --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --resprefix $RESPREFIX"
-#    report
-    CIPSELECTOR=1
-    CIPK=100
-    RESPREFIX="cipsel1_$CIPK"
-    STRING=" --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --resprefix $RESPREFIX"
-#    report
-    CIPSELECTOR=2
-    CIPK=10
-    RESPREFIX="cipsel2_$CIPK"
-    STRING=" --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --resprefix $RESPREFIX"
-#    report
 ##########################
 
-## Cipselector 1: 100 200 400 800
-for CIPK in 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200; do
-#for CIPK in 10 30 50 70 90; do
-    RESPREFIX="cipsel1_$CIPK"
-    STRING=" --cipselector_option 1 --cipselector_k $CIPK --resprefix $RESPREFIX"
-#    report
-done
 
 for KEEP in 12 30 60; do
     for CIPK in 50 100 250; do
@@ -108,47 +91,15 @@ for KEEP in 12 30 60; do
 done
 
 
-## Cipselector 2: 1 5 10 15 20 #### REMOVED 100 FOR CHEMSETS
-for CIPK in 1 5 10 15 20; do
-    RESPREFIX="cipsel2_$CIPK"
-    STRING=" --cipselector_option 2 --cipselector_k $CIPK --resprefix $RESPREFIX"
-#    pass
-done
-
-## Normalization:
-for NORM in 0; do
-    RESPREFIX="no_norm"
-    STRING=" --use_normalization $NORM --resprefix $RESPREFIX"
-#    pass
-done
-
-## Pareto Options: ###### REMOVED 'all' FOR CHEMSETS
+CIPSELECTOR=2
+CIPK=10
+## Pareto Comparison:
 for PARETO in 'random' 'greedy' 'paretogreed' 'pareto_only' 'all' 'default'; do
-    RESPREFIX="res_pareto_$PARETO"
-#    STRING=" --pareto_option $PARETO --resprefix $RESPREFIX"
-    STRING=" --core_sizes 0 1 2 3 --pareto_option $PARETO --resprefix $RESPREFIX"
-#    reportchem
-done
-
-## Contextsizes/Thickness: 
-for CONTEXTSIZE in 1 2; do
-    RESPREFIX="contextsize_$CONTEXTSIZE"
-    STRING=" --context_size $CONTEXTSIZE --resprefix $RESPREFIX"
-#    pass
-done
-
-## Mincount/min_cip:
-for MINCOUNT in 1 2; do
-    RESPREFIX="mincount_$MINCOUNT"
-    STRING=" --min_count $MINCOUNT --resprefix $RESPREFIX"
-#    pass
-done
-
-## Graphsizelimiter:
-for SIZELIMITER in 0 1; do
-    RESPREFIX="sizelimiter_$SIZELIMITER"
-    STRING=" --graph_size_limiter $SIZELIMITER --resprefix $RESPREFIX"
-#    pass
+    RESPREFIX="res_cipsel-$CIPSELECTOR-cipk-$CIPK-pareto-$PARETO"
+    RESPREFIX="res_CHEM_cipsel-$CIPSELECTOR-cipk-$CIPK-pareto-$PARETO"
+    RESPREFIX="res_MINCOUNT1_cipsel-$CIPSELECTOR-cipk-$CIPK-pareto-$PARETO"
+    STRING=" --cipselector_option $CIPSELECTOR --cipselector_k $CIPK --pareto_option $PARETO --resprefix $RESPREFIX"
+###    report
 done
 
 
@@ -161,7 +112,7 @@ KEEP=60
 for CIPK in 50 450 1125 1875 2625 3375 4125; do  
     RESPREFIX="res_keep-$KEEP-cipsel0-$CIPK"
     STRING=" --keepgraphs $KEEP --cipselector_option 0 --cipselector_k $CIPK --pareto_option 'greedy' --resprefix $RESPREFIX"
-    report
+#    report
 done
 #CIP1
 ###for CIPK in 10 50 100 150 200 250 300; do # KEEP 30
@@ -170,7 +121,7 @@ done
 for CIPK in 1 15 38 63 88 113 138; do
     RESPREFIX="res_keep-$KEEP-cipsel1-$CIPK"
     STRING=" --keepgraphs $KEEP --cipselector_option 1 --cipselector_k $CIPK --pareto_option 'greedy' --resprefix $RESPREFIX"
-    report
+#    report
 done
 #CIP2
 ###for CIPK in 10 20 30 40 50 60 70 80 90 100; do # KEEP 30
@@ -179,5 +130,20 @@ done
 for CIPK in 1 2 3 4 6 7 8 9; do
     RESPREFIX="res_keep-$KEEP-cipsel2-$CIPK"
     STRING=" --keepgraphs $KEEP --cipselector_option 2 --cipselector_k $CIPK --pareto_option 'greedy' --resprefix $RESPREFIX"
-    report
+#    report
+done
+
+
+
+######### TESTING #########
+RESPREFIX="res_TESTING"
+STRING=" --cipselector_option 1 --cipselector_k 100 --pareto_option 'greedy' --resprefix $RESPREFIX"
+#report # execute
+
+### 24 Core Test ###
+PARETOOPTION='greedy'
+for CONTEXTSIZE in 1 2; do
+    RESPREFIX="res_24core_CHEMCOMPARE_pareto-$PARETOOPTION-cipsel-2-cipK-10-contextsize-$CONTEXTSIZE"
+    STRING=" --pareto_option $PARETOOPTION --core_sizes 0 1 2 3 --context_size $CONTEXTSIZE --cipselector_option 2 --cipselector_k 10 --resprefix $RESPREFIX"
+#    reportchem
 done

From 23801e9a7a5195215918b650935f2772b3d3e7e1 Mon Sep 17 00:00:00 2001
From: MatthiasHerrmann <herrmannmatthias@gmx.net>
Date: Thu, 1 Apr 2021 15:42:07 +0200
Subject: [PATCH 7/7] Fix default params and some minor cleaning

---
 exploration/pareto.py | 4 ----
 reconstruct.py        | 6 +++---
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/exploration/pareto.py b/exploration/pareto.py
index 589eae3..1e4744b 100644
--- a/exploration/pareto.py
+++ b/exploration/pareto.py
@@ -482,10 +482,6 @@ def __init__(
         self.output_k_best = output_k_best
         self.decomposer = decomposer
         self.grammar = lsgg_size_hack(radii=core_sizes, thickness=context_size, core_vec_decomposer=decomposer, cipselector=cipselector, nodelevel_radius_and_thickness=True) #cip_root_all=False, half_step_distance=True)
-###        self.grammar = lsggold(nodelevel_radius_and_thickness=True) #cip_root_all=False, half_step_distance=True)
-##        self.grammar.radii = core_sizes #self.grammar.set_core_size(core_sizes)
-##        self.grammar.thickness = context_size #self.grammar.decomposition_args['thickness_list'] = [context_size]
-        #self.grammar.set_min_count(min_count) interfacecount 1 makes no sense
         self.grammar.filter_min_cip = min_count #self.grammar.filter_args['min_cip_count'] = min_count
         self.optiopts = kwargs
         self.cs2cs = cs2cs
diff --git a/reconstruct.py b/reconstruct.py
index 8d01828..33499e9 100644
--- a/reconstruct.py
+++ b/reconstruct.py
@@ -45,7 +45,7 @@ def maketasks(params):
 ##  OPTIONS FOR GRAPHS
 ##########################################
 
-EXPERIMENT_REPEATS = 20 #### CHANGE THIS BACK TO 100! 50 only for chemsets
+EXPERIMENT_REPEATS = 50
 # 1. param dict
 
 params_graphs = {
@@ -125,7 +125,7 @@ def maketasks(params):
                     help='Core sizes/Radii')
 parser.add_argument('--context_size', nargs=1, type=float, default=[1],
                     help='Context sizes/Thickness')
-parser.add_argument('--cipselector_option', nargs=1, type=int, default=[1], ## Change this back
+parser.add_argument('--cipselector_option', nargs=1, type=int, default=[2], ## Change this back
                     choices=[0, 1, 2],
                     help='1: Take k best from all, 2: Take k best from each current cip')
 parser.add_argument('--cipselector_k', nargs=1, type=int, default=[10],
@@ -137,7 +137,7 @@ def maketasks(params):
                     help='Number of graphs kept from the pareto part')
 parser.add_argument('--use_normalization', nargs=1, type=int, default=[1], choices=[1,0],
                     help='If 1, normalization will be applied for cipselection')
-parser.add_argument('--min_count', nargs=1, type=int, default=[2], 
+parser.add_argument('--min_count', nargs=1, type=int, default=[1], 
                     help='Also called min_cip')
 parser.add_argument('--graph_size_limiter', nargs=1, type=int, default=[1], choices=[1,0],
                     help='If 0, graph size limiter is only used with a graphs >100')