From 6289ce6d81825fcb59b83d4f89a33b737f3c1a73 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Thu, 23 Jan 2020 11:10:27 -0600 Subject: [PATCH 01/17] adding operator_withRegulation.c --- commit/operator/operator_withRegularization.c | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 commit/operator/operator_withRegularization.c diff --git a/commit/operator/operator_withRegularization.c b/commit/operator/operator_withRegularization.c new file mode 100644 index 00000000..c2dfbff2 --- /dev/null +++ b/commit/operator/operator_withRegularization.c @@ -0,0 +1,24 @@ + + +void COMMIT_L( + int nF, int nIC, int nV, int nS, + double *vIN, double *vOUT) +{ + for(int f = 0; f < nF; f++){ + + vOUT[nV*nS] += -2*vIN[f] + x[nF + f]; + + for(int r = 1; r < nIC-1; r++){ + vOUT[nV*nS + r] += vIN[(r-1)*nF + f] -2*vIN[r*nF + f] + vIN[(r+1)*nF + f]; + } + + vOUT[nV*nS + nIC - 1] += vIN[(nIC-2)*nF + f] - 2*vIN[(nIC-1)*nF + f]; + } +} + +void COMMIT_Lt( + int nF, int nIC, int nV, int nS, + double *vIN, double *vOUT) +{ + +} \ No newline at end of file From 274e722dd7b3b317c3f80eabea96837f7ceec219 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Mon, 17 Feb 2020 11:15:00 -0600 Subject: [PATCH 02/17] Executable version --- commit/core.pyx | 17 ++++++++-- commit/operator/operator.pyx | 28 +++++++++++++++-- commit/operator/operator_noLUT.c | 31 +++++++++++++++++++ commit/operator/operator_withLUT.c | 31 +++++++++++++++++++ commit/operator/operator_withRegularization.c | 24 -------------- 5 files changed, 103 insertions(+), 28 deletions(-) delete mode 100644 commit/operator/operator_withRegularization.c diff --git a/commit/core.pyx b/commit/core.pyx index 58063114..f2d26c89 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -661,7 +661,19 @@ cdef class Evaluation : raise RuntimeError( 'Dictionary not loaded; call "load_dictionary()" first.' ) if self.niiDWI is None : raise RuntimeError( 'Data not loaded; call "load_data()" first.' ) - return self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float64) + + y = self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float64) + #return y + print(type(y)) + print(y.shape) + print(y.shape[0]) + print(self.KERNELS['wmr'].shape[0]) + print(y.shape[0] + self.KERNELS['wmr'].shape[0]) + y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0], dtype=np.float64) + y2[0:y.shape[0]] = y + print(y2.shape) + return y2 + #""" def fit( self, tol_fun = 1e-3, tol_x = 1e-6, max_iter = 100, verbose = 1, x0 = None, regularisation = None ) : """Fit the model to the data. 
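This patch implements the regularisation as an augmented least-squares problem: get_y() (above) pads the measurement vector with nR extra zeros (one per intra-cellular kernel, KERNELS['wmr'].shape[0]), and the operator (below) gains nR extra rows (n1 = nV*nS + nR) that accumulate, over all streamlines, the second difference (discrete Laplacian) of the coefficients along the kernel axis. The solver therefore minimises ||Ax - y||^2 + ||lambda L x||^2 through the stacked system [A; lambda L] x ~ [y; 0]. Below is a minimal NumPy sketch of a dense equivalent of COMMIT_L, assuming the kernel-major coefficient layout x[r*nF + f] used in the C code; the sizes, variable names and weight value are purely illustrative and not part of the patch:

    import numpy as np

    nF, nR = 4, 5        # hypothetical number of streamlines / IC kernels
    lam    = 0.3         # regularisation weight ("regterm" in the patch)
    x_ic   = np.random.rand(nR * nF)   # IC block of x, laid out as x[r*nF + f]

    # One extra row per kernel r: second difference along r, summed over all f,
    # with one-sided stencils at r = 0 and r = nR-1 as in COMMIT_L.
    L = np.zeros((nR, nR * nF))
    for f in range(nF):
        for r in range(nR):
            L[r, r*nF + f] = -2.0
            if r > 0:
                L[r, (r-1)*nF + f] = 1.0
            if r < nR - 1:
                L[r, (r+1)*nF + f] = 1.0

    extra_rows = lam * (L @ x_ic)   # what COMMIT_L accumulates into v_out[nV*nS:]

Note that the r = 0 case in the C code reads "vOUT[nV*nS] += -2*vIN[f] + x[nF + f]", where x is undefined in that scope; interpreting it as vIN[nF + f] yields the stencil sketched above.
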
@@ -763,6 +775,7 @@ cdef class Evaluation : nF = self.DICTIONARY['IC']['nF'] nE = self.DICTIONARY['EC']['nE'] nV = self.DICTIONARY['nV'] + nS = self.KERNELS['wmr'].shape[2] norm_fib = np.ones( nF ) # x is the x of the original problem # self.x is the x preconditioned @@ -803,7 +816,7 @@ cdef class Evaluation : niiMAP_hdr = niiMAP.header if nibabel.__version__ >= '2.0.0' else niiMAP.get_header() y_mea = np.reshape( self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float32), (nV,-1) ) - y_est = np.reshape( self.A.dot(self.x), (nV,-1) ).astype(np.float32) + y_est = np.reshape( self.A.dot(self.x)[:int(nV*nS)], (nV,-1) ).astype(np.float32) print( '\t\t- RMSE...', end="" ) sys.stdout.flush() diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index 6d83202a..50f414a3 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -26,7 +26,15 @@ cdef extern void COMMIT_At( unsigned char *_ICthreadsT, unsigned int *_ECthreadsT, unsigned int *_ISOthreadsT ) nogil +cdef extern void COMMIT_L( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_v_in, double *_v_out +) nogil +cdef extern void COMMIT_Lt( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_v_in, double *_v_out +) nogil cdef class LinearOperator : """This class is a wrapper to the C code for performing marix-vector multiplications @@ -61,7 +69,7 @@ cdef class LinearOperator : cdef unsigned int* ISOthreadsT - def __init__( self, DICTIONARY, KERNELS, THREADS ) : + def __init__( self, DICTIONARY, KERNELS, THREADS, tikterm=0.3 ) : """Set the pointers to the data structures used by the C code.""" self.DICTIONARY = DICTIONARY self.KERNELS = KERNELS @@ -75,6 +83,7 @@ cdef class LinearOperator : self.nI = KERNELS['iso'].shape[0] # number of ISO contributions self.n = DICTIONARY['IC']['n'] # numbner of IC segments self.ndirs = KERNELS['wmr'].shape[1] # number of directions + #self.tikterm = tikterm if KERNELS['wmr'].size > 0 : self.nS = KERNELS['wmr'].shape[2] # number of SAMPLES @@ -85,7 +94,7 @@ cdef class LinearOperator : self.adjoint = 0 # direct of inverse product - self.n1 = self.nV*self.nS + self.n1 = self.nV*self.nS + self.nR self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV # get C pointers to arrays in DICTIONARY @@ -188,4 +197,19 @@ cdef class LinearOperator : self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT ) + if not self.adjoint: + with nogil: + # DIRECT PRODUCT L*lambda*x + COMMIT_L( + self.nF, self.nR, self.nV, self.nS, 0.3, + &v_in[0], &v_out[0] + ) + else: + with nogil: + # INVERSE PRODUCT L'*lambda*y + COMMIT_Lt( + self.nF, self.nR, self.nV, self.nS, 0.3, #self.tikterm + &v_in[0], &v_out[0] + ) #""" + return v_out diff --git a/commit/operator/operator_noLUT.c b/commit/operator/operator_noLUT.c index 0046f237..d261c02a 100644 --- a/commit/operator/operator_noLUT.c +++ b/commit/operator/operator_noLUT.c @@ -185,3 +185,34 @@ void COMMIT_At( pthread_join( threads[t], NULL ); return; } + +void COMMIT_L( + int nF, int nIC, int nV, int nS, double regterm, + double *vIN, double *vOUT) +{ + for(int f = 0; f < nF; f++){ + + vOUT[nV*nS] += regterm*( -2*vIN[f] + x[nF + f] ); + + for(int r = 1; r < nIC-1; r++){ + vOUT[nV*nS + r] += regterm*( vIN[(r-1)*nF + f] -2*vIN[r*nF + f] + vIN[(r+1)*nF + f] ); + } + + vOUT[nV*nS + nIC - 1] += regterm*( vIN[(nIC-2)*nF + f] - 2*vIN[(nIC-1)*nF + f] ); + } +} + +void COMMIT_Lt( + int nF, int nIC, int nV, int nS, double regterm, + double *vIN, double 
*vOUT) +{ + for(int f = 0; f < nF; f++){ + vOUT[f] += regterm*( -2*vIN[nV*nS] + vIN[nV*nS + 1] ); + + for (int r = 0; r < nIC; r++){ + vOUT[r*nF + f] += regterm*( vIN[nV*nS + (r-1)] - 2*vIN[nV*nS + r] + vIN[nV*nS + (r+1)] ); + } + + vOUT[(nIC-1)*nF + f] += regterm*( vIN[nV*nS + (nIC-2)] - 2*vIN[nV*nS + (nIC-1)] ); + } +} \ No newline at end of file diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c index 9c959f57..e7d23730 100644 --- a/commit/operator/operator_withLUT.c +++ b/commit/operator/operator_withLUT.c @@ -2245,3 +2245,34 @@ void COMMIT_At( pthread_join( threads[t], NULL ); return; } + +void COMMIT_L( + int _nF, int _nIC, int _nV, int _nS, double _tikterm, + double *_vIN, double *_vOUT) +{ + for(int f = 0; f < _nF; f++){ + + _vOUT[_nV*_nS] += _tikterm*( -2*_vIN[f] + x[_nF + f] ); + + for(int r = 1; r < _nIC-1; r++){ + _vOUT[_nV*_nS + r] += _tikterm*( _vIN[(r-1)*_nF + f] -2*_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); + } + + _vOUT[_nV*_nS + _nIC - 1] += _tikterm*( _vIN[(_nIC-2)*_nF + f] - 2*_vIN[(_nIC-1)*_nF + f] ); + } +} + +void COMMIT_Lt( + int _nF, int _nIC, int _nV, int _nS, double _tikterm, + double *_vIN, double *_vOUT) +{ + for(int f = 0; f < _nF; f++){ + _vOUT[f] += _tikterm*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); + + for (int r = 0; r < _nIC; r++){ + _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + (r-1)] - 2*_vIN[_nV*_nS + r] + _vIN[_nV*_nS + (r+1)] ); + } + + _vOUT[(_nIC-1)*_nF + f] += _tikterm*( _vIN[_nV*_nS + (_nIC-2)] - 2*_vIN[_nV*_nS + (_nIC-1)] ); + } +} \ No newline at end of file diff --git a/commit/operator/operator_withRegularization.c b/commit/operator/operator_withRegularization.c deleted file mode 100644 index c2dfbff2..00000000 --- a/commit/operator/operator_withRegularization.c +++ /dev/null @@ -1,24 +0,0 @@ - - -void COMMIT_L( - int nF, int nIC, int nV, int nS, - double *vIN, double *vOUT) -{ - for(int f = 0; f < nF; f++){ - - vOUT[nV*nS] += -2*vIN[f] + x[nF + f]; - - for(int r = 1; r < nIC-1; r++){ - vOUT[nV*nS + r] += vIN[(r-1)*nF + f] -2*vIN[r*nF + f] + vIN[(r+1)*nF + f]; - } - - vOUT[nV*nS + nIC - 1] += vIN[(nIC-2)*nF + f] - 2*vIN[(nIC-1)*nF + f]; - } -} - -void COMMIT_Lt( - int nF, int nIC, int nV, int nS, - double *vIN, double *vOUT) -{ - -} \ No newline at end of file From 685513733af2bbb8f682b328b5fd1d667cf3059f Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Thu, 27 Feb 2020 16:25:45 -0600 Subject: [PATCH 03/17] Now regularization term is not constant --- commit/core.pyx | 4 ++-- commit/operator/operator.pyx | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index f2d26c89..5efcc326 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -621,7 +621,7 @@ cdef class Evaluation : print( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - def build_operator( self ) : + def build_operator( self, regtikhonov=0.1 ) : """Compile/build the operator for computing the matrix-vector multiplications by A and A' using the informations from self.DICTIONARY, self.KERNELS and self.THREADS. 
NB: needs to call this function to update pointers to data structures in case @@ -648,7 +648,7 @@ cdef class Evaluation : import commit.operator.operator else : reload( sys.modules['commit.operator.operator'] ) - self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS ) + self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, regtikhonov ) print( ' [ %.1f seconds ]' % ( time.time() - tic ) ) diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index 50f414a3..244c03ec 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -43,6 +43,7 @@ cdef class LinearOperator : """ cdef int nS, nF, nR, nE, nT, nV, nI, n, ndirs cdef public int adjoint, n1, n2 + cdef public float regtikhonov cdef DICTIONARY cdef KERNELS @@ -69,7 +70,7 @@ cdef class LinearOperator : cdef unsigned int* ISOthreadsT - def __init__( self, DICTIONARY, KERNELS, THREADS, tikterm=0.3 ) : + def __init__( self, DICTIONARY, KERNELS, THREADS, regtikhonov ) : """Set the pointers to the data structures used by the C code.""" self.DICTIONARY = DICTIONARY self.KERNELS = KERNELS @@ -83,7 +84,7 @@ cdef class LinearOperator : self.nI = KERNELS['iso'].shape[0] # number of ISO contributions self.n = DICTIONARY['IC']['n'] # numbner of IC segments self.ndirs = KERNELS['wmr'].shape[1] # number of directions - #self.tikterm = tikterm + self.regtikhonov = regtikhonov if KERNELS['wmr'].size > 0 : self.nS = KERNELS['wmr'].shape[2] # number of SAMPLES @@ -140,7 +141,7 @@ cdef class LinearOperator : @property def T( self ) : """Transpose of the explicit matrix.""" - C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS ) + C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.regtikhonov ) C.adjoint = 1 - C.adjoint return C @@ -201,14 +202,14 @@ cdef class LinearOperator : with nogil: # DIRECT PRODUCT L*lambda*x COMMIT_L( - self.nF, self.nR, self.nV, self.nS, 0.3, + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, &v_in[0], &v_out[0] ) else: with nogil: # INVERSE PRODUCT L'*lambda*y COMMIT_Lt( - self.nF, self.nR, self.nV, self.nS, 0.3, #self.tikterm + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, #self.tikterm &v_in[0], &v_out[0] ) #""" From 76da763195c347a78ec4a9d8a2b9635e14a79ecb Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Tue, 24 Mar 2020 11:38:20 -0600 Subject: [PATCH 04/17] Fixed bug in A'y multiplication with tikhonov --- commit/core.pyx | 6 ++--- commit/operator/operator_withLUT.c | 2 +- commit/solvers.py | 35 ++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index 5efcc326..8f4933e0 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -664,14 +664,14 @@ cdef class Evaluation : y = self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float64) #return y - print(type(y)) + """print(type(y)) print(y.shape) print(y.shape[0]) print(self.KERNELS['wmr'].shape[0]) - print(y.shape[0] + self.KERNELS['wmr'].shape[0]) + print(y.shape[0] + self.KERNELS['wmr'].shape[0])""" y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0], dtype=np.float64) y2[0:y.shape[0]] = y - print(y2.shape) + #print(y2.shape) return y2 #""" diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c index e7d23730..6bc8886c 100644 --- a/commit/operator/operator_withLUT.c +++ 
b/commit/operator/operator_withLUT.c @@ -2269,7 +2269,7 @@ void COMMIT_Lt( for(int f = 0; f < _nF; f++){ _vOUT[f] += _tikterm*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); - for (int r = 0; r < _nIC; r++){ + for (int r = 0; r < _nIC-1; r++){ _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + (r-1)] - 2*_vIN[_nV*_nS + r] + _vIN[_nV*_nS + (r+1)] ); } diff --git a/commit/solvers.py b/commit/solvers.py index ce4325fb..12ee511c 100755 --- a/commit/solvers.py +++ b/commit/solvers.py @@ -287,6 +287,11 @@ def fista( y, A, At, tol_fun, tol_x, max_iter, verbose, x0, omega, proximal) : res = -y.copy() xhat = x0.copy() x = np.zeros_like(xhat) + checkval = np.sum(A.dot(xhat)) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 1 Ax -----------------------------------') + print(A.dot(xhat)) + print() res += A.dot(xhat) proximal( xhat ) reg_term = omega( xhat ) @@ -295,10 +300,20 @@ def fista( y, A, At, tol_fun, tol_x, max_iter, verbose, x0, omega, proximal) : told = 1 beta = 0.9 prev_x = xhat.copy() + checkval = np.sum(np.asarray(At.dot(res))) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 2 A\'y -----------------------------------') + print(np.asarray(At.dot(res))) + print() grad = np.asarray(At.dot(res)) qfval = prev_obj # Step size computation + checkval = np.sum(A.dot(grad)) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 3 Ax -----------------------------------') + print(A.dot(grad)) + print() L = ( np.linalg.norm( A.dot(grad) ) / np.linalg.norm(grad) )**2 mu = 1.9 / L @@ -323,6 +338,11 @@ def fista( y, A, At, tol_fun, tol_x, max_iter, verbose, x0, omega, proximal) : # Check stepsize tmp = x-xhat q = qfval + np.real( np.dot(tmp,grad) ) + 0.5/mu * np.linalg.norm(tmp)**2 + reg_term_x + checkval = np.sum(A.dot(x) - y) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 4 Ax -----------------------------------') + print(A.dot(x) - y) + print() res = A.dot(x) - y res_norm = np.linalg.norm(res) curr_obj = 0.5 * res_norm**2 + reg_term_x @@ -340,6 +360,11 @@ def fista( y, A, At, tol_fun, tol_x, max_iter, verbose, x0, omega, proximal) : # Check stepsize tmp = x-xhat q = qfval + np.real( np.dot(tmp,grad) ) + 0.5/mu * np.linalg.norm(tmp)**2 + reg_term_x + checkval = np.sum(A.dot(x) - y) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 5 Ax -----------------------------------') + print(A.dot(x) - y) + print() res = A.dot(x) - y res_norm = np.linalg.norm(res) curr_obj = 0.5 * res_norm**2 + reg_term_x @@ -373,9 +398,19 @@ def fista( y, A, At, tol_fun, tol_x, max_iter, verbose, x0, omega, proximal) : xhat = x + (told-1)/t * (x - prev_x) # Gradient computation + checkval = np.sum(A.dot(xhat) - y) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 6 Ax -----------------------------------') + print(A.dot(xhat) - y) + print() res = A.dot(xhat) - y xarr = np.asarray(x) + checkval = np.sum(np.asarray(At.dot(res))) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 7 A\'y -----------------------------------') + print(np.asarray(At.dot(res))) + print() grad = np.asarray(At.dot(res)) # Update variables From 298754abb90b79522a43743744a96ed4d253581a Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Tue, 7 Apr 2020 22:32:14 -0500 Subject: [PATCH 05/17] Adding Tikhonov's first derivative matrix --- .gitignore | 20 +- CHANGELOG.md | 86 +- commit/core.pyx | 1828 +++---- commit/operator/operator.pyx | 432 
+- commit/operator/operator.pyxbld | 66 +- commit/operator/operator_noLUT.c | 464 +- commit/operator/operator_withLUT.c | 4584 +++++++++-------- commit/solvers.py | 876 ++-- commit/trk2dictionary/trk2dictionary.pyx | 892 ++-- commit/trk2dictionary/trk2dictionary_c.cpp | 1218 ++--- doc/tutorials/AdvancedSolvers/README.md | 308 +- .../AdvancedSolvers/tutorial_solvers.ipynb | 512 +- setup.py | 108 +- 13 files changed, 5725 insertions(+), 5669 deletions(-) diff --git a/.gitignore b/.gitignore index 64f267a7..b8a0c22e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,11 @@ -build -.ipynb_checkpoints -.DS_Store -.DS_Store? -._* -.Spotlight-V100 -.Trashes -ehthumbs.db -Thumbs.db -__pycache__/ +build +.ipynb_checkpoints +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db +__pycache__/ .vscode/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 67dd1e28..2e749ae5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,43 +1,43 @@ - -# Change Log -All notable changes to COMMIT will be documented in this file. - -## [1.3.4] - 2020-04-02 - -### Changed -- Added colorized output. NB: needs AMICO 1.2.0 or above. - - -## [1.3.3] - 2020-03-31 - -### Added -- Added possibility to save the predicted DW-MR signal in save_results. - -### Fixed -- Minor cleanup. - -## [1.3.2] - 2020-03-27 - -### Added -- Check if dictionary (upon loading) and data have the same geometry. - -### Fixed -- Bug while saving coefficients in save_results. - -## [1.3.1] - 2020-03-27 - -### Fixed -- Improved the loading of the streamlines in trk2dictionary - -## [1.3] - 2019-10-30 - -This version of COMMIT *is not compatible* with [AMICO](https://github.com/daducci/AMICO) v1.0.1 of below. If you update COMMIT to this version, please update AMICO to version 1.1.0 or above. - -### Added -- Changelog file to keep tracking of the COMMIT versions. - -### Changed -- Added compatibility with low resolution LUTs. - -### Fixed -- Nothing. + +# Change Log +All notable changes to COMMIT will be documented in this file. + +## [1.3.4] - 2020-04-02 + +### Changed +- Added colorized output. NB: needs AMICO 1.2.0 or above. + + +## [1.3.3] - 2020-03-31 + +### Added +- Added possibility to save the predicted DW-MR signal in save_results. + +### Fixed +- Minor cleanup. + +## [1.3.2] - 2020-03-27 + +### Added +- Check if dictionary (upon loading) and data have the same geometry. + +### Fixed +- Bug while saving coefficients in save_results. + +## [1.3.1] - 2020-03-27 + +### Fixed +- Improved the loading of the streamlines in trk2dictionary + +## [1.3] - 2019-10-30 + +This version of COMMIT *is not compatible* with [AMICO](https://github.com/daducci/AMICO) v1.0.1 of below. If you update COMMIT to this version, please update AMICO to version 1.1.0 or above. + +### Added +- Changelog file to keep tracking of the COMMIT versions. + +### Changed +- Added compatibility with low resolution LUTs. + +### Fixed +- Nothing. 
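The subject of this patch mentions a first-derivative Tikhonov matrix, i.e. penalising differences between adjacent kernel coefficients rather than their second differences. Most of the diff that follows, however, rewrites whole files with unchanged content (apparently line-ending normalisation), and the excerpt does not show the corresponding C implementation. Purely as a point of reference, and under the same assumed layout x[r*nF + f] as in the sketch above, a first-difference penalty could be built as follows (illustrative only; first_difference_matrix is a hypothetical helper, not part of the project):

    import numpy as np

    def first_difference_matrix(nF, nR):
        # (nR-1) penalty rows, one per pair of adjacent kernels, summed over streamlines
        D = np.zeros((nR - 1, nR * nF))
        for f in range(nF):
            for r in range(nR - 1):
                D[r, (r+1)*nF + f] =  1.0
                D[r, r*nF + f]     = -1.0
        return D

    # The augmented problem keeps the same form, min_x ||A x - y||^2 + ||lam * D x||^2,
    # realised by stacking lam*D under A and zero-padding y accordingly (cf. get_y()).

Compared with the second-difference operator of the earlier patches, this form penalises the slope rather than the curvature of the coefficient profile across kernels and contributes nR-1 extra rows instead of nR.
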
diff --git a/commit/core.pyx b/commit/core.pyx index 68a1ac0b..224f6901 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -1,914 +1,914 @@ -#!python -#cython: language_level=3, boundscheck=False, wraparound=False, nonecheck=False, cdivision=True, initializedcheck=False, binding=False -from __future__ import print_function -cimport cython -import numpy as np -cimport numpy as np - -import time -import glob -import sys -from os import makedirs, remove -from os.path import exists, join as pjoin, isfile -import nibabel -import pickle -import commit.models -import commit.solvers -import amico.scheme -import amico.lut -import pyximport -pyximport.install( reload_support=True, language_level=3 ) -from amico.util import LOG, NOTE, WARNING, ERROR - - -def setup( lmax = 12, ndirs = 32761 ) : - """General setup/initialization of the COMMIT framework. - - Parameters - ---------- - lmax : int - Maximum SH order to use for the rotation phase (default : 12) - ndirs : int - Number of directions on the half of the sphere representing the possible orientations of the response functions (default : 32761) - """ - - if not amico.lut.is_valid(ndirs): - ERROR( 'Unsupported value for ndirs.\nNote: Supported values for ndirs are [500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000, 32761 (default)]' ) - - amico.lut.precompute_rotation_matrices( lmax, ndirs ) - -def load_dictionary_info(filename): - """Function to load dictionary info file - - Parameters - ---------- - filename : string - This value is always COMMIT_PATH + dictionary_info.pickle - """ - if not isfile( filename ): - ERROR( 'Dictionary is outdated or not found. Execute "trk2dictionary" script first' ) - with open( filename, 'rb' ) as dictionary_info_file: - if sys.version_info.major == 3: - aux = pickle.load( dictionary_info_file, fix_imports=True, encoding='bytes' ) - # Pickle files written by Python 2 are loaded with byte - # keys, whereas those written by Python 3 are loaded with - # str keys, even when both are written using protocol=2 - result_aux = {(k.decode() if hasattr(k,"decode") else k): v for k, v in aux.items()} - return result_aux - else: - return pickle.load( dictionary_info_file ) - -cdef class Evaluation : - """Class to hold all the information (data and parameters) when performing an - evaluation with the COMMIT framework. - """ - cdef public niiDWI - cdef public niiDWI_img - cdef public scheme - cdef public model - cdef public KERNELS - cdef public DICTIONARY - cdef public THREADS - cdef public A - cdef public x - cdef public CONFIG - - def __init__( self, study_path, subject ) : - """Setup the data structures with default values. 
- - Parameters - ---------- - study_path : string - The path to the folder containing all the subjects from one study - subject : string - The path (relative to previous folder) to the subject folder - """ - self.niiDWI = None # set by "load_data" method - self.scheme = None # set by "load_data" method - self.model = None # set by "set_model" method - self.KERNELS = None # set by "load_kernels" method - self.DICTIONARY = None # set by "load_dictionary" method - self.THREADS = None # set by "set_threads" method - self.A = None # set by "build_operator" method - self.x = None # set by "fit" method - - # store all the parameters of an evaluation with COMMIT - self.CONFIG = {} - self.set_config('study_path', study_path) - self.set_config('subject', subject) - self.set_config('DATA_path', pjoin( study_path, subject )) - - self.set_config('doNormalizeSignal', True) - self.set_config('doMergeB0', False) - self.set_config('doNormalizeKernels', True) - self.set_config('doDemean', False) - self.set_config('doNormalizeMaps', False) - - - - def set_config( self, key, value ) : - self.CONFIG[ key ] = value - - def get_config( self, key ) : - return self.CONFIG.get( key ) - - - def load_data( self, dwi_filename = 'DWI.nii', scheme_filename = 'DWI.scheme', b0_thr = 0 ) : - """Load the diffusion signal and its corresponding acquisition scheme. - - Parameters - ---------- - dwi_filename : string - The file name of the DWI data, relative to the subject folder (default : 'DWI.nii') - scheme_filename : string - The file name of the corresponding acquisition scheme (default : 'DWI.scheme') - b0_thr : float - The threshold below which a b-value is considered a b0 (default : 0) - """ - - # Loading data and acquisition scheme - tic = time.time() - LOG( '\n-> Loading data:' ) - - print( '\t* DWI signal:' ) - self.set_config('dwi_filename', dwi_filename) - self.niiDWI = nibabel.load( pjoin( self.get_config('DATA_path'), dwi_filename) ) - self.niiDWI_img = self.niiDWI.get_data().astype(np.float32) - if self.niiDWI_img.ndim ==3 : - self.niiDWI_img = np.expand_dims( self.niiDWI_img, axis=3 ) - hdr = self.niiDWI.header if nibabel.__version__ >= '2.0.0' else self.niiDWI.get_header() - self.set_config('dim', self.niiDWI_img.shape[0:3]) - self.set_config('pixdim', tuple( hdr.get_zooms()[:3] )) - print( '\t\t- dim = %d x %d x %d x %d' % self.niiDWI_img.shape ) - print( '\t\t- pixdim = %.3f x %.3f x %.3f' % self.get_config('pixdim') ) - - print( '\t* Acquisition scheme:' ) - self.set_config('scheme_filename', scheme_filename) - self.set_config('b0_thr', b0_thr) - self.scheme = amico.scheme.Scheme( pjoin( self.get_config('DATA_path'), scheme_filename), b0_thr ) - print( '\t\t- %d samples, %d shells' % ( self.scheme.nS, len(self.scheme.shells) ) ) - print( '\t\t- %d @ b=0' % ( self.scheme.b0_count ), end='' ) - for i in xrange(len(self.scheme.shells)) : - print( ', %d @ b=%.1f' % ( len(self.scheme.shells[i]['idx']), self.scheme.shells[i]['b'] ), end='' ) - print() - - if self.scheme.nS != self.niiDWI_img.shape[3] : - ERROR( 'Scheme does not match with DWI data' ) - - if self.scheme.dwi_count == 0 : - ERROR( 'There are no DWI volumes in the data' ) - - LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - - # Preprocessing - tic = time.time() - LOG( '\n-> Preprocessing:' ) - - if self.get_config('doNormalizeSignal') : - if self.scheme.b0_count > 0 : - print( '\t* Normalizing to b0... 
', end='' ) - sys.stdout.flush() - mean = np.mean( self.niiDWI_img[:,:,:,self.scheme.b0_idx], axis=3 ) - idx = mean <= 0 - mean[ idx ] = 1 - mean = 1 / mean - mean[ idx ] = 0 - for i in xrange(self.scheme.nS) : - self.niiDWI_img[:,:,:,i] *= mean - else : - print( '\t* There are no b0 volume(s) for normalization...', end='' ) - print( '[ min=%.2f, mean=%.2f, max=%.2f ]' % ( self.niiDWI_img.min(), self.niiDWI_img.mean(), self.niiDWI_img.max() ) ) - - if self.scheme.b0_count > 1 : - if self.get_config('doMergeB0') : - print( '\t* Merging multiple b0 volume(s)... ', end='' ) - mean = np.expand_dims( np.mean( self.niiDWI_img[:,:,:,self.scheme.b0_idx], axis=3 ), axis=3 ) - self.niiDWI_img = np.concatenate( (mean, self.niiDWI_img[:,:,:,self.scheme.dwi_idx]), axis=3 ) - else : - print( '\t* Keeping all b0 volume(s)... ', end='' ) - print( '[ %d x %d x %d x %d ]' % self.niiDWI_img.shape ) - - if self.get_config('doDemean') : - print( '\t* Demeaning signal... ', end='' ) - sys.stdout.flush() - mean = np.repeat( np.expand_dims(np.mean(self.niiDWI_img,axis=3),axis=3), self.niiDWI_img.shape[3], axis=3 ) - self.niiDWI_img = self.niiDWI_img - mean - print( '[ min=%.2f, mean=%.2f, max=%.2f ]' % ( self.niiDWI_img.min(), self.niiDWI_img.mean(), self.niiDWI_img.max() ) ) - - LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - - - def set_model( self, model_name ) : - """Set the model to use to describe the signal contributions in each voxel. - - Parameters - ---------- - model_name : string - The name of the model (must match a class name in "commit.models" module) - """ - # Call the specific model constructor - if hasattr(commit.models, model_name ) : - self.model = getattr(commit.models,model_name)() - else : - ERROR( 'Model "%s" not recognized' % model_name ) - - self.set_config('ATOMS_path', pjoin( self.get_config('study_path'), 'kernels', self.model.id )) - - - def generate_kernels( self, regenerate = False, lmax = 12, ndirs = 32761 ) : - """Generate the high-resolution response functions for each compartment. - Dispatch to the proper function, depending on the model. - - Parameters - ---------- - regenerate : boolean - Regenerate kernels if they already exist (default : False) - lmax : int - Maximum SH order to use for the rotation procedure (default : 12) - ndirs : int - Number of directions on the half of the sphere representing the possible orientations of the response functions (default : 32761) - """ - if not amico.lut.is_valid(ndirs): - ERROR( 'Unsupported value for ndirs.\nNote: Supported values for ndirs are [500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000, 32761 (default)]' ) - if self.scheme is None : - ERROR( 'Scheme not loaded; call "load_data()" first' ) - if self.model is None : - ERROR( 'Model not set; call "set_model()" method first' ) - - # store some values for later use - self.set_config('lmax', lmax) - self.set_config('ndirs', ndirs) - self.model.scheme = self.scheme - - LOG( '\n-> Simulating with "%s" model:' % self.model.name ) - - # check if kernels were already generated - tmp = glob.glob( pjoin(self.get_config('ATOMS_path'),'A_*.npy') ) - if len(tmp)>0 and not regenerate : - LOG( ' [ Kernels already computed. 
Use option "regenerate=True" to force regeneration ]' ) - return - - # create folder or delete existing files (if any) - if not exists( self.get_config('ATOMS_path') ) : - makedirs( self.get_config('ATOMS_path') ) - else : - for f in glob.glob( pjoin(self.get_config('ATOMS_path'),'*') ) : - remove( f ) - - # auxiliary data structures - aux = amico.lut.load_precomputed_rotation_matrices( lmax, ndirs ) - idx_IN, idx_OUT = amico.lut.aux_structures_generate( self.scheme, lmax ) - - # Dispatch to the right handler for each model - tic = time.time() - self.model.generate( self.get_config('ATOMS_path'), aux, idx_IN, idx_OUT, ndirs ) - LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - - - def load_kernels( self ) : - """Load rotated kernels and project to the specific gradient scheme of this subject. - Dispatch to the proper function, depending on the model. - """ - if self.model is None : - ERROR( 'Model not set; call "set_model()" method first' ) - if self.scheme is None : - ERROR( 'Scheme not loaded; call "load_data()" first' ) - - tic = time.time() - LOG( '\n-> Resampling LUT for subject "%s":' % self.get_config('subject') ) - - # auxiliary data structures - idx_OUT, Ylm_OUT = amico.lut.aux_structures_resample( self.scheme, self.get_config('lmax') ) - - # Dispatch to the right handler for each model - if self.get_config('doMergeB0') : - print( '\t* Merging multiple b0 volume(s)...' ) - else : - print( '\t* Keeping all b0 volume(s)...' ) - self.KERNELS = self.model.resample( self.get_config('ATOMS_path'), idx_OUT, Ylm_OUT, self.get_config('doMergeB0'), self.get_config('ndirs') ) - nIC = self.KERNELS['wmr'].shape[0] - nEC = self.KERNELS['wmh'].shape[0] - nISO = self.KERNELS['iso'].shape[0] - print( '\t [ OK ]' ) - - # ensure contiguous arrays for C part - self.KERNELS['wmr'] = np.ascontiguousarray( self.KERNELS['wmr'] ) - self.KERNELS['wmh'] = np.ascontiguousarray( self.KERNELS['wmh'] ) - self.KERNELS['iso'] = np.ascontiguousarray( self.KERNELS['iso'] ) - - # De-mean kernels - if self.get_config('doDemean') : - print( '\t* Demeaning signal...', end='' ) - for j in xrange(self.get_config('ndirs')) : - for i in xrange(nIC) : - self.KERNELS['wmr'][i,j,:] -= self.KERNELS['wmr'][i,j,:].mean() - for i in xrange(nEC) : - self.KERNELS['wmh'][i,j,:] -= self.KERNELS['wmh'][i,j,:].mean() - for i in xrange(nISO) : - self.KERNELS['iso'][i] -= self.KERNELS['iso'][i].mean() - print( '[ OK ]' ) - - # Normalize atoms - if self.get_config('doNormalizeKernels') : - print( '\t* Normalizing... ', end='' ) - - self.KERNELS['wmr_norm'] = np.zeros( nIC ) - for i in xrange(nIC) : - self.KERNELS['wmr_norm'][i] = np.linalg.norm( self.KERNELS['wmr'][i,0,:] ) - for j in xrange(self.get_config('ndirs')) : - self.KERNELS['wmr'][i,j,:] /= self.KERNELS['wmr_norm'][i] - - self.KERNELS['wmh_norm'] = np.zeros( nEC ) - for i in xrange(nEC) : - self.KERNELS['wmh_norm'][i] = np.linalg.norm( self.KERNELS['wmh'][i,0,:] ) - for j in xrange(self.get_config('ndirs')) : - self.KERNELS['wmh'][i,j,:] /= self.KERNELS['wmh_norm'][i] - - self.KERNELS['iso_norm'] = np.zeros( nISO ) - for i in xrange(nISO) : - self.KERNELS['iso_norm'][i] = np.linalg.norm( self.KERNELS['iso'][i,:] ) - self.KERNELS['iso'][i,:] /= self.KERNELS['iso_norm'][i] - - print( '[ OK ]' ) - - LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - - - cpdef load_dictionary( self, path, use_mask = False ) : - """Load the sparse structure previously created with "trk2dictionary" script. 
- - Parameters - ---------- - path : string - Folder containing the output of the trk2dictionary script (relative to subject path) - use_mask : boolean - If False (default) the optimization will be conducted only on the voxels actually - traversed by tracts. If True, the mask specified in trk2dictionary - (i.e. "filename_mask" paramater) will be used instead. - NB: if no mask was specified in trk2dictionary, the "tdi" and - "mask" masks are equivalent and this parameter is not influent. - """ - if self.niiDWI is None : - ERROR( 'Data not loaded; call "load_data()" first' ) - - tic = time.time() - LOG( '\n-> Loading the dictionary:' ) - self.DICTIONARY = {} - self.set_config('TRACKING_path', pjoin(self.get_config('DATA_path'),path)) - - # load mask - self.set_config('dictionary_mask', 'mask' if use_mask else 'tdi' ) - mask_filename = pjoin(self.get_config('TRACKING_path'),'dictionary_%s.nii'%self.get_config('dictionary_mask')) - if not exists( mask_filename ) : - mask_filename += '.gz' - if not exists( mask_filename ) : - ERROR( 'Dictionary not found. Execute "trk2dictionary" script first' ); - niiMASK = nibabel.load( mask_filename ) - niiMASK_hdr = niiMASK.header if nibabel.__version__ >= '2.0.0' else niiMASK.get_header() - if ( self.get_config('dim')[0]!=niiMASK.shape[0] or - self.get_config('dim')[1]!=niiMASK.shape[1] or - self.get_config('dim')[2]!=niiMASK.shape[2] or - abs(self.get_config('pixdim')[0]-niiMASK_hdr['pixdim'][1])>1e-3 or - abs(self.get_config('pixdim')[1]-niiMASK_hdr['pixdim'][2])>1e-3 or - abs(self.get_config('pixdim')[2]-niiMASK_hdr['pixdim'][3])>1e-3 ) : - print( ' [WARNING] dictionary does not have the same geometry as the dataset' ) - self.DICTIONARY['MASK'] = (niiMASK.get_data() > 0).astype(np.uint8) - - # segments from the tracts - # ------------------------ - print( '\t* Segments from the tracts... ', end='' ) - sys.stdout.flush() - - dictionary_info = load_dictionary_info( pjoin(self.get_config('TRACKING_path'), "dictionary_info.pickle") ) - - self.DICTIONARY['ndirs'] = dictionary_info['ndirs'] - - if self.DICTIONARY['ndirs'] != self.get_config('ndirs'): - ERROR( 'Dictionary is outdated. 
Execute "trk2dictionary" script first' ) - - self.DICTIONARY['TRK'] = {} - self.DICTIONARY['TRK']['norm'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_norm.dict'), dtype=np.float32 ) - self.DICTIONARY['TRK']['len'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_len.dict'), dtype=np.float32 ) - - self.DICTIONARY['IC'] = {} - self.DICTIONARY['IC']['fiber'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_f.dict'), dtype=np.uint32 ) - self.DICTIONARY['IC']['v'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_v.dict'), dtype=np.uint32 ) - self.DICTIONARY['IC']['o'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_o.dict'), dtype=np.uint16 ) - self.DICTIONARY['IC']['len'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_len.dict'), dtype=np.float32 ) - self.DICTIONARY['IC']['n'] = self.DICTIONARY['IC']['fiber'].size - self.DICTIONARY['IC']['nF'] = self.DICTIONARY['TRK']['norm'].size - - # reorder the segments based on the "v" field - idx = np.argsort( self.DICTIONARY['IC']['v'], kind='mergesort' ) - self.DICTIONARY['IC']['v'] = self.DICTIONARY['IC']['v'][ idx ] - self.DICTIONARY['IC']['o'] = self.DICTIONARY['IC']['o'][ idx ] - self.DICTIONARY['IC']['fiber'] = self.DICTIONARY['IC']['fiber'][ idx ] - self.DICTIONARY['IC']['len'] = self.DICTIONARY['IC']['len'][ idx ] - del idx - - # divide the length of each segment by the fiber length so that all the columns of the libear operator will have same length - # NB: it works in conjunction with the normalization of the kernels - cdef : - np.float32_t [:] sl = self.DICTIONARY['IC']['len'] - np.float32_t [:] tl = self.DICTIONARY['TRK']['norm'] - np.uint32_t [:] f = self.DICTIONARY['IC']['fiber'] - int s - if self.get_config('doNormalizeKernels') : - for s in xrange(self.DICTIONARY['IC']['n']) : - sl[s] /= tl[ f[s] ] - - print( '[ %d fibers and %d segments ]' % ( self.DICTIONARY['IC']['nF'], self.DICTIONARY['IC']['n'] ) ) - - # segments from the peaks - # ----------------------- - print( '\t* Segments from the peaks... ', end='' ) - sys.stdout.flush() - - self.DICTIONARY['EC'] = {} - self.DICTIONARY['EC']['v'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_v.dict'), dtype=np.uint32 ) - self.DICTIONARY['EC']['o'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_o.dict'), dtype=np.uint16 ) - self.DICTIONARY['EC']['nE'] = self.DICTIONARY['EC']['v'].size - - # reorder the segments based on the "v" field - idx = np.argsort( self.DICTIONARY['EC']['v'], kind='mergesort' ) - self.DICTIONARY['EC']['v'] = self.DICTIONARY['EC']['v'][ idx ] - self.DICTIONARY['EC']['o'] = self.DICTIONARY['EC']['o'][ idx ] - del idx - - print( '[ %d segments ]' % self.DICTIONARY['EC']['nE'] ) - - # isotropic compartments - # ---------------------- - print( '\t* Isotropic contributions... 
', end='' ) - sys.stdout.flush() - - self.DICTIONARY['ISO'] = {} - - self.DICTIONARY['nV'] = self.DICTIONARY['MASK'].sum() - - vx, vy, vz = ( self.DICTIONARY['MASK'] > 0 ).nonzero() # [TODO] find a way to avoid using int64 (not necessary and waste of memory) - vx = vx.astype(np.int32) - vy = vy.astype(np.int32) - vz = vz.astype(np.int32) - self.DICTIONARY['ISO']['v'] = vx + self.get_config('dim')[0] * ( vy + self.get_config('dim')[1] * vz ) - del vx, vy, vz - - # reorder the segments based on the "v" field - idx = np.argsort( self.DICTIONARY['ISO']['v'], kind='mergesort' ) - self.DICTIONARY['ISO']['v'] = self.DICTIONARY['ISO']['v'][ idx ] - del idx - - print( '[ %d voxels ]' % self.DICTIONARY['nV'] ) - - # post-processing - # --------------- - print( '\t* Post-processing... ', end='' ) - sys.stdout.flush() - - # get the indices to extract the VOI as in MATLAB (in place of DICTIONARY.MASKidx) - idx = self.DICTIONARY['MASK'].ravel(order='F').nonzero()[0] - self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] = np.unravel_index( idx, self.DICTIONARY['MASK'].shape, order='F' ) - - lut = np.zeros( self.get_config('dim'), dtype=np.uint32 ).ravel() - for i in xrange(idx.size) : - lut[ idx[i] ] = i - self.DICTIONARY['IC'][ 'v'] = lut[ self.DICTIONARY['IC'][ 'v'] ] - self.DICTIONARY['EC'][ 'v'] = lut[ self.DICTIONARY['EC'][ 'v'] ] - self.DICTIONARY['ISO']['v'] = lut[ self.DICTIONARY['ISO']['v'] ] - - print( '[ OK ]' ) - - LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - - - def set_threads( self, n = None ) : - """Set the number of threads to use for the matrix-vector operations with A and A'. - - Parameters - ---------- - n : integer - Number of threads to use (default : number of CPUs in the system) - """ - if n is None : - # Set to the number of CPUs in the system - try : - import multiprocessing - n = multiprocessing.cpu_count() - except : - n = 1 - - if n < 1 or n > 255 : - ERROR( 'Number of threads must be between 1 and 255' ) - if self.DICTIONARY is None : - ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) - if self.KERNELS is None : - ERROR( 'Response functions not generated; call "generate_kernels()" and "load_kernels()" first' ) - - self.THREADS = {} - self.THREADS['n'] = n - - cdef : - long [:] C - long t, tot, i1, i2, N, c - int i - - tic = time.time() - LOG( '\n-> Distributing workload to different threads:' ) - print( '\t* Number of threads : %d' % n ) - - # Distribute load for the computation of A*x product - print( '\t* A operator... 
', end='' ) - sys.stdout.flush() - - if self.DICTIONARY['IC']['n'] > 0 : - self.THREADS['IC'] = np.zeros( n+1, dtype=np.uint32 ) - if n > 1 : - N = np.floor( self.DICTIONARY['IC']['n']/n ) - t = 1 - tot = 0 - C = np.bincount( self.DICTIONARY['IC']['v'] ) - for c in C : - tot += c - if tot >= N : - self.THREADS['IC'][t] = self.THREADS['IC'][t-1] + tot - t += 1 - tot = 0 - self.THREADS['IC'][n] = self.DICTIONARY['IC']['n'] - - # check if some threads are not assigned any segment - if np.count_nonzero( np.diff( self.THREADS['IC'].astype(np.int32) ) <= 0 ) : - self.THREADS = None - ERROR( 'Too many threads for the IC compartments to evaluate; try decreasing the number' ) - else : - self.THREADS['IC'] = None - - if self.DICTIONARY['EC']['nE'] > 0 : - self.THREADS['EC'] = np.zeros( n+1, dtype=np.uint32 ) - for i in xrange(n) : - self.THREADS['EC'][i] = np.searchsorted( self.DICTIONARY['EC']['v'], self.DICTIONARY['IC']['v'][ self.THREADS['IC'][i] ] ) - self.THREADS['EC'][n] = self.DICTIONARY['EC']['nE'] - - # check if some threads are not assigned any segment - if np.count_nonzero( np.diff( self.THREADS['EC'].astype(np.int32) ) <= 0 ) : - self.THREADS = None - ERROR( 'Too many threads for the EC compartments to evaluate; try decreasing the number' ) - else : - self.THREADS['EC'] = None - - if self.DICTIONARY['nV'] > 0 : - self.THREADS['ISO'] = np.zeros( n+1, dtype=np.uint32 ) - for i in xrange(n) : - self.THREADS['ISO'][i] = np.searchsorted( self.DICTIONARY['ISO']['v'], self.DICTIONARY['IC']['v'][ self.THREADS['IC'][i] ] ) - self.THREADS['ISO'][n] = self.DICTIONARY['nV'] - - # check if some threads are not assigned any segment - if np.count_nonzero( np.diff( self.THREADS['ISO'].astype(np.int32) ) <= 0 ) : - self.THREADS = None - ERROR( 'Too many threads for the ISO compartments to evaluate; try decreasing the number' ) - else : - self.THREADS['ISO'] = None - - print( '[ OK ]' ) - - # Distribute load for the computation of At*y product - print( '\t* A\' operator... 
', end='' ) - sys.stdout.flush() - - if self.DICTIONARY['IC']['n'] > 0 : - self.THREADS['ICt'] = np.full( self.DICTIONARY['IC']['n'], n-1, dtype=np.uint8 ) - if n > 1 : - idx = np.argsort( self.DICTIONARY['IC']['fiber'], kind='mergesort' ) - C = np.bincount( self.DICTIONARY['IC']['fiber'] ) - t = tot = i1 = i2 = 0 - N = np.floor(self.DICTIONARY['IC']['n']/n) - for c in C : - i2 += c - tot += c - if tot >= N : - self.THREADS['ICt'][ i1:i2 ] = t - t += 1 - if t==n-1 : - break - i1 = i2 - tot = c - self.THREADS['ICt'][idx] = self.THREADS['ICt'].copy() - - else : - self.THREADS['ICt'] = None - - if self.DICTIONARY['EC']['nE'] > 0 : - self.THREADS['ECt'] = np.zeros( n+1, dtype=np.uint32 ) - N = np.floor( self.DICTIONARY['EC']['nE']/n ) - for i in xrange(1,n) : - self.THREADS['ECt'][i] = self.THREADS['ECt'][i-1] + N - self.THREADS['ECt'][n] = self.DICTIONARY['EC']['nE'] - - # check if some threads are not assigned any segment - if np.count_nonzero( np.diff( self.THREADS['ECt'].astype(np.int32) ) <= 0 ) : - self.THREADS = None - ERROR( 'Too many threads for the EC compartments to evaluate; try decreasing the number' ) - else : - self.THREADS['ECt'] = None - - if self.DICTIONARY['nV'] > 0 : - self.THREADS['ISOt'] = np.zeros( n+1, dtype=np.uint32 ) - N = np.floor( self.DICTIONARY['nV']/n ) - for i in xrange(1,n) : - self.THREADS['ISOt'][i] = self.THREADS['ISOt'][i-1] + N - self.THREADS['ISOt'][n] = self.DICTIONARY['nV'] - - # check if some threads are not assigned any segment - if np.count_nonzero( np.diff( self.THREADS['ISOt'].astype(np.int32) ) <= 0 ) : - self.THREADS = None - ERROR( 'Too many threads for the ISO compartments to evaluate; try decreasing the number' ) - else : - self.THREADS['ISOt'] = None - - print( '[ OK ]' ) - - LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - - - def build_operator( self, regtikhonov=0.1 ) : - """Compile/build the operator for computing the matrix-vector multiplications by A and A' - using the informations from self.DICTIONARY, self.KERNELS and self.THREADS. - NB: needs to call this function to update pointers to data structures in case - the data is changed in self.DICTIONARY, self.KERNELS or self.THREADS. - """ - if self.DICTIONARY is None : - ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) - if self.KERNELS is None : - ERROR( 'Response functions not generated; call "generate_kernels()" and "load_kernels()" first' ) - if self.THREADS is None : - ERROR( 'Threads not set; call "set_threads()" first' ) - - tic = time.time() - LOG( '\n-> Building linear operator A:' ) - - # need to pass these parameters at runtime for compiling the C code - from commit.operator import config - config.nTHREADS = self.THREADS['n'] - config.model = self.model.id - config.nIC = self.KERNELS['wmr'].shape[0] - config.nEC = self.KERNELS['wmh'].shape[0] - config.nISO = self.KERNELS['iso'].shape[0] - if not 'commit.operator.operator' in sys.modules : - import commit.operator.operator - else : - reload( sys.modules['commit.operator.operator'] ) - self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, regtikhonov ) - - LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - - - def get_y( self ): - """ - Returns a numpy array that corresponds to the 'y' vector of the optimisation problem. - NB: this can be run only after having loaded the dictionary and the data. 
- """ - if self.DICTIONARY is None : - ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) - if self.niiDWI is None : - raise RuntimeError( 'Data not loaded; call "load_data()" first.' ) - - y = self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float64) - #return y - """print(type(y)) - print(y.shape) - print(y.shape[0]) - print(self.KERNELS['wmr'].shape[0]) - print(y.shape[0] + self.KERNELS['wmr'].shape[0])""" - y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0], dtype=np.float64) - y2[0:y.shape[0]] = y - #print(y2.shape) - return y2 - #""" - - - def fit( self, tol_fun = 1e-3, tol_x = 1e-6, max_iter = 100, verbose = 1, x0 = None, regularisation = None ) : - """Fit the model to the data. - - Parameters - ---------- - tol_fun : float - Tolerance on the objective function (default : 1e-3) - max_iter : integer - Maximum number of iterations (default : 100) - verbose : integer - Level of verbosity: 0=no print, 1=print progress (default : 1) - x0 : np.array - Initial guess for the solution of the problem (default : None) - regularisation : commit.solvers.init_regularisation object - Python dictionary that describes the wanted regularisation term. - Check the documentation of commit.solvers.init_regularisation to see - how to properly define the wanted mathematical formulation - ( default : None ) - """ - if self.niiDWI is None : - ERROR( 'Data not loaded; call "load_data()" first' ) - if self.DICTIONARY is None : - ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) - if self.KERNELS is None : - ERROR( 'Response functions not generated; call "generate_kernels()" and "load_kernels()" first' ) - if self.THREADS is None : - ERROR( 'Threads not set; call "set_threads()" first' ) - if self.A is None : - ERROR( 'Operator not built; call "build_operator()" first' ) - - if x0 is not None : - if x0.shape[0] != self.A.shape[1] : - ERROR( 'x0 dimension does not match the number of columns of the dictionary' ) - if regularisation is None : - regularisation = commit.solvers.init_regularisation(self) - - self.CONFIG['optimization'] = {} - self.CONFIG['optimization']['tol_fun'] = tol_fun - self.CONFIG['optimization']['tol_x'] = tol_x - self.CONFIG['optimization']['max_iter'] = max_iter - self.CONFIG['optimization']['verbose'] = verbose - self.CONFIG['optimization']['regularisation'] = regularisation - - # run solver - t = time.time() - LOG( '\n-> Fit model:' ) - - self.x, opt_details = commit.solvers.solve(self.get_y(), self.A, self.A.T, tol_fun = tol_fun, tol_x = tol_x, max_iter = max_iter, verbose = verbose, x0 = x0, regularisation = regularisation) - - self.CONFIG['optimization']['fit_details'] = opt_details - self.CONFIG['optimization']['fit_time'] = time.time()-t - - LOG( '\n [ %s ]' % ( time.strftime("%Hh %Mm %Ss", time.gmtime(self.CONFIG['optimization']['fit_time']) ) ) ) - - - def save_results( self, path_suffix = None, save_opt_details = True, save_coeff = False, save_est_dwi = False ) : - """Save the output (coefficients, errors, maps etc). 
- - Parameters - ---------- - path_suffix : string - Text to be appended to "Results" to create the output path (default : None) - save_opt_details : boolean - Save everything in a pickle file containing the following list L: - L[0]: dictionary with all the configuration details - L[1]: np.array obtained through the optimisation process with the normalised kernels - L[2]: np.array renormalisation of L[1] - (default : True) - save_coeff : boolean - Save the coefficients related to each compartment in txt files - and a pickle file containing the configuration details. - (default : False) - save_est_dwi : boolean - Save the estimated DW-MRI signal (default : False) - """ - if self.x is None : - ERROR( 'Model not fitted to the data; call "fit()" first' ) - - RESULTS_path = 'Results_' + self.model.id - if path_suffix : - self.set_config('path_suffix', path_suffix) - RESULTS_path = RESULTS_path + path_suffix - - LOG( '\n-> Saving results to "%s/*":' % RESULTS_path ) - tic = time.time() - - nF = self.DICTIONARY['IC']['nF'] - nE = self.DICTIONARY['EC']['nE'] - nV = self.DICTIONARY['nV'] - nS = self.KERNELS['wmr'].shape[2] - norm_fib = np.ones( nF ) - # x is the x of the original problem - # self.x is the x preconditioned - if self.get_config('doNormalizeKernels') : - # renormalize the coefficients - norm1 = np.repeat(self.KERNELS['wmr_norm'],nF) - norm2 = np.repeat(self.KERNELS['wmh_norm'],nE) - norm3 = np.repeat(self.KERNELS['iso_norm'],nV) - norm_fib = np.kron(np.ones(self.KERNELS['wmr'].shape[0]), self.DICTIONARY['TRK']['norm']) - x = self.x / np.hstack( (norm1*norm_fib,norm2,norm3) ) - else : - x = self.x - - # create folder or delete existing files (if any) - RESULTS_path = pjoin( self.get_config('TRACKING_path'), RESULTS_path ) - if not exists( RESULTS_path ) : - makedirs( RESULTS_path ) - else : - for f in glob.glob( pjoin(RESULTS_path,'*') ) : - remove( f ) - self.set_config('RESULTS_path', RESULTS_path) - - # Map of voxelwise errors - print( '\t* Fitting errors:' ) - - niiMAP_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) - affine = self.niiDWI.affine if nibabel.__version__ >= '2.0.0' else self.niiDWI.get_affine() - niiMAP = nibabel.Nifti1Image( niiMAP_img, affine ) - niiMAP_hdr = niiMAP.header if nibabel.__version__ >= '2.0.0' else niiMAP.get_header() - - y_mea = np.reshape( self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float32), (nV,-1) ) - y_est = np.reshape( self.A.dot(self.x)[:int(nV*nS)], (nV,-1) ).astype(np.float32) - - print( '\t\t- RMSE... ', end='' ) - sys.stdout.flush() - tmp = np.sqrt( np.mean((y_mea-y_est)**2,axis=1) ) - niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = tmp - niiMAP_hdr['cal_min'] = 0 - niiMAP_hdr['cal_max'] = tmp.max() - nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_RMSE.nii.gz') ) - print( '[ %.3f +/- %.3f ]' % ( tmp.mean(), tmp.std() ) ) - - print( '\t\t- NRMSE... 
', end='' ) - sys.stdout.flush() - tmp = np.sum(y_mea**2,axis=1) - idx = np.where( tmp < 1E-12 ) - tmp[ idx ] = 1 - tmp = np.sqrt( np.sum((y_mea-y_est)**2,axis=1) / tmp ) - tmp[ idx ] = 0 - niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = tmp - niiMAP_hdr['cal_min'] = 0 - niiMAP_hdr['cal_max'] = 1 - nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_NRMSE.nii.gz') ) - print( '[ %.3f +/- %.3f ]' % ( tmp.mean(), tmp.std() ) ) - - # Map of compartment contributions - print( '\t* Voxelwise contributions:' ) - - print( '\t\t- Intra-axonal... ', end='' ) - sys.stdout.flush() - niiIC_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) - if len(self.KERNELS['wmr']) > 0 : - offset = nF * self.KERNELS['wmr'].shape[0] - tmp = ( x[:offset].reshape( (-1,nF) ) * norm_fib.reshape( (-1,nF) ) ).sum( axis=0 ) - xv = np.bincount( self.DICTIONARY['IC']['v'], minlength=nV, - weights=tmp[ self.DICTIONARY['IC']['fiber'] ] * self.DICTIONARY['IC']['len'] - ).astype(np.float32) - niiIC_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv - print( '[ OK ]' ) - - print( '\t\t- Extra-axonal... ', end='' ) - sys.stdout.flush() - niiEC_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) - if len(self.KERNELS['wmh']) > 0 : - offset = nF * self.KERNELS['wmr'].shape[0] - tmp = x[offset:offset+nE*len(self.KERNELS['wmh'])].reshape( (-1,nE) ).sum( axis=0 ) - xv = np.bincount( self.DICTIONARY['EC']['v'], weights=tmp, minlength=nV ).astype(np.float32) - niiEC_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv - print( '[ OK ]' ) - - print( '\t\t- Isotropic... ', end='' ) - sys.stdout.flush() - niiISO_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) - if len(self.KERNELS['iso']) > 0 : - offset = nF * self.KERNELS['wmr'].shape[0] + nE * self.KERNELS['wmh'].shape[0] - xv = x[offset:].reshape( (-1,nV) ).sum( axis=0 ) - niiISO_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv - print( ' [ OK ]' ) - - if self.get_config('doNormalizeMaps') : - niiIC = nibabel.Nifti1Image( niiIC_img / ( niiIC_img + niiEC_img + niiISO_img + 1e-16), affine ) - niiEC = nibabel.Nifti1Image( niiEC_img / ( niiIC_img + niiEC_img + niiISO_img + 1E-16), affine ) - niiISO = nibabel.Nifti1Image( niiISO_img / ( niiIC_img + niiEC_img + niiISO_img + 1E-16), affine ) - else: - niiIC = nibabel.Nifti1Image( niiIC_img, affine ) - niiEC = nibabel.Nifti1Image( niiEC_img, affine ) - niiISO = nibabel.Nifti1Image( niiISO_img, affine ) - - nibabel.save( niiIC , pjoin(RESULTS_path,'compartment_IC.nii.gz') ) - nibabel.save( niiEC , pjoin(RESULTS_path,'compartment_EC.nii.gz') ) - nibabel.save( niiISO , pjoin(RESULTS_path,'compartment_ISO.nii.gz') ) - - # Configuration and results - print( '\t* Configuration and results:' ) - - if save_opt_details: - print( '\t\t- results.pickle... ', end='' ) - sys.stdout.flush() - with open( pjoin(RESULTS_path,'results.pickle'), 'wb+' ) as fid : - pickle.dump( [self.CONFIG, self.x, x], fid, protocol=2 ) - print( '[ OK ]' ) - - if save_coeff: - print( '\t\t- Coefficients txt files... 
', end='' ) - sys.stdout.flush() - if len(self.KERNELS['wmr']) > 0 : - offset = nF * self.KERNELS['wmr'].shape[0] - np.savetxt(pjoin(RESULTS_path,'xic.txt'), x[:offset], fmt='%12.5e') - if len(self.KERNELS['wmh']) > 0 : - offset = nF * self.KERNELS['wmr'].shape[0] - np.savetxt(pjoin(RESULTS_path,'xec.txt'), x[offset:offset+nE*len(self.KERNELS['wmh'])], fmt='%12.5e') - if len(self.KERNELS['iso']) > 0 : - offset = nF * self.KERNELS['wmr'].shape[0] + nE * self.KERNELS['wmh'].shape[0] - np.savetxt(pjoin(RESULTS_path,'xiso.txt'), x[offset:], fmt='%12.5e') - with open( pjoin(RESULTS_path,'config.pickle'), 'wb+' ) as fid : - pickle.dump( self.CONFIG, fid, protocol=2 ) - print( '[ OK ]' ) - - if save_est_dwi : - print( '\t\t- Estimated signal... ', end='' ) - sys.stdout.flush() - self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ] = y_est - nibabel.save( nibabel.Nifti1Image( self.niiDWI_img , affine ), pjoin(RESULTS_path,'fit_signal_estimated.nii.gz') ) - self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ] = y_mea - print( '[ OK ]' ) - - LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) +#!python +#cython: language_level=3, boundscheck=False, wraparound=False, nonecheck=False, cdivision=True, initializedcheck=False, binding=False +from __future__ import print_function +cimport cython +import numpy as np +cimport numpy as np + +import time +import glob +import sys +from os import makedirs, remove +from os.path import exists, join as pjoin, isfile +import nibabel +import pickle +import commit.models +import commit.solvers +import amico.scheme +import amico.lut +import pyximport +pyximport.install( reload_support=True, language_level=3 ) +from amico.util import LOG, NOTE, WARNING, ERROR + + +def setup( lmax = 12, ndirs = 32761 ) : + """General setup/initialization of the COMMIT framework. + + Parameters + ---------- + lmax : int + Maximum SH order to use for the rotation phase (default : 12) + ndirs : int + Number of directions on the half of the sphere representing the possible orientations of the response functions (default : 32761) + """ + + if not amico.lut.is_valid(ndirs): + ERROR( 'Unsupported value for ndirs.\nNote: Supported values for ndirs are [500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000, 32761 (default)]' ) + + amico.lut.precompute_rotation_matrices( lmax, ndirs ) + +def load_dictionary_info(filename): + """Function to load dictionary info file + + Parameters + ---------- + filename : string + This value is always COMMIT_PATH + dictionary_info.pickle + """ + if not isfile( filename ): + ERROR( 'Dictionary is outdated or not found. Execute "trk2dictionary" script first' ) + with open( filename, 'rb' ) as dictionary_info_file: + if sys.version_info.major == 3: + aux = pickle.load( dictionary_info_file, fix_imports=True, encoding='bytes' ) + # Pickle files written by Python 2 are loaded with byte + # keys, whereas those written by Python 3 are loaded with + # str keys, even when both are written using protocol=2 + result_aux = {(k.decode() if hasattr(k,"decode") else k): v for k, v in aux.items()} + return result_aux + else: + return pickle.load( dictionary_info_file ) + +cdef class Evaluation : + """Class to hold all the information (data and parameters) when performing an + evaluation with the COMMIT framework. 
+ """ + cdef public niiDWI + cdef public niiDWI_img + cdef public scheme + cdef public model + cdef public KERNELS + cdef public DICTIONARY + cdef public THREADS + cdef public A + cdef public x + cdef public CONFIG + + def __init__( self, study_path, subject ) : + """Setup the data structures with default values. + + Parameters + ---------- + study_path : string + The path to the folder containing all the subjects from one study + subject : string + The path (relative to previous folder) to the subject folder + """ + self.niiDWI = None # set by "load_data" method + self.scheme = None # set by "load_data" method + self.model = None # set by "set_model" method + self.KERNELS = None # set by "load_kernels" method + self.DICTIONARY = None # set by "load_dictionary" method + self.THREADS = None # set by "set_threads" method + self.A = None # set by "build_operator" method + self.x = None # set by "fit" method + + # store all the parameters of an evaluation with COMMIT + self.CONFIG = {} + self.set_config('study_path', study_path) + self.set_config('subject', subject) + self.set_config('DATA_path', pjoin( study_path, subject )) + + self.set_config('doNormalizeSignal', True) + self.set_config('doMergeB0', False) + self.set_config('doNormalizeKernels', True) + self.set_config('doDemean', False) + self.set_config('doNormalizeMaps', False) + + + + def set_config( self, key, value ) : + self.CONFIG[ key ] = value + + def get_config( self, key ) : + return self.CONFIG.get( key ) + + + def load_data( self, dwi_filename = 'DWI.nii', scheme_filename = 'DWI.scheme', b0_thr = 0 ) : + """Load the diffusion signal and its corresponding acquisition scheme. + + Parameters + ---------- + dwi_filename : string + The file name of the DWI data, relative to the subject folder (default : 'DWI.nii') + scheme_filename : string + The file name of the corresponding acquisition scheme (default : 'DWI.scheme') + b0_thr : float + The threshold below which a b-value is considered a b0 (default : 0) + """ + + # Loading data and acquisition scheme + tic = time.time() + LOG( '\n-> Loading data:' ) + + print( '\t* DWI signal:' ) + self.set_config('dwi_filename', dwi_filename) + self.niiDWI = nibabel.load( pjoin( self.get_config('DATA_path'), dwi_filename) ) + self.niiDWI_img = self.niiDWI.get_data().astype(np.float32) + if self.niiDWI_img.ndim ==3 : + self.niiDWI_img = np.expand_dims( self.niiDWI_img, axis=3 ) + hdr = self.niiDWI.header if nibabel.__version__ >= '2.0.0' else self.niiDWI.get_header() + self.set_config('dim', self.niiDWI_img.shape[0:3]) + self.set_config('pixdim', tuple( hdr.get_zooms()[:3] )) + print( '\t\t- dim = %d x %d x %d x %d' % self.niiDWI_img.shape ) + print( '\t\t- pixdim = %.3f x %.3f x %.3f' % self.get_config('pixdim') ) + + print( '\t* Acquisition scheme:' ) + self.set_config('scheme_filename', scheme_filename) + self.set_config('b0_thr', b0_thr) + self.scheme = amico.scheme.Scheme( pjoin( self.get_config('DATA_path'), scheme_filename), b0_thr ) + print( '\t\t- %d samples, %d shells' % ( self.scheme.nS, len(self.scheme.shells) ) ) + print( '\t\t- %d @ b=0' % ( self.scheme.b0_count ), end='' ) + for i in xrange(len(self.scheme.shells)) : + print( ', %d @ b=%.1f' % ( len(self.scheme.shells[i]['idx']), self.scheme.shells[i]['b'] ), end='' ) + print() + + if self.scheme.nS != self.niiDWI_img.shape[3] : + ERROR( 'Scheme does not match with DWI data' ) + + if self.scheme.dwi_count == 0 : + ERROR( 'There are no DWI volumes in the data' ) + + LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) + + # 
Preprocessing + tic = time.time() + LOG( '\n-> Preprocessing:' ) + + if self.get_config('doNormalizeSignal') : + if self.scheme.b0_count > 0 : + print( '\t* Normalizing to b0... ', end='' ) + sys.stdout.flush() + mean = np.mean( self.niiDWI_img[:,:,:,self.scheme.b0_idx], axis=3 ) + idx = mean <= 0 + mean[ idx ] = 1 + mean = 1 / mean + mean[ idx ] = 0 + for i in xrange(self.scheme.nS) : + self.niiDWI_img[:,:,:,i] *= mean + else : + print( '\t* There are no b0 volume(s) for normalization...', end='' ) + print( '[ min=%.2f, mean=%.2f, max=%.2f ]' % ( self.niiDWI_img.min(), self.niiDWI_img.mean(), self.niiDWI_img.max() ) ) + + if self.scheme.b0_count > 1 : + if self.get_config('doMergeB0') : + print( '\t* Merging multiple b0 volume(s)... ', end='' ) + mean = np.expand_dims( np.mean( self.niiDWI_img[:,:,:,self.scheme.b0_idx], axis=3 ), axis=3 ) + self.niiDWI_img = np.concatenate( (mean, self.niiDWI_img[:,:,:,self.scheme.dwi_idx]), axis=3 ) + else : + print( '\t* Keeping all b0 volume(s)... ', end='' ) + print( '[ %d x %d x %d x %d ]' % self.niiDWI_img.shape ) + + if self.get_config('doDemean') : + print( '\t* Demeaning signal... ', end='' ) + sys.stdout.flush() + mean = np.repeat( np.expand_dims(np.mean(self.niiDWI_img,axis=3),axis=3), self.niiDWI_img.shape[3], axis=3 ) + self.niiDWI_img = self.niiDWI_img - mean + print( '[ min=%.2f, mean=%.2f, max=%.2f ]' % ( self.niiDWI_img.min(), self.niiDWI_img.mean(), self.niiDWI_img.max() ) ) + + LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) + + + def set_model( self, model_name ) : + """Set the model to use to describe the signal contributions in each voxel. + + Parameters + ---------- + model_name : string + The name of the model (must match a class name in "commit.models" module) + """ + # Call the specific model constructor + if hasattr(commit.models, model_name ) : + self.model = getattr(commit.models,model_name)() + else : + ERROR( 'Model "%s" not recognized' % model_name ) + + self.set_config('ATOMS_path', pjoin( self.get_config('study_path'), 'kernels', self.model.id )) + + + def generate_kernels( self, regenerate = False, lmax = 12, ndirs = 32761 ) : + """Generate the high-resolution response functions for each compartment. + Dispatch to the proper function, depending on the model. + + Parameters + ---------- + regenerate : boolean + Regenerate kernels if they already exist (default : False) + lmax : int + Maximum SH order to use for the rotation procedure (default : 12) + ndirs : int + Number of directions on the half of the sphere representing the possible orientations of the response functions (default : 32761) + """ + if not amico.lut.is_valid(ndirs): + ERROR( 'Unsupported value for ndirs.\nNote: Supported values for ndirs are [500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000, 32761 (default)]' ) + if self.scheme is None : + ERROR( 'Scheme not loaded; call "load_data()" first' ) + if self.model is None : + ERROR( 'Model not set; call "set_model()" method first' ) + + # store some values for later use + self.set_config('lmax', lmax) + self.set_config('ndirs', ndirs) + self.model.scheme = self.scheme + + LOG( '\n-> Simulating with "%s" model:' % self.model.name ) + + # check if kernels were already generated + tmp = glob.glob( pjoin(self.get_config('ATOMS_path'),'A_*.npy') ) + if len(tmp)>0 and not regenerate : + LOG( ' [ Kernels already computed. 
Use option "regenerate=True" to force regeneration ]' ) + return + + # create folder or delete existing files (if any) + if not exists( self.get_config('ATOMS_path') ) : + makedirs( self.get_config('ATOMS_path') ) + else : + for f in glob.glob( pjoin(self.get_config('ATOMS_path'),'*') ) : + remove( f ) + + # auxiliary data structures + aux = amico.lut.load_precomputed_rotation_matrices( lmax, ndirs ) + idx_IN, idx_OUT = amico.lut.aux_structures_generate( self.scheme, lmax ) + + # Dispatch to the right handler for each model + tic = time.time() + self.model.generate( self.get_config('ATOMS_path'), aux, idx_IN, idx_OUT, ndirs ) + LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) + + + def load_kernels( self ) : + """Load rotated kernels and project to the specific gradient scheme of this subject. + Dispatch to the proper function, depending on the model. + """ + if self.model is None : + ERROR( 'Model not set; call "set_model()" method first' ) + if self.scheme is None : + ERROR( 'Scheme not loaded; call "load_data()" first' ) + + tic = time.time() + LOG( '\n-> Resampling LUT for subject "%s":' % self.get_config('subject') ) + + # auxiliary data structures + idx_OUT, Ylm_OUT = amico.lut.aux_structures_resample( self.scheme, self.get_config('lmax') ) + + # Dispatch to the right handler for each model + if self.get_config('doMergeB0') : + print( '\t* Merging multiple b0 volume(s)...' ) + else : + print( '\t* Keeping all b0 volume(s)...' ) + self.KERNELS = self.model.resample( self.get_config('ATOMS_path'), idx_OUT, Ylm_OUT, self.get_config('doMergeB0'), self.get_config('ndirs') ) + nIC = self.KERNELS['wmr'].shape[0] + nEC = self.KERNELS['wmh'].shape[0] + nISO = self.KERNELS['iso'].shape[0] + print( '\t [ OK ]' ) + + # ensure contiguous arrays for C part + self.KERNELS['wmr'] = np.ascontiguousarray( self.KERNELS['wmr'] ) + self.KERNELS['wmh'] = np.ascontiguousarray( self.KERNELS['wmh'] ) + self.KERNELS['iso'] = np.ascontiguousarray( self.KERNELS['iso'] ) + + # De-mean kernels + if self.get_config('doDemean') : + print( '\t* Demeaning signal...', end='' ) + for j in xrange(self.get_config('ndirs')) : + for i in xrange(nIC) : + self.KERNELS['wmr'][i,j,:] -= self.KERNELS['wmr'][i,j,:].mean() + for i in xrange(nEC) : + self.KERNELS['wmh'][i,j,:] -= self.KERNELS['wmh'][i,j,:].mean() + for i in xrange(nISO) : + self.KERNELS['iso'][i] -= self.KERNELS['iso'][i].mean() + print( '[ OK ]' ) + + # Normalize atoms + if self.get_config('doNormalizeKernels') : + print( '\t* Normalizing... ', end='' ) + + self.KERNELS['wmr_norm'] = np.zeros( nIC ) + for i in xrange(nIC) : + self.KERNELS['wmr_norm'][i] = np.linalg.norm( self.KERNELS['wmr'][i,0,:] ) + for j in xrange(self.get_config('ndirs')) : + self.KERNELS['wmr'][i,j,:] /= self.KERNELS['wmr_norm'][i] + + self.KERNELS['wmh_norm'] = np.zeros( nEC ) + for i in xrange(nEC) : + self.KERNELS['wmh_norm'][i] = np.linalg.norm( self.KERNELS['wmh'][i,0,:] ) + for j in xrange(self.get_config('ndirs')) : + self.KERNELS['wmh'][i,j,:] /= self.KERNELS['wmh_norm'][i] + + self.KERNELS['iso_norm'] = np.zeros( nISO ) + for i in xrange(nISO) : + self.KERNELS['iso_norm'][i] = np.linalg.norm( self.KERNELS['iso'][i,:] ) + self.KERNELS['iso'][i,:] /= self.KERNELS['iso_norm'][i] + + print( '[ OK ]' ) + + LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) + + + cpdef load_dictionary( self, path, use_mask = False ) : + """Load the sparse structure previously created with "trk2dictionary" script. 
+ + Parameters + ---------- + path : string + Folder containing the output of the trk2dictionary script (relative to subject path) + use_mask : boolean + If False (default) the optimization will be conducted only on the voxels actually + traversed by tracts. If True, the mask specified in trk2dictionary + (i.e. "filename_mask" paramater) will be used instead. + NB: if no mask was specified in trk2dictionary, the "tdi" and + "mask" masks are equivalent and this parameter is not influent. + """ + if self.niiDWI is None : + ERROR( 'Data not loaded; call "load_data()" first' ) + + tic = time.time() + LOG( '\n-> Loading the dictionary:' ) + self.DICTIONARY = {} + self.set_config('TRACKING_path', pjoin(self.get_config('DATA_path'),path)) + + # load mask + self.set_config('dictionary_mask', 'mask' if use_mask else 'tdi' ) + mask_filename = pjoin(self.get_config('TRACKING_path'),'dictionary_%s.nii'%self.get_config('dictionary_mask')) + if not exists( mask_filename ) : + mask_filename += '.gz' + if not exists( mask_filename ) : + ERROR( 'Dictionary not found. Execute "trk2dictionary" script first' ); + niiMASK = nibabel.load( mask_filename ) + niiMASK_hdr = niiMASK.header if nibabel.__version__ >= '2.0.0' else niiMASK.get_header() + if ( self.get_config('dim')[0]!=niiMASK.shape[0] or + self.get_config('dim')[1]!=niiMASK.shape[1] or + self.get_config('dim')[2]!=niiMASK.shape[2] or + abs(self.get_config('pixdim')[0]-niiMASK_hdr['pixdim'][1])>1e-3 or + abs(self.get_config('pixdim')[1]-niiMASK_hdr['pixdim'][2])>1e-3 or + abs(self.get_config('pixdim')[2]-niiMASK_hdr['pixdim'][3])>1e-3 ) : + print( ' [WARNING] dictionary does not have the same geometry as the dataset' ) + self.DICTIONARY['MASK'] = (niiMASK.get_data() > 0).astype(np.uint8) + + # segments from the tracts + # ------------------------ + print( '\t* Segments from the tracts... ', end='' ) + sys.stdout.flush() + + dictionary_info = load_dictionary_info( pjoin(self.get_config('TRACKING_path'), "dictionary_info.pickle") ) + + self.DICTIONARY['ndirs'] = dictionary_info['ndirs'] + + if self.DICTIONARY['ndirs'] != self.get_config('ndirs'): + ERROR( 'Dictionary is outdated. 
Execute "trk2dictionary" script first' ) + + self.DICTIONARY['TRK'] = {} + self.DICTIONARY['TRK']['norm'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_norm.dict'), dtype=np.float32 ) + self.DICTIONARY['TRK']['len'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_len.dict'), dtype=np.float32 ) + + self.DICTIONARY['IC'] = {} + self.DICTIONARY['IC']['fiber'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_f.dict'), dtype=np.uint32 ) + self.DICTIONARY['IC']['v'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_v.dict'), dtype=np.uint32 ) + self.DICTIONARY['IC']['o'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_o.dict'), dtype=np.uint16 ) + self.DICTIONARY['IC']['len'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_len.dict'), dtype=np.float32 ) + self.DICTIONARY['IC']['n'] = self.DICTIONARY['IC']['fiber'].size + self.DICTIONARY['IC']['nF'] = self.DICTIONARY['TRK']['norm'].size + + # reorder the segments based on the "v" field + idx = np.argsort( self.DICTIONARY['IC']['v'], kind='mergesort' ) + self.DICTIONARY['IC']['v'] = self.DICTIONARY['IC']['v'][ idx ] + self.DICTIONARY['IC']['o'] = self.DICTIONARY['IC']['o'][ idx ] + self.DICTIONARY['IC']['fiber'] = self.DICTIONARY['IC']['fiber'][ idx ] + self.DICTIONARY['IC']['len'] = self.DICTIONARY['IC']['len'][ idx ] + del idx + + # divide the length of each segment by the fiber length so that all the columns of the libear operator will have same length + # NB: it works in conjunction with the normalization of the kernels + cdef : + np.float32_t [:] sl = self.DICTIONARY['IC']['len'] + np.float32_t [:] tl = self.DICTIONARY['TRK']['norm'] + np.uint32_t [:] f = self.DICTIONARY['IC']['fiber'] + int s + if self.get_config('doNormalizeKernels') : + for s in xrange(self.DICTIONARY['IC']['n']) : + sl[s] /= tl[ f[s] ] + + print( '[ %d fibers and %d segments ]' % ( self.DICTIONARY['IC']['nF'], self.DICTIONARY['IC']['n'] ) ) + + # segments from the peaks + # ----------------------- + print( '\t* Segments from the peaks... ', end='' ) + sys.stdout.flush() + + self.DICTIONARY['EC'] = {} + self.DICTIONARY['EC']['v'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_v.dict'), dtype=np.uint32 ) + self.DICTIONARY['EC']['o'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_o.dict'), dtype=np.uint16 ) + self.DICTIONARY['EC']['nE'] = self.DICTIONARY['EC']['v'].size + + # reorder the segments based on the "v" field + idx = np.argsort( self.DICTIONARY['EC']['v'], kind='mergesort' ) + self.DICTIONARY['EC']['v'] = self.DICTIONARY['EC']['v'][ idx ] + self.DICTIONARY['EC']['o'] = self.DICTIONARY['EC']['o'][ idx ] + del idx + + print( '[ %d segments ]' % self.DICTIONARY['EC']['nE'] ) + + # isotropic compartments + # ---------------------- + print( '\t* Isotropic contributions... 
', end='' ) + sys.stdout.flush() + + self.DICTIONARY['ISO'] = {} + + self.DICTIONARY['nV'] = self.DICTIONARY['MASK'].sum() + + vx, vy, vz = ( self.DICTIONARY['MASK'] > 0 ).nonzero() # [TODO] find a way to avoid using int64 (not necessary and waste of memory) + vx = vx.astype(np.int32) + vy = vy.astype(np.int32) + vz = vz.astype(np.int32) + self.DICTIONARY['ISO']['v'] = vx + self.get_config('dim')[0] * ( vy + self.get_config('dim')[1] * vz ) + del vx, vy, vz + + # reorder the segments based on the "v" field + idx = np.argsort( self.DICTIONARY['ISO']['v'], kind='mergesort' ) + self.DICTIONARY['ISO']['v'] = self.DICTIONARY['ISO']['v'][ idx ] + del idx + + print( '[ %d voxels ]' % self.DICTIONARY['nV'] ) + + # post-processing + # --------------- + print( '\t* Post-processing... ', end='' ) + sys.stdout.flush() + + # get the indices to extract the VOI as in MATLAB (in place of DICTIONARY.MASKidx) + idx = self.DICTIONARY['MASK'].ravel(order='F').nonzero()[0] + self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] = np.unravel_index( idx, self.DICTIONARY['MASK'].shape, order='F' ) + + lut = np.zeros( self.get_config('dim'), dtype=np.uint32 ).ravel() + for i in xrange(idx.size) : + lut[ idx[i] ] = i + self.DICTIONARY['IC'][ 'v'] = lut[ self.DICTIONARY['IC'][ 'v'] ] + self.DICTIONARY['EC'][ 'v'] = lut[ self.DICTIONARY['EC'][ 'v'] ] + self.DICTIONARY['ISO']['v'] = lut[ self.DICTIONARY['ISO']['v'] ] + + print( '[ OK ]' ) + + LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) + + + def set_threads( self, n = None ) : + """Set the number of threads to use for the matrix-vector operations with A and A'. + + Parameters + ---------- + n : integer + Number of threads to use (default : number of CPUs in the system) + """ + if n is None : + # Set to the number of CPUs in the system + try : + import multiprocessing + n = multiprocessing.cpu_count() + except : + n = 1 + + if n < 1 or n > 255 : + ERROR( 'Number of threads must be between 1 and 255' ) + if self.DICTIONARY is None : + ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) + if self.KERNELS is None : + ERROR( 'Response functions not generated; call "generate_kernels()" and "load_kernels()" first' ) + + self.THREADS = {} + self.THREADS['n'] = n + + cdef : + long [:] C + long t, tot, i1, i2, N, c + int i + + tic = time.time() + LOG( '\n-> Distributing workload to different threads:' ) + print( '\t* Number of threads : %d' % n ) + + # Distribute load for the computation of A*x product + print( '\t* A operator... 
', end='' ) + sys.stdout.flush() + + if self.DICTIONARY['IC']['n'] > 0 : + self.THREADS['IC'] = np.zeros( n+1, dtype=np.uint32 ) + if n > 1 : + N = np.floor( self.DICTIONARY['IC']['n']/n ) + t = 1 + tot = 0 + C = np.bincount( self.DICTIONARY['IC']['v'] ) + for c in C : + tot += c + if tot >= N : + self.THREADS['IC'][t] = self.THREADS['IC'][t-1] + tot + t += 1 + tot = 0 + self.THREADS['IC'][n] = self.DICTIONARY['IC']['n'] + + # check if some threads are not assigned any segment + if np.count_nonzero( np.diff( self.THREADS['IC'].astype(np.int32) ) <= 0 ) : + self.THREADS = None + ERROR( 'Too many threads for the IC compartments to evaluate; try decreasing the number' ) + else : + self.THREADS['IC'] = None + + if self.DICTIONARY['EC']['nE'] > 0 : + self.THREADS['EC'] = np.zeros( n+1, dtype=np.uint32 ) + for i in xrange(n) : + self.THREADS['EC'][i] = np.searchsorted( self.DICTIONARY['EC']['v'], self.DICTIONARY['IC']['v'][ self.THREADS['IC'][i] ] ) + self.THREADS['EC'][n] = self.DICTIONARY['EC']['nE'] + + # check if some threads are not assigned any segment + if np.count_nonzero( np.diff( self.THREADS['EC'].astype(np.int32) ) <= 0 ) : + self.THREADS = None + ERROR( 'Too many threads for the EC compartments to evaluate; try decreasing the number' ) + else : + self.THREADS['EC'] = None + + if self.DICTIONARY['nV'] > 0 : + self.THREADS['ISO'] = np.zeros( n+1, dtype=np.uint32 ) + for i in xrange(n) : + self.THREADS['ISO'][i] = np.searchsorted( self.DICTIONARY['ISO']['v'], self.DICTIONARY['IC']['v'][ self.THREADS['IC'][i] ] ) + self.THREADS['ISO'][n] = self.DICTIONARY['nV'] + + # check if some threads are not assigned any segment + if np.count_nonzero( np.diff( self.THREADS['ISO'].astype(np.int32) ) <= 0 ) : + self.THREADS = None + ERROR( 'Too many threads for the ISO compartments to evaluate; try decreasing the number' ) + else : + self.THREADS['ISO'] = None + + print( '[ OK ]' ) + + # Distribute load for the computation of At*y product + print( '\t* A\' operator... 
', end='' ) + sys.stdout.flush() + + if self.DICTIONARY['IC']['n'] > 0 : + self.THREADS['ICt'] = np.full( self.DICTIONARY['IC']['n'], n-1, dtype=np.uint8 ) + if n > 1 : + idx = np.argsort( self.DICTIONARY['IC']['fiber'], kind='mergesort' ) + C = np.bincount( self.DICTIONARY['IC']['fiber'] ) + t = tot = i1 = i2 = 0 + N = np.floor(self.DICTIONARY['IC']['n']/n) + for c in C : + i2 += c + tot += c + if tot >= N : + self.THREADS['ICt'][ i1:i2 ] = t + t += 1 + if t==n-1 : + break + i1 = i2 + tot = c + self.THREADS['ICt'][idx] = self.THREADS['ICt'].copy() + + else : + self.THREADS['ICt'] = None + + if self.DICTIONARY['EC']['nE'] > 0 : + self.THREADS['ECt'] = np.zeros( n+1, dtype=np.uint32 ) + N = np.floor( self.DICTIONARY['EC']['nE']/n ) + for i in xrange(1,n) : + self.THREADS['ECt'][i] = self.THREADS['ECt'][i-1] + N + self.THREADS['ECt'][n] = self.DICTIONARY['EC']['nE'] + + # check if some threads are not assigned any segment + if np.count_nonzero( np.diff( self.THREADS['ECt'].astype(np.int32) ) <= 0 ) : + self.THREADS = None + ERROR( 'Too many threads for the EC compartments to evaluate; try decreasing the number' ) + else : + self.THREADS['ECt'] = None + + if self.DICTIONARY['nV'] > 0 : + self.THREADS['ISOt'] = np.zeros( n+1, dtype=np.uint32 ) + N = np.floor( self.DICTIONARY['nV']/n ) + for i in xrange(1,n) : + self.THREADS['ISOt'][i] = self.THREADS['ISOt'][i-1] + N + self.THREADS['ISOt'][n] = self.DICTIONARY['nV'] + + # check if some threads are not assigned any segment + if np.count_nonzero( np.diff( self.THREADS['ISOt'].astype(np.int32) ) <= 0 ) : + self.THREADS = None + ERROR( 'Too many threads for the ISO compartments to evaluate; try decreasing the number' ) + else : + self.THREADS['ISOt'] = None + + print( '[ OK ]' ) + + LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) + + + def build_operator( self, regtikhonov=0.1 ) : + """Compile/build the operator for computing the matrix-vector multiplications by A and A' + using the informations from self.DICTIONARY, self.KERNELS and self.THREADS. + NB: needs to call this function to update pointers to data structures in case + the data is changed in self.DICTIONARY, self.KERNELS or self.THREADS. + """ + if self.DICTIONARY is None : + ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) + if self.KERNELS is None : + ERROR( 'Response functions not generated; call "generate_kernels()" and "load_kernels()" first' ) + if self.THREADS is None : + ERROR( 'Threads not set; call "set_threads()" first' ) + + tic = time.time() + LOG( '\n-> Building linear operator A:' ) + + # need to pass these parameters at runtime for compiling the C code + from commit.operator import config + config.nTHREADS = self.THREADS['n'] + config.model = self.model.id + config.nIC = self.KERNELS['wmr'].shape[0] + config.nEC = self.KERNELS['wmh'].shape[0] + config.nISO = self.KERNELS['iso'].shape[0] + if not 'commit.operator.operator' in sys.modules : + import commit.operator.operator + else : + reload( sys.modules['commit.operator.operator'] ) + self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, regtikhonov ) + + LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) + + + def get_y( self ): + """ + Returns a numpy array that corresponds to the 'y' vector of the optimisation problem. + NB: this can be run only after having loaded the dictionary and the data. 
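+
+        Since this patch appends the Tikhonov rows to the operator, the measured
+        signal is zero-padded so that its length matches the number of rows of A
+        (the nV*nS signal rows plus the extra regularisation rows). A rough
+        sketch of the intended bookkeeping, with made-up sizes:
+
+            import numpy as np
+            nV, nS, nR = 1000, 60, 4            # voxels, samples, fiber radii (hypothetical)
+            y  = np.random.rand( nV*nS )        # stands in for the masked DWI signal
+            y2 = np.zeros( nV*nS + nR - 1 )     # one zero entry per appended row
+            y2[:y.size] = y                     # so the residual on those rows is just the
+                                                # (scaled) regularisation term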
+ """ + if self.DICTIONARY is None : + ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) + if self.niiDWI is None : + raise RuntimeError( 'Data not loaded; call "load_data()" first.' ) + + y = self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float64) + #return y + """print(type(y)) + print(y.shape) + print(y.shape[0]) + print(self.KERNELS['wmr'].shape[0]) + print(y.shape[0] + self.KERNELS['wmr'].shape[0])""" + y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-1, dtype=np.float64) + y2[0:y.shape[0]] = y + #print(y2.shape) + return y2 + #""" + + + def fit( self, tol_fun = 1e-3, tol_x = 1e-6, max_iter = 100, verbose = 1, x0 = None, regularisation = None ) : + """Fit the model to the data. + + Parameters + ---------- + tol_fun : float + Tolerance on the objective function (default : 1e-3) + max_iter : integer + Maximum number of iterations (default : 100) + verbose : integer + Level of verbosity: 0=no print, 1=print progress (default : 1) + x0 : np.array + Initial guess for the solution of the problem (default : None) + regularisation : commit.solvers.init_regularisation object + Python dictionary that describes the wanted regularisation term. + Check the documentation of commit.solvers.init_regularisation to see + how to properly define the wanted mathematical formulation + ( default : None ) + """ + if self.niiDWI is None : + ERROR( 'Data not loaded; call "load_data()" first' ) + if self.DICTIONARY is None : + ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) + if self.KERNELS is None : + ERROR( 'Response functions not generated; call "generate_kernels()" and "load_kernels()" first' ) + if self.THREADS is None : + ERROR( 'Threads not set; call "set_threads()" first' ) + if self.A is None : + ERROR( 'Operator not built; call "build_operator()" first' ) + + if x0 is not None : + if x0.shape[0] != self.A.shape[1] : + ERROR( 'x0 dimension does not match the number of columns of the dictionary' ) + if regularisation is None : + regularisation = commit.solvers.init_regularisation(self) + + self.CONFIG['optimization'] = {} + self.CONFIG['optimization']['tol_fun'] = tol_fun + self.CONFIG['optimization']['tol_x'] = tol_x + self.CONFIG['optimization']['max_iter'] = max_iter + self.CONFIG['optimization']['verbose'] = verbose + self.CONFIG['optimization']['regularisation'] = regularisation + + # run solver + t = time.time() + LOG( '\n-> Fit model:' ) + + self.x, opt_details = commit.solvers.solve(self.get_y(), self.A, self.A.T, tol_fun = tol_fun, tol_x = tol_x, max_iter = max_iter, verbose = verbose, x0 = x0, regularisation = regularisation) + + self.CONFIG['optimization']['fit_details'] = opt_details + self.CONFIG['optimization']['fit_time'] = time.time()-t + + LOG( '\n [ %s ]' % ( time.strftime("%Hh %Mm %Ss", time.gmtime(self.CONFIG['optimization']['fit_time']) ) ) ) + + + def save_results( self, path_suffix = None, save_opt_details = True, save_coeff = False, save_est_dwi = False ) : + """Save the output (coefficients, errors, maps etc). 
+ + Parameters + ---------- + path_suffix : string + Text to be appended to "Results" to create the output path (default : None) + save_opt_details : boolean + Save everything in a pickle file containing the following list L: + L[0]: dictionary with all the configuration details + L[1]: np.array obtained through the optimisation process with the normalised kernels + L[2]: np.array renormalisation of L[1] + (default : True) + save_coeff : boolean + Save the coefficients related to each compartment in txt files + and a pickle file containing the configuration details. + (default : False) + save_est_dwi : boolean + Save the estimated DW-MRI signal (default : False) + """ + if self.x is None : + ERROR( 'Model not fitted to the data; call "fit()" first' ) + + RESULTS_path = 'Results_' + self.model.id + if path_suffix : + self.set_config('path_suffix', path_suffix) + RESULTS_path = RESULTS_path + path_suffix + + LOG( '\n-> Saving results to "%s/*":' % RESULTS_path ) + tic = time.time() + + nF = self.DICTIONARY['IC']['nF'] + nE = self.DICTIONARY['EC']['nE'] + nV = self.DICTIONARY['nV'] + nS = self.KERNELS['wmr'].shape[2] + norm_fib = np.ones( nF ) + # x is the x of the original problem + # self.x is the x preconditioned + if self.get_config('doNormalizeKernels') : + # renormalize the coefficients + norm1 = np.repeat(self.KERNELS['wmr_norm'],nF) + norm2 = np.repeat(self.KERNELS['wmh_norm'],nE) + norm3 = np.repeat(self.KERNELS['iso_norm'],nV) + norm_fib = np.kron(np.ones(self.KERNELS['wmr'].shape[0]), self.DICTIONARY['TRK']['norm']) + x = self.x / np.hstack( (norm1*norm_fib,norm2,norm3) ) + else : + x = self.x + + # create folder or delete existing files (if any) + RESULTS_path = pjoin( self.get_config('TRACKING_path'), RESULTS_path ) + if not exists( RESULTS_path ) : + makedirs( RESULTS_path ) + else : + for f in glob.glob( pjoin(RESULTS_path,'*') ) : + remove( f ) + self.set_config('RESULTS_path', RESULTS_path) + + # Map of voxelwise errors + print( '\t* Fitting errors:' ) + + niiMAP_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) + affine = self.niiDWI.affine if nibabel.__version__ >= '2.0.0' else self.niiDWI.get_affine() + niiMAP = nibabel.Nifti1Image( niiMAP_img, affine ) + niiMAP_hdr = niiMAP.header if nibabel.__version__ >= '2.0.0' else niiMAP.get_header() + + y_mea = np.reshape( self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float32), (nV,-1) ) + y_est = np.reshape( self.A.dot(self.x)[:int(nV*nS)], (nV,-1) ).astype(np.float32) + + print( '\t\t- RMSE... ', end='' ) + sys.stdout.flush() + tmp = np.sqrt( np.mean((y_mea-y_est)**2,axis=1) ) + niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = tmp + niiMAP_hdr['cal_min'] = 0 + niiMAP_hdr['cal_max'] = tmp.max() + nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_RMSE.nii.gz') ) + print( '[ %.3f +/- %.3f ]' % ( tmp.mean(), tmp.std() ) ) + + print( '\t\t- NRMSE... 
', end='' ) + sys.stdout.flush() + tmp = np.sum(y_mea**2,axis=1) + idx = np.where( tmp < 1E-12 ) + tmp[ idx ] = 1 + tmp = np.sqrt( np.sum((y_mea-y_est)**2,axis=1) / tmp ) + tmp[ idx ] = 0 + niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = tmp + niiMAP_hdr['cal_min'] = 0 + niiMAP_hdr['cal_max'] = 1 + nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_NRMSE.nii.gz') ) + print( '[ %.3f +/- %.3f ]' % ( tmp.mean(), tmp.std() ) ) + + # Map of compartment contributions + print( '\t* Voxelwise contributions:' ) + + print( '\t\t- Intra-axonal... ', end='' ) + sys.stdout.flush() + niiIC_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) + if len(self.KERNELS['wmr']) > 0 : + offset = nF * self.KERNELS['wmr'].shape[0] + tmp = ( x[:offset].reshape( (-1,nF) ) * norm_fib.reshape( (-1,nF) ) ).sum( axis=0 ) + xv = np.bincount( self.DICTIONARY['IC']['v'], minlength=nV, + weights=tmp[ self.DICTIONARY['IC']['fiber'] ] * self.DICTIONARY['IC']['len'] + ).astype(np.float32) + niiIC_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv + print( '[ OK ]' ) + + print( '\t\t- Extra-axonal... ', end='' ) + sys.stdout.flush() + niiEC_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) + if len(self.KERNELS['wmh']) > 0 : + offset = nF * self.KERNELS['wmr'].shape[0] + tmp = x[offset:offset+nE*len(self.KERNELS['wmh'])].reshape( (-1,nE) ).sum( axis=0 ) + xv = np.bincount( self.DICTIONARY['EC']['v'], weights=tmp, minlength=nV ).astype(np.float32) + niiEC_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv + print( '[ OK ]' ) + + print( '\t\t- Isotropic... ', end='' ) + sys.stdout.flush() + niiISO_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) + if len(self.KERNELS['iso']) > 0 : + offset = nF * self.KERNELS['wmr'].shape[0] + nE * self.KERNELS['wmh'].shape[0] + xv = x[offset:].reshape( (-1,nV) ).sum( axis=0 ) + niiISO_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv + print( ' [ OK ]' ) + + if self.get_config('doNormalizeMaps') : + niiIC = nibabel.Nifti1Image( niiIC_img / ( niiIC_img + niiEC_img + niiISO_img + 1e-16), affine ) + niiEC = nibabel.Nifti1Image( niiEC_img / ( niiIC_img + niiEC_img + niiISO_img + 1E-16), affine ) + niiISO = nibabel.Nifti1Image( niiISO_img / ( niiIC_img + niiEC_img + niiISO_img + 1E-16), affine ) + else: + niiIC = nibabel.Nifti1Image( niiIC_img, affine ) + niiEC = nibabel.Nifti1Image( niiEC_img, affine ) + niiISO = nibabel.Nifti1Image( niiISO_img, affine ) + + nibabel.save( niiIC , pjoin(RESULTS_path,'compartment_IC.nii.gz') ) + nibabel.save( niiEC , pjoin(RESULTS_path,'compartment_EC.nii.gz') ) + nibabel.save( niiISO , pjoin(RESULTS_path,'compartment_ISO.nii.gz') ) + + # Configuration and results + print( '\t* Configuration and results:' ) + + if save_opt_details: + print( '\t\t- results.pickle... ', end='' ) + sys.stdout.flush() + with open( pjoin(RESULTS_path,'results.pickle'), 'wb+' ) as fid : + pickle.dump( [self.CONFIG, self.x, x], fid, protocol=2 ) + print( '[ OK ]' ) + + if save_coeff: + print( '\t\t- Coefficients txt files... 
', end='' ) + sys.stdout.flush() + if len(self.KERNELS['wmr']) > 0 : + offset = nF * self.KERNELS['wmr'].shape[0] + np.savetxt(pjoin(RESULTS_path,'xic.txt'), x[:offset], fmt='%12.5e') + if len(self.KERNELS['wmh']) > 0 : + offset = nF * self.KERNELS['wmr'].shape[0] + np.savetxt(pjoin(RESULTS_path,'xec.txt'), x[offset:offset+nE*len(self.KERNELS['wmh'])], fmt='%12.5e') + if len(self.KERNELS['iso']) > 0 : + offset = nF * self.KERNELS['wmr'].shape[0] + nE * self.KERNELS['wmh'].shape[0] + np.savetxt(pjoin(RESULTS_path,'xiso.txt'), x[offset:], fmt='%12.5e') + with open( pjoin(RESULTS_path,'config.pickle'), 'wb+' ) as fid : + pickle.dump( self.CONFIG, fid, protocol=2 ) + print( '[ OK ]' ) + + if save_est_dwi : + print( '\t\t- Estimated signal... ', end='' ) + sys.stdout.flush() + self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ] = y_est + nibabel.save( nibabel.Nifti1Image( self.niiDWI_img , affine ), pjoin(RESULTS_path,'fit_signal_estimated.nii.gz') ) + self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ] = y_mea + print( '[ OK ]' ) + + LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index 244c03ec..ab077d5f 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -1,216 +1,216 @@ -#!python -#cython: language_level=3, boundscheck=False, wraparound=False, profile=False - -import cython -import numpy as np -cimport numpy as np - -# Interfaces to actual C code performing the multiplications -cdef extern void COMMIT_A( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_v_in, double *_v_out, - unsigned int *_ICf, unsigned int *_ICv, unsigned short *_ICo, float *_ICl, - unsigned int *_ECv, unsigned short *_ECo, - unsigned int *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - unsigned int* _ICthreads, unsigned int* _ECthreads, unsigned int* _ISOthreads -) nogil - -cdef extern void COMMIT_At( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_v_in, double *_v_out, - unsigned int *_ICf, unsigned int *_ICv, unsigned short *_ICo, float *_ICl, - unsigned int *_ECv, unsigned short *_ECo, - unsigned int *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - unsigned char *_ICthreadsT, unsigned int *_ECthreadsT, unsigned int *_ISOthreadsT -) nogil - -cdef extern void COMMIT_L( - int _nF, int _nIC, int _nV, int _nS, double _regterm, - double *_v_in, double *_v_out -) nogil - -cdef extern void COMMIT_Lt( - int _nF, int _nIC, int _nV, int _nS, double _regterm, - double *_v_in, double *_v_out -) nogil - -cdef class LinearOperator : - """This class is a wrapper to the C code for performing marix-vector multiplications - with the COMMIT linear operator A. The multiplications are done using C code - that uses information from the DICTIONARY, KERNELS and THREADS data structures. 
- """ - cdef int nS, nF, nR, nE, nT, nV, nI, n, ndirs - cdef public int adjoint, n1, n2 - cdef public float regtikhonov - - cdef DICTIONARY - cdef KERNELS - cdef THREADS - - cdef unsigned int* ICf - cdef float* ICl - cdef unsigned int* ICv - cdef unsigned short* ICo - cdef unsigned int* ECv - cdef unsigned short* ECo - cdef unsigned int* ISOv - - cdef float* LUT_IC - cdef float* LUT_EC - cdef float* LUT_ISO - - cdef unsigned int* ICthreads - cdef unsigned int* ECthreads - cdef unsigned int* ISOthreads - - cdef unsigned char* ICthreadsT - cdef unsigned int* ECthreadsT - cdef unsigned int* ISOthreadsT - - - def __init__( self, DICTIONARY, KERNELS, THREADS, regtikhonov ) : - """Set the pointers to the data structures used by the C code.""" - self.DICTIONARY = DICTIONARY - self.KERNELS = KERNELS - self.THREADS = THREADS - - self.nF = DICTIONARY['IC']['nF'] # number of FIBERS - self.nR = KERNELS['wmr'].shape[0] # number of FIBER RADII - self.nE = DICTIONARY['EC']['nE'] # number of EC segments - self.nT = KERNELS['wmh'].shape[0] # number of EC TORTUOSITY values - self.nV = DICTIONARY['nV'] # number of VOXELS - self.nI = KERNELS['iso'].shape[0] # number of ISO contributions - self.n = DICTIONARY['IC']['n'] # numbner of IC segments - self.ndirs = KERNELS['wmr'].shape[1] # number of directions - self.regtikhonov = regtikhonov - - if KERNELS['wmr'].size > 0 : - self.nS = KERNELS['wmr'].shape[2] # number of SAMPLES - elif KERNELS['wmh'].size > 0 : - self.nS = KERNELS['wmh'].shape[2] - else : - self.nS = KERNELS['wmr'].shape[1] - - self.adjoint = 0 # direct of inverse product - - self.n1 = self.nV*self.nS + self.nR - self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV - - # get C pointers to arrays in DICTIONARY - cdef unsigned int [::1] ICf = DICTIONARY['IC']['fiber'] - self.ICf = &ICf[0] - cdef float [::1] ICl = DICTIONARY['IC']['len'] - self.ICl = &ICl[0] - cdef unsigned int [::1] ICv = DICTIONARY['IC']['v'] - self.ICv = &ICv[0] - cdef unsigned short [::1] ICo = DICTIONARY['IC']['o'] - self.ICo = &ICo[0] - cdef unsigned int [::1] ECv = DICTIONARY['EC']['v'] - self.ECv = &ECv[0] - cdef unsigned short [::1] ECo = DICTIONARY['EC']['o'] - self.ECo = &ECo[0] - cdef unsigned int [::1] ISOv = DICTIONARY['ISO']['v'] - self.ISOv = &ISOv[0] - - # get C pointers to arrays in KERNELS - cdef float [:, :, ::1] wmrSFP = KERNELS['wmr'] - self.LUT_IC = &wmrSFP[0,0,0] - cdef float [:, :, ::1] wmhSFP = KERNELS['wmh'] - self.LUT_EC = &wmhSFP[0,0,0] - cdef float [:, ::1] isoSFP = KERNELS['iso'] - self.LUT_ISO = &isoSFP[0,0] - - # get C pointers to arrays in THREADS - cdef unsigned int [::1] ICthreads = THREADS['IC'] - self.ICthreads = &ICthreads[0] - cdef unsigned int [::1] ECthreads = THREADS['EC'] - self.ECthreads = &ECthreads[0] - cdef unsigned int [::1] ISOthreads = THREADS['ISO'] - self.ISOthreads = &ISOthreads[0] - - cdef unsigned char [::1] ICthreadsT = THREADS['ICt'] - self.ICthreadsT = &ICthreadsT[0] - cdef unsigned int [::1] ECthreadsT = THREADS['ECt'] - self.ECthreadsT = &ECthreadsT[0] - cdef unsigned int [::1] ISOthreadsT = THREADS['ISOt'] - self.ISOthreadsT = &ISOthreadsT[0] - - - @property - def T( self ) : - """Transpose of the explicit matrix.""" - C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.regtikhonov ) - C.adjoint = 1 - C.adjoint - return C - - - @property - def shape( self ) : - """Size of the explicit matrix.""" - if not self.adjoint : - return ( self.n1, self.n2 ) - else : - return ( self.n2, self.n1 ) - - - def dot( self, double [::1] v_in ): - """Wrapper to C 
code for efficiently performing the matrix-vector multiplications. - - Parameters - ---------- - v_in : 1D numpy.array of double - Input vector for the matrix-vector multiplication - - Returns - ------- - v_out : 1D numpy.array of double - Results of the multiplication - """ - - # Permit only matrix-vector multiplications - if v_in.size != self.shape[1] : - raise RuntimeError( "A.dot(): dimensions do not match" ) - - # Create output array - cdef double [::1] v_out = np.zeros( self.shape[0], dtype=np.float64 ) - - # Call the cython function to read the memory pointers - if not self.adjoint : - # DIRECT PRODUCT A*x - with nogil : - COMMIT_A( - self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, - &v_in[0], &v_out[0], - self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv, - self.LUT_IC, self.LUT_EC, self.LUT_ISO, - self.ICthreads, self.ECthreads, self.ISOthreads - ) - else : - # INVERSE PRODUCT A'*y - with nogil : - COMMIT_At( - self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, - &v_in[0], &v_out[0], - self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv, - self.LUT_IC, self.LUT_EC, self.LUT_ISO, - self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT - ) - - if not self.adjoint: - with nogil: - # DIRECT PRODUCT L*lambda*x - COMMIT_L( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, - &v_in[0], &v_out[0] - ) - else: - with nogil: - # INVERSE PRODUCT L'*lambda*y - COMMIT_Lt( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, #self.tikterm - &v_in[0], &v_out[0] - ) #""" - - return v_out +#!python +#cython: language_level=3, boundscheck=False, wraparound=False, profile=False + +import cython +import numpy as np +cimport numpy as np + +# Interfaces to actual C code performing the multiplications +cdef extern void COMMIT_A( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_v_in, double *_v_out, + unsigned int *_ICf, unsigned int *_ICv, unsigned short *_ICo, float *_ICl, + unsigned int *_ECv, unsigned short *_ECo, + unsigned int *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + unsigned int* _ICthreads, unsigned int* _ECthreads, unsigned int* _ISOthreads +) nogil + +cdef extern void COMMIT_At( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_v_in, double *_v_out, + unsigned int *_ICf, unsigned int *_ICv, unsigned short *_ICo, float *_ICl, + unsigned int *_ECv, unsigned short *_ECo, + unsigned int *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + unsigned char *_ICthreadsT, unsigned int *_ECthreadsT, unsigned int *_ISOthreadsT +) nogil + +cdef extern void COMMIT_L( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_Lt( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_v_in, double *_v_out +) nogil + +cdef class LinearOperator : + """This class is a wrapper to the C code for performing marix-vector multiplications + with the COMMIT linear operator A. The multiplications are done using C code + that uses information from the DICTIONARY, KERNELS and THREADS data structures. 
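+
+    With this patch the operator behaves like the stacked matrix [ A ; regtikhonov*L ]:
+    the extra rows are filled by COMMIT_L (COMMIT_Lt for the adjoint), which, judging
+    from its arguments (nF, nR), acts on the intra-cellular coefficients only. As an
+    illustration only — assuming L is a second-difference (Laplacian) smoothing across
+    the nR radius weights of each fiber, the exact stencil being defined by the C
+    implementation of COMMIT_L — the quadratic penalty added to the data term is
+    roughly:
+
+        import numpy as np
+
+        def tikhonov_penalty( x_ic, nF, nR, regtikhonov ):
+            # x_ic holds the IC coefficients ordered radius-major: x_ic[r*nF + f]
+            xr = x_ic.reshape( nR, nF )
+            d2 = xr[:-2] - 2*xr[1:-1] + xr[2:]      # second differences across radii
+            # the rows appended to A are scaled by regtikhonov, so the quadratic
+            # penalty carries its square
+            return regtikhonov**2 * np.sum( d2**2 )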
+ """ + cdef int nS, nF, nR, nE, nT, nV, nI, n, ndirs + cdef public int adjoint, n1, n2 + cdef public float regtikhonov + + cdef DICTIONARY + cdef KERNELS + cdef THREADS + + cdef unsigned int* ICf + cdef float* ICl + cdef unsigned int* ICv + cdef unsigned short* ICo + cdef unsigned int* ECv + cdef unsigned short* ECo + cdef unsigned int* ISOv + + cdef float* LUT_IC + cdef float* LUT_EC + cdef float* LUT_ISO + + cdef unsigned int* ICthreads + cdef unsigned int* ECthreads + cdef unsigned int* ISOthreads + + cdef unsigned char* ICthreadsT + cdef unsigned int* ECthreadsT + cdef unsigned int* ISOthreadsT + + + def __init__( self, DICTIONARY, KERNELS, THREADS, regtikhonov ) : + """Set the pointers to the data structures used by the C code.""" + self.DICTIONARY = DICTIONARY + self.KERNELS = KERNELS + self.THREADS = THREADS + + self.nF = DICTIONARY['IC']['nF'] # number of FIBERS + self.nR = KERNELS['wmr'].shape[0] # number of FIBER RADII + self.nE = DICTIONARY['EC']['nE'] # number of EC segments + self.nT = KERNELS['wmh'].shape[0] # number of EC TORTUOSITY values + self.nV = DICTIONARY['nV'] # number of VOXELS + self.nI = KERNELS['iso'].shape[0] # number of ISO contributions + self.n = DICTIONARY['IC']['n'] # numbner of IC segments + self.ndirs = KERNELS['wmr'].shape[1] # number of directions + self.regtikhonov = regtikhonov + + if KERNELS['wmr'].size > 0 : + self.nS = KERNELS['wmr'].shape[2] # number of SAMPLES + elif KERNELS['wmh'].size > 0 : + self.nS = KERNELS['wmh'].shape[2] + else : + self.nS = KERNELS['wmr'].shape[1] + + self.adjoint = 0 # direct of inverse product + + self.n1 = self.nV*self.nS + self.nR-1 + self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV + + # get C pointers to arrays in DICTIONARY + cdef unsigned int [::1] ICf = DICTIONARY['IC']['fiber'] + self.ICf = &ICf[0] + cdef float [::1] ICl = DICTIONARY['IC']['len'] + self.ICl = &ICl[0] + cdef unsigned int [::1] ICv = DICTIONARY['IC']['v'] + self.ICv = &ICv[0] + cdef unsigned short [::1] ICo = DICTIONARY['IC']['o'] + self.ICo = &ICo[0] + cdef unsigned int [::1] ECv = DICTIONARY['EC']['v'] + self.ECv = &ECv[0] + cdef unsigned short [::1] ECo = DICTIONARY['EC']['o'] + self.ECo = &ECo[0] + cdef unsigned int [::1] ISOv = DICTIONARY['ISO']['v'] + self.ISOv = &ISOv[0] + + # get C pointers to arrays in KERNELS + cdef float [:, :, ::1] wmrSFP = KERNELS['wmr'] + self.LUT_IC = &wmrSFP[0,0,0] + cdef float [:, :, ::1] wmhSFP = KERNELS['wmh'] + self.LUT_EC = &wmhSFP[0,0,0] + cdef float [:, ::1] isoSFP = KERNELS['iso'] + self.LUT_ISO = &isoSFP[0,0] + + # get C pointers to arrays in THREADS + cdef unsigned int [::1] ICthreads = THREADS['IC'] + self.ICthreads = &ICthreads[0] + cdef unsigned int [::1] ECthreads = THREADS['EC'] + self.ECthreads = &ECthreads[0] + cdef unsigned int [::1] ISOthreads = THREADS['ISO'] + self.ISOthreads = &ISOthreads[0] + + cdef unsigned char [::1] ICthreadsT = THREADS['ICt'] + self.ICthreadsT = &ICthreadsT[0] + cdef unsigned int [::1] ECthreadsT = THREADS['ECt'] + self.ECthreadsT = &ECthreadsT[0] + cdef unsigned int [::1] ISOthreadsT = THREADS['ISOt'] + self.ISOthreadsT = &ISOthreadsT[0] + + + @property + def T( self ) : + """Transpose of the explicit matrix.""" + C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.regtikhonov ) + C.adjoint = 1 - C.adjoint + return C + + + @property + def shape( self ) : + """Size of the explicit matrix.""" + if not self.adjoint : + return ( self.n1, self.n2 ) + else : + return ( self.n2, self.n1 ) + + + def dot( self, double [::1] v_in ): + """Wrapper to 
C code for efficiently performing the matrix-vector multiplications. + + Parameters + ---------- + v_in : 1D numpy.array of double + Input vector for the matrix-vector multiplication + + Returns + ------- + v_out : 1D numpy.array of double + Results of the multiplication + """ + + # Permit only matrix-vector multiplications + if v_in.size != self.shape[1] : + raise RuntimeError( "A.dot(): dimensions do not match" ) + + # Create output array + cdef double [::1] v_out = np.zeros( self.shape[0], dtype=np.float64 ) + + # Call the cython function to read the memory pointers + if not self.adjoint : + # DIRECT PRODUCT A*x + with nogil : + COMMIT_A( + self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, + &v_in[0], &v_out[0], + self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv, + self.LUT_IC, self.LUT_EC, self.LUT_ISO, + self.ICthreads, self.ECthreads, self.ISOthreads + ) + else : + # INVERSE PRODUCT A'*y + with nogil : + COMMIT_At( + self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, + &v_in[0], &v_out[0], + self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv, + self.LUT_IC, self.LUT_EC, self.LUT_ISO, + self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT + ) + + if not self.adjoint: + with nogil: + # DIRECT PRODUCT L*lambda*x + COMMIT_L( + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + &v_in[0], &v_out[0] + ) + else: + with nogil: + # INVERSE PRODUCT L'*lambda*y + COMMIT_Lt( + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, #self.tikterm + &v_in[0], &v_out[0] + ) #""" + + return v_out diff --git a/commit/operator/operator.pyxbld b/commit/operator/operator.pyxbld index c2cf5f5b..55009fa6 100755 --- a/commit/operator/operator.pyxbld +++ b/commit/operator/operator.pyxbld @@ -1,33 +1,33 @@ -import numpy -from os import utime -from os.path import dirname, join -from distutils.extension import Extension - -# pass parameters to the compiler at runtime -# [TODO] find a way to avoid using this fake module -from commit.operator import config - -def make_ext(modname, pyxfilename): - - if ( config.nTHREADS is None or config.nTHREADS < 1 or config.nTHREADS > 255 ): - raise RuntimeError( 'config.nTHREADS must be between 1 and 255' ) - if ( config.nIC is None or config.nIC < 0 or config.nIC > 20 ): - raise RuntimeError( 'config.nIC must be in the range [0..20]' ) - if ( config.nEC is None or config.nEC < 0 or config.nEC > 20 ): - raise RuntimeError( 'config.nEC must be in the range [0..20]' ) - if ( config.nISO is None or config.nISO < 0 or config.nISO > 20 ): - raise RuntimeError( 'config.nISO must be in the range [0..20]' ) - - # Force recompilation - if config.model=="VolumeFractions" : - filename = "operator_noLUT.c" - else : - filename = "operator_withLUT.c" - path = dirname(pyxfilename) - utime( join(path,filename), None) - return Extension(name=modname, - sources=[pyxfilename,join(path,filename)], - include_dirs=[numpy.get_include()], - define_macros = [('nTHREADS',config.nTHREADS), ('nIC',config.nIC), ('nEC',config.nEC), ('nISO',config.nISO)], - extra_compile_args=['-w', '-O3', '-Ofast'], - ) +import numpy +from os import utime +from os.path import dirname, join +from distutils.extension import Extension + +# pass parameters to the compiler at runtime +# [TODO] find a way to avoid using this fake module +from commit.operator import config + +def make_ext(modname, pyxfilename): + + if ( config.nTHREADS is None or config.nTHREADS < 1 or config.nTHREADS > 255 ): + raise RuntimeError( 'config.nTHREADS must be between 1 and 255' ) + if ( config.nIC 
is None or config.nIC < 0 or config.nIC > 20 ): + raise RuntimeError( 'config.nIC must be in the range [0..20]' ) + if ( config.nEC is None or config.nEC < 0 or config.nEC > 20 ): + raise RuntimeError( 'config.nEC must be in the range [0..20]' ) + if ( config.nISO is None or config.nISO < 0 or config.nISO > 20 ): + raise RuntimeError( 'config.nISO must be in the range [0..20]' ) + + # Force recompilation + if config.model=="VolumeFractions" : + filename = "operator_noLUT.c" + else : + filename = "operator_withLUT.c" + path = dirname(pyxfilename) + utime( join(path,filename), None) + return Extension(name=modname, + sources=[pyxfilename,join(path,filename)], + include_dirs=[numpy.get_include()], + define_macros = [('nTHREADS',config.nTHREADS), ('nIC',config.nIC), ('nEC',config.nEC), ('nISO',config.nISO)], + extra_compile_args=['-w', '-O3', '-Ofast'], + ) diff --git a/commit/operator/operator_noLUT.c b/commit/operator/operator_noLUT.c index d261c02a..7510f9bb 100644 --- a/commit/operator/operator_noLUT.c +++ b/commit/operator/operator_noLUT.c @@ -1,218 +1,246 @@ -#include -#include // uint32_t etc - -// number of THREADS -#ifdef nTHREADS - #if (nTHREADS<1 || nTHREADS>255) - #error "nTHREADS" must be in the range 1..255 - #endif -#else - #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=" -#endif - - -/* global variables */ -int nF, n; -double *x, *Y; -uint32_t *ICthreads, *ISOthreads; -uint8_t *ICthreadsT; -uint32_t *ISOthreadsT; -uint32_t *ICf, *ICv, *ISOv; -float *ICl; - - -// ==================================================== -// Compute a sub-block of the A*x MAtRIX-VECTOR product -// ==================================================== -void* COMMIT_A__block( void *ptr ) -{ - int id = (long)ptr; - double x0; - double *xPtr; - uint32_t *t_v, *t_vEnd, *t_f; - float *t_l; - - // intra-cellular compartments - t_v = ICv + ICthreads[id]; - t_vEnd = ICv + ICthreads[id+1]; - t_l = ICl + ICthreads[id]; - t_f = ICf + ICthreads[id]; - - while( t_v != t_vEnd ) - { - x0 = x[*t_f]; - if ( x0 != 0 ) - Y[*t_v] += (double)(*t_l) * x0; - t_f++; - t_v++; - t_l++; - } - -#if nISO>=1 - // isotropic compartments - t_v = ISOv + ISOthreads[id]; - t_vEnd = ISOv + ISOthreads[id+1]; - xPtr = x + nF + ISOthreads[id]; - - while( t_v != t_vEnd ) - { - x0 = *xPtr++; - if ( x0 != 0 ) - Y[*t_v] += x0; - t_v++; - } -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_A( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads -) -{ - nF = _nF; - n = _n; - - x = _vIN; - Y = _vOUT; - - ICf = _ICf; - ICv = _ICv; - ICl = _ICl; - ISOv = _ISOv; - - ICthreads = _ICthreads; - ISOthreads = _ISOthreads; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t=1 - // isotropic compartments - t_v = ISOv + ISOthreadsT[id]; - t_vEnd = ISOv + ISOthreadsT[id+1]; - xPtr = x + nF + ISOthreadsT[id]; - - while( t_v != t_vEnd ) - (*xPtr++) += Y[*t_v++]; -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_At( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t 
*_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT -) -{ - nF = _nF; - n = _n; - - x = _vOUT; - Y = _vIN; - - ICf = _ICf; - ICv = _ICv; - ICl = _ICl; - ISOv = _ISOv; - - ICthreadsT = _ICthreadsT; - ISOthreadsT = _ISOthreadsT; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t +#include // uint32_t etc + +// number of THREADS +#ifdef nTHREADS + #if (nTHREADS<1 || nTHREADS>255) + #error "nTHREADS" must be in the range 1..255 + #endif +#else + #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=" +#endif + + +/* global variables */ +int nF, n; +double *x, *Y; +uint32_t *ICthreads, *ISOthreads; +uint8_t *ICthreadsT; +uint32_t *ISOthreadsT; +uint32_t *ICf, *ICv, *ISOv; +float *ICl; + + +// ==================================================== +// Compute a sub-block of the A*x MAtRIX-VECTOR product +// ==================================================== +void* COMMIT_A__block( void *ptr ) +{ + int id = (long)ptr; + double x0; + double *xPtr; + uint32_t *t_v, *t_vEnd, *t_f; + float *t_l; + + // intra-cellular compartments + t_v = ICv + ICthreads[id]; + t_vEnd = ICv + ICthreads[id+1]; + t_l = ICl + ICthreads[id]; + t_f = ICf + ICthreads[id]; + + while( t_v != t_vEnd ) + { + x0 = x[*t_f]; + if ( x0 != 0 ) + Y[*t_v] += (double)(*t_l) * x0; + t_f++; + t_v++; + t_l++; + } + +#if nISO>=1 + // isotropic compartments + t_v = ISOv + ISOthreads[id]; + t_vEnd = ISOv + ISOthreads[id+1]; + xPtr = x + nF + ISOthreads[id]; + + while( t_v != t_vEnd ) + { + x0 = *xPtr++; + if ( x0 != 0 ) + Y[*t_v] += x0; + t_v++; + } +#endif + + pthread_exit( 0 ); +} + + +// ========================= +// Function called by CYTHON +// ========================= +void COMMIT_A( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads +) +{ + nF = _nF; + n = _n; + + x = _vIN; + Y = _vOUT; + + ICf = _ICf; + ICv = _ICv; + ICl = _ICl; + ISOv = _ISOv; + + ICthreads = _ICthreads; + ISOthreads = _ISOthreads; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[nTHREADS]; + int t; + for(t=0; t=1 + // isotropic compartments + t_v = ISOv + ISOthreadsT[id]; + t_vEnd = ISOv + ISOthreadsT[id+1]; + xPtr = x + nF + ISOthreadsT[id]; + + while( t_v != t_vEnd ) + (*xPtr++) += Y[*t_v++]; +#endif + + pthread_exit( 0 ); +} + + +// ========================= +// Function called by CYTHON +// ========================= +void COMMIT_At( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT +) +{ + nF = _nF; + n = _n; + + x = _vOUT; + Y = _vIN; + + ICf = _ICf; + ICv = _ICv; + ICl = _ICl; + ISOv = _ISOv; + + ICthreadsT = _ICthreadsT; + ISOthreadsT = _ISOthreadsT; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[nTHREADS]; + int t; + for(t=0; t -#include // uint32_t etc - -// number of THREADS -#ifdef nTHREADS - #if (nTHREADS<1 || nTHREADS>255) 
- #error "nTHREADS" must be in the range 1..255 - #endif -#else - #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=" -#endif - - -/* global variables */ -int nF, n, nE, nV, nS, ndirs; -double *x, *Y; -uint32_t *ICthreads, *ECthreads, *ISOthreads; -uint8_t *ICthreadsT; -uint32_t *ECthreadsT, *ISOthreadsT; -uint32_t *ICf, *ICv, *ECv, *ISOv; -uint16_t *ICo, *ECo; -float *ICl; -float *wmrSFP0, *wmrSFP1, *wmrSFP2, *wmrSFP3, *wmrSFP4, *wmrSFP5, *wmrSFP6, *wmrSFP7, *wmrSFP8, *wmrSFP9, *wmrSFP10, *wmrSFP11, *wmrSFP12, *wmrSFP13, *wmrSFP14, *wmrSFP15, *wmrSFP16, *wmrSFP17, *wmrSFP18, *wmrSFP19; -float *wmhSFP0, *wmhSFP1, *wmhSFP2, *wmhSFP3, *wmhSFP4, *wmhSFP5, *wmhSFP6, *wmhSFP7, *wmhSFP8, *wmhSFP9, *wmhSFP10, *wmhSFP11, *wmhSFP12, *wmhSFP13, *wmhSFP14, *wmhSFP15, *wmhSFP16, *wmhSFP17, *wmhSFP18, *wmhSFP19; -float *isoSFP0, *isoSFP1, *isoSFP2, *isoSFP3, *isoSFP4, *isoSFP5, *isoSFP6, *isoSFP7, *isoSFP8, *isoSFP9, *isoSFP10, *isoSFP11, *isoSFP12, *isoSFP13, *isoSFP14, *isoSFP15, *isoSFP16, *isoSFP17, *isoSFP18, *isoSFP19; - - - -// ==================================================== -// Compute a sub-block of the A*x MAtRIX-VECTOR product -// ==================================================== -void* COMMIT_A__block( void *ptr ) -{ - int id = (long)ptr; - int offset; - double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w; - double *x_Ptr0, *x_Ptr1, *x_Ptr2, *x_Ptr3, *x_Ptr4, *x_Ptr5, *x_Ptr6, *x_Ptr7, *x_Ptr8, *x_Ptr9, *x_Ptr10, *x_Ptr11, *x_Ptr12, *x_Ptr13, *x_Ptr14, *x_Ptr15, *x_Ptr16, *x_Ptr17, *x_Ptr18, *x_Ptr19; - double *Yptr, *YptrEnd; - float *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr; - uint32_t *t_v, *t_vEnd, *t_f; - uint16_t *t_o; - float *t_l; - -#if nIC>=1 - // intra-cellular compartments - t_v = ICv + ICthreads[id]; - t_vEnd = ICv + ICthreads[id+1]; - t_o = ICo + ICthreads[id]; - t_l = ICl + ICthreads[id]; - t_f = ICf + ICthreads[id]; - - while( t_v != t_vEnd ) - { - x_Ptr0 = x + *t_f; - x0 = *x_Ptr0; - #if nIC>=2 - x_Ptr1 = x_Ptr0 + nF; - x1 = *x_Ptr1; - #endif - #if nIC>=3 - x_Ptr2 = x_Ptr1 + nF; - x2 = *x_Ptr2; - #endif - #if nIC>=4 - x_Ptr3 = x_Ptr2 + nF; - x3 = *x_Ptr3; - #endif - #if nIC>=5 - x_Ptr4 = x_Ptr3 + nF; - x4 = *x_Ptr4; - #endif - #if nIC>=6 - x_Ptr5 = x_Ptr4 + nF; - x5 = *x_Ptr5; - #endif - #if nIC>=7 - x_Ptr6 = x_Ptr5 + nF; - x6 = *x_Ptr6; - #endif - #if nIC>=8 - x_Ptr7 = x_Ptr6 + nF; - x7 = *x_Ptr7; - #endif - #if nIC>=9 - x_Ptr8 = x_Ptr7 + nF; - x8 = *x_Ptr8; - #endif - #if nIC>=10 - x_Ptr9 = x_Ptr8 + nF; - x9 = *x_Ptr9; - #endif - #if nIC>=11 - x_Ptr10 = x_Ptr9 + nF; - x10 = *x_Ptr10; - #endif - #if nIC>=12 - x_Ptr11 = x_Ptr10 + nF; - x11 = *x_Ptr11; - #endif - #if nIC>=13 - x_Ptr12 = x_Ptr11 + nF; - x12 = *x_Ptr12; - #endif - #if nIC>=14 - x_Ptr13 = x_Ptr12 + nF; - x13 = *x_Ptr13; - #endif - #if nIC>=15 - x_Ptr14 = x_Ptr13 + nF; - x14 = *x_Ptr14; - #endif - #if nIC>=16 - x_Ptr15 = x_Ptr14 + nF; - x15 = *x_Ptr15; - #endif - #if nIC>=17 - x_Ptr16 = x_Ptr15 + nF; - x16 = *x_Ptr16; - #endif - #if nIC>=18 - x_Ptr17 = x_Ptr16 + nF; - x17 = *x_Ptr17; - #endif - #if nIC>=19 - x_Ptr18 = x_Ptr17 + nF; - x18 = *x_Ptr18; - #endif - #if nIC>=20 - x_Ptr19 = x_Ptr18 + nF; - x19 = *x_Ptr19; - #endif - - if ( x0 != 0 - #if nIC>=2 - || x1 != 0 - #endif - #if nIC>=3 - || x2 != 0 - #endif - #if nIC>=4 - || x3 != 0 - #endif - #if nIC>=5 - || x4 != 0 - 
#endif - #if nIC>=6 - || x5 != 0 - #endif - #if nIC>=7 - || x6 != 0 - #endif - #if nIC>=8 - || x7 != 0 - #endif - #if nIC>=9 - || x8 != 0 - #endif - #if nIC>=10 - || x9 != 0 - #endif - #if nIC>=11 - || x10 != 0 - #endif - #if nIC>=12 - || x11 != 0 - #endif - #if nIC>=13 - || x12 != 0 - #endif - #if nIC>=14 - || x13 != 0 - #endif - #if nIC>=15 - || x14 != 0 - #endif - #if nIC>=16 - || x15 != 0 - #endif - #if nIC>=17 - || x16 != 0 - #endif - #if nIC>=18 - || x17 != 0 - #endif - #if nIC>=19 - || x18 != 0 - #endif - #if nIC>=20 - || x19 != 0 - #endif - ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - w = (double)(*t_l); - offset = nS * (*t_o); - SFP0ptr = wmrSFP0 + offset; - #if nIC>=2 - SFP1ptr = wmrSFP1 + offset; - #endif - #if nIC>=3 - SFP2ptr = wmrSFP2 + offset; - #endif - #if nIC>=4 - SFP3ptr = wmrSFP3 + offset; - #endif - #if nIC>=5 - SFP4ptr = wmrSFP4 + offset; - #endif - #if nIC>=6 - SFP5ptr = wmrSFP5 + offset; - #endif - #if nIC>=7 - SFP6ptr = wmrSFP6 + offset; - #endif - #if nIC>=8 - SFP7ptr = wmrSFP7 + offset; - #endif - #if nIC>=9 - SFP8ptr = wmrSFP8 + offset; - #endif - #if nIC>=10 - SFP9ptr = wmrSFP9 + offset; - #endif - #if nIC>=11 - SFP10ptr = wmrSFP10 + offset; - #endif - #if nIC>=12 - SFP11ptr = wmrSFP11 + offset; - #endif - #if nIC>=13 - SFP12ptr = wmrSFP12 + offset; - #endif - #if nIC>=14 - SFP13ptr = wmrSFP13 + offset; - #endif - #if nIC>=15 - SFP14ptr = wmrSFP14 + offset; - #endif - #if nIC>=16 - SFP15ptr = wmrSFP15 + offset; - #endif - #if nIC>=17 - SFP16ptr = wmrSFP16 + offset; - #endif - #if nIC>=18 - SFP17ptr = wmrSFP17 + offset; - #endif - #if nIC>=19 - SFP18ptr = wmrSFP18 + offset; - #endif - #if nIC>=20 - SFP19ptr = wmrSFP19 + offset; - #endif - - while( Yptr != YptrEnd ) - (*Yptr++) += w * ( - x0 * (*SFP0ptr++) - #if nIC>=2 - + x1 * (*SFP1ptr++) - #endif - #if nIC>=3 - + x2 * (*SFP2ptr++) - #endif - #if nIC>=4 - + x3 * (*SFP3ptr++) - #endif - #if nIC>=5 - + x4 * (*SFP4ptr++) - #endif - #if nIC>=6 - + x5 * (*SFP5ptr++) - #endif - #if nIC>=7 - + x6 * (*SFP6ptr++) - #endif - #if nIC>=8 - + x7 * (*SFP7ptr++) - #endif - #if nIC>=9 - + x8 * (*SFP8ptr++) - #endif - #if nIC>=10 - + x9 * (*SFP9ptr++) - #endif - #if nIC>=11 - + x10 * (*SFP10ptr++) - #endif - #if nIC>=12 - + x11 * (*SFP11ptr++) - #endif - #if nIC>=13 - + x12 * (*SFP12ptr++) - #endif - #if nIC>=14 - + x13 * (*SFP13ptr++) - #endif - #if nIC>=15 - + x14 * (*SFP14ptr++) - #endif - #if nIC>=16 - + x15 * (*SFP15ptr++) - #endif - #if nIC>=17 - + x16 * (*SFP16ptr++) - #endif - #if nIC>=18 - + x17 * (*SFP17ptr++) - #endif - #if nIC>=19 - + x18 * (*SFP18ptr++) - #endif - #if nIC>=20 - + x19 * (*SFP19ptr++) - #endif - ); - } - - t_f++; - t_v++; - t_o++; - t_l++; - } -#endif - -#if nEC>=1 - // extra-cellular compartments - t_v = ECv + ECthreads[id]; - t_vEnd = ECv + ECthreads[id+1]; - t_o = ECo + ECthreads[id]; - - x_Ptr0 = x + nIC*nF + ECthreads[id]; - #if nEC>=2 - x_Ptr1 = x_Ptr0 + nE; - #endif - #if nEC>=3 - x_Ptr2 = x_Ptr1 + nE; - #endif - #if nEC>=4 - x_Ptr3 = x_Ptr2 + nE; - #endif - #if nEC>=5 - x_Ptr4 = x_Ptr3 + nE; - #endif - #if nEC>=6 - x_Ptr5 = x_Ptr4 + nE; - #endif - #if nEC>=7 - x_Ptr6 = x_Ptr5 + nE; - #endif - #if nEC>=8 - x_Ptr7 = x_Ptr6 + nE; - #endif - #if nEC>=9 - x_Ptr8 = x_Ptr7 + nE; - #endif - #if nEC>=10 - x_Ptr9 = x_Ptr8 + nE; - #endif - #if nEC>=11 - x_Ptr10 = x_Ptr9 + nE; - #endif - #if nEC>=12 - x_Ptr11 = x_Ptr10 + nE; - #endif - #if nEC>=13 - x_Ptr12 = x_Ptr11 + nE; - #endif - #if nEC>=14 - x_Ptr13 = x_Ptr12 + nE; - #endif - #if nEC>=15 - x_Ptr14 = x_Ptr13 + nE; - #endif - #if nEC>=16 
- x_Ptr15 = x_Ptr14 + nE; - #endif - #if nEC>=17 - x_Ptr16 = x_Ptr15 + nE; - #endif - #if nEC>=18 - x_Ptr17 = x_Ptr16 + nE; - #endif - #if nEC>=19 - x_Ptr18 = x_Ptr17 + nE; - #endif - #if nEC>=20 - x_Ptr19 = x_Ptr18 + nE; - #endif - - while( t_v != t_vEnd ) - { - x0 = *x_Ptr0++; - #if nEC>=2 - x1 = *x_Ptr1++; - #endif - #if nEC>=3 - x2 = *x_Ptr2++; - #endif - #if nEC>=4 - x3 = *x_Ptr3++; - #endif - #if nEC>=5 - x4 = *x_Ptr4++; - #endif - #if nEC>=6 - x5 = *x_Ptr5++; - #endif - #if nEC>=7 - x6 = *x_Ptr6++; - #endif - #if nEC>=8 - x7 = *x_Ptr7++; - #endif - #if nEC>=9 - x8 = *x_Ptr8++; - #endif - #if nEC>=10 - x9 = *x_Ptr9++; - #endif - #if nEC>=11 - x10 = *x_Ptr10++; - #endif - #if nEC>=12 - x11 = *x_Ptr11++; - #endif - #if nEC>=13 - x12 = *x_Ptr12++; - #endif - #if nEC>=14 - x13 = *x_Ptr13++; - #endif - #if nEC>=15 - x14 = *x_Ptr14++; - #endif - #if nEC>=16 - x15 = *x_Ptr15++; - #endif - #if nEC>=17 - x16 = *x_Ptr16++; - #endif - #if nEC>=18 - x17 = *x_Ptr17++; - #endif - #if nEC>=19 - x18 = *x_Ptr18++; - #endif - #if nEC>=20 - x19 = *x_Ptr19++; - #endif - if ( - x0 != 0 - #if nEC>=2 - || x1 != 0 - #endif - #if nEC>=3 - || x2 != 0 - #endif - #if nEC>=4 - || x3 != 0 - #endif - #if nEC>=5 - || x4 != 0 - #endif - #if nEC>=6 - || x5 != 0 - #endif - #if nEC>=7 - || x6 != 0 - #endif - #if nEC>=8 - || x7 != 0 - #endif - #if nEC>=9 - || x8 != 0 - #endif - #if nEC>=10 - || x9 != 0 - #endif - #if nEC>=11 - || x10 != 0 - #endif - #if nEC>=12 - || x11 != 0 - #endif - #if nEC>=13 - || x12 != 0 - #endif - #if nEC>=14 - || x13 != 0 - #endif - #if nEC>=15 - || x14 != 0 - #endif - #if nEC>=16 - || x15 != 0 - #endif - #if nEC>=17 - || x16 != 0 - #endif - #if nEC>=18 - || x17 != 0 - #endif - #if nEC>=19 - || x18 != 0 - #endif - #if nEC>=20 - || x19 != 0 - #endif - ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - offset = nS * (*t_o); - SFP0ptr = wmhSFP0 + offset; - #if nEC>=2 - SFP1ptr = wmhSFP1 + offset; - #endif - #if nEC>=3 - SFP2ptr = wmhSFP2 + offset; - #endif - #if nEC>=4 - SFP3ptr = wmhSFP3 + offset; - #endif - #if nEC>=5 - SFP4ptr = wmhSFP4 + offset; - #endif - #if nEC>=6 - SFP5ptr = wmhSFP5 + offset; - #endif - #if nEC>=7 - SFP6ptr = wmhSFP6 + offset; - #endif - #if nEC>=8 - SFP7ptr = wmhSFP7 + offset; - #endif - #if nEC>=9 - SFP8ptr = wmhSFP8 + offset; - #endif - #if nEC>=10 - SFP9ptr = wmhSFP9 + offset; - #endif - #if nEC>=11 - SFP10ptr = wmhSFP10 + offset; - #endif - #if nEC>=12 - SFP11ptr = wmhSFP11 + offset; - #endif - #if nEC>=13 - SFP12ptr = wmhSFP12 + offset; - #endif - #if nEC>=14 - SFP13ptr = wmhSFP13 + offset; - #endif - #if nEC>=15 - SFP14ptr = wmhSFP14 + offset; - #endif - #if nEC>=16 - SFP15ptr = wmhSFP15 + offset; - #endif - #if nEC>=17 - SFP16ptr = wmhSFP16 + offset; - #endif - #if nEC>=18 - SFP17ptr = wmhSFP17 + offset; - #endif - #if nEC>=19 - SFP18ptr = wmhSFP18 + offset; - #endif - #if nEC>=20 - SFP19ptr = wmhSFP19 + offset; - #endif - - while( Yptr != YptrEnd ) - (*Yptr++) += ( - x0 * (*SFP0ptr++) - #if nEC>=2 - + x1 * (*SFP1ptr++) - #endif - #if nEC>=3 - + x2 * (*SFP2ptr++) - #endif - #if nEC>=4 - + x3 * (*SFP3ptr++) - #endif - #if nEC>=5 - + x4 * (*SFP4ptr++) - #endif - #if nEC>=6 - + x5 * (*SFP5ptr++) - #endif - #if nEC>=7 - + x6 * (*SFP6ptr++) - #endif - #if nEC>=8 - + x7 * (*SFP7ptr++) - #endif - #if nEC>=9 - + x8 * (*SFP8ptr++) - #endif - #if nEC>=10 - + x9 * (*SFP9ptr++) - #endif - #if nEC>=11 - + x10 * (*SFP10ptr++) - #endif - #if nEC>=12 - + x11 * (*SFP11ptr++) - #endif - #if nEC>=13 - + x12 * (*SFP12ptr++) - #endif - #if nEC>=14 - + x13 * (*SFP13ptr++) - #endif 
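/* The extra-cellular sum here differs from the intra-cellular block in two ways:
   each EC segment has its own coefficient per compartment (the x_Ptr* pointers
   advance with the segment), and no segment-length weight w is applied; the
   kernel samples are scaled by the coefficients only. */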
- #if nEC>=15 - + x14 * (*SFP14ptr++) - #endif - #if nEC>=16 - + x15 * (*SFP15ptr++) - #endif - #if nEC>=17 - + x16 * (*SFP16ptr++) - #endif - #if nEC>=18 - + x17 * (*SFP17ptr++) - #endif - #if nEC>=19 - + x18 * (*SFP18ptr++) - #endif - #if nEC>=20 - + x19 * (*SFP19ptr++) - #endif - - ); - } - t_v++; - t_o++; - } -#endif - -#if nISO>=1 - // isotropic compartments - t_v = ISOv + ISOthreads[id]; - t_vEnd = ISOv + ISOthreads[id+1]; - - x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreads[id]; - #if nISO>=2 - x_Ptr1 = x_Ptr0 + nV; - #endif - #if nISO>=3 - x_Ptr2 = x_Ptr1 + nV; - #endif - #if nISO>=4 - x_Ptr3 = x_Ptr2 + nV; - #endif - #if nISO>=5 - x_Ptr4 = x_Ptr3 + nV; - #endif - #if nISO>=6 - x_Ptr5 = x_Ptr4 + nV; - #endif - #if nISO>=7 - x_Ptr6 = x_Ptr5 + nV; - #endif - #if nISO>=8 - x_Ptr7 = x_Ptr6 + nV; - #endif - #if nISO>=9 - x_Ptr8 = x_Ptr7 + nV; - #endif - #if nISO>=10 - x_Ptr9 = x_Ptr8 + nV; - #endif - #if nISO>=11 - x_Ptr10 = x_Ptr9 + nV; - #endif - #if nISO>=12 - x_Ptr11 = x_Ptr10 + nV; - #endif - #if nISO>=13 - x_Ptr12 = x_Ptr11 + nV; - #endif - #if nISO>=14 - x_Ptr13 = x_Ptr12 + nV; - #endif - #if nISO>=15 - x_Ptr14 = x_Ptr13 + nV; - #endif - #if nISO>=16 - x_Ptr15 = x_Ptr14 + nV; - #endif - #if nISO>=17 - x_Ptr16 = x_Ptr15 + nV; - #endif - #if nISO>=18 - x_Ptr17 = x_Ptr16 + nV; - #endif - #if nISO>=19 - x_Ptr18 = x_Ptr17 + nV; - #endif - #if nISO>=20 - x_Ptr19 = x_Ptr18 + nV; - #endif - - while( t_v != t_vEnd ) - { - x0 = *x_Ptr0++; - #if nISO>=2 - x1 = *x_Ptr1++; - #endif - #if nISO>=3 - x2 = *x_Ptr2++; - #endif - #if nISO>=4 - x3 = *x_Ptr3++; - #endif - #if nISO>=5 - x4 = *x_Ptr4++; - #endif - #if nISO>=6 - x5 = *x_Ptr5++; - #endif - #if nISO>=7 - x6 = *x_Ptr6++; - #endif - #if nISO>=8 - x7 = *x_Ptr7++; - #endif - #if nISO>=9 - x8 = *x_Ptr8++; - #endif - #if nISO>=10 - x9 = *x_Ptr9++; - #endif - #if nISO>=11 - x10 = *x_Ptr10++; - #endif - #if nISO>=12 - x11 = *x_Ptr11++; - #endif - #if nISO>=13 - x12 = *x_Ptr12++; - #endif - #if nISO>=14 - x13 = *x_Ptr13++; - #endif - #if nISO>=15 - x14 = *x_Ptr14++; - #endif - #if nISO>=16 - x15 = *x_Ptr15++; - #endif - #if nISO>=17 - x16 = *x_Ptr16++; - #endif - #if nISO>=18 - x17 = *x_Ptr17++; - #endif - #if nISO>=19 - x18 = *x_Ptr18++; - #endif - #if nISO>=20 - x19 = *x_Ptr19++; - #endif - - if ( - x0 != 0 - #if nISO>=2 - || x1 != 0 - #endif - #if nISO>=3 - || x2 != 0 - #endif - #if nISO>=4 - || x3 != 0 - #endif - #if nISO>=5 - || x4 != 0 - #endif - #if nISO>=6 - || x5 != 0 - #endif - #if nISO>=7 - || x6 != 0 - #endif - #if nISO>=8 - || x7 != 0 - #endif - #if nISO>=9 - || x8 != 0 - #endif - #if nISO>=10 - || x9 != 0 - #endif - #if nISO>=11 - || x10 != 0 - #endif - #if nISO>=12 - || x11 != 0 - #endif - #if nISO>=13 - || x12 != 0 - #endif - #if nISO>=14 - || x13 != 0 - #endif - #if nISO>=15 - || x14 != 0 - #endif - #if nISO>=16 - || x15 != 0 - #endif - #if nISO>=17 - || x16 != 0 - #endif - #if nISO>=18 - || x17 != 0 - #endif - #if nISO>=19 - || x18 != 0 - #endif - #if nISO>=20 - || x19 != 0 - #endif - ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - SFP0ptr = isoSFP0; - #if nISO>=2 - SFP1ptr = isoSFP1; - #endif - #if nISO>=3 - SFP2ptr = isoSFP2; - #endif - #if nISO>=4 - SFP3ptr = isoSFP3; - #endif - #if nISO>=5 - SFP4ptr = isoSFP4; - #endif - #if nISO>=6 - SFP5ptr = isoSFP5; - #endif - #if nISO>=7 - SFP6ptr = isoSFP6; - #endif - #if nISO>=8 - SFP7ptr = isoSFP7; - #endif - #if nISO>=9 - SFP8ptr = isoSFP8; - #endif - #if nISO>=10 - SFP9ptr = isoSFP9; - #endif - #if nISO>=11 - SFP10ptr = isoSFP10; - #endif - #if nISO>=12 - SFP11ptr = isoSFP11; - 
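/* Isotropic kernels have no orientation, so these SFP pointers are set to the
   start of each isotropic LUT (no nS*(*t_o) offset as in the IC/EC blocks);
   every voxel reuses the same nS-sample profile per compartment. */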
#endif - #if nISO>=13 - SFP12ptr = isoSFP12; - #endif - #if nISO>=14 - SFP13ptr = isoSFP13; - #endif - #if nISO>=15 - SFP14ptr = isoSFP14; - #endif - #if nISO>=16 - SFP15ptr = isoSFP15; - #endif - #if nISO>=17 - SFP16ptr = isoSFP16; - #endif - #if nISO>=18 - SFP17ptr = isoSFP17; - #endif - #if nISO>=19 - SFP18ptr = isoSFP18; - #endif - #if nISO>=20 - SFP19ptr = isoSFP19; - #endif - - while( Yptr != YptrEnd ) - (*Yptr++) += ( - x0 * (*SFP0ptr++) - #if nISO>=2 - + x1 * (*SFP1ptr++) - #endif - #if nISO>=3 - + x2 * (*SFP2ptr++) - #endif - #if nISO>=4 - + x3 * (*SFP3ptr++) - #endif - #if nISO>=5 - + x4 * (*SFP4ptr++) - #endif - #if nISO>=6 - + x5 * (*SFP5ptr++) - #endif - #if nISO>=7 - + x6 * (*SFP6ptr++) - #endif - #if nISO>=8 - + x7 * (*SFP7ptr++) - #endif - #if nISO>=9 - + x8 * (*SFP8ptr++) - #endif - #if nISO>=10 - + x9 * (*SFP9ptr++) - #endif - #if nISO>=11 - + x10 * (*SFP10ptr++) - #endif - #if nISO>=12 - + x11 * (*SFP11ptr++) - #endif - #if nISO>=13 - + x12 * (*SFP12ptr++) - #endif - #if nISO>=14 - + x13 * (*SFP13ptr++) - #endif - #if nISO>=15 - + x14 * (*SFP14ptr++) - #endif - #if nISO>=16 - + x15 * (*SFP15ptr++) - #endif - #if nISO>=17 - + x16 * (*SFP16ptr++) - #endif - #if nISO>=18 - + x17 * (*SFP17ptr++) - #endif - #if nISO>=19 - + x18 * (*SFP18ptr++) - #endif - #if nISO>=20 - + x19 * (*SFP19ptr++) - #endif - ); - } - t_v++; - } -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_A( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads -) -{ - nF = _nF; - n = _n; - nE = _nE; - nV = _nV; - nS = _nS; - ndirs = _ndirs; - - x = _vIN; - Y = _vOUT; - - ICf = _ICf; - ICv = _ICv; - ICo = _ICo; - ICl = _ICl; - ECv = _ECv; - ECo = _ECo; - ISOv = _ISOv; - - #if nIC>=1 - wmrSFP0 = _wmrSFP; - #if nIC>=2 - wmrSFP1 = wmrSFP0 + _ndirs*_nS; - #if nIC>=3 - wmrSFP2 = wmrSFP1 + _ndirs*_nS; - #if nIC>=4 - wmrSFP3 = wmrSFP2 + _ndirs*_nS; - #if nIC>=5 - wmrSFP4 = wmrSFP3 + _ndirs*_nS; - #if nIC>=6 - wmrSFP5 = wmrSFP4 + _ndirs*_nS; - #if nIC>=7 - wmrSFP6 = wmrSFP5 + _ndirs*_nS; - #if nIC>=8 - wmrSFP7 = wmrSFP6 + _ndirs*_nS; - #if nIC>=9 - wmrSFP8 = wmrSFP7 + _ndirs*_nS; - #if nIC>=10 - wmrSFP9 = wmrSFP8 + _ndirs*_nS; - #if nIC>=11 - wmrSFP10 = wmrSFP9 + _ndirs*_nS; - #if nIC>=12 - wmrSFP11 = wmrSFP10 + _ndirs*_nS; - #if nIC>=13 - wmrSFP12 = wmrSFP11 + _ndirs*_nS; - #if nIC>=14 - wmrSFP13 = wmrSFP12 + _ndirs*_nS; - #if nIC>=15 - wmrSFP14 = wmrSFP13 + _ndirs*_nS; - #if nIC>=16 - wmrSFP15 = wmrSFP14 + _ndirs*_nS; - #if nIC>=17 - wmrSFP16 = wmrSFP15 + _ndirs*_nS; - #if nIC>=18 - wmrSFP17 = wmrSFP16 + _ndirs*_nS; - #if nIC>=19 - wmrSFP18 = wmrSFP17 + _ndirs*_nS; - #if nIC>=20 - wmrSFP19 = wmrSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nEC>=1 - wmhSFP0 = _wmhSFP; - #if nEC>=2 - wmhSFP1 = wmhSFP0 + _ndirs*_nS; - #if nEC>=3 - wmhSFP2 = wmhSFP1 + _ndirs*_nS; - #if nEC>=4 - wmhSFP3 = wmhSFP2 + _ndirs*_nS; - #if nEC>=5 - wmhSFP4 = wmhSFP3 + _ndirs*_nS; - #if nEC>=6 - wmhSFP5 = wmhSFP4 + _ndirs*_nS; - #if nEC>=7 - wmhSFP6 = wmhSFP5 + _ndirs*_nS; - #if nEC>=8 - wmhSFP7 = wmhSFP6 + _ndirs*_nS; - #if nEC>=9 - 
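/* LUT memory layout assumed by this pointer chaining: the anisotropic response
   functions are packed contiguously, one block of ndirs*nS floats per compartment,
   so pointer k is simply base + k*ndirs*nS (wmrSFP* for IC, wmhSFP* for EC);
   the isotropic LUTs set up below are nS floats each. */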
wmhSFP8 = wmhSFP7 + _ndirs*_nS; - #if nEC>=10 - wmhSFP9 = wmhSFP8 + _ndirs*_nS; - #if nEC>=11 - wmhSFP10 = wmhSFP9 + _ndirs*_nS; - #if nEC>=12 - wmhSFP11 = wmhSFP10 + _ndirs*_nS; - #if nEC>=13 - wmhSFP12 = wmhSFP11 + _ndirs*_nS; - #if nEC>=14 - wmhSFP13 = wmhSFP12 + _ndirs*_nS; - #if nEC>=15 - wmhSFP14 = wmhSFP13 + _ndirs*_nS; - #if nEC>=16 - wmhSFP15 = wmhSFP14 + _ndirs*_nS; - #if nEC>=17 - wmhSFP16 = wmhSFP15 + _ndirs*_nS; - #if nEC>=18 - wmhSFP17 = wmhSFP16 + _ndirs*_nS; - #if nEC>=19 - wmhSFP18 = wmhSFP17 + _ndirs*_nS; - #if nEC>=20 - wmhSFP19 = wmhSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nISO>=1 - isoSFP0 = _isoSFP; - #if nISO>=2 - isoSFP1 = isoSFP0 + _nS; - #if nISO>=3 - isoSFP2 = isoSFP1 + _nS; - #if nISO>=4 - isoSFP3 = isoSFP2 + _nS; - #if nISO>=5 - isoSFP4 = isoSFP3 + _nS; - #if nISO>=6 - isoSFP5 = isoSFP4 + _nS; - #if nISO>=7 - isoSFP6 = isoSFP5 + _nS; - #if nISO>=8 - isoSFP7 = isoSFP6 + _nS; - #if nISO>=9 - isoSFP8 = isoSFP7 + _nS; - #if nISO>=10 - isoSFP9 = isoSFP8 + _nS; - #if nISO>=11 - isoSFP10 = isoSFP9 + _nS; - #if nISO>=12 - isoSFP11 = isoSFP10 + _nS; - #if nISO>=13 - isoSFP12 = isoSFP11 + _nS; - #if nISO>=14 - isoSFP13 = isoSFP12 + _nS; - #if nISO>=15 - isoSFP14 = isoSFP13 + _nS; - #if nISO>=16 - isoSFP15 = isoSFP14 + _nS; - #if nISO>=17 - isoSFP16 = isoSFP15 + _nS; - #if nISO>=18 - isoSFP17 = isoSFP16 + _nS; - #if nISO>=19 - isoSFP18 = isoSFP17 + _nS; - #if nISO>=20 - isoSFP19 = isoSFP18 + _nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - - ICthreads = _ICthreads; - ECthreads = _ECthreads; - ISOthreads = _ISOthreads; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t=1 - // intra-cellular compartments - t_v = ICv; - t_vEnd = ICv + n; - t_o = ICo; - t_l = ICl; - t_f = ICf; - t_t = ICthreadsT; - - while( t_v != t_vEnd ) - { - // in this case, I need to walk throug because the segments are ordered in "voxel order" - if ( *t_t == id ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - offset = nS * (*t_o); - - Y_tmp = *Yptr; - SFP0ptr = wmrSFP0 + offset; - x0 = (*SFP0ptr++) * Y_tmp; - #if nIC>=2 - SFP1ptr = wmrSFP1 + offset; - x1 = (*SFP1ptr++) * Y_tmp; - #endif - #if nIC>=3 - SFP2ptr = wmrSFP2 + offset; - x2 = (*SFP2ptr++) * Y_tmp; - #endif - #if nIC>=4 - SFP3ptr = wmrSFP3 + offset; - x3 = (*SFP3ptr++) * Y_tmp; - #endif - #if nIC>=5 - SFP4ptr = wmrSFP4 + offset; - x4 = (*SFP4ptr++) * Y_tmp; - #endif - #if nIC>=6 - SFP5ptr = wmrSFP5 + offset; - x5 = (*SFP5ptr++) * Y_tmp; - #endif - #if nIC>=7 - SFP6ptr = wmrSFP6 + offset; - x6 = (*SFP6ptr++) * Y_tmp; - #endif - #if nIC>=8 - SFP7ptr = wmrSFP7 + offset; - x7 = (*SFP7ptr++) * Y_tmp; - #endif - #if nIC>=9 - SFP8ptr = wmrSFP8 + offset; - x8 = (*SFP8ptr++) * Y_tmp; - #endif - #if nIC>=10 - SFP9ptr = wmrSFP9 + offset; - x9 = (*SFP9ptr++) * Y_tmp; - #endif - #if nIC>=11 - SFP10ptr = wmrSFP10 + offset; - x10 = (*SFP10ptr++) * Y_tmp; - #endif - #if nIC>=12 - SFP11ptr = wmrSFP11 + offset; - x11 = (*SFP11ptr++) * Y_tmp; - #endif - #if nIC>=13 - SFP12ptr = wmrSFP12 + offset; - x12 = (*SFP12ptr++) * Y_tmp; - #endif - #if nIC>=14 - SFP13ptr = wmrSFP13 + offset; - x13 = (*SFP13ptr++) * Y_tmp; - #endif - #if nIC>=15 - SFP14ptr = wmrSFP14 + offset; - x14 = (*SFP14ptr++) * Y_tmp; - 
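/* Adjoint (A'y) intra-cellular part: for each segment owned by this thread
   (ICthreadsT tag == id), x0..x19 accumulate the dot product between the voxel's
   nS samples of y and each rotated kernel; further below those sums are scaled
   by the segment length w and added into x[*t_f + k*nF]. */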
#endif - #if nIC>=16 - SFP15ptr = wmrSFP15 + offset; - x15 = (*SFP15ptr++) * Y_tmp; - #endif - #if nIC>=17 - SFP16ptr = wmrSFP16 + offset; - x16 = (*SFP16ptr++) * Y_tmp; - #endif - #if nIC>=18 - SFP17ptr = wmrSFP17 + offset; - x17 = (*SFP17ptr++) * Y_tmp; - #endif - #if nIC>=19 - SFP18ptr = wmrSFP18 + offset; - x18 = (*SFP18ptr++) * Y_tmp; - #endif - #if nIC>=20 - SFP19ptr = wmrSFP19 + offset; - x19 = (*SFP19ptr++) * Y_tmp; - #endif - - while( ++Yptr != YptrEnd ) - { - Y_tmp = *Yptr; - x0 += (*SFP0ptr++) * Y_tmp; - #if nIC>=2 - x1 += (*SFP1ptr++) * Y_tmp; - #endif - #if nIC>=3 - x2 += (*SFP2ptr++) * Y_tmp; - #endif - #if nIC>=4 - x3 += (*SFP3ptr++) * Y_tmp; - #endif - #if nIC>=5 - x4 += (*SFP4ptr++) * Y_tmp; - #endif - #if nIC>=6 - x5 += (*SFP5ptr++) * Y_tmp; - #endif - #if nIC>=7 - x6 += (*SFP6ptr++) * Y_tmp; - #endif - #if nIC>=8 - x7 += (*SFP7ptr++) * Y_tmp; - #endif - #if nIC>=9 - x8 += (*SFP8ptr++) * Y_tmp; - #endif - #if nIC>=10 - x9 += (*SFP9ptr++) * Y_tmp; - #endif - #if nIC>=11 - x10 += (*SFP10ptr++) * Y_tmp; - #endif - #if nIC>=12 - x11 += (*SFP11ptr++) * Y_tmp; - #endif - #if nIC>=13 - x12 += (*SFP12ptr++) * Y_tmp; - #endif - #if nIC>=14 - x13 += (*SFP13ptr++) * Y_tmp; - #endif - #if nIC>=15 - x14 += (*SFP14ptr++) * Y_tmp; - #endif - #if nIC>=16 - x15 += (*SFP15ptr++) * Y_tmp; - #endif - #if nIC>=17 - x16 += (*SFP16ptr++) * Y_tmp; - #endif - #if nIC>=18 - x17 += (*SFP17ptr++) * Y_tmp; - #endif - #if nIC>=19 - x18 += (*SFP18ptr++) * Y_tmp; - #endif - #if nIC>=20 - x19 += (*SFP19ptr++) * Y_tmp; - #endif - } - - w = (double)(*t_l); - x[*t_f] += w * x0; - #if nIC>=2 - x[*t_f+nF] += w * x1; - #endif - #if nIC>=3 - x[*t_f+2*nF] += w * x2; - #endif - #if nIC>=4 - x[*t_f+3*nF] += w * x3; - #endif - #if nIC>=5 - x[*t_f+4*nF] += w * x4; - #endif - #if nIC>=6 - x[*t_f+5*nF] += w * x5; - #endif - #if nIC>=7 - x[*t_f+6*nF] += w * x6; - #endif - #if nIC>=8 - x[*t_f+7*nF] += w * x7; - #endif - #if nIC>=9 - x[*t_f+8*nF] += w * x8; - #endif - #if nIC>=10 - x[*t_f+9*nF] += w * x9; - #endif - #if nIC>=11 - x[*t_f+10*nF] += w * x10; - #endif - #if nIC>=12 - x[*t_f+11*nF] += w * x11; - #endif - #if nIC>=13 - x[*t_f+12*nF] += w * x12; - #endif - #if nIC>=14 - x[*t_f+13*nF] += w * x13; - #endif - #if nIC>=15 - x[*t_f+14*nF] += w * x14; - #endif - #if nIC>=16 - x[*t_f+15*nF] += w * x15; - #endif - #if nIC>=17 - x[*t_f+16*nF] += w * x16; - #endif - #if nIC>=18 - x[*t_f+17*nF] += w * x17; - #endif - #if nIC>=19 - x[*t_f+18*nF] += w * x18; - #endif - #if nIC>=20 - x[*t_f+19*nF] += w * x19; - #endif - } - - t_f++; - t_v++; - t_o++; - t_l++; - t_t++; - } -#endif - -#if nEC>=1 - // extra-cellular compartments - t_v = ECv + ECthreadsT[id]; - t_vEnd = ECv + ECthreadsT[id+1]; - t_o = ECo + ECthreadsT[id]; - - x_Ptr0 = x + nIC*nF + ECthreadsT[id]; - #if nEC>=2 - x_Ptr1 = x_Ptr0 + nE; - #endif - #if nEC>=3 - x_Ptr2 = x_Ptr1 + nE; - #endif - #if nEC>=4 - x_Ptr3 = x_Ptr2 + nE; - #endif - #if nEC>=5 - x_Ptr4 = x_Ptr3 + nE; - #endif - #if nEC>=6 - x_Ptr5 = x_Ptr4 + nE; - #endif - #if nEC>=7 - x_Ptr6 = x_Ptr5 + nE; - #endif - #if nEC>=8 - x_Ptr7 = x_Ptr6 + nE; - #endif - #if nEC>=9 - x_Ptr8 = x_Ptr7 + nE; - #endif - #if nEC>=10 - x_Ptr9 = x_Ptr8 + nE; - #endif - #if nEC>=11 - x_Ptr10 = x_Ptr9 + nE; - #endif - #if nEC>=12 - x_Ptr11 = x_Ptr10 + nE; - #endif - #if nEC>=13 - x_Ptr12 = x_Ptr11 + nE; - #endif - #if nEC>=14 - x_Ptr13 = x_Ptr12 + nE; - #endif - #if nEC>=15 - x_Ptr14 = x_Ptr13 + nE; - #endif - #if nEC>=16 - x_Ptr15 = x_Ptr14 + nE; - #endif - #if nEC>=17 - x_Ptr16 = x_Ptr15 + nE; - #endif - #if nEC>=18 - 
x_Ptr17 = x_Ptr16 + nE; - #endif - #if nEC>=19 - x_Ptr18 = x_Ptr17 + nE; - #endif - #if nEC>=20 - x_Ptr19 = x_Ptr18 + nE; - #endif - - while( t_v != t_vEnd ) - { - Yptr = Y + nS * (*t_v++); - YptrEnd = Yptr + nS; - offset = nS * (*t_o++); - - Y_tmp = *Yptr; - SFP0ptr = wmhSFP0 + offset; - x0 = (*SFP0ptr++) * Y_tmp; - #if nEC>=2 - SFP1ptr = wmhSFP1 + offset; - x1 = (*SFP1ptr++) * Y_tmp; - #endif - #if nEC>=3 - SFP2ptr = wmhSFP2 + offset; - x2 = (*SFP2ptr++) * Y_tmp; - #endif - #if nEC>=4 - SFP3ptr = wmhSFP3 + offset; - x3 = (*SFP3ptr++) * Y_tmp; - #endif - #if nEC>=5 - SFP4ptr = wmhSFP4 + offset; - x4 = (*SFP4ptr++) * Y_tmp; - #endif - #if nEC>=6 - SFP5ptr = wmhSFP5 + offset; - x5 = (*SFP5ptr++) * Y_tmp; - #endif - #if nEC>=7 - SFP6ptr = wmhSFP6 + offset; - x6 = (*SFP6ptr++) * Y_tmp; - #endif - #if nEC>=8 - SFP7ptr = wmhSFP7 + offset; - x7 = (*SFP7ptr++) * Y_tmp; - #endif - #if nEC>=9 - SFP8ptr = wmhSFP8 + offset; - x8 = (*SFP8ptr++) * Y_tmp; - #endif - #if nEC>=10 - SFP9ptr = wmhSFP9 + offset; - x9 = (*SFP9ptr++) * Y_tmp; - #endif - #if nEC>=11 - SFP10ptr = wmhSFP10 + offset; - x10 = (*SFP10ptr++) * Y_tmp; - #endif - #if nEC>=12 - SFP11ptr = wmhSFP11 + offset; - x11 = (*SFP11ptr++) * Y_tmp; - #endif - #if nEC>=13 - SFP12ptr = wmhSFP12 + offset; - x12 = (*SFP12ptr++) * Y_tmp; - #endif - #if nEC>=14 - SFP13ptr = wmhSFP13 + offset; - x13 = (*SFP13ptr++) * Y_tmp; - #endif - #if nEC>=15 - SFP14ptr = wmhSFP14 + offset; - x14 = (*SFP14ptr++) * Y_tmp; - #endif - #if nEC>=16 - SFP15ptr = wmhSFP15 + offset; - x15 = (*SFP15ptr++) * Y_tmp; - #endif - #if nEC>=17 - SFP16ptr = wmhSFP16 + offset; - x16 = (*SFP16ptr++) * Y_tmp; - #endif - #if nEC>=18 - SFP17ptr = wmhSFP17 + offset; - x17 = (*SFP17ptr++) * Y_tmp; - #endif - #if nEC>=19 - SFP18ptr = wmhSFP18 + offset; - x18 = (*SFP18ptr++) * Y_tmp; - #endif - #if nEC>=20 - SFP19ptr = wmhSFP19 + offset; - x19 = (*SFP19ptr++) * Y_tmp; - #endif - - while( ++Yptr != YptrEnd ) - { - Y_tmp = *Yptr; - x0 += (*SFP0ptr++) * Y_tmp; - #if nEC>=2 - x1 += (*SFP1ptr++) * Y_tmp; - #endif - #if nEC>=3 - x2 += (*SFP2ptr++) * Y_tmp; - #endif - #if nEC>=4 - x3 += (*SFP3ptr++) * Y_tmp; - #endif - #if nEC>=5 - x4 += (*SFP4ptr++) * Y_tmp; - #endif - #if nEC>=6 - x5 += (*SFP5ptr++) * Y_tmp; - #endif - #if nEC>=7 - x6 += (*SFP6ptr++) * Y_tmp; - #endif - #if nEC>=8 - x7 += (*SFP7ptr++) * Y_tmp; - #endif - #if nEC>=9 - x8 += (*SFP8ptr++) * Y_tmp; - #endif - #if nEC>=10 - x9 += (*SFP9ptr++) * Y_tmp; - #endif - #if nEC>=11 - x10 += (*SFP10ptr++) * Y_tmp; - #endif - #if nEC>=12 - x11 += (*SFP11ptr++) * Y_tmp; - #endif - #if nEC>=13 - x12 += (*SFP12ptr++) * Y_tmp; - #endif - #if nEC>=14 - x13 += (*SFP13ptr++) * Y_tmp; - #endif - #if nEC>=15 - x14 += (*SFP14ptr++) * Y_tmp; - #endif - #if nEC>=16 - x15 += (*SFP15ptr++) * Y_tmp; - #endif - #if nEC>=17 - x16 += (*SFP16ptr++) * Y_tmp; - #endif - #if nEC>=18 - x17 += (*SFP17ptr++) * Y_tmp; - #endif - #if nEC>=19 - x18 += (*SFP18ptr++) * Y_tmp; - #endif - #if nEC>=20 - x19 += (*SFP19ptr++) * Y_tmp; - #endif - } - (*x_Ptr0++) += x0; - #if nEC>=2 - (*x_Ptr1++) += x1; - #endif - #if nEC>=3 - (*x_Ptr2++) += x2; - #endif - #if nEC>=4 - (*x_Ptr3++) += x3; - #endif - #if nEC>=5 - (*x_Ptr4++) += x4; - #endif - #if nEC>=6 - (*x_Ptr5++) += x5; - #endif - #if nEC>=7 - (*x_Ptr6++) += x6; - #endif - #if nEC>=8 - (*x_Ptr7++) += x7; - #endif - #if nEC>=9 - (*x_Ptr8++) += x8; - #endif - #if nEC>=10 - (*x_Ptr9++) += x9; - #endif - #if nEC>=11 - (*x_Ptr10++) += x10; - #endif - #if nEC>=12 - (*x_Ptr11++) += x11; - #endif - #if nEC>=13 - (*x_Ptr12++) += x12; - 
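/* Adjoint extra-cellular write-back: each thread owns the contiguous range
   ECv[ECthreadsT[id]..ECthreadsT[id+1]) and writes every EC coefficient exactly
   once through the advancing x_Ptr* pointers, so the threads never touch the
   same entries and no synchronisation is needed here. */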
#endif - #if nEC>=14 - (*x_Ptr13++) += x13; - #endif - #if nEC>=15 - (*x_Ptr14++) += x14; - #endif - #if nEC>=16 - (*x_Ptr15++) += x15; - #endif - #if nEC>=17 - (*x_Ptr16++) += x16; - #endif - #if nEC>=18 - (*x_Ptr17++) += x17; - #endif - #if nEC>=19 - (*x_Ptr18++) += x18; - #endif - #if nEC>=20 - (*x_Ptr19++) += x19; - #endif - } -#endif - -#if nISO>=1 - // isotropic compartments - t_v = ISOv + ISOthreadsT[id]; - t_vEnd = ISOv + ISOthreadsT[id+1]; - - x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreadsT[id]; - #if nISO>=2 - x_Ptr1 = x_Ptr0 + nV; - #endif - #if nISO>=3 - x_Ptr2 = x_Ptr1 + nV; - #endif - #if nISO>=4 - x_Ptr3 = x_Ptr2 + nV; - #endif - #if nISO>=5 - x_Ptr4 = x_Ptr3 + nV; - #endif - #if nISO>=6 - x_Ptr5 = x_Ptr4 + nV; - #endif - #if nISO>=7 - x_Ptr6 = x_Ptr5 + nV; - #endif - #if nISO>=8 - x_Ptr7 = x_Ptr6 + nV; - #endif - #if nISO>=9 - x_Ptr8 = x_Ptr7 + nV; - #endif - #if nISO>=10 - x_Ptr9 = x_Ptr8 + nV; - #endif - #if nISO>=11 - x_Ptr10 = x_Ptr9 + nV; - #endif - #if nISO>=12 - x_Ptr11 = x_Ptr10 + nV; - #endif - #if nISO>=13 - x_Ptr12 = x_Ptr11 + nV; - #endif - #if nISO>=14 - x_Ptr13 = x_Ptr12 + nV; - #endif - #if nISO>=15 - x_Ptr14 = x_Ptr13 + nV; - #endif - #if nISO>=16 - x_Ptr15 = x_Ptr14 + nV; - #endif - #if nISO>=17 - x_Ptr16 = x_Ptr15 + nV; - #endif - #if nISO>=18 - x_Ptr17 = x_Ptr16 + nV; - #endif - #if nISO>=19 - x_Ptr18 = x_Ptr17 + nV; - #endif - #if nISO>=20 - x_Ptr19 = x_Ptr18 + nV; - #endif - - while( t_v != t_vEnd ) - { - Yptr = Y + nS * (*t_v++); - YptrEnd = Yptr + nS; - - SFP0ptr = isoSFP0; - #if nISO>=2 - SFP1ptr = isoSFP1; - #endif - #if nISO>=3 - SFP2ptr = isoSFP2; - #endif - #if nISO>=4 - SFP3ptr = isoSFP3; - #endif - #if nISO>=5 - SFP4ptr = isoSFP4; - #endif - #if nISO>=6 - SFP5ptr = isoSFP5; - #endif - #if nISO>=7 - SFP6ptr = isoSFP6; - #endif - #if nISO>=8 - SFP7ptr = isoSFP7; - #endif - #if nISO>=9 - SFP8ptr = isoSFP8; - #endif - #if nISO>=10 - SFP9ptr = isoSFP9; - #endif - #if nISO>=11 - SFP10ptr = isoSFP10; - #endif - #if nISO>=12 - SFP11ptr = isoSFP11; - #endif - #if nISO>=13 - SFP12ptr = isoSFP12; - #endif - #if nISO>=14 - SFP13ptr = isoSFP13; - #endif - #if nISO>=15 - SFP14ptr = isoSFP14; - #endif - #if nISO>=16 - SFP15ptr = isoSFP15; - #endif - #if nISO>=17 - SFP16ptr = isoSFP16; - #endif - #if nISO>=18 - SFP17ptr = isoSFP17; - #endif - #if nISO>=19 - SFP18ptr = isoSFP18; - #endif - #if nISO>=20 - SFP19ptr = isoSFP19; - #endif - - Y_tmp = *Yptr; - x0 = (*SFP0ptr++) * Y_tmp; - #if nISO>=2 - x1 = (*SFP1ptr++) * Y_tmp; - #endif - #if nISO>=3 - x2 = (*SFP2ptr++) * Y_tmp; - #endif - #if nISO>=4 - x3 = (*SFP3ptr++) * Y_tmp; - #endif - #if nISO>=5 - x4 = (*SFP4ptr++) * Y_tmp; - #endif - #if nISO>=6 - x5 = (*SFP5ptr++) * Y_tmp; - #endif - #if nISO>=7 - x6 = (*SFP6ptr++) * Y_tmp; - #endif - #if nISO>=8 - x7 = (*SFP7ptr++) * Y_tmp; - #endif - #if nISO>=9 - x8 = (*SFP8ptr++) * Y_tmp; - #endif - #if nISO>=10 - x9 = (*SFP9ptr++) * Y_tmp; - #endif - #if nISO>=11 - x10 = (*SFP10ptr++) * Y_tmp; - #endif - #if nISO>=12 - x11 = (*SFP11ptr++) * Y_tmp; - #endif - #if nISO>=13 - x12 = (*SFP12ptr++) * Y_tmp; - #endif - #if nISO>=14 - x13 = (*SFP13ptr++) * Y_tmp; - #endif - #if nISO>=15 - x14 = (*SFP14ptr++) * Y_tmp; - #endif - #if nISO>=16 - x15 = (*SFP15ptr++) * Y_tmp; - #endif - #if nISO>=17 - x16 = (*SFP16ptr++) * Y_tmp; - #endif - #if nISO>=18 - x17 = (*SFP17ptr++) * Y_tmp; - #endif - #if nISO>=19 - x18 = (*SFP18ptr++) * Y_tmp; - #endif - #if nISO>=20 - x19 = (*SFP19ptr++) * Y_tmp; - #endif - - while( ++Yptr != YptrEnd ) - { - Y_tmp = *Yptr; - x0 += (*SFP0ptr++) * Y_tmp; - #if 
nISO>=2 - x1 += (*SFP1ptr++) * Y_tmp; - #endif - #if nISO>=3 - x2 += (*SFP2ptr++) * Y_tmp; - #endif - #if nISO>=4 - x3 += (*SFP3ptr++) * Y_tmp; - #endif - #if nISO>=5 - x4 += (*SFP4ptr++) * Y_tmp; - #endif - #if nISO>=6 - x5 += (*SFP5ptr++) * Y_tmp; - #endif - #if nISO>=7 - x6 += (*SFP6ptr++) * Y_tmp; - #endif - #if nISO>=8 - x7 += (*SFP7ptr++) * Y_tmp; - #endif - #if nISO>=9 - x8 += (*SFP8ptr++) * Y_tmp; - #endif - #if nISO>=10 - x9 += (*SFP9ptr++) * Y_tmp; - #endif - #if nISO>=11 - x10 += (*SFP10ptr++) * Y_tmp; - #endif - #if nISO>=12 - x11 += (*SFP11ptr++) * Y_tmp; - #endif - #if nISO>=13 - x12 += (*SFP12ptr++) * Y_tmp; - #endif - #if nISO>=14 - x13 += (*SFP13ptr++) * Y_tmp; - #endif - #if nISO>=15 - x14 += (*SFP14ptr++) * Y_tmp; - #endif - #if nISO>=16 - x15 += (*SFP15ptr++) * Y_tmp; - #endif - #if nISO>=17 - x16 += (*SFP16ptr++) * Y_tmp; - #endif - #if nISO>=18 - x17 += (*SFP17ptr++) * Y_tmp; - #endif - #if nISO>=19 - x18 += (*SFP18ptr++) * Y_tmp; - #endif - #if nISO>=20 - x19 += (*SFP19ptr++) * Y_tmp; - #endif - } - - (*x_Ptr0++) += x0; - #if nISO>=2 - (*x_Ptr1++) += x1; - #endif - #if nISO>=3 - (*x_Ptr2++) += x2; - #endif - #if nISO>=4 - (*x_Ptr3++) += x3; - #endif - #if nISO>=5 - (*x_Ptr4++) += x4; - #endif - #if nISO>=6 - (*x_Ptr5++) += x5; - #endif - #if nISO>=7 - (*x_Ptr6++) += x6; - #endif - #if nISO>=8 - (*x_Ptr7++) += x7; - #endif - #if nISO>=9 - (*x_Ptr8++) += x8; - #endif - #if nISO>=10 - (*x_Ptr9++) += x9; - #endif - #if nISO>=11 - (*x_Ptr10++) += x10; - #endif - #if nISO>=12 - (*x_Ptr11++) += x11; - #endif - #if nISO>=13 - (*x_Ptr12++) += x12; - #endif - #if nISO>=14 - (*x_Ptr13++) += x13; - #endif - #if nISO>=15 - (*x_Ptr14++) += x14; - #endif - #if nISO>=16 - (*x_Ptr15++) += x15; - #endif - #if nISO>=17 - (*x_Ptr16++) += x16; - #endif - #if nISO>=18 - (*x_Ptr17++) += x17; - #endif - #if nISO>=19 - (*x_Ptr18++) += x18; - #endif - #if nISO>=20 - (*x_Ptr19++) += x19; - #endif - } -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_At( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT -) -{ - nF = _nF; - n = _n; - nE = _nE; - nV = _nV; - nS = _nS; - ndirs = _ndirs; - - x = _vOUT; - Y = _vIN; - - ICf = _ICf; - ICv = _ICv; - ICo = _ICo; - ICl = _ICl; - ECv = _ECv; - ECo = _ECo; - ISOv = _ISOv; - - #if nIC>=1 - wmrSFP0 = _wmrSFP; - #if nIC>=2 - wmrSFP1 = wmrSFP0 + _ndirs*_nS; - #if nIC>=3 - wmrSFP2 = wmrSFP1 + _ndirs*_nS; - #if nIC>=4 - wmrSFP3 = wmrSFP2 + _ndirs*_nS; - #if nIC>=5 - wmrSFP4 = wmrSFP3 + _ndirs*_nS; - #if nIC>=6 - wmrSFP5 = wmrSFP4 + _ndirs*_nS; - #if nIC>=7 - wmrSFP6 = wmrSFP5 + _ndirs*_nS; - #if nIC>=8 - wmrSFP7 = wmrSFP6 + _ndirs*_nS; - #if nIC>=9 - wmrSFP8 = wmrSFP7 + _ndirs*_nS; - #if nIC>=10 - wmrSFP9 = wmrSFP8 + _ndirs*_nS; - #if nIC>=11 - wmrSFP10 = wmrSFP9 + _ndirs*_nS; - #if nIC>=12 - wmrSFP11 = wmrSFP10 + _ndirs*_nS; - #if nIC>=13 - wmrSFP12 = wmrSFP11 + _ndirs*_nS; - #if nIC>=14 - wmrSFP13 = wmrSFP12 + _ndirs*_nS; - #if nIC>=15 - wmrSFP14 = wmrSFP13 + _ndirs*_nS; - #if nIC>=16 - wmrSFP15 = wmrSFP14 + _ndirs*_nS; - #if nIC>=17 - wmrSFP16 = wmrSFP15 + _ndirs*_nS; - #if nIC>=18 - wmrSFP17 = wmrSFP16 + _ndirs*_nS; - #if nIC>=19 - wmrSFP18 = wmrSFP17 + _ndirs*_nS; - #if nIC>=20 - wmrSFP19 = 
wmrSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nEC>=1 - wmhSFP0 = _wmhSFP; - #if nEC>=2 - wmhSFP1 = wmhSFP0 + _ndirs*_nS; - #if nEC>=3 - wmhSFP2 = wmhSFP1 + _ndirs*_nS; - #if nEC>=4 - wmhSFP3 = wmhSFP2 + _ndirs*_nS; - #if nEC>=5 - wmhSFP4 = wmhSFP3 + _ndirs*_nS; - #if nEC>=6 - wmhSFP5 = wmhSFP4 + _ndirs*_nS; - #if nEC>=7 - wmhSFP6 = wmhSFP5 + _ndirs*_nS; - #if nEC>=8 - wmhSFP7 = wmhSFP6 + _ndirs*_nS; - #if nEC>=9 - wmhSFP8 = wmhSFP7 + _ndirs*_nS; - #if nEC>=10 - wmhSFP9 = wmhSFP8 + _ndirs*_nS; - #if nEC>=11 - wmhSFP10 = wmhSFP9 + _ndirs*_nS; - #if nEC>=12 - wmhSFP11 = wmhSFP10 + _ndirs*_nS; - #if nEC>=13 - wmhSFP12 = wmhSFP11 + _ndirs*_nS; - #if nEC>=14 - wmhSFP13 = wmhSFP12 + _ndirs*_nS; - #if nEC>=15 - wmhSFP14 = wmhSFP13 + _ndirs*_nS; - #if nEC>=16 - wmhSFP15 = wmhSFP14 + _ndirs*_nS; - #if nEC>=17 - wmhSFP16 = wmhSFP15 + _ndirs*_nS; - #if nEC>=18 - wmhSFP17 = wmhSFP16 + _ndirs*_nS; - #if nEC>=19 - wmhSFP18 = wmhSFP17 + _ndirs*_nS; - #if nEC>=20 - wmhSFP19 = wmhSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nISO>=1 - isoSFP0 = _isoSFP; - #if nISO>=2 - isoSFP1 = isoSFP0 + _nS; - #if nISO>=3 - isoSFP2 = isoSFP1 + _nS; - #if nISO>=4 - isoSFP3 = isoSFP2 + _nS; - #if nISO>=5 - isoSFP4 = isoSFP3 + _nS; - #if nISO>=6 - isoSFP5 = isoSFP4 + _nS; - #if nISO>=7 - isoSFP6 = isoSFP5 + _nS; - #if nISO>=8 - isoSFP7 = isoSFP6 + _nS; - #if nISO>=9 - isoSFP8 = isoSFP7 + _nS; - #if nISO>=10 - isoSFP9 = isoSFP8 + _nS; - #if nISO>=11 - isoSFP10 = isoSFP9 + _nS; - #if nISO>=12 - isoSFP11 = isoSFP10 + _nS; - #if nISO>=13 - isoSFP12 = isoSFP11 + _nS; - #if nISO>=14 - isoSFP13 = isoSFP12 + _nS; - #if nISO>=15 - isoSFP14 = isoSFP13 + _nS; - #if nISO>=16 - isoSFP15 = isoSFP14 + _nS; - #if nISO>=17 - isoSFP16 = isoSFP15 + _nS; - #if nISO>=18 - isoSFP17 = isoSFP16 + _nS; - #if nISO>=19 - isoSFP18 = isoSFP17 + _nS; - #if nISO>=20 - isoSFP19 = isoSFP18 + _nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - - ICthreadsT = _ICthreadsT; - ECthreadsT = _ECthreadsT; - ISOthreadsT = _ISOthreadsT; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t +#include // uint32_t etc + +// number of THREADS +#ifdef nTHREADS + #if (nTHREADS<1 || nTHREADS>255) + #error "nTHREADS" must be in the range 1..255 + #endif +#else + #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=" +#endif + + +/* global variables */ +int nF, n, nE, nV, nS, ndirs; +double *x, *Y; +uint32_t *ICthreads, *ECthreads, *ISOthreads; +uint8_t *ICthreadsT; +uint32_t *ECthreadsT, *ISOthreadsT; +uint32_t *ICf, *ICv, *ECv, *ISOv; +uint16_t *ICo, *ECo; +float *ICl; +float *wmrSFP0, *wmrSFP1, *wmrSFP2, *wmrSFP3, *wmrSFP4, *wmrSFP5, *wmrSFP6, *wmrSFP7, *wmrSFP8, *wmrSFP9, *wmrSFP10, *wmrSFP11, *wmrSFP12, *wmrSFP13, *wmrSFP14, *wmrSFP15, *wmrSFP16, *wmrSFP17, *wmrSFP18, *wmrSFP19; +float *wmhSFP0, *wmhSFP1, *wmhSFP2, *wmhSFP3, *wmhSFP4, *wmhSFP5, *wmhSFP6, *wmhSFP7, *wmhSFP8, *wmhSFP9, *wmhSFP10, *wmhSFP11, *wmhSFP12, *wmhSFP13, *wmhSFP14, *wmhSFP15, *wmhSFP16, *wmhSFP17, *wmhSFP18, *wmhSFP19; +float *isoSFP0, *isoSFP1, *isoSFP2, *isoSFP3, 
*isoSFP4, *isoSFP5, *isoSFP6, *isoSFP7, *isoSFP8, *isoSFP9, *isoSFP10, *isoSFP11, *isoSFP12, *isoSFP13, *isoSFP14, *isoSFP15, *isoSFP16, *isoSFP17, *isoSFP18, *isoSFP19; + + + +// ==================================================== +// Compute a sub-block of the A*x MAtRIX-VECTOR product +// ==================================================== +void* COMMIT_A__block( void *ptr ) +{ + int id = (long)ptr; + int offset; + double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w; + double *x_Ptr0, *x_Ptr1, *x_Ptr2, *x_Ptr3, *x_Ptr4, *x_Ptr5, *x_Ptr6, *x_Ptr7, *x_Ptr8, *x_Ptr9, *x_Ptr10, *x_Ptr11, *x_Ptr12, *x_Ptr13, *x_Ptr14, *x_Ptr15, *x_Ptr16, *x_Ptr17, *x_Ptr18, *x_Ptr19; + double *Yptr, *YptrEnd; + float *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr; + uint32_t *t_v, *t_vEnd, *t_f; + uint16_t *t_o; + float *t_l; + +#if nIC>=1 + // intra-cellular compartments + t_v = ICv + ICthreads[id]; + t_vEnd = ICv + ICthreads[id+1]; + t_o = ICo + ICthreads[id]; + t_l = ICl + ICthreads[id]; + t_f = ICf + ICthreads[id]; + + while( t_v != t_vEnd ) + { + x_Ptr0 = x + *t_f; + x0 = *x_Ptr0; + #if nIC>=2 + x_Ptr1 = x_Ptr0 + nF; + x1 = *x_Ptr1; + #endif + #if nIC>=3 + x_Ptr2 = x_Ptr1 + nF; + x2 = *x_Ptr2; + #endif + #if nIC>=4 + x_Ptr3 = x_Ptr2 + nF; + x3 = *x_Ptr3; + #endif + #if nIC>=5 + x_Ptr4 = x_Ptr3 + nF; + x4 = *x_Ptr4; + #endif + #if nIC>=6 + x_Ptr5 = x_Ptr4 + nF; + x5 = *x_Ptr5; + #endif + #if nIC>=7 + x_Ptr6 = x_Ptr5 + nF; + x6 = *x_Ptr6; + #endif + #if nIC>=8 + x_Ptr7 = x_Ptr6 + nF; + x7 = *x_Ptr7; + #endif + #if nIC>=9 + x_Ptr8 = x_Ptr7 + nF; + x8 = *x_Ptr8; + #endif + #if nIC>=10 + x_Ptr9 = x_Ptr8 + nF; + x9 = *x_Ptr9; + #endif + #if nIC>=11 + x_Ptr10 = x_Ptr9 + nF; + x10 = *x_Ptr10; + #endif + #if nIC>=12 + x_Ptr11 = x_Ptr10 + nF; + x11 = *x_Ptr11; + #endif + #if nIC>=13 + x_Ptr12 = x_Ptr11 + nF; + x12 = *x_Ptr12; + #endif + #if nIC>=14 + x_Ptr13 = x_Ptr12 + nF; + x13 = *x_Ptr13; + #endif + #if nIC>=15 + x_Ptr14 = x_Ptr13 + nF; + x14 = *x_Ptr14; + #endif + #if nIC>=16 + x_Ptr15 = x_Ptr14 + nF; + x15 = *x_Ptr15; + #endif + #if nIC>=17 + x_Ptr16 = x_Ptr15 + nF; + x16 = *x_Ptr16; + #endif + #if nIC>=18 + x_Ptr17 = x_Ptr16 + nF; + x17 = *x_Ptr17; + #endif + #if nIC>=19 + x_Ptr18 = x_Ptr17 + nF; + x18 = *x_Ptr18; + #endif + #if nIC>=20 + x_Ptr19 = x_Ptr18 + nF; + x19 = *x_Ptr19; + #endif + + if ( x0 != 0 + #if nIC>=2 + || x1 != 0 + #endif + #if nIC>=3 + || x2 != 0 + #endif + #if nIC>=4 + || x3 != 0 + #endif + #if nIC>=5 + || x4 != 0 + #endif + #if nIC>=6 + || x5 != 0 + #endif + #if nIC>=7 + || x6 != 0 + #endif + #if nIC>=8 + || x7 != 0 + #endif + #if nIC>=9 + || x8 != 0 + #endif + #if nIC>=10 + || x9 != 0 + #endif + #if nIC>=11 + || x10 != 0 + #endif + #if nIC>=12 + || x11 != 0 + #endif + #if nIC>=13 + || x12 != 0 + #endif + #if nIC>=14 + || x13 != 0 + #endif + #if nIC>=15 + || x14 != 0 + #endif + #if nIC>=16 + || x15 != 0 + #endif + #if nIC>=17 + || x16 != 0 + #endif + #if nIC>=18 + || x17 != 0 + #endif + #if nIC>=19 + || x18 != 0 + #endif + #if nIC>=20 + || x19 != 0 + #endif + ) + { + Yptr = Y + nS * (*t_v); + YptrEnd = Yptr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + #if nIC>=2 + SFP1ptr = wmrSFP1 + offset; + #endif + #if nIC>=3 + SFP2ptr = wmrSFP2 + offset; + #endif + #if nIC>=4 + SFP3ptr = wmrSFP3 + offset; + #endif + #if nIC>=5 + SFP4ptr = wmrSFP4 + 
offset; + #endif + #if nIC>=6 + SFP5ptr = wmrSFP5 + offset; + #endif + #if nIC>=7 + SFP6ptr = wmrSFP6 + offset; + #endif + #if nIC>=8 + SFP7ptr = wmrSFP7 + offset; + #endif + #if nIC>=9 + SFP8ptr = wmrSFP8 + offset; + #endif + #if nIC>=10 + SFP9ptr = wmrSFP9 + offset; + #endif + #if nIC>=11 + SFP10ptr = wmrSFP10 + offset; + #endif + #if nIC>=12 + SFP11ptr = wmrSFP11 + offset; + #endif + #if nIC>=13 + SFP12ptr = wmrSFP12 + offset; + #endif + #if nIC>=14 + SFP13ptr = wmrSFP13 + offset; + #endif + #if nIC>=15 + SFP14ptr = wmrSFP14 + offset; + #endif + #if nIC>=16 + SFP15ptr = wmrSFP15 + offset; + #endif + #if nIC>=17 + SFP16ptr = wmrSFP16 + offset; + #endif + #if nIC>=18 + SFP17ptr = wmrSFP17 + offset; + #endif + #if nIC>=19 + SFP18ptr = wmrSFP18 + offset; + #endif + #if nIC>=20 + SFP19ptr = wmrSFP19 + offset; + #endif + + while( Yptr != YptrEnd ) + (*Yptr++) += w * ( + x0 * (*SFP0ptr++) + #if nIC>=2 + + x1 * (*SFP1ptr++) + #endif + #if nIC>=3 + + x2 * (*SFP2ptr++) + #endif + #if nIC>=4 + + x3 * (*SFP3ptr++) + #endif + #if nIC>=5 + + x4 * (*SFP4ptr++) + #endif + #if nIC>=6 + + x5 * (*SFP5ptr++) + #endif + #if nIC>=7 + + x6 * (*SFP6ptr++) + #endif + #if nIC>=8 + + x7 * (*SFP7ptr++) + #endif + #if nIC>=9 + + x8 * (*SFP8ptr++) + #endif + #if nIC>=10 + + x9 * (*SFP9ptr++) + #endif + #if nIC>=11 + + x10 * (*SFP10ptr++) + #endif + #if nIC>=12 + + x11 * (*SFP11ptr++) + #endif + #if nIC>=13 + + x12 * (*SFP12ptr++) + #endif + #if nIC>=14 + + x13 * (*SFP13ptr++) + #endif + #if nIC>=15 + + x14 * (*SFP14ptr++) + #endif + #if nIC>=16 + + x15 * (*SFP15ptr++) + #endif + #if nIC>=17 + + x16 * (*SFP16ptr++) + #endif + #if nIC>=18 + + x17 * (*SFP17ptr++) + #endif + #if nIC>=19 + + x18 * (*SFP18ptr++) + #endif + #if nIC>=20 + + x19 * (*SFP19ptr++) + #endif + ); + } + + t_f++; + t_v++; + t_o++; + t_l++; + } +#endif + +#if nEC>=1 + // extra-cellular compartments + t_v = ECv + ECthreads[id]; + t_vEnd = ECv + ECthreads[id+1]; + t_o = ECo + ECthreads[id]; + + x_Ptr0 = x + nIC*nF + ECthreads[id]; + #if nEC>=2 + x_Ptr1 = x_Ptr0 + nE; + #endif + #if nEC>=3 + x_Ptr2 = x_Ptr1 + nE; + #endif + #if nEC>=4 + x_Ptr3 = x_Ptr2 + nE; + #endif + #if nEC>=5 + x_Ptr4 = x_Ptr3 + nE; + #endif + #if nEC>=6 + x_Ptr5 = x_Ptr4 + nE; + #endif + #if nEC>=7 + x_Ptr6 = x_Ptr5 + nE; + #endif + #if nEC>=8 + x_Ptr7 = x_Ptr6 + nE; + #endif + #if nEC>=9 + x_Ptr8 = x_Ptr7 + nE; + #endif + #if nEC>=10 + x_Ptr9 = x_Ptr8 + nE; + #endif + #if nEC>=11 + x_Ptr10 = x_Ptr9 + nE; + #endif + #if nEC>=12 + x_Ptr11 = x_Ptr10 + nE; + #endif + #if nEC>=13 + x_Ptr12 = x_Ptr11 + nE; + #endif + #if nEC>=14 + x_Ptr13 = x_Ptr12 + nE; + #endif + #if nEC>=15 + x_Ptr14 = x_Ptr13 + nE; + #endif + #if nEC>=16 + x_Ptr15 = x_Ptr14 + nE; + #endif + #if nEC>=17 + x_Ptr16 = x_Ptr15 + nE; + #endif + #if nEC>=18 + x_Ptr17 = x_Ptr16 + nE; + #endif + #if nEC>=19 + x_Ptr18 = x_Ptr17 + nE; + #endif + #if nEC>=20 + x_Ptr19 = x_Ptr18 + nE; + #endif + + while( t_v != t_vEnd ) + { + x0 = *x_Ptr0++; + #if nEC>=2 + x1 = *x_Ptr1++; + #endif + #if nEC>=3 + x2 = *x_Ptr2++; + #endif + #if nEC>=4 + x3 = *x_Ptr3++; + #endif + #if nEC>=5 + x4 = *x_Ptr4++; + #endif + #if nEC>=6 + x5 = *x_Ptr5++; + #endif + #if nEC>=7 + x6 = *x_Ptr6++; + #endif + #if nEC>=8 + x7 = *x_Ptr7++; + #endif + #if nEC>=9 + x8 = *x_Ptr8++; + #endif + #if nEC>=10 + x9 = *x_Ptr9++; + #endif + #if nEC>=11 + x10 = *x_Ptr10++; + #endif + #if nEC>=12 + x11 = *x_Ptr11++; + #endif + #if nEC>=13 + x12 = *x_Ptr12++; + #endif + #if nEC>=14 + x13 = *x_Ptr13++; + #endif + #if nEC>=15 + x14 = *x_Ptr14++; + #endif + #if nEC>=16 + x15 = 
*x_Ptr15++; + #endif + #if nEC>=17 + x16 = *x_Ptr16++; + #endif + #if nEC>=18 + x17 = *x_Ptr17++; + #endif + #if nEC>=19 + x18 = *x_Ptr18++; + #endif + #if nEC>=20 + x19 = *x_Ptr19++; + #endif + if ( + x0 != 0 + #if nEC>=2 + || x1 != 0 + #endif + #if nEC>=3 + || x2 != 0 + #endif + #if nEC>=4 + || x3 != 0 + #endif + #if nEC>=5 + || x4 != 0 + #endif + #if nEC>=6 + || x5 != 0 + #endif + #if nEC>=7 + || x6 != 0 + #endif + #if nEC>=8 + || x7 != 0 + #endif + #if nEC>=9 + || x8 != 0 + #endif + #if nEC>=10 + || x9 != 0 + #endif + #if nEC>=11 + || x10 != 0 + #endif + #if nEC>=12 + || x11 != 0 + #endif + #if nEC>=13 + || x12 != 0 + #endif + #if nEC>=14 + || x13 != 0 + #endif + #if nEC>=15 + || x14 != 0 + #endif + #if nEC>=16 + || x15 != 0 + #endif + #if nEC>=17 + || x16 != 0 + #endif + #if nEC>=18 + || x17 != 0 + #endif + #if nEC>=19 + || x18 != 0 + #endif + #if nEC>=20 + || x19 != 0 + #endif + ) + { + Yptr = Y + nS * (*t_v); + YptrEnd = Yptr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + #if nEC>=2 + SFP1ptr = wmhSFP1 + offset; + #endif + #if nEC>=3 + SFP2ptr = wmhSFP2 + offset; + #endif + #if nEC>=4 + SFP3ptr = wmhSFP3 + offset; + #endif + #if nEC>=5 + SFP4ptr = wmhSFP4 + offset; + #endif + #if nEC>=6 + SFP5ptr = wmhSFP5 + offset; + #endif + #if nEC>=7 + SFP6ptr = wmhSFP6 + offset; + #endif + #if nEC>=8 + SFP7ptr = wmhSFP7 + offset; + #endif + #if nEC>=9 + SFP8ptr = wmhSFP8 + offset; + #endif + #if nEC>=10 + SFP9ptr = wmhSFP9 + offset; + #endif + #if nEC>=11 + SFP10ptr = wmhSFP10 + offset; + #endif + #if nEC>=12 + SFP11ptr = wmhSFP11 + offset; + #endif + #if nEC>=13 + SFP12ptr = wmhSFP12 + offset; + #endif + #if nEC>=14 + SFP13ptr = wmhSFP13 + offset; + #endif + #if nEC>=15 + SFP14ptr = wmhSFP14 + offset; + #endif + #if nEC>=16 + SFP15ptr = wmhSFP15 + offset; + #endif + #if nEC>=17 + SFP16ptr = wmhSFP16 + offset; + #endif + #if nEC>=18 + SFP17ptr = wmhSFP17 + offset; + #endif + #if nEC>=19 + SFP18ptr = wmhSFP18 + offset; + #endif + #if nEC>=20 + SFP19ptr = wmhSFP19 + offset; + #endif + + while( Yptr != YptrEnd ) + (*Yptr++) += ( + x0 * (*SFP0ptr++) + #if nEC>=2 + + x1 * (*SFP1ptr++) + #endif + #if nEC>=3 + + x2 * (*SFP2ptr++) + #endif + #if nEC>=4 + + x3 * (*SFP3ptr++) + #endif + #if nEC>=5 + + x4 * (*SFP4ptr++) + #endif + #if nEC>=6 + + x5 * (*SFP5ptr++) + #endif + #if nEC>=7 + + x6 * (*SFP6ptr++) + #endif + #if nEC>=8 + + x7 * (*SFP7ptr++) + #endif + #if nEC>=9 + + x8 * (*SFP8ptr++) + #endif + #if nEC>=10 + + x9 * (*SFP9ptr++) + #endif + #if nEC>=11 + + x10 * (*SFP10ptr++) + #endif + #if nEC>=12 + + x11 * (*SFP11ptr++) + #endif + #if nEC>=13 + + x12 * (*SFP12ptr++) + #endif + #if nEC>=14 + + x13 * (*SFP13ptr++) + #endif + #if nEC>=15 + + x14 * (*SFP14ptr++) + #endif + #if nEC>=16 + + x15 * (*SFP15ptr++) + #endif + #if nEC>=17 + + x16 * (*SFP16ptr++) + #endif + #if nEC>=18 + + x17 * (*SFP17ptr++) + #endif + #if nEC>=19 + + x18 * (*SFP18ptr++) + #endif + #if nEC>=20 + + x19 * (*SFP19ptr++) + #endif + + ); + } + t_v++; + t_o++; + } +#endif + +#if nISO>=1 + // isotropic compartments + t_v = ISOv + ISOthreads[id]; + t_vEnd = ISOv + ISOthreads[id+1]; + + x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreads[id]; + #if nISO>=2 + x_Ptr1 = x_Ptr0 + nV; + #endif + #if nISO>=3 + x_Ptr2 = x_Ptr1 + nV; + #endif + #if nISO>=4 + x_Ptr3 = x_Ptr2 + nV; + #endif + #if nISO>=5 + x_Ptr4 = x_Ptr3 + nV; + #endif + #if nISO>=6 + x_Ptr5 = x_Ptr4 + nV; + #endif + #if nISO>=7 + x_Ptr6 = x_Ptr5 + nV; + #endif + #if nISO>=8 + x_Ptr7 = x_Ptr6 + nV; + #endif + #if nISO>=9 + x_Ptr8 = x_Ptr7 + nV; + #endif + #if nISO>=10 + 
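/* Layout of the coefficient vector x used throughout: first nIC blocks of nF
   streamline weights, then nEC blocks of nE extra-cellular weights, then nISO
   blocks of nV per-voxel isotropic weights; hence the base pointer
   x + nIC*nF + nEC*nE + ISOthreads[id] and the +nV strides chained below. */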
x_Ptr9 = x_Ptr8 + nV; + #endif + #if nISO>=11 + x_Ptr10 = x_Ptr9 + nV; + #endif + #if nISO>=12 + x_Ptr11 = x_Ptr10 + nV; + #endif + #if nISO>=13 + x_Ptr12 = x_Ptr11 + nV; + #endif + #if nISO>=14 + x_Ptr13 = x_Ptr12 + nV; + #endif + #if nISO>=15 + x_Ptr14 = x_Ptr13 + nV; + #endif + #if nISO>=16 + x_Ptr15 = x_Ptr14 + nV; + #endif + #if nISO>=17 + x_Ptr16 = x_Ptr15 + nV; + #endif + #if nISO>=18 + x_Ptr17 = x_Ptr16 + nV; + #endif + #if nISO>=19 + x_Ptr18 = x_Ptr17 + nV; + #endif + #if nISO>=20 + x_Ptr19 = x_Ptr18 + nV; + #endif + + while( t_v != t_vEnd ) + { + x0 = *x_Ptr0++; + #if nISO>=2 + x1 = *x_Ptr1++; + #endif + #if nISO>=3 + x2 = *x_Ptr2++; + #endif + #if nISO>=4 + x3 = *x_Ptr3++; + #endif + #if nISO>=5 + x4 = *x_Ptr4++; + #endif + #if nISO>=6 + x5 = *x_Ptr5++; + #endif + #if nISO>=7 + x6 = *x_Ptr6++; + #endif + #if nISO>=8 + x7 = *x_Ptr7++; + #endif + #if nISO>=9 + x8 = *x_Ptr8++; + #endif + #if nISO>=10 + x9 = *x_Ptr9++; + #endif + #if nISO>=11 + x10 = *x_Ptr10++; + #endif + #if nISO>=12 + x11 = *x_Ptr11++; + #endif + #if nISO>=13 + x12 = *x_Ptr12++; + #endif + #if nISO>=14 + x13 = *x_Ptr13++; + #endif + #if nISO>=15 + x14 = *x_Ptr14++; + #endif + #if nISO>=16 + x15 = *x_Ptr15++; + #endif + #if nISO>=17 + x16 = *x_Ptr16++; + #endif + #if nISO>=18 + x17 = *x_Ptr17++; + #endif + #if nISO>=19 + x18 = *x_Ptr18++; + #endif + #if nISO>=20 + x19 = *x_Ptr19++; + #endif + + if ( + x0 != 0 + #if nISO>=2 + || x1 != 0 + #endif + #if nISO>=3 + || x2 != 0 + #endif + #if nISO>=4 + || x3 != 0 + #endif + #if nISO>=5 + || x4 != 0 + #endif + #if nISO>=6 + || x5 != 0 + #endif + #if nISO>=7 + || x6 != 0 + #endif + #if nISO>=8 + || x7 != 0 + #endif + #if nISO>=9 + || x8 != 0 + #endif + #if nISO>=10 + || x9 != 0 + #endif + #if nISO>=11 + || x10 != 0 + #endif + #if nISO>=12 + || x11 != 0 + #endif + #if nISO>=13 + || x12 != 0 + #endif + #if nISO>=14 + || x13 != 0 + #endif + #if nISO>=15 + || x14 != 0 + #endif + #if nISO>=16 + || x15 != 0 + #endif + #if nISO>=17 + || x16 != 0 + #endif + #if nISO>=18 + || x17 != 0 + #endif + #if nISO>=19 + || x18 != 0 + #endif + #if nISO>=20 + || x19 != 0 + #endif + ) + { + Yptr = Y + nS * (*t_v); + YptrEnd = Yptr + nS; + SFP0ptr = isoSFP0; + #if nISO>=2 + SFP1ptr = isoSFP1; + #endif + #if nISO>=3 + SFP2ptr = isoSFP2; + #endif + #if nISO>=4 + SFP3ptr = isoSFP3; + #endif + #if nISO>=5 + SFP4ptr = isoSFP4; + #endif + #if nISO>=6 + SFP5ptr = isoSFP5; + #endif + #if nISO>=7 + SFP6ptr = isoSFP6; + #endif + #if nISO>=8 + SFP7ptr = isoSFP7; + #endif + #if nISO>=9 + SFP8ptr = isoSFP8; + #endif + #if nISO>=10 + SFP9ptr = isoSFP9; + #endif + #if nISO>=11 + SFP10ptr = isoSFP10; + #endif + #if nISO>=12 + SFP11ptr = isoSFP11; + #endif + #if nISO>=13 + SFP12ptr = isoSFP12; + #endif + #if nISO>=14 + SFP13ptr = isoSFP13; + #endif + #if nISO>=15 + SFP14ptr = isoSFP14; + #endif + #if nISO>=16 + SFP15ptr = isoSFP15; + #endif + #if nISO>=17 + SFP16ptr = isoSFP16; + #endif + #if nISO>=18 + SFP17ptr = isoSFP17; + #endif + #if nISO>=19 + SFP18ptr = isoSFP18; + #endif + #if nISO>=20 + SFP19ptr = isoSFP19; + #endif + + while( Yptr != YptrEnd ) + (*Yptr++) += ( + x0 * (*SFP0ptr++) + #if nISO>=2 + + x1 * (*SFP1ptr++) + #endif + #if nISO>=3 + + x2 * (*SFP2ptr++) + #endif + #if nISO>=4 + + x3 * (*SFP3ptr++) + #endif + #if nISO>=5 + + x4 * (*SFP4ptr++) + #endif + #if nISO>=6 + + x5 * (*SFP5ptr++) + #endif + #if nISO>=7 + + x6 * (*SFP6ptr++) + #endif + #if nISO>=8 + + x7 * (*SFP7ptr++) + #endif + #if nISO>=9 + + x8 * (*SFP8ptr++) + #endif + #if nISO>=10 + + x9 * (*SFP9ptr++) + #endif + #if nISO>=11 + + x10 * 
(*SFP10ptr++) + #endif + #if nISO>=12 + + x11 * (*SFP11ptr++) + #endif + #if nISO>=13 + + x12 * (*SFP12ptr++) + #endif + #if nISO>=14 + + x13 * (*SFP13ptr++) + #endif + #if nISO>=15 + + x14 * (*SFP14ptr++) + #endif + #if nISO>=16 + + x15 * (*SFP15ptr++) + #endif + #if nISO>=17 + + x16 * (*SFP16ptr++) + #endif + #if nISO>=18 + + x17 * (*SFP17ptr++) + #endif + #if nISO>=19 + + x18 * (*SFP18ptr++) + #endif + #if nISO>=20 + + x19 * (*SFP19ptr++) + #endif + ); + } + t_v++; + } +#endif + + pthread_exit( 0 ); +} + + +// ========================= +// Function called by CYTHON +// ========================= +void COMMIT_A( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads +) +{ + nF = _nF; + n = _n; + nE = _nE; + nV = _nV; + nS = _nS; + ndirs = _ndirs; + + x = _vIN; + Y = _vOUT; + + ICf = _ICf; + ICv = _ICv; + ICo = _ICo; + ICl = _ICl; + ECv = _ECv; + ECo = _ECo; + ISOv = _ISOv; + + #if nIC>=1 + wmrSFP0 = _wmrSFP; + #if nIC>=2 + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + #if nIC>=3 + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + #if nIC>=4 + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + #if nIC>=5 + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + #if nIC>=6 + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + #if nIC>=7 + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + #if nIC>=8 + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + #if nIC>=9 + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + #if nIC>=10 + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + #if nIC>=11 + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + #if nIC>=12 + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + #if nIC>=13 + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + #if nIC>=14 + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + #if nIC>=15 + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + #if nIC>=16 + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + #if nIC>=17 + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + #if nIC>=18 + wmrSFP17 = wmrSFP16 + _ndirs*_nS; + #if nIC>=19 + wmrSFP18 = wmrSFP17 + _ndirs*_nS; + #if nIC>=20 + wmrSFP19 = wmrSFP18 + _ndirs*_nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #if nEC>=1 + wmhSFP0 = _wmhSFP; + #if nEC>=2 + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + #if nEC>=3 + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + #if nEC>=4 + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + #if nEC>=5 + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + #if nEC>=6 + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + #if nEC>=7 + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + #if nEC>=8 + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + #if nEC>=9 + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + #if nEC>=10 + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + #if nEC>=11 + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + #if nEC>=12 + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + #if nEC>=13 + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + #if nEC>=14 + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + #if nEC>=15 + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + #if nEC>=16 + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + #if nEC>=17 + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + #if nEC>=18 + wmhSFP17 = wmhSFP16 + _ndirs*_nS; + #if nEC>=19 + wmhSFP18 = wmhSFP17 + _ndirs*_nS; + #if nEC>=20 + wmhSFP19 = wmhSFP18 + _ndirs*_nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #if nISO>=1 + isoSFP0 = _isoSFP; + #if nISO>=2 + isoSFP1 = isoSFP0 + _nS; + #if nISO>=3 + isoSFP2 = isoSFP1 + _nS; + #if 
nISO>=4 + isoSFP3 = isoSFP2 + _nS; + #if nISO>=5 + isoSFP4 = isoSFP3 + _nS; + #if nISO>=6 + isoSFP5 = isoSFP4 + _nS; + #if nISO>=7 + isoSFP6 = isoSFP5 + _nS; + #if nISO>=8 + isoSFP7 = isoSFP6 + _nS; + #if nISO>=9 + isoSFP8 = isoSFP7 + _nS; + #if nISO>=10 + isoSFP9 = isoSFP8 + _nS; + #if nISO>=11 + isoSFP10 = isoSFP9 + _nS; + #if nISO>=12 + isoSFP11 = isoSFP10 + _nS; + #if nISO>=13 + isoSFP12 = isoSFP11 + _nS; + #if nISO>=14 + isoSFP13 = isoSFP12 + _nS; + #if nISO>=15 + isoSFP14 = isoSFP13 + _nS; + #if nISO>=16 + isoSFP15 = isoSFP14 + _nS; + #if nISO>=17 + isoSFP16 = isoSFP15 + _nS; + #if nISO>=18 + isoSFP17 = isoSFP16 + _nS; + #if nISO>=19 + isoSFP18 = isoSFP17 + _nS; + #if nISO>=20 + isoSFP19 = isoSFP18 + _nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + + ICthreads = _ICthreads; + ECthreads = _ECthreads; + ISOthreads = _ISOthreads; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[nTHREADS]; + int t; + for(t=0; t=1 + // intra-cellular compartments + t_v = ICv; + t_vEnd = ICv + n; + t_o = ICo; + t_l = ICl; + t_f = ICf; + t_t = ICthreadsT; + + while( t_v != t_vEnd ) + { + // in this case, I need to walk throug because the segments are ordered in "voxel order" + if ( *t_t == id ) + { + Yptr = Y + nS * (*t_v); + YptrEnd = Yptr + nS; + offset = nS * (*t_o); + + Y_tmp = *Yptr; + SFP0ptr = wmrSFP0 + offset; + x0 = (*SFP0ptr++) * Y_tmp; + #if nIC>=2 + SFP1ptr = wmrSFP1 + offset; + x1 = (*SFP1ptr++) * Y_tmp; + #endif + #if nIC>=3 + SFP2ptr = wmrSFP2 + offset; + x2 = (*SFP2ptr++) * Y_tmp; + #endif + #if nIC>=4 + SFP3ptr = wmrSFP3 + offset; + x3 = (*SFP3ptr++) * Y_tmp; + #endif + #if nIC>=5 + SFP4ptr = wmrSFP4 + offset; + x4 = (*SFP4ptr++) * Y_tmp; + #endif + #if nIC>=6 + SFP5ptr = wmrSFP5 + offset; + x5 = (*SFP5ptr++) * Y_tmp; + #endif + #if nIC>=7 + SFP6ptr = wmrSFP6 + offset; + x6 = (*SFP6ptr++) * Y_tmp; + #endif + #if nIC>=8 + SFP7ptr = wmrSFP7 + offset; + x7 = (*SFP7ptr++) * Y_tmp; + #endif + #if nIC>=9 + SFP8ptr = wmrSFP8 + offset; + x8 = (*SFP8ptr++) * Y_tmp; + #endif + #if nIC>=10 + SFP9ptr = wmrSFP9 + offset; + x9 = (*SFP9ptr++) * Y_tmp; + #endif + #if nIC>=11 + SFP10ptr = wmrSFP10 + offset; + x10 = (*SFP10ptr++) * Y_tmp; + #endif + #if nIC>=12 + SFP11ptr = wmrSFP11 + offset; + x11 = (*SFP11ptr++) * Y_tmp; + #endif + #if nIC>=13 + SFP12ptr = wmrSFP12 + offset; + x12 = (*SFP12ptr++) * Y_tmp; + #endif + #if nIC>=14 + SFP13ptr = wmrSFP13 + offset; + x13 = (*SFP13ptr++) * Y_tmp; + #endif + #if nIC>=15 + SFP14ptr = wmrSFP14 + offset; + x14 = (*SFP14ptr++) * Y_tmp; + #endif + #if nIC>=16 + SFP15ptr = wmrSFP15 + offset; + x15 = (*SFP15ptr++) * Y_tmp; + #endif + #if nIC>=17 + SFP16ptr = wmrSFP16 + offset; + x16 = (*SFP16ptr++) * Y_tmp; + #endif + #if nIC>=18 + SFP17ptr = wmrSFP17 + offset; + x17 = (*SFP17ptr++) * Y_tmp; + #endif + #if nIC>=19 + SFP18ptr = wmrSFP18 + offset; + x18 = (*SFP18ptr++) * Y_tmp; + #endif + #if nIC>=20 + SFP19ptr = wmrSFP19 + offset; + x19 = (*SFP19ptr++) * Y_tmp; + #endif + + while( ++Yptr != YptrEnd ) + { + Y_tmp = *Yptr; + x0 += (*SFP0ptr++) * Y_tmp; + #if nIC>=2 + x1 += (*SFP1ptr++) * Y_tmp; + #endif + #if nIC>=3 + x2 += (*SFP2ptr++) * Y_tmp; + #endif + #if nIC>=4 + x3 += (*SFP3ptr++) * Y_tmp; + #endif + #if nIC>=5 + x4 += (*SFP4ptr++) * Y_tmp; + #endif + #if nIC>=6 + x5 += (*SFP5ptr++) * Y_tmp; + #endif + #if nIC>=7 + x6 += (*SFP6ptr++) * Y_tmp; + #endif + #if nIC>=8 + x7 += (*SFP7ptr++) * Y_tmp; + #endif 
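/* Inner loop of the adjoint intra-cellular block: having initialised x0..x19
   from the first sample of the voxel, this loop walks the remaining nS-1 samples
   of y and keeps accumulating the kernel/y dot products before the
   length-weighted update of x further below. */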
+ #if nIC>=9 + x8 += (*SFP8ptr++) * Y_tmp; + #endif + #if nIC>=10 + x9 += (*SFP9ptr++) * Y_tmp; + #endif + #if nIC>=11 + x10 += (*SFP10ptr++) * Y_tmp; + #endif + #if nIC>=12 + x11 += (*SFP11ptr++) * Y_tmp; + #endif + #if nIC>=13 + x12 += (*SFP12ptr++) * Y_tmp; + #endif + #if nIC>=14 + x13 += (*SFP13ptr++) * Y_tmp; + #endif + #if nIC>=15 + x14 += (*SFP14ptr++) * Y_tmp; + #endif + #if nIC>=16 + x15 += (*SFP15ptr++) * Y_tmp; + #endif + #if nIC>=17 + x16 += (*SFP16ptr++) * Y_tmp; + #endif + #if nIC>=18 + x17 += (*SFP17ptr++) * Y_tmp; + #endif + #if nIC>=19 + x18 += (*SFP18ptr++) * Y_tmp; + #endif + #if nIC>=20 + x19 += (*SFP19ptr++) * Y_tmp; + #endif + } + + w = (double)(*t_l); + x[*t_f] += w * x0; + #if nIC>=2 + x[*t_f+nF] += w * x1; + #endif + #if nIC>=3 + x[*t_f+2*nF] += w * x2; + #endif + #if nIC>=4 + x[*t_f+3*nF] += w * x3; + #endif + #if nIC>=5 + x[*t_f+4*nF] += w * x4; + #endif + #if nIC>=6 + x[*t_f+5*nF] += w * x5; + #endif + #if nIC>=7 + x[*t_f+6*nF] += w * x6; + #endif + #if nIC>=8 + x[*t_f+7*nF] += w * x7; + #endif + #if nIC>=9 + x[*t_f+8*nF] += w * x8; + #endif + #if nIC>=10 + x[*t_f+9*nF] += w * x9; + #endif + #if nIC>=11 + x[*t_f+10*nF] += w * x10; + #endif + #if nIC>=12 + x[*t_f+11*nF] += w * x11; + #endif + #if nIC>=13 + x[*t_f+12*nF] += w * x12; + #endif + #if nIC>=14 + x[*t_f+13*nF] += w * x13; + #endif + #if nIC>=15 + x[*t_f+14*nF] += w * x14; + #endif + #if nIC>=16 + x[*t_f+15*nF] += w * x15; + #endif + #if nIC>=17 + x[*t_f+16*nF] += w * x16; + #endif + #if nIC>=18 + x[*t_f+17*nF] += w * x17; + #endif + #if nIC>=19 + x[*t_f+18*nF] += w * x18; + #endif + #if nIC>=20 + x[*t_f+19*nF] += w * x19; + #endif + } + + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } +#endif + +#if nEC>=1 + // extra-cellular compartments + t_v = ECv + ECthreadsT[id]; + t_vEnd = ECv + ECthreadsT[id+1]; + t_o = ECo + ECthreadsT[id]; + + x_Ptr0 = x + nIC*nF + ECthreadsT[id]; + #if nEC>=2 + x_Ptr1 = x_Ptr0 + nE; + #endif + #if nEC>=3 + x_Ptr2 = x_Ptr1 + nE; + #endif + #if nEC>=4 + x_Ptr3 = x_Ptr2 + nE; + #endif + #if nEC>=5 + x_Ptr4 = x_Ptr3 + nE; + #endif + #if nEC>=6 + x_Ptr5 = x_Ptr4 + nE; + #endif + #if nEC>=7 + x_Ptr6 = x_Ptr5 + nE; + #endif + #if nEC>=8 + x_Ptr7 = x_Ptr6 + nE; + #endif + #if nEC>=9 + x_Ptr8 = x_Ptr7 + nE; + #endif + #if nEC>=10 + x_Ptr9 = x_Ptr8 + nE; + #endif + #if nEC>=11 + x_Ptr10 = x_Ptr9 + nE; + #endif + #if nEC>=12 + x_Ptr11 = x_Ptr10 + nE; + #endif + #if nEC>=13 + x_Ptr12 = x_Ptr11 + nE; + #endif + #if nEC>=14 + x_Ptr13 = x_Ptr12 + nE; + #endif + #if nEC>=15 + x_Ptr14 = x_Ptr13 + nE; + #endif + #if nEC>=16 + x_Ptr15 = x_Ptr14 + nE; + #endif + #if nEC>=17 + x_Ptr16 = x_Ptr15 + nE; + #endif + #if nEC>=18 + x_Ptr17 = x_Ptr16 + nE; + #endif + #if nEC>=19 + x_Ptr18 = x_Ptr17 + nE; + #endif + #if nEC>=20 + x_Ptr19 = x_Ptr18 + nE; + #endif + + while( t_v != t_vEnd ) + { + Yptr = Y + nS * (*t_v++); + YptrEnd = Yptr + nS; + offset = nS * (*t_o++); + + Y_tmp = *Yptr; + SFP0ptr = wmhSFP0 + offset; + x0 = (*SFP0ptr++) * Y_tmp; + #if nEC>=2 + SFP1ptr = wmhSFP1 + offset; + x1 = (*SFP1ptr++) * Y_tmp; + #endif + #if nEC>=3 + SFP2ptr = wmhSFP2 + offset; + x2 = (*SFP2ptr++) * Y_tmp; + #endif + #if nEC>=4 + SFP3ptr = wmhSFP3 + offset; + x3 = (*SFP3ptr++) * Y_tmp; + #endif + #if nEC>=5 + SFP4ptr = wmhSFP4 + offset; + x4 = (*SFP4ptr++) * Y_tmp; + #endif + #if nEC>=6 + SFP5ptr = wmhSFP5 + offset; + x5 = (*SFP5ptr++) * Y_tmp; + #endif + #if nEC>=7 + SFP6ptr = wmhSFP6 + offset; + x6 = (*SFP6ptr++) * Y_tmp; + #endif + #if nEC>=8 + SFP7ptr = wmhSFP7 + offset; + x7 = (*SFP7ptr++) * Y_tmp; + #endif + #if 
nEC>=9 + SFP8ptr = wmhSFP8 + offset; + x8 = (*SFP8ptr++) * Y_tmp; + #endif + #if nEC>=10 + SFP9ptr = wmhSFP9 + offset; + x9 = (*SFP9ptr++) * Y_tmp; + #endif + #if nEC>=11 + SFP10ptr = wmhSFP10 + offset; + x10 = (*SFP10ptr++) * Y_tmp; + #endif + #if nEC>=12 + SFP11ptr = wmhSFP11 + offset; + x11 = (*SFP11ptr++) * Y_tmp; + #endif + #if nEC>=13 + SFP12ptr = wmhSFP12 + offset; + x12 = (*SFP12ptr++) * Y_tmp; + #endif + #if nEC>=14 + SFP13ptr = wmhSFP13 + offset; + x13 = (*SFP13ptr++) * Y_tmp; + #endif + #if nEC>=15 + SFP14ptr = wmhSFP14 + offset; + x14 = (*SFP14ptr++) * Y_tmp; + #endif + #if nEC>=16 + SFP15ptr = wmhSFP15 + offset; + x15 = (*SFP15ptr++) * Y_tmp; + #endif + #if nEC>=17 + SFP16ptr = wmhSFP16 + offset; + x16 = (*SFP16ptr++) * Y_tmp; + #endif + #if nEC>=18 + SFP17ptr = wmhSFP17 + offset; + x17 = (*SFP17ptr++) * Y_tmp; + #endif + #if nEC>=19 + SFP18ptr = wmhSFP18 + offset; + x18 = (*SFP18ptr++) * Y_tmp; + #endif + #if nEC>=20 + SFP19ptr = wmhSFP19 + offset; + x19 = (*SFP19ptr++) * Y_tmp; + #endif + + while( ++Yptr != YptrEnd ) + { + Y_tmp = *Yptr; + x0 += (*SFP0ptr++) * Y_tmp; + #if nEC>=2 + x1 += (*SFP1ptr++) * Y_tmp; + #endif + #if nEC>=3 + x2 += (*SFP2ptr++) * Y_tmp; + #endif + #if nEC>=4 + x3 += (*SFP3ptr++) * Y_tmp; + #endif + #if nEC>=5 + x4 += (*SFP4ptr++) * Y_tmp; + #endif + #if nEC>=6 + x5 += (*SFP5ptr++) * Y_tmp; + #endif + #if nEC>=7 + x6 += (*SFP6ptr++) * Y_tmp; + #endif + #if nEC>=8 + x7 += (*SFP7ptr++) * Y_tmp; + #endif + #if nEC>=9 + x8 += (*SFP8ptr++) * Y_tmp; + #endif + #if nEC>=10 + x9 += (*SFP9ptr++) * Y_tmp; + #endif + #if nEC>=11 + x10 += (*SFP10ptr++) * Y_tmp; + #endif + #if nEC>=12 + x11 += (*SFP11ptr++) * Y_tmp; + #endif + #if nEC>=13 + x12 += (*SFP12ptr++) * Y_tmp; + #endif + #if nEC>=14 + x13 += (*SFP13ptr++) * Y_tmp; + #endif + #if nEC>=15 + x14 += (*SFP14ptr++) * Y_tmp; + #endif + #if nEC>=16 + x15 += (*SFP15ptr++) * Y_tmp; + #endif + #if nEC>=17 + x16 += (*SFP16ptr++) * Y_tmp; + #endif + #if nEC>=18 + x17 += (*SFP17ptr++) * Y_tmp; + #endif + #if nEC>=19 + x18 += (*SFP18ptr++) * Y_tmp; + #endif + #if nEC>=20 + x19 += (*SFP19ptr++) * Y_tmp; + #endif + } + (*x_Ptr0++) += x0; + #if nEC>=2 + (*x_Ptr1++) += x1; + #endif + #if nEC>=3 + (*x_Ptr2++) += x2; + #endif + #if nEC>=4 + (*x_Ptr3++) += x3; + #endif + #if nEC>=5 + (*x_Ptr4++) += x4; + #endif + #if nEC>=6 + (*x_Ptr5++) += x5; + #endif + #if nEC>=7 + (*x_Ptr6++) += x6; + #endif + #if nEC>=8 + (*x_Ptr7++) += x7; + #endif + #if nEC>=9 + (*x_Ptr8++) += x8; + #endif + #if nEC>=10 + (*x_Ptr9++) += x9; + #endif + #if nEC>=11 + (*x_Ptr10++) += x10; + #endif + #if nEC>=12 + (*x_Ptr11++) += x11; + #endif + #if nEC>=13 + (*x_Ptr12++) += x12; + #endif + #if nEC>=14 + (*x_Ptr13++) += x13; + #endif + #if nEC>=15 + (*x_Ptr14++) += x14; + #endif + #if nEC>=16 + (*x_Ptr15++) += x15; + #endif + #if nEC>=17 + (*x_Ptr16++) += x16; + #endif + #if nEC>=18 + (*x_Ptr17++) += x17; + #endif + #if nEC>=19 + (*x_Ptr18++) += x18; + #endif + #if nEC>=20 + (*x_Ptr19++) += x19; + #endif + } +#endif + +#if nISO>=1 + // isotropic compartments + t_v = ISOv + ISOthreadsT[id]; + t_vEnd = ISOv + ISOthreadsT[id+1]; + + x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreadsT[id]; + #if nISO>=2 + x_Ptr1 = x_Ptr0 + nV; + #endif + #if nISO>=3 + x_Ptr2 = x_Ptr1 + nV; + #endif + #if nISO>=4 + x_Ptr3 = x_Ptr2 + nV; + #endif + #if nISO>=5 + x_Ptr4 = x_Ptr3 + nV; + #endif + #if nISO>=6 + x_Ptr5 = x_Ptr4 + nV; + #endif + #if nISO>=7 + x_Ptr6 = x_Ptr5 + nV; + #endif + #if nISO>=8 + x_Ptr7 = x_Ptr6 + nV; + #endif + #if nISO>=9 + x_Ptr8 = x_Ptr7 + nV; + #endif + #if 
nISO>=10 + x_Ptr9 = x_Ptr8 + nV; + #endif + #if nISO>=11 + x_Ptr10 = x_Ptr9 + nV; + #endif + #if nISO>=12 + x_Ptr11 = x_Ptr10 + nV; + #endif + #if nISO>=13 + x_Ptr12 = x_Ptr11 + nV; + #endif + #if nISO>=14 + x_Ptr13 = x_Ptr12 + nV; + #endif + #if nISO>=15 + x_Ptr14 = x_Ptr13 + nV; + #endif + #if nISO>=16 + x_Ptr15 = x_Ptr14 + nV; + #endif + #if nISO>=17 + x_Ptr16 = x_Ptr15 + nV; + #endif + #if nISO>=18 + x_Ptr17 = x_Ptr16 + nV; + #endif + #if nISO>=19 + x_Ptr18 = x_Ptr17 + nV; + #endif + #if nISO>=20 + x_Ptr19 = x_Ptr18 + nV; + #endif + + while( t_v != t_vEnd ) + { + Yptr = Y + nS * (*t_v++); + YptrEnd = Yptr + nS; + + SFP0ptr = isoSFP0; + #if nISO>=2 + SFP1ptr = isoSFP1; + #endif + #if nISO>=3 + SFP2ptr = isoSFP2; + #endif + #if nISO>=4 + SFP3ptr = isoSFP3; + #endif + #if nISO>=5 + SFP4ptr = isoSFP4; + #endif + #if nISO>=6 + SFP5ptr = isoSFP5; + #endif + #if nISO>=7 + SFP6ptr = isoSFP6; + #endif + #if nISO>=8 + SFP7ptr = isoSFP7; + #endif + #if nISO>=9 + SFP8ptr = isoSFP8; + #endif + #if nISO>=10 + SFP9ptr = isoSFP9; + #endif + #if nISO>=11 + SFP10ptr = isoSFP10; + #endif + #if nISO>=12 + SFP11ptr = isoSFP11; + #endif + #if nISO>=13 + SFP12ptr = isoSFP12; + #endif + #if nISO>=14 + SFP13ptr = isoSFP13; + #endif + #if nISO>=15 + SFP14ptr = isoSFP14; + #endif + #if nISO>=16 + SFP15ptr = isoSFP15; + #endif + #if nISO>=17 + SFP16ptr = isoSFP16; + #endif + #if nISO>=18 + SFP17ptr = isoSFP17; + #endif + #if nISO>=19 + SFP18ptr = isoSFP18; + #endif + #if nISO>=20 + SFP19ptr = isoSFP19; + #endif + + Y_tmp = *Yptr; + x0 = (*SFP0ptr++) * Y_tmp; + #if nISO>=2 + x1 = (*SFP1ptr++) * Y_tmp; + #endif + #if nISO>=3 + x2 = (*SFP2ptr++) * Y_tmp; + #endif + #if nISO>=4 + x3 = (*SFP3ptr++) * Y_tmp; + #endif + #if nISO>=5 + x4 = (*SFP4ptr++) * Y_tmp; + #endif + #if nISO>=6 + x5 = (*SFP5ptr++) * Y_tmp; + #endif + #if nISO>=7 + x6 = (*SFP6ptr++) * Y_tmp; + #endif + #if nISO>=8 + x7 = (*SFP7ptr++) * Y_tmp; + #endif + #if nISO>=9 + x8 = (*SFP8ptr++) * Y_tmp; + #endif + #if nISO>=10 + x9 = (*SFP9ptr++) * Y_tmp; + #endif + #if nISO>=11 + x10 = (*SFP10ptr++) * Y_tmp; + #endif + #if nISO>=12 + x11 = (*SFP11ptr++) * Y_tmp; + #endif + #if nISO>=13 + x12 = (*SFP12ptr++) * Y_tmp; + #endif + #if nISO>=14 + x13 = (*SFP13ptr++) * Y_tmp; + #endif + #if nISO>=15 + x14 = (*SFP14ptr++) * Y_tmp; + #endif + #if nISO>=16 + x15 = (*SFP15ptr++) * Y_tmp; + #endif + #if nISO>=17 + x16 = (*SFP16ptr++) * Y_tmp; + #endif + #if nISO>=18 + x17 = (*SFP17ptr++) * Y_tmp; + #endif + #if nISO>=19 + x18 = (*SFP18ptr++) * Y_tmp; + #endif + #if nISO>=20 + x19 = (*SFP19ptr++) * Y_tmp; + #endif + + while( ++Yptr != YptrEnd ) + { + Y_tmp = *Yptr; + x0 += (*SFP0ptr++) * Y_tmp; + #if nISO>=2 + x1 += (*SFP1ptr++) * Y_tmp; + #endif + #if nISO>=3 + x2 += (*SFP2ptr++) * Y_tmp; + #endif + #if nISO>=4 + x3 += (*SFP3ptr++) * Y_tmp; + #endif + #if nISO>=5 + x4 += (*SFP4ptr++) * Y_tmp; + #endif + #if nISO>=6 + x5 += (*SFP5ptr++) * Y_tmp; + #endif + #if nISO>=7 + x6 += (*SFP6ptr++) * Y_tmp; + #endif + #if nISO>=8 + x7 += (*SFP7ptr++) * Y_tmp; + #endif + #if nISO>=9 + x8 += (*SFP8ptr++) * Y_tmp; + #endif + #if nISO>=10 + x9 += (*SFP9ptr++) * Y_tmp; + #endif + #if nISO>=11 + x10 += (*SFP10ptr++) * Y_tmp; + #endif + #if nISO>=12 + x11 += (*SFP11ptr++) * Y_tmp; + #endif + #if nISO>=13 + x12 += (*SFP12ptr++) * Y_tmp; + #endif + #if nISO>=14 + x13 += (*SFP13ptr++) * Y_tmp; + #endif + #if nISO>=15 + x14 += (*SFP14ptr++) * Y_tmp; + #endif + #if nISO>=16 + x15 += (*SFP15ptr++) * Y_tmp; + #endif + #if nISO>=17 + x16 += (*SFP16ptr++) * Y_tmp; + #endif + #if nISO>=18 + x17 
+= (*SFP17ptr++) * Y_tmp; + #endif + #if nISO>=19 + x18 += (*SFP18ptr++) * Y_tmp; + #endif + #if nISO>=20 + x19 += (*SFP19ptr++) * Y_tmp; + #endif + } + + (*x_Ptr0++) += x0; + #if nISO>=2 + (*x_Ptr1++) += x1; + #endif + #if nISO>=3 + (*x_Ptr2++) += x2; + #endif + #if nISO>=4 + (*x_Ptr3++) += x3; + #endif + #if nISO>=5 + (*x_Ptr4++) += x4; + #endif + #if nISO>=6 + (*x_Ptr5++) += x5; + #endif + #if nISO>=7 + (*x_Ptr6++) += x6; + #endif + #if nISO>=8 + (*x_Ptr7++) += x7; + #endif + #if nISO>=9 + (*x_Ptr8++) += x8; + #endif + #if nISO>=10 + (*x_Ptr9++) += x9; + #endif + #if nISO>=11 + (*x_Ptr10++) += x10; + #endif + #if nISO>=12 + (*x_Ptr11++) += x11; + #endif + #if nISO>=13 + (*x_Ptr12++) += x12; + #endif + #if nISO>=14 + (*x_Ptr13++) += x13; + #endif + #if nISO>=15 + (*x_Ptr14++) += x14; + #endif + #if nISO>=16 + (*x_Ptr15++) += x15; + #endif + #if nISO>=17 + (*x_Ptr16++) += x16; + #endif + #if nISO>=18 + (*x_Ptr17++) += x17; + #endif + #if nISO>=19 + (*x_Ptr18++) += x18; + #endif + #if nISO>=20 + (*x_Ptr19++) += x19; + #endif + } +#endif + + pthread_exit( 0 ); +} + + +// ========================= +// Function called by CYTHON +// ========================= +void COMMIT_At( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT +) +{ + nF = _nF; + n = _n; + nE = _nE; + nV = _nV; + nS = _nS; + ndirs = _ndirs; + + x = _vOUT; + Y = _vIN; + + ICf = _ICf; + ICv = _ICv; + ICo = _ICo; + ICl = _ICl; + ECv = _ECv; + ECo = _ECo; + ISOv = _ISOv; + + #if nIC>=1 + wmrSFP0 = _wmrSFP; + #if nIC>=2 + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + #if nIC>=3 + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + #if nIC>=4 + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + #if nIC>=5 + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + #if nIC>=6 + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + #if nIC>=7 + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + #if nIC>=8 + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + #if nIC>=9 + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + #if nIC>=10 + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + #if nIC>=11 + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + #if nIC>=12 + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + #if nIC>=13 + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + #if nIC>=14 + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + #if nIC>=15 + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + #if nIC>=16 + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + #if nIC>=17 + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + #if nIC>=18 + wmrSFP17 = wmrSFP16 + _ndirs*_nS; + #if nIC>=19 + wmrSFP18 = wmrSFP17 + _ndirs*_nS; + #if nIC>=20 + wmrSFP19 = wmrSFP18 + _ndirs*_nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #if nEC>=1 + wmhSFP0 = _wmhSFP; + #if nEC>=2 + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + #if nEC>=3 + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + #if nEC>=4 + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + #if nEC>=5 + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + #if nEC>=6 + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + #if nEC>=7 + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + #if nEC>=8 + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + #if nEC>=9 + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + #if nEC>=10 + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + #if nEC>=11 + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + #if nEC>=12 + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + #if nEC>=13 + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + #if nEC>=14 + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + #if nEC>=15 + wmhSFP14 = 
wmhSFP13 + _ndirs*_nS; + #if nEC>=16 + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + #if nEC>=17 + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + #if nEC>=18 + wmhSFP17 = wmhSFP16 + _ndirs*_nS; + #if nEC>=19 + wmhSFP18 = wmhSFP17 + _ndirs*_nS; + #if nEC>=20 + wmhSFP19 = wmhSFP18 + _ndirs*_nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #if nISO>=1 + isoSFP0 = _isoSFP; + #if nISO>=2 + isoSFP1 = isoSFP0 + _nS; + #if nISO>=3 + isoSFP2 = isoSFP1 + _nS; + #if nISO>=4 + isoSFP3 = isoSFP2 + _nS; + #if nISO>=5 + isoSFP4 = isoSFP3 + _nS; + #if nISO>=6 + isoSFP5 = isoSFP4 + _nS; + #if nISO>=7 + isoSFP6 = isoSFP5 + _nS; + #if nISO>=8 + isoSFP7 = isoSFP6 + _nS; + #if nISO>=9 + isoSFP8 = isoSFP7 + _nS; + #if nISO>=10 + isoSFP9 = isoSFP8 + _nS; + #if nISO>=11 + isoSFP10 = isoSFP9 + _nS; + #if nISO>=12 + isoSFP11 = isoSFP10 + _nS; + #if nISO>=13 + isoSFP12 = isoSFP11 + _nS; + #if nISO>=14 + isoSFP13 = isoSFP12 + _nS; + #if nISO>=15 + isoSFP14 = isoSFP13 + _nS; + #if nISO>=16 + isoSFP15 = isoSFP14 + _nS; + #if nISO>=17 + isoSFP16 = isoSFP15 + _nS; + #if nISO>=18 + isoSFP17 = isoSFP16 + _nS; + #if nISO>=19 + isoSFP18 = isoSFP17 + _nS; + #if nISO>=20 + isoSFP19 = isoSFP18 + _nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + + ICthreadsT = _ICthreadsT; + ECthreadsT = _ECthreadsT; + ISOthreadsT = _ISOthreadsT; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[nTHREADS]; + int t; + for(t=0; t= 1 : - print() - print( " | 1/2||Ax-y||^2 Omega | Cost function Abs error Rel error | Abs x Rel x " ) - print( "------|--------------------------------|-----------------------------------------------|------------------------------" ) - iter = 1 - while True : - if verbose >= 1 : - print( "%4d |" % iter, end="" ) - sys.stdout.flush() - - # Smooth step - x = xhat - mu*grad - - # Non-smooth step - proximal( x ) - reg_term_x = omega( x ) - - # Check stepsize - tmp = x-xhat - q = qfval + np.real( np.dot(tmp,grad) ) + 0.5/mu * np.linalg.norm(tmp)**2 + reg_term_x - checkval = np.sum(A.dot(x) - y) - if np.isnan(checkval): - print('----------------------------------- Te la pelas 4 Ax -----------------------------------') - print(A.dot(x) - y) - print() - res = A.dot(x) - y - res_norm = np.linalg.norm(res) - curr_obj = 0.5 * res_norm**2 + reg_term_x - - # Backtracking - while curr_obj > q : - # Smooth step - mu = beta*mu - x = xhat - mu*grad - - # Non-smooth step - proximal( x ) - reg_term_x = omega( x ) - - # Check stepsize - tmp = x-xhat - q = qfval + np.real( np.dot(tmp,grad) ) + 0.5/mu * np.linalg.norm(tmp)**2 + reg_term_x - checkval = np.sum(A.dot(x) - y) - if np.isnan(checkval): - print('----------------------------------- Te la pelas 5 Ax -----------------------------------') - print(A.dot(x) - y) - print() - res = A.dot(x) - y - res_norm = np.linalg.norm(res) - curr_obj = 0.5 * res_norm**2 + reg_term_x - - # Global stopping criterion - abs_obj = abs(curr_obj - prev_obj) - rel_obj = abs_obj / curr_obj - abs_x = np.linalg.norm(x - prev_x) - rel_x = abs_x / ( np.linalg.norm(x) + eps ) - if verbose >= 1 : - print( " %13.7e %13.7e | %13.7e %13.7e %13.7e | %13.7e %13.7e" % ( 0.5 * res_norm**2, reg_term_x, curr_obj, abs_obj, rel_obj, abs_x, rel_x ) ) - - if abs_obj < eps : - criterion = "Absolute tolerance on the objective" - break - elif rel_obj < 
tol_fun : - criterion = "Relative tolerance on the objective" - break - elif abs_x < eps : - criterion = "Absolute tolerance on the unknown" - break - elif rel_x < tol_x : - criterion = "Relative tolerance on the unknown" - break - elif iter >= max_iter : - criterion = "Maximum number of iterations" - break - - # FISTA update - t = 0.5 * ( 1 + sqrt(1+4*told**2) ) - xhat = x + (told-1)/t * (x - prev_x) - - # Gradient computation - checkval = np.sum(A.dot(xhat) - y) - if np.isnan(checkval): - print('----------------------------------- Te la pelas 6 Ax -----------------------------------') - print(A.dot(xhat) - y) - print() - res = A.dot(xhat) - y - xarr = np.asarray(x) - - checkval = np.sum(np.asarray(At.dot(res))) - if np.isnan(checkval): - print('----------------------------------- Te la pelas 7 A\'y -----------------------------------') - print(np.asarray(At.dot(res))) - print() - grad = np.asarray(At.dot(res)) - - # Update variables - iter += 1 - prev_obj = curr_obj - prev_x = x.copy() - told = t - qfval = 0.5 * np.linalg.norm(res)**2 - - - if verbose >= 1 : - print( "< Stopping criterion: %s >" % criterion ) - - opt_details = {} - opt_details['residual'] = 0.5*res_norm**2 - opt_details['regterm'] = reg_term_x - opt_details['cost_function'] = curr_obj - opt_details['abs_cost'] = abs_obj - opt_details['rel_cost'] = rel_obj - opt_details['abs_x'] = abs_x - opt_details['rel _x'] = rel_x - opt_details['iterations'] = iter - opt_details['stopping_criterion'] = criterion - - return x, opt_details +""" +Author: Matteo Frigo - lts5 @ EPFL and Dep. of CS @ Univ. of Verona + +This structure is based on the previous work of Rafael Carrillo and was +supported by the LTS5 laboratory at EPFL, Lausanne. +""" +from __future__ import print_function +import numpy as np +from math import sqrt +import sys +import warnings +eps = np.finfo(float).eps + +from commit.proximals import (non_negativity, + omega_group_sparsity, + prox_group_sparsity, + soft_thresholding, + projection_onto_l2_ball) +group_sparsity = -1 +non_negative = 0 +norm1 = 1 +norm2 = 2 +norminf = np.inf +list_regnorms = [group_sparsity, non_negative, norm1, norm2] +list_group_sparsity_norms = [norm2]#, norminf] # removed because of issue #54 + + +def init_regularisation(commit_evaluation, + regnorms = (non_negative, non_negative, non_negative), + structureIC = None, weightsIC = None, group_norm = 2, + lambdas = (.0,.0,.0) ): + """ + Initialise the data structure that defines Omega in + + argmin_x 0.5*||Ax-y||_2^2 + Omega(x) + + + Input + ----- + commit_evaluation - commit.Evaluation object : + dictionary and model have to be loaded beforehand. + + + regnorms - tuple : + this sets the penalty term to be used for each compartment. + Default = (non_negative,non_negative,non_negative). + + regnorms[0] corresponds to the Intracellular compartment + regnorms[1] corresponds to the Extracellular compartment + regnorms[2] corresponds to the Isotropic compartment + + Each regnorms[k] must be one of commit.solvers. + {group_sparsity, non_negative, norm1, norm2}. + + commit.solvers.group_sparsity considers both the non-overlapping + and the hierarchical group sparsity (see [1]). This option is + allowed only in the IC compartment. The mathematical formulation + of this term is + $\Omega(x) = \lambda \sum_{g\in G} w_g |x_g| + + commit.solvers.non_negative puts a non negativity constraint on the + coefficients corresponding to the compartment. 
This is the + default option for each compartment + + commit.solvers.norm1 penalises with the 1-norm of the coefficients + corresponding to the compartment. + + commit.solvers.norm2 penalises with the 2-norm of the coefficients + corresponding to the compartment. + + + structureIC - np.array(list(list)) : + group structure for the IC compartment. + This field is necessary only if regterm[0]=commit.solver.group_sparsity. + Example: + structureIC = np.array([[0,2,5],[1,3,4],[0,1,2,3,4,5],[6]]) + + that is equivalent to + [0,1,2,3,4,5] [6] + / \ + [0,2,5] [1,3,4] + which has two non overlapping groups, one of which is the union + of two other non-overlapping groups. + + + weightsIC - np.array(np.float64) : + this defines the weights associated to each group of structure IC. + + + group_norm - number : + norm type for the commit.solver.group_sparsity penalisation of the IC compartment. + Default: group_norm = commit.solver.norm2 + To be chosen among commit.solver.{norm2,norminf}. + + lambdas - tuple : + regularisation parameter for each compartment. + Default: lambdas = (0.0, 0.0, 0.0) + The lambdas correspond to the onse described in the mathematical + formulation of the regularisation term + $\Omega(x) = lambdas[0]*regnorm[0](x) + lambdas[1]*regnorm[1](x) + lambdas[2]*regnorm[2](x)$ + + + References: + [1] Jenatton et al. - 'Proximal Methods for Hierarchical Sparse Coding' + """ + regularisation = {} + + regularisation['startIC'] = 0 + regularisation['sizeIC'] = int( commit_evaluation.DICTIONARY['IC']['nF'] * commit_evaluation.KERNELS['wmr'].shape[0]) + regularisation['startEC'] = int( regularisation['sizeIC'] ) + regularisation['sizeEC'] = int( commit_evaluation.DICTIONARY['EC']['nE'] * commit_evaluation.KERNELS['wmh'].shape[0]) + regularisation['startISO'] = int( regularisation['sizeIC'] + regularisation['sizeEC'] ) + regularisation['sizeISO'] = int( commit_evaluation.DICTIONARY['nV'] * commit_evaluation.KERNELS['iso'].shape[0]) + + regularisation['normIC'] = regnorms[0] + regularisation['normEC'] = regnorms[1] + regularisation['normISO'] = regnorms[2] + + regularisation['lambdaIC'] = float( lambdas[0] ) + regularisation['lambdaEC'] = float( lambdas[1] ) + regularisation['lambdaISO'] = float( lambdas[2] ) + + # Solver-specific fields + regularisation['structureIC'] = structureIC + regularisation['weightsIC'] = weightsIC + regularisation['group_norm'] = group_norm + + return regularisation + + +def regularisation2omegaprox(regularisation): + lambdaIC = float(regularisation.get('lambdaIC')) + lambdaEC = float(regularisation.get('lambdaEC')) + lambdaISO = float(regularisation.get('lambdaISO')) + if lambdaIC < 0.0 or lambdaEC < 0.0 or lambdaISO < 0.0: + raise ValueError('Negative regularisation parameters are not allowed') + + normIC = regularisation.get('normIC') + normEC = regularisation.get('normEC') + normISO = regularisation.get('normISO') + if not normIC in list_regnorms: + raise ValueError('normIC must be one of commit.solvers.{group_sparsity,non_negative,norm1,norm2}') + if not normEC in list_regnorms: + raise ValueError('normEC must be one of commit.solvers.{group_sparsity,non_negative,norm1,norm2}') + if not normISO in list_regnorms: + raise ValueError('normISO must be one of commit.solvers.{group_sparsity,non_negative,norm1,norm2}') + + ## NNLS case + if (lambdaIC == 0.0 and lambdaEC == 0.0 and lambdaISO == 0.0) or (normIC == non_negative and normEC == non_negative and normISO == non_negative): + omega = lambda x: 0.0 + prox = lambda x: non_negativity(x, 0, len(x)) + return omega, 
prox + + ## All other cases + # Intracellular Compartment + startIC = regularisation.get('startIC') + sizeIC = regularisation.get('sizeIC') + if lambdaIC == 0.0: + omegaIC = lambda x: 0.0 + proxIC = lambda x: x + elif normIC == norm2: + omegaIC = lambda x: lambdaIC * np.linalg.norm(x[startIC:sizeIC]) + proxIC = lambda x: projection_onto_l2_ball(x, lambdaIC, startIC, sizeIC) + elif normIC == norm1: + omegaIC = lambda x: lambdaIC * sum( x[startIC:sizeIC] ) + proxIC = lambda x: soft_thresholding(x, lambdaIC, startIC, sizeIC) + elif normIC == non_negative: + omegaIC = lambda x: 0.0 + proxIC = lambda x: non_negativity(x, startIC, sizeIC) + elif normIC == group_sparsity: + structureIC = regularisation.get('structureIC') + groupWeightIC = regularisation.get('weightsIC') + if not len(structureIC) == len(groupWeightIC): + raise ValueError('Number of groups and weights do not coincide.') + group_norm = regularisation.get('group_norm') + if not group_norm in list_group_sparsity_norms: + raise ValueError('Wrong norm in the structured sparsity term. Choose between %s.' % str(list_group_sparsity_norms)) + + # convert to new data structure (needed for faster access) + N = np.sum([g.size for g in structureIC]) + groupIdxIC = np.zeros( (N,), dtype=np.int32 ) + groupSizeIC = np.zeros( (structureIC.size,), dtype=np.int32 ) + pos = 0 + for i, g in enumerate(structureIC) : + groupSizeIC[i] = g.size + groupIdxIC[pos:(pos+g.size)] = g[:] + pos += g.size + + omegaIC = lambda x: omega_group_sparsity( x, groupIdxIC, groupSizeIC, groupWeightIC, lambdaIC, group_norm ) + proxIC = lambda x: prox_group_sparsity( x, groupIdxIC, groupSizeIC, groupWeightIC, lambdaIC, group_norm ) + else: + raise ValueError('Type of regularisation for IC compartment not recognized.') + + + # Extracellular Compartment + startEC = regularisation.get('startEC') + sizeEC = regularisation.get('sizeEC') + if lambdaEC == 0.0: + omegaEC = lambda x: 0.0 + proxEC = lambda x: x + elif normEC == norm2: + omegaEC = lambda x: lambdaEC * np.linalg.norm(x[startEC:(startEC+sizeEC)]) + proxEC = lambda x: projection_onto_l2_ball(x, lambdaEC, startEC, sizeEC) + elif normEC == norm1: + omegaEC = lambda x: lambdaEC * sum( x[startEC:(startEC+sizeEC)] ) + proxEC = lambda x: soft_thresholding(x, lambdaEC, startEC, sizeEC) + elif normEC == non_negative: + omegaEC = lambda x: 0.0 + proxEC = lambda x: non_negativity(x, startEC, sizeEC) + else: + raise ValueError('Type of regularisation for EC compartment not recognized.') + + # Isotropic Compartment + startISO = regularisation.get('startISO') + sizeISO = regularisation.get('sizeISO') + if lambdaISO == 0.0: + omegaISO = lambda x: 0.0 + proxISO = lambda x: x + elif normISO == norm2: + omegaISO = lambda x: lambdaISO * np.linalg.norm(x[startISO:(startISO+sizeISO)]) + proxISO = lambda x: projection_onto_l2_ball(x, lambdaISO, startISO, sizeISO) + elif normISO == norm1: + omegaISO = lambda x: lambdaISO * sum( x[startISO:(startISO+sizeISO)] ) + proxISO = lambda x: soft_thresholding(x, lambdaISO, startISO, sizeISO) + elif normISO == non_negative: + omegaISO = lambda x: 0.0 + proxISO = lambda x: non_negativity(x, startISO, sizeISO) + else: + raise ValueError('Type of regularisation for ISO compartment not recognized.') + + omega = lambda x: omegaIC(x) + omegaEC(x) + omegaISO(x) + prox = lambda x: non_negativity(proxIC(proxEC(proxISO(x))),0,x.size) # non negativity is redunduntly forced + + return omega, prox + + +def evaluate_model(y, A, x, regularisation = None): + if regularisation is None: + omega = lambda x: 0.0 + prox = 
lambda x: non_negativity(x, 0, len(x)) + else: + omega, _ = regularisation2omegaprox(regularisation) + + return 0.5*np.linalg.norm(A.dot(x)-y)**2 + omega(x) + + +def solve(y, A, At, tol_fun = 1e-4, tol_x = 1e-6, max_iter = 1000, verbose = 1, x0 = None, regularisation = None): + """ + Solve the regularised least squares problem + + argmin_x 0.5*||Ax-y||_2^2 + Omega(x) + + with the Omega described by 'regularisation'. + + Check the documentation of commit.solvers.init_regularisation to see how to + solve a specific problem. + """ + if regularisation is None: + omega = lambda x: 0.0 + prox = lambda x: non_negativity(x, 0, x.size) + else: + omega, prox = regularisation2omegaprox(regularisation) + + if x0 is None: + x0 = np.zeros(A.shape[1]) + + return fista( y, A, At, tol_fun, tol_x, max_iter, verbose, x0, omega, prox) + + +def fista( y, A, At, tol_fun, tol_x, max_iter, verbose, x0, omega, proximal) : + """ + Solve the regularised least squares problem + + argmin_x 0.5*||Ax-y||_2^2 + Omega(x) + + with the FISTA algorithm described in [1]. + + The penalty term and its proximal operator must be defined in such a way + that they already contain the regularisation parameter. + + References: + [1] Beck & Teboulle - `A Fast Iterative Shrinkage Thresholding + Algorithm for Linear Inverse Problems` + """ + + # Initialization + res = -y.copy() + xhat = x0.copy() + x = np.zeros_like(xhat) + checkval = np.sum(A.dot(xhat)) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 1 Ax -----------------------------------') + print(A.dot(xhat)) + print() + res += A.dot(xhat) + proximal( xhat ) + reg_term = omega( xhat ) + prev_obj = 0.5 * np.linalg.norm(res)**2 + reg_term + + told = 1 + beta = 0.9 + prev_x = xhat.copy() + checkval = np.sum(np.asarray(At.dot(res))) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 2 A\'y -----------------------------------') + print(np.asarray(At.dot(res))) + print() + grad = np.asarray(At.dot(res)) + qfval = prev_obj + + # Step size computation + checkval = np.sum(A.dot(grad)) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 3 Ax -----------------------------------') + print(A.dot(grad)) + print() + L = ( np.linalg.norm( A.dot(grad) ) / np.linalg.norm(grad) )**2 + mu = 1.9 / L + + # Main loop + if verbose >= 1 : + print() + print( " | 1/2||Ax-y||^2 Omega | Cost function Abs error Rel error | Abs x Rel x " ) + print( "------|--------------------------------|-----------------------------------------------|------------------------------" ) + iter = 1 + while True : + if verbose >= 1 : + print( "%4d |" % iter, end="" ) + sys.stdout.flush() + + # Smooth step + x = xhat - mu*grad + + # Non-smooth step + proximal( x ) + reg_term_x = omega( x ) + + # Check stepsize + tmp = x-xhat + q = qfval + np.real( np.dot(tmp,grad) ) + 0.5/mu * np.linalg.norm(tmp)**2 + reg_term_x + checkval = np.sum(A.dot(x) - y) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 4 Ax -----------------------------------') + print(A.dot(x) - y) + print() + res = A.dot(x) - y + res_norm = np.linalg.norm(res) + curr_obj = 0.5 * res_norm**2 + reg_term_x + + # Backtracking + while curr_obj > q : + # Smooth step + mu = beta*mu + x = xhat - mu*grad + + # Non-smooth step + proximal( x ) + reg_term_x = omega( x ) + + # Check stepsize + tmp = x-xhat + q = qfval + np.real( np.dot(tmp,grad) ) + 0.5/mu * np.linalg.norm(tmp)**2 + reg_term_x + checkval = np.sum(A.dot(x) - y) + if np.isnan(checkval): + 
print('----------------------------------- Te la pelas 5 Ax -----------------------------------') + print(A.dot(x) - y) + print() + res = A.dot(x) - y + res_norm = np.linalg.norm(res) + curr_obj = 0.5 * res_norm**2 + reg_term_x + + # Global stopping criterion + abs_obj = abs(curr_obj - prev_obj) + rel_obj = abs_obj / curr_obj + abs_x = np.linalg.norm(x - prev_x) + rel_x = abs_x / ( np.linalg.norm(x) + eps ) + if verbose >= 1 : + print( " %13.7e %13.7e | %13.7e %13.7e %13.7e | %13.7e %13.7e" % ( 0.5 * res_norm**2, reg_term_x, curr_obj, abs_obj, rel_obj, abs_x, rel_x ) ) + + if abs_obj < eps : + criterion = "Absolute tolerance on the objective" + break + elif rel_obj < tol_fun : + criterion = "Relative tolerance on the objective" + break + elif abs_x < eps : + criterion = "Absolute tolerance on the unknown" + break + elif rel_x < tol_x : + criterion = "Relative tolerance on the unknown" + break + elif iter >= max_iter : + criterion = "Maximum number of iterations" + break + + # FISTA update + t = 0.5 * ( 1 + sqrt(1+4*told**2) ) + xhat = x + (told-1)/t * (x - prev_x) + + # Gradient computation + checkval = np.sum(A.dot(xhat) - y) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 6 Ax -----------------------------------') + print(A.dot(xhat) - y) + print() + res = A.dot(xhat) - y + xarr = np.asarray(x) + + checkval = np.sum(np.asarray(At.dot(res))) + if np.isnan(checkval): + print('----------------------------------- Te la pelas 7 A\'y -----------------------------------') + print(np.asarray(At.dot(res))) + print() + grad = np.asarray(At.dot(res)) + + # Update variables + iter += 1 + prev_obj = curr_obj + prev_x = x.copy() + told = t + qfval = 0.5 * np.linalg.norm(res)**2 + + + if verbose >= 1 : + print( "< Stopping criterion: %s >" % criterion ) + + opt_details = {} + opt_details['residual'] = 0.5*res_norm**2 + opt_details['regterm'] = reg_term_x + opt_details['cost_function'] = curr_obj + opt_details['abs_cost'] = abs_obj + opt_details['rel_cost'] = rel_obj + opt_details['abs_x'] = abs_x + opt_details['rel _x'] = rel_x + opt_details['iterations'] = iter + opt_details['stopping_criterion'] = criterion + + return x, opt_details diff --git a/commit/trk2dictionary/trk2dictionary.pyx b/commit/trk2dictionary/trk2dictionary.pyx index d19cbf78..2ab372b7 100755 --- a/commit/trk2dictionary/trk2dictionary.pyx +++ b/commit/trk2dictionary/trk2dictionary.pyx @@ -1,446 +1,446 @@ -#!python -# cython: language_level=3, c_string_type=str, c_string_encoding=ascii, boundscheck=False, wraparound=False, profile=False -from __future__ import print_function -import cython -import numpy as np -cimport numpy as np -import nibabel -from os.path import join, exists, splitext -from os import makedirs, remove -import time -import amico -import pickle -from amico.util import LOG, NOTE, WARNING, ERROR - - -# Interface to actual C code -cdef extern from "trk2dictionary_c.cpp": - int trk2dictionary( - char* filename_tractogram, int data_offset, int Nx, int Ny, int Nz, float Px, float Py, float Pz, int n_count, int n_scalars, - int n_properties, float fiber_shiftX, float fiber_shiftY, float fiber_shiftZ, int points_to_skip, float min_seg_len, - float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int ECiz, - float* _ptrMASK, float* ptrTDI, char* path_out, int c, double* ptrPeaksAffine, - int nBlurRadii, double blurSigma, double* ptrBlurRadii, int* ptrBlurSamples, double* ptrBlurWeights, float* ptrTractsAffine, unsigned short ndirs, short* prtHashTable - ) nogil - - -cpdef run( 
filename_tractogram = None, path_out = None, filename_peaks = None, filename_mask = None, do_intersect = True, - fiber_shift = 0, points_to_skip = 0, vf_THR = 0.1, peaks_use_affine = False, - flip_peaks = [False,False,False], min_seg_len = 1e-3, gen_trk = True, - blur_radii = [], blur_samples = [], blur_sigma = 1.0, filename_trk = None, TCK_ref_image = None, ndirs = 32761 - ): - """Perform the conversion of a tractoram to the sparse data-structure internally - used by COMMIT to perform the matrix-vector multiplications with the operator A - during the inversion of the linear system. - - Parameters - ---------- - filename_tractogram : string - Path to the .trk or .tck file containing the tractogram to load. - - filename_trk : string - DEPRECATED. Use filename_tractogram instead. - - path_out : string - Path to the folder where to store the sparse data structure. - - filename_peaks : string - Path to the NIFTI file containing the peaks to use as extra-cellular contributions. - The data matrix should be 4D with last dimension 3*N, where N is the number - of peaks in each voxel. (default : no extra-cellular contributions) - - filename_mask : string - Path to a binary mask to restrict the analysis to specific areas. Segments - outside this mask are discarded. If not specified (default), the mask is created from - all voxels intersected by the tracts. - - do_intersect : boolean - If True then fiber segments that intersect voxel boundaries are splitted (default). - If False then the centroid of the segment is used as its voxel position. - - fiber_shift : float or list of three float - If necessary, apply a translation to fiber coordinates (default : 0) to account - for differences between the reference system of the tracking algorithm and COMMIT. - The value is specified in voxel units, eg 0.5 translates by half voxel. - Do noth use if you are using fiber_shiftX or fiber_shiftY or fiber_shiftZ. - - points_to_skip : integer - If necessary, discard first points at beginning/end of a fiber (default : 0). - - vf_THR : float - Discard peaks smaller than vf_THR * max peak (default : 0.1). - - peaks_use_affine : boolean - Whether to rotate the peaks according to the affine matrix (default : False). - - flip_peaks : list of three boolean - If necessary, flips peak orientations along each axis (default : no flipping). - - min_seg_len : float - Discard segments <= than this length in mm (default : 1e-3) - - gen_trk : boolean - If True then generate a .trk file in the 'path_out' containing the fibers used in the dictionary (default : True) - - blur_radii : list of float - Translate each segment to given radii to assign a broader fiber contribution (default : []) - - blur_samples : list of integer - Segments are duplicated along a circle at a given radius; this parameter controls the number of samples to take over a given circle (defaut : []) - - blur_sigma: float - The contributions of the segments at different radii are damped as a Gaussian (default : 1.0) - - TCK_ref_image: string - Path to the NIFTI file containing the information about the geometry used for the tractogram .tck to load. - If it is not specified, it will try to use the information of filename_peaks or filename_mask. 
- - ndirs : int - Number of directions on the half of the sphere - """ - - filename = path_out + '/dictionary_info.pickle' - dictionary_info = {} - dictionary_info['filename_trk'] = filename_trk - dictionary_info['path_out'] = path_out - dictionary_info['filename_peaks'] = filename_peaks - dictionary_info['filename_mask'] = filename_mask - dictionary_info['do_intersect'] = do_intersect - dictionary_info['fiber_shift'] = fiber_shift - dictionary_info['points_to_skip'] = points_to_skip - dictionary_info['vf_THR'] = vf_THR - dictionary_info['peaks_use_affine'] = peaks_use_affine - dictionary_info['flip_peaks'] = flip_peaks - dictionary_info['min_seg_len'] = min_seg_len - dictionary_info['gen_trk'] = gen_trk - dictionary_info['blur_radii'] = blur_radii - dictionary_info['blur_samples'] = blur_samples - dictionary_info['blur_sigma'] = blur_sigma - dictionary_info['ndirs'] = ndirs - - # check the value of ndirs - if not amico.lut.is_valid(ndirs): - ERROR( 'Unsupported value for ndirs.\nNote: Supported values for ndirs are [500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000, 32761 (default)]' ) - - # check conflicts of fiber_shift - if np.isscalar(fiber_shift) : - fiber_shiftX = fiber_shift - fiber_shiftY = fiber_shift - fiber_shiftZ = fiber_shift - elif len(fiber_shift) == 3 : - fiber_shiftX = fiber_shift[0] - fiber_shiftY = fiber_shift[1] - fiber_shiftZ = fiber_shift[2] - else : - ERROR( '"fiber_shift" must be a scalar or a vector with 3 elements' ) - - tic = time.time() - LOG( '\n-> Creating the dictionary from tractogram:' ) - - LOG( '\n * Configuration:' ) - print( '\t- Segment position = %s' % ( 'COMPUTE INTERSECTIONS' if do_intersect else 'CENTROID' ) ) - print( '\t- Fiber shift X = %.3f (voxel-size units)' % fiber_shiftX ) - print( '\t- Fiber shift Y = %.3f (voxel-size units)' % fiber_shiftY ) - print( '\t- Fiber shift Z = %.3f (voxel-size units)' % fiber_shiftZ ) - print( '\t- Points to skip = %d' % points_to_skip ) - print( '\t- Min segment len = %.2e' % min_seg_len ) - - # check blur params - cdef : - double [:] blurRadii - int [:] blurSamples - double [:] blurWeights - double* ptrBlurRadii - int* ptrBlurSamples - double* ptrBlurWeights - int nBlurRadii - float [:] ArrayInvM - float* ptrArrayInvM - - if len(blur_radii) != len(blur_samples) : - ERROR( 'Number of radii and samples must match' ) - - # convert to numpy arrays (add fake radius for original segment) - nBlurRadii = len(blur_radii)+1 - blurRadii = np.array( [0.0]+blur_radii, np.double ) - blurSamples = np.array( [1]+blur_samples, np.int32 ) - - # compute weights for gaussian damping - blurWeights = np.empty_like( blurRadii ) - for i in xrange(nBlurRadii): - blurWeights[i] = np.exp( -blurRadii[i]**2 / (2.0*blur_sigma**2) ) - - if nBlurRadii == 1 : - print( '\t- Do not blur fibers' ) - else : - print( '\t- Blur fibers:' ) - print( '\t\t- sigma = %.3f' % blur_sigma ) - print( '\t\t- radii = [', end="" ) - for i in xrange( 1, blurRadii.size ) : - print( '%.3f' % blurRadii[i], end="" ) - print( ']' ) - print( '\t\t- samples = [', end="" ) - for i in xrange( 1, blurSamples.size ) : - print( '%5d' % blurSamples[i], end="" ) - print( ']' ) - print( '\t\t- weights = [', end="" ) - for i in xrange( 1, blurWeights.size ) : - print( '%.3f' % blurWeights[i], end="" ) - print( ']' ) - - ptrBlurRadii = &blurRadii[0] - ptrBlurSamples = &blurSamples[0] - ptrBlurWeights = &blurWeights[0] - - # minimum segment length - if min_seg_len < 0 : - ERROR( '"min_seg_len" must be >= 0' ) 
- - LOG( '\n * Loading data:' ) - cdef short [:] htable = amico.lut.load_precomputed_hash_table(ndirs) - cdef short* ptrHashTable = &htable[0] - - # fiber-tracts from .trk - print( '\t- Tractogram' ) - - if (path_out is None): - ERROR( '"path_out" not defined' ) - - if (filename_trk is None and filename_tractogram is None): - ERROR( '"filename_tractogram" not defined' ) - - if (filename_trk is not None and filename_tractogram is not None): - WARNING('"filename_trk" will not be considered, "filename_tractogram" will be used') - - if (filename_trk is not None and filename_tractogram is None): - filename_tractogram = filename_trk - WARNING('"filename_trk" parameter is deprecated, use "filename_tractogram" instead') - - extension = splitext(filename_tractogram)[1] - if (extension != ".trk" and extension != ".tck") : - ERROR( 'Invalid input file: only .trk and .tck are supported' ) - try : - hdr = nibabel.streamlines.load( filename_tractogram, lazy_load=True ).header - except : - ERROR( 'Tractogram file not found' ) - - if (extension == ".trk"): - Nx = hdr['dimensions'][0] - Ny = hdr['dimensions'][1] - Nz = hdr['dimensions'][2] - Px = hdr['voxel_sizes'][0] - Py = hdr['voxel_sizes'][1] - Pz = hdr['voxel_sizes'][2] - - data_offset = 1000 - n_count = hdr['nb_streamlines'] - n_scalars = hdr['nb_scalars_per_point'] - n_properties = hdr['nb_properties_per_streamline'] - - if (extension == ".tck"): - if TCK_ref_image is None: - if filename_peaks is not None: - TCK_ref_image = filename_peaks - elif filename_mask is not None: - TCK_ref_image = filename_mask - else: - ERROR( 'TCK files do not contain information about the geometry. Use "TCK_ref_image" for that' ) - - print ('\t\t- geometry taken from "%s"' %TCK_ref_image) - - nii_image = nibabel.load(TCK_ref_image) - nii_hdr = nii_image.header if nibabel.__version__ >= '2.0.0' else nii_image.get_header() - Nx = nii_image.shape[0] - Ny = nii_image.shape[1] - Nz = nii_image.shape[2] - Px = nii_hdr['pixdim'][1] - Py = nii_hdr['pixdim'][2] - Pz = nii_hdr['pixdim'][3] - data_offset = int(hdr['_offset_data']) #set offset - n_count = int(hdr['count']) #set number of fibers - n_scalars = 0 - n_properties = 0 - - print( '\t\t- %d x %d x %d' % ( Nx, Ny, Nz ) ) - print( '\t\t- %.4f x %.4f x %.4f' % ( Px, Py, Pz ) ) - print( '\t\t- %d fibers' % n_count ) - if Nx >= 2**16 or Nz >= 2**16 or Nz >= 2**16 : - ERROR( 'The max dim size is 2^16 voxels' ) - - # get the affine matrix - if (extension == ".tck"): - scaleMat = np.diag(np.divide(1.0, [Px,Py,Pz])) - M = nii_hdr.get_best_affine() - - # Affine matrix without scaling, i.e. 
diagonal is 1 - M[:3, :3] = np.dot(scaleMat, M[:3, :3]) - M = M.astype('= '2.0.0' else niiMASK.get_header() - print( '\t\t- %d x %d x %d' % ( niiMASK.shape[0], niiMASK.shape[1], niiMASK.shape[2] ) ) - print( '\t\t- %.4f x %.4f x %.4f' % ( niiMASK_hdr['pixdim'][1], niiMASK_hdr['pixdim'][2], niiMASK_hdr['pixdim'][3] ) ) - if ( Nx!=niiMASK.shape[0] or Ny!=niiMASK.shape[1] or Nz!=niiMASK.shape[2] or - abs(Px-niiMASK_hdr['pixdim'][1])>1e-3 or abs(Py-niiMASK_hdr['pixdim'][2])>1e-3 or abs(Pz-niiMASK_hdr['pixdim'][3])>1e-3 ) : - WARNING( 'Dataset does not have the same geometry as the tractogram' ) - niiMASK_img = np.ascontiguousarray( niiMASK.get_data().astype(np.float32) ) - ptrMASK = &niiMASK_img[0,0,0] - else : - print( '\t- No mask specified to filter IC compartments' ) - ptrMASK = NULL - - # peaks file for EC contributions - cdef float* ptrPEAKS - cdef float [:, :, :, ::1] niiPEAKS_img - cdef int Np - cdef float [:, :, ::1] niiTDI_img = np.ascontiguousarray( np.zeros((Nx,Ny,Nz),dtype=np.float32) ) - cdef float* ptrTDI = &niiTDI_img[0,0,0] - cdef double [:, ::1] affine - cdef double* ptrAFFINE - if filename_peaks is not None : - print( '\t- EC orientations' ) - niiPEAKS = nibabel.load( filename_peaks ) - niiPEAKS_hdr = niiPEAKS.header if nibabel.__version__ >= '2.0.0' else niiPEAKS.get_header() - print( '\t\t- %d x %d x %d x %d' % ( niiPEAKS.shape[0], niiPEAKS.shape[1], niiPEAKS.shape[2], niiPEAKS.shape[3] ) ) - print( '\t\t- %.4f x %.4f x %.4f' % ( niiPEAKS_hdr['pixdim'][1], niiPEAKS_hdr['pixdim'][2], niiPEAKS_hdr['pixdim'][3] ) ) - print( '\t\t- ignoring peaks < %.2f * MaxPeak' % vf_THR ) - print( '\t\t- %susing affine matrix' % ( "" if peaks_use_affine else "not " ) ) - print( '\t\t- flipping axes : [ x=%s, y=%s, z=%s ]' % ( flip_peaks[0], flip_peaks[1], flip_peaks[2] ) ) - if ( Nx!=niiPEAKS.shape[0] or Ny!=niiPEAKS.shape[1] or Nz!=niiPEAKS.shape[2] or - abs(Px-niiPEAKS_hdr['pixdim'][1])>1e-3 or abs(Py-niiPEAKS_hdr['pixdim'][2])>1e-3 or abs(Pz-niiPEAKS_hdr['pixdim'][3])>1e-3 ) : - WARNING( "Dataset does not have the same geometry as the tractogram" ) - if niiPEAKS.shape[3] % 3 : - ERROR( 'PEAKS dataset must have 3*k volumes' ) - if vf_THR < 0 or vf_THR > 1 : - ERROR( '"vf_THR" must be between 0 and 1' ) - niiPEAKS_img = np.ascontiguousarray( niiPEAKS.get_data().astype(np.float32) ) - ptrPEAKS = &niiPEAKS_img[0,0,0,0] - Np = niiPEAKS.shape[3]/3 - - # affine matrix to rotate gradien directions (if required) - if peaks_use_affine : - affine = np.ascontiguousarray( niiPEAKS.affine[:3,:3].T ) - else : - affine = np.ascontiguousarray( np.eye(3) ) - ptrAFFINE = &affine[0,0] - else : - print( '\t- No dataset specified for EC compartments' ) - Np = 0 - ptrPEAKS = NULL - ptrAFFINE = NULL - - # output path - print( '\t- Output written to "%s"' % path_out ) - if not exists( path_out ): - makedirs( path_out ) - - # write dictionary info file - with open( filename, 'wb+' ) as dictionary_info_file: - pickle.dump(dictionary_info, dictionary_info_file, protocol=2) - - # calling actual C code - ret = trk2dictionary( filename_tractogram, data_offset, - Nx, Ny, Nz, Px, Py, Pz, n_count, n_scalars, n_properties, - fiber_shiftX, fiber_shiftY, fiber_shiftZ, points_to_skip, min_seg_len, - ptrPEAKS, Np, vf_THR, -1 if flip_peaks[0] else 1, -1 if flip_peaks[1] else 1, -1 if flip_peaks[2] else 1, - ptrMASK, ptrTDI, path_out, 1 if do_intersect else 0, ptrAFFINE, - nBlurRadii, blur_sigma, ptrBlurRadii, ptrBlurSamples, ptrBlurWeights, ptrArrayInvM, ndirs, ptrHashTable ); - if ret == 0 : - WARNING( 'DICTIONARY not 
generated' ) - return None - - # create new TRK with only fibers in the WM mask - # create new dictionaty file (TRK or TCK) with only fibers in the WM mask - if gen_trk : - LOG('\n * Generate tractogram matching the dictionary:') - fib = nibabel.streamlines.load( filename_tractogram, lazy_load=True ) - hdr = fib.header - - file_kept = np.fromfile( join(path_out,'dictionary_TRK_kept.dict'), dtype=np.bool_ ) - streamlines_out = [] - for i, f in enumerate(fib.streamlines): - if file_kept[i] : - streamlines_out.append( f ) - hdr['count'] = len(streamlines_out) #set new number of fibers in the header - hdr['nb_streamlines'] = len(streamlines_out) - - #create a output dictionary file (TRK or TCK) in path_out - tractogram_out = nibabel.streamlines.tractogram.Tractogram(streamlines=streamlines_out, affine_to_rasmm=fib.tractogram.affine_to_rasmm) - nibabel.streamlines.save( tractogram_out, join(path_out,'dictionary_TRK_fibers'+extension), header=hdr ) - print( ' [ %d fibers kept ]' % np.count_nonzero( file_kept ) ) - - # save TDI and MASK maps - if filename_mask is not None : - affine = niiMASK.affine if nibabel.__version__ >= '2.0.0' else niiMASK.get_affine() - elif filename_peaks is not None : - affine = niiPEAKS.affine if nibabel.__version__ >= '2.0.0' else niiPEAKS.get_affine() - else : - affine = np.diag( [Px, Py, Pz, 1] ) - - niiTDI = nibabel.Nifti1Image( niiTDI_img, affine ) - nibabel.save( niiTDI, join(path_out,'dictionary_tdi.nii.gz') ) - - if filename_mask is not None : - niiMASK = nibabel.Nifti1Image( niiMASK_img, affine ) - else : - niiMASK = nibabel.Nifti1Image( (np.asarray(niiTDI_img)>0).astype(np.float32), affine ) - nibabel.save( niiMASK, join(path_out,'dictionary_mask.nii.gz') ) - - LOG( '\n [ %.1f seconds ]' % ( time.time() - tic ) ) - - -cpdef convert_old_dictionary( path ): - """Perform the conversion of the files representing a dictionary, i.e. dictionary_*.dict, - from the old format to the new one, where the files *_{vx,vy,vz}.dict are replaced - by a single file *_v.dict (same for the files *_{ox,oy}.dict). - - Parameters - ---------- - path : string - Path to the folder containing the dictionary_*.dict files. 
- """ - if not exists( join(path,'dictionary_IC_vx.dict') ): - ERROR( 'Folder does not contain dictionary files in the old format' ) - - niiTDI = nibabel.load( join(path,'dictionary_tdi.nii.gz') ) - Nx, Ny, Nz = niiTDI.shape[:3] - x = np.fromfile( join(path,'dictionary_IC_vx.dict'), dtype=np.uint16 ).astype(np.uint32) - y = np.fromfile( join(path,'dictionary_IC_vy.dict'), dtype=np.uint16 ).astype(np.uint32) - z = np.fromfile( join(path,'dictionary_IC_vz.dict'), dtype=np.uint16 ).astype(np.uint32) - v = x + Nx * ( y + Ny * z ) - v.tofile( join(path,'dictionary_IC_v.dict') ) - remove( join(path,'dictionary_IC_vx.dict') ) - remove( join(path,'dictionary_IC_vy.dict') ) - remove( join(path,'dictionary_IC_vz.dict') ) - - x = np.fromfile( join(path,'dictionary_EC_vx.dict'), dtype=np.uint8 ).astype(np.uint32) - y = np.fromfile( join(path,'dictionary_EC_vy.dict'), dtype=np.uint8 ).astype(np.uint32) - z = np.fromfile( join(path,'dictionary_EC_vz.dict'), dtype=np.uint8 ).astype(np.uint32) - v = x + Nx * ( y + Ny * z ) - v.tofile( join(path,'dictionary_EC_v.dict') ) - remove( join(path,'dictionary_EC_vx.dict') ) - remove( join(path,'dictionary_EC_vy.dict') ) - remove( join(path,'dictionary_EC_vz.dict') ) - - x = np.fromfile( join(path,'dictionary_IC_ox.dict'), dtype=np.uint8 ).astype(np.uint16) - y = np.fromfile( join(path,'dictionary_IC_oy.dict'), dtype=np.uint8 ).astype(np.uint16) - v = y + 181 * x - v.tofile( join(path,'dictionary_IC_o.dict') ) - remove( join(path,'dictionary_IC_ox.dict') ) - remove( join(path,'dictionary_IC_oy.dict') ) - - x = np.fromfile( join(path,'dictionary_EC_ox.dict'), dtype=np.uint8 ).astype(np.uint16) - y = np.fromfile( join(path,'dictionary_EC_oy.dict'), dtype=np.uint8 ).astype(np.uint16) - v = y + 181 * x - v.tofile( join(path,'dictionary_EC_o.dict') ) - remove( join(path,'dictionary_EC_ox.dict') ) - remove( join(path,'dictionary_EC_oy.dict') ) +#!python +# cython: language_level=3, c_string_type=str, c_string_encoding=ascii, boundscheck=False, wraparound=False, profile=False +from __future__ import print_function +import cython +import numpy as np +cimport numpy as np +import nibabel +from os.path import join, exists, splitext +from os import makedirs, remove +import time +import amico +import pickle +from amico.util import LOG, NOTE, WARNING, ERROR + + +# Interface to actual C code +cdef extern from "trk2dictionary_c.cpp": + int trk2dictionary( + char* filename_tractogram, int data_offset, int Nx, int Ny, int Nz, float Px, float Py, float Pz, int n_count, int n_scalars, + int n_properties, float fiber_shiftX, float fiber_shiftY, float fiber_shiftZ, int points_to_skip, float min_seg_len, + float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int ECiz, + float* _ptrMASK, float* ptrTDI, char* path_out, int c, double* ptrPeaksAffine, + int nBlurRadii, double blurSigma, double* ptrBlurRadii, int* ptrBlurSamples, double* ptrBlurWeights, float* ptrTractsAffine, unsigned short ndirs, short* prtHashTable + ) nogil + + +cpdef run( filename_tractogram = None, path_out = None, filename_peaks = None, filename_mask = None, do_intersect = True, + fiber_shift = 0, points_to_skip = 0, vf_THR = 0.1, peaks_use_affine = False, + flip_peaks = [False,False,False], min_seg_len = 1e-3, gen_trk = True, + blur_radii = [], blur_samples = [], blur_sigma = 1.0, filename_trk = None, TCK_ref_image = None, ndirs = 32761 + ): + """Perform the conversion of a tractoram to the sparse data-structure internally + used by COMMIT to perform the matrix-vector multiplications with the operator A + 
during the inversion of the linear system. + + Parameters + ---------- + filename_tractogram : string + Path to the .trk or .tck file containing the tractogram to load. + + filename_trk : string + DEPRECATED. Use filename_tractogram instead. + + path_out : string + Path to the folder where to store the sparse data structure. + + filename_peaks : string + Path to the NIFTI file containing the peaks to use as extra-cellular contributions. + The data matrix should be 4D with last dimension 3*N, where N is the number + of peaks in each voxel. (default : no extra-cellular contributions) + + filename_mask : string + Path to a binary mask to restrict the analysis to specific areas. Segments + outside this mask are discarded. If not specified (default), the mask is created from + all voxels intersected by the tracts. + + do_intersect : boolean + If True then fiber segments that intersect voxel boundaries are splitted (default). + If False then the centroid of the segment is used as its voxel position. + + fiber_shift : float or list of three float + If necessary, apply a translation to fiber coordinates (default : 0) to account + for differences between the reference system of the tracking algorithm and COMMIT. + The value is specified in voxel units, eg 0.5 translates by half voxel. + Do noth use if you are using fiber_shiftX or fiber_shiftY or fiber_shiftZ. + + points_to_skip : integer + If necessary, discard first points at beginning/end of a fiber (default : 0). + + vf_THR : float + Discard peaks smaller than vf_THR * max peak (default : 0.1). + + peaks_use_affine : boolean + Whether to rotate the peaks according to the affine matrix (default : False). + + flip_peaks : list of three boolean + If necessary, flips peak orientations along each axis (default : no flipping). + + min_seg_len : float + Discard segments <= than this length in mm (default : 1e-3) + + gen_trk : boolean + If True then generate a .trk file in the 'path_out' containing the fibers used in the dictionary (default : True) + + blur_radii : list of float + Translate each segment to given radii to assign a broader fiber contribution (default : []) + + blur_samples : list of integer + Segments are duplicated along a circle at a given radius; this parameter controls the number of samples to take over a given circle (defaut : []) + + blur_sigma: float + The contributions of the segments at different radii are damped as a Gaussian (default : 1.0) + + TCK_ref_image: string + Path to the NIFTI file containing the information about the geometry used for the tractogram .tck to load. + If it is not specified, it will try to use the information of filename_peaks or filename_mask. 
+ + ndirs : int + Number of directions on the half of the sphere + """ + + filename = path_out + '/dictionary_info.pickle' + dictionary_info = {} + dictionary_info['filename_trk'] = filename_trk + dictionary_info['path_out'] = path_out + dictionary_info['filename_peaks'] = filename_peaks + dictionary_info['filename_mask'] = filename_mask + dictionary_info['do_intersect'] = do_intersect + dictionary_info['fiber_shift'] = fiber_shift + dictionary_info['points_to_skip'] = points_to_skip + dictionary_info['vf_THR'] = vf_THR + dictionary_info['peaks_use_affine'] = peaks_use_affine + dictionary_info['flip_peaks'] = flip_peaks + dictionary_info['min_seg_len'] = min_seg_len + dictionary_info['gen_trk'] = gen_trk + dictionary_info['blur_radii'] = blur_radii + dictionary_info['blur_samples'] = blur_samples + dictionary_info['blur_sigma'] = blur_sigma + dictionary_info['ndirs'] = ndirs + + # check the value of ndirs + if not amico.lut.is_valid(ndirs): + ERROR( 'Unsupported value for ndirs.\nNote: Supported values for ndirs are [500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000, 32761 (default)]' ) + + # check conflicts of fiber_shift + if np.isscalar(fiber_shift) : + fiber_shiftX = fiber_shift + fiber_shiftY = fiber_shift + fiber_shiftZ = fiber_shift + elif len(fiber_shift) == 3 : + fiber_shiftX = fiber_shift[0] + fiber_shiftY = fiber_shift[1] + fiber_shiftZ = fiber_shift[2] + else : + ERROR( '"fiber_shift" must be a scalar or a vector with 3 elements' ) + + tic = time.time() + LOG( '\n-> Creating the dictionary from tractogram:' ) + + LOG( '\n * Configuration:' ) + print( '\t- Segment position = %s' % ( 'COMPUTE INTERSECTIONS' if do_intersect else 'CENTROID' ) ) + print( '\t- Fiber shift X = %.3f (voxel-size units)' % fiber_shiftX ) + print( '\t- Fiber shift Y = %.3f (voxel-size units)' % fiber_shiftY ) + print( '\t- Fiber shift Z = %.3f (voxel-size units)' % fiber_shiftZ ) + print( '\t- Points to skip = %d' % points_to_skip ) + print( '\t- Min segment len = %.2e' % min_seg_len ) + + # check blur params + cdef : + double [:] blurRadii + int [:] blurSamples + double [:] blurWeights + double* ptrBlurRadii + int* ptrBlurSamples + double* ptrBlurWeights + int nBlurRadii + float [:] ArrayInvM + float* ptrArrayInvM + + if len(blur_radii) != len(blur_samples) : + ERROR( 'Number of radii and samples must match' ) + + # convert to numpy arrays (add fake radius for original segment) + nBlurRadii = len(blur_radii)+1 + blurRadii = np.array( [0.0]+blur_radii, np.double ) + blurSamples = np.array( [1]+blur_samples, np.int32 ) + + # compute weights for gaussian damping + blurWeights = np.empty_like( blurRadii ) + for i in xrange(nBlurRadii): + blurWeights[i] = np.exp( -blurRadii[i]**2 / (2.0*blur_sigma**2) ) + + if nBlurRadii == 1 : + print( '\t- Do not blur fibers' ) + else : + print( '\t- Blur fibers:' ) + print( '\t\t- sigma = %.3f' % blur_sigma ) + print( '\t\t- radii = [', end="" ) + for i in xrange( 1, blurRadii.size ) : + print( '%.3f' % blurRadii[i], end="" ) + print( ']' ) + print( '\t\t- samples = [', end="" ) + for i in xrange( 1, blurSamples.size ) : + print( '%5d' % blurSamples[i], end="" ) + print( ']' ) + print( '\t\t- weights = [', end="" ) + for i in xrange( 1, blurWeights.size ) : + print( '%.3f' % blurWeights[i], end="" ) + print( ']' ) + + ptrBlurRadii = &blurRadii[0] + ptrBlurSamples = &blurSamples[0] + ptrBlurWeights = &blurWeights[0] + + # minimum segment length + if min_seg_len < 0 : + ERROR( '"min_seg_len" must be >= 0' ) 
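A minimal, standalone sketch of the Gaussian damping computed in the blur-weight loop above (the radii and sigma here are made-up illustration values, not taken from this patch; only the formula matches the code):

    import numpy as np

    blur_radii = [1.0, 2.0]      # hypothetical radii, in mm
    blur_sigma = 1.0             # hypothetical damping sigma
    # prepend the fake radius 0.0 used for the original segment, as done above
    radii   = np.array([0.0] + blur_radii, dtype=np.double)
    weights = np.exp(-radii**2 / (2.0 * blur_sigma**2))
    print(weights)               # ~[1.0, 0.6065, 0.1353]: copies farther from the original segment contribute less

When no blur radii are requested, the only entry is the fake radius 0.0 with weight 1.0, which is why the code above simply reports that fibers are not blurred in that case.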
+ + LOG( '\n * Loading data:' ) + cdef short [:] htable = amico.lut.load_precomputed_hash_table(ndirs) + cdef short* ptrHashTable = &htable[0] + + # fiber-tracts from .trk + print( '\t- Tractogram' ) + + if (path_out is None): + ERROR( '"path_out" not defined' ) + + if (filename_trk is None and filename_tractogram is None): + ERROR( '"filename_tractogram" not defined' ) + + if (filename_trk is not None and filename_tractogram is not None): + WARNING('"filename_trk" will not be considered, "filename_tractogram" will be used') + + if (filename_trk is not None and filename_tractogram is None): + filename_tractogram = filename_trk + WARNING('"filename_trk" parameter is deprecated, use "filename_tractogram" instead') + + extension = splitext(filename_tractogram)[1] + if (extension != ".trk" and extension != ".tck") : + ERROR( 'Invalid input file: only .trk and .tck are supported' ) + try : + hdr = nibabel.streamlines.load( filename_tractogram, lazy_load=True ).header + except : + ERROR( 'Tractogram file not found' ) + + if (extension == ".trk"): + Nx = hdr['dimensions'][0] + Ny = hdr['dimensions'][1] + Nz = hdr['dimensions'][2] + Px = hdr['voxel_sizes'][0] + Py = hdr['voxel_sizes'][1] + Pz = hdr['voxel_sizes'][2] + + data_offset = 1000 + n_count = hdr['nb_streamlines'] + n_scalars = hdr['nb_scalars_per_point'] + n_properties = hdr['nb_properties_per_streamline'] + + if (extension == ".tck"): + if TCK_ref_image is None: + if filename_peaks is not None: + TCK_ref_image = filename_peaks + elif filename_mask is not None: + TCK_ref_image = filename_mask + else: + ERROR( 'TCK files do not contain information about the geometry. Use "TCK_ref_image" for that' ) + + print ('\t\t- geometry taken from "%s"' %TCK_ref_image) + + nii_image = nibabel.load(TCK_ref_image) + nii_hdr = nii_image.header if nibabel.__version__ >= '2.0.0' else nii_image.get_header() + Nx = nii_image.shape[0] + Ny = nii_image.shape[1] + Nz = nii_image.shape[2] + Px = nii_hdr['pixdim'][1] + Py = nii_hdr['pixdim'][2] + Pz = nii_hdr['pixdim'][3] + data_offset = int(hdr['_offset_data']) #set offset + n_count = int(hdr['count']) #set number of fibers + n_scalars = 0 + n_properties = 0 + + print( '\t\t- %d x %d x %d' % ( Nx, Ny, Nz ) ) + print( '\t\t- %.4f x %.4f x %.4f' % ( Px, Py, Pz ) ) + print( '\t\t- %d fibers' % n_count ) + if Nx >= 2**16 or Nz >= 2**16 or Nz >= 2**16 : + ERROR( 'The max dim size is 2^16 voxels' ) + + # get the affine matrix + if (extension == ".tck"): + scaleMat = np.diag(np.divide(1.0, [Px,Py,Pz])) + M = nii_hdr.get_best_affine() + + # Affine matrix without scaling, i.e. 
diagonal is 1 + M[:3, :3] = np.dot(scaleMat, M[:3, :3]) + M = M.astype('= '2.0.0' else niiMASK.get_header() + print( '\t\t- %d x %d x %d' % ( niiMASK.shape[0], niiMASK.shape[1], niiMASK.shape[2] ) ) + print( '\t\t- %.4f x %.4f x %.4f' % ( niiMASK_hdr['pixdim'][1], niiMASK_hdr['pixdim'][2], niiMASK_hdr['pixdim'][3] ) ) + if ( Nx!=niiMASK.shape[0] or Ny!=niiMASK.shape[1] or Nz!=niiMASK.shape[2] or + abs(Px-niiMASK_hdr['pixdim'][1])>1e-3 or abs(Py-niiMASK_hdr['pixdim'][2])>1e-3 or abs(Pz-niiMASK_hdr['pixdim'][3])>1e-3 ) : + WARNING( 'Dataset does not have the same geometry as the tractogram' ) + niiMASK_img = np.ascontiguousarray( niiMASK.get_data().astype(np.float32) ) + ptrMASK = &niiMASK_img[0,0,0] + else : + print( '\t- No mask specified to filter IC compartments' ) + ptrMASK = NULL + + # peaks file for EC contributions + cdef float* ptrPEAKS + cdef float [:, :, :, ::1] niiPEAKS_img + cdef int Np + cdef float [:, :, ::1] niiTDI_img = np.ascontiguousarray( np.zeros((Nx,Ny,Nz),dtype=np.float32) ) + cdef float* ptrTDI = &niiTDI_img[0,0,0] + cdef double [:, ::1] affine + cdef double* ptrAFFINE + if filename_peaks is not None : + print( '\t- EC orientations' ) + niiPEAKS = nibabel.load( filename_peaks ) + niiPEAKS_hdr = niiPEAKS.header if nibabel.__version__ >= '2.0.0' else niiPEAKS.get_header() + print( '\t\t- %d x %d x %d x %d' % ( niiPEAKS.shape[0], niiPEAKS.shape[1], niiPEAKS.shape[2], niiPEAKS.shape[3] ) ) + print( '\t\t- %.4f x %.4f x %.4f' % ( niiPEAKS_hdr['pixdim'][1], niiPEAKS_hdr['pixdim'][2], niiPEAKS_hdr['pixdim'][3] ) ) + print( '\t\t- ignoring peaks < %.2f * MaxPeak' % vf_THR ) + print( '\t\t- %susing affine matrix' % ( "" if peaks_use_affine else "not " ) ) + print( '\t\t- flipping axes : [ x=%s, y=%s, z=%s ]' % ( flip_peaks[0], flip_peaks[1], flip_peaks[2] ) ) + if ( Nx!=niiPEAKS.shape[0] or Ny!=niiPEAKS.shape[1] or Nz!=niiPEAKS.shape[2] or + abs(Px-niiPEAKS_hdr['pixdim'][1])>1e-3 or abs(Py-niiPEAKS_hdr['pixdim'][2])>1e-3 or abs(Pz-niiPEAKS_hdr['pixdim'][3])>1e-3 ) : + WARNING( "Dataset does not have the same geometry as the tractogram" ) + if niiPEAKS.shape[3] % 3 : + ERROR( 'PEAKS dataset must have 3*k volumes' ) + if vf_THR < 0 or vf_THR > 1 : + ERROR( '"vf_THR" must be between 0 and 1' ) + niiPEAKS_img = np.ascontiguousarray( niiPEAKS.get_data().astype(np.float32) ) + ptrPEAKS = &niiPEAKS_img[0,0,0,0] + Np = niiPEAKS.shape[3]/3 + + # affine matrix to rotate gradien directions (if required) + if peaks_use_affine : + affine = np.ascontiguousarray( niiPEAKS.affine[:3,:3].T ) + else : + affine = np.ascontiguousarray( np.eye(3) ) + ptrAFFINE = &affine[0,0] + else : + print( '\t- No dataset specified for EC compartments' ) + Np = 0 + ptrPEAKS = NULL + ptrAFFINE = NULL + + # output path + print( '\t- Output written to "%s"' % path_out ) + if not exists( path_out ): + makedirs( path_out ) + + # write dictionary info file + with open( filename, 'wb+' ) as dictionary_info_file: + pickle.dump(dictionary_info, dictionary_info_file, protocol=2) + + # calling actual C code + ret = trk2dictionary( filename_tractogram, data_offset, + Nx, Ny, Nz, Px, Py, Pz, n_count, n_scalars, n_properties, + fiber_shiftX, fiber_shiftY, fiber_shiftZ, points_to_skip, min_seg_len, + ptrPEAKS, Np, vf_THR, -1 if flip_peaks[0] else 1, -1 if flip_peaks[1] else 1, -1 if flip_peaks[2] else 1, + ptrMASK, ptrTDI, path_out, 1 if do_intersect else 0, ptrAFFINE, + nBlurRadii, blur_sigma, ptrBlurRadii, ptrBlurSamples, ptrBlurWeights, ptrArrayInvM, ndirs, ptrHashTable ); + if ret == 0 : + WARNING( 'DICTIONARY not 
generated' ) + return None + + # create new TRK with only fibers in the WM mask + # create new dictionaty file (TRK or TCK) with only fibers in the WM mask + if gen_trk : + LOG('\n * Generate tractogram matching the dictionary:') + fib = nibabel.streamlines.load( filename_tractogram, lazy_load=True ) + hdr = fib.header + + file_kept = np.fromfile( join(path_out,'dictionary_TRK_kept.dict'), dtype=np.bool_ ) + streamlines_out = [] + for i, f in enumerate(fib.streamlines): + if file_kept[i] : + streamlines_out.append( f ) + hdr['count'] = len(streamlines_out) #set new number of fibers in the header + hdr['nb_streamlines'] = len(streamlines_out) + + #create a output dictionary file (TRK or TCK) in path_out + tractogram_out = nibabel.streamlines.tractogram.Tractogram(streamlines=streamlines_out, affine_to_rasmm=fib.tractogram.affine_to_rasmm) + nibabel.streamlines.save( tractogram_out, join(path_out,'dictionary_TRK_fibers'+extension), header=hdr ) + print( ' [ %d fibers kept ]' % np.count_nonzero( file_kept ) ) + + # save TDI and MASK maps + if filename_mask is not None : + affine = niiMASK.affine if nibabel.__version__ >= '2.0.0' else niiMASK.get_affine() + elif filename_peaks is not None : + affine = niiPEAKS.affine if nibabel.__version__ >= '2.0.0' else niiPEAKS.get_affine() + else : + affine = np.diag( [Px, Py, Pz, 1] ) + + niiTDI = nibabel.Nifti1Image( niiTDI_img, affine ) + nibabel.save( niiTDI, join(path_out,'dictionary_tdi.nii.gz') ) + + if filename_mask is not None : + niiMASK = nibabel.Nifti1Image( niiMASK_img, affine ) + else : + niiMASK = nibabel.Nifti1Image( (np.asarray(niiTDI_img)>0).astype(np.float32), affine ) + nibabel.save( niiMASK, join(path_out,'dictionary_mask.nii.gz') ) + + LOG( '\n [ %.1f seconds ]' % ( time.time() - tic ) ) + + +cpdef convert_old_dictionary( path ): + """Perform the conversion of the files representing a dictionary, i.e. dictionary_*.dict, + from the old format to the new one, where the files *_{vx,vy,vz}.dict are replaced + by a single file *_v.dict (same for the files *_{ox,oy}.dict). + + Parameters + ---------- + path : string + Path to the folder containing the dictionary_*.dict files. 
+ """ + if not exists( join(path,'dictionary_IC_vx.dict') ): + ERROR( 'Folder does not contain dictionary files in the old format' ) + + niiTDI = nibabel.load( join(path,'dictionary_tdi.nii.gz') ) + Nx, Ny, Nz = niiTDI.shape[:3] + x = np.fromfile( join(path,'dictionary_IC_vx.dict'), dtype=np.uint16 ).astype(np.uint32) + y = np.fromfile( join(path,'dictionary_IC_vy.dict'), dtype=np.uint16 ).astype(np.uint32) + z = np.fromfile( join(path,'dictionary_IC_vz.dict'), dtype=np.uint16 ).astype(np.uint32) + v = x + Nx * ( y + Ny * z ) + v.tofile( join(path,'dictionary_IC_v.dict') ) + remove( join(path,'dictionary_IC_vx.dict') ) + remove( join(path,'dictionary_IC_vy.dict') ) + remove( join(path,'dictionary_IC_vz.dict') ) + + x = np.fromfile( join(path,'dictionary_EC_vx.dict'), dtype=np.uint8 ).astype(np.uint32) + y = np.fromfile( join(path,'dictionary_EC_vy.dict'), dtype=np.uint8 ).astype(np.uint32) + z = np.fromfile( join(path,'dictionary_EC_vz.dict'), dtype=np.uint8 ).astype(np.uint32) + v = x + Nx * ( y + Ny * z ) + v.tofile( join(path,'dictionary_EC_v.dict') ) + remove( join(path,'dictionary_EC_vx.dict') ) + remove( join(path,'dictionary_EC_vy.dict') ) + remove( join(path,'dictionary_EC_vz.dict') ) + + x = np.fromfile( join(path,'dictionary_IC_ox.dict'), dtype=np.uint8 ).astype(np.uint16) + y = np.fromfile( join(path,'dictionary_IC_oy.dict'), dtype=np.uint8 ).astype(np.uint16) + v = y + 181 * x + v.tofile( join(path,'dictionary_IC_o.dict') ) + remove( join(path,'dictionary_IC_ox.dict') ) + remove( join(path,'dictionary_IC_oy.dict') ) + + x = np.fromfile( join(path,'dictionary_EC_ox.dict'), dtype=np.uint8 ).astype(np.uint16) + y = np.fromfile( join(path,'dictionary_EC_oy.dict'), dtype=np.uint8 ).astype(np.uint16) + v = y + 181 * x + v.tofile( join(path,'dictionary_EC_o.dict') ) + remove( join(path,'dictionary_EC_ox.dict') ) + remove( join(path,'dictionary_EC_oy.dict') ) diff --git a/commit/trk2dictionary/trk2dictionary_c.cpp b/commit/trk2dictionary/trk2dictionary_c.cpp index b04e9b9f..365ea7a5 100644 --- a/commit/trk2dictionary/trk2dictionary_c.cpp +++ b/commit/trk2dictionary/trk2dictionary_c.cpp @@ -1,609 +1,609 @@ -#include -#include -#include -#include -#include -#include "Vector.h" -#include "ProgressBar.h" -#include -#include - -#define MAX_FIB_LEN 10000 - - -// CLASS to store the segments of one fiber -class segKey -{ - public: - unsigned short x, y, z; - unsigned short o; - segKey(){} - - void set(unsigned short _x, unsigned short _y, unsigned short _z, unsigned short _o) - { - x = _x; - y = _y; - z = _z; - o = _o; - } - - bool const operator <(const segKey& seg) const - { - return o < seg.o || (o==seg.o && z FiberSegments; - -Vector dim; -Vector pixdim; -float* ptrMASK; -unsigned int nPointsToSkip; -float fiberShiftXmm, fiberShiftYmm, fiberShiftZmm; -bool doIntersect; -float minSegLen; - -std::vector radii; // radii for the extrusion -std::vector weights; // damping weight -std::vector sectors; // number of duplicates across the extrusion circle -double radiusSigma; // modulates the impact of each segment as function of radius - - -bool rayBoxIntersection( Vector& origin, Vector& direction, Vector& vmin, Vector& vmax, double & t); -void fiberForwardModel( float fiber[3][MAX_FIB_LEN], unsigned int pts, std::vector sectors, std::vector radii, std::vector weight, short* ptrHashTable ); -void segmentForwardModel( const Vector& P1, const Vector& P2, double w, short* ptrHashTable ); -unsigned int read_fiberTRK( FILE* fp, float fiber[3][MAX_FIB_LEN], int ns, int np ); -unsigned int 
read_fiberTCK( FILE* fp, float fiber[3][MAX_FIB_LEN] , float affine[4][4]); - - -// ========================= -// Function called by CYTHON -// ========================= -int trk2dictionary( - char* str_filename, int data_offset, int Nx, int Ny, int Nz, float Px, float Py, float Pz, int n_count, int n_scalars, int n_properties, - float fiber_shiftX, float fiber_shiftY, float fiber_shiftZ, int points_to_skip, float min_seg_len, - float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int ECiz, - float* _ptrMASK, float* ptrTDI, char* path_out, int c, double* ptrPeaksAffine, - int nBlurRadii, double blurSigma, double* ptrBlurRadii, int* ptrBlurSamples, double* ptrBlurWeights, float* ptrTractsAffine, unsigned short ndirs, short* ptrHashTable -) -{ - /*=========================*/ - /* IC compartments */ - /*=========================*/ - float fiber[3][MAX_FIB_LEN]; - float fiberNorm, fiberLen; - unsigned int N, totICSegments = 0, totFibers = 0, v; - unsigned short o; - unsigned char kept; - Vector P; - std::string filename; - std::string OUTPUT_path(path_out); - std::map::iterator it; - - std::map FiberNorm; - std::map::iterator itNorm; - segInVoxKey inVoxKey; - - printf( "\n \033[0;32m* Exporting IC compartments:\033[0m\n" ); - - int isTRK; // var to check - - char *ext = strrchr(str_filename, '.'); //get the extension of input file - - if (strcmp(ext,".trk")==0) //for .trk file - isTRK = 1; - else if (strcmp(ext,".tck")==0)// for .tck file - isTRK = 0; - else - return 0; - - FILE* fpTractogram = fopen(str_filename,"rb"); //open - if (fpTractogram == NULL) return 0; - - if ( isTRK ) { // SKIP header on .trk - fseek(fpTractogram,data_offset,SEEK_SET); //skip the first 1000 bytes in the .trk file - } - else { // SKIP header on .tck - fseek(fpTractogram,data_offset,SEEK_SET); //skip the first offset bytes in the .tck file - } - - // set global variables - dim.Set( Nx, Ny, Nz ); - pixdim.Set( Px, Py, Pz ); - nPointsToSkip = points_to_skip; - fiberShiftXmm = fiber_shiftX * pixdim.x; // shift in mm for the coordinates - fiberShiftYmm = fiber_shiftY * pixdim.y; - fiberShiftZmm = fiber_shiftZ * pixdim.z; - ptrMASK = _ptrMASK; - doIntersect = c > 0; - minSegLen = min_seg_len; - - radii.clear(); - sectors.clear(); - weights.clear(); - for(int i=0; i 0 ) - { - // add segments to files - fiberNorm = 0; - fiberLen = 0; - for (it=FiberSegments.begin(); it!=FiberSegments.end(); it++) - { - // NB: plese note inverted ordering for 'v' - v = it->first.x + dim.x * ( it->first.y + dim.y * it->first.z ); - o = it->first.o; - fwrite( &totFibers, 4, 1, pDict_IC_f ); - fwrite( &v, 4, 1, pDict_IC_v ); - fwrite( &o, 2, 1, pDict_IC_o ); - fwrite( &(it->second), 4, 1, pDict_IC_len ); - ptrTDI[ it->first.z + dim.z * ( it->first.y + dim.y * it->first.x ) ] += it->second; - inVoxKey.set( it->first.x, it->first.y, it->first.z ); - FiberNorm[inVoxKey] += it->second; - fiberLen += it->second; - } - for (itNorm=FiberNorm.begin(); itNorm!=FiberNorm.end(); itNorm++) - { - fiberNorm += pow(itNorm->second,2); - } - fiberNorm = sqrt(fiberNorm); - FiberNorm.clear(); - fwrite( &fiberNorm, 1, 4, pDict_TRK_norm ); // actual length considered in optimization - fwrite( &fiberLen, 1, 4, pDict_TRK_len ); - totICSegments += FiberSegments.size(); - totFibers++; - kept = 1; - } - fwrite( &kept, 1, 1, pDict_TRK_kept ); - } - PROGRESS.close(); - - // write dictionary ndirs value - fwrite(&ndirs, 1, sizeof(unsigned short), pDict_ndirs); - fclose( fpTractogram ); - fclose( pDict_TRK_norm ); - fclose( pDict_IC_f ); - fclose( pDict_IC_v ); - 
fclose( pDict_IC_o ); - fclose( pDict_IC_len ); - fclose( pDict_TRK_len ); - fclose( pDict_TRK_kept ); - fclose( pDict_ndirs ); - - printf(" [ %d fibers kept, %d segments in total ]\n", totFibers, totICSegments ); - - - /*=========================*/ - /* EC compartments */ - /*=========================*/ - unsigned int totECSegments = 0, totECVoxels = 0; - - printf( "\n \033[0;32m* Exporting EC compartments:\033[0m\n" ); - - filename = OUTPUT_path+"/dictionary_EC_v.dict"; FILE* pDict_EC_v = fopen( filename.c_str(), "wb" ); - filename = OUTPUT_path+"/dictionary_EC_o.dict"; FILE* pDict_EC_o = fopen( filename.c_str(), "wb" ); - - if ( ptrPEAKS != NULL ) - { - Vector dir; - double longitude, colatitude; - segKey ec_seg; - int ix, iy, iz, id, atLeastOne; - float peakMax; - float norms[ Np ]; - float *ptr; - int ox, oy; - - PROGRESS.reset( dim.z ); - for(iz=0; iz peakMax ) - peakMax = norms[id]; - } - - if ( peakMax > 0 ) - { - ec_seg.x = ix; - ec_seg.y = iy; - ec_seg.z = iz; - atLeastOne = 0; - for(id=0; id0 ) - totECVoxels++; - } - } - } - PROGRESS.close(); - } - - fclose( pDict_EC_v ); - fclose( pDict_EC_o ); - - printf(" [ %d voxels, %d segments ]\n", totECVoxels, totECSegments ); - - return 1; -} - - -/********************************************************************************************************************/ -/* fiberForwardModel */ -/********************************************************************************************************************/ -void fiberForwardModel( float fiber[3][MAX_FIB_LEN], unsigned int pts, std::vector sectors, std::vector radii, std::vector weights, short* ptrHashTable ) -{ - static Vector S1, S2, S1m, S2m, P, q, n, qxn, qxqxn; - static Vector vox, vmin, vmax, dir; - static double len, t, alpha, w, R; - static int i, j, k; - - FiberSegments.clear(); - //printf("RANGO -----------------------------> from %d to %d\n", nPointsToSkip, pts-1-nPointsToSkip); - for(i=nPointsToSkip; i0 && t& P1, const Vector& P2, double w, short* ptrHashTable ) -{ - static Vector vox; - static Vector dir, dirTrue; - static double longitude, colatitude, len; - static segKey key; - static int ox, oy; - - // direction of the segment - dir.y = P2.y-P1.y; - if ( dir.y >= 0 ) - { - dir.x = P2.x-P1.x; - dir.z = P2.z-P1.z; - } - else - { - dir.x = P1.x-P2.x; - dir.y = P1.y-P2.y; - dir.z = P1.z-P2.z; - } - - // length of segment - len = dir.norm(); - if ( len <= minSegLen ) - return; - dir.Normalize(); - - // voxel of the segment is the centroid - vox.x = floor( 0.5 * (P1.x + P2.x) / pixdim.x ); - vox.y = floor( 0.5 * (P1.y + P2.y) / pixdim.y ); - vox.z = floor( 0.5 * (P1.z + P2.z) / pixdim.z ); - if ( vox.x>=dim.x || vox.x<0 || vox.y>=dim.y || vox.y<0 || vox.z>=dim.z || vox.z<0 ) - return; - if ( ptrMASK && ptrMASK[ vox.z + dim.z * ( vox.y + dim.y * vox.x ) ]==0 ) - return; - - // add the segment to the data structure - longitude = atan2(dir.y, dir.x); - colatitude = atan2( sqrt(dir.x*dir.x + dir.y*dir.y), dir.z ); - ox = (int)round(colatitude/M_PI*180.0); // theta // i1 - oy = (int)round(longitude/M_PI*180.0); // phi // i2 - key.set( vox.x, vox.y, vox.z, (unsigned short) ptrHashTable[ox*181 + oy] ); - FiberSegments[key] += w * len; -} - - -/********************************************************************************************************************/ -/* rayBoxIntersection */ -/********************************************************************************************************************/ -bool rayBoxIntersection( Vector& origin, Vector& direction, Vector& vmin, Vector& 
vmax, double & t) -{ - static double tmin, tmax, tymin, tymax, tzmin, tzmax; - static Vector invrd; - - // inverse direction to catch float problems - invrd.x = 1.0 / direction.x; - invrd.y = 1.0 / direction.y; - invrd.z = 1.0 / direction.z; - - - if (invrd.x >= 0) - { - tmin = (vmin.x - origin.x) * invrd.x; - tmax = (vmax.x - origin.x) * invrd.x; - } - else - { - tmin = (vmax.x - origin.x) * invrd.x; - tmax = (vmin.x - origin.x) * invrd.x; - } - - if (invrd.y >= 0) - { - tymin = (vmin.y - origin.y) * invrd.y; - tymax = (vmax.y - origin.y) * invrd.y; - } - else - { - tymin = (vmax.y - origin.y) * invrd.y; - tymax = (vmin.y - origin.y) * invrd.y; - } - - if ( (tmin > tymax) || (tymin > tmax) ) return false; - if ( tymin > tmin) tmin = tymin; - if ( tymax < tmax) tmax = tymax; - - if (invrd.z >= 0) - { - tzmin = (vmin.z - origin.z) * invrd.z; - tzmax = (vmax.z - origin.z) * invrd.z; - }else - { - tzmin = (vmax.z - origin.z) * invrd.z; - tzmax = (vmin.z - origin.z) * invrd.z; - } - - if ( (tmin > tzmax) || (tzmin > tmax) ) return false; - if ( tzmin > tmin) tmin = tzmin; - if ( tzmax < tmax) tmax = tzmax; - - // check if values are valid - t = tmin; - if (t <= 0) t = tmax; - - return true; -} - - -// Read a fiber from file .trk -unsigned int read_fiberTRK( FILE* fp, float fiber[3][MAX_FIB_LEN], int ns, int np ) -{ - int N; - fread((char*)&N, 1, 4, fp); - - if ( N >= MAX_FIB_LEN || N <= 0 ) - return 0; - - float tmp[3]; - for(int i=0; i +#include +#include +#include +#include +#include "Vector.h" +#include "ProgressBar.h" +#include +#include + +#define MAX_FIB_LEN 10000 + + +// CLASS to store the segments of one fiber +class segKey +{ + public: + unsigned short x, y, z; + unsigned short o; + segKey(){} + + void set(unsigned short _x, unsigned short _y, unsigned short _z, unsigned short _o) + { + x = _x; + y = _y; + z = _z; + o = _o; + } + + bool const operator <(const segKey& seg) const + { + return o < seg.o || (o==seg.o && z FiberSegments; + +Vector dim; +Vector pixdim; +float* ptrMASK; +unsigned int nPointsToSkip; +float fiberShiftXmm, fiberShiftYmm, fiberShiftZmm; +bool doIntersect; +float minSegLen; + +std::vector radii; // radii for the extrusion +std::vector weights; // damping weight +std::vector sectors; // number of duplicates across the extrusion circle +double radiusSigma; // modulates the impact of each segment as function of radius + + +bool rayBoxIntersection( Vector& origin, Vector& direction, Vector& vmin, Vector& vmax, double & t); +void fiberForwardModel( float fiber[3][MAX_FIB_LEN], unsigned int pts, std::vector sectors, std::vector radii, std::vector weight, short* ptrHashTable ); +void segmentForwardModel( const Vector& P1, const Vector& P2, double w, short* ptrHashTable ); +unsigned int read_fiberTRK( FILE* fp, float fiber[3][MAX_FIB_LEN], int ns, int np ); +unsigned int read_fiberTCK( FILE* fp, float fiber[3][MAX_FIB_LEN] , float affine[4][4]); + + +// ========================= +// Function called by CYTHON +// ========================= +int trk2dictionary( + char* str_filename, int data_offset, int Nx, int Ny, int Nz, float Px, float Py, float Pz, int n_count, int n_scalars, int n_properties, + float fiber_shiftX, float fiber_shiftY, float fiber_shiftZ, int points_to_skip, float min_seg_len, + float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int ECiz, + float* _ptrMASK, float* ptrTDI, char* path_out, int c, double* ptrPeaksAffine, + int nBlurRadii, double blurSigma, double* ptrBlurRadii, int* ptrBlurSamples, double* ptrBlurWeights, float* ptrTractsAffine, 
unsigned short ndirs, short* ptrHashTable +) +{ + /*=========================*/ + /* IC compartments */ + /*=========================*/ + float fiber[3][MAX_FIB_LEN]; + float fiberNorm, fiberLen; + unsigned int N, totICSegments = 0, totFibers = 0, v; + unsigned short o; + unsigned char kept; + Vector P; + std::string filename; + std::string OUTPUT_path(path_out); + std::map::iterator it; + + std::map FiberNorm; + std::map::iterator itNorm; + segInVoxKey inVoxKey; + + printf( "\n \033[0;32m* Exporting IC compartments:\033[0m\n" ); + + int isTRK; // var to check + + char *ext = strrchr(str_filename, '.'); //get the extension of input file + + if (strcmp(ext,".trk")==0) //for .trk file + isTRK = 1; + else if (strcmp(ext,".tck")==0)// for .tck file + isTRK = 0; + else + return 0; + + FILE* fpTractogram = fopen(str_filename,"rb"); //open + if (fpTractogram == NULL) return 0; + + if ( isTRK ) { // SKIP header on .trk + fseek(fpTractogram,data_offset,SEEK_SET); //skip the first 1000 bytes in the .trk file + } + else { // SKIP header on .tck + fseek(fpTractogram,data_offset,SEEK_SET); //skip the first offset bytes in the .tck file + } + + // set global variables + dim.Set( Nx, Ny, Nz ); + pixdim.Set( Px, Py, Pz ); + nPointsToSkip = points_to_skip; + fiberShiftXmm = fiber_shiftX * pixdim.x; // shift in mm for the coordinates + fiberShiftYmm = fiber_shiftY * pixdim.y; + fiberShiftZmm = fiber_shiftZ * pixdim.z; + ptrMASK = _ptrMASK; + doIntersect = c > 0; + minSegLen = min_seg_len; + + radii.clear(); + sectors.clear(); + weights.clear(); + for(int i=0; i 0 ) + { + // add segments to files + fiberNorm = 0; + fiberLen = 0; + for (it=FiberSegments.begin(); it!=FiberSegments.end(); it++) + { + // NB: plese note inverted ordering for 'v' + v = it->first.x + dim.x * ( it->first.y + dim.y * it->first.z ); + o = it->first.o; + fwrite( &totFibers, 4, 1, pDict_IC_f ); + fwrite( &v, 4, 1, pDict_IC_v ); + fwrite( &o, 2, 1, pDict_IC_o ); + fwrite( &(it->second), 4, 1, pDict_IC_len ); + ptrTDI[ it->first.z + dim.z * ( it->first.y + dim.y * it->first.x ) ] += it->second; + inVoxKey.set( it->first.x, it->first.y, it->first.z ); + FiberNorm[inVoxKey] += it->second; + fiberLen += it->second; + } + for (itNorm=FiberNorm.begin(); itNorm!=FiberNorm.end(); itNorm++) + { + fiberNorm += pow(itNorm->second,2); + } + fiberNorm = sqrt(fiberNorm); + FiberNorm.clear(); + fwrite( &fiberNorm, 1, 4, pDict_TRK_norm ); // actual length considered in optimization + fwrite( &fiberLen, 1, 4, pDict_TRK_len ); + totICSegments += FiberSegments.size(); + totFibers++; + kept = 1; + } + fwrite( &kept, 1, 1, pDict_TRK_kept ); + } + PROGRESS.close(); + + // write dictionary ndirs value + fwrite(&ndirs, 1, sizeof(unsigned short), pDict_ndirs); + fclose( fpTractogram ); + fclose( pDict_TRK_norm ); + fclose( pDict_IC_f ); + fclose( pDict_IC_v ); + fclose( pDict_IC_o ); + fclose( pDict_IC_len ); + fclose( pDict_TRK_len ); + fclose( pDict_TRK_kept ); + fclose( pDict_ndirs ); + + printf(" [ %d fibers kept, %d segments in total ]\n", totFibers, totICSegments ); + + + /*=========================*/ + /* EC compartments */ + /*=========================*/ + unsigned int totECSegments = 0, totECVoxels = 0; + + printf( "\n \033[0;32m* Exporting EC compartments:\033[0m\n" ); + + filename = OUTPUT_path+"/dictionary_EC_v.dict"; FILE* pDict_EC_v = fopen( filename.c_str(), "wb" ); + filename = OUTPUT_path+"/dictionary_EC_o.dict"; FILE* pDict_EC_o = fopen( filename.c_str(), "wb" ); + + if ( ptrPEAKS != NULL ) + { + Vector dir; + double longitude, colatitude; + 
segKey ec_seg; + int ix, iy, iz, id, atLeastOne; + float peakMax; + float norms[ Np ]; + float *ptr; + int ox, oy; + + PROGRESS.reset( dim.z ); + for(iz=0; iz peakMax ) + peakMax = norms[id]; + } + + if ( peakMax > 0 ) + { + ec_seg.x = ix; + ec_seg.y = iy; + ec_seg.z = iz; + atLeastOne = 0; + for(id=0; id0 ) + totECVoxels++; + } + } + } + PROGRESS.close(); + } + + fclose( pDict_EC_v ); + fclose( pDict_EC_o ); + + printf(" [ %d voxels, %d segments ]\n", totECVoxels, totECSegments ); + + return 1; +} + + +/********************************************************************************************************************/ +/* fiberForwardModel */ +/********************************************************************************************************************/ +void fiberForwardModel( float fiber[3][MAX_FIB_LEN], unsigned int pts, std::vector sectors, std::vector radii, std::vector weights, short* ptrHashTable ) +{ + static Vector S1, S2, S1m, S2m, P, q, n, qxn, qxqxn; + static Vector vox, vmin, vmax, dir; + static double len, t, alpha, w, R; + static int i, j, k; + + FiberSegments.clear(); + //printf("RANGO -----------------------------> from %d to %d\n", nPointsToSkip, pts-1-nPointsToSkip); + for(i=nPointsToSkip; i0 && t& P1, const Vector& P2, double w, short* ptrHashTable ) +{ + static Vector vox; + static Vector dir, dirTrue; + static double longitude, colatitude, len; + static segKey key; + static int ox, oy; + + // direction of the segment + dir.y = P2.y-P1.y; + if ( dir.y >= 0 ) + { + dir.x = P2.x-P1.x; + dir.z = P2.z-P1.z; + } + else + { + dir.x = P1.x-P2.x; + dir.y = P1.y-P2.y; + dir.z = P1.z-P2.z; + } + + // length of segment + len = dir.norm(); + if ( len <= minSegLen ) + return; + dir.Normalize(); + + // voxel of the segment is the centroid + vox.x = floor( 0.5 * (P1.x + P2.x) / pixdim.x ); + vox.y = floor( 0.5 * (P1.y + P2.y) / pixdim.y ); + vox.z = floor( 0.5 * (P1.z + P2.z) / pixdim.z ); + if ( vox.x>=dim.x || vox.x<0 || vox.y>=dim.y || vox.y<0 || vox.z>=dim.z || vox.z<0 ) + return; + if ( ptrMASK && ptrMASK[ vox.z + dim.z * ( vox.y + dim.y * vox.x ) ]==0 ) + return; + + // add the segment to the data structure + longitude = atan2(dir.y, dir.x); + colatitude = atan2( sqrt(dir.x*dir.x + dir.y*dir.y), dir.z ); + ox = (int)round(colatitude/M_PI*180.0); // theta // i1 + oy = (int)round(longitude/M_PI*180.0); // phi // i2 + key.set( vox.x, vox.y, vox.z, (unsigned short) ptrHashTable[ox*181 + oy] ); + FiberSegments[key] += w * len; +} + + +/********************************************************************************************************************/ +/* rayBoxIntersection */ +/********************************************************************************************************************/ +bool rayBoxIntersection( Vector& origin, Vector& direction, Vector& vmin, Vector& vmax, double & t) +{ + static double tmin, tmax, tymin, tymax, tzmin, tzmax; + static Vector invrd; + + // inverse direction to catch float problems + invrd.x = 1.0 / direction.x; + invrd.y = 1.0 / direction.y; + invrd.z = 1.0 / direction.z; + + + if (invrd.x >= 0) + { + tmin = (vmin.x - origin.x) * invrd.x; + tmax = (vmax.x - origin.x) * invrd.x; + } + else + { + tmin = (vmax.x - origin.x) * invrd.x; + tmax = (vmin.x - origin.x) * invrd.x; + } + + if (invrd.y >= 0) + { + tymin = (vmin.y - origin.y) * invrd.y; + tymax = (vmax.y - origin.y) * invrd.y; + } + else + { + tymin = (vmax.y - origin.y) * invrd.y; + tymax = (vmin.y - origin.y) * invrd.y; + } + + if ( (tmin > tymax) || (tymin > tmax) ) return 
false; + if ( tymin > tmin) tmin = tymin; + if ( tymax < tmax) tmax = tymax; + + if (invrd.z >= 0) + { + tzmin = (vmin.z - origin.z) * invrd.z; + tzmax = (vmax.z - origin.z) * invrd.z; + }else + { + tzmin = (vmax.z - origin.z) * invrd.z; + tzmax = (vmin.z - origin.z) * invrd.z; + } + + if ( (tmin > tzmax) || (tzmin > tmax) ) return false; + if ( tzmin > tmin) tmin = tzmin; + if ( tzmax < tmax) tmax = tzmax; + + // check if values are valid + t = tmin; + if (t <= 0) t = tmax; + + return true; +} + + +// Read a fiber from file .trk +unsigned int read_fiberTRK( FILE* fp, float fiber[3][MAX_FIB_LEN], int ns, int np ) +{ + int N; + fread((char*)&N, 1, 4, fp); + + if ( N >= MAX_FIB_LEN || N <= 0 ) + return 0; + + float tmp[3]; + for(int i=0; i**Make sure that your working directory is the folder where you unzipped the downloaded archive.** - - -```python -path_to_the_directory_with_the_unzipped_archive = '.' # edit this -cd path_to_the_directory_with_the_unzipped_archive -``` - -### Load the usual COMMIT structure - - -```python -from commit import trk2dictionary - -trk2dictionary.run( - filename_tractogram = 'LausanneTwoShell/fibers.trk', - path_out = 'LausanneTwoShell/CommitOutput', - filename_peaks = 'LausanneTwoShell/peaks.nii.gz', - filename_mask = 'LausanneTwoShell/WM.nii.gz', - fiber_shift = 0.5, - peaks_use_affine = True -) - -import commit -commit.core.setup() -mit = commit.Evaluation( '.', 'LausanneTwoShell' ) -mit.load_data( 'DWI.nii', 'DWI.scheme' ) - -mit.set_model( 'StickZeppelinBall' ) - -d_par = 1.7E-3 # Parallel diffusivity [mm^2/s] -ICVFs = [ 0.7 ] # Intra-cellular volume fraction(s) [0..1] -d_ISOs = [ 1.7E-3, 3.0E-3 ] # Isotropic diffusivitie(s) [mm^2/s] - -mit.model.set( d_par, ICVFs, d_ISOs ) -mit.generate_kernels( regenerate=True ) -mit.load_kernels() - -mit.load_dictionary( 'CommitOutput' ) -mit.set_threads() -mit.build_operator() -``` - -### Perform clustering of the streamlines - -You will need `dipy`, which is among the requirements of COMMIT, hence there should be no problem. - -The `threshold` parameter has to be tuned for each brain. Do not consider our choice as a standard one. - - -```python -from nibabel import trackvis as tv -fname='LausanneTwoShell/fibers.trk' -streams, hdr = tv.read(fname) -streamlines = [i[0] for i in streams] - -from dipy.segment.clustering import QuickBundles -threshold = 15.0 -qb = QuickBundles(threshold=threshold) -clusters = qb.cluster(streamlines) - -import numpy as np -structureIC = np.array([np.array(c.indices) for c in clusters]) -weightsIC = np.array([1.0/np.sqrt(len(c)) for c in structureIC]) -``` - -### Define the regularisation term -Each compartment must be regularised separately. 
The user can choose among the following penalties: - -- $\sum_{g\in G}w_g\|x_g\|_k$ : `commit.solvers.group_sparsity` with $k\in \{2, \infty\}$ (only for IC compartment) - -- $\|x\|_1$ : `commit.solvers.norm1` - -- $\|x\|_2$ : `commit.solvers.norm2` - -- $\iota_{\ge 0}(x)$ : `commit.solvers.non_negative` (Default for all compartments) - -If the chosen regularisation for the IC compartment is $\sum_{g\in G}\|x_g\|_k$, we can define $k$ via the `group_norm` field, which must be - -- $\|x\|_2$ : `commit.solvers.norm2` - -In this example we consider the following penalties: - -- Intracellular: group sparsity with 2-norm of each group - -- Extracellular: 2-norm - -- Isotropic: 1-norm - - -```python -regnorms = [commit.solvers.group_sparsity, commit.solvers.norm2, commit.solvers.norm1] - -group_norm = 2 # each group is penalised with its 2-norm -``` - -The regularisation parameters are specified within the lambdas field. Again, do not consider our choice as a standard one. - - -```python -lambdas = [10.,10.,10.] -``` - -### Call the constructor of the data structure - - -```python -regterm = commit.solvers.init_regularisation(mit, - regnorms = regnorms, - structureIC = structureIC, - weightsIC = weightsIC, - group_norm = group_norm, - lambdas = lambdas) -``` - -### Call the fit function to perform the optimisation - - -```python -mit.fit(regularisation=regterm, max_iter=1000) -``` - -### Save the results - - -```python -suffix = '_AdvancedSolvers' -mit.save_results(path_suffix=suffix) -``` + +You can find the ipython notebook version of this tutorial [at this link](tutorial_solvers.ipynb). + +# Advanced solvers + +This tutorial shows how to exploit the advanced features of the COMMIT framework from the side of the **optimisation problem**. The general formulation is the following: +\begin{equation} +x^* = \arg\min_{x\in R^n_+} \frac12 \|Ax-y\|_2^2 + \lambda_{IC}\Omega_{IC}(x) + \lambda_{EC}\Omega_{EC}(x) + \lambda_{ISO}\Omega_{ISO}(x), +\end{equation} +where $A$ is the COMMIT dictionary, $n$ is defined in such a way that the product $Ax$ makes sense and $y$ is the datum that we want to fit. The three regularisation terms allow us to exploit ***distinct penalties for each compartment***. + +*Note*: before exploring this tutorial, you should follow the [Getting Started](https://github.com/daducci/COMMIT/tree/master/doc/tutorials/GettingStarted) tutorial. + + +### Download and unpack the data + +Download and extract the **example dataset** from the following [ZIP archive](http://hardi.epfl.ch/static/data/COMMIT_demos/LausanneTwoShell.zip), which contains the following files: + +- `DWI.nii`: a diffusion MRI dataset with 100 measurements distributed on 2 shells, respectively at b=700 s/mm^2 and b=2000 s/mm^2; +- `DWI.scheme`: its corresponding acquisition scheme; +- `peaks.nii.gz`: main diffusion orientations estimated with CSD; +- `fibers.trk`: tractogram with about 280K fibers estimated using a streamline-based algorithm; +- `WM.nii.gz`: white-matter mask extracted from an anatomical T1w image. + + +**Make sure that your working directory is the folder where you unzipped the downloaded archive.** + + +```python +path_to_the_directory_with_the_unzipped_archive = '.' 
# edit this +cd path_to_the_directory_with_the_unzipped_archive +``` + +### Load the usual COMMIT structure + + +```python +from commit import trk2dictionary + +trk2dictionary.run( + filename_tractogram = 'LausanneTwoShell/fibers.trk', + path_out = 'LausanneTwoShell/CommitOutput', + filename_peaks = 'LausanneTwoShell/peaks.nii.gz', + filename_mask = 'LausanneTwoShell/WM.nii.gz', + fiber_shift = 0.5, + peaks_use_affine = True +) + +import commit +commit.core.setup() +mit = commit.Evaluation( '.', 'LausanneTwoShell' ) +mit.load_data( 'DWI.nii', 'DWI.scheme' ) + +mit.set_model( 'StickZeppelinBall' ) + +d_par = 1.7E-3 # Parallel diffusivity [mm^2/s] +ICVFs = [ 0.7 ] # Intra-cellular volume fraction(s) [0..1] +d_ISOs = [ 1.7E-3, 3.0E-3 ] # Isotropic diffusivitie(s) [mm^2/s] + +mit.model.set( d_par, ICVFs, d_ISOs ) +mit.generate_kernels( regenerate=True ) +mit.load_kernels() + +mit.load_dictionary( 'CommitOutput' ) +mit.set_threads() +mit.build_operator() +``` + +### Perform clustering of the streamlines + +You will need `dipy`, which is among the requirements of COMMIT, hence there should be no problem. + +The `threshold` parameter has to be tuned for each brain. Do not consider our choice as a standard one. + + +```python +from nibabel import trackvis as tv +fname='LausanneTwoShell/fibers.trk' +streams, hdr = tv.read(fname) +streamlines = [i[0] for i in streams] + +from dipy.segment.clustering import QuickBundles +threshold = 15.0 +qb = QuickBundles(threshold=threshold) +clusters = qb.cluster(streamlines) + +import numpy as np +structureIC = np.array([np.array(c.indices) for c in clusters]) +weightsIC = np.array([1.0/np.sqrt(len(c)) for c in structureIC]) +``` + +### Define the regularisation term +Each compartment must be regularised separately. The user can choose among the following penalties: + +- $\sum_{g\in G}w_g\|x_g\|_k$ : `commit.solvers.group_sparsity` with $k\in \{2, \infty\}$ (only for IC compartment) + +- $\|x\|_1$ : `commit.solvers.norm1` + +- $\|x\|_2$ : `commit.solvers.norm2` + +- $\iota_{\ge 0}(x)$ : `commit.solvers.non_negative` (Default for all compartments) + +If the chosen regularisation for the IC compartment is $\sum_{g\in G}\|x_g\|_k$, we can define $k$ via the `group_norm` field, which must be + +- $\|x\|_2$ : `commit.solvers.norm2` + +In this example we consider the following penalties: + +- Intracellular: group sparsity with 2-norm of each group + +- Extracellular: 2-norm + +- Isotropic: 1-norm + + +```python +regnorms = [commit.solvers.group_sparsity, commit.solvers.norm2, commit.solvers.norm1] + +group_norm = 2 # each group is penalised with its 2-norm +``` + +The regularisation parameters are specified within the lambdas field. Again, do not consider our choice as a standard one. + + +```python +lambdas = [10.,10.,10.] 
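+# one regularisation parameter per compartment, presumably in the same IC/EC/ISO order as regnorms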
+``` + +### Call the constructor of the data structure + + +```python +regterm = commit.solvers.init_regularisation(mit, + regnorms = regnorms, + structureIC = structureIC, + weightsIC = weightsIC, + group_norm = group_norm, + lambdas = lambdas) +``` + +### Call the fit function to perform the optimisation + + +```python +mit.fit(regularisation=regterm, max_iter=1000) +``` + +### Save the results + + +```python +suffix = '_AdvancedSolvers' +mit.save_results(path_suffix=suffix) +``` diff --git a/doc/tutorials/AdvancedSolvers/tutorial_solvers.ipynb b/doc/tutorials/AdvancedSolvers/tutorial_solvers.ipynb index 66fcaf17..f876c26f 100755 --- a/doc/tutorials/AdvancedSolvers/tutorial_solvers.ipynb +++ b/doc/tutorials/AdvancedSolvers/tutorial_solvers.ipynb @@ -1,256 +1,256 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can find the text version of this tutorial [at this link](README.md)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Advanced solvers\n", - "\n", - "This tutorial shows how to exploit the advanced features of the COMMIT framework from the side of the **optimisation problem**. The general formulation is the following:\n", - "\\begin{equation}\n", - "x^* = \\arg\\min_{x\\in R^n_+} \\frac12 \\|Ax-y\\|_2^2 + \\lambda_{IC}\\Omega_{IC}(x) + \\lambda_{EC}\\Omega_{EC}(x) + \\lambda_{ISO}\\Omega_{ISO}(x),\n", - "\\end{equation}\n", - "where $A$ is the COMMIT dictionary, $n$ is defined in such a way that the product $Ax$ makes sense and $y$ is the datum that we want to fit. The three regularisation terms allow us to exploit ***distinct penalties for each compartment***.\n", - "\n", - "*Note*: before exploring this tutorial, you should follow the [Getting Started](https://github.com/daducci/COMMIT/tree/master/doc/tutorials/GettingStarted) tutorial.\n", - "\n", - "\n", - "### Download and unpack the data\n", - "\n", - "Download and extract the **example dataset** from the following [ZIP archive](http://hardi.epfl.ch/static/data/COMMIT_demos/LausanneTwoShell.zip), which contains the following files:\n", - "\n", - "- `DWI.nii`: a diffusion MRI dataset with 100 measurements distributed on 2 shells, respectively at b=700 s/mm^2 and b=2000 s/mm^2;\n", - "- `DWI.scheme`: its corresponding acquisition scheme;\n", - "- `peaks.nii.gz`: main diffusion orientations estimated with CSD;\n", - "- `fibers.trk`: tractogram with about 280K fibers estimated using a streamline-based algorithm;\n", - "- `WM.nii.gz`: white-matter mask extracted from an anatomical T1w image.\n", - "\n", - "\n", - "**Make sure that your working directory is the folder where you unzipped the downloaded archive.**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "path_to_the_directory_with_the_unzipped_archive = '.' 
# edit this\n", - "cd path_to_the_directory_with_the_unzipped_archive" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Load the usual COMMIT structure" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from commit import trk2dictionary\n", - "\n", - "trk2dictionary.run(\n", - " filename_tractogram = 'LausanneTwoShell/fibers.trk',\n", - " path_out = 'LausanneTwoShell/CommitOutput',\n", - " filename_peaks = 'LausanneTwoShell/peaks.nii.gz',\n", - " filename_mask = 'LausanneTwoShell/WM.nii.gz',\n", - " fiber_shift = 0.5,\n", - " peaks_use_affine = True\n", - ")\n", - "\n", - "import commit\n", - "commit.core.setup() \n", - "mit = commit.Evaluation( '.', 'LausanneTwoShell' )\n", - "mit.load_data( 'DWI.nii', 'DWI.scheme' )\n", - "\n", - "mit.set_model( 'StickZeppelinBall' )\n", - "\n", - "d_par = 1.7E-3 # Parallel diffusivity [mm^2/s]\n", - "ICVFs = [ 0.7 ] # Intra-cellular volume fraction(s) [0..1]\n", - "d_ISOs = [ 1.7E-3, 3.0E-3 ] # Isotropic diffusivitie(s) [mm^2/s]\n", - "\n", - "mit.model.set( d_par, ICVFs, d_ISOs )\n", - "mit.generate_kernels( regenerate=True )\n", - "mit.load_kernels()\n", - "\n", - "mit.load_dictionary( 'CommitOutput' )\n", - "mit.set_threads()\n", - "mit.build_operator()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Perform clustering of the streamlines\n", - "\n", - "You will need `dipy`, which is among the requirements of COMMIT, hence there should be no problem.\n", - "\n", - "The `threshold` parameter has to be tuned for each brain. Do not consider our choice as a standard one." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from nibabel import trackvis as tv\n", - "fname='LausanneTwoShell/fibers.trk'\n", - "streams, hdr = tv.read(fname)\n", - "streamlines = [i[0] for i in streams]\n", - "\n", - "from dipy.segment.clustering import QuickBundles\n", - "threshold = 15.0\n", - "qb = QuickBundles(threshold=threshold)\n", - "clusters = qb.cluster(streamlines)\n", - "\n", - "import numpy as np\n", - "structureIC = np.array([np.array(c.indices) for c in clusters])\n", - "weightsIC = np.array([1.0/np.sqrt(len(c)) for c in structureIC])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define the regularisation term\n", - "Each compartment must be regularised separately. 
The user can choose among the following penalties:\n", - "\n", - "- $\\sum_{g\\in G}w_g\\|x_g\\|_k$ : `commit.solvers.group_sparsity` with $k\\in \\{2, \\infty\\}$ (only for IC compartment)\n", - "\n", - "- $\\|x\\|_1$ : `commit.solvers.norm1`\n", - "\n", - "- $\\|x\\|_2$ : `commit.solvers.norm2`\n", - "\n", - "- $\\iota_{\\ge 0}(x)$ : `commit.solvers.non_negative` (Default for all compartments)\n", - "\n", - "If the chosen regularisation for the IC compartment is $\\sum_{g\\in G}\\|x_g\\|_k$, we can define $k$ via the `group_norm` field, which must be\n", - "\n", - "- $\\|x\\|_2$ : `commit.solvers.norm2` \n", - "\n", - "In this example we consider the following penalties:\n", - "\n", - "- Intracellular: group sparsity with 2-norm of each group\n", - "\n", - "- Extracellular: 2-norm\n", - "\n", - "- Isotropic: 1-norm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "regnorms = [commit.solvers.group_sparsity, commit.solvers.norm2, commit.solvers.norm1]\n", - "\n", - "group_norm = 2 # each group is penalised with its 2-norm" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The regularisation parameters are specified within the lambdas field. Again, do not consider our choice as a standard one." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "lambdas = [10.,10.,10.]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Call the constructor of the data structure" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "regterm = commit.solvers.init_regularisation(mit,\n", - " regnorms = regnorms,\n", - " structureIC = structureIC,\n", - " weightsIC = weightsIC,\n", - " group_norm = group_norm,\n", - " lambdas = lambdas)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Call the fit function to perform the optimisation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mit.fit(regularisation=regterm, max_iter=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Save the results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "suffix = '_AdvancedSolvers'\n", - "mit.save_results(path_suffix=suffix)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can find the text version of this tutorial [at this link](README.md)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Advanced solvers\n", + "\n", + "This tutorial shows how to exploit the advanced features of the COMMIT framework from the side of the **optimisation problem**. 
The general formulation is the following:\n", + "\\begin{equation}\n", + "x^* = \\arg\\min_{x\\in R^n_+} \\frac12 \\|Ax-y\\|_2^2 + \\lambda_{IC}\\Omega_{IC}(x) + \\lambda_{EC}\\Omega_{EC}(x) + \\lambda_{ISO}\\Omega_{ISO}(x),\n", + "\\end{equation}\n", + "where $A$ is the COMMIT dictionary, $n$ is defined in such a way that the product $Ax$ makes sense and $y$ is the datum that we want to fit. The three regularisation terms allow us to exploit ***distinct penalties for each compartment***.\n", + "\n", + "*Note*: before exploring this tutorial, you should follow the [Getting Started](https://github.com/daducci/COMMIT/tree/master/doc/tutorials/GettingStarted) tutorial.\n", + "\n", + "\n", + "### Download and unpack the data\n", + "\n", + "Download and extract the **example dataset** from the following [ZIP archive](http://hardi.epfl.ch/static/data/COMMIT_demos/LausanneTwoShell.zip), which contains the following files:\n", + "\n", + "- `DWI.nii`: a diffusion MRI dataset with 100 measurements distributed on 2 shells, respectively at b=700 s/mm^2 and b=2000 s/mm^2;\n", + "- `DWI.scheme`: its corresponding acquisition scheme;\n", + "- `peaks.nii.gz`: main diffusion orientations estimated with CSD;\n", + "- `fibers.trk`: tractogram with about 280K fibers estimated using a streamline-based algorithm;\n", + "- `WM.nii.gz`: white-matter mask extracted from an anatomical T1w image.\n", + "\n", + "\n", + "**Make sure that your working directory is the folder where you unzipped the downloaded archive.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "path_to_the_directory_with_the_unzipped_archive = '.' # edit this\n", + "cd path_to_the_directory_with_the_unzipped_archive" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load the usual COMMIT structure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from commit import trk2dictionary\n", + "\n", + "trk2dictionary.run(\n", + " filename_tractogram = 'LausanneTwoShell/fibers.trk',\n", + " path_out = 'LausanneTwoShell/CommitOutput',\n", + " filename_peaks = 'LausanneTwoShell/peaks.nii.gz',\n", + " filename_mask = 'LausanneTwoShell/WM.nii.gz',\n", + " fiber_shift = 0.5,\n", + " peaks_use_affine = True\n", + ")\n", + "\n", + "import commit\n", + "commit.core.setup() \n", + "mit = commit.Evaluation( '.', 'LausanneTwoShell' )\n", + "mit.load_data( 'DWI.nii', 'DWI.scheme' )\n", + "\n", + "mit.set_model( 'StickZeppelinBall' )\n", + "\n", + "d_par = 1.7E-3 # Parallel diffusivity [mm^2/s]\n", + "ICVFs = [ 0.7 ] # Intra-cellular volume fraction(s) [0..1]\n", + "d_ISOs = [ 1.7E-3, 3.0E-3 ] # Isotropic diffusivitie(s) [mm^2/s]\n", + "\n", + "mit.model.set( d_par, ICVFs, d_ISOs )\n", + "mit.generate_kernels( regenerate=True )\n", + "mit.load_kernels()\n", + "\n", + "mit.load_dictionary( 'CommitOutput' )\n", + "mit.set_threads()\n", + "mit.build_operator()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Perform clustering of the streamlines\n", + "\n", + "You will need `dipy`, which is among the requirements of COMMIT, hence there should be no problem.\n", + "\n", + "The `threshold` parameter has to be tuned for each brain. Do not consider our choice as a standard one." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nibabel import trackvis as tv\n", + "fname='LausanneTwoShell/fibers.trk'\n", + "streams, hdr = tv.read(fname)\n", + "streamlines = [i[0] for i in streams]\n", + "\n", + "from dipy.segment.clustering import QuickBundles\n", + "threshold = 15.0\n", + "qb = QuickBundles(threshold=threshold)\n", + "clusters = qb.cluster(streamlines)\n", + "\n", + "import numpy as np\n", + "structureIC = np.array([np.array(c.indices) for c in clusters])\n", + "weightsIC = np.array([1.0/np.sqrt(len(c)) for c in structureIC])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define the regularisation term\n", + "Each compartment must be regularised separately. The user can choose among the following penalties:\n", + "\n", + "- $\\sum_{g\\in G}w_g\\|x_g\\|_k$ : `commit.solvers.group_sparsity` with $k\\in \\{2, \\infty\\}$ (only for IC compartment)\n", + "\n", + "- $\\|x\\|_1$ : `commit.solvers.norm1`\n", + "\n", + "- $\\|x\\|_2$ : `commit.solvers.norm2`\n", + "\n", + "- $\\iota_{\\ge 0}(x)$ : `commit.solvers.non_negative` (Default for all compartments)\n", + "\n", + "If the chosen regularisation for the IC compartment is $\\sum_{g\\in G}\\|x_g\\|_k$, we can define $k$ via the `group_norm` field, which must be\n", + "\n", + "- $\\|x\\|_2$ : `commit.solvers.norm2` \n", + "\n", + "In this example we consider the following penalties:\n", + "\n", + "- Intracellular: group sparsity with 2-norm of each group\n", + "\n", + "- Extracellular: 2-norm\n", + "\n", + "- Isotropic: 1-norm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regnorms = [commit.solvers.group_sparsity, commit.solvers.norm2, commit.solvers.norm1]\n", + "\n", + "group_norm = 2 # each group is penalised with its 2-norm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The regularisation parameters are specified within the lambdas field. Again, do not consider our choice as a standard one." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "lambdas = [10.,10.,10.]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Call the constructor of the data structure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "regterm = commit.solvers.init_regularisation(mit,\n", + " regnorms = regnorms,\n", + " structureIC = structureIC,\n", + " weightsIC = weightsIC,\n", + " group_norm = group_norm,\n", + " lambdas = lambdas)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Call the fit function to perform the optimisation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mit.fit(regularisation=regterm, max_iter=1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "suffix = '_AdvancedSolvers'\n", + "mit.save_results(path_suffix=suffix)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/setup.py b/setup.py index 8fb2a222..c358eeb5 100644 --- a/setup.py +++ b/setup.py @@ -1,54 +1,54 @@ -from distutils.core import setup, Extension -from Cython.Distutils import build_ext -from Cython.Build import cythonize -import numpy -import amico - -amico_version = amico.__version__.split('.') -amico_version = [int(version_val) for version_val in amico_version] -if amico_version[0] == 1 and amico_version[1] < 2: - raise RuntimeError( 'COMMIT requires AMICO v1.2.0 or above. 
Current AMICO version is %s' % amico.__version__ ) - -# Cython extension to create the sparse data structure from a tractogram -# for the computation of matrix-vector multiplications -ext1 = Extension( - name='commit.trk2dictionary', - sources=['commit/trk2dictionary/trk2dictionary.pyx'], - include_dirs=[numpy.get_include()], - extra_compile_args=['-w'], - extra_link_args=[], - language='c++', -) - -ext2 = Extension( - name='commit.core', - sources=['commit/core.pyx'], - include_dirs=[numpy.get_include()], - extra_compile_args=['-w'], - extra_link_args=[], - language='c++', -) - -ext3 = Extension( - name='commit.proximals', - sources=['commit/proximals.pyx'], - include_dirs=[numpy.get_include()], - extra_compile_args=['-w'], - extra_link_args=[], - language='c++', -) - -setup( - name='commit', - version='1.3.4', - description='Convex Optimization Modeling for Microstructure Informed Tractography (COMMIT)', - author='Alessandro Daducci', - author_email='alessandro.daducci@univr.it', - url='https://github.com/daducci/COMMIT', - cmdclass = {'build_ext':build_ext}, - ext_modules = [ ext1, ext2, ext3 ], - packages=['commit','commit.operator'], - package_data={ - 'commit.operator':["*.*"], # needed by pyximport to compile at runtime - }, -) +from distutils.core import setup, Extension +from Cython.Distutils import build_ext +from Cython.Build import cythonize +import numpy +import amico + +amico_version = amico.__version__.split('.') +amico_version = [int(version_val) for version_val in amico_version] +if amico_version[0] == 1 and amico_version[1] < 2: + raise RuntimeError( 'COMMIT requires AMICO v1.2.0 or above. Current AMICO version is %s' % amico.__version__ ) + +# Cython extension to create the sparse data structure from a tractogram +# for the computation of matrix-vector multiplications +ext1 = Extension( + name='commit.trk2dictionary', + sources=['commit/trk2dictionary/trk2dictionary.pyx'], + include_dirs=[numpy.get_include()], + extra_compile_args=['-w'], + extra_link_args=[], + language='c++', +) + +ext2 = Extension( + name='commit.core', + sources=['commit/core.pyx'], + include_dirs=[numpy.get_include()], + extra_compile_args=['-w'], + extra_link_args=[], + language='c++', +) + +ext3 = Extension( + name='commit.proximals', + sources=['commit/proximals.pyx'], + include_dirs=[numpy.get_include()], + extra_compile_args=['-w'], + extra_link_args=[], + language='c++', +) + +setup( + name='commit', + version='1.3.4', + description='Convex Optimization Modeling for Microstructure Informed Tractography (COMMIT)', + author='Alessandro Daducci', + author_email='alessandro.daducci@univr.it', + url='https://github.com/daducci/COMMIT', + cmdclass = {'build_ext':build_ext}, + ext_modules = [ ext1, ext2, ext3 ], + packages=['commit','commit.operator'], + package_data={ + 'commit.operator':["*.*"], # needed by pyximport to compile at runtime + }, +) From e82315265154f1dfc234edbd8bba559e9dd34319 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Tue, 7 Apr 2020 22:47:21 -0500 Subject: [PATCH 06/17] adding version to setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c358eeb5..1f9042b0 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,7 @@ setup( name='commit', - version='1.3.4', + version='1.3.4-tikhonov', description='Convex Optimization Modeling for Microstructure Informed Tractography (COMMIT)', author='Alessandro Daducci', author_email='alessandro.daducci@univr.it', From dab38d4adfc2419ab791f7e8aad547af6b8ae0c3 Mon Sep 17 
00:00:00 2001 From: ErickHernandezGutierrez Date: Wed, 8 Apr 2020 12:30:50 -0500 Subject: [PATCH 07/17] Adding free boundary second derivative matrix --- commit/core.pyx | 2 +- commit/operator/operator.pyx | 2 +- commit/operator/operator_withLUT.c | 81 +++++++++++++++++++++--------- 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index 224f6901..0a2d13e7 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -678,7 +678,7 @@ cdef class Evaluation : print(y.shape[0]) print(self.KERNELS['wmr'].shape[0]) print(y.shape[0] + self.KERNELS['wmr'].shape[0])""" - y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-1, dtype=np.float64) + y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-2, dtype=np.float64) y2[0:y.shape[0]] = y #print(y2.shape) return y2 diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index ab077d5f..e6a80abc 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -95,7 +95,7 @@ cdef class LinearOperator : self.adjoint = 0 # direct of inverse product - self.n1 = self.nV*self.nS + self.nR-1 + self.n1 = self.nV*self.nS + self.nR-2 self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV # get C pointers to arrays in DICTIONARY diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c index 01b295e1..5ccc9cdd 100644 --- a/commit/operator/operator_withLUT.c +++ b/commit/operator/operator_withLUT.c @@ -2246,47 +2246,76 @@ void COMMIT_At( return; } +/* +////////////////////////// L_2^z ////////////////////////// void COMMIT_L( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, int _nIC, int _nV, int _nS, double _tikterm, double *_vIN, double *_vOUT) { - /*for(int r = 0; r < _nIC-1; r++){ - for(int f = 0; f < _nF; f++){ - _vOUT[_nV*_nS + r] += _regterm*( -_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); + for(int f = 0; f < _nF; f++){ + + _vOUT[_nV*_nS] += _tikterm*( -2*_vIN[f] + x[_nF + f] ); + + for(int r = 1; r < _nIC-1; r++){ + _vOUT[_nV*_nS + r] += _tikterm*( _vIN[(r-1)*_nF + f] -2*_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); } - }//*/ + + _vOUT[_nV*_nS + _nIC - 1] += _tikterm*( _vIN[(_nIC-2)*_nF + f] - 2*_vIN[(_nIC-1)*_nF + f] ); + } } void COMMIT_Lt( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, int _nIC, int _nV, int _nS, double _tikterm, double *_vIN, double *_vOUT) { - /*for(int f = 0; f < _nF; f++){ - _vOUT[f] = -_vIN[_nV*_nS]; - _vOUT[_nF*(_nIC-1) + f] = _vIN[_nV*_nS + _nIC-2]; + for(int f = 0; f < _nF; f++){ + _vOUT[f] += _tikterm*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); + + for (int r = 0; r < _nIC-1; r++){ + _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + (r-1)] - 2*_vIN[_nV*_nS + r] + _vIN[_nV*_nS + (r+1)] ); + } + + _vOUT[(_nIC-1)*_nF + f] += _tikterm*( _vIN[_nV*_nS + (_nIC-2)] - 2*_vIN[_nV*_nS + (_nIC-1)] ); } +}//*/ - for(int r = 0; r < _nIC-2; r++){ +/* +////////////////////////// L_1 ////////////////////////// +void COMMIT_L( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_vIN, double *_vOUT) +{ + for(int r = 0; r < _nIC-1; r++){ for(int f = 0; f < _nF; f++){ - _vOUT[_nF*(r+1) + f] = _vIN[_nV*_nS + r] + _vIN[_nV*_nS + r+1]; + _vOUT[_nV*_nS + r] += _regterm*( -_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); } - }//*/ + } } -/* -void COMMIT_L( - int _nF, int _nIC, int _nV, int _nS, double _tikterm, +void COMMIT_Lt( + int _nF, int _nIC, int _nV, int _nS, double _regterm, double *_vIN, double *_vOUT) { for(int f = 0; f < _nF; f++){ + _vOUT[f] += _regterm*( -_vIN[_nV*_nS] ); - _vOUT[_nV*_nS] += 
_tikterm*( -2*_vIN[f] + x[_nF + f] ); + for(int r = 1; r < _nIC-1; r++) + _vOUT[_nF*r + f] += _regterm*( _vIN[_nV*_nS + r-1] - _vIN[_nV*_nS + r] ); - for(int r = 1; r < _nIC-1; r++){ - _vOUT[_nV*_nS + r] += _tikterm*( _vIN[(r-1)*_nF + f] -2*_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); - } + _vOUT[_nF*(_nIC-1) + f] += _regterm*( _vIN[_nV*_nS + _nIC-2] ); + } +} +//*/ - _vOUT[_nV*_nS + _nIC - 1] += _tikterm*( _vIN[(_nIC-2)*_nF + f] - 2*_vIN[(_nIC-1)*_nF + f] ); +////////////////////////// L_2 ////////////////////////// +void COMMIT_L( + int _nF, int _nIC, int _nV, int _nS, double _tikterm, + double *_vIN, double *_vOUT) +{ + for(int r = 0; r < _nIC-2; r++){ + for(int f = 0; f < _nF; f++){ + _vOUT[_nV*_nS + r] += _tikterm*( _vIN[r*_nF + f] -2*_vIN[(r+1)*_nF + f] + _vIN[(r+2)*_nF + f] ); + } } } @@ -2295,12 +2324,16 @@ void COMMIT_Lt( double *_vIN, double *_vOUT) { for(int f = 0; f < _nF; f++){ - _vOUT[f] += _tikterm*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); + _vOUT[f] += _tikterm*( _vIN[_nV*_nS] ); - for (int r = 0; r < _nIC-1; r++){ - _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + (r-1)] - 2*_vIN[_nV*_nS + r] + _vIN[_nV*_nS + (r+1)] ); + _vOUT[_nF + f] += _tikterm*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); + + for (int r = 2; r < _nIC-2; r++){ + _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + (r-2)] -2*_vIN[_nV*_nS + (r-1)] + _vIN[_nV*_nS + r] ); } - _vOUT[(_nIC-1)*_nF + f] += _tikterm*( _vIN[_nV*_nS + (_nIC-2)] - 2*_vIN[_nV*_nS + (_nIC-1)] ); + _vOUT[(_nIC-2)*_nF + f] += _tikterm*( _vIN[_nV*_nS + _nIC-4] -2*_vIN[_nV*_nS + _nIC-3] ); + + _vOUT[(_nIC-1)*_nF + f] += _tikterm*( _vIN[_nV*_nS + (_nIC-3)] ); } }//*/ \ No newline at end of file From 3c9c7295440b50fad1fabdc6ba8a1d669ba5b1f9 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Thu, 9 Apr 2020 15:26:44 -0500 Subject: [PATCH 08/17] adding more L matrices --- commit/core.pyx | 2 +- commit/operator/operator.pyx | 2 +- commit/operator/operator_withLUT.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index 0a2d13e7..b7dfd14b 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -678,7 +678,7 @@ cdef class Evaluation : print(y.shape[0]) print(self.KERNELS['wmr'].shape[0]) print(y.shape[0] + self.KERNELS['wmr'].shape[0])""" - y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-2, dtype=np.float64) + y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0], dtype=np.float64) y2[0:y.shape[0]] = y #print(y2.shape) return y2 diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index e6a80abc..958de161 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -95,7 +95,7 @@ cdef class LinearOperator : self.adjoint = 0 # direct of inverse product - self.n1 = self.nV*self.nS + self.nR-2 + self.n1 = self.nV*self.nS + self.nR self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV # get C pointers to arrays in DICTIONARY diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c index 5ccc9cdd..3a6cb515 100644 --- a/commit/operator/operator_withLUT.c +++ b/commit/operator/operator_withLUT.c @@ -2246,7 +2246,7 @@ void COMMIT_At( return; } -/* + ////////////////////////// L_2^z ////////////////////////// void COMMIT_L( int _nF, int _nIC, int _nV, int _nS, double _tikterm, @@ -2308,7 +2308,7 @@ void COMMIT_Lt( //*/ ////////////////////////// L_2 ////////////////////////// -void COMMIT_L( +/*void COMMIT_L( int _nF, int _nIC, int _nV, int _nS, double _tikterm, double *_vIN, double *_vOUT) { From 
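The free-boundary second-difference matrix being wired in here acts on the per-radius totals of the intra-cellular weights: each of its nIC-2 rows accumulates x[r] - 2*x[r+1] + x[r+2] summed over all fibers f. A small NumPy reference of both products (a hypothetical sketch with toy sizes, not part of the patch) that can be used to check that the forward and transposed C loops stay mutually adjoint:

import numpy as np

nF, nIC, lam = 5, 4, 0.3            # assumed toy sizes: fibers, radii, equalizer
x = np.random.rand(nIC, nF)         # x[r, f] = weight of fiber f for radius r

# forward product: second difference of the radius profile, summed over fibers
s = x.sum(axis=1)
Lx = lam * (s[:-2] - 2.0 * s[1:-1] + s[2:])          # nIC-2 extra rows

# transposed product: spread every regularization row back onto each fiber
y = np.random.rand(nIC - 2)
Lty = np.zeros((nIC, nF))
for r in range(nIC - 2):
    Lty[r, :]     += lam * y[r]
    Lty[r + 1, :] -= 2.0 * lam * y[r]
    Lty[r + 2, :] += lam * y[r]

# <L x, y> == <x, L' y> must hold if the two kernels are mutually adjoint
assert np.isclose(np.dot(Lx, y), np.sum(x * Lty))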
f3692ac0be2990f3a43c7c4f742738a5b1447fc2 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Thu, 9 Apr 2020 16:12:38 -0500 Subject: [PATCH 09/17] Testing matrices L --- commit/core.pyx | 2 +- commit/operator/operator.pyx | 4 ++-- commit/operator/operator_withLUT.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index b7dfd14b..0a2d13e7 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -678,7 +678,7 @@ cdef class Evaluation : print(y.shape[0]) print(self.KERNELS['wmr'].shape[0]) print(y.shape[0] + self.KERNELS['wmr'].shape[0])""" - y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0], dtype=np.float64) + y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-2, dtype=np.float64) y2[0:y.shape[0]] = y #print(y2.shape) return y2 diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index 958de161..892dccf7 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -95,7 +95,7 @@ cdef class LinearOperator : self.adjoint = 0 # direct of inverse product - self.n1 = self.nV*self.nS + self.nR + self.n1 = self.nV*self.nS + self.nR-2 self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV # get C pointers to arrays in DICTIONARY @@ -209,7 +209,7 @@ cdef class LinearOperator : with nogil: # INVERSE PRODUCT L'*lambda*y COMMIT_Lt( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, #self.tikterm + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, &v_in[0], &v_out[0] ) #""" diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c index 3a6cb515..5ccc9cdd 100644 --- a/commit/operator/operator_withLUT.c +++ b/commit/operator/operator_withLUT.c @@ -2246,7 +2246,7 @@ void COMMIT_At( return; } - +/* ////////////////////////// L_2^z ////////////////////////// void COMMIT_L( int _nF, int _nIC, int _nV, int _nS, double _tikterm, @@ -2308,7 +2308,7 @@ void COMMIT_Lt( //*/ ////////////////////////// L_2 ////////////////////////// -/*void COMMIT_L( +void COMMIT_L( int _nF, int _nIC, int _nV, int _nS, double _tikterm, double *_vIN, double *_vOUT) { From fc5df1d9a06100468cced5d42d0a1aa6505f6104 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Tue, 28 Apr 2020 11:16:14 -0500 Subject: [PATCH 10/17] Changing L to L2Z --- commit/core.pyx | 2 +- commit/operator/operator.pyx | 2 +- commit/operator/operator_withLUT.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index 0a2d13e7..e7e70517 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -678,7 +678,7 @@ cdef class Evaluation : print(y.shape[0]) print(self.KERNELS['wmr'].shape[0]) print(y.shape[0] + self.KERNELS['wmr'].shape[0])""" - y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-2, dtype=np.float64) + y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0], dtype=np.float64) #aqui resta y2[0:y.shape[0]] = y #print(y2.shape) return y2 diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index 892dccf7..f3f0617f 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -95,7 +95,7 @@ cdef class LinearOperator : self.adjoint = 0 # direct of inverse product - self.n1 = self.nV*self.nS + self.nR-2 + self.n1 = self.nV*self.nS + self.nR #aqui resta self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV # get C pointers to arrays in DICTIONARY diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c index 5ccc9cdd..3a6cb515 100644 --- a/commit/operator/operator_withLUT.c 
+++ b/commit/operator/operator_withLUT.c @@ -2246,7 +2246,7 @@ void COMMIT_At( return; } -/* + ////////////////////////// L_2^z ////////////////////////// void COMMIT_L( int _nF, int _nIC, int _nV, int _nS, double _tikterm, @@ -2308,7 +2308,7 @@ void COMMIT_Lt( //*/ ////////////////////////// L_2 ////////////////////////// -void COMMIT_L( +/*void COMMIT_L( int _nF, int _nIC, int _nV, int _nS, double _tikterm, double *_vIN, double *_vOUT) { From c0b870c5be3c4791dd7919499ff0fb684c18f334 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Sat, 13 Jun 2020 20:09:21 -0500 Subject: [PATCH 11/17] Adding option to choose L matrix and Lambda value --- commit/core.pyx | 31 +++++--- commit/operator/operator.pyx | 122 ++++++++++++++++++++++++----- commit/operator/operator_withLUT.c | 92 ++++++++++++---------- 3 files changed, 172 insertions(+), 73 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index e7e70517..eed20cf6 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -629,7 +629,7 @@ cdef class Evaluation : LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - def build_operator( self, regtikhonov=0.1 ) : + def build_operator( self, regtikhonov=0.0, Ltype=1 ) : """Compile/build the operator for computing the matrix-vector multiplications by A and A' using the informations from self.DICTIONARY, self.KERNELS and self.THREADS. NB: needs to call this function to update pointers to data structures in case @@ -656,7 +656,7 @@ cdef class Evaluation : import commit.operator.operator else : reload( sys.modules['commit.operator.operator'] ) - self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, regtikhonov ) + self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, regtikhonov, Ltype ) LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) @@ -672,17 +672,22 @@ cdef class Evaluation : raise RuntimeError( 'Data not loaded; call "load_data()" first.' 
) y = self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float64) - #return y - """print(type(y)) - print(y.shape) - print(y.shape[0]) - print(self.KERNELS['wmr'].shape[0]) - print(y.shape[0] + self.KERNELS['wmr'].shape[0])""" - y2 = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0], dtype=np.float64) #aqui resta - y2[0:y.shape[0]] = y - #print(y2.shape) - return y2 - #""" + + # add regularization part + if self.A.regtikhonov > 0.0: + if self.A.Ltype == 0: + yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-1, dtype=np.float64) + elif self.A.Ltype == 1: + yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-2, dtype=np.float64) + elif self.A.Ltype == 2: + yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]+1, dtype=np.float64) + else: + yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0] , dtype=np.float64) + + yL[0:y.shape[0]] = y + return yL + else: + return y def fit( self, tol_fun = 1e-3, tol_x = 1e-6, max_iter = 100, verbose = 1, x0 = None, regularisation = None ) : diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index f3f0617f..ad7cfad7 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -26,23 +26,54 @@ cdef extern void COMMIT_At( unsigned char *_ICthreadsT, unsigned int *_ECthreadsT, unsigned int *_ISOthreadsT ) nogil -cdef extern void COMMIT_L( +cdef extern void COMMIT_L1( int _nF, int _nIC, int _nV, int _nS, double _regterm, double *_v_in, double *_v_out ) nogil -cdef extern void COMMIT_Lt( +cdef extern void COMMIT_L2( int _nF, int _nIC, int _nV, int _nS, double _regterm, double *_v_in, double *_v_out ) nogil +cdef extern void COMMIT_L1z( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L2z( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L1t( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L2t( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L1zt( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L2zt( + int _nF, int _nIC, int _nV, int _nS, double _regterm, + double *_v_in, double *_v_out +) nogil + + cdef class LinearOperator : """This class is a wrapper to the C code for performing marix-vector multiplications with the COMMIT linear operator A. The multiplications are done using C code that uses information from the DICTIONARY, KERNELS and THREADS data structures. 
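The zero block that get_y() now appends is the standard augmented-system form of Tikhonov regularization: minimising ||Ax - y||^2 + ||lambda*L*x||^2 is the same least-squares problem as fitting the stacked operator [A; lambda*L] to the stacked data [y; 0], which is why the extra rows of the operator are matched by extra zero entries in the data vector. A toy dense illustration (hypothetical sketch, not part of the patch):

import numpy as np

rng = np.random.default_rng(0)
A   = rng.standard_normal((20, 6))
L   = np.diff(np.eye(6), n=2, axis=0)     # toy second-difference matrix, 4 x 6
y   = rng.standard_normal(20)
lam = 0.3

A_aug = np.vstack([A, lam * L])                       # stack lambda*L under A
y_aug = np.concatenate([y, np.zeros(L.shape[0])])     # zero padding, as in get_y()
x_reg = np.linalg.lstsq(A_aug, y_aug, rcond=None)[0]  # regularised solution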
""" cdef int nS, nF, nR, nE, nT, nV, nI, n, ndirs - cdef public int adjoint, n1, n2 + cdef public int adjoint, n1, n2, Ltype cdef public float regtikhonov cdef DICTIONARY @@ -70,7 +101,7 @@ cdef class LinearOperator : cdef unsigned int* ISOthreadsT - def __init__( self, DICTIONARY, KERNELS, THREADS, regtikhonov ) : + def __init__( self, DICTIONARY, KERNELS, THREADS, regtikhonov, Ltype ) : """Set the pointers to the data structures used by the C code.""" self.DICTIONARY = DICTIONARY self.KERNELS = KERNELS @@ -85,6 +116,7 @@ cdef class LinearOperator : self.n = DICTIONARY['IC']['n'] # numbner of IC segments self.ndirs = KERNELS['wmr'].shape[1] # number of directions self.regtikhonov = regtikhonov + self.Ltype = Ltype if KERNELS['wmr'].size > 0 : self.nS = KERNELS['wmr'].shape[2] # number of SAMPLES @@ -95,7 +127,18 @@ cdef class LinearOperator : self.adjoint = 0 # direct of inverse product - self.n1 = self.nV*self.nS + self.nR #aqui resta + # set shape of the operator according to Ltype + if self.regtikhonov > 0.0: + if self.Ltype == 0: + self.n1 = self.nV*self.nS + (self.nR-1) + elif self.Ltype == 1: + self.n1 = self.nV*self.nS + (self.nR-2) + elif self.Ltype == 2: + self.n1 = self.nV*self.nS + (self.nR+1) + else: + self.n1 = self.nV*self.nS + (self.nR) + else: + self.n1 = self.nV*self.nS self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV # get C pointers to arrays in DICTIONARY @@ -141,7 +184,7 @@ cdef class LinearOperator : @property def T( self ) : """Transpose of the explicit matrix.""" - C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.regtikhonov ) + C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.regtikhonov, self.Ltype ) C.adjoint = 1 - C.adjoint return C @@ -198,19 +241,58 @@ cdef class LinearOperator : self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT ) - if not self.adjoint: - with nogil: - # DIRECT PRODUCT L*lambda*x - COMMIT_L( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, - &v_in[0], &v_out[0] - ) - else: - with nogil: - # INVERSE PRODUCT L'*lambda*y - COMMIT_Lt( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, - &v_in[0], &v_out[0] - ) #""" + if self.regtikhonov > 0.0: + if not self.adjoint: + # DIRECT PRODUCT lambda*L*x + if self.Ltype == 0: + with nogil: + COMMIT_L1( + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + &v_in[0], &v_out[0] + ) + elif self.Ltype == 1: + with nogil: + COMMIT_L2( + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + &v_in[0], &v_out[0] + ) + elif self.Ltype == 2: + with nogil: + COMMIT_L1z( + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + &v_in[0], &v_out[0] + ) + else: + with nogil: + COMMIT_L2z( + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + &v_in[0], &v_out[0] + ) + else: + # INVERSE PRODUCT lambda*L'*y + if self.Ltype == 0: + with nogil: + COMMIT_L1t( + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + &v_in[0], &v_out[0] + ) + elif self.Ltype == 1: + with nogil: + COMMIT_L2t( + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + &v_in[0], &v_out[0] + ) + elif self.Ltype == 2: + with nogil: + COMMIT_L1zt( + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + &v_in[0], &v_out[0] + ) + else: + with nogil: + COMMIT_L2zt( + self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + &v_in[0], &v_out[0] + ) return v_out diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c index 3a6cb515..07a98734 100644 --- a/commit/operator/operator_withLUT.c +++ b/commit/operator/operator_withLUT.c @@ -2246,42 +2246,8 
@@ void COMMIT_At( return; } - -////////////////////////// L_2^z ////////////////////////// -void COMMIT_L( - int _nF, int _nIC, int _nV, int _nS, double _tikterm, - double *_vIN, double *_vOUT) -{ - for(int f = 0; f < _nF; f++){ - - _vOUT[_nV*_nS] += _tikterm*( -2*_vIN[f] + x[_nF + f] ); - - for(int r = 1; r < _nIC-1; r++){ - _vOUT[_nV*_nS + r] += _tikterm*( _vIN[(r-1)*_nF + f] -2*_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); - } - - _vOUT[_nV*_nS + _nIC - 1] += _tikterm*( _vIN[(_nIC-2)*_nF + f] - 2*_vIN[(_nIC-1)*_nF + f] ); - } -} - -void COMMIT_Lt( - int _nF, int _nIC, int _nV, int _nS, double _tikterm, - double *_vIN, double *_vOUT) -{ - for(int f = 0; f < _nF; f++){ - _vOUT[f] += _tikterm*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); - - for (int r = 0; r < _nIC-1; r++){ - _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + (r-1)] - 2*_vIN[_nV*_nS + r] + _vIN[_nV*_nS + (r+1)] ); - } - - _vOUT[(_nIC-1)*_nF + f] += _tikterm*( _vIN[_nV*_nS + (_nIC-2)] - 2*_vIN[_nV*_nS + (_nIC-1)] ); - } -}//*/ - -/* ////////////////////////// L_1 ////////////////////////// -void COMMIT_L( +void COMMIT_L1( int _nF, int _nIC, int _nV, int _nS, double _regterm, double *_vIN, double *_vOUT) { @@ -2292,7 +2258,7 @@ void COMMIT_L( } } -void COMMIT_Lt( +void COMMIT_L1t( int _nF, int _nIC, int _nV, int _nS, double _regterm, double *_vIN, double *_vOUT) { @@ -2305,10 +2271,9 @@ void COMMIT_Lt( _vOUT[_nF*(_nIC-1) + f] += _regterm*( _vIN[_nV*_nS + _nIC-2] ); } } -//*/ ////////////////////////// L_2 ////////////////////////// -/*void COMMIT_L( +void COMMIT_L2( int _nF, int _nIC, int _nV, int _nS, double _tikterm, double *_vIN, double *_vOUT) { @@ -2319,7 +2284,7 @@ void COMMIT_Lt( } } -void COMMIT_Lt( +void COMMIT_L2t( int _nF, int _nIC, int _nV, int _nS, double _tikterm, double *_vIN, double *_vOUT) { @@ -2336,4 +2301,51 @@ void COMMIT_Lt( _vOUT[(_nIC-1)*_nF + f] += _tikterm*( _vIN[_nV*_nS + (_nIC-3)] ); } -}//*/ \ No newline at end of file +} + +////////////////////////// L_1^z ////////////////////////// +void COMMIT_L1z( + int _nF, int _nIC, int _nV, int _nS, double _tikterm, + double *_vIN, double *_vOUT) +{ + return; +} + +void COMMIT_L1zt( + int _nF, int _nIC, int _nV, int _nS, double _tikterm, + double *_vIN, double *_vOUT) +{ + return; +} + +////////////////////////// L_2^z ////////////////////////// +void COMMIT_L2z( + int _nF, int _nIC, int _nV, int _nS, double _tikterm, + double *_vIN, double *_vOUT) +{ + for(int f = 0; f < _nF; f++){ + + _vOUT[_nV*_nS] += _tikterm*( -2*_vIN[f] + x[_nF + f] ); + + for(int r = 1; r < _nIC-1; r++){ + _vOUT[_nV*_nS + r] += _tikterm*( _vIN[(r-1)*_nF + f] -2*_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); + } + + _vOUT[_nV*_nS + _nIC - 1] += _tikterm*( _vIN[(_nIC-2)*_nF + f] - 2*_vIN[(_nIC-1)*_nF + f] ); + } +} + +void COMMIT_L2zt( + int _nF, int _nIC, int _nV, int _nS, double _tikterm, + double *_vIN, double *_vOUT) +{ + for(int f = 0; f < _nF; f++){ + _vOUT[f] += _tikterm*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); + + for (int r = 0; r < _nIC-1; r++){ + _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + (r-1)] - 2*_vIN[_nV*_nS + r] + _vIN[_nV*_nS + (r+1)] ); + } + + _vOUT[(_nIC-1)*_nF + f] += _tikterm*( _vIN[_nV*_nS + (_nIC-2)] - 2*_vIN[_nV*_nS + (_nIC-1)] ); + } +} From 90f6e4714a5c5341e2cfb36149016b1500d5cb2e Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Fri, 3 Jul 2020 12:57:34 -0500 Subject: [PATCH 12/17] Adding L1z matrix --- commit/operator/operator_withLUT.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/commit/operator/operator_withLUT.c 
b/commit/operator/operator_withLUT.c index 07a98734..a96302f5 100644 --- a/commit/operator/operator_withLUT.c +++ b/commit/operator/operator_withLUT.c @@ -2308,14 +2308,26 @@ void COMMIT_L1z( int _nF, int _nIC, int _nV, int _nS, double _tikterm, double *_vIN, double *_vOUT) { - return; + for(int f = 0; f < _nF; f++){ + _vOUT[_nV*_nS] += _tikterm*( _vIN[f] ); + + for(int r = 1; r < _nIC; r++){ + _vOUT[_nV*_nS + r] += _tikterm*( -_vIN[(r-1)*_nF + f] + _vIN[r*_nF + f] ); + } + + _vOUT[_nV*_nS + _nIC] += _tikterm*( -_vIN[(_nIC-1)*_nF + f] ); + } } void COMMIT_L1zt( int _nF, int _nIC, int _nV, int _nS, double _tikterm, double *_vIN, double *_vOUT) { - return; + for(int f = 0; f < _nF; f++){ + for(int r = 0; r < _nIC; r++){ + _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + r] - _vIN[_nV*_nS + r + 1]); + } + } } ////////////////////////// L_2^z ////////////////////////// From 0750c20e912ec48bf36bfea6761d6bb3b8f7c0c5 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Mon, 15 Feb 2021 23:38:58 -0600 Subject: [PATCH 13/17] Rename some variables --- commit/core.pyx | 29 +++++++--- commit/operator/operator.pyx | 89 +++++++++++++++--------------- commit/operator/operator_withLUT.c | 56 +++++++++---------- 3 files changed, 95 insertions(+), 79 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index eed20cf6..7da557f5 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -629,11 +629,18 @@ cdef class Evaluation : LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - def build_operator( self, regtikhonov=0.0, Ltype=1 ) : + def build_operator( self, tikhonov_equalizer=0, deriv_matrix=None ) : """Compile/build the operator for computing the matrix-vector multiplications by A and A' using the informations from self.DICTIONARY, self.KERNELS and self.THREADS. NB: needs to call this function to update pointers to data structures in case the data is changed in self.DICTIONARY, self.KERNELS or self.THREADS. 
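The zero-boundary first difference filled in here maps the nIC radius bins to nIC+1 rows (a leading +x_0 row, the interior differences, and a trailing -x_{nIC-1} row), and its transpose reduces to y_r - y_{r+1} per bin. A NumPy cross-check of the two loops (hypothetical sketch over the per-radius sums, not part of the patch):

import numpy as np

nIC, lam = 4, 0.3
s = np.random.rand(nIC)          # per-radius sums of the intra-cellular weights
y = np.random.rand(nIC + 1)

# forward loop, mirroring COMMIT_L1z
Ls = np.zeros(nIC + 1)
Ls[0] = lam * s[0]
for r in range(1, nIC):
    Ls[r] = lam * (-s[r - 1] + s[r])
Ls[nIC] = lam * (-s[nIC - 1])

# transposed loop, mirroring COMMIT_L1zt
Lty = np.array([lam * (y[r] - y[r + 1]) for r in range(nIC)])

# the pair is consistent iff <L s, y> == <s, L' y>
assert np.isclose(np.dot(Ls, y), np.dot(s, Lty))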
+ + Parameters + ---------- + tikhonov_equalizer: float + equalizer parameter of the Tikhonov regularization term + deriv_matrix: string + derivative matrix of the Tikhonov regularization term """ if self.DICTIONARY is None : ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) @@ -641,6 +648,12 @@ cdef class Evaluation : ERROR( 'Response functions not generated; call "generate_kernels()" and "load_kernels()" first' ) if self.THREADS is None : ERROR( 'Threads not set; call "set_threads()" first' ) + if tikhonov_equalizer < 0: + ERROR( 'Invalid value for Tikhonov equalizer parameter; value must be positive or zero' ) + if tikhonov_equalizer > 0 and deriv_matrix == None: + ERROR( 'Tikhonov equalizer term given but derivative matrix was not selected; add "deriv_matrix" parameter in "build_operator()", valid options are \'L1\' (first derivative with free boundary conditions), \'L2\' (second derivative with free boundary conditions), \'L1z\' (first derivative with zero boundary conditions) and \'L2z\' (second derivative with zero boundary conditions)' ) + if tikhonov_equalizer > 0 and deriv_matrix!='L1' and deriv_matrix!='L2' and deriv_matrix!='L1z' and deriv_matrix!='L2z': + ERROR( 'Invalid derivative matrix selection for regularization term; valid options are \'L1\' (first derivative with free boundary conditions), \'L2\' (second derivative with free boundary conditions), \'L1z\' (first derivative with zero boundary conditions) and \'L2z\' (second derivative with zero boundary conditions)' ) tic = time.time() LOG( '\n-> Building linear operator A:' ) @@ -656,7 +669,7 @@ cdef class Evaluation : import commit.operator.operator else : reload( sys.modules['commit.operator.operator'] ) - self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, regtikhonov, Ltype ) + self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, tikhonov_equalizer, deriv_matrix ) LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) @@ -674,15 +687,17 @@ cdef class Evaluation : y = self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float64) # add regularization part - if self.A.regtikhonov > 0.0: - if self.A.Ltype == 0: + if self.A.tikhonov_equalizer > 0: + if self.A.deriv_matrix == 'L1': yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-1, dtype=np.float64) - elif self.A.Ltype == 1: + elif self.A.deriv_matrix == 'L2': yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-2, dtype=np.float64) - elif self.A.Ltype == 2: + elif self.A.deriv_matrix == 'L1z': yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]+1, dtype=np.float64) - else: + elif self.A.deriv_matrix == 'L2z': yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0] , dtype=np.float64) + else: + ERROR( 'Invalid derivative matrix selection for regularization term; valid options are \'L1\' (first derivative with free boundary conditions), \'L2\' (second derivative with free boundary conditions), \'L1z\' (first derivative with zero boundary conditions) and \'L2z\' (second derivative with zero boundary conditions)' ) yL[0:y.shape[0]] = y return yL diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index ad7cfad7..96083ee7 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -27,42 +27,42 @@ cdef extern void COMMIT_At( ) nogil cdef extern void COMMIT_L1( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, 
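With the renamed interface, enabling the regularization is a matter of passing the equalizer and the derivative matrix when the operator is built. A usage sketch (the surrounding Evaluation calls follow the usual COMMIT workflow and their exact arguments are assumptions; only build_operator() and fit() reflect what this patch documents):

import commit

mit = commit.Evaluation( '.', '.' )     # assumed study/subject paths
# ... load_data(), set_model(), generate_kernels(), load_kernels(),
# ... load_dictionary(), set_threads() as in the standard COMMIT workflow

# second-derivative smoothing across the fiber radii, free boundary conditions
mit.build_operator( tikhonov_equalizer=0.1, deriv_matrix='L2' )
mit.fit( tol_fun=1e-3, max_iter=1000 )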
int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil cdef extern void COMMIT_L2( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil cdef extern void COMMIT_L1z( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil cdef extern void COMMIT_L2z( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil cdef extern void COMMIT_L1t( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil cdef extern void COMMIT_L2t( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil cdef extern void COMMIT_L1zt( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil cdef extern void COMMIT_L2zt( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil @@ -73,8 +73,9 @@ cdef class LinearOperator : that uses information from the DICTIONARY, KERNELS and THREADS data structures. """ cdef int nS, nF, nR, nE, nT, nV, nI, n, ndirs - cdef public int adjoint, n1, n2, Ltype - cdef public float regtikhonov + cdef public int adjoint, n1, n2 + cdef public float tikhonov_equalizer + cdef public char* deriv_matrix cdef DICTIONARY cdef KERNELS @@ -101,22 +102,22 @@ cdef class LinearOperator : cdef unsigned int* ISOthreadsT - def __init__( self, DICTIONARY, KERNELS, THREADS, regtikhonov, Ltype ) : + def __init__( self, DICTIONARY, KERNELS, THREADS, tikhonov_equalizer=0, deriv_matrix=None ) : """Set the pointers to the data structures used by the C code.""" self.DICTIONARY = DICTIONARY self.KERNELS = KERNELS self.THREADS = THREADS - self.nF = DICTIONARY['IC']['nF'] # number of FIBERS - self.nR = KERNELS['wmr'].shape[0] # number of FIBER RADII - self.nE = DICTIONARY['EC']['nE'] # number of EC segments - self.nT = KERNELS['wmh'].shape[0] # number of EC TORTUOSITY values - self.nV = DICTIONARY['nV'] # number of VOXELS - self.nI = KERNELS['iso'].shape[0] # number of ISO contributions - self.n = DICTIONARY['IC']['n'] # numbner of IC segments - self.ndirs = KERNELS['wmr'].shape[1] # number of directions - self.regtikhonov = regtikhonov - self.Ltype = Ltype + self.nF = DICTIONARY['IC']['nF'] # number of FIBERS + self.nR = KERNELS['wmr'].shape[0] # number of FIBER RADII + self.nE = DICTIONARY['EC']['nE'] # number of EC segments + self.nT = KERNELS['wmh'].shape[0] # number of EC TORTUOSITY values + self.nV = DICTIONARY['nV'] # number of VOXELS + self.nI = KERNELS['iso'].shape[0] # number of ISO contributions + self.n = DICTIONARY['IC']['n'] # numbner of IC segments + self.ndirs = KERNELS['wmr'].shape[1] # number of directions + self.tikhonov_equalizer = tikhonov_equalizer # equalizer parameter of the Tikhonov regularization term + self.deriv_matrix = deriv_matrix # derivative matrix of the Tikhonov regularization term if KERNELS['wmr'].size > 0 : self.nS = KERNELS['wmr'].shape[2] # number of SAMPLES @@ -127,13 +128,13 @@ cdef class LinearOperator : self.adjoint = 0 # direct of inverse product - # set shape of the operator according to Ltype - if 
self.regtikhonov > 0.0: - if self.Ltype == 0: + # set shape of the operator according to deriv_matrix + if self.tikhonov_equalizer > 0.0: + if self.deriv_matrix == 0: self.n1 = self.nV*self.nS + (self.nR-1) - elif self.Ltype == 1: + elif self.deriv_matrix == 1: self.n1 = self.nV*self.nS + (self.nR-2) - elif self.Ltype == 2: + elif self.deriv_matrix == 2: self.n1 = self.nV*self.nS + (self.nR+1) else: self.n1 = self.nV*self.nS + (self.nR) @@ -184,7 +185,7 @@ cdef class LinearOperator : @property def T( self ) : """Transpose of the explicit matrix.""" - C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.regtikhonov, self.Ltype ) + C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.tikhonov_equalizer, self.deriv_matrix ) C.adjoint = 1 - C.adjoint return C @@ -241,57 +242,57 @@ cdef class LinearOperator : self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT ) - if self.regtikhonov > 0.0: + if self.tikhonov_equalizer > 0: if not self.adjoint: # DIRECT PRODUCT lambda*L*x - if self.Ltype == 0: + if self.deriv_matrix == 'L1': with nogil: COMMIT_L1( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, &v_in[0], &v_out[0] ) - elif self.Ltype == 1: + elif self.deriv_matrix == 'L2': with nogil: COMMIT_L2( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, &v_in[0], &v_out[0] ) - elif self.Ltype == 2: + elif self.deriv_matrix == 'L1z': with nogil: COMMIT_L1z( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, &v_in[0], &v_out[0] ) - else: + elif self.deriv_matrix == 'L2z': with nogil: COMMIT_L2z( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, &v_in[0], &v_out[0] ) else: # INVERSE PRODUCT lambda*L'*y - if self.Ltype == 0: + if self.deriv_matrix == 'L1': with nogil: COMMIT_L1t( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, &v_in[0], &v_out[0] ) - elif self.Ltype == 1: + elif self.deriv_matrix == 'L2': with nogil: COMMIT_L2t( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, &v_in[0], &v_out[0] ) - elif self.Ltype == 2: + elif self.deriv_matrix == 'L1z': with nogil: COMMIT_L1zt( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, &v_in[0], &v_out[0] ) - else: + elif self.deriv_matrix == 'L2z': with nogil: COMMIT_L2zt( - self.nF, self.nR, self.nV, self.nS, self.regtikhonov, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, &v_in[0], &v_out[0] ) diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c index a96302f5..dc45f9bd 100644 --- a/commit/operator/operator_withLUT.c +++ b/commit/operator/operator_withLUT.c @@ -2248,116 +2248,116 @@ void COMMIT_At( ////////////////////////// L_1 ////////////////////////// void COMMIT_L1( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { for(int r = 0; r < _nIC-1; r++){ for(int f = 0; f < _nF; f++){ - _vOUT[_nV*_nS + r] += _regterm*( -_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); + _vOUT[_nV*_nS + r] += _lambda*( -_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); } } } void COMMIT_L1t( - int _nF, int _nIC, int _nV, int _nS, double _regterm, + 
int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { for(int f = 0; f < _nF; f++){ - _vOUT[f] += _regterm*( -_vIN[_nV*_nS] ); + _vOUT[f] += _lambda*( -_vIN[_nV*_nS] ); for(int r = 1; r < _nIC-1; r++) - _vOUT[_nF*r + f] += _regterm*( _vIN[_nV*_nS + r-1] - _vIN[_nV*_nS + r] ); + _vOUT[_nF*r + f] += _lambda*( _vIN[_nV*_nS + r-1] - _vIN[_nV*_nS + r] ); - _vOUT[_nF*(_nIC-1) + f] += _regterm*( _vIN[_nV*_nS + _nIC-2] ); + _vOUT[_nF*(_nIC-1) + f] += _lambda*( _vIN[_nV*_nS + _nIC-2] ); } } ////////////////////////// L_2 ////////////////////////// void COMMIT_L2( - int _nF, int _nIC, int _nV, int _nS, double _tikterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { for(int r = 0; r < _nIC-2; r++){ for(int f = 0; f < _nF; f++){ - _vOUT[_nV*_nS + r] += _tikterm*( _vIN[r*_nF + f] -2*_vIN[(r+1)*_nF + f] + _vIN[(r+2)*_nF + f] ); + _vOUT[_nV*_nS + r] += _lambda*( _vIN[r*_nF + f] -2*_vIN[(r+1)*_nF + f] + _vIN[(r+2)*_nF + f] ); } } } void COMMIT_L2t( - int _nF, int _nIC, int _nV, int _nS, double _tikterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { for(int f = 0; f < _nF; f++){ - _vOUT[f] += _tikterm*( _vIN[_nV*_nS] ); + _vOUT[f] += _lambda*( _vIN[_nV*_nS] ); - _vOUT[_nF + f] += _tikterm*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); + _vOUT[_nF + f] += _lambda*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); for (int r = 2; r < _nIC-2; r++){ - _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + (r-2)] -2*_vIN[_nV*_nS + (r-1)] + _vIN[_nV*_nS + r] ); + _vOUT[r*_nF + f] += _lambda*( _vIN[_nV*_nS + (r-2)] -2*_vIN[_nV*_nS + (r-1)] + _vIN[_nV*_nS + r] ); } - _vOUT[(_nIC-2)*_nF + f] += _tikterm*( _vIN[_nV*_nS + _nIC-4] -2*_vIN[_nV*_nS + _nIC-3] ); + _vOUT[(_nIC-2)*_nF + f] += _lambda*( _vIN[_nV*_nS + _nIC-4] -2*_vIN[_nV*_nS + _nIC-3] ); - _vOUT[(_nIC-1)*_nF + f] += _tikterm*( _vIN[_nV*_nS + (_nIC-3)] ); + _vOUT[(_nIC-1)*_nF + f] += _lambda*( _vIN[_nV*_nS + (_nIC-3)] ); } } ////////////////////////// L_1^z ////////////////////////// void COMMIT_L1z( - int _nF, int _nIC, int _nV, int _nS, double _tikterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { for(int f = 0; f < _nF; f++){ - _vOUT[_nV*_nS] += _tikterm*( _vIN[f] ); + _vOUT[_nV*_nS] += _lambda*( _vIN[f] ); for(int r = 1; r < _nIC; r++){ - _vOUT[_nV*_nS + r] += _tikterm*( -_vIN[(r-1)*_nF + f] + _vIN[r*_nF + f] ); + _vOUT[_nV*_nS + r] += _lambda*( -_vIN[(r-1)*_nF + f] + _vIN[r*_nF + f] ); } - _vOUT[_nV*_nS + _nIC] += _tikterm*( -_vIN[(_nIC-1)*_nF + f] ); + _vOUT[_nV*_nS + _nIC] += _lambda*( -_vIN[(_nIC-1)*_nF + f] ); } } void COMMIT_L1zt( - int _nF, int _nIC, int _nV, int _nS, double _tikterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { for(int f = 0; f < _nF; f++){ for(int r = 0; r < _nIC; r++){ - _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + r] - _vIN[_nV*_nS + r + 1]); + _vOUT[r*_nF + f] += _lambda*( _vIN[_nV*_nS + r] - _vIN[_nV*_nS + r + 1]); } } } ////////////////////////// L_2^z ////////////////////////// void COMMIT_L2z( - int _nF, int _nIC, int _nV, int _nS, double _tikterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { for(int f = 0; f < _nF; f++){ - _vOUT[_nV*_nS] += _tikterm*( -2*_vIN[f] + x[_nF + f] ); + _vOUT[_nV*_nS] += _lambda*( -2*_vIN[f] + x[_nF + f] ); for(int r = 1; r < _nIC-1; r++){ - _vOUT[_nV*_nS + r] += _tikterm*( _vIN[(r-1)*_nF + f] -2*_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); + _vOUT[_nV*_nS + r] += _lambda*( _vIN[(r-1)*_nF + 
f] -2*_vIN[r*_nF + f] + _vIN[(r+1)*_nF + f] ); } - _vOUT[_nV*_nS + _nIC - 1] += _tikterm*( _vIN[(_nIC-2)*_nF + f] - 2*_vIN[(_nIC-1)*_nF + f] ); + _vOUT[_nV*_nS + _nIC - 1] += _lambda*( _vIN[(_nIC-2)*_nF + f] - 2*_vIN[(_nIC-1)*_nF + f] ); } } void COMMIT_L2zt( - int _nF, int _nIC, int _nV, int _nS, double _tikterm, + int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { for(int f = 0; f < _nF; f++){ - _vOUT[f] += _tikterm*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); + _vOUT[f] += _lambda*( -2*_vIN[_nV*_nS] + _vIN[_nV*_nS + 1] ); for (int r = 0; r < _nIC-1; r++){ - _vOUT[r*_nF + f] += _tikterm*( _vIN[_nV*_nS + (r-1)] - 2*_vIN[_nV*_nS + r] + _vIN[_nV*_nS + (r+1)] ); + _vOUT[r*_nF + f] += _lambda*( _vIN[_nV*_nS + (r-1)] - 2*_vIN[_nV*_nS + r] + _vIN[_nV*_nS + (r+1)] ); } - _vOUT[(_nIC-1)*_nF + f] += _tikterm*( _vIN[_nV*_nS + (_nIC-2)] - 2*_vIN[_nV*_nS + (_nIC-1)] ); + _vOUT[(_nIC-1)*_nF + f] += _lambda*( _vIN[_nV*_nS + (_nIC-2)] - 2*_vIN[_nV*_nS + (_nIC-1)] ); } } From 82d05c50ef6bf831b081907ee774aacb99abf159 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Tue, 16 Feb 2021 16:10:12 -0600 Subject: [PATCH 14/17] Add Tikhonov regularization changes to CHANGELOG.md --- CHANGELOG.md | 10 ++++++++++ setup.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4224780e..269c22c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ # Change Log All notable changes to COMMIT will be documented in this file. +## [1.5.0] - 2021-02-16 + +### Added +- core.pyx: Add to the function build_operator the parameter tikhonov_equalizer and deriv_matrix +- operator.pyx: Extend Ax and A'y multiplications when Tikhonov regularization is enabled +- operator_withLUC.c: Add C functions to extend Ax and A'y multiplications when Tikhonov regularization is enabled + +### Changed +- core.pyx: The function get_y returns a larger vector y when Tikhonov regularization is enabled + ## [1.4.5] - 2021-02-08 ### Fixed diff --git a/setup.py b/setup.py index 74391f2e..d8fabce4 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ def run(self): description = 'Convex Optimization Modeling for Microstructure Informed Tractography (COMMIT)' opts = dict(name='dmri-commit', - version='1.4.5', + version='1.5.0', description=description, long_description=description, author='Alessandro Daducci', From b62d9d4f624c4f6c833e94f51b53dfacb2a7cff4 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Wed, 17 Feb 2021 22:26:03 -0600 Subject: [PATCH 15/17] Change endlines to linux-style endline --- commit/operator/operator.pyx | 598 +-- commit/operator/operator_noLUT.c | 490 +-- commit/operator/operator_withLUT.c | 4726 +++++++++++----------- commit/solvers.py | 806 ++-- commit/trk2dictionary/trk2dictionary.pyx | 858 ++-- 5 files changed, 3739 insertions(+), 3739 deletions(-) diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index 96083ee7..39831703 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -1,299 +1,299 @@ -#!python -#cython: language_level=3, boundscheck=False, wraparound=False, profile=False - -import cython -import numpy as np -cimport numpy as np - -# Interfaces to actual C code performing the multiplications -cdef extern void COMMIT_A( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_v_in, double *_v_out, - unsigned int *_ICf, unsigned int *_ICv, unsigned short *_ICo, float *_ICl, - unsigned int *_ECv, unsigned short *_ECo, - unsigned int *_ISOv, - float 
*_wmrSFP, float *_wmhSFP, float *_isoSFP, - unsigned int* _ICthreads, unsigned int* _ECthreads, unsigned int* _ISOthreads -) nogil - -cdef extern void COMMIT_At( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_v_in, double *_v_out, - unsigned int *_ICf, unsigned int *_ICv, unsigned short *_ICo, float *_ICl, - unsigned int *_ECv, unsigned short *_ECo, - unsigned int *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - unsigned char *_ICthreadsT, unsigned int *_ECthreadsT, unsigned int *_ISOthreadsT -) nogil - -cdef extern void COMMIT_L1( - int _nF, int _nIC, int _nV, int _nS, double _lambda, - double *_v_in, double *_v_out -) nogil - -cdef extern void COMMIT_L2( - int _nF, int _nIC, int _nV, int _nS, double _lambda, - double *_v_in, double *_v_out -) nogil - -cdef extern void COMMIT_L1z( - int _nF, int _nIC, int _nV, int _nS, double _lambda, - double *_v_in, double *_v_out -) nogil - -cdef extern void COMMIT_L2z( - int _nF, int _nIC, int _nV, int _nS, double _lambda, - double *_v_in, double *_v_out -) nogil - -cdef extern void COMMIT_L1t( - int _nF, int _nIC, int _nV, int _nS, double _lambda, - double *_v_in, double *_v_out -) nogil - -cdef extern void COMMIT_L2t( - int _nF, int _nIC, int _nV, int _nS, double _lambda, - double *_v_in, double *_v_out -) nogil - -cdef extern void COMMIT_L1zt( - int _nF, int _nIC, int _nV, int _nS, double _lambda, - double *_v_in, double *_v_out -) nogil - -cdef extern void COMMIT_L2zt( - int _nF, int _nIC, int _nV, int _nS, double _lambda, - double *_v_in, double *_v_out -) nogil - - -cdef class LinearOperator : - """This class is a wrapper to the C code for performing marix-vector multiplications - with the COMMIT linear operator A. The multiplications are done using C code - that uses information from the DICTIONARY, KERNELS and THREADS data structures. 
- """ - cdef int nS, nF, nR, nE, nT, nV, nI, n, ndirs - cdef public int adjoint, n1, n2 - cdef public float tikhonov_equalizer - cdef public char* deriv_matrix - - cdef DICTIONARY - cdef KERNELS - cdef THREADS - - cdef unsigned int* ICf - cdef float* ICl - cdef unsigned int* ICv - cdef unsigned short* ICo - cdef unsigned int* ECv - cdef unsigned short* ECo - cdef unsigned int* ISOv - - cdef float* LUT_IC - cdef float* LUT_EC - cdef float* LUT_ISO - - cdef unsigned int* ICthreads - cdef unsigned int* ECthreads - cdef unsigned int* ISOthreads - - cdef unsigned char* ICthreadsT - cdef unsigned int* ECthreadsT - cdef unsigned int* ISOthreadsT - - - def __init__( self, DICTIONARY, KERNELS, THREADS, tikhonov_equalizer=0, deriv_matrix=None ) : - """Set the pointers to the data structures used by the C code.""" - self.DICTIONARY = DICTIONARY - self.KERNELS = KERNELS - self.THREADS = THREADS - - self.nF = DICTIONARY['IC']['nF'] # number of FIBERS - self.nR = KERNELS['wmr'].shape[0] # number of FIBER RADII - self.nE = DICTIONARY['EC']['nE'] # number of EC segments - self.nT = KERNELS['wmh'].shape[0] # number of EC TORTUOSITY values - self.nV = DICTIONARY['nV'] # number of VOXELS - self.nI = KERNELS['iso'].shape[0] # number of ISO contributions - self.n = DICTIONARY['IC']['n'] # numbner of IC segments - self.ndirs = KERNELS['wmr'].shape[1] # number of directions - self.tikhonov_equalizer = tikhonov_equalizer # equalizer parameter of the Tikhonov regularization term - self.deriv_matrix = deriv_matrix # derivative matrix of the Tikhonov regularization term - - if KERNELS['wmr'].size > 0 : - self.nS = KERNELS['wmr'].shape[2] # number of SAMPLES - elif KERNELS['wmh'].size > 0 : - self.nS = KERNELS['wmh'].shape[2] - else : - self.nS = KERNELS['wmr'].shape[1] - - self.adjoint = 0 # direct of inverse product - - # set shape of the operator according to deriv_matrix - if self.tikhonov_equalizer > 0.0: - if self.deriv_matrix == 0: - self.n1 = self.nV*self.nS + (self.nR-1) - elif self.deriv_matrix == 1: - self.n1 = self.nV*self.nS + (self.nR-2) - elif self.deriv_matrix == 2: - self.n1 = self.nV*self.nS + (self.nR+1) - else: - self.n1 = self.nV*self.nS + (self.nR) - else: - self.n1 = self.nV*self.nS - self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV - - # get C pointers to arrays in DICTIONARY - cdef unsigned int [::1] ICf = DICTIONARY['IC']['fiber'] - self.ICf = &ICf[0] - cdef float [::1] ICl = DICTIONARY['IC']['len'] - self.ICl = &ICl[0] - cdef unsigned int [::1] ICv = DICTIONARY['IC']['v'] - self.ICv = &ICv[0] - cdef unsigned short [::1] ICo = DICTIONARY['IC']['o'] - self.ICo = &ICo[0] - cdef unsigned int [::1] ECv = DICTIONARY['EC']['v'] - self.ECv = &ECv[0] - cdef unsigned short [::1] ECo = DICTIONARY['EC']['o'] - self.ECo = &ECo[0] - cdef unsigned int [::1] ISOv = DICTIONARY['ISO']['v'] - self.ISOv = &ISOv[0] - - # get C pointers to arrays in KERNELS - cdef float [:, :, ::1] wmrSFP = KERNELS['wmr'] - self.LUT_IC = &wmrSFP[0,0,0] - cdef float [:, :, ::1] wmhSFP = KERNELS['wmh'] - self.LUT_EC = &wmhSFP[0,0,0] - cdef float [:, ::1] isoSFP = KERNELS['iso'] - self.LUT_ISO = &isoSFP[0,0] - - # get C pointers to arrays in THREADS - cdef unsigned int [::1] ICthreads = THREADS['IC'] - self.ICthreads = &ICthreads[0] - cdef unsigned int [::1] ECthreads = THREADS['EC'] - self.ECthreads = &ECthreads[0] - cdef unsigned int [::1] ISOthreads = THREADS['ISO'] - self.ISOthreads = &ISOthreads[0] - - cdef unsigned char [::1] ICthreadsT = THREADS['ICt'] - self.ICthreadsT = &ICthreadsT[0] - cdef unsigned int [::1] 
ECthreadsT = THREADS['ECt'] - self.ECthreadsT = &ECthreadsT[0] - cdef unsigned int [::1] ISOthreadsT = THREADS['ISOt'] - self.ISOthreadsT = &ISOthreadsT[0] - - - @property - def T( self ) : - """Transpose of the explicit matrix.""" - C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.tikhonov_equalizer, self.deriv_matrix ) - C.adjoint = 1 - C.adjoint - return C - - - @property - def shape( self ) : - """Size of the explicit matrix.""" - if not self.adjoint : - return ( self.n1, self.n2 ) - else : - return ( self.n2, self.n1 ) - - - def dot( self, double [::1] v_in ): - """Wrapper to C code for efficiently performing the matrix-vector multiplications. - - Parameters - ---------- - v_in : 1D numpy.array of double - Input vector for the matrix-vector multiplication - - Returns - ------- - v_out : 1D numpy.array of double - Results of the multiplication - """ - - # Permit only matrix-vector multiplications - if v_in.size != self.shape[1] : - raise RuntimeError( "A.dot(): dimensions do not match" ) - - # Create output array - cdef double [::1] v_out = np.zeros( self.shape[0], dtype=np.float64 ) - - # Call the cython function to read the memory pointers - if not self.adjoint : - # DIRECT PRODUCT A*x - with nogil : - COMMIT_A( - self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, - &v_in[0], &v_out[0], - self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv, - self.LUT_IC, self.LUT_EC, self.LUT_ISO, - self.ICthreads, self.ECthreads, self.ISOthreads - ) - else : - # INVERSE PRODUCT A'*y - with nogil : - COMMIT_At( - self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, - &v_in[0], &v_out[0], - self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv, - self.LUT_IC, self.LUT_EC, self.LUT_ISO, - self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT - ) - - if self.tikhonov_equalizer > 0: - if not self.adjoint: - # DIRECT PRODUCT lambda*L*x - if self.deriv_matrix == 'L1': - with nogil: - COMMIT_L1( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, - &v_in[0], &v_out[0] - ) - elif self.deriv_matrix == 'L2': - with nogil: - COMMIT_L2( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, - &v_in[0], &v_out[0] - ) - elif self.deriv_matrix == 'L1z': - with nogil: - COMMIT_L1z( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, - &v_in[0], &v_out[0] - ) - elif self.deriv_matrix == 'L2z': - with nogil: - COMMIT_L2z( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, - &v_in[0], &v_out[0] - ) - else: - # INVERSE PRODUCT lambda*L'*y - if self.deriv_matrix == 'L1': - with nogil: - COMMIT_L1t( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, - &v_in[0], &v_out[0] - ) - elif self.deriv_matrix == 'L2': - with nogil: - COMMIT_L2t( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, - &v_in[0], &v_out[0] - ) - elif self.deriv_matrix == 'L1z': - with nogil: - COMMIT_L1zt( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, - &v_in[0], &v_out[0] - ) - elif self.deriv_matrix == 'L2z': - with nogil: - COMMIT_L2zt( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, - &v_in[0], &v_out[0] - ) - - return v_out +#!python +#cython: language_level=3, boundscheck=False, wraparound=False, profile=False + +import cython +import numpy as np +cimport numpy as np + +# Interfaces to actual C code performing the multiplications +cdef extern void COMMIT_A( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_v_in, double *_v_out, + unsigned int *_ICf, unsigned int *_ICv, 
unsigned short *_ICo, float *_ICl, + unsigned int *_ECv, unsigned short *_ECo, + unsigned int *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + unsigned int* _ICthreads, unsigned int* _ECthreads, unsigned int* _ISOthreads +) nogil + +cdef extern void COMMIT_At( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_v_in, double *_v_out, + unsigned int *_ICf, unsigned int *_ICv, unsigned short *_ICo, float *_ICl, + unsigned int *_ECv, unsigned short *_ECo, + unsigned int *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + unsigned char *_ICthreadsT, unsigned int *_ECthreadsT, unsigned int *_ISOthreadsT +) nogil + +cdef extern void COMMIT_L1( + int _nF, int _nIC, int _nV, int _nS, double _lambda, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L2( + int _nF, int _nIC, int _nV, int _nS, double _lambda, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L1z( + int _nF, int _nIC, int _nV, int _nS, double _lambda, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L2z( + int _nF, int _nIC, int _nV, int _nS, double _lambda, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L1t( + int _nF, int _nIC, int _nV, int _nS, double _lambda, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L2t( + int _nF, int _nIC, int _nV, int _nS, double _lambda, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L1zt( + int _nF, int _nIC, int _nV, int _nS, double _lambda, + double *_v_in, double *_v_out +) nogil + +cdef extern void COMMIT_L2zt( + int _nF, int _nIC, int _nV, int _nS, double _lambda, + double *_v_in, double *_v_out +) nogil + + +cdef class LinearOperator : + """This class is a wrapper to the C code for performing marix-vector multiplications + with the COMMIT linear operator A. The multiplications are done using C code + that uses information from the DICTIONARY, KERNELS and THREADS data structures. 
+ """ + cdef int nS, nF, nR, nE, nT, nV, nI, n, ndirs + cdef public int adjoint, n1, n2 + cdef public float tikhonov_equalizer + cdef public char* deriv_matrix + + cdef DICTIONARY + cdef KERNELS + cdef THREADS + + cdef unsigned int* ICf + cdef float* ICl + cdef unsigned int* ICv + cdef unsigned short* ICo + cdef unsigned int* ECv + cdef unsigned short* ECo + cdef unsigned int* ISOv + + cdef float* LUT_IC + cdef float* LUT_EC + cdef float* LUT_ISO + + cdef unsigned int* ICthreads + cdef unsigned int* ECthreads + cdef unsigned int* ISOthreads + + cdef unsigned char* ICthreadsT + cdef unsigned int* ECthreadsT + cdef unsigned int* ISOthreadsT + + + def __init__( self, DICTIONARY, KERNELS, THREADS, tikhonov_equalizer=0, deriv_matrix=None ) : + """Set the pointers to the data structures used by the C code.""" + self.DICTIONARY = DICTIONARY + self.KERNELS = KERNELS + self.THREADS = THREADS + + self.nF = DICTIONARY['IC']['nF'] # number of FIBERS + self.nR = KERNELS['wmr'].shape[0] # number of FIBER RADII + self.nE = DICTIONARY['EC']['nE'] # number of EC segments + self.nT = KERNELS['wmh'].shape[0] # number of EC TORTUOSITY values + self.nV = DICTIONARY['nV'] # number of VOXELS + self.nI = KERNELS['iso'].shape[0] # number of ISO contributions + self.n = DICTIONARY['IC']['n'] # numbner of IC segments + self.ndirs = KERNELS['wmr'].shape[1] # number of directions + self.tikhonov_equalizer = tikhonov_equalizer # equalizer parameter of the Tikhonov regularization term + self.deriv_matrix = deriv_matrix # derivative matrix of the Tikhonov regularization term + + if KERNELS['wmr'].size > 0 : + self.nS = KERNELS['wmr'].shape[2] # number of SAMPLES + elif KERNELS['wmh'].size > 0 : + self.nS = KERNELS['wmh'].shape[2] + else : + self.nS = KERNELS['wmr'].shape[1] + + self.adjoint = 0 # direct of inverse product + + # set shape of the operator according to deriv_matrix + if self.tikhonov_equalizer > 0.0: + if self.deriv_matrix == 0: + self.n1 = self.nV*self.nS + (self.nR-1) + elif self.deriv_matrix == 1: + self.n1 = self.nV*self.nS + (self.nR-2) + elif self.deriv_matrix == 2: + self.n1 = self.nV*self.nS + (self.nR+1) + else: + self.n1 = self.nV*self.nS + (self.nR) + else: + self.n1 = self.nV*self.nS + self.n2 = self.nR*self.nF + self.nT*self.nE + self.nI*self.nV + + # get C pointers to arrays in DICTIONARY + cdef unsigned int [::1] ICf = DICTIONARY['IC']['fiber'] + self.ICf = &ICf[0] + cdef float [::1] ICl = DICTIONARY['IC']['len'] + self.ICl = &ICl[0] + cdef unsigned int [::1] ICv = DICTIONARY['IC']['v'] + self.ICv = &ICv[0] + cdef unsigned short [::1] ICo = DICTIONARY['IC']['o'] + self.ICo = &ICo[0] + cdef unsigned int [::1] ECv = DICTIONARY['EC']['v'] + self.ECv = &ECv[0] + cdef unsigned short [::1] ECo = DICTIONARY['EC']['o'] + self.ECo = &ECo[0] + cdef unsigned int [::1] ISOv = DICTIONARY['ISO']['v'] + self.ISOv = &ISOv[0] + + # get C pointers to arrays in KERNELS + cdef float [:, :, ::1] wmrSFP = KERNELS['wmr'] + self.LUT_IC = &wmrSFP[0,0,0] + cdef float [:, :, ::1] wmhSFP = KERNELS['wmh'] + self.LUT_EC = &wmhSFP[0,0,0] + cdef float [:, ::1] isoSFP = KERNELS['iso'] + self.LUT_ISO = &isoSFP[0,0] + + # get C pointers to arrays in THREADS + cdef unsigned int [::1] ICthreads = THREADS['IC'] + self.ICthreads = &ICthreads[0] + cdef unsigned int [::1] ECthreads = THREADS['EC'] + self.ECthreads = &ECthreads[0] + cdef unsigned int [::1] ISOthreads = THREADS['ISO'] + self.ISOthreads = &ISOthreads[0] + + cdef unsigned char [::1] ICthreadsT = THREADS['ICt'] + self.ICthreadsT = &ICthreadsT[0] + cdef unsigned int [::1] 
ECthreadsT = THREADS['ECt'] + self.ECthreadsT = &ECthreadsT[0] + cdef unsigned int [::1] ISOthreadsT = THREADS['ISOt'] + self.ISOthreadsT = &ISOthreadsT[0] + + + @property + def T( self ) : + """Transpose of the explicit matrix.""" + C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.tikhonov_equalizer, self.deriv_matrix ) + C.adjoint = 1 - C.adjoint + return C + + + @property + def shape( self ) : + """Size of the explicit matrix.""" + if not self.adjoint : + return ( self.n1, self.n2 ) + else : + return ( self.n2, self.n1 ) + + + def dot( self, double [::1] v_in ): + """Wrapper to C code for efficiently performing the matrix-vector multiplications. + + Parameters + ---------- + v_in : 1D numpy.array of double + Input vector for the matrix-vector multiplication + + Returns + ------- + v_out : 1D numpy.array of double + Results of the multiplication + """ + + # Permit only matrix-vector multiplications + if v_in.size != self.shape[1] : + raise RuntimeError( "A.dot(): dimensions do not match" ) + + # Create output array + cdef double [::1] v_out = np.zeros( self.shape[0], dtype=np.float64 ) + + # Call the cython function to read the memory pointers + if not self.adjoint : + # DIRECT PRODUCT A*x + with nogil : + COMMIT_A( + self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, + &v_in[0], &v_out[0], + self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv, + self.LUT_IC, self.LUT_EC, self.LUT_ISO, + self.ICthreads, self.ECthreads, self.ISOthreads + ) + else : + # INVERSE PRODUCT A'*y + with nogil : + COMMIT_At( + self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, + &v_in[0], &v_out[0], + self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv, + self.LUT_IC, self.LUT_EC, self.LUT_ISO, + self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT + ) + + if self.tikhonov_equalizer > 0: + if not self.adjoint: + # DIRECT PRODUCT lambda*L*x + if self.deriv_matrix == 'L1': + with nogil: + COMMIT_L1( + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + &v_in[0], &v_out[0] + ) + elif self.deriv_matrix == 'L2': + with nogil: + COMMIT_L2( + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + &v_in[0], &v_out[0] + ) + elif self.deriv_matrix == 'L1z': + with nogil: + COMMIT_L1z( + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + &v_in[0], &v_out[0] + ) + elif self.deriv_matrix == 'L2z': + with nogil: + COMMIT_L2z( + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + &v_in[0], &v_out[0] + ) + else: + # INVERSE PRODUCT lambda*L'*y + if self.deriv_matrix == 'L1': + with nogil: + COMMIT_L1t( + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + &v_in[0], &v_out[0] + ) + elif self.deriv_matrix == 'L2': + with nogil: + COMMIT_L2t( + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + &v_in[0], &v_out[0] + ) + elif self.deriv_matrix == 'L1z': + with nogil: + COMMIT_L1zt( + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + &v_in[0], &v_out[0] + ) + elif self.deriv_matrix == 'L2z': + with nogil: + COMMIT_L2zt( + self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + &v_in[0], &v_out[0] + ) + + return v_out diff --git a/commit/operator/operator_noLUT.c b/commit/operator/operator_noLUT.c index 7510f9bb..0e8da715 100644 --- a/commit/operator/operator_noLUT.c +++ b/commit/operator/operator_noLUT.c @@ -1,246 +1,246 @@ -#include -#include // uint32_t etc - -// number of THREADS -#ifdef nTHREADS - #if (nTHREADS<1 || nTHREADS>255) - #error "nTHREADS" must be in the range 1..255 - 
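Because the regularization rows are folded directly into A.dot() and its transpose, a quick way to validate a build is to compare the two products on random vectors; the operator listed above exposes everything needed. A hypothetical sanity check (not part of the patch; it assumes mit is an Evaluation on which build_operator() has already been called):

import numpy as np

A = mit.A
x = np.random.rand(A.shape[1])
y = np.random.rand(A.shape[0])

# <A x, y> and <x, A' y> should agree up to floating-point error
assert np.isclose(np.dot(np.asarray(A.dot(x)), y),
                  np.dot(x, np.asarray(A.T.dot(y))), rtol=1e-4)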
#endif -#else - #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=" -#endif - - -/* global variables */ -int nF, n; -double *x, *Y; -uint32_t *ICthreads, *ISOthreads; -uint8_t *ICthreadsT; -uint32_t *ISOthreadsT; -uint32_t *ICf, *ICv, *ISOv; -float *ICl; - - -// ==================================================== -// Compute a sub-block of the A*x MAtRIX-VECTOR product -// ==================================================== -void* COMMIT_A__block( void *ptr ) -{ - int id = (long)ptr; - double x0; - double *xPtr; - uint32_t *t_v, *t_vEnd, *t_f; - float *t_l; - - // intra-cellular compartments - t_v = ICv + ICthreads[id]; - t_vEnd = ICv + ICthreads[id+1]; - t_l = ICl + ICthreads[id]; - t_f = ICf + ICthreads[id]; - - while( t_v != t_vEnd ) - { - x0 = x[*t_f]; - if ( x0 != 0 ) - Y[*t_v] += (double)(*t_l) * x0; - t_f++; - t_v++; - t_l++; - } - -#if nISO>=1 - // isotropic compartments - t_v = ISOv + ISOthreads[id]; - t_vEnd = ISOv + ISOthreads[id+1]; - xPtr = x + nF + ISOthreads[id]; - - while( t_v != t_vEnd ) - { - x0 = *xPtr++; - if ( x0 != 0 ) - Y[*t_v] += x0; - t_v++; - } -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_A( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads -) -{ - nF = _nF; - n = _n; - - x = _vIN; - Y = _vOUT; - - ICf = _ICf; - ICv = _ICv; - ICl = _ICl; - ISOv = _ISOv; - - ICthreads = _ICthreads; - ISOthreads = _ISOthreads; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t=1 - // isotropic compartments - t_v = ISOv + ISOthreadsT[id]; - t_vEnd = ISOv + ISOthreadsT[id+1]; - xPtr = x + nF + ISOthreadsT[id]; - - while( t_v != t_vEnd ) - (*xPtr++) += Y[*t_v++]; -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_At( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT -) -{ - nF = _nF; - n = _n; - - x = _vOUT; - Y = _vIN; - - ICf = _ICf; - ICv = _ICv; - ICl = _ICl; - ISOv = _ISOv; - - ICthreadsT = _ICthreadsT; - ISOthreadsT = _ISOthreadsT; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t +#include // uint32_t etc + +// number of THREADS +#ifdef nTHREADS + #if (nTHREADS<1 || nTHREADS>255) + #error "nTHREADS" must be in the range 1..255 + #endif +#else + #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=" +#endif + + +/* global variables */ +int nF, n; +double *x, *Y; +uint32_t *ICthreads, *ISOthreads; +uint8_t *ICthreadsT; +uint32_t *ISOthreadsT; +uint32_t *ICf, *ICv, *ISOv; +float *ICl; + + +// ==================================================== +// Compute a sub-block of the A*x MAtRIX-VECTOR product +// ==================================================== +void* COMMIT_A__block( void *ptr ) +{ + int id = (long)ptr; + double x0; + double *xPtr; + uint32_t *t_v, *t_vEnd, *t_f; + float *t_l; + + // 
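In both operator files each thread walks the half-open range of segments [ICthreads[id], ICthreads[id+1]), so the whole multi-threaded product is driven by a single array of boundary offsets. A minimal sketch of building such an array (hypothetical; the real THREADS structure is prepared by set_threads() and may additionally align the chunk boundaries so that threads never write to the same output entry):

import numpy as np

n_segments, n_threads = 1_000_003, 8
offsets = np.linspace(0, n_segments, n_threads + 1).astype(np.uint32)
# thread t then processes ICv[offsets[t]:offsets[t+1]], ICl[...], ICf[...]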
intra-cellular compartments + t_v = ICv + ICthreads[id]; + t_vEnd = ICv + ICthreads[id+1]; + t_l = ICl + ICthreads[id]; + t_f = ICf + ICthreads[id]; + + while( t_v != t_vEnd ) + { + x0 = x[*t_f]; + if ( x0 != 0 ) + Y[*t_v] += (double)(*t_l) * x0; + t_f++; + t_v++; + t_l++; + } + +#if nISO>=1 + // isotropic compartments + t_v = ISOv + ISOthreads[id]; + t_vEnd = ISOv + ISOthreads[id+1]; + xPtr = x + nF + ISOthreads[id]; + + while( t_v != t_vEnd ) + { + x0 = *xPtr++; + if ( x0 != 0 ) + Y[*t_v] += x0; + t_v++; + } +#endif + + pthread_exit( 0 ); +} + + +// ========================= +// Function called by CYTHON +// ========================= +void COMMIT_A( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads +) +{ + nF = _nF; + n = _n; + + x = _vIN; + Y = _vOUT; + + ICf = _ICf; + ICv = _ICv; + ICl = _ICl; + ISOv = _ISOv; + + ICthreads = _ICthreads; + ISOthreads = _ISOthreads; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[nTHREADS]; + int t; + for(t=0; t=1 + // isotropic compartments + t_v = ISOv + ISOthreadsT[id]; + t_vEnd = ISOv + ISOthreadsT[id+1]; + xPtr = x + nF + ISOthreadsT[id]; + + while( t_v != t_vEnd ) + (*xPtr++) += Y[*t_v++]; +#endif + + pthread_exit( 0 ); +} + + +// ========================= +// Function called by CYTHON +// ========================= +void COMMIT_At( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT +) +{ + nF = _nF; + n = _n; + + x = _vOUT; + Y = _vIN; + + ICf = _ICf; + ICv = _ICv; + ICl = _ICl; + ISOv = _ISOv; + + ICthreadsT = _ICthreadsT; + ISOthreadsT = _ISOthreadsT; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[nTHREADS]; + int t; + for(t=0; t -#include // uint32_t etc - -// number of THREADS -#ifdef nTHREADS - #if (nTHREADS<1 || nTHREADS>255) - #error "nTHREADS" must be in the range 1..255 - #endif -#else - #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=" -#endif - - -/* global variables */ -int nF, n, nE, nV, nS, ndirs; -double *x, *Y; -uint32_t *ICthreads, *ECthreads, *ISOthreads; -uint8_t *ICthreadsT; -uint32_t *ECthreadsT, *ISOthreadsT; -uint32_t *ICf, *ICv, *ECv, *ISOv; -uint16_t *ICo, *ECo; -float *ICl; -float *wmrSFP0, *wmrSFP1, *wmrSFP2, *wmrSFP3, *wmrSFP4, *wmrSFP5, *wmrSFP6, *wmrSFP7, *wmrSFP8, *wmrSFP9, *wmrSFP10, *wmrSFP11, *wmrSFP12, *wmrSFP13, *wmrSFP14, *wmrSFP15, *wmrSFP16, *wmrSFP17, *wmrSFP18, *wmrSFP19; -float *wmhSFP0, *wmhSFP1, *wmhSFP2, *wmhSFP3, *wmhSFP4, *wmhSFP5, *wmhSFP6, *wmhSFP7, *wmhSFP8, *wmhSFP9, *wmhSFP10, *wmhSFP11, *wmhSFP12, *wmhSFP13, *wmhSFP14, *wmhSFP15, *wmhSFP16, *wmhSFP17, *wmhSFP18, *wmhSFP19; -float *isoSFP0, *isoSFP1, *isoSFP2, *isoSFP3, *isoSFP4, *isoSFP5, *isoSFP6, *isoSFP7, *isoSFP8, *isoSFP9, *isoSFP10, *isoSFP11, *isoSFP12, *isoSFP13, *isoSFP14, *isoSFP15, *isoSFP16, *isoSFP17, *isoSFP18, *isoSFP19; - - - -// ==================================================== -// Compute a sub-block of the A*x MAtRIX-VECTOR product -// ==================================================== -void* 
COMMIT_A__block( void *ptr ) -{ - int id = (long)ptr; - int offset; - double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w; - double *x_Ptr0, *x_Ptr1, *x_Ptr2, *x_Ptr3, *x_Ptr4, *x_Ptr5, *x_Ptr6, *x_Ptr7, *x_Ptr8, *x_Ptr9, *x_Ptr10, *x_Ptr11, *x_Ptr12, *x_Ptr13, *x_Ptr14, *x_Ptr15, *x_Ptr16, *x_Ptr17, *x_Ptr18, *x_Ptr19; - double *Yptr, *YptrEnd; - float *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr; - uint32_t *t_v, *t_vEnd, *t_f; - uint16_t *t_o; - float *t_l; - -#if nIC>=1 - // intra-cellular compartments - t_v = ICv + ICthreads[id]; - t_vEnd = ICv + ICthreads[id+1]; - t_o = ICo + ICthreads[id]; - t_l = ICl + ICthreads[id]; - t_f = ICf + ICthreads[id]; - - while( t_v != t_vEnd ) - { - x_Ptr0 = x + *t_f; - x0 = *x_Ptr0; - #if nIC>=2 - x_Ptr1 = x_Ptr0 + nF; - x1 = *x_Ptr1; - #endif - #if nIC>=3 - x_Ptr2 = x_Ptr1 + nF; - x2 = *x_Ptr2; - #endif - #if nIC>=4 - x_Ptr3 = x_Ptr2 + nF; - x3 = *x_Ptr3; - #endif - #if nIC>=5 - x_Ptr4 = x_Ptr3 + nF; - x4 = *x_Ptr4; - #endif - #if nIC>=6 - x_Ptr5 = x_Ptr4 + nF; - x5 = *x_Ptr5; - #endif - #if nIC>=7 - x_Ptr6 = x_Ptr5 + nF; - x6 = *x_Ptr6; - #endif - #if nIC>=8 - x_Ptr7 = x_Ptr6 + nF; - x7 = *x_Ptr7; - #endif - #if nIC>=9 - x_Ptr8 = x_Ptr7 + nF; - x8 = *x_Ptr8; - #endif - #if nIC>=10 - x_Ptr9 = x_Ptr8 + nF; - x9 = *x_Ptr9; - #endif - #if nIC>=11 - x_Ptr10 = x_Ptr9 + nF; - x10 = *x_Ptr10; - #endif - #if nIC>=12 - x_Ptr11 = x_Ptr10 + nF; - x11 = *x_Ptr11; - #endif - #if nIC>=13 - x_Ptr12 = x_Ptr11 + nF; - x12 = *x_Ptr12; - #endif - #if nIC>=14 - x_Ptr13 = x_Ptr12 + nF; - x13 = *x_Ptr13; - #endif - #if nIC>=15 - x_Ptr14 = x_Ptr13 + nF; - x14 = *x_Ptr14; - #endif - #if nIC>=16 - x_Ptr15 = x_Ptr14 + nF; - x15 = *x_Ptr15; - #endif - #if nIC>=17 - x_Ptr16 = x_Ptr15 + nF; - x16 = *x_Ptr16; - #endif - #if nIC>=18 - x_Ptr17 = x_Ptr16 + nF; - x17 = *x_Ptr17; - #endif - #if nIC>=19 - x_Ptr18 = x_Ptr17 + nF; - x18 = *x_Ptr18; - #endif - #if nIC>=20 - x_Ptr19 = x_Ptr18 + nF; - x19 = *x_Ptr19; - #endif - - if ( x0 != 0 - #if nIC>=2 - || x1 != 0 - #endif - #if nIC>=3 - || x2 != 0 - #endif - #if nIC>=4 - || x3 != 0 - #endif - #if nIC>=5 - || x4 != 0 - #endif - #if nIC>=6 - || x5 != 0 - #endif - #if nIC>=7 - || x6 != 0 - #endif - #if nIC>=8 - || x7 != 0 - #endif - #if nIC>=9 - || x8 != 0 - #endif - #if nIC>=10 - || x9 != 0 - #endif - #if nIC>=11 - || x10 != 0 - #endif - #if nIC>=12 - || x11 != 0 - #endif - #if nIC>=13 - || x12 != 0 - #endif - #if nIC>=14 - || x13 != 0 - #endif - #if nIC>=15 - || x14 != 0 - #endif - #if nIC>=16 - || x15 != 0 - #endif - #if nIC>=17 - || x16 != 0 - #endif - #if nIC>=18 - || x17 != 0 - #endif - #if nIC>=19 - || x18 != 0 - #endif - #if nIC>=20 - || x19 != 0 - #endif - ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - w = (double)(*t_l); - offset = nS * (*t_o); - SFP0ptr = wmrSFP0 + offset; - #if nIC>=2 - SFP1ptr = wmrSFP1 + offset; - #endif - #if nIC>=3 - SFP2ptr = wmrSFP2 + offset; - #endif - #if nIC>=4 - SFP3ptr = wmrSFP3 + offset; - #endif - #if nIC>=5 - SFP4ptr = wmrSFP4 + offset; - #endif - #if nIC>=6 - SFP5ptr = wmrSFP5 + offset; - #endif - #if nIC>=7 - SFP6ptr = wmrSFP6 + offset; - #endif - #if nIC>=8 - SFP7ptr = wmrSFP7 + offset; - #endif - #if nIC>=9 - SFP8ptr = wmrSFP8 + offset; - #endif - #if nIC>=10 - SFP9ptr = wmrSFP9 + offset; - #endif - #if nIC>=11 - SFP10ptr = wmrSFP10 + offset; - #endif - #if nIC>=12 - SFP11ptr 
= wmrSFP11 + offset; - #endif - #if nIC>=13 - SFP12ptr = wmrSFP12 + offset; - #endif - #if nIC>=14 - SFP13ptr = wmrSFP13 + offset; - #endif - #if nIC>=15 - SFP14ptr = wmrSFP14 + offset; - #endif - #if nIC>=16 - SFP15ptr = wmrSFP15 + offset; - #endif - #if nIC>=17 - SFP16ptr = wmrSFP16 + offset; - #endif - #if nIC>=18 - SFP17ptr = wmrSFP17 + offset; - #endif - #if nIC>=19 - SFP18ptr = wmrSFP18 + offset; - #endif - #if nIC>=20 - SFP19ptr = wmrSFP19 + offset; - #endif - - while( Yptr != YptrEnd ) - (*Yptr++) += w * ( - x0 * (*SFP0ptr++) - #if nIC>=2 - + x1 * (*SFP1ptr++) - #endif - #if nIC>=3 - + x2 * (*SFP2ptr++) - #endif - #if nIC>=4 - + x3 * (*SFP3ptr++) - #endif - #if nIC>=5 - + x4 * (*SFP4ptr++) - #endif - #if nIC>=6 - + x5 * (*SFP5ptr++) - #endif - #if nIC>=7 - + x6 * (*SFP6ptr++) - #endif - #if nIC>=8 - + x7 * (*SFP7ptr++) - #endif - #if nIC>=9 - + x8 * (*SFP8ptr++) - #endif - #if nIC>=10 - + x9 * (*SFP9ptr++) - #endif - #if nIC>=11 - + x10 * (*SFP10ptr++) - #endif - #if nIC>=12 - + x11 * (*SFP11ptr++) - #endif - #if nIC>=13 - + x12 * (*SFP12ptr++) - #endif - #if nIC>=14 - + x13 * (*SFP13ptr++) - #endif - #if nIC>=15 - + x14 * (*SFP14ptr++) - #endif - #if nIC>=16 - + x15 * (*SFP15ptr++) - #endif - #if nIC>=17 - + x16 * (*SFP16ptr++) - #endif - #if nIC>=18 - + x17 * (*SFP17ptr++) - #endif - #if nIC>=19 - + x18 * (*SFP18ptr++) - #endif - #if nIC>=20 - + x19 * (*SFP19ptr++) - #endif - ); - } - - t_f++; - t_v++; - t_o++; - t_l++; - } -#endif - -#if nEC>=1 - // extra-cellular compartments - t_v = ECv + ECthreads[id]; - t_vEnd = ECv + ECthreads[id+1]; - t_o = ECo + ECthreads[id]; - - x_Ptr0 = x + nIC*nF + ECthreads[id]; - #if nEC>=2 - x_Ptr1 = x_Ptr0 + nE; - #endif - #if nEC>=3 - x_Ptr2 = x_Ptr1 + nE; - #endif - #if nEC>=4 - x_Ptr3 = x_Ptr2 + nE; - #endif - #if nEC>=5 - x_Ptr4 = x_Ptr3 + nE; - #endif - #if nEC>=6 - x_Ptr5 = x_Ptr4 + nE; - #endif - #if nEC>=7 - x_Ptr6 = x_Ptr5 + nE; - #endif - #if nEC>=8 - x_Ptr7 = x_Ptr6 + nE; - #endif - #if nEC>=9 - x_Ptr8 = x_Ptr7 + nE; - #endif - #if nEC>=10 - x_Ptr9 = x_Ptr8 + nE; - #endif - #if nEC>=11 - x_Ptr10 = x_Ptr9 + nE; - #endif - #if nEC>=12 - x_Ptr11 = x_Ptr10 + nE; - #endif - #if nEC>=13 - x_Ptr12 = x_Ptr11 + nE; - #endif - #if nEC>=14 - x_Ptr13 = x_Ptr12 + nE; - #endif - #if nEC>=15 - x_Ptr14 = x_Ptr13 + nE; - #endif - #if nEC>=16 - x_Ptr15 = x_Ptr14 + nE; - #endif - #if nEC>=17 - x_Ptr16 = x_Ptr15 + nE; - #endif - #if nEC>=18 - x_Ptr17 = x_Ptr16 + nE; - #endif - #if nEC>=19 - x_Ptr18 = x_Ptr17 + nE; - #endif - #if nEC>=20 - x_Ptr19 = x_Ptr18 + nE; - #endif - - while( t_v != t_vEnd ) - { - x0 = *x_Ptr0++; - #if nEC>=2 - x1 = *x_Ptr1++; - #endif - #if nEC>=3 - x2 = *x_Ptr2++; - #endif - #if nEC>=4 - x3 = *x_Ptr3++; - #endif - #if nEC>=5 - x4 = *x_Ptr4++; - #endif - #if nEC>=6 - x5 = *x_Ptr5++; - #endif - #if nEC>=7 - x6 = *x_Ptr6++; - #endif - #if nEC>=8 - x7 = *x_Ptr7++; - #endif - #if nEC>=9 - x8 = *x_Ptr8++; - #endif - #if nEC>=10 - x9 = *x_Ptr9++; - #endif - #if nEC>=11 - x10 = *x_Ptr10++; - #endif - #if nEC>=12 - x11 = *x_Ptr11++; - #endif - #if nEC>=13 - x12 = *x_Ptr12++; - #endif - #if nEC>=14 - x13 = *x_Ptr13++; - #endif - #if nEC>=15 - x14 = *x_Ptr14++; - #endif - #if nEC>=16 - x15 = *x_Ptr15++; - #endif - #if nEC>=17 - x16 = *x_Ptr16++; - #endif - #if nEC>=18 - x17 = *x_Ptr17++; - #endif - #if nEC>=19 - x18 = *x_Ptr18++; - #endif - #if nEC>=20 - x19 = *x_Ptr19++; - #endif - if ( - x0 != 0 - #if nEC>=2 - || x1 != 0 - #endif - #if nEC>=3 - || x2 != 0 - #endif - #if nEC>=4 - || x3 != 0 - #endif - #if nEC>=5 - || x4 != 0 - #endif - #if 
nEC>=6 - || x5 != 0 - #endif - #if nEC>=7 - || x6 != 0 - #endif - #if nEC>=8 - || x7 != 0 - #endif - #if nEC>=9 - || x8 != 0 - #endif - #if nEC>=10 - || x9 != 0 - #endif - #if nEC>=11 - || x10 != 0 - #endif - #if nEC>=12 - || x11 != 0 - #endif - #if nEC>=13 - || x12 != 0 - #endif - #if nEC>=14 - || x13 != 0 - #endif - #if nEC>=15 - || x14 != 0 - #endif - #if nEC>=16 - || x15 != 0 - #endif - #if nEC>=17 - || x16 != 0 - #endif - #if nEC>=18 - || x17 != 0 - #endif - #if nEC>=19 - || x18 != 0 - #endif - #if nEC>=20 - || x19 != 0 - #endif - ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - offset = nS * (*t_o); - SFP0ptr = wmhSFP0 + offset; - #if nEC>=2 - SFP1ptr = wmhSFP1 + offset; - #endif - #if nEC>=3 - SFP2ptr = wmhSFP2 + offset; - #endif - #if nEC>=4 - SFP3ptr = wmhSFP3 + offset; - #endif - #if nEC>=5 - SFP4ptr = wmhSFP4 + offset; - #endif - #if nEC>=6 - SFP5ptr = wmhSFP5 + offset; - #endif - #if nEC>=7 - SFP6ptr = wmhSFP6 + offset; - #endif - #if nEC>=8 - SFP7ptr = wmhSFP7 + offset; - #endif - #if nEC>=9 - SFP8ptr = wmhSFP8 + offset; - #endif - #if nEC>=10 - SFP9ptr = wmhSFP9 + offset; - #endif - #if nEC>=11 - SFP10ptr = wmhSFP10 + offset; - #endif - #if nEC>=12 - SFP11ptr = wmhSFP11 + offset; - #endif - #if nEC>=13 - SFP12ptr = wmhSFP12 + offset; - #endif - #if nEC>=14 - SFP13ptr = wmhSFP13 + offset; - #endif - #if nEC>=15 - SFP14ptr = wmhSFP14 + offset; - #endif - #if nEC>=16 - SFP15ptr = wmhSFP15 + offset; - #endif - #if nEC>=17 - SFP16ptr = wmhSFP16 + offset; - #endif - #if nEC>=18 - SFP17ptr = wmhSFP17 + offset; - #endif - #if nEC>=19 - SFP18ptr = wmhSFP18 + offset; - #endif - #if nEC>=20 - SFP19ptr = wmhSFP19 + offset; - #endif - - while( Yptr != YptrEnd ) - (*Yptr++) += ( - x0 * (*SFP0ptr++) - #if nEC>=2 - + x1 * (*SFP1ptr++) - #endif - #if nEC>=3 - + x2 * (*SFP2ptr++) - #endif - #if nEC>=4 - + x3 * (*SFP3ptr++) - #endif - #if nEC>=5 - + x4 * (*SFP4ptr++) - #endif - #if nEC>=6 - + x5 * (*SFP5ptr++) - #endif - #if nEC>=7 - + x6 * (*SFP6ptr++) - #endif - #if nEC>=8 - + x7 * (*SFP7ptr++) - #endif - #if nEC>=9 - + x8 * (*SFP8ptr++) - #endif - #if nEC>=10 - + x9 * (*SFP9ptr++) - #endif - #if nEC>=11 - + x10 * (*SFP10ptr++) - #endif - #if nEC>=12 - + x11 * (*SFP11ptr++) - #endif - #if nEC>=13 - + x12 * (*SFP12ptr++) - #endif - #if nEC>=14 - + x13 * (*SFP13ptr++) - #endif - #if nEC>=15 - + x14 * (*SFP14ptr++) - #endif - #if nEC>=16 - + x15 * (*SFP15ptr++) - #endif - #if nEC>=17 - + x16 * (*SFP16ptr++) - #endif - #if nEC>=18 - + x17 * (*SFP17ptr++) - #endif - #if nEC>=19 - + x18 * (*SFP18ptr++) - #endif - #if nEC>=20 - + x19 * (*SFP19ptr++) - #endif - - ); - } - t_v++; - t_o++; - } -#endif - -#if nISO>=1 - // isotropic compartments - t_v = ISOv + ISOthreads[id]; - t_vEnd = ISOv + ISOthreads[id+1]; - - x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreads[id]; - #if nISO>=2 - x_Ptr1 = x_Ptr0 + nV; - #endif - #if nISO>=3 - x_Ptr2 = x_Ptr1 + nV; - #endif - #if nISO>=4 - x_Ptr3 = x_Ptr2 + nV; - #endif - #if nISO>=5 - x_Ptr4 = x_Ptr3 + nV; - #endif - #if nISO>=6 - x_Ptr5 = x_Ptr4 + nV; - #endif - #if nISO>=7 - x_Ptr6 = x_Ptr5 + nV; - #endif - #if nISO>=8 - x_Ptr7 = x_Ptr6 + nV; - #endif - #if nISO>=9 - x_Ptr8 = x_Ptr7 + nV; - #endif - #if nISO>=10 - x_Ptr9 = x_Ptr8 + nV; - #endif - #if nISO>=11 - x_Ptr10 = x_Ptr9 + nV; - #endif - #if nISO>=12 - x_Ptr11 = x_Ptr10 + nV; - #endif - #if nISO>=13 - x_Ptr12 = x_Ptr11 + nV; - #endif - #if nISO>=14 - x_Ptr13 = x_Ptr12 + nV; - #endif - #if nISO>=15 - x_Ptr14 = x_Ptr13 + nV; - #endif - #if nISO>=16 - x_Ptr15 = x_Ptr14 + nV; - #endif - #if nISO>=17 - x_Ptr16 = 
x_Ptr15 + nV; - #endif - #if nISO>=18 - x_Ptr17 = x_Ptr16 + nV; - #endif - #if nISO>=19 - x_Ptr18 = x_Ptr17 + nV; - #endif - #if nISO>=20 - x_Ptr19 = x_Ptr18 + nV; - #endif - - while( t_v != t_vEnd ) - { - x0 = *x_Ptr0++; - #if nISO>=2 - x1 = *x_Ptr1++; - #endif - #if nISO>=3 - x2 = *x_Ptr2++; - #endif - #if nISO>=4 - x3 = *x_Ptr3++; - #endif - #if nISO>=5 - x4 = *x_Ptr4++; - #endif - #if nISO>=6 - x5 = *x_Ptr5++; - #endif - #if nISO>=7 - x6 = *x_Ptr6++; - #endif - #if nISO>=8 - x7 = *x_Ptr7++; - #endif - #if nISO>=9 - x8 = *x_Ptr8++; - #endif - #if nISO>=10 - x9 = *x_Ptr9++; - #endif - #if nISO>=11 - x10 = *x_Ptr10++; - #endif - #if nISO>=12 - x11 = *x_Ptr11++; - #endif - #if nISO>=13 - x12 = *x_Ptr12++; - #endif - #if nISO>=14 - x13 = *x_Ptr13++; - #endif - #if nISO>=15 - x14 = *x_Ptr14++; - #endif - #if nISO>=16 - x15 = *x_Ptr15++; - #endif - #if nISO>=17 - x16 = *x_Ptr16++; - #endif - #if nISO>=18 - x17 = *x_Ptr17++; - #endif - #if nISO>=19 - x18 = *x_Ptr18++; - #endif - #if nISO>=20 - x19 = *x_Ptr19++; - #endif - - if ( - x0 != 0 - #if nISO>=2 - || x1 != 0 - #endif - #if nISO>=3 - || x2 != 0 - #endif - #if nISO>=4 - || x3 != 0 - #endif - #if nISO>=5 - || x4 != 0 - #endif - #if nISO>=6 - || x5 != 0 - #endif - #if nISO>=7 - || x6 != 0 - #endif - #if nISO>=8 - || x7 != 0 - #endif - #if nISO>=9 - || x8 != 0 - #endif - #if nISO>=10 - || x9 != 0 - #endif - #if nISO>=11 - || x10 != 0 - #endif - #if nISO>=12 - || x11 != 0 - #endif - #if nISO>=13 - || x12 != 0 - #endif - #if nISO>=14 - || x13 != 0 - #endif - #if nISO>=15 - || x14 != 0 - #endif - #if nISO>=16 - || x15 != 0 - #endif - #if nISO>=17 - || x16 != 0 - #endif - #if nISO>=18 - || x17 != 0 - #endif - #if nISO>=19 - || x18 != 0 - #endif - #if nISO>=20 - || x19 != 0 - #endif - ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - SFP0ptr = isoSFP0; - #if nISO>=2 - SFP1ptr = isoSFP1; - #endif - #if nISO>=3 - SFP2ptr = isoSFP2; - #endif - #if nISO>=4 - SFP3ptr = isoSFP3; - #endif - #if nISO>=5 - SFP4ptr = isoSFP4; - #endif - #if nISO>=6 - SFP5ptr = isoSFP5; - #endif - #if nISO>=7 - SFP6ptr = isoSFP6; - #endif - #if nISO>=8 - SFP7ptr = isoSFP7; - #endif - #if nISO>=9 - SFP8ptr = isoSFP8; - #endif - #if nISO>=10 - SFP9ptr = isoSFP9; - #endif - #if nISO>=11 - SFP10ptr = isoSFP10; - #endif - #if nISO>=12 - SFP11ptr = isoSFP11; - #endif - #if nISO>=13 - SFP12ptr = isoSFP12; - #endif - #if nISO>=14 - SFP13ptr = isoSFP13; - #endif - #if nISO>=15 - SFP14ptr = isoSFP14; - #endif - #if nISO>=16 - SFP15ptr = isoSFP15; - #endif - #if nISO>=17 - SFP16ptr = isoSFP16; - #endif - #if nISO>=18 - SFP17ptr = isoSFP17; - #endif - #if nISO>=19 - SFP18ptr = isoSFP18; - #endif - #if nISO>=20 - SFP19ptr = isoSFP19; - #endif - - while( Yptr != YptrEnd ) - (*Yptr++) += ( - x0 * (*SFP0ptr++) - #if nISO>=2 - + x1 * (*SFP1ptr++) - #endif - #if nISO>=3 - + x2 * (*SFP2ptr++) - #endif - #if nISO>=4 - + x3 * (*SFP3ptr++) - #endif - #if nISO>=5 - + x4 * (*SFP4ptr++) - #endif - #if nISO>=6 - + x5 * (*SFP5ptr++) - #endif - #if nISO>=7 - + x6 * (*SFP6ptr++) - #endif - #if nISO>=8 - + x7 * (*SFP7ptr++) - #endif - #if nISO>=9 - + x8 * (*SFP8ptr++) - #endif - #if nISO>=10 - + x9 * (*SFP9ptr++) - #endif - #if nISO>=11 - + x10 * (*SFP10ptr++) - #endif - #if nISO>=12 - + x11 * (*SFP11ptr++) - #endif - #if nISO>=13 - + x12 * (*SFP12ptr++) - #endif - #if nISO>=14 - + x13 * (*SFP13ptr++) - #endif - #if nISO>=15 - + x14 * (*SFP14ptr++) - #endif - #if nISO>=16 - + x15 * (*SFP15ptr++) - #endif - #if nISO>=17 - + x16 * (*SFP16ptr++) - #endif - #if nISO>=18 - + x17 * (*SFP17ptr++) - 
#endif - #if nISO>=19 - + x18 * (*SFP18ptr++) - #endif - #if nISO>=20 - + x19 * (*SFP19ptr++) - #endif - ); - } - t_v++; - } -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_A( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads -) -{ - nF = _nF; - n = _n; - nE = _nE; - nV = _nV; - nS = _nS; - ndirs = _ndirs; - - x = _vIN; - Y = _vOUT; - - ICf = _ICf; - ICv = _ICv; - ICo = _ICo; - ICl = _ICl; - ECv = _ECv; - ECo = _ECo; - ISOv = _ISOv; - - #if nIC>=1 - wmrSFP0 = _wmrSFP; - #if nIC>=2 - wmrSFP1 = wmrSFP0 + _ndirs*_nS; - #if nIC>=3 - wmrSFP2 = wmrSFP1 + _ndirs*_nS; - #if nIC>=4 - wmrSFP3 = wmrSFP2 + _ndirs*_nS; - #if nIC>=5 - wmrSFP4 = wmrSFP3 + _ndirs*_nS; - #if nIC>=6 - wmrSFP5 = wmrSFP4 + _ndirs*_nS; - #if nIC>=7 - wmrSFP6 = wmrSFP5 + _ndirs*_nS; - #if nIC>=8 - wmrSFP7 = wmrSFP6 + _ndirs*_nS; - #if nIC>=9 - wmrSFP8 = wmrSFP7 + _ndirs*_nS; - #if nIC>=10 - wmrSFP9 = wmrSFP8 + _ndirs*_nS; - #if nIC>=11 - wmrSFP10 = wmrSFP9 + _ndirs*_nS; - #if nIC>=12 - wmrSFP11 = wmrSFP10 + _ndirs*_nS; - #if nIC>=13 - wmrSFP12 = wmrSFP11 + _ndirs*_nS; - #if nIC>=14 - wmrSFP13 = wmrSFP12 + _ndirs*_nS; - #if nIC>=15 - wmrSFP14 = wmrSFP13 + _ndirs*_nS; - #if nIC>=16 - wmrSFP15 = wmrSFP14 + _ndirs*_nS; - #if nIC>=17 - wmrSFP16 = wmrSFP15 + _ndirs*_nS; - #if nIC>=18 - wmrSFP17 = wmrSFP16 + _ndirs*_nS; - #if nIC>=19 - wmrSFP18 = wmrSFP17 + _ndirs*_nS; - #if nIC>=20 - wmrSFP19 = wmrSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nEC>=1 - wmhSFP0 = _wmhSFP; - #if nEC>=2 - wmhSFP1 = wmhSFP0 + _ndirs*_nS; - #if nEC>=3 - wmhSFP2 = wmhSFP1 + _ndirs*_nS; - #if nEC>=4 - wmhSFP3 = wmhSFP2 + _ndirs*_nS; - #if nEC>=5 - wmhSFP4 = wmhSFP3 + _ndirs*_nS; - #if nEC>=6 - wmhSFP5 = wmhSFP4 + _ndirs*_nS; - #if nEC>=7 - wmhSFP6 = wmhSFP5 + _ndirs*_nS; - #if nEC>=8 - wmhSFP7 = wmhSFP6 + _ndirs*_nS; - #if nEC>=9 - wmhSFP8 = wmhSFP7 + _ndirs*_nS; - #if nEC>=10 - wmhSFP9 = wmhSFP8 + _ndirs*_nS; - #if nEC>=11 - wmhSFP10 = wmhSFP9 + _ndirs*_nS; - #if nEC>=12 - wmhSFP11 = wmhSFP10 + _ndirs*_nS; - #if nEC>=13 - wmhSFP12 = wmhSFP11 + _ndirs*_nS; - #if nEC>=14 - wmhSFP13 = wmhSFP12 + _ndirs*_nS; - #if nEC>=15 - wmhSFP14 = wmhSFP13 + _ndirs*_nS; - #if nEC>=16 - wmhSFP15 = wmhSFP14 + _ndirs*_nS; - #if nEC>=17 - wmhSFP16 = wmhSFP15 + _ndirs*_nS; - #if nEC>=18 - wmhSFP17 = wmhSFP16 + _ndirs*_nS; - #if nEC>=19 - wmhSFP18 = wmhSFP17 + _ndirs*_nS; - #if nEC>=20 - wmhSFP19 = wmhSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nISO>=1 - isoSFP0 = _isoSFP; - #if nISO>=2 - isoSFP1 = isoSFP0 + _nS; - #if nISO>=3 - isoSFP2 = isoSFP1 + _nS; - #if nISO>=4 - isoSFP3 = isoSFP2 + _nS; - #if nISO>=5 - isoSFP4 = isoSFP3 + _nS; - #if nISO>=6 - isoSFP5 = isoSFP4 + _nS; - #if nISO>=7 - isoSFP6 = isoSFP5 + _nS; - #if nISO>=8 - isoSFP7 = isoSFP6 + _nS; - #if nISO>=9 - isoSFP8 = isoSFP7 + _nS; - #if nISO>=10 - isoSFP9 = isoSFP8 + _nS; - #if nISO>=11 - isoSFP10 = isoSFP9 + _nS; - #if nISO>=12 - isoSFP11 = 
isoSFP10 + _nS; - #if nISO>=13 - isoSFP12 = isoSFP11 + _nS; - #if nISO>=14 - isoSFP13 = isoSFP12 + _nS; - #if nISO>=15 - isoSFP14 = isoSFP13 + _nS; - #if nISO>=16 - isoSFP15 = isoSFP14 + _nS; - #if nISO>=17 - isoSFP16 = isoSFP15 + _nS; - #if nISO>=18 - isoSFP17 = isoSFP16 + _nS; - #if nISO>=19 - isoSFP18 = isoSFP17 + _nS; - #if nISO>=20 - isoSFP19 = isoSFP18 + _nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - - ICthreads = _ICthreads; - ECthreads = _ECthreads; - ISOthreads = _ISOthreads; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t=1 - // intra-cellular compartments - t_v = ICv; - t_vEnd = ICv + n; - t_o = ICo; - t_l = ICl; - t_f = ICf; - t_t = ICthreadsT; - - while( t_v != t_vEnd ) - { - // in this case, I need to walk throug because the segments are ordered in "voxel order" - if ( *t_t == id ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - offset = nS * (*t_o); - - Y_tmp = *Yptr; - SFP0ptr = wmrSFP0 + offset; - x0 = (*SFP0ptr++) * Y_tmp; - #if nIC>=2 - SFP1ptr = wmrSFP1 + offset; - x1 = (*SFP1ptr++) * Y_tmp; - #endif - #if nIC>=3 - SFP2ptr = wmrSFP2 + offset; - x2 = (*SFP2ptr++) * Y_tmp; - #endif - #if nIC>=4 - SFP3ptr = wmrSFP3 + offset; - x3 = (*SFP3ptr++) * Y_tmp; - #endif - #if nIC>=5 - SFP4ptr = wmrSFP4 + offset; - x4 = (*SFP4ptr++) * Y_tmp; - #endif - #if nIC>=6 - SFP5ptr = wmrSFP5 + offset; - x5 = (*SFP5ptr++) * Y_tmp; - #endif - #if nIC>=7 - SFP6ptr = wmrSFP6 + offset; - x6 = (*SFP6ptr++) * Y_tmp; - #endif - #if nIC>=8 - SFP7ptr = wmrSFP7 + offset; - x7 = (*SFP7ptr++) * Y_tmp; - #endif - #if nIC>=9 - SFP8ptr = wmrSFP8 + offset; - x8 = (*SFP8ptr++) * Y_tmp; - #endif - #if nIC>=10 - SFP9ptr = wmrSFP9 + offset; - x9 = (*SFP9ptr++) * Y_tmp; - #endif - #if nIC>=11 - SFP10ptr = wmrSFP10 + offset; - x10 = (*SFP10ptr++) * Y_tmp; - #endif - #if nIC>=12 - SFP11ptr = wmrSFP11 + offset; - x11 = (*SFP11ptr++) * Y_tmp; - #endif - #if nIC>=13 - SFP12ptr = wmrSFP12 + offset; - x12 = (*SFP12ptr++) * Y_tmp; - #endif - #if nIC>=14 - SFP13ptr = wmrSFP13 + offset; - x13 = (*SFP13ptr++) * Y_tmp; - #endif - #if nIC>=15 - SFP14ptr = wmrSFP14 + offset; - x14 = (*SFP14ptr++) * Y_tmp; - #endif - #if nIC>=16 - SFP15ptr = wmrSFP15 + offset; - x15 = (*SFP15ptr++) * Y_tmp; - #endif - #if nIC>=17 - SFP16ptr = wmrSFP16 + offset; - x16 = (*SFP16ptr++) * Y_tmp; - #endif - #if nIC>=18 - SFP17ptr = wmrSFP17 + offset; - x17 = (*SFP17ptr++) * Y_tmp; - #endif - #if nIC>=19 - SFP18ptr = wmrSFP18 + offset; - x18 = (*SFP18ptr++) * Y_tmp; - #endif - #if nIC>=20 - SFP19ptr = wmrSFP19 + offset; - x19 = (*SFP19ptr++) * Y_tmp; - #endif - - while( ++Yptr != YptrEnd ) - { - Y_tmp = *Yptr; - x0 += (*SFP0ptr++) * Y_tmp; - #if nIC>=2 - x1 += (*SFP1ptr++) * Y_tmp; - #endif - #if nIC>=3 - x2 += (*SFP2ptr++) * Y_tmp; - #endif - #if nIC>=4 - x3 += (*SFP3ptr++) * Y_tmp; - #endif - #if nIC>=5 - x4 += (*SFP4ptr++) * Y_tmp; - #endif - #if nIC>=6 - x5 += (*SFP5ptr++) * Y_tmp; - #endif - #if nIC>=7 - x6 += (*SFP6ptr++) * Y_tmp; - #endif - #if nIC>=8 - x7 += (*SFP7ptr++) * Y_tmp; - #endif - #if nIC>=9 - x8 += (*SFP8ptr++) * Y_tmp; - #endif - #if nIC>=10 - x9 += (*SFP9ptr++) * Y_tmp; - #endif - #if nIC>=11 - x10 += (*SFP10ptr++) * Y_tmp; - #endif - #if nIC>=12 - x11 += (*SFP11ptr++) * Y_tmp; - #endif - #if nIC>=13 - x12 += (*SFP12ptr++) * Y_tmp; - #endif - #if nIC>=14 - x13 += (*SFP13ptr++) * Y_tmp; - #endif - #if nIC>=15 - x14 += 
(*SFP14ptr++) * Y_tmp; - #endif - #if nIC>=16 - x15 += (*SFP15ptr++) * Y_tmp; - #endif - #if nIC>=17 - x16 += (*SFP16ptr++) * Y_tmp; - #endif - #if nIC>=18 - x17 += (*SFP17ptr++) * Y_tmp; - #endif - #if nIC>=19 - x18 += (*SFP18ptr++) * Y_tmp; - #endif - #if nIC>=20 - x19 += (*SFP19ptr++) * Y_tmp; - #endif - } - - w = (double)(*t_l); - x[*t_f] += w * x0; - #if nIC>=2 - x[*t_f+nF] += w * x1; - #endif - #if nIC>=3 - x[*t_f+2*nF] += w * x2; - #endif - #if nIC>=4 - x[*t_f+3*nF] += w * x3; - #endif - #if nIC>=5 - x[*t_f+4*nF] += w * x4; - #endif - #if nIC>=6 - x[*t_f+5*nF] += w * x5; - #endif - #if nIC>=7 - x[*t_f+6*nF] += w * x6; - #endif - #if nIC>=8 - x[*t_f+7*nF] += w * x7; - #endif - #if nIC>=9 - x[*t_f+8*nF] += w * x8; - #endif - #if nIC>=10 - x[*t_f+9*nF] += w * x9; - #endif - #if nIC>=11 - x[*t_f+10*nF] += w * x10; - #endif - #if nIC>=12 - x[*t_f+11*nF] += w * x11; - #endif - #if nIC>=13 - x[*t_f+12*nF] += w * x12; - #endif - #if nIC>=14 - x[*t_f+13*nF] += w * x13; - #endif - #if nIC>=15 - x[*t_f+14*nF] += w * x14; - #endif - #if nIC>=16 - x[*t_f+15*nF] += w * x15; - #endif - #if nIC>=17 - x[*t_f+16*nF] += w * x16; - #endif - #if nIC>=18 - x[*t_f+17*nF] += w * x17; - #endif - #if nIC>=19 - x[*t_f+18*nF] += w * x18; - #endif - #if nIC>=20 - x[*t_f+19*nF] += w * x19; - #endif - } - - t_f++; - t_v++; - t_o++; - t_l++; - t_t++; - } -#endif - -#if nEC>=1 - // extra-cellular compartments - t_v = ECv + ECthreadsT[id]; - t_vEnd = ECv + ECthreadsT[id+1]; - t_o = ECo + ECthreadsT[id]; - - x_Ptr0 = x + nIC*nF + ECthreadsT[id]; - #if nEC>=2 - x_Ptr1 = x_Ptr0 + nE; - #endif - #if nEC>=3 - x_Ptr2 = x_Ptr1 + nE; - #endif - #if nEC>=4 - x_Ptr3 = x_Ptr2 + nE; - #endif - #if nEC>=5 - x_Ptr4 = x_Ptr3 + nE; - #endif - #if nEC>=6 - x_Ptr5 = x_Ptr4 + nE; - #endif - #if nEC>=7 - x_Ptr6 = x_Ptr5 + nE; - #endif - #if nEC>=8 - x_Ptr7 = x_Ptr6 + nE; - #endif - #if nEC>=9 - x_Ptr8 = x_Ptr7 + nE; - #endif - #if nEC>=10 - x_Ptr9 = x_Ptr8 + nE; - #endif - #if nEC>=11 - x_Ptr10 = x_Ptr9 + nE; - #endif - #if nEC>=12 - x_Ptr11 = x_Ptr10 + nE; - #endif - #if nEC>=13 - x_Ptr12 = x_Ptr11 + nE; - #endif - #if nEC>=14 - x_Ptr13 = x_Ptr12 + nE; - #endif - #if nEC>=15 - x_Ptr14 = x_Ptr13 + nE; - #endif - #if nEC>=16 - x_Ptr15 = x_Ptr14 + nE; - #endif - #if nEC>=17 - x_Ptr16 = x_Ptr15 + nE; - #endif - #if nEC>=18 - x_Ptr17 = x_Ptr16 + nE; - #endif - #if nEC>=19 - x_Ptr18 = x_Ptr17 + nE; - #endif - #if nEC>=20 - x_Ptr19 = x_Ptr18 + nE; - #endif - - while( t_v != t_vEnd ) - { - Yptr = Y + nS * (*t_v++); - YptrEnd = Yptr + nS; - offset = nS * (*t_o++); - - Y_tmp = *Yptr; - SFP0ptr = wmhSFP0 + offset; - x0 = (*SFP0ptr++) * Y_tmp; - #if nEC>=2 - SFP1ptr = wmhSFP1 + offset; - x1 = (*SFP1ptr++) * Y_tmp; - #endif - #if nEC>=3 - SFP2ptr = wmhSFP2 + offset; - x2 = (*SFP2ptr++) * Y_tmp; - #endif - #if nEC>=4 - SFP3ptr = wmhSFP3 + offset; - x3 = (*SFP3ptr++) * Y_tmp; - #endif - #if nEC>=5 - SFP4ptr = wmhSFP4 + offset; - x4 = (*SFP4ptr++) * Y_tmp; - #endif - #if nEC>=6 - SFP5ptr = wmhSFP5 + offset; - x5 = (*SFP5ptr++) * Y_tmp; - #endif - #if nEC>=7 - SFP6ptr = wmhSFP6 + offset; - x6 = (*SFP6ptr++) * Y_tmp; - #endif - #if nEC>=8 - SFP7ptr = wmhSFP7 + offset; - x7 = (*SFP7ptr++) * Y_tmp; - #endif - #if nEC>=9 - SFP8ptr = wmhSFP8 + offset; - x8 = (*SFP8ptr++) * Y_tmp; - #endif - #if nEC>=10 - SFP9ptr = wmhSFP9 + offset; - x9 = (*SFP9ptr++) * Y_tmp; - #endif - #if nEC>=11 - SFP10ptr = wmhSFP10 + offset; - x10 = (*SFP10ptr++) * Y_tmp; - #endif - #if nEC>=12 - SFP11ptr = wmhSFP11 + offset; - x11 = (*SFP11ptr++) * Y_tmp; - #endif - #if nEC>=13 - 
SFP12ptr = wmhSFP12 + offset; - x12 = (*SFP12ptr++) * Y_tmp; - #endif - #if nEC>=14 - SFP13ptr = wmhSFP13 + offset; - x13 = (*SFP13ptr++) * Y_tmp; - #endif - #if nEC>=15 - SFP14ptr = wmhSFP14 + offset; - x14 = (*SFP14ptr++) * Y_tmp; - #endif - #if nEC>=16 - SFP15ptr = wmhSFP15 + offset; - x15 = (*SFP15ptr++) * Y_tmp; - #endif - #if nEC>=17 - SFP16ptr = wmhSFP16 + offset; - x16 = (*SFP16ptr++) * Y_tmp; - #endif - #if nEC>=18 - SFP17ptr = wmhSFP17 + offset; - x17 = (*SFP17ptr++) * Y_tmp; - #endif - #if nEC>=19 - SFP18ptr = wmhSFP18 + offset; - x18 = (*SFP18ptr++) * Y_tmp; - #endif - #if nEC>=20 - SFP19ptr = wmhSFP19 + offset; - x19 = (*SFP19ptr++) * Y_tmp; - #endif - - while( ++Yptr != YptrEnd ) - { - Y_tmp = *Yptr; - x0 += (*SFP0ptr++) * Y_tmp; - #if nEC>=2 - x1 += (*SFP1ptr++) * Y_tmp; - #endif - #if nEC>=3 - x2 += (*SFP2ptr++) * Y_tmp; - #endif - #if nEC>=4 - x3 += (*SFP3ptr++) * Y_tmp; - #endif - #if nEC>=5 - x4 += (*SFP4ptr++) * Y_tmp; - #endif - #if nEC>=6 - x5 += (*SFP5ptr++) * Y_tmp; - #endif - #if nEC>=7 - x6 += (*SFP6ptr++) * Y_tmp; - #endif - #if nEC>=8 - x7 += (*SFP7ptr++) * Y_tmp; - #endif - #if nEC>=9 - x8 += (*SFP8ptr++) * Y_tmp; - #endif - #if nEC>=10 - x9 += (*SFP9ptr++) * Y_tmp; - #endif - #if nEC>=11 - x10 += (*SFP10ptr++) * Y_tmp; - #endif - #if nEC>=12 - x11 += (*SFP11ptr++) * Y_tmp; - #endif - #if nEC>=13 - x12 += (*SFP12ptr++) * Y_tmp; - #endif - #if nEC>=14 - x13 += (*SFP13ptr++) * Y_tmp; - #endif - #if nEC>=15 - x14 += (*SFP14ptr++) * Y_tmp; - #endif - #if nEC>=16 - x15 += (*SFP15ptr++) * Y_tmp; - #endif - #if nEC>=17 - x16 += (*SFP16ptr++) * Y_tmp; - #endif - #if nEC>=18 - x17 += (*SFP17ptr++) * Y_tmp; - #endif - #if nEC>=19 - x18 += (*SFP18ptr++) * Y_tmp; - #endif - #if nEC>=20 - x19 += (*SFP19ptr++) * Y_tmp; - #endif - } - (*x_Ptr0++) += x0; - #if nEC>=2 - (*x_Ptr1++) += x1; - #endif - #if nEC>=3 - (*x_Ptr2++) += x2; - #endif - #if nEC>=4 - (*x_Ptr3++) += x3; - #endif - #if nEC>=5 - (*x_Ptr4++) += x4; - #endif - #if nEC>=6 - (*x_Ptr5++) += x5; - #endif - #if nEC>=7 - (*x_Ptr6++) += x6; - #endif - #if nEC>=8 - (*x_Ptr7++) += x7; - #endif - #if nEC>=9 - (*x_Ptr8++) += x8; - #endif - #if nEC>=10 - (*x_Ptr9++) += x9; - #endif - #if nEC>=11 - (*x_Ptr10++) += x10; - #endif - #if nEC>=12 - (*x_Ptr11++) += x11; - #endif - #if nEC>=13 - (*x_Ptr12++) += x12; - #endif - #if nEC>=14 - (*x_Ptr13++) += x13; - #endif - #if nEC>=15 - (*x_Ptr14++) += x14; - #endif - #if nEC>=16 - (*x_Ptr15++) += x15; - #endif - #if nEC>=17 - (*x_Ptr16++) += x16; - #endif - #if nEC>=18 - (*x_Ptr17++) += x17; - #endif - #if nEC>=19 - (*x_Ptr18++) += x18; - #endif - #if nEC>=20 - (*x_Ptr19++) += x19; - #endif - } -#endif - -#if nISO>=1 - // isotropic compartments - t_v = ISOv + ISOthreadsT[id]; - t_vEnd = ISOv + ISOthreadsT[id+1]; - - x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreadsT[id]; - #if nISO>=2 - x_Ptr1 = x_Ptr0 + nV; - #endif - #if nISO>=3 - x_Ptr2 = x_Ptr1 + nV; - #endif - #if nISO>=4 - x_Ptr3 = x_Ptr2 + nV; - #endif - #if nISO>=5 - x_Ptr4 = x_Ptr3 + nV; - #endif - #if nISO>=6 - x_Ptr5 = x_Ptr4 + nV; - #endif - #if nISO>=7 - x_Ptr6 = x_Ptr5 + nV; - #endif - #if nISO>=8 - x_Ptr7 = x_Ptr6 + nV; - #endif - #if nISO>=9 - x_Ptr8 = x_Ptr7 + nV; - #endif - #if nISO>=10 - x_Ptr9 = x_Ptr8 + nV; - #endif - #if nISO>=11 - x_Ptr10 = x_Ptr9 + nV; - #endif - #if nISO>=12 - x_Ptr11 = x_Ptr10 + nV; - #endif - #if nISO>=13 - x_Ptr12 = x_Ptr11 + nV; - #endif - #if nISO>=14 - x_Ptr13 = x_Ptr12 + nV; - #endif - #if nISO>=15 - x_Ptr14 = x_Ptr13 + nV; - #endif - #if nISO>=16 - x_Ptr15 = x_Ptr14 + nV; - #endif - #if 
nISO>=17 - x_Ptr16 = x_Ptr15 + nV; - #endif - #if nISO>=18 - x_Ptr17 = x_Ptr16 + nV; - #endif - #if nISO>=19 - x_Ptr18 = x_Ptr17 + nV; - #endif - #if nISO>=20 - x_Ptr19 = x_Ptr18 + nV; - #endif - - while( t_v != t_vEnd ) - { - Yptr = Y + nS * (*t_v++); - YptrEnd = Yptr + nS; - - SFP0ptr = isoSFP0; - #if nISO>=2 - SFP1ptr = isoSFP1; - #endif - #if nISO>=3 - SFP2ptr = isoSFP2; - #endif - #if nISO>=4 - SFP3ptr = isoSFP3; - #endif - #if nISO>=5 - SFP4ptr = isoSFP4; - #endif - #if nISO>=6 - SFP5ptr = isoSFP5; - #endif - #if nISO>=7 - SFP6ptr = isoSFP6; - #endif - #if nISO>=8 - SFP7ptr = isoSFP7; - #endif - #if nISO>=9 - SFP8ptr = isoSFP8; - #endif - #if nISO>=10 - SFP9ptr = isoSFP9; - #endif - #if nISO>=11 - SFP10ptr = isoSFP10; - #endif - #if nISO>=12 - SFP11ptr = isoSFP11; - #endif - #if nISO>=13 - SFP12ptr = isoSFP12; - #endif - #if nISO>=14 - SFP13ptr = isoSFP13; - #endif - #if nISO>=15 - SFP14ptr = isoSFP14; - #endif - #if nISO>=16 - SFP15ptr = isoSFP15; - #endif - #if nISO>=17 - SFP16ptr = isoSFP16; - #endif - #if nISO>=18 - SFP17ptr = isoSFP17; - #endif - #if nISO>=19 - SFP18ptr = isoSFP18; - #endif - #if nISO>=20 - SFP19ptr = isoSFP19; - #endif - - Y_tmp = *Yptr; - x0 = (*SFP0ptr++) * Y_tmp; - #if nISO>=2 - x1 = (*SFP1ptr++) * Y_tmp; - #endif - #if nISO>=3 - x2 = (*SFP2ptr++) * Y_tmp; - #endif - #if nISO>=4 - x3 = (*SFP3ptr++) * Y_tmp; - #endif - #if nISO>=5 - x4 = (*SFP4ptr++) * Y_tmp; - #endif - #if nISO>=6 - x5 = (*SFP5ptr++) * Y_tmp; - #endif - #if nISO>=7 - x6 = (*SFP6ptr++) * Y_tmp; - #endif - #if nISO>=8 - x7 = (*SFP7ptr++) * Y_tmp; - #endif - #if nISO>=9 - x8 = (*SFP8ptr++) * Y_tmp; - #endif - #if nISO>=10 - x9 = (*SFP9ptr++) * Y_tmp; - #endif - #if nISO>=11 - x10 = (*SFP10ptr++) * Y_tmp; - #endif - #if nISO>=12 - x11 = (*SFP11ptr++) * Y_tmp; - #endif - #if nISO>=13 - x12 = (*SFP12ptr++) * Y_tmp; - #endif - #if nISO>=14 - x13 = (*SFP13ptr++) * Y_tmp; - #endif - #if nISO>=15 - x14 = (*SFP14ptr++) * Y_tmp; - #endif - #if nISO>=16 - x15 = (*SFP15ptr++) * Y_tmp; - #endif - #if nISO>=17 - x16 = (*SFP16ptr++) * Y_tmp; - #endif - #if nISO>=18 - x17 = (*SFP17ptr++) * Y_tmp; - #endif - #if nISO>=19 - x18 = (*SFP18ptr++) * Y_tmp; - #endif - #if nISO>=20 - x19 = (*SFP19ptr++) * Y_tmp; - #endif - - while( ++Yptr != YptrEnd ) - { - Y_tmp = *Yptr; - x0 += (*SFP0ptr++) * Y_tmp; - #if nISO>=2 - x1 += (*SFP1ptr++) * Y_tmp; - #endif - #if nISO>=3 - x2 += (*SFP2ptr++) * Y_tmp; - #endif - #if nISO>=4 - x3 += (*SFP3ptr++) * Y_tmp; - #endif - #if nISO>=5 - x4 += (*SFP4ptr++) * Y_tmp; - #endif - #if nISO>=6 - x5 += (*SFP5ptr++) * Y_tmp; - #endif - #if nISO>=7 - x6 += (*SFP6ptr++) * Y_tmp; - #endif - #if nISO>=8 - x7 += (*SFP7ptr++) * Y_tmp; - #endif - #if nISO>=9 - x8 += (*SFP8ptr++) * Y_tmp; - #endif - #if nISO>=10 - x9 += (*SFP9ptr++) * Y_tmp; - #endif - #if nISO>=11 - x10 += (*SFP10ptr++) * Y_tmp; - #endif - #if nISO>=12 - x11 += (*SFP11ptr++) * Y_tmp; - #endif - #if nISO>=13 - x12 += (*SFP12ptr++) * Y_tmp; - #endif - #if nISO>=14 - x13 += (*SFP13ptr++) * Y_tmp; - #endif - #if nISO>=15 - x14 += (*SFP14ptr++) * Y_tmp; - #endif - #if nISO>=16 - x15 += (*SFP15ptr++) * Y_tmp; - #endif - #if nISO>=17 - x16 += (*SFP16ptr++) * Y_tmp; - #endif - #if nISO>=18 - x17 += (*SFP17ptr++) * Y_tmp; - #endif - #if nISO>=19 - x18 += (*SFP18ptr++) * Y_tmp; - #endif - #if nISO>=20 - x19 += (*SFP19ptr++) * Y_tmp; - #endif - } - - (*x_Ptr0++) += x0; - #if nISO>=2 - (*x_Ptr1++) += x1; - #endif - #if nISO>=3 - (*x_Ptr2++) += x2; - #endif - #if nISO>=4 - (*x_Ptr3++) += x3; - #endif - #if nISO>=5 - (*x_Ptr4++) += x4; - 
#endif - #if nISO>=6 - (*x_Ptr5++) += x5; - #endif - #if nISO>=7 - (*x_Ptr6++) += x6; - #endif - #if nISO>=8 - (*x_Ptr7++) += x7; - #endif - #if nISO>=9 - (*x_Ptr8++) += x8; - #endif - #if nISO>=10 - (*x_Ptr9++) += x9; - #endif - #if nISO>=11 - (*x_Ptr10++) += x10; - #endif - #if nISO>=12 - (*x_Ptr11++) += x11; - #endif - #if nISO>=13 - (*x_Ptr12++) += x12; - #endif - #if nISO>=14 - (*x_Ptr13++) += x13; - #endif - #if nISO>=15 - (*x_Ptr14++) += x14; - #endif - #if nISO>=16 - (*x_Ptr15++) += x15; - #endif - #if nISO>=17 - (*x_Ptr16++) += x16; - #endif - #if nISO>=18 - (*x_Ptr17++) += x17; - #endif - #if nISO>=19 - (*x_Ptr18++) += x18; - #endif - #if nISO>=20 - (*x_Ptr19++) += x19; - #endif - } -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_At( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT -) -{ - nF = _nF; - n = _n; - nE = _nE; - nV = _nV; - nS = _nS; - ndirs = _ndirs; - - x = _vOUT; - Y = _vIN; - - ICf = _ICf; - ICv = _ICv; - ICo = _ICo; - ICl = _ICl; - ECv = _ECv; - ECo = _ECo; - ISOv = _ISOv; - - #if nIC>=1 - wmrSFP0 = _wmrSFP; - #if nIC>=2 - wmrSFP1 = wmrSFP0 + _ndirs*_nS; - #if nIC>=3 - wmrSFP2 = wmrSFP1 + _ndirs*_nS; - #if nIC>=4 - wmrSFP3 = wmrSFP2 + _ndirs*_nS; - #if nIC>=5 - wmrSFP4 = wmrSFP3 + _ndirs*_nS; - #if nIC>=6 - wmrSFP5 = wmrSFP4 + _ndirs*_nS; - #if nIC>=7 - wmrSFP6 = wmrSFP5 + _ndirs*_nS; - #if nIC>=8 - wmrSFP7 = wmrSFP6 + _ndirs*_nS; - #if nIC>=9 - wmrSFP8 = wmrSFP7 + _ndirs*_nS; - #if nIC>=10 - wmrSFP9 = wmrSFP8 + _ndirs*_nS; - #if nIC>=11 - wmrSFP10 = wmrSFP9 + _ndirs*_nS; - #if nIC>=12 - wmrSFP11 = wmrSFP10 + _ndirs*_nS; - #if nIC>=13 - wmrSFP12 = wmrSFP11 + _ndirs*_nS; - #if nIC>=14 - wmrSFP13 = wmrSFP12 + _ndirs*_nS; - #if nIC>=15 - wmrSFP14 = wmrSFP13 + _ndirs*_nS; - #if nIC>=16 - wmrSFP15 = wmrSFP14 + _ndirs*_nS; - #if nIC>=17 - wmrSFP16 = wmrSFP15 + _ndirs*_nS; - #if nIC>=18 - wmrSFP17 = wmrSFP16 + _ndirs*_nS; - #if nIC>=19 - wmrSFP18 = wmrSFP17 + _ndirs*_nS; - #if nIC>=20 - wmrSFP19 = wmrSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nEC>=1 - wmhSFP0 = _wmhSFP; - #if nEC>=2 - wmhSFP1 = wmhSFP0 + _ndirs*_nS; - #if nEC>=3 - wmhSFP2 = wmhSFP1 + _ndirs*_nS; - #if nEC>=4 - wmhSFP3 = wmhSFP2 + _ndirs*_nS; - #if nEC>=5 - wmhSFP4 = wmhSFP3 + _ndirs*_nS; - #if nEC>=6 - wmhSFP5 = wmhSFP4 + _ndirs*_nS; - #if nEC>=7 - wmhSFP6 = wmhSFP5 + _ndirs*_nS; - #if nEC>=8 - wmhSFP7 = wmhSFP6 + _ndirs*_nS; - #if nEC>=9 - wmhSFP8 = wmhSFP7 + _ndirs*_nS; - #if nEC>=10 - wmhSFP9 = wmhSFP8 + _ndirs*_nS; - #if nEC>=11 - wmhSFP10 = wmhSFP9 + _ndirs*_nS; - #if nEC>=12 - wmhSFP11 = wmhSFP10 + _ndirs*_nS; - #if nEC>=13 - wmhSFP12 = wmhSFP11 + _ndirs*_nS; - #if nEC>=14 - wmhSFP13 = wmhSFP12 + _ndirs*_nS; - #if nEC>=15 - wmhSFP14 = wmhSFP13 + _ndirs*_nS; - #if nEC>=16 - wmhSFP15 = wmhSFP14 + _ndirs*_nS; - #if nEC>=17 - wmhSFP16 = wmhSFP15 + _ndirs*_nS; - #if nEC>=18 - wmhSFP17 = wmhSFP16 + _ndirs*_nS; - #if nEC>=19 - wmhSFP18 = wmhSFP17 + _ndirs*_nS; - #if nEC>=20 - wmhSFP19 = wmhSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - 
#endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nISO>=1 - isoSFP0 = _isoSFP; - #if nISO>=2 - isoSFP1 = isoSFP0 + _nS; - #if nISO>=3 - isoSFP2 = isoSFP1 + _nS; - #if nISO>=4 - isoSFP3 = isoSFP2 + _nS; - #if nISO>=5 - isoSFP4 = isoSFP3 + _nS; - #if nISO>=6 - isoSFP5 = isoSFP4 + _nS; - #if nISO>=7 - isoSFP6 = isoSFP5 + _nS; - #if nISO>=8 - isoSFP7 = isoSFP6 + _nS; - #if nISO>=9 - isoSFP8 = isoSFP7 + _nS; - #if nISO>=10 - isoSFP9 = isoSFP8 + _nS; - #if nISO>=11 - isoSFP10 = isoSFP9 + _nS; - #if nISO>=12 - isoSFP11 = isoSFP10 + _nS; - #if nISO>=13 - isoSFP12 = isoSFP11 + _nS; - #if nISO>=14 - isoSFP13 = isoSFP12 + _nS; - #if nISO>=15 - isoSFP14 = isoSFP13 + _nS; - #if nISO>=16 - isoSFP15 = isoSFP14 + _nS; - #if nISO>=17 - isoSFP16 = isoSFP15 + _nS; - #if nISO>=18 - isoSFP17 = isoSFP16 + _nS; - #if nISO>=19 - isoSFP18 = isoSFP17 + _nS; - #if nISO>=20 - isoSFP19 = isoSFP18 + _nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - - ICthreadsT = _ICthreadsT; - ECthreadsT = _ECthreadsT; - ISOthreadsT = _ISOthreadsT; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t +#include // uint32_t etc + +// number of THREADS +#ifdef nTHREADS + #if (nTHREADS<1 || nTHREADS>255) + #error "nTHREADS" must be in the range 1..255 + #endif +#else + #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=" +#endif + + +/* global variables */ +int nF, n, nE, nV, nS, ndirs; +double *x, *Y; +uint32_t *ICthreads, *ECthreads, *ISOthreads; +uint8_t *ICthreadsT; +uint32_t *ECthreadsT, *ISOthreadsT; +uint32_t *ICf, *ICv, *ECv, *ISOv; +uint16_t *ICo, *ECo; +float *ICl; +float *wmrSFP0, *wmrSFP1, *wmrSFP2, *wmrSFP3, *wmrSFP4, *wmrSFP5, *wmrSFP6, *wmrSFP7, *wmrSFP8, *wmrSFP9, *wmrSFP10, *wmrSFP11, *wmrSFP12, *wmrSFP13, *wmrSFP14, *wmrSFP15, *wmrSFP16, *wmrSFP17, *wmrSFP18, *wmrSFP19; +float *wmhSFP0, *wmhSFP1, *wmhSFP2, *wmhSFP3, *wmhSFP4, *wmhSFP5, *wmhSFP6, *wmhSFP7, *wmhSFP8, *wmhSFP9, *wmhSFP10, *wmhSFP11, *wmhSFP12, *wmhSFP13, *wmhSFP14, *wmhSFP15, *wmhSFP16, *wmhSFP17, *wmhSFP18, *wmhSFP19; +float *isoSFP0, *isoSFP1, *isoSFP2, *isoSFP3, *isoSFP4, *isoSFP5, *isoSFP6, *isoSFP7, *isoSFP8, *isoSFP9, *isoSFP10, *isoSFP11, *isoSFP12, *isoSFP13, *isoSFP14, *isoSFP15, *isoSFP16, *isoSFP17, *isoSFP18, *isoSFP19; + + + +// ==================================================== +// Compute a sub-block of the A*x MAtRIX-VECTOR product +// ==================================================== +void* COMMIT_A__block( void *ptr ) +{ + int id = (long)ptr; + int offset; + double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w; + double *x_Ptr0, *x_Ptr1, *x_Ptr2, *x_Ptr3, *x_Ptr4, *x_Ptr5, *x_Ptr6, *x_Ptr7, *x_Ptr8, *x_Ptr9, *x_Ptr10, *x_Ptr11, *x_Ptr12, *x_Ptr13, *x_Ptr14, *x_Ptr15, *x_Ptr16, *x_Ptr17, *x_Ptr18, *x_Ptr19; + double *Yptr, *YptrEnd; + float *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr; + uint32_t *t_v, *t_vEnd, *t_f; + uint16_t *t_o; + float *t_l; + +#if nIC>=1 + // intra-cellular compartments + t_v = ICv + ICthreads[id]; + t_vEnd = ICv + ICthreads[id+1]; + t_o = ICo + ICthreads[id]; + t_l = ICl + ICthreads[id]; + t_f = ICf + ICthreads[id]; + + while( t_v 
!= t_vEnd ) + { + x_Ptr0 = x + *t_f; + x0 = *x_Ptr0; + #if nIC>=2 + x_Ptr1 = x_Ptr0 + nF; + x1 = *x_Ptr1; + #endif + #if nIC>=3 + x_Ptr2 = x_Ptr1 + nF; + x2 = *x_Ptr2; + #endif + #if nIC>=4 + x_Ptr3 = x_Ptr2 + nF; + x3 = *x_Ptr3; + #endif + #if nIC>=5 + x_Ptr4 = x_Ptr3 + nF; + x4 = *x_Ptr4; + #endif + #if nIC>=6 + x_Ptr5 = x_Ptr4 + nF; + x5 = *x_Ptr5; + #endif + #if nIC>=7 + x_Ptr6 = x_Ptr5 + nF; + x6 = *x_Ptr6; + #endif + #if nIC>=8 + x_Ptr7 = x_Ptr6 + nF; + x7 = *x_Ptr7; + #endif + #if nIC>=9 + x_Ptr8 = x_Ptr7 + nF; + x8 = *x_Ptr8; + #endif + #if nIC>=10 + x_Ptr9 = x_Ptr8 + nF; + x9 = *x_Ptr9; + #endif + #if nIC>=11 + x_Ptr10 = x_Ptr9 + nF; + x10 = *x_Ptr10; + #endif + #if nIC>=12 + x_Ptr11 = x_Ptr10 + nF; + x11 = *x_Ptr11; + #endif + #if nIC>=13 + x_Ptr12 = x_Ptr11 + nF; + x12 = *x_Ptr12; + #endif + #if nIC>=14 + x_Ptr13 = x_Ptr12 + nF; + x13 = *x_Ptr13; + #endif + #if nIC>=15 + x_Ptr14 = x_Ptr13 + nF; + x14 = *x_Ptr14; + #endif + #if nIC>=16 + x_Ptr15 = x_Ptr14 + nF; + x15 = *x_Ptr15; + #endif + #if nIC>=17 + x_Ptr16 = x_Ptr15 + nF; + x16 = *x_Ptr16; + #endif + #if nIC>=18 + x_Ptr17 = x_Ptr16 + nF; + x17 = *x_Ptr17; + #endif + #if nIC>=19 + x_Ptr18 = x_Ptr17 + nF; + x18 = *x_Ptr18; + #endif + #if nIC>=20 + x_Ptr19 = x_Ptr18 + nF; + x19 = *x_Ptr19; + #endif + + if ( x0 != 0 + #if nIC>=2 + || x1 != 0 + #endif + #if nIC>=3 + || x2 != 0 + #endif + #if nIC>=4 + || x3 != 0 + #endif + #if nIC>=5 + || x4 != 0 + #endif + #if nIC>=6 + || x5 != 0 + #endif + #if nIC>=7 + || x6 != 0 + #endif + #if nIC>=8 + || x7 != 0 + #endif + #if nIC>=9 + || x8 != 0 + #endif + #if nIC>=10 + || x9 != 0 + #endif + #if nIC>=11 + || x10 != 0 + #endif + #if nIC>=12 + || x11 != 0 + #endif + #if nIC>=13 + || x12 != 0 + #endif + #if nIC>=14 + || x13 != 0 + #endif + #if nIC>=15 + || x14 != 0 + #endif + #if nIC>=16 + || x15 != 0 + #endif + #if nIC>=17 + || x16 != 0 + #endif + #if nIC>=18 + || x17 != 0 + #endif + #if nIC>=19 + || x18 != 0 + #endif + #if nIC>=20 + || x19 != 0 + #endif + ) + { + Yptr = Y + nS * (*t_v); + YptrEnd = Yptr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + SFP0ptr = wmrSFP0 + offset; + #if nIC>=2 + SFP1ptr = wmrSFP1 + offset; + #endif + #if nIC>=3 + SFP2ptr = wmrSFP2 + offset; + #endif + #if nIC>=4 + SFP3ptr = wmrSFP3 + offset; + #endif + #if nIC>=5 + SFP4ptr = wmrSFP4 + offset; + #endif + #if nIC>=6 + SFP5ptr = wmrSFP5 + offset; + #endif + #if nIC>=7 + SFP6ptr = wmrSFP6 + offset; + #endif + #if nIC>=8 + SFP7ptr = wmrSFP7 + offset; + #endif + #if nIC>=9 + SFP8ptr = wmrSFP8 + offset; + #endif + #if nIC>=10 + SFP9ptr = wmrSFP9 + offset; + #endif + #if nIC>=11 + SFP10ptr = wmrSFP10 + offset; + #endif + #if nIC>=12 + SFP11ptr = wmrSFP11 + offset; + #endif + #if nIC>=13 + SFP12ptr = wmrSFP12 + offset; + #endif + #if nIC>=14 + SFP13ptr = wmrSFP13 + offset; + #endif + #if nIC>=15 + SFP14ptr = wmrSFP14 + offset; + #endif + #if nIC>=16 + SFP15ptr = wmrSFP15 + offset; + #endif + #if nIC>=17 + SFP16ptr = wmrSFP16 + offset; + #endif + #if nIC>=18 + SFP17ptr = wmrSFP17 + offset; + #endif + #if nIC>=19 + SFP18ptr = wmrSFP18 + offset; + #endif + #if nIC>=20 + SFP19ptr = wmrSFP19 + offset; + #endif + + while( Yptr != YptrEnd ) + (*Yptr++) += w * ( + x0 * (*SFP0ptr++) + #if nIC>=2 + + x1 * (*SFP1ptr++) + #endif + #if nIC>=3 + + x2 * (*SFP2ptr++) + #endif + #if nIC>=4 + + x3 * (*SFP3ptr++) + #endif + #if nIC>=5 + + x4 * (*SFP4ptr++) + #endif + #if nIC>=6 + + x5 * (*SFP5ptr++) + #endif + #if nIC>=7 + + x6 * (*SFP6ptr++) + #endif + #if nIC>=8 + + x7 * (*SFP7ptr++) + #endif + #if nIC>=9 + + x8 * (*SFP8ptr++) + 
#endif + #if nIC>=10 + + x9 * (*SFP9ptr++) + #endif + #if nIC>=11 + + x10 * (*SFP10ptr++) + #endif + #if nIC>=12 + + x11 * (*SFP11ptr++) + #endif + #if nIC>=13 + + x12 * (*SFP12ptr++) + #endif + #if nIC>=14 + + x13 * (*SFP13ptr++) + #endif + #if nIC>=15 + + x14 * (*SFP14ptr++) + #endif + #if nIC>=16 + + x15 * (*SFP15ptr++) + #endif + #if nIC>=17 + + x16 * (*SFP16ptr++) + #endif + #if nIC>=18 + + x17 * (*SFP17ptr++) + #endif + #if nIC>=19 + + x18 * (*SFP18ptr++) + #endif + #if nIC>=20 + + x19 * (*SFP19ptr++) + #endif + ); + } + + t_f++; + t_v++; + t_o++; + t_l++; + } +#endif + +#if nEC>=1 + // extra-cellular compartments + t_v = ECv + ECthreads[id]; + t_vEnd = ECv + ECthreads[id+1]; + t_o = ECo + ECthreads[id]; + + x_Ptr0 = x + nIC*nF + ECthreads[id]; + #if nEC>=2 + x_Ptr1 = x_Ptr0 + nE; + #endif + #if nEC>=3 + x_Ptr2 = x_Ptr1 + nE; + #endif + #if nEC>=4 + x_Ptr3 = x_Ptr2 + nE; + #endif + #if nEC>=5 + x_Ptr4 = x_Ptr3 + nE; + #endif + #if nEC>=6 + x_Ptr5 = x_Ptr4 + nE; + #endif + #if nEC>=7 + x_Ptr6 = x_Ptr5 + nE; + #endif + #if nEC>=8 + x_Ptr7 = x_Ptr6 + nE; + #endif + #if nEC>=9 + x_Ptr8 = x_Ptr7 + nE; + #endif + #if nEC>=10 + x_Ptr9 = x_Ptr8 + nE; + #endif + #if nEC>=11 + x_Ptr10 = x_Ptr9 + nE; + #endif + #if nEC>=12 + x_Ptr11 = x_Ptr10 + nE; + #endif + #if nEC>=13 + x_Ptr12 = x_Ptr11 + nE; + #endif + #if nEC>=14 + x_Ptr13 = x_Ptr12 + nE; + #endif + #if nEC>=15 + x_Ptr14 = x_Ptr13 + nE; + #endif + #if nEC>=16 + x_Ptr15 = x_Ptr14 + nE; + #endif + #if nEC>=17 + x_Ptr16 = x_Ptr15 + nE; + #endif + #if nEC>=18 + x_Ptr17 = x_Ptr16 + nE; + #endif + #if nEC>=19 + x_Ptr18 = x_Ptr17 + nE; + #endif + #if nEC>=20 + x_Ptr19 = x_Ptr18 + nE; + #endif + + while( t_v != t_vEnd ) + { + x0 = *x_Ptr0++; + #if nEC>=2 + x1 = *x_Ptr1++; + #endif + #if nEC>=3 + x2 = *x_Ptr2++; + #endif + #if nEC>=4 + x3 = *x_Ptr3++; + #endif + #if nEC>=5 + x4 = *x_Ptr4++; + #endif + #if nEC>=6 + x5 = *x_Ptr5++; + #endif + #if nEC>=7 + x6 = *x_Ptr6++; + #endif + #if nEC>=8 + x7 = *x_Ptr7++; + #endif + #if nEC>=9 + x8 = *x_Ptr8++; + #endif + #if nEC>=10 + x9 = *x_Ptr9++; + #endif + #if nEC>=11 + x10 = *x_Ptr10++; + #endif + #if nEC>=12 + x11 = *x_Ptr11++; + #endif + #if nEC>=13 + x12 = *x_Ptr12++; + #endif + #if nEC>=14 + x13 = *x_Ptr13++; + #endif + #if nEC>=15 + x14 = *x_Ptr14++; + #endif + #if nEC>=16 + x15 = *x_Ptr15++; + #endif + #if nEC>=17 + x16 = *x_Ptr16++; + #endif + #if nEC>=18 + x17 = *x_Ptr17++; + #endif + #if nEC>=19 + x18 = *x_Ptr18++; + #endif + #if nEC>=20 + x19 = *x_Ptr19++; + #endif + if ( + x0 != 0 + #if nEC>=2 + || x1 != 0 + #endif + #if nEC>=3 + || x2 != 0 + #endif + #if nEC>=4 + || x3 != 0 + #endif + #if nEC>=5 + || x4 != 0 + #endif + #if nEC>=6 + || x5 != 0 + #endif + #if nEC>=7 + || x6 != 0 + #endif + #if nEC>=8 + || x7 != 0 + #endif + #if nEC>=9 + || x8 != 0 + #endif + #if nEC>=10 + || x9 != 0 + #endif + #if nEC>=11 + || x10 != 0 + #endif + #if nEC>=12 + || x11 != 0 + #endif + #if nEC>=13 + || x12 != 0 + #endif + #if nEC>=14 + || x13 != 0 + #endif + #if nEC>=15 + || x14 != 0 + #endif + #if nEC>=16 + || x15 != 0 + #endif + #if nEC>=17 + || x16 != 0 + #endif + #if nEC>=18 + || x17 != 0 + #endif + #if nEC>=19 + || x18 != 0 + #endif + #if nEC>=20 + || x19 != 0 + #endif + ) + { + Yptr = Y + nS * (*t_v); + YptrEnd = Yptr + nS; + offset = nS * (*t_o); + SFP0ptr = wmhSFP0 + offset; + #if nEC>=2 + SFP1ptr = wmhSFP1 + offset; + #endif + #if nEC>=3 + SFP2ptr = wmhSFP2 + offset; + #endif + #if nEC>=4 + SFP3ptr = wmhSFP3 + offset; + #endif + #if nEC>=5 + SFP4ptr = wmhSFP4 + offset; + #endif + #if nEC>=6 + SFP5ptr = 
wmhSFP5 + offset; + #endif + #if nEC>=7 + SFP6ptr = wmhSFP6 + offset; + #endif + #if nEC>=8 + SFP7ptr = wmhSFP7 + offset; + #endif + #if nEC>=9 + SFP8ptr = wmhSFP8 + offset; + #endif + #if nEC>=10 + SFP9ptr = wmhSFP9 + offset; + #endif + #if nEC>=11 + SFP10ptr = wmhSFP10 + offset; + #endif + #if nEC>=12 + SFP11ptr = wmhSFP11 + offset; + #endif + #if nEC>=13 + SFP12ptr = wmhSFP12 + offset; + #endif + #if nEC>=14 + SFP13ptr = wmhSFP13 + offset; + #endif + #if nEC>=15 + SFP14ptr = wmhSFP14 + offset; + #endif + #if nEC>=16 + SFP15ptr = wmhSFP15 + offset; + #endif + #if nEC>=17 + SFP16ptr = wmhSFP16 + offset; + #endif + #if nEC>=18 + SFP17ptr = wmhSFP17 + offset; + #endif + #if nEC>=19 + SFP18ptr = wmhSFP18 + offset; + #endif + #if nEC>=20 + SFP19ptr = wmhSFP19 + offset; + #endif + + while( Yptr != YptrEnd ) + (*Yptr++) += ( + x0 * (*SFP0ptr++) + #if nEC>=2 + + x1 * (*SFP1ptr++) + #endif + #if nEC>=3 + + x2 * (*SFP2ptr++) + #endif + #if nEC>=4 + + x3 * (*SFP3ptr++) + #endif + #if nEC>=5 + + x4 * (*SFP4ptr++) + #endif + #if nEC>=6 + + x5 * (*SFP5ptr++) + #endif + #if nEC>=7 + + x6 * (*SFP6ptr++) + #endif + #if nEC>=8 + + x7 * (*SFP7ptr++) + #endif + #if nEC>=9 + + x8 * (*SFP8ptr++) + #endif + #if nEC>=10 + + x9 * (*SFP9ptr++) + #endif + #if nEC>=11 + + x10 * (*SFP10ptr++) + #endif + #if nEC>=12 + + x11 * (*SFP11ptr++) + #endif + #if nEC>=13 + + x12 * (*SFP12ptr++) + #endif + #if nEC>=14 + + x13 * (*SFP13ptr++) + #endif + #if nEC>=15 + + x14 * (*SFP14ptr++) + #endif + #if nEC>=16 + + x15 * (*SFP15ptr++) + #endif + #if nEC>=17 + + x16 * (*SFP16ptr++) + #endif + #if nEC>=18 + + x17 * (*SFP17ptr++) + #endif + #if nEC>=19 + + x18 * (*SFP18ptr++) + #endif + #if nEC>=20 + + x19 * (*SFP19ptr++) + #endif + + ); + } + t_v++; + t_o++; + } +#endif + +#if nISO>=1 + // isotropic compartments + t_v = ISOv + ISOthreads[id]; + t_vEnd = ISOv + ISOthreads[id+1]; + + x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreads[id]; + #if nISO>=2 + x_Ptr1 = x_Ptr0 + nV; + #endif + #if nISO>=3 + x_Ptr2 = x_Ptr1 + nV; + #endif + #if nISO>=4 + x_Ptr3 = x_Ptr2 + nV; + #endif + #if nISO>=5 + x_Ptr4 = x_Ptr3 + nV; + #endif + #if nISO>=6 + x_Ptr5 = x_Ptr4 + nV; + #endif + #if nISO>=7 + x_Ptr6 = x_Ptr5 + nV; + #endif + #if nISO>=8 + x_Ptr7 = x_Ptr6 + nV; + #endif + #if nISO>=9 + x_Ptr8 = x_Ptr7 + nV; + #endif + #if nISO>=10 + x_Ptr9 = x_Ptr8 + nV; + #endif + #if nISO>=11 + x_Ptr10 = x_Ptr9 + nV; + #endif + #if nISO>=12 + x_Ptr11 = x_Ptr10 + nV; + #endif + #if nISO>=13 + x_Ptr12 = x_Ptr11 + nV; + #endif + #if nISO>=14 + x_Ptr13 = x_Ptr12 + nV; + #endif + #if nISO>=15 + x_Ptr14 = x_Ptr13 + nV; + #endif + #if nISO>=16 + x_Ptr15 = x_Ptr14 + nV; + #endif + #if nISO>=17 + x_Ptr16 = x_Ptr15 + nV; + #endif + #if nISO>=18 + x_Ptr17 = x_Ptr16 + nV; + #endif + #if nISO>=19 + x_Ptr18 = x_Ptr17 + nV; + #endif + #if nISO>=20 + x_Ptr19 = x_Ptr18 + nV; + #endif + + while( t_v != t_vEnd ) + { + x0 = *x_Ptr0++; + #if nISO>=2 + x1 = *x_Ptr1++; + #endif + #if nISO>=3 + x2 = *x_Ptr2++; + #endif + #if nISO>=4 + x3 = *x_Ptr3++; + #endif + #if nISO>=5 + x4 = *x_Ptr4++; + #endif + #if nISO>=6 + x5 = *x_Ptr5++; + #endif + #if nISO>=7 + x6 = *x_Ptr6++; + #endif + #if nISO>=8 + x7 = *x_Ptr7++; + #endif + #if nISO>=9 + x8 = *x_Ptr8++; + #endif + #if nISO>=10 + x9 = *x_Ptr9++; + #endif + #if nISO>=11 + x10 = *x_Ptr10++; + #endif + #if nISO>=12 + x11 = *x_Ptr11++; + #endif + #if nISO>=13 + x12 = *x_Ptr12++; + #endif + #if nISO>=14 + x13 = *x_Ptr13++; + #endif + #if nISO>=15 + x14 = *x_Ptr14++; + #endif + #if nISO>=16 + x15 = *x_Ptr15++; + #endif + #if nISO>=17 + x16 = 
*x_Ptr16++; + #endif + #if nISO>=18 + x17 = *x_Ptr17++; + #endif + #if nISO>=19 + x18 = *x_Ptr18++; + #endif + #if nISO>=20 + x19 = *x_Ptr19++; + #endif + + if ( + x0 != 0 + #if nISO>=2 + || x1 != 0 + #endif + #if nISO>=3 + || x2 != 0 + #endif + #if nISO>=4 + || x3 != 0 + #endif + #if nISO>=5 + || x4 != 0 + #endif + #if nISO>=6 + || x5 != 0 + #endif + #if nISO>=7 + || x6 != 0 + #endif + #if nISO>=8 + || x7 != 0 + #endif + #if nISO>=9 + || x8 != 0 + #endif + #if nISO>=10 + || x9 != 0 + #endif + #if nISO>=11 + || x10 != 0 + #endif + #if nISO>=12 + || x11 != 0 + #endif + #if nISO>=13 + || x12 != 0 + #endif + #if nISO>=14 + || x13 != 0 + #endif + #if nISO>=15 + || x14 != 0 + #endif + #if nISO>=16 + || x15 != 0 + #endif + #if nISO>=17 + || x16 != 0 + #endif + #if nISO>=18 + || x17 != 0 + #endif + #if nISO>=19 + || x18 != 0 + #endif + #if nISO>=20 + || x19 != 0 + #endif + ) + { + Yptr = Y + nS * (*t_v); + YptrEnd = Yptr + nS; + SFP0ptr = isoSFP0; + #if nISO>=2 + SFP1ptr = isoSFP1; + #endif + #if nISO>=3 + SFP2ptr = isoSFP2; + #endif + #if nISO>=4 + SFP3ptr = isoSFP3; + #endif + #if nISO>=5 + SFP4ptr = isoSFP4; + #endif + #if nISO>=6 + SFP5ptr = isoSFP5; + #endif + #if nISO>=7 + SFP6ptr = isoSFP6; + #endif + #if nISO>=8 + SFP7ptr = isoSFP7; + #endif + #if nISO>=9 + SFP8ptr = isoSFP8; + #endif + #if nISO>=10 + SFP9ptr = isoSFP9; + #endif + #if nISO>=11 + SFP10ptr = isoSFP10; + #endif + #if nISO>=12 + SFP11ptr = isoSFP11; + #endif + #if nISO>=13 + SFP12ptr = isoSFP12; + #endif + #if nISO>=14 + SFP13ptr = isoSFP13; + #endif + #if nISO>=15 + SFP14ptr = isoSFP14; + #endif + #if nISO>=16 + SFP15ptr = isoSFP15; + #endif + #if nISO>=17 + SFP16ptr = isoSFP16; + #endif + #if nISO>=18 + SFP17ptr = isoSFP17; + #endif + #if nISO>=19 + SFP18ptr = isoSFP18; + #endif + #if nISO>=20 + SFP19ptr = isoSFP19; + #endif + + while( Yptr != YptrEnd ) + (*Yptr++) += ( + x0 * (*SFP0ptr++) + #if nISO>=2 + + x1 * (*SFP1ptr++) + #endif + #if nISO>=3 + + x2 * (*SFP2ptr++) + #endif + #if nISO>=4 + + x3 * (*SFP3ptr++) + #endif + #if nISO>=5 + + x4 * (*SFP4ptr++) + #endif + #if nISO>=6 + + x5 * (*SFP5ptr++) + #endif + #if nISO>=7 + + x6 * (*SFP6ptr++) + #endif + #if nISO>=8 + + x7 * (*SFP7ptr++) + #endif + #if nISO>=9 + + x8 * (*SFP8ptr++) + #endif + #if nISO>=10 + + x9 * (*SFP9ptr++) + #endif + #if nISO>=11 + + x10 * (*SFP10ptr++) + #endif + #if nISO>=12 + + x11 * (*SFP11ptr++) + #endif + #if nISO>=13 + + x12 * (*SFP12ptr++) + #endif + #if nISO>=14 + + x13 * (*SFP13ptr++) + #endif + #if nISO>=15 + + x14 * (*SFP14ptr++) + #endif + #if nISO>=16 + + x15 * (*SFP15ptr++) + #endif + #if nISO>=17 + + x16 * (*SFP16ptr++) + #endif + #if nISO>=18 + + x17 * (*SFP17ptr++) + #endif + #if nISO>=19 + + x18 * (*SFP18ptr++) + #endif + #if nISO>=20 + + x19 * (*SFP19ptr++) + #endif + ); + } + t_v++; + } +#endif + + pthread_exit( 0 ); +} + + +// ========================= +// Function called by CYTHON +// ========================= +void COMMIT_A( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads +) +{ + nF = _nF; + n = _n; + nE = _nE; + nV = _nV; + nS = _nS; + ndirs = _ndirs; + + x = _vIN; + Y = _vOUT; + + ICf = _ICf; + ICv = _ICv; + ICo = _ICo; + ICl = _ICl; + ECv = _ECv; + ECo = _ECo; + ISOv = _ISOv; + + #if nIC>=1 + wmrSFP0 = _wmrSFP; + #if nIC>=2 + wmrSFP1 = wmrSFP0 + 
_ndirs*_nS; + #if nIC>=3 + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + #if nIC>=4 + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + #if nIC>=5 + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + #if nIC>=6 + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + #if nIC>=7 + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + #if nIC>=8 + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + #if nIC>=9 + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + #if nIC>=10 + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + #if nIC>=11 + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + #if nIC>=12 + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + #if nIC>=13 + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + #if nIC>=14 + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + #if nIC>=15 + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + #if nIC>=16 + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + #if nIC>=17 + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + #if nIC>=18 + wmrSFP17 = wmrSFP16 + _ndirs*_nS; + #if nIC>=19 + wmrSFP18 = wmrSFP17 + _ndirs*_nS; + #if nIC>=20 + wmrSFP19 = wmrSFP18 + _ndirs*_nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #if nEC>=1 + wmhSFP0 = _wmhSFP; + #if nEC>=2 + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + #if nEC>=3 + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + #if nEC>=4 + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + #if nEC>=5 + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + #if nEC>=6 + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + #if nEC>=7 + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + #if nEC>=8 + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + #if nEC>=9 + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + #if nEC>=10 + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + #if nEC>=11 + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + #if nEC>=12 + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + #if nEC>=13 + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + #if nEC>=14 + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + #if nEC>=15 + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + #if nEC>=16 + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + #if nEC>=17 + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + #if nEC>=18 + wmhSFP17 = wmhSFP16 + _ndirs*_nS; + #if nEC>=19 + wmhSFP18 = wmhSFP17 + _ndirs*_nS; + #if nEC>=20 + wmhSFP19 = wmhSFP18 + _ndirs*_nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #if nISO>=1 + isoSFP0 = _isoSFP; + #if nISO>=2 + isoSFP1 = isoSFP0 + _nS; + #if nISO>=3 + isoSFP2 = isoSFP1 + _nS; + #if nISO>=4 + isoSFP3 = isoSFP2 + _nS; + #if nISO>=5 + isoSFP4 = isoSFP3 + _nS; + #if nISO>=6 + isoSFP5 = isoSFP4 + _nS; + #if nISO>=7 + isoSFP6 = isoSFP5 + _nS; + #if nISO>=8 + isoSFP7 = isoSFP6 + _nS; + #if nISO>=9 + isoSFP8 = isoSFP7 + _nS; + #if nISO>=10 + isoSFP9 = isoSFP8 + _nS; + #if nISO>=11 + isoSFP10 = isoSFP9 + _nS; + #if nISO>=12 + isoSFP11 = isoSFP10 + _nS; + #if nISO>=13 + isoSFP12 = isoSFP11 + _nS; + #if nISO>=14 + isoSFP13 = isoSFP12 + _nS; + #if nISO>=15 + isoSFP14 = isoSFP13 + _nS; + #if nISO>=16 + isoSFP15 = isoSFP14 + _nS; + #if nISO>=17 + isoSFP16 = isoSFP15 + _nS; + #if nISO>=18 + isoSFP17 = isoSFP16 + _nS; + #if nISO>=19 + isoSFP18 = isoSFP17 + _nS; + #if nISO>=20 + isoSFP19 = isoSFP18 + _nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + + ICthreads = _ICthreads; + ECthreads = _ECthreads; + ISOthreads = _ISOthreads; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[nTHREADS]; + int t; + for(t=0; t=1 + // intra-cellular compartments + t_v = ICv; + t_vEnd = ICv + n; + t_o = ICo; + t_l = ICl; + t_f = ICf; + t_t = ICthreadsT; + + 
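/* A minimal sketch (editorial, with hypothetical names) of the per-segment
   adjoint update that the unrolled intra-cellular loop below performs: for a
   segment of fiber f, in voxel v, with orientation o and length w, each
   intra-cellular compartment r accumulates the dot product between the
   voxel's signal block and its rotated kernel,

       x[r*nF + f] += w * sum_s  wmrSFP[(r*ndirs + o)*nS + s] * Y[v*nS + s]

   which, written without the preprocessor unrolling, could look like:

       void adjoint_ic_segment( int f, int v, int o, double w,
                                int nF, int nS, int nIC, int ndirs,
                                const float *wmrSFP, const double *Y, double *x )
       {
           const double *y = Y + v*nS;                       // signal block of voxel v
           for( int r = 0; r < nIC; r++ )
           {
               const float *k = wmrSFP + (r*ndirs + o)*nS;   // rotated kernel of compartment r
               double dot = 0.0;
               for( int s = 0; s < nS; s++ )
                   dot += k[s] * y[s];
               x[ r*nF + f ] += w * dot;                     // same fiber, compartment r
           }
       }

   The forward block above is the transpose of this update, spreading
   w * sum_r x[r*nF+f] * kernel_r[s] into Y[v*nS+s]. The hand-unrolled code
   below keeps one accumulator per compartment so each Y sample is read only
   once per segment; this sketch is illustrative only and is not part of the
   patch itself. */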
while( t_v != t_vEnd ) + { + // in this case, I need to walk throug because the segments are ordered in "voxel order" + if ( *t_t == id ) + { + Yptr = Y + nS * (*t_v); + YptrEnd = Yptr + nS; + offset = nS * (*t_o); + + Y_tmp = *Yptr; + SFP0ptr = wmrSFP0 + offset; + x0 = (*SFP0ptr++) * Y_tmp; + #if nIC>=2 + SFP1ptr = wmrSFP1 + offset; + x1 = (*SFP1ptr++) * Y_tmp; + #endif + #if nIC>=3 + SFP2ptr = wmrSFP2 + offset; + x2 = (*SFP2ptr++) * Y_tmp; + #endif + #if nIC>=4 + SFP3ptr = wmrSFP3 + offset; + x3 = (*SFP3ptr++) * Y_tmp; + #endif + #if nIC>=5 + SFP4ptr = wmrSFP4 + offset; + x4 = (*SFP4ptr++) * Y_tmp; + #endif + #if nIC>=6 + SFP5ptr = wmrSFP5 + offset; + x5 = (*SFP5ptr++) * Y_tmp; + #endif + #if nIC>=7 + SFP6ptr = wmrSFP6 + offset; + x6 = (*SFP6ptr++) * Y_tmp; + #endif + #if nIC>=8 + SFP7ptr = wmrSFP7 + offset; + x7 = (*SFP7ptr++) * Y_tmp; + #endif + #if nIC>=9 + SFP8ptr = wmrSFP8 + offset; + x8 = (*SFP8ptr++) * Y_tmp; + #endif + #if nIC>=10 + SFP9ptr = wmrSFP9 + offset; + x9 = (*SFP9ptr++) * Y_tmp; + #endif + #if nIC>=11 + SFP10ptr = wmrSFP10 + offset; + x10 = (*SFP10ptr++) * Y_tmp; + #endif + #if nIC>=12 + SFP11ptr = wmrSFP11 + offset; + x11 = (*SFP11ptr++) * Y_tmp; + #endif + #if nIC>=13 + SFP12ptr = wmrSFP12 + offset; + x12 = (*SFP12ptr++) * Y_tmp; + #endif + #if nIC>=14 + SFP13ptr = wmrSFP13 + offset; + x13 = (*SFP13ptr++) * Y_tmp; + #endif + #if nIC>=15 + SFP14ptr = wmrSFP14 + offset; + x14 = (*SFP14ptr++) * Y_tmp; + #endif + #if nIC>=16 + SFP15ptr = wmrSFP15 + offset; + x15 = (*SFP15ptr++) * Y_tmp; + #endif + #if nIC>=17 + SFP16ptr = wmrSFP16 + offset; + x16 = (*SFP16ptr++) * Y_tmp; + #endif + #if nIC>=18 + SFP17ptr = wmrSFP17 + offset; + x17 = (*SFP17ptr++) * Y_tmp; + #endif + #if nIC>=19 + SFP18ptr = wmrSFP18 + offset; + x18 = (*SFP18ptr++) * Y_tmp; + #endif + #if nIC>=20 + SFP19ptr = wmrSFP19 + offset; + x19 = (*SFP19ptr++) * Y_tmp; + #endif + + while( ++Yptr != YptrEnd ) + { + Y_tmp = *Yptr; + x0 += (*SFP0ptr++) * Y_tmp; + #if nIC>=2 + x1 += (*SFP1ptr++) * Y_tmp; + #endif + #if nIC>=3 + x2 += (*SFP2ptr++) * Y_tmp; + #endif + #if nIC>=4 + x3 += (*SFP3ptr++) * Y_tmp; + #endif + #if nIC>=5 + x4 += (*SFP4ptr++) * Y_tmp; + #endif + #if nIC>=6 + x5 += (*SFP5ptr++) * Y_tmp; + #endif + #if nIC>=7 + x6 += (*SFP6ptr++) * Y_tmp; + #endif + #if nIC>=8 + x7 += (*SFP7ptr++) * Y_tmp; + #endif + #if nIC>=9 + x8 += (*SFP8ptr++) * Y_tmp; + #endif + #if nIC>=10 + x9 += (*SFP9ptr++) * Y_tmp; + #endif + #if nIC>=11 + x10 += (*SFP10ptr++) * Y_tmp; + #endif + #if nIC>=12 + x11 += (*SFP11ptr++) * Y_tmp; + #endif + #if nIC>=13 + x12 += (*SFP12ptr++) * Y_tmp; + #endif + #if nIC>=14 + x13 += (*SFP13ptr++) * Y_tmp; + #endif + #if nIC>=15 + x14 += (*SFP14ptr++) * Y_tmp; + #endif + #if nIC>=16 + x15 += (*SFP15ptr++) * Y_tmp; + #endif + #if nIC>=17 + x16 += (*SFP16ptr++) * Y_tmp; + #endif + #if nIC>=18 + x17 += (*SFP17ptr++) * Y_tmp; + #endif + #if nIC>=19 + x18 += (*SFP18ptr++) * Y_tmp; + #endif + #if nIC>=20 + x19 += (*SFP19ptr++) * Y_tmp; + #endif + } + + w = (double)(*t_l); + x[*t_f] += w * x0; + #if nIC>=2 + x[*t_f+nF] += w * x1; + #endif + #if nIC>=3 + x[*t_f+2*nF] += w * x2; + #endif + #if nIC>=4 + x[*t_f+3*nF] += w * x3; + #endif + #if nIC>=5 + x[*t_f+4*nF] += w * x4; + #endif + #if nIC>=6 + x[*t_f+5*nF] += w * x5; + #endif + #if nIC>=7 + x[*t_f+6*nF] += w * x6; + #endif + #if nIC>=8 + x[*t_f+7*nF] += w * x7; + #endif + #if nIC>=9 + x[*t_f+8*nF] += w * x8; + #endif + #if nIC>=10 + x[*t_f+9*nF] += w * x9; + #endif + #if nIC>=11 + x[*t_f+10*nF] += w * x10; + #endif + #if nIC>=12 + x[*t_f+11*nF] += w * 
x11; + #endif + #if nIC>=13 + x[*t_f+12*nF] += w * x12; + #endif + #if nIC>=14 + x[*t_f+13*nF] += w * x13; + #endif + #if nIC>=15 + x[*t_f+14*nF] += w * x14; + #endif + #if nIC>=16 + x[*t_f+15*nF] += w * x15; + #endif + #if nIC>=17 + x[*t_f+16*nF] += w * x16; + #endif + #if nIC>=18 + x[*t_f+17*nF] += w * x17; + #endif + #if nIC>=19 + x[*t_f+18*nF] += w * x18; + #endif + #if nIC>=20 + x[*t_f+19*nF] += w * x19; + #endif + } + + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + } +#endif + +#if nEC>=1 + // extra-cellular compartments + t_v = ECv + ECthreadsT[id]; + t_vEnd = ECv + ECthreadsT[id+1]; + t_o = ECo + ECthreadsT[id]; + + x_Ptr0 = x + nIC*nF + ECthreadsT[id]; + #if nEC>=2 + x_Ptr1 = x_Ptr0 + nE; + #endif + #if nEC>=3 + x_Ptr2 = x_Ptr1 + nE; + #endif + #if nEC>=4 + x_Ptr3 = x_Ptr2 + nE; + #endif + #if nEC>=5 + x_Ptr4 = x_Ptr3 + nE; + #endif + #if nEC>=6 + x_Ptr5 = x_Ptr4 + nE; + #endif + #if nEC>=7 + x_Ptr6 = x_Ptr5 + nE; + #endif + #if nEC>=8 + x_Ptr7 = x_Ptr6 + nE; + #endif + #if nEC>=9 + x_Ptr8 = x_Ptr7 + nE; + #endif + #if nEC>=10 + x_Ptr9 = x_Ptr8 + nE; + #endif + #if nEC>=11 + x_Ptr10 = x_Ptr9 + nE; + #endif + #if nEC>=12 + x_Ptr11 = x_Ptr10 + nE; + #endif + #if nEC>=13 + x_Ptr12 = x_Ptr11 + nE; + #endif + #if nEC>=14 + x_Ptr13 = x_Ptr12 + nE; + #endif + #if nEC>=15 + x_Ptr14 = x_Ptr13 + nE; + #endif + #if nEC>=16 + x_Ptr15 = x_Ptr14 + nE; + #endif + #if nEC>=17 + x_Ptr16 = x_Ptr15 + nE; + #endif + #if nEC>=18 + x_Ptr17 = x_Ptr16 + nE; + #endif + #if nEC>=19 + x_Ptr18 = x_Ptr17 + nE; + #endif + #if nEC>=20 + x_Ptr19 = x_Ptr18 + nE; + #endif + + while( t_v != t_vEnd ) + { + Yptr = Y + nS * (*t_v++); + YptrEnd = Yptr + nS; + offset = nS * (*t_o++); + + Y_tmp = *Yptr; + SFP0ptr = wmhSFP0 + offset; + x0 = (*SFP0ptr++) * Y_tmp; + #if nEC>=2 + SFP1ptr = wmhSFP1 + offset; + x1 = (*SFP1ptr++) * Y_tmp; + #endif + #if nEC>=3 + SFP2ptr = wmhSFP2 + offset; + x2 = (*SFP2ptr++) * Y_tmp; + #endif + #if nEC>=4 + SFP3ptr = wmhSFP3 + offset; + x3 = (*SFP3ptr++) * Y_tmp; + #endif + #if nEC>=5 + SFP4ptr = wmhSFP4 + offset; + x4 = (*SFP4ptr++) * Y_tmp; + #endif + #if nEC>=6 + SFP5ptr = wmhSFP5 + offset; + x5 = (*SFP5ptr++) * Y_tmp; + #endif + #if nEC>=7 + SFP6ptr = wmhSFP6 + offset; + x6 = (*SFP6ptr++) * Y_tmp; + #endif + #if nEC>=8 + SFP7ptr = wmhSFP7 + offset; + x7 = (*SFP7ptr++) * Y_tmp; + #endif + #if nEC>=9 + SFP8ptr = wmhSFP8 + offset; + x8 = (*SFP8ptr++) * Y_tmp; + #endif + #if nEC>=10 + SFP9ptr = wmhSFP9 + offset; + x9 = (*SFP9ptr++) * Y_tmp; + #endif + #if nEC>=11 + SFP10ptr = wmhSFP10 + offset; + x10 = (*SFP10ptr++) * Y_tmp; + #endif + #if nEC>=12 + SFP11ptr = wmhSFP11 + offset; + x11 = (*SFP11ptr++) * Y_tmp; + #endif + #if nEC>=13 + SFP12ptr = wmhSFP12 + offset; + x12 = (*SFP12ptr++) * Y_tmp; + #endif + #if nEC>=14 + SFP13ptr = wmhSFP13 + offset; + x13 = (*SFP13ptr++) * Y_tmp; + #endif + #if nEC>=15 + SFP14ptr = wmhSFP14 + offset; + x14 = (*SFP14ptr++) * Y_tmp; + #endif + #if nEC>=16 + SFP15ptr = wmhSFP15 + offset; + x15 = (*SFP15ptr++) * Y_tmp; + #endif + #if nEC>=17 + SFP16ptr = wmhSFP16 + offset; + x16 = (*SFP16ptr++) * Y_tmp; + #endif + #if nEC>=18 + SFP17ptr = wmhSFP17 + offset; + x17 = (*SFP17ptr++) * Y_tmp; + #endif + #if nEC>=19 + SFP18ptr = wmhSFP18 + offset; + x18 = (*SFP18ptr++) * Y_tmp; + #endif + #if nEC>=20 + SFP19ptr = wmhSFP19 + offset; + x19 = (*SFP19ptr++) * Y_tmp; + #endif + + while( ++Yptr != YptrEnd ) + { + Y_tmp = *Yptr; + x0 += (*SFP0ptr++) * Y_tmp; + #if nEC>=2 + x1 += (*SFP1ptr++) * Y_tmp; + #endif + #if nEC>=3 + x2 += (*SFP2ptr++) * Y_tmp; + #endif + #if nEC>=4 + x3 
+= (*SFP3ptr++) * Y_tmp; + #endif + #if nEC>=5 + x4 += (*SFP4ptr++) * Y_tmp; + #endif + #if nEC>=6 + x5 += (*SFP5ptr++) * Y_tmp; + #endif + #if nEC>=7 + x6 += (*SFP6ptr++) * Y_tmp; + #endif + #if nEC>=8 + x7 += (*SFP7ptr++) * Y_tmp; + #endif + #if nEC>=9 + x8 += (*SFP8ptr++) * Y_tmp; + #endif + #if nEC>=10 + x9 += (*SFP9ptr++) * Y_tmp; + #endif + #if nEC>=11 + x10 += (*SFP10ptr++) * Y_tmp; + #endif + #if nEC>=12 + x11 += (*SFP11ptr++) * Y_tmp; + #endif + #if nEC>=13 + x12 += (*SFP12ptr++) * Y_tmp; + #endif + #if nEC>=14 + x13 += (*SFP13ptr++) * Y_tmp; + #endif + #if nEC>=15 + x14 += (*SFP14ptr++) * Y_tmp; + #endif + #if nEC>=16 + x15 += (*SFP15ptr++) * Y_tmp; + #endif + #if nEC>=17 + x16 += (*SFP16ptr++) * Y_tmp; + #endif + #if nEC>=18 + x17 += (*SFP17ptr++) * Y_tmp; + #endif + #if nEC>=19 + x18 += (*SFP18ptr++) * Y_tmp; + #endif + #if nEC>=20 + x19 += (*SFP19ptr++) * Y_tmp; + #endif + } + (*x_Ptr0++) += x0; + #if nEC>=2 + (*x_Ptr1++) += x1; + #endif + #if nEC>=3 + (*x_Ptr2++) += x2; + #endif + #if nEC>=4 + (*x_Ptr3++) += x3; + #endif + #if nEC>=5 + (*x_Ptr4++) += x4; + #endif + #if nEC>=6 + (*x_Ptr5++) += x5; + #endif + #if nEC>=7 + (*x_Ptr6++) += x6; + #endif + #if nEC>=8 + (*x_Ptr7++) += x7; + #endif + #if nEC>=9 + (*x_Ptr8++) += x8; + #endif + #if nEC>=10 + (*x_Ptr9++) += x9; + #endif + #if nEC>=11 + (*x_Ptr10++) += x10; + #endif + #if nEC>=12 + (*x_Ptr11++) += x11; + #endif + #if nEC>=13 + (*x_Ptr12++) += x12; + #endif + #if nEC>=14 + (*x_Ptr13++) += x13; + #endif + #if nEC>=15 + (*x_Ptr14++) += x14; + #endif + #if nEC>=16 + (*x_Ptr15++) += x15; + #endif + #if nEC>=17 + (*x_Ptr16++) += x16; + #endif + #if nEC>=18 + (*x_Ptr17++) += x17; + #endif + #if nEC>=19 + (*x_Ptr18++) += x18; + #endif + #if nEC>=20 + (*x_Ptr19++) += x19; + #endif + } +#endif + +#if nISO>=1 + // isotropic compartments + t_v = ISOv + ISOthreadsT[id]; + t_vEnd = ISOv + ISOthreadsT[id+1]; + + x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreadsT[id]; + #if nISO>=2 + x_Ptr1 = x_Ptr0 + nV; + #endif + #if nISO>=3 + x_Ptr2 = x_Ptr1 + nV; + #endif + #if nISO>=4 + x_Ptr3 = x_Ptr2 + nV; + #endif + #if nISO>=5 + x_Ptr4 = x_Ptr3 + nV; + #endif + #if nISO>=6 + x_Ptr5 = x_Ptr4 + nV; + #endif + #if nISO>=7 + x_Ptr6 = x_Ptr5 + nV; + #endif + #if nISO>=8 + x_Ptr7 = x_Ptr6 + nV; + #endif + #if nISO>=9 + x_Ptr8 = x_Ptr7 + nV; + #endif + #if nISO>=10 + x_Ptr9 = x_Ptr8 + nV; + #endif + #if nISO>=11 + x_Ptr10 = x_Ptr9 + nV; + #endif + #if nISO>=12 + x_Ptr11 = x_Ptr10 + nV; + #endif + #if nISO>=13 + x_Ptr12 = x_Ptr11 + nV; + #endif + #if nISO>=14 + x_Ptr13 = x_Ptr12 + nV; + #endif + #if nISO>=15 + x_Ptr14 = x_Ptr13 + nV; + #endif + #if nISO>=16 + x_Ptr15 = x_Ptr14 + nV; + #endif + #if nISO>=17 + x_Ptr16 = x_Ptr15 + nV; + #endif + #if nISO>=18 + x_Ptr17 = x_Ptr16 + nV; + #endif + #if nISO>=19 + x_Ptr18 = x_Ptr17 + nV; + #endif + #if nISO>=20 + x_Ptr19 = x_Ptr18 + nV; + #endif + + while( t_v != t_vEnd ) + { + Yptr = Y + nS * (*t_v++); + YptrEnd = Yptr + nS; + + SFP0ptr = isoSFP0; + #if nISO>=2 + SFP1ptr = isoSFP1; + #endif + #if nISO>=3 + SFP2ptr = isoSFP2; + #endif + #if nISO>=4 + SFP3ptr = isoSFP3; + #endif + #if nISO>=5 + SFP4ptr = isoSFP4; + #endif + #if nISO>=6 + SFP5ptr = isoSFP5; + #endif + #if nISO>=7 + SFP6ptr = isoSFP6; + #endif + #if nISO>=8 + SFP7ptr = isoSFP7; + #endif + #if nISO>=9 + SFP8ptr = isoSFP8; + #endif + #if nISO>=10 + SFP9ptr = isoSFP9; + #endif + #if nISO>=11 + SFP10ptr = isoSFP10; + #endif + #if nISO>=12 + SFP11ptr = isoSFP11; + #endif + #if nISO>=13 + SFP12ptr = isoSFP12; + #endif + #if nISO>=14 + SFP13ptr = isoSFP13; + 
#endif + #if nISO>=15 + SFP14ptr = isoSFP14; + #endif + #if nISO>=16 + SFP15ptr = isoSFP15; + #endif + #if nISO>=17 + SFP16ptr = isoSFP16; + #endif + #if nISO>=18 + SFP17ptr = isoSFP17; + #endif + #if nISO>=19 + SFP18ptr = isoSFP18; + #endif + #if nISO>=20 + SFP19ptr = isoSFP19; + #endif + + Y_tmp = *Yptr; + x0 = (*SFP0ptr++) * Y_tmp; + #if nISO>=2 + x1 = (*SFP1ptr++) * Y_tmp; + #endif + #if nISO>=3 + x2 = (*SFP2ptr++) * Y_tmp; + #endif + #if nISO>=4 + x3 = (*SFP3ptr++) * Y_tmp; + #endif + #if nISO>=5 + x4 = (*SFP4ptr++) * Y_tmp; + #endif + #if nISO>=6 + x5 = (*SFP5ptr++) * Y_tmp; + #endif + #if nISO>=7 + x6 = (*SFP6ptr++) * Y_tmp; + #endif + #if nISO>=8 + x7 = (*SFP7ptr++) * Y_tmp; + #endif + #if nISO>=9 + x8 = (*SFP8ptr++) * Y_tmp; + #endif + #if nISO>=10 + x9 = (*SFP9ptr++) * Y_tmp; + #endif + #if nISO>=11 + x10 = (*SFP10ptr++) * Y_tmp; + #endif + #if nISO>=12 + x11 = (*SFP11ptr++) * Y_tmp; + #endif + #if nISO>=13 + x12 = (*SFP12ptr++) * Y_tmp; + #endif + #if nISO>=14 + x13 = (*SFP13ptr++) * Y_tmp; + #endif + #if nISO>=15 + x14 = (*SFP14ptr++) * Y_tmp; + #endif + #if nISO>=16 + x15 = (*SFP15ptr++) * Y_tmp; + #endif + #if nISO>=17 + x16 = (*SFP16ptr++) * Y_tmp; + #endif + #if nISO>=18 + x17 = (*SFP17ptr++) * Y_tmp; + #endif + #if nISO>=19 + x18 = (*SFP18ptr++) * Y_tmp; + #endif + #if nISO>=20 + x19 = (*SFP19ptr++) * Y_tmp; + #endif + + while( ++Yptr != YptrEnd ) + { + Y_tmp = *Yptr; + x0 += (*SFP0ptr++) * Y_tmp; + #if nISO>=2 + x1 += (*SFP1ptr++) * Y_tmp; + #endif + #if nISO>=3 + x2 += (*SFP2ptr++) * Y_tmp; + #endif + #if nISO>=4 + x3 += (*SFP3ptr++) * Y_tmp; + #endif + #if nISO>=5 + x4 += (*SFP4ptr++) * Y_tmp; + #endif + #if nISO>=6 + x5 += (*SFP5ptr++) * Y_tmp; + #endif + #if nISO>=7 + x6 += (*SFP6ptr++) * Y_tmp; + #endif + #if nISO>=8 + x7 += (*SFP7ptr++) * Y_tmp; + #endif + #if nISO>=9 + x8 += (*SFP8ptr++) * Y_tmp; + #endif + #if nISO>=10 + x9 += (*SFP9ptr++) * Y_tmp; + #endif + #if nISO>=11 + x10 += (*SFP10ptr++) * Y_tmp; + #endif + #if nISO>=12 + x11 += (*SFP11ptr++) * Y_tmp; + #endif + #if nISO>=13 + x12 += (*SFP12ptr++) * Y_tmp; + #endif + #if nISO>=14 + x13 += (*SFP13ptr++) * Y_tmp; + #endif + #if nISO>=15 + x14 += (*SFP14ptr++) * Y_tmp; + #endif + #if nISO>=16 + x15 += (*SFP15ptr++) * Y_tmp; + #endif + #if nISO>=17 + x16 += (*SFP16ptr++) * Y_tmp; + #endif + #if nISO>=18 + x17 += (*SFP17ptr++) * Y_tmp; + #endif + #if nISO>=19 + x18 += (*SFP18ptr++) * Y_tmp; + #endif + #if nISO>=20 + x19 += (*SFP19ptr++) * Y_tmp; + #endif + } + + (*x_Ptr0++) += x0; + #if nISO>=2 + (*x_Ptr1++) += x1; + #endif + #if nISO>=3 + (*x_Ptr2++) += x2; + #endif + #if nISO>=4 + (*x_Ptr3++) += x3; + #endif + #if nISO>=5 + (*x_Ptr4++) += x4; + #endif + #if nISO>=6 + (*x_Ptr5++) += x5; + #endif + #if nISO>=7 + (*x_Ptr6++) += x6; + #endif + #if nISO>=8 + (*x_Ptr7++) += x7; + #endif + #if nISO>=9 + (*x_Ptr8++) += x8; + #endif + #if nISO>=10 + (*x_Ptr9++) += x9; + #endif + #if nISO>=11 + (*x_Ptr10++) += x10; + #endif + #if nISO>=12 + (*x_Ptr11++) += x11; + #endif + #if nISO>=13 + (*x_Ptr12++) += x12; + #endif + #if nISO>=14 + (*x_Ptr13++) += x13; + #endif + #if nISO>=15 + (*x_Ptr14++) += x14; + #endif + #if nISO>=16 + (*x_Ptr15++) += x15; + #endif + #if nISO>=17 + (*x_Ptr16++) += x16; + #endif + #if nISO>=18 + (*x_Ptr17++) += x17; + #endif + #if nISO>=19 + (*x_Ptr18++) += x18; + #endif + #if nISO>=20 + (*x_Ptr19++) += x19; + #endif + } +#endif + + pthread_exit( 0 ); +} + + +// ========================= +// Function called by CYTHON +// ========================= +void COMMIT_At( + int _nF, int _n, int _nE, int 
_nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT +) +{ + nF = _nF; + n = _n; + nE = _nE; + nV = _nV; + nS = _nS; + ndirs = _ndirs; + + x = _vOUT; + Y = _vIN; + + ICf = _ICf; + ICv = _ICv; + ICo = _ICo; + ICl = _ICl; + ECv = _ECv; + ECo = _ECo; + ISOv = _ISOv; + + #if nIC>=1 + wmrSFP0 = _wmrSFP; + #if nIC>=2 + wmrSFP1 = wmrSFP0 + _ndirs*_nS; + #if nIC>=3 + wmrSFP2 = wmrSFP1 + _ndirs*_nS; + #if nIC>=4 + wmrSFP3 = wmrSFP2 + _ndirs*_nS; + #if nIC>=5 + wmrSFP4 = wmrSFP3 + _ndirs*_nS; + #if nIC>=6 + wmrSFP5 = wmrSFP4 + _ndirs*_nS; + #if nIC>=7 + wmrSFP6 = wmrSFP5 + _ndirs*_nS; + #if nIC>=8 + wmrSFP7 = wmrSFP6 + _ndirs*_nS; + #if nIC>=9 + wmrSFP8 = wmrSFP7 + _ndirs*_nS; + #if nIC>=10 + wmrSFP9 = wmrSFP8 + _ndirs*_nS; + #if nIC>=11 + wmrSFP10 = wmrSFP9 + _ndirs*_nS; + #if nIC>=12 + wmrSFP11 = wmrSFP10 + _ndirs*_nS; + #if nIC>=13 + wmrSFP12 = wmrSFP11 + _ndirs*_nS; + #if nIC>=14 + wmrSFP13 = wmrSFP12 + _ndirs*_nS; + #if nIC>=15 + wmrSFP14 = wmrSFP13 + _ndirs*_nS; + #if nIC>=16 + wmrSFP15 = wmrSFP14 + _ndirs*_nS; + #if nIC>=17 + wmrSFP16 = wmrSFP15 + _ndirs*_nS; + #if nIC>=18 + wmrSFP17 = wmrSFP16 + _ndirs*_nS; + #if nIC>=19 + wmrSFP18 = wmrSFP17 + _ndirs*_nS; + #if nIC>=20 + wmrSFP19 = wmrSFP18 + _ndirs*_nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #if nEC>=1 + wmhSFP0 = _wmhSFP; + #if nEC>=2 + wmhSFP1 = wmhSFP0 + _ndirs*_nS; + #if nEC>=3 + wmhSFP2 = wmhSFP1 + _ndirs*_nS; + #if nEC>=4 + wmhSFP3 = wmhSFP2 + _ndirs*_nS; + #if nEC>=5 + wmhSFP4 = wmhSFP3 + _ndirs*_nS; + #if nEC>=6 + wmhSFP5 = wmhSFP4 + _ndirs*_nS; + #if nEC>=7 + wmhSFP6 = wmhSFP5 + _ndirs*_nS; + #if nEC>=8 + wmhSFP7 = wmhSFP6 + _ndirs*_nS; + #if nEC>=9 + wmhSFP8 = wmhSFP7 + _ndirs*_nS; + #if nEC>=10 + wmhSFP9 = wmhSFP8 + _ndirs*_nS; + #if nEC>=11 + wmhSFP10 = wmhSFP9 + _ndirs*_nS; + #if nEC>=12 + wmhSFP11 = wmhSFP10 + _ndirs*_nS; + #if nEC>=13 + wmhSFP12 = wmhSFP11 + _ndirs*_nS; + #if nEC>=14 + wmhSFP13 = wmhSFP12 + _ndirs*_nS; + #if nEC>=15 + wmhSFP14 = wmhSFP13 + _ndirs*_nS; + #if nEC>=16 + wmhSFP15 = wmhSFP14 + _ndirs*_nS; + #if nEC>=17 + wmhSFP16 = wmhSFP15 + _ndirs*_nS; + #if nEC>=18 + wmhSFP17 = wmhSFP16 + _ndirs*_nS; + #if nEC>=19 + wmhSFP18 = wmhSFP17 + _ndirs*_nS; + #if nEC>=20 + wmhSFP19 = wmhSFP18 + _ndirs*_nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #if nISO>=1 + isoSFP0 = _isoSFP; + #if nISO>=2 + isoSFP1 = isoSFP0 + _nS; + #if nISO>=3 + isoSFP2 = isoSFP1 + _nS; + #if nISO>=4 + isoSFP3 = isoSFP2 + _nS; + #if nISO>=5 + isoSFP4 = isoSFP3 + _nS; + #if nISO>=6 + isoSFP5 = isoSFP4 + _nS; + #if nISO>=7 + isoSFP6 = isoSFP5 + _nS; + #if nISO>=8 + isoSFP7 = isoSFP6 + _nS; + #if nISO>=9 + isoSFP8 = isoSFP7 + _nS; + #if nISO>=10 + isoSFP9 = isoSFP8 + _nS; + #if nISO>=11 + isoSFP10 = isoSFP9 + _nS; + #if nISO>=12 + isoSFP11 = isoSFP10 + _nS; + #if nISO>=13 + isoSFP12 = isoSFP11 + _nS; + #if nISO>=14 + isoSFP13 = isoSFP12 + _nS; + #if nISO>=15 + isoSFP14 = isoSFP13 + _nS; + #if nISO>=16 + isoSFP15 = isoSFP14 + _nS; + #if nISO>=17 + isoSFP16 = isoSFP15 + _nS; + #if nISO>=18 + isoSFP17 = isoSFP16 + _nS; + #if nISO>=19 + 
isoSFP18 = isoSFP17 + _nS; + #if nISO>=20 + isoSFP19 = isoSFP18 + _nS; + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + #endif + + ICthreadsT = _ICthreadsT; + ECthreadsT = _ECthreadsT; + ISOthreadsT = _ISOthreadsT; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[nTHREADS]; + int t; + for(t=0; t q : - # Smooth step - mu = beta*mu - x = xhat - mu*grad - - # Non-smooth step - proximal( x ) - reg_term_x = omega( x ) - - # Check stepsize - tmp = x-xhat - q = qfval + np.real( np.dot(tmp,grad) ) + 0.5/mu * np.linalg.norm(tmp)**2 + reg_term_x - res = A.dot(x) - y - res_norm = np.linalg.norm(res) - curr_obj = 0.5 * res_norm**2 + reg_term_x - - # Global stopping criterion - abs_obj = abs(curr_obj - prev_obj) - rel_obj = abs_obj / curr_obj - abs_x = np.linalg.norm(x - prev_x) - rel_x = abs_x / ( np.linalg.norm(x) + eps ) - if verbose : - print( " %13.7e %13.7e | %13.7e %13.7e %13.7e | %13.7e %13.7e" % ( 0.5 * res_norm**2, reg_term_x, curr_obj, abs_obj, rel_obj, abs_x, rel_x ) ) - - if abs_obj < eps : - criterion = "Absolute tolerance on the objective" - break - elif rel_obj < tol_fun : - criterion = "Relative tolerance on the objective" - break - elif abs_x < eps : - criterion = "Absolute tolerance on the unknown" - break - elif rel_x < tol_x : - criterion = "Relative tolerance on the unknown" - break - elif iter >= max_iter : - criterion = "Maximum number of iterations" - break - - # FISTA update - t = 0.5 * ( 1 + sqrt(1+4*told**2) ) - xhat = x + (told-1)/t * (x - prev_x) - - # Gradient computation - res = A.dot(xhat) - y - xarr = np.asarray(x) - - grad = np.asarray(At.dot(res)) - - # Update variables - iter += 1 - prev_obj = curr_obj - prev_x = x.copy() - told = t - qfval = 0.5 * np.linalg.norm(res)**2 - - - if verbose : - print( "< Stopping criterion: %s >" % criterion ) - - opt_details = {} - opt_details['residual'] = 0.5*res_norm**2 - opt_details['regterm'] = reg_term_x - opt_details['cost_function'] = curr_obj - opt_details['abs_cost'] = abs_obj - opt_details['rel_cost'] = rel_obj - opt_details['abs_x'] = abs_x - opt_details['rel _x'] = rel_x - opt_details['iterations'] = iter - opt_details['stopping_criterion'] = criterion - - return x, opt_details +""" +Author: Matteo Frigo - lts5 @ EPFL and Dep. of CS @ Univ. of Verona + +This structure is based on the previous work of Rafael Carrillo and was +supported by the LTS5 laboratory at EPFL, Lausanne. +""" +from __future__ import print_function +import numpy as np +from math import sqrt +import sys +import warnings +eps = np.finfo(float).eps + +from commit.proximals import (non_negativity, + omega_group_sparsity, + prox_group_sparsity, + soft_thresholding, + projection_onto_l2_ball) +group_sparsity = -1 +non_negative = 0 +norm1 = 1 +norm2 = 2 +norminf = np.inf +list_regnorms = [group_sparsity, non_negative, norm1, norm2] +list_group_sparsity_norms = [norm2]#, norminf] # removed because of issue #54 + + +def init_regularisation(commit_evaluation, + regnorms = (non_negative, non_negative, non_negative), + structureIC = None, weightsIC = None, group_norm = 2, + lambdas = (.0,.0,.0) ): + """ + Initialise the data structure that defines Omega in + + argmin_x 0.5*||Ax-y||_2^2 + Omega(x) + + + Input + ----- + commit_evaluation - commit.Evaluation object : + dictionary and model have to be loaded beforehand. + + + regnorms - tuple : + this sets the penalty term to be used for each compartment. 
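        For example (an illustrative choice, not a recommendation),
        regnorms = (commit.solvers.group_sparsity, commit.solvers.non_negative, commit.solvers.non_negative)
        penalises the intracellular coefficients with a group-sparsity term and
        keeps the plain non-negativity constraint on the other two compartments.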
+ Default = (non_negative,non_negative,non_negative). + + regnorms[0] corresponds to the Intracellular compartment + regnorms[1] corresponds to the Extracellular compartment + regnorms[2] corresponds to the Isotropic compartment + + Each regnorms[k] must be one of commit.solvers. + {group_sparsity, non_negative, norm1, norm2}. + + commit.solvers.group_sparsity considers both the non-overlapping + and the hierarchical group sparsity (see [1]). This option is + allowed only in the IC compartment. The mathematical formulation + of this term is + $\Omega(x) = \lambda \sum_{g\in G} w_g |x_g| + + commit.solvers.non_negative puts a non negativity constraint on the + coefficients corresponding to the compartment. This is the + default option for each compartment + + commit.solvers.norm1 penalises with the 1-norm of the coefficients + corresponding to the compartment. + + commit.solvers.norm2 penalises with the 2-norm of the coefficients + corresponding to the compartment. + + + structureIC - np.array(list(list)) : + group structure for the IC compartment. + This field is necessary only if regterm[0]=commit.solver.group_sparsity. + Example: + structureIC = np.array([[0,2,5],[1,3,4],[0,1,2,3,4,5],[6]]) + + that is equivalent to + [0,1,2,3,4,5] [6] + / \ + [0,2,5] [1,3,4] + which has two non overlapping groups, one of which is the union + of two other non-overlapping groups. + + + weightsIC - np.array(np.float64) : + this defines the weights associated to each group of structure IC. + + + group_norm - number : + norm type for the commit.solver.group_sparsity penalisation of the IC compartment. + Default: group_norm = commit.solver.norm2 + To be chosen among commit.solver.{norm2,norminf}. + + lambdas - tuple : + regularisation parameter for each compartment. + Default: lambdas = (0.0, 0.0, 0.0) + The lambdas correspond to the onse described in the mathematical + formulation of the regularisation term + $\Omega(x) = lambdas[0]*regnorm[0](x) + lambdas[1]*regnorm[1](x) + lambdas[2]*regnorm[2](x)$ + + + References: + [1] Jenatton et al. 
- 'Proximal Methods for Hierarchical Sparse Coding' + """ + regularisation = {} + + regularisation['startIC'] = 0 + regularisation['sizeIC'] = int( commit_evaluation.DICTIONARY['IC']['nF'] * commit_evaluation.KERNELS['wmr'].shape[0]) + regularisation['startEC'] = int( regularisation['sizeIC'] ) + regularisation['sizeEC'] = int( commit_evaluation.DICTIONARY['EC']['nE'] * commit_evaluation.KERNELS['wmh'].shape[0]) + regularisation['startISO'] = int( regularisation['sizeIC'] + regularisation['sizeEC'] ) + regularisation['sizeISO'] = int( commit_evaluation.DICTIONARY['nV'] * commit_evaluation.KERNELS['iso'].shape[0]) + + regularisation['normIC'] = regnorms[0] + regularisation['normEC'] = regnorms[1] + regularisation['normISO'] = regnorms[2] + + regularisation['lambdaIC'] = float( lambdas[0] ) + regularisation['lambdaEC'] = float( lambdas[1] ) + regularisation['lambdaISO'] = float( lambdas[2] ) + + # Solver-specific fields + regularisation['structureIC'] = structureIC + regularisation['weightsIC'] = weightsIC + regularisation['group_norm'] = group_norm + + return regularisation + + +def regularisation2omegaprox(regularisation): + lambdaIC = float(regularisation.get('lambdaIC')) + lambdaEC = float(regularisation.get('lambdaEC')) + lambdaISO = float(regularisation.get('lambdaISO')) + if lambdaIC < 0.0 or lambdaEC < 0.0 or lambdaISO < 0.0: + raise ValueError('Negative regularisation parameters are not allowed') + + normIC = regularisation.get('normIC') + normEC = regularisation.get('normEC') + normISO = regularisation.get('normISO') + if not normIC in list_regnorms: + raise ValueError('normIC must be one of commit.solvers.{group_sparsity,non_negative,norm1,norm2}') + if not normEC in list_regnorms: + raise ValueError('normEC must be one of commit.solvers.{group_sparsity,non_negative,norm1,norm2}') + if not normISO in list_regnorms: + raise ValueError('normISO must be one of commit.solvers.{group_sparsity,non_negative,norm1,norm2}') + + ## NNLS case + if (lambdaIC == 0.0 and lambdaEC == 0.0 and lambdaISO == 0.0) or (normIC == non_negative and normEC == non_negative and normISO == non_negative): + omega = lambda x: 0.0 + prox = lambda x: non_negativity(x, 0, len(x)) + return omega, prox + + ## All other cases + # Intracellular Compartment + startIC = regularisation.get('startIC') + sizeIC = regularisation.get('sizeIC') + if lambdaIC == 0.0: + omegaIC = lambda x: 0.0 + proxIC = lambda x: x + elif normIC == norm2: + omegaIC = lambda x: lambdaIC * np.linalg.norm(x[startIC:sizeIC]) + proxIC = lambda x: projection_onto_l2_ball(x, lambdaIC, startIC, sizeIC) + elif normIC == norm1: + omegaIC = lambda x: lambdaIC * sum( x[startIC:sizeIC] ) + proxIC = lambda x: soft_thresholding(x, lambdaIC, startIC, sizeIC) + elif normIC == non_negative: + omegaIC = lambda x: 0.0 + proxIC = lambda x: non_negativity(x, startIC, sizeIC) + elif normIC == group_sparsity: + structureIC = regularisation.get('structureIC') + groupWeightIC = regularisation.get('weightsIC') + if not len(structureIC) == len(groupWeightIC): + raise ValueError('Number of groups and weights do not coincide.') + group_norm = regularisation.get('group_norm') + if not group_norm in list_group_sparsity_norms: + raise ValueError('Wrong norm in the structured sparsity term. Choose between %s.' 
% str(list_group_sparsity_norms)) + + # convert to new data structure (needed for faster access) + N = np.sum([g.size for g in structureIC]) + groupIdxIC = np.zeros( (N,), dtype=np.int32 ) + groupSizeIC = np.zeros( (structureIC.size,), dtype=np.int32 ) + pos = 0 + for i, g in enumerate(structureIC) : + groupSizeIC[i] = g.size + groupIdxIC[pos:(pos+g.size)] = g[:] + pos += g.size + + omegaIC = lambda x: omega_group_sparsity( x, groupIdxIC, groupSizeIC, groupWeightIC, lambdaIC, group_norm ) + proxIC = lambda x: prox_group_sparsity( x, groupIdxIC, groupSizeIC, groupWeightIC, lambdaIC, group_norm ) + else: + raise ValueError('Type of regularisation for IC compartment not recognized.') + + + # Extracellular Compartment + startEC = regularisation.get('startEC') + sizeEC = regularisation.get('sizeEC') + if lambdaEC == 0.0: + omegaEC = lambda x: 0.0 + proxEC = lambda x: x + elif normEC == norm2: + omegaEC = lambda x: lambdaEC * np.linalg.norm(x[startEC:(startEC+sizeEC)]) + proxEC = lambda x: projection_onto_l2_ball(x, lambdaEC, startEC, sizeEC) + elif normEC == norm1: + omegaEC = lambda x: lambdaEC * sum( x[startEC:(startEC+sizeEC)] ) + proxEC = lambda x: soft_thresholding(x, lambdaEC, startEC, sizeEC) + elif normEC == non_negative: + omegaEC = lambda x: 0.0 + proxEC = lambda x: non_negativity(x, startEC, sizeEC) + else: + raise ValueError('Type of regularisation for EC compartment not recognized.') + + # Isotropic Compartment + startISO = regularisation.get('startISO') + sizeISO = regularisation.get('sizeISO') + if lambdaISO == 0.0: + omegaISO = lambda x: 0.0 + proxISO = lambda x: x + elif normISO == norm2: + omegaISO = lambda x: lambdaISO * np.linalg.norm(x[startISO:(startISO+sizeISO)]) + proxISO = lambda x: projection_onto_l2_ball(x, lambdaISO, startISO, sizeISO) + elif normISO == norm1: + omegaISO = lambda x: lambdaISO * sum( x[startISO:(startISO+sizeISO)] ) + proxISO = lambda x: soft_thresholding(x, lambdaISO, startISO, sizeISO) + elif normISO == non_negative: + omegaISO = lambda x: 0.0 + proxISO = lambda x: non_negativity(x, startISO, sizeISO) + else: + raise ValueError('Type of regularisation for ISO compartment not recognized.') + + omega = lambda x: omegaIC(x) + omegaEC(x) + omegaISO(x) + prox = lambda x: non_negativity(proxIC(proxEC(proxISO(x))),0,x.size) # non negativity is redunduntly forced + + return omega, prox + + +def evaluate_model(y, A, x, regularisation = None): + if regularisation is None: + omega = lambda x: 0.0 + prox = lambda x: non_negativity(x, 0, len(x)) + else: + omega, _ = regularisation2omegaprox(regularisation) + + return 0.5*np.linalg.norm(A.dot(x)-y)**2 + omega(x) + + +def solve(y, A, At, tol_fun = 1e-4, tol_x = 1e-6, max_iter = 1000, verbose = True, x0 = None, regularisation = None): + """ + Solve the regularised least squares problem + + argmin_x 0.5*||Ax-y||_2^2 + Omega(x) + + with the Omega described by 'regularisation'. + + Check the documentation of commit.solvers.init_regularisation to see how to + solve a specific problem. + """ + if regularisation is None: + omega = lambda x: 0.0 + prox = lambda x: non_negativity(x, 0, x.size) + else: + omega, prox = regularisation2omegaprox(regularisation) + + if x0 is None: + x0 = np.zeros(A.shape[1]) + + return fista( y, A, At, tol_fun, tol_x, max_iter, verbose, x0, omega, prox) + + +def fista( y, A, At, tol_fun, tol_x, max_iter, verbose, x0, omega, proximal) : + """ + Solve the regularised least squares problem + + argmin_x 0.5*||Ax-y||_2^2 + Omega(x) + + with the FISTA algorithm described in [1]. 
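    A minimal usage sketch (illustrative only, on synthetic data; the names
    below are not part of the patch). In COMMIT, A and At are the dictionary
    operator and its adjoint, and a structured penalty can be prepared with
    init_regularisation() and passed to solve() via the "regularisation"
    keyword:

        import numpy as np
        from commit import solvers

        A = np.random.rand(20, 5)               # toy forward operator
        y = A.dot(np.abs(np.random.rand(5)))    # synthetic measurements
        # default behaviour: non-negativity constraint on all coefficients
        x, details = solvers.solve(y, A, A.T, tol_fun=1e-6, max_iter=500, verbose=False)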
+ + The penalty term and its proximal operator must be defined in such a way + that they already contain the regularisation parameter. + + References: + [1] Beck & Teboulle - `A Fast Iterative Shrinkage Thresholding + Algorithm for Linear Inverse Problems` + """ + + # Initialization + res = -y.copy() + xhat = x0.copy() + x = np.zeros_like(xhat) + res += A.dot(xhat) + proximal( xhat ) + reg_term = omega( xhat ) + prev_obj = 0.5 * np.linalg.norm(res)**2 + reg_term + + told = 1 + beta = 0.9 + prev_x = xhat.copy() + grad = np.asarray(At.dot(res)) + qfval = prev_obj + + # Step size computation + L = ( np.linalg.norm( A.dot(grad) ) / np.linalg.norm(grad) )**2 + mu = 1.9 / L + + # Main loop + if verbose : + print() + print( " | 1/2||Ax-y||^2 Omega | Cost function Abs error Rel error | Abs x Rel x " ) + print( "------|--------------------------------|-----------------------------------------------|------------------------------" ) + iter = 1 + while True : + if verbose : + print( "%4d |" % iter, end="" ) + sys.stdout.flush() + + # Smooth step + x = xhat - mu*grad + + # Non-smooth step + proximal( x ) + reg_term_x = omega( x ) + + # Check stepsize + tmp = x-xhat + q = qfval + np.real( np.dot(tmp,grad) ) + 0.5/mu * np.linalg.norm(tmp)**2 + reg_term_x + res = A.dot(x) - y + res_norm = np.linalg.norm(res) + curr_obj = 0.5 * res_norm**2 + reg_term_x + + # Backtracking + while curr_obj > q : + # Smooth step + mu = beta*mu + x = xhat - mu*grad + + # Non-smooth step + proximal( x ) + reg_term_x = omega( x ) + + # Check stepsize + tmp = x-xhat + q = qfval + np.real( np.dot(tmp,grad) ) + 0.5/mu * np.linalg.norm(tmp)**2 + reg_term_x + res = A.dot(x) - y + res_norm = np.linalg.norm(res) + curr_obj = 0.5 * res_norm**2 + reg_term_x + + # Global stopping criterion + abs_obj = abs(curr_obj - prev_obj) + rel_obj = abs_obj / curr_obj + abs_x = np.linalg.norm(x - prev_x) + rel_x = abs_x / ( np.linalg.norm(x) + eps ) + if verbose : + print( " %13.7e %13.7e | %13.7e %13.7e %13.7e | %13.7e %13.7e" % ( 0.5 * res_norm**2, reg_term_x, curr_obj, abs_obj, rel_obj, abs_x, rel_x ) ) + + if abs_obj < eps : + criterion = "Absolute tolerance on the objective" + break + elif rel_obj < tol_fun : + criterion = "Relative tolerance on the objective" + break + elif abs_x < eps : + criterion = "Absolute tolerance on the unknown" + break + elif rel_x < tol_x : + criterion = "Relative tolerance on the unknown" + break + elif iter >= max_iter : + criterion = "Maximum number of iterations" + break + + # FISTA update + t = 0.5 * ( 1 + sqrt(1+4*told**2) ) + xhat = x + (told-1)/t * (x - prev_x) + + # Gradient computation + res = A.dot(xhat) - y + xarr = np.asarray(x) + + grad = np.asarray(At.dot(res)) + + # Update variables + iter += 1 + prev_obj = curr_obj + prev_x = x.copy() + told = t + qfval = 0.5 * np.linalg.norm(res)**2 + + + if verbose : + print( "< Stopping criterion: %s >" % criterion ) + + opt_details = {} + opt_details['residual'] = 0.5*res_norm**2 + opt_details['regterm'] = reg_term_x + opt_details['cost_function'] = curr_obj + opt_details['abs_cost'] = abs_obj + opt_details['rel_cost'] = rel_obj + opt_details['abs_x'] = abs_x + opt_details['rel _x'] = rel_x + opt_details['iterations'] = iter + opt_details['stopping_criterion'] = criterion + + return x, opt_details diff --git a/commit/trk2dictionary/trk2dictionary.pyx b/commit/trk2dictionary/trk2dictionary.pyx index 21636bc0..85f9f5c0 100755 --- a/commit/trk2dictionary/trk2dictionary.pyx +++ b/commit/trk2dictionary/trk2dictionary.pyx @@ -1,429 +1,429 @@ -#!python -# cython: 
language_level=3, c_string_type=str, c_string_encoding=ascii, boundscheck=False, wraparound=False, profile=False -from __future__ import print_function -import cython -import numpy as np -cimport numpy as np -import nibabel -from os.path import join, exists, splitext, dirname, isdir -from os import makedirs, remove -import time -import amico -import pickle -from amico.util import LOG, NOTE, WARNING, ERROR -from pkg_resources import get_distribution - - -# Interface to actual C code -cdef extern from "trk2dictionary_c.cpp": - int trk2dictionary( - char* filename_tractogram, int data_offset, int Nx, int Ny, int Nz, float Px, float Py, float Pz, int n_count, int n_scalars, - int n_properties, float fiber_shiftX, float fiber_shiftY, float fiber_shiftZ, float min_seg_len, float min_fiber_len, float max_fiber_len, - float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int ECiz, - float* _ptrMASK, float* ptrTDI, char* path_out, int c, double* ptrPeaksAffine, - int nBlurRadii, double blurSigma, double* ptrBlurRadii, int* ptrBlurSamples, double* ptrBlurWeights, float* ptrTractsAffine, unsigned short ndirs, short* prtHashTable - ) nogil - - -cpdef run( filename_tractogram=None, path_out=None, filename_peaks=None, filename_mask=None, do_intersect=True, - fiber_shift=0, min_seg_len=1e-3, min_fiber_len=0.0, max_fiber_len=250.0, - vf_THR=0.1, peaks_use_affine=False, flip_peaks=[False,False,False], - blur_radii=[], blur_samples=[], blur_sigma=0.0, - filename_trk=None, gen_trk=None, TCK_ref_image=None, ndirs=32761 - ): - """Perform the conversion of a tractoram to the sparse data-structure internally - used by COMMIT to perform the matrix-vector multiplications with the operator A - during the inversion of the linear system. - - Parameters - ---------- - filename_tractogram : string - Path to the tractogram (.trk or .tck) containing the streamlines to load. - - TCK_ref_image: string - When loading a .tck tractogram, path to the NIFTI file containing the information about - the geometry to be used for the tractogram to load. If not specified, it will try to use - the information from filename_peaks or filename_mask. - - path_out : string - Path to the folder for storing the sparse data structure. If not specified (default), - a folder name "COMMIT" will be created in the same folder of the tractogram. - - filename_mask : string - Path to a binary mask for restricting the analysis to specific areas. - Segments outside this mask are discarded. If not specified (default), - the mask is created from all voxels intersected by the tracts. - - do_intersect : boolean - If True then fiber segments that intersect voxel boundaries are splitted (default). - If False then the centroid of the segment is used as its voxel position. - - fiber_shift : float or list of three float - If necessary, apply a translation to fiber coordinates (default : 0) to account - for differences between the reference system of the tracking algorithm and COMMIT. - The value is specified in voxel units, eg 0.5 translates by half voxel. - - min_seg_len : float - Discard segments <= than this length in mm (default : 1e-3). - - min_fiber_len : float - Discard streamlines <= than this length in mm (default : 0.0). - - max_fiber_len : float - Discard streamlines >= than this length in mm (default : 250.0). - - filename_peaks : string - Path to the NIFTI file containing the peaks to use as extra-cellular contributions. - The data matrix should be 4D with last dimension 3*N, where N is the number - of peaks in each voxel. 
(default : no extra-cellular contributions). - - peaks_use_affine : boolean - Whether to rotate the peaks according to the affine matrix (default : False). - - vf_THR : float - Discard peaks smaller than vf_THR * max peak (default : 0.1). - - flip_peaks : list of three boolean - If necessary, flips peak orientations along each axis (default : no flipping). - - blur_radii : list of float - Translate each segment to given radii to assign a broader fiber contribution (default : []). - - blur_samples : list of integer - Segments are duplicated along a circle at a given radius; this parameter controls the - number of samples to take over a given circle (defaut : []). - - blur_sigma: float - The contributions of the segments at different radii are damped as a Gaussian (default : 0.0). - - ndirs : int - Number of orientations on the sphere used to discretize the orientation of each - each segment in a streamline (default : 32761). - - filename_trk : string - DEPRECATED. Use filename_tractogram instead. - - gen_trk : string - DEPRECATED. No tractogram will be saved any more, but the returned coefficients will account - for the streamlines that were pre-filtered in this function. - """ - - # check the value of ndirs - if not amico.lut.is_valid(ndirs): - ERROR( 'Unsupported value for ndirs.\nNote: Supported values for ndirs are [500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000, 32761 (default)]' ) - - # check conflicts of fiber_shift - if np.isscalar(fiber_shift) : - fiber_shiftX = fiber_shift - fiber_shiftY = fiber_shift - fiber_shiftZ = fiber_shift - elif len(fiber_shift) == 3 : - fiber_shiftX = fiber_shift[0] - fiber_shiftY = fiber_shift[1] - fiber_shiftZ = fiber_shift[2] - else : - ERROR( '"fiber_shift" must be a scalar or a vector with 3 elements' ) - - # check for invalid parameters in the blur - if type(blur_radii)==list: - blur_radii = np.array(blur_radii, np.double) - elif type(blur_radii)!=np.ndarray: - ERROR( '"blur_radii" must be a list of floats' ) - if type(blur_samples)==list: - blur_samples = np.array(blur_samples, np.int32) - elif type(blur_samples)!=np.ndarray: - ERROR( '"blur_samples" must be a list of integers' ) - - if blur_sigma > 0 : - if blur_radii.size != blur_samples.size : - ERROR( 'The number of blur radii and blur samples must match' ) - - if np.count_nonzero( blur_radii<=0 ): - ERROR( 'A blur radius was <= 0; only positive radii can be used' ) - - if np.count_nonzero( blur_samples<1 ): - ERROR( 'Please specify at least 1 sample per blur radius' ) - - tic = time.time() - LOG( '\n-> Creating the dictionary from tractogram:' ) - - LOG( '\n * Configuration:' ) - print( '\t- Segment position = %s' % ( 'COMPUTE INTERSECTIONS' if do_intersect else 'CENTROID' ) ) - print( '\t- Fiber shift X = %.3f (voxel-size units)' % fiber_shiftX ) - print( '\t- Fiber shift Y = %.3f (voxel-size units)' % fiber_shiftY ) - print( '\t- Fiber shift Z = %.3f (voxel-size units)' % fiber_shiftZ ) - if min_seg_len >= 1e-3: - print( '\t- Min segment len = %.3f mm' % min_seg_len ) - else: - print( '\t- Min segment len = %.2e mm' % min_seg_len ) - print( '\t- Min fiber len = %.2f mm' % min_fiber_len ) - print( '\t- Max fiber len = %.2f mm' % max_fiber_len ) - - # check blur params - cdef : - double [:] blurRadii - int [:] blurSamples - double [:] blurWeights - double* ptrBlurRadii - int* ptrBlurSamples - double* ptrBlurWeights - int nBlurRadii - float [:] ArrayInvM - float* ptrArrayInvM - - # add a fake radius for original segment - 
if blur_sigma == 0: - nBlurRadii = 1 - blurRadii = np.array( [0.0], np.double ) - blurSamples = np.array( [1], np.int32 ) - blurWeights = np.array( [1], np.double ) - else: - nBlurRadii = len(blur_radii)+1 - blurRadii = np.insert( blur_radii, 0, 0.0 ).astype(np.double) - blurSamples = np.insert( blur_samples, 0, 1 ).astype(np.int32) - - # compute weights for gaussian damping - blurWeights = np.empty_like( blurRadii ) - for i in xrange(nBlurRadii): - blurWeights[i] = np.exp( -blurRadii[i]**2 / (2.0*blur_sigma**2) ) - - if nBlurRadii == 1 : - print( '\t- Do not blur fibers' ) - else : - print( '\t- Blur fibers:' ) - print( '\t\t- sigma = %.3f' % blur_sigma ) - print( '\t\t- radii = [ ', end="" ) - for i in xrange( 1, blurRadii.size ) : - print( '%.3f ' % blurRadii[i], end="" ) - print( ']' ) - print( '\t\t- weights = [ ', end="" ) - for i in xrange( 1, blurWeights.size ) : - print( '%.3f ' % blurWeights[i], end="" ) - print( ']' ) - print( '\t\t- samples = [ ', end="" ) - for i in xrange( 1, blurSamples.size ) : - print( '%5d ' % blurSamples[i], end="" ) - print( ']' ) - - ptrBlurRadii = &blurRadii[0] - ptrBlurSamples = &blurSamples[0] - ptrBlurWeights = &blurWeights[0] - - if min_seg_len < 0 : - ERROR( '"min_seg_len" must be >= 0' ) - if min_fiber_len < 0 : - ERROR( '"min_fiber_len" must be >= 0' ) - if max_fiber_len < min_fiber_len : - ERROR( '"max_fiber_len" must be >= "min_fiber_len"' ) - - if filename_trk is None and filename_tractogram is None: - ERROR( '"filename_tractogram" not defined' ) - - if filename_trk is not None and filename_tractogram is not None: - WARNING('"filename_trk" will not be considered, "filename_tractogram" will be used') - - if filename_trk is not None and filename_tractogram is None: - filename_tractogram = filename_trk - WARNING('"filename_trk" parameter is deprecated, use "filename_tractogram" instead') - - if path_out is None: - path_out = dirname(filename_tractogram) - if path_out == '': - path_out = '.' - if not isdir(path_out): - ERROR( '"path_out" cannot be inferred from "filename_tractogram"' ) - path_out = join(path_out,'COMMIT') - - if gen_trk is not None: - WARNING('"gen_trk" parameter is deprecated') - - # create output path - print( '\t- Output written to "%s"' % path_out ) - if not exists( path_out ): - makedirs( path_out ) - - # Load data from files - LOG( '\n * Loading data:' ) - cdef short [:] htable = amico.lut.load_precomputed_hash_table(ndirs) - cdef short* ptrHashTable = &htable[0] - - # Streamlines from tractogram - print( '\t- Tractogram' ) - - if not exists(filename_tractogram): - ERROR( 'Tractogram file not found: %s' % filename_tractogram ) - extension = splitext(filename_tractogram)[1] - if extension != ".trk" and extension != ".tck": - ERROR( 'Invalid input file: only .trk and .tck are supported' ) - - hdr = nibabel.streamlines.load( filename_tractogram, lazy_load=True ).header - - if extension == ".trk": - Nx = hdr['dimensions'][0] - Ny = hdr['dimensions'][1] - Nz = hdr['dimensions'][2] - Px = hdr['voxel_sizes'][0] - Py = hdr['voxel_sizes'][1] - Pz = hdr['voxel_sizes'][2] - - data_offset = 1000 - n_count = hdr['nb_streamlines'] - n_scalars = hdr['nb_scalars_per_point'] - n_properties = hdr['nb_properties_per_streamline'] - - if extension == ".tck": - if TCK_ref_image is None: - if filename_peaks is not None: - TCK_ref_image = filename_peaks - elif filename_mask is not None: - TCK_ref_image = filename_mask - else: - ERROR( 'TCK files do not contain information about the geometry. 
Use "TCK_ref_image" for that' ) - - print ('\t\t- geometry taken from "%s"' %TCK_ref_image) - - nii_image = nibabel.load(TCK_ref_image) - nii_hdr = nii_image.header if nibabel.__version__ >= '2.0.0' else nii_image.get_header() - Nx = nii_image.shape[0] - Ny = nii_image.shape[1] - Nz = nii_image.shape[2] - Px = nii_hdr['pixdim'][1] - Py = nii_hdr['pixdim'][2] - Pz = nii_hdr['pixdim'][3] - data_offset = int(hdr['_offset_data']) #set offset - n_count = int(hdr['count']) #set number of fibers - n_scalars = 0 - n_properties = 0 - - print( '\t\t- %d x %d x %d' % ( Nx, Ny, Nz ) ) - print( '\t\t- %.4f x %.4f x %.4f' % ( Px, Py, Pz ) ) - print( '\t\t- %d fibers' % n_count ) - if Nx >= 2**16 or Nz >= 2**16 or Nz >= 2**16 : - ERROR( 'The max dim size is 2^16 voxels' ) - - # get the affine matrix - if extension == ".tck": - scaleMat = np.diag(np.divide(1.0, [Px,Py,Pz])) - M = nii_hdr.get_best_affine() - - # Affine matrix without scaling, i.e. diagonal is 1 - M[:3, :3] = np.dot(scaleMat, M[:3, :3]) - M = M.astype('= '2.0.0' else niiMASK.get_header() - print( '\t\t- %d x %d x %d' % ( niiMASK.shape[0], niiMASK.shape[1], niiMASK.shape[2] ) ) - print( '\t\t- %.4f x %.4f x %.4f' % ( niiMASK_hdr['pixdim'][1], niiMASK_hdr['pixdim'][2], niiMASK_hdr['pixdim'][3] ) ) - if ( Nx!=niiMASK.shape[0] or Ny!=niiMASK.shape[1] or Nz!=niiMASK.shape[2] or - abs(Px-niiMASK_hdr['pixdim'][1])>1e-3 or abs(Py-niiMASK_hdr['pixdim'][2])>1e-3 or abs(Pz-niiMASK_hdr['pixdim'][3])>1e-3 ) : - WARNING( 'Dataset does not have the same geometry as the tractogram' ) - niiMASK_img = np.ascontiguousarray( niiMASK.get_data().astype(np.float32) ) - ptrMASK = &niiMASK_img[0,0,0] - else : - print( '\t- No mask specified to filter IC compartments' ) - ptrMASK = NULL - - # peaks file for EC contributions - cdef float* ptrPEAKS - cdef float [:, :, :, ::1] niiPEAKS_img - cdef int Np - cdef float [:, :, ::1] niiTDI_img = np.ascontiguousarray( np.zeros((Nx,Ny,Nz),dtype=np.float32) ) - cdef float* ptrTDI = &niiTDI_img[0,0,0] - cdef double [:, ::1] affine - cdef double* ptrAFFINE - if filename_peaks is not None : - print( '\t- EC orientations' ) - niiPEAKS = nibabel.load( filename_peaks ) - niiPEAKS_hdr = niiPEAKS.header if nibabel.__version__ >= '2.0.0' else niiPEAKS.get_header() - print( '\t\t- %d x %d x %d x %d' % ( niiPEAKS.shape[0], niiPEAKS.shape[1], niiPEAKS.shape[2], niiPEAKS.shape[3] ) ) - print( '\t\t- %.4f x %.4f x %.4f' % ( niiPEAKS_hdr['pixdim'][1], niiPEAKS_hdr['pixdim'][2], niiPEAKS_hdr['pixdim'][3] ) ) - print( '\t\t- ignoring peaks < %.2f * MaxPeak' % vf_THR ) - print( '\t\t- %susing affine matrix' % ( "" if peaks_use_affine else "not " ) ) - print( '\t\t- flipping axes : [ x=%s, y=%s, z=%s ]' % ( flip_peaks[0], flip_peaks[1], flip_peaks[2] ) ) - if ( Nx!=niiPEAKS.shape[0] or Ny!=niiPEAKS.shape[1] or Nz!=niiPEAKS.shape[2] or - abs(Px-niiPEAKS_hdr['pixdim'][1])>1e-3 or abs(Py-niiPEAKS_hdr['pixdim'][2])>1e-3 or abs(Pz-niiPEAKS_hdr['pixdim'][3])>1e-3 ) : - WARNING( "Dataset does not have the same geometry as the tractogram" ) - if niiPEAKS.shape[3] % 3 : - ERROR( 'PEAKS dataset must have 3*k volumes' ) - if vf_THR < 0 or vf_THR > 1 : - ERROR( '"vf_THR" must be between 0 and 1' ) - niiPEAKS_img = np.ascontiguousarray( niiPEAKS.get_data().astype(np.float32) ) - ptrPEAKS = &niiPEAKS_img[0,0,0,0] - Np = niiPEAKS.shape[3]/3 - - # affine matrix to rotate gradien directions (if required) - if peaks_use_affine : - affine = np.ascontiguousarray( niiPEAKS.affine[:3,:3].T ) - else : - affine = np.ascontiguousarray( np.eye(3) ) - ptrAFFINE = 
&affine[0,0] - else : - print( '\t- No dataset specified for EC compartments' ) - Np = 0 - ptrPEAKS = NULL - ptrAFFINE = NULL - - # write dictionary information info file - dictionary_info = {} - dictionary_info['filename_tractogram'] = filename_tractogram - dictionary_info['TCK_ref_image'] = TCK_ref_image - dictionary_info['path_out'] = path_out - dictionary_info['filename_peaks'] = filename_peaks - dictionary_info['filename_mask'] = filename_mask - dictionary_info['do_intersect'] = do_intersect - dictionary_info['fiber_shift'] = fiber_shift - dictionary_info['min_seg_len'] = min_seg_len - dictionary_info['min_fiber_len'] = min_fiber_len - dictionary_info['max_fiber_len'] = max_fiber_len - dictionary_info['vf_THR'] = vf_THR - dictionary_info['peaks_use_affine'] = peaks_use_affine - dictionary_info['flip_peaks'] = flip_peaks - dictionary_info['blur_radii'] = blur_radii - dictionary_info['blur_samples'] = blur_samples - dictionary_info['blur_sigma'] = blur_sigma - dictionary_info['ndirs'] = ndirs - with open( join(path_out,'dictionary_info.pickle'), 'wb+' ) as dictionary_info_file: - pickle.dump(dictionary_info, dictionary_info_file, protocol=2) - - # calling actual C code - ret = trk2dictionary( filename_tractogram, data_offset, - Nx, Ny, Nz, Px, Py, Pz, n_count, n_scalars, n_properties, - fiber_shiftX, fiber_shiftY, fiber_shiftZ, min_seg_len, min_fiber_len, max_fiber_len, - ptrPEAKS, Np, vf_THR, -1 if flip_peaks[0] else 1, -1 if flip_peaks[1] else 1, -1 if flip_peaks[2] else 1, - ptrMASK, ptrTDI, path_out, 1 if do_intersect else 0, ptrAFFINE, - nBlurRadii, blur_sigma, ptrBlurRadii, ptrBlurSamples, ptrBlurWeights, ptrArrayInvM, ndirs, ptrHashTable ); - if ret == 0 : - WARNING( 'DICTIONARY not generated' ) - return None - - # save TDI and MASK maps - if filename_mask is not None : - affine = niiMASK.affine if nibabel.__version__ >= '2.0.0' else niiMASK.get_affine() - elif filename_peaks is not None : - affine = niiPEAKS.affine if nibabel.__version__ >= '2.0.0' else niiPEAKS.get_affine() - else : - affine = np.diag( [Px, Py, Pz, 1] ) - - niiTDI = nibabel.Nifti1Image( niiTDI_img, affine ) - nii_hdr = niiTDI.header if nibabel.__version__ >= '2.0.0' else niiTDI.get_header() - nii_hdr['descrip'] = 'Created with COMMIT %s'%get_distribution('dmri-commit').version - nibabel.save( niiTDI, join(path_out,'dictionary_tdi.nii.gz') ) - - if filename_mask is not None : - niiMASK = nibabel.Nifti1Image( niiMASK_img, affine ) - else : - niiMASK = nibabel.Nifti1Image( (np.asarray(niiTDI_img)>0).astype(np.float32), affine ) - nii_hdr = niiMASK.header if nibabel.__version__ >= '2.0.0' else niiMASK.get_header() - nii_hdr['descrip'] = 'Created with COMMIT %s'%get_distribution('dmri-commit').version - nibabel.save( niiMASK, join(path_out,'dictionary_mask.nii.gz') ) - - LOG( '\n [ %.1f seconds ]' % ( time.time() - tic ) ) +#!python +# cython: language_level=3, c_string_type=str, c_string_encoding=ascii, boundscheck=False, wraparound=False, profile=False +from __future__ import print_function +import cython +import numpy as np +cimport numpy as np +import nibabel +from os.path import join, exists, splitext, dirname, isdir +from os import makedirs, remove +import time +import amico +import pickle +from amico.util import LOG, NOTE, WARNING, ERROR +from pkg_resources import get_distribution + + +# Interface to actual C code +cdef extern from "trk2dictionary_c.cpp": + int trk2dictionary( + char* filename_tractogram, int data_offset, int Nx, int Ny, int Nz, float Px, float Py, float Pz, int n_count, int n_scalars, + int 
n_properties, float fiber_shiftX, float fiber_shiftY, float fiber_shiftZ, float min_seg_len, float min_fiber_len, float max_fiber_len, + float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int ECiz, + float* _ptrMASK, float* ptrTDI, char* path_out, int c, double* ptrPeaksAffine, + int nBlurRadii, double blurSigma, double* ptrBlurRadii, int* ptrBlurSamples, double* ptrBlurWeights, float* ptrTractsAffine, unsigned short ndirs, short* prtHashTable + ) nogil + + +cpdef run( filename_tractogram=None, path_out=None, filename_peaks=None, filename_mask=None, do_intersect=True, + fiber_shift=0, min_seg_len=1e-3, min_fiber_len=0.0, max_fiber_len=250.0, + vf_THR=0.1, peaks_use_affine=False, flip_peaks=[False,False,False], + blur_radii=[], blur_samples=[], blur_sigma=0.0, + filename_trk=None, gen_trk=None, TCK_ref_image=None, ndirs=32761 + ): + """Perform the conversion of a tractoram to the sparse data-structure internally + used by COMMIT to perform the matrix-vector multiplications with the operator A + during the inversion of the linear system. + + Parameters + ---------- + filename_tractogram : string + Path to the tractogram (.trk or .tck) containing the streamlines to load. + + TCK_ref_image: string + When loading a .tck tractogram, path to the NIFTI file containing the information about + the geometry to be used for the tractogram to load. If not specified, it will try to use + the information from filename_peaks or filename_mask. + + path_out : string + Path to the folder for storing the sparse data structure. If not specified (default), + a folder name "COMMIT" will be created in the same folder of the tractogram. + + filename_mask : string + Path to a binary mask for restricting the analysis to specific areas. + Segments outside this mask are discarded. If not specified (default), + the mask is created from all voxels intersected by the tracts. + + do_intersect : boolean + If True then fiber segments that intersect voxel boundaries are splitted (default). + If False then the centroid of the segment is used as its voxel position. + + fiber_shift : float or list of three float + If necessary, apply a translation to fiber coordinates (default : 0) to account + for differences between the reference system of the tracking algorithm and COMMIT. + The value is specified in voxel units, eg 0.5 translates by half voxel. + + min_seg_len : float + Discard segments <= than this length in mm (default : 1e-3). + + min_fiber_len : float + Discard streamlines <= than this length in mm (default : 0.0). + + max_fiber_len : float + Discard streamlines >= than this length in mm (default : 250.0). + + filename_peaks : string + Path to the NIFTI file containing the peaks to use as extra-cellular contributions. + The data matrix should be 4D with last dimension 3*N, where N is the number + of peaks in each voxel. (default : no extra-cellular contributions). + + peaks_use_affine : boolean + Whether to rotate the peaks according to the affine matrix (default : False). + + vf_THR : float + Discard peaks smaller than vf_THR * max peak (default : 0.1). + + flip_peaks : list of three boolean + If necessary, flips peak orientations along each axis (default : no flipping). + + blur_radii : list of float + Translate each segment to given radii to assign a broader fiber contribution (default : []). + + blur_samples : list of integer + Segments are duplicated along a circle at a given radius; this parameter controls the + number of samples to take over a given circle (defaut : []). 
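        For instance (illustrative values only), blur_radii=[0.25, 0.5] with
        blur_samples=[6, 10] duplicates every segment on circles of radius
        0.25 and 0.5, using 6 and 10 copies respectively, each copy being
        damped by the Gaussian defined by blur_sigma.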
+ + blur_sigma: float + The contributions of the segments at different radii are damped as a Gaussian (default : 0.0). + + ndirs : int + Number of orientations on the sphere used to discretize the orientation of each + each segment in a streamline (default : 32761). + + filename_trk : string + DEPRECATED. Use filename_tractogram instead. + + gen_trk : string + DEPRECATED. No tractogram will be saved any more, but the returned coefficients will account + for the streamlines that were pre-filtered in this function. + """ + + # check the value of ndirs + if not amico.lut.is_valid(ndirs): + ERROR( 'Unsupported value for ndirs.\nNote: Supported values for ndirs are [500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000, 32761 (default)]' ) + + # check conflicts of fiber_shift + if np.isscalar(fiber_shift) : + fiber_shiftX = fiber_shift + fiber_shiftY = fiber_shift + fiber_shiftZ = fiber_shift + elif len(fiber_shift) == 3 : + fiber_shiftX = fiber_shift[0] + fiber_shiftY = fiber_shift[1] + fiber_shiftZ = fiber_shift[2] + else : + ERROR( '"fiber_shift" must be a scalar or a vector with 3 elements' ) + + # check for invalid parameters in the blur + if type(blur_radii)==list: + blur_radii = np.array(blur_radii, np.double) + elif type(blur_radii)!=np.ndarray: + ERROR( '"blur_radii" must be a list of floats' ) + if type(blur_samples)==list: + blur_samples = np.array(blur_samples, np.int32) + elif type(blur_samples)!=np.ndarray: + ERROR( '"blur_samples" must be a list of integers' ) + + if blur_sigma > 0 : + if blur_radii.size != blur_samples.size : + ERROR( 'The number of blur radii and blur samples must match' ) + + if np.count_nonzero( blur_radii<=0 ): + ERROR( 'A blur radius was <= 0; only positive radii can be used' ) + + if np.count_nonzero( blur_samples<1 ): + ERROR( 'Please specify at least 1 sample per blur radius' ) + + tic = time.time() + LOG( '\n-> Creating the dictionary from tractogram:' ) + + LOG( '\n * Configuration:' ) + print( '\t- Segment position = %s' % ( 'COMPUTE INTERSECTIONS' if do_intersect else 'CENTROID' ) ) + print( '\t- Fiber shift X = %.3f (voxel-size units)' % fiber_shiftX ) + print( '\t- Fiber shift Y = %.3f (voxel-size units)' % fiber_shiftY ) + print( '\t- Fiber shift Z = %.3f (voxel-size units)' % fiber_shiftZ ) + if min_seg_len >= 1e-3: + print( '\t- Min segment len = %.3f mm' % min_seg_len ) + else: + print( '\t- Min segment len = %.2e mm' % min_seg_len ) + print( '\t- Min fiber len = %.2f mm' % min_fiber_len ) + print( '\t- Max fiber len = %.2f mm' % max_fiber_len ) + + # check blur params + cdef : + double [:] blurRadii + int [:] blurSamples + double [:] blurWeights + double* ptrBlurRadii + int* ptrBlurSamples + double* ptrBlurWeights + int nBlurRadii + float [:] ArrayInvM + float* ptrArrayInvM + + # add a fake radius for original segment + if blur_sigma == 0: + nBlurRadii = 1 + blurRadii = np.array( [0.0], np.double ) + blurSamples = np.array( [1], np.int32 ) + blurWeights = np.array( [1], np.double ) + else: + nBlurRadii = len(blur_radii)+1 + blurRadii = np.insert( blur_radii, 0, 0.0 ).astype(np.double) + blurSamples = np.insert( blur_samples, 0, 1 ).astype(np.int32) + + # compute weights for gaussian damping + blurWeights = np.empty_like( blurRadii ) + for i in xrange(nBlurRadii): + blurWeights[i] = np.exp( -blurRadii[i]**2 / (2.0*blur_sigma**2) ) + + if nBlurRadii == 1 : + print( '\t- Do not blur fibers' ) + else : + print( '\t- Blur fibers:' ) + print( '\t\t- sigma = %.3f' % blur_sigma ) + 
print( '\t\t- radii = [ ', end="" ) + for i in xrange( 1, blurRadii.size ) : + print( '%.3f ' % blurRadii[i], end="" ) + print( ']' ) + print( '\t\t- weights = [ ', end="" ) + for i in xrange( 1, blurWeights.size ) : + print( '%.3f ' % blurWeights[i], end="" ) + print( ']' ) + print( '\t\t- samples = [ ', end="" ) + for i in xrange( 1, blurSamples.size ) : + print( '%5d ' % blurSamples[i], end="" ) + print( ']' ) + + ptrBlurRadii = &blurRadii[0] + ptrBlurSamples = &blurSamples[0] + ptrBlurWeights = &blurWeights[0] + + if min_seg_len < 0 : + ERROR( '"min_seg_len" must be >= 0' ) + if min_fiber_len < 0 : + ERROR( '"min_fiber_len" must be >= 0' ) + if max_fiber_len < min_fiber_len : + ERROR( '"max_fiber_len" must be >= "min_fiber_len"' ) + + if filename_trk is None and filename_tractogram is None: + ERROR( '"filename_tractogram" not defined' ) + + if filename_trk is not None and filename_tractogram is not None: + WARNING('"filename_trk" will not be considered, "filename_tractogram" will be used') + + if filename_trk is not None and filename_tractogram is None: + filename_tractogram = filename_trk + WARNING('"filename_trk" parameter is deprecated, use "filename_tractogram" instead') + + if path_out is None: + path_out = dirname(filename_tractogram) + if path_out == '': + path_out = '.' + if not isdir(path_out): + ERROR( '"path_out" cannot be inferred from "filename_tractogram"' ) + path_out = join(path_out,'COMMIT') + + if gen_trk is not None: + WARNING('"gen_trk" parameter is deprecated') + + # create output path + print( '\t- Output written to "%s"' % path_out ) + if not exists( path_out ): + makedirs( path_out ) + + # Load data from files + LOG( '\n * Loading data:' ) + cdef short [:] htable = amico.lut.load_precomputed_hash_table(ndirs) + cdef short* ptrHashTable = &htable[0] + + # Streamlines from tractogram + print( '\t- Tractogram' ) + + if not exists(filename_tractogram): + ERROR( 'Tractogram file not found: %s' % filename_tractogram ) + extension = splitext(filename_tractogram)[1] + if extension != ".trk" and extension != ".tck": + ERROR( 'Invalid input file: only .trk and .tck are supported' ) + + hdr = nibabel.streamlines.load( filename_tractogram, lazy_load=True ).header + + if extension == ".trk": + Nx = hdr['dimensions'][0] + Ny = hdr['dimensions'][1] + Nz = hdr['dimensions'][2] + Px = hdr['voxel_sizes'][0] + Py = hdr['voxel_sizes'][1] + Pz = hdr['voxel_sizes'][2] + + data_offset = 1000 + n_count = hdr['nb_streamlines'] + n_scalars = hdr['nb_scalars_per_point'] + n_properties = hdr['nb_properties_per_streamline'] + + if extension == ".tck": + if TCK_ref_image is None: + if filename_peaks is not None: + TCK_ref_image = filename_peaks + elif filename_mask is not None: + TCK_ref_image = filename_mask + else: + ERROR( 'TCK files do not contain information about the geometry. 
Use "TCK_ref_image" for that' ) + + print ('\t\t- geometry taken from "%s"' %TCK_ref_image) + + nii_image = nibabel.load(TCK_ref_image) + nii_hdr = nii_image.header if nibabel.__version__ >= '2.0.0' else nii_image.get_header() + Nx = nii_image.shape[0] + Ny = nii_image.shape[1] + Nz = nii_image.shape[2] + Px = nii_hdr['pixdim'][1] + Py = nii_hdr['pixdim'][2] + Pz = nii_hdr['pixdim'][3] + data_offset = int(hdr['_offset_data']) #set offset + n_count = int(hdr['count']) #set number of fibers + n_scalars = 0 + n_properties = 0 + + print( '\t\t- %d x %d x %d' % ( Nx, Ny, Nz ) ) + print( '\t\t- %.4f x %.4f x %.4f' % ( Px, Py, Pz ) ) + print( '\t\t- %d fibers' % n_count ) + if Nx >= 2**16 or Nz >= 2**16 or Nz >= 2**16 : + ERROR( 'The max dim size is 2^16 voxels' ) + + # get the affine matrix + if extension == ".tck": + scaleMat = np.diag(np.divide(1.0, [Px,Py,Pz])) + M = nii_hdr.get_best_affine() + + # Affine matrix without scaling, i.e. diagonal is 1 + M[:3, :3] = np.dot(scaleMat, M[:3, :3]) + M = M.astype('= '2.0.0' else niiMASK.get_header() + print( '\t\t- %d x %d x %d' % ( niiMASK.shape[0], niiMASK.shape[1], niiMASK.shape[2] ) ) + print( '\t\t- %.4f x %.4f x %.4f' % ( niiMASK_hdr['pixdim'][1], niiMASK_hdr['pixdim'][2], niiMASK_hdr['pixdim'][3] ) ) + if ( Nx!=niiMASK.shape[0] or Ny!=niiMASK.shape[1] or Nz!=niiMASK.shape[2] or + abs(Px-niiMASK_hdr['pixdim'][1])>1e-3 or abs(Py-niiMASK_hdr['pixdim'][2])>1e-3 or abs(Pz-niiMASK_hdr['pixdim'][3])>1e-3 ) : + WARNING( 'Dataset does not have the same geometry as the tractogram' ) + niiMASK_img = np.ascontiguousarray( niiMASK.get_data().astype(np.float32) ) + ptrMASK = &niiMASK_img[0,0,0] + else : + print( '\t- No mask specified to filter IC compartments' ) + ptrMASK = NULL + + # peaks file for EC contributions + cdef float* ptrPEAKS + cdef float [:, :, :, ::1] niiPEAKS_img + cdef int Np + cdef float [:, :, ::1] niiTDI_img = np.ascontiguousarray( np.zeros((Nx,Ny,Nz),dtype=np.float32) ) + cdef float* ptrTDI = &niiTDI_img[0,0,0] + cdef double [:, ::1] affine + cdef double* ptrAFFINE + if filename_peaks is not None : + print( '\t- EC orientations' ) + niiPEAKS = nibabel.load( filename_peaks ) + niiPEAKS_hdr = niiPEAKS.header if nibabel.__version__ >= '2.0.0' else niiPEAKS.get_header() + print( '\t\t- %d x %d x %d x %d' % ( niiPEAKS.shape[0], niiPEAKS.shape[1], niiPEAKS.shape[2], niiPEAKS.shape[3] ) ) + print( '\t\t- %.4f x %.4f x %.4f' % ( niiPEAKS_hdr['pixdim'][1], niiPEAKS_hdr['pixdim'][2], niiPEAKS_hdr['pixdim'][3] ) ) + print( '\t\t- ignoring peaks < %.2f * MaxPeak' % vf_THR ) + print( '\t\t- %susing affine matrix' % ( "" if peaks_use_affine else "not " ) ) + print( '\t\t- flipping axes : [ x=%s, y=%s, z=%s ]' % ( flip_peaks[0], flip_peaks[1], flip_peaks[2] ) ) + if ( Nx!=niiPEAKS.shape[0] or Ny!=niiPEAKS.shape[1] or Nz!=niiPEAKS.shape[2] or + abs(Px-niiPEAKS_hdr['pixdim'][1])>1e-3 or abs(Py-niiPEAKS_hdr['pixdim'][2])>1e-3 or abs(Pz-niiPEAKS_hdr['pixdim'][3])>1e-3 ) : + WARNING( "Dataset does not have the same geometry as the tractogram" ) + if niiPEAKS.shape[3] % 3 : + ERROR( 'PEAKS dataset must have 3*k volumes' ) + if vf_THR < 0 or vf_THR > 1 : + ERROR( '"vf_THR" must be between 0 and 1' ) + niiPEAKS_img = np.ascontiguousarray( niiPEAKS.get_data().astype(np.float32) ) + ptrPEAKS = &niiPEAKS_img[0,0,0,0] + Np = niiPEAKS.shape[3]/3 + + # affine matrix to rotate gradien directions (if required) + if peaks_use_affine : + affine = np.ascontiguousarray( niiPEAKS.affine[:3,:3].T ) + else : + affine = np.ascontiguousarray( np.eye(3) ) + ptrAFFINE = 
&affine[0,0] + else : + print( '\t- No dataset specified for EC compartments' ) + Np = 0 + ptrPEAKS = NULL + ptrAFFINE = NULL + + # write dictionary information info file + dictionary_info = {} + dictionary_info['filename_tractogram'] = filename_tractogram + dictionary_info['TCK_ref_image'] = TCK_ref_image + dictionary_info['path_out'] = path_out + dictionary_info['filename_peaks'] = filename_peaks + dictionary_info['filename_mask'] = filename_mask + dictionary_info['do_intersect'] = do_intersect + dictionary_info['fiber_shift'] = fiber_shift + dictionary_info['min_seg_len'] = min_seg_len + dictionary_info['min_fiber_len'] = min_fiber_len + dictionary_info['max_fiber_len'] = max_fiber_len + dictionary_info['vf_THR'] = vf_THR + dictionary_info['peaks_use_affine'] = peaks_use_affine + dictionary_info['flip_peaks'] = flip_peaks + dictionary_info['blur_radii'] = blur_radii + dictionary_info['blur_samples'] = blur_samples + dictionary_info['blur_sigma'] = blur_sigma + dictionary_info['ndirs'] = ndirs + with open( join(path_out,'dictionary_info.pickle'), 'wb+' ) as dictionary_info_file: + pickle.dump(dictionary_info, dictionary_info_file, protocol=2) + + # calling actual C code + ret = trk2dictionary( filename_tractogram, data_offset, + Nx, Ny, Nz, Px, Py, Pz, n_count, n_scalars, n_properties, + fiber_shiftX, fiber_shiftY, fiber_shiftZ, min_seg_len, min_fiber_len, max_fiber_len, + ptrPEAKS, Np, vf_THR, -1 if flip_peaks[0] else 1, -1 if flip_peaks[1] else 1, -1 if flip_peaks[2] else 1, + ptrMASK, ptrTDI, path_out, 1 if do_intersect else 0, ptrAFFINE, + nBlurRadii, blur_sigma, ptrBlurRadii, ptrBlurSamples, ptrBlurWeights, ptrArrayInvM, ndirs, ptrHashTable ); + if ret == 0 : + WARNING( 'DICTIONARY not generated' ) + return None + + # save TDI and MASK maps + if filename_mask is not None : + affine = niiMASK.affine if nibabel.__version__ >= '2.0.0' else niiMASK.get_affine() + elif filename_peaks is not None : + affine = niiPEAKS.affine if nibabel.__version__ >= '2.0.0' else niiPEAKS.get_affine() + else : + affine = np.diag( [Px, Py, Pz, 1] ) + + niiTDI = nibabel.Nifti1Image( niiTDI_img, affine ) + nii_hdr = niiTDI.header if nibabel.__version__ >= '2.0.0' else niiTDI.get_header() + nii_hdr['descrip'] = 'Created with COMMIT %s'%get_distribution('dmri-commit').version + nibabel.save( niiTDI, join(path_out,'dictionary_tdi.nii.gz') ) + + if filename_mask is not None : + niiMASK = nibabel.Nifti1Image( niiMASK_img, affine ) + else : + niiMASK = nibabel.Nifti1Image( (np.asarray(niiTDI_img)>0).astype(np.float32), affine ) + nii_hdr = niiMASK.header if nibabel.__version__ >= '2.0.0' else niiMASK.get_header() + nii_hdr['descrip'] = 'Created with COMMIT %s'%get_distribution('dmri-commit').version + nibabel.save( niiMASK, join(path_out,'dictionary_mask.nii.gz') ) + + LOG( '\n [ %.1f seconds ]' % ( time.time() - tic ) ) From 11788760f6ee6c5036662b5af40a61c6f75d2c92 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Thu, 25 Feb 2021 00:45:51 -0600 Subject: [PATCH 16/17] Rename Tikhonov parameters --- commit/core.pyx | 31 +++++++++---------- commit/operator/operator.pyx | 58 ++++++++++++++++++------------------ 2 files changed, 45 insertions(+), 44 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index 9669104d..5c82baf5 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -639,7 +639,7 @@ cdef class Evaluation : LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) - def build_operator( self, build_dir=None, tikhonov_equalizer=0, deriv_matrix=None ) : + def build_operator( self, build_dir=None, 
tikhonov_lambda=0, tikhonov_matrix=None ) : """Compile/build the operator for computing the matrix-vector multiplications by A and A' using the informations from self.DICTIONARY, self.KERNELS and self.THREADS. NB: needs to call this function to update pointers to data structures in case @@ -652,12 +652,12 @@ cdef class Evaluation : If None (default), they will end up in the .pyxbld directory in the user’s home directory. If using this option, it is recommended to use a temporary directory, quit your python console between each build, and delete the content of the temporary directory. - tikhonov_equalizer: float + tikhonov_lambda: float equalizer parameter of the Tikhonov regularization term - deriv_matrix: string + tikhonov_matrix: string derivative matrix of the Tikhonov regularization term If None (default), no regularization term is added to the model - If using this option, tikhonov_equalizer must be positive + If using this option, tikhonov_lambda must be positive """ if self.DICTIONARY is None : ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) @@ -665,11 +665,11 @@ cdef class Evaluation : ERROR( 'Response functions not generated; call "generate_kernels()" and "load_kernels()" first' ) if self.THREADS is None : ERROR( 'Threads not set; call "set_threads()" first' ) - if tikhonov_equalizer < 0: + if tikhonov_lambda < 0: ERROR( 'Invalid value for Tikhonov equalizer parameter; value must be positive or zero' ) - if tikhonov_equalizer > 0 and deriv_matrix == None: - ERROR( 'Tikhonov equalizer term given but derivative matrix was not selected; add "deriv_matrix" parameter in "build_operator()", valid options are \'L1\' (first derivative with free boundary conditions), \'L2\' (second derivative with free boundary conditions), \'L1z\' (first derivative with zero boundary conditions) and \'L2z\' (second derivative with zero boundary conditions)' ) - if tikhonov_equalizer > 0 and deriv_matrix!='L1' and deriv_matrix!='L2' and deriv_matrix!='L1z' and deriv_matrix!='L2z': + if tikhonov_lambda > 0 and tikhonov_matrix == None: + ERROR( 'Tikhonov equalizer term given but derivative matrix was not selected; add "tikhonov_matrix" parameter in "build_operator()", valid options are \'L1\' (first derivative with free boundary conditions), \'L2\' (second derivative with free boundary conditions), \'L1z\' (first derivative with zero boundary conditions) and \'L2z\' (second derivative with zero boundary conditions)' ) + if tikhonov_lambda > 0 and tikhonov_matrix!='L1' and tikhonov_matrix!='L2' and tikhonov_matrix!='L1z' and tikhonov_matrix!='L2z': ERROR( 'Invalid derivative matrix selection for regularization term; valid options are \'L1\' (first derivative with free boundary conditions), \'L2\' (second derivative with free boundary conditions), \'L1z\' (first derivative with zero boundary conditions) and \'L2z\' (second derivative with zero boundary conditions)' ) if self.DICTIONARY['IC']['nF'] <= 0 : @@ -722,7 +722,7 @@ cdef class Evaluation : else : reload( sys.modules['commit.operator.operator'] ) - self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, tikhonov_equalizer, deriv_matrix ) + self.A = sys.modules['commit.operator.operator'].LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, tikhonov_lambda, tikhonov_matrix ) LOG( ' [ %.1f seconds ]' % ( time.time() - tic ) ) @@ -740,14 +740,14 @@ cdef class Evaluation : y = self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : 
].flatten().astype(np.float64) # extend y for the tikhonov regularization term - if self.A.tikhonov_equalizer > 0: - if self.A.deriv_matrix == 'L1': + if self.A.tikhonov_lambda > 0: + if self.A.tikhonov_matrix == 'L1': yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-1, dtype=np.float64) - elif self.A.deriv_matrix == 'L2': + elif self.A.tikhonov_matrix == 'L2': yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]-2, dtype=np.float64) - elif self.A.deriv_matrix == 'L1z': + elif self.A.tikhonov_matrix == 'L1z': yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0]+1, dtype=np.float64) - elif self.A.deriv_matrix == 'L2z': + elif self.A.tikhonov_matrix == 'L2z': yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0] , dtype=np.float64) else: ERROR( 'Invalid derivative matrix selection for regularization term; valid options are \'L1\' (first derivative with free boundary conditions), \'L2\' (second derivative with free boundary conditions), \'L1z\' (first derivative with zero boundary conditions) and \'L2z\' (second derivative with zero boundary conditions)' ) @@ -883,6 +883,7 @@ cdef class Evaluation : nF = self.DICTIONARY['IC']['nF'] nE = self.DICTIONARY['EC']['nE'] nV = self.DICTIONARY['nV'] + nS = self.KERNELS['iso'].shape[1] norm_fib = np.ones( nF ) # x is the x of the original problem # self.x is the x preconditioned @@ -915,7 +916,7 @@ cdef class Evaluation : niiMAP_hdr['descrip'] = 'Created with COMMIT %s'%self.get_config('version') y_mea = np.reshape( self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float32), (nV,-1) ) - y_est = np.reshape( self.A.dot(self.x), (nV,-1) ).astype(np.float32) + y_est = np.reshape( self.A.dot(self.x)[0:int(nV*nS)], (nV,-1) ).astype(np.float32) print( '\t\t- RMSE... 
', end='' ) sys.stdout.flush() diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index 39831703..1f1a2da8 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -74,8 +74,8 @@ cdef class LinearOperator : """ cdef int nS, nF, nR, nE, nT, nV, nI, n, ndirs cdef public int adjoint, n1, n2 - cdef public float tikhonov_equalizer - cdef public char* deriv_matrix + cdef public float tikhonov_lambda + cdef public tikhonov_matrix cdef DICTIONARY cdef KERNELS @@ -102,7 +102,7 @@ cdef class LinearOperator : cdef unsigned int* ISOthreadsT - def __init__( self, DICTIONARY, KERNELS, THREADS, tikhonov_equalizer=0, deriv_matrix=None ) : + def __init__( self, DICTIONARY, KERNELS, THREADS, tikhonov_lambda=0, tikhonov_matrix=None ) : """Set the pointers to the data structures used by the C code.""" self.DICTIONARY = DICTIONARY self.KERNELS = KERNELS @@ -116,8 +116,8 @@ cdef class LinearOperator : self.nI = KERNELS['iso'].shape[0] # number of ISO contributions self.n = DICTIONARY['IC']['n'] # numbner of IC segments self.ndirs = KERNELS['wmr'].shape[1] # number of directions - self.tikhonov_equalizer = tikhonov_equalizer # equalizer parameter of the Tikhonov regularization term - self.deriv_matrix = deriv_matrix # derivative matrix of the Tikhonov regularization term + self.tikhonov_lambda = tikhonov_lambda # equalizer parameter of the Tikhonov regularization term + self.tikhonov_matrix = tikhonov_matrix # derivative matrix of the Tikhonov regularization term if KERNELS['wmr'].size > 0 : self.nS = KERNELS['wmr'].shape[2] # number of SAMPLES @@ -128,15 +128,15 @@ cdef class LinearOperator : self.adjoint = 0 # direct of inverse product - # set shape of the operator according to deriv_matrix - if self.tikhonov_equalizer > 0.0: - if self.deriv_matrix == 0: + # set shape of the operator according to tikhonov_matrix + if self.tikhonov_lambda > 0: + if self.tikhonov_matrix == 'L1': self.n1 = self.nV*self.nS + (self.nR-1) - elif self.deriv_matrix == 1: + elif self.tikhonov_matrix == 'L2': self.n1 = self.nV*self.nS + (self.nR-2) - elif self.deriv_matrix == 2: + elif self.tikhonov_matrix == 'L1z': self.n1 = self.nV*self.nS + (self.nR+1) - else: + elif self.tikhonov_matrix == 'L2z': self.n1 = self.nV*self.nS + (self.nR) else: self.n1 = self.nV*self.nS @@ -185,7 +185,7 @@ cdef class LinearOperator : @property def T( self ) : """Transpose of the explicit matrix.""" - C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.tikhonov_equalizer, self.deriv_matrix ) + C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.tikhonov_lambda, self.tikhonov_matrix ) C.adjoint = 1 - C.adjoint return C @@ -242,57 +242,57 @@ cdef class LinearOperator : self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT ) - if self.tikhonov_equalizer > 0: + if self.tikhonov_lambda > 0: if not self.adjoint: # DIRECT PRODUCT lambda*L*x - if self.deriv_matrix == 'L1': + if self.tikhonov_matrix == 'L1': with nogil: COMMIT_L1( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) - elif self.deriv_matrix == 'L2': + elif self.tikhonov_matrix == 'L2': with nogil: COMMIT_L2( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) - elif self.deriv_matrix == 'L1z': + elif self.tikhonov_matrix == 'L1z': with nogil: COMMIT_L1z( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + self.nF, self.nR, 
self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) - elif self.deriv_matrix == 'L2z': + elif self.tikhonov_matrix == 'L2z': with nogil: COMMIT_L2z( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) else: # INVERSE PRODUCT lambda*L'*y - if self.deriv_matrix == 'L1': + if self.tikhonov_matrix == 'L1': with nogil: COMMIT_L1t( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) - elif self.deriv_matrix == 'L2': + elif self.tikhonov_matrix == 'L2': with nogil: COMMIT_L2t( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) - elif self.deriv_matrix == 'L1z': + elif self.tikhonov_matrix == 'L1z': with nogil: COMMIT_L1zt( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) - elif self.deriv_matrix == 'L2z': + elif self.tikhonov_matrix == 'L2z': with nogil: COMMIT_L2zt( - self.nF, self.nR, self.nV, self.nS, self.tikhonov_equalizer, + self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) From 054964848ebd2cd850ae172839090c39143f5094 Mon Sep 17 00:00:00 2001 From: ErickHernandezGutierrez Date: Thu, 25 Feb 2021 02:07:37 -0600 Subject: [PATCH 17/17] Minor cleanup to comments and error messages --- commit/core.pyx | 15 ++++---- commit/operator/operator.pyx | 32 ++++++++-------- commit/operator/operator_noLUT.c | 59 +----------------------------- commit/operator/operator_withLUT.c | 44 ++++++++++++++++------ 4 files changed, 56 insertions(+), 94 deletions(-) diff --git a/commit/core.pyx b/commit/core.pyx index 5c82baf5..511eb257 100755 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -653,11 +653,10 @@ cdef class Evaluation : If using this option, it is recommended to use a temporary directory, quit your python console between each build, and delete the content of the temporary directory. 
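For context on the two renamed parameters, the sketch below shows how an Evaluation object that has already gone through the usual COMMIT workflow (load_data(), set_model(), generate_kernels(), load_kernels(), load_dictionary(), set_threads(), as checked by build_operator() above) might request the Tikhonov-regularized operator, and what the 'L2' choice implies for the operator size. The study folder, file names and lambda value are hypothetical; the dense matrix is only a shape illustration of a second-order finite difference with free boundary conditions, since the actual products are computed by the Tikhonov_* C kernels rather than by an explicit matrix.

    import numpy as np
    import commit

    # hypothetical setup; paths and file names are placeholders
    mit = commit.Evaluation( 'StudyFolder', 'sub01' )
    mit.load_data( 'DWI.nii.gz', 'DWI.scheme' )
    # ... set_model(), generate_kernels(), load_kernels(),
    #     load_dictionary(), set_threads() as usual ...
    mit.build_operator( tikhonov_lambda=0.1, tikhonov_matrix='L2' )   # illustrative lambda

    # With nR intra-cellular response functions, the 'L2' penalty couples
    # consecutive IC coefficients with a [1, -2, 1] stencil (free boundaries),
    # adding nR-2 rows to the operator: n1 = nV*nS + (nR-2).
    nR = 4                                  # e.g. 4 IC kernels (illustrative)
    L = np.zeros( (nR-2, nR) )
    for r in range(nR-2):
        L[r, r:r+3] = [1.0, -2.0, 1.0]
    print( L )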
tikhonov_lambda: float - equalizer parameter of the Tikhonov regularization term + Tikhonov lambda + If a positive value is given, tikhonov_matrix must not be None tikhonov_matrix: string - derivative matrix of the Tikhonov regularization term - If None (default), no regularization term is added to the model - If using this option, tikhonov_lambda must be positive + Tikhonov matrix """ if self.DICTIONARY is None : ERROR( 'Dictionary not loaded; call "load_dictionary()" first' ) @@ -666,11 +665,11 @@ cdef class Evaluation : if self.THREADS is None : ERROR( 'Threads not set; call "set_threads()" first' ) if tikhonov_lambda < 0: - ERROR( 'Invalid value for Tikhonov equalizer parameter; value must be positive or zero' ) + ERROR( 'Invalid lambda for Tikhonov regularization; value must be positive or zero' ) if tikhonov_lambda > 0 and tikhonov_matrix == None: - ERROR( 'Tikhonov equalizer term given but derivative matrix was not selected; add "tikhonov_matrix" parameter in "build_operator()", valid options are \'L1\' (first derivative with free boundary conditions), \'L2\' (second derivative with free boundary conditions), \'L1z\' (first derivative with zero boundary conditions) and \'L2z\' (second derivative with zero boundary conditions)' ) + ERROR( 'Tikhonov lambda given but Tikhonov matrix was not selected; add "tikhonov_matrix" parameter in "build_operator()"' ) if tikhonov_lambda > 0 and tikhonov_matrix!='L1' and tikhonov_matrix!='L2' and tikhonov_matrix!='L1z' and tikhonov_matrix!='L2z': - ERROR( 'Invalid derivative matrix selection for regularization term; valid options are \'L1\' (first derivative with free boundary conditions), \'L2\' (second derivative with free boundary conditions), \'L1z\' (first derivative with zero boundary conditions) and \'L2z\' (second derivative with zero boundary conditions)' ) + ERROR( 'Invalid matrix selection for Tikhonov regularization term; check "tikhonov_matrix" parameter in "build_operator()"' ) if self.DICTIONARY['IC']['nF'] <= 0 : ERROR( 'No streamline found in the dictionary; check your data' ) @@ -750,7 +749,7 @@ cdef class Evaluation : elif self.A.tikhonov_matrix == 'L2z': yL = np.zeros(y.shape[0] + self.KERNELS['wmr'].shape[0] , dtype=np.float64) else: - ERROR( 'Invalid derivative matrix selection for regularization term; valid options are \'L1\' (first derivative with free boundary conditions), \'L2\' (second derivative with free boundary conditions), \'L1z\' (first derivative with zero boundary conditions) and \'L2z\' (second derivative with zero boundary conditions)' ) + ERROR( 'Invalid matrix selection for Tikhonov regularization term; check "tikhonov_matrix" parameter in "build_operator()"' ) yL[0:y.shape[0]] = y return yL diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index 1f1a2da8..abc4a784 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -26,42 +26,42 @@ cdef extern void COMMIT_At( unsigned char *_ICthreadsT, unsigned int *_ECthreadsT, unsigned int *_ISOthreadsT ) nogil -cdef extern void COMMIT_L1( +cdef extern void Tikhonov_L1( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil -cdef extern void COMMIT_L2( +cdef extern void Tikhonov_L2( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil -cdef extern void COMMIT_L1z( +cdef extern void Tikhonov_L1z( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil -cdef extern void COMMIT_L2z( +cdef extern void Tikhonov_L2z( int 
_nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil -cdef extern void COMMIT_L1t( +cdef extern void Tikhonov_L1t( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil -cdef extern void COMMIT_L2t( +cdef extern void Tikhonov_L2t( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil -cdef extern void COMMIT_L1zt( +cdef extern void Tikhonov_L1zt( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil -cdef extern void COMMIT_L2zt( +cdef extern void Tikhonov_L2zt( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_v_in, double *_v_out ) nogil @@ -247,25 +247,25 @@ cdef class LinearOperator : # DIRECT PRODUCT lambda*L*x if self.tikhonov_matrix == 'L1': with nogil: - COMMIT_L1( + Tikhonov_L1( self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) elif self.tikhonov_matrix == 'L2': with nogil: - COMMIT_L2( + Tikhonov_L2( self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) elif self.tikhonov_matrix == 'L1z': with nogil: - COMMIT_L1z( + Tikhonov_L1z( self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) elif self.tikhonov_matrix == 'L2z': with nogil: - COMMIT_L2z( + Tikhonov_L2z( self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) @@ -273,25 +273,25 @@ cdef class LinearOperator : # INVERSE PRODUCT lambda*L'*y if self.tikhonov_matrix == 'L1': with nogil: - COMMIT_L1t( + Tikhonov_L1t( self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) elif self.tikhonov_matrix == 'L2': with nogil: - COMMIT_L2t( + Tikhonov_L2t( self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) elif self.tikhonov_matrix == 'L1z': with nogil: - COMMIT_L1zt( + Tikhonov_L1zt( self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) elif self.tikhonov_matrix == 'L2z': with nogil: - COMMIT_L2zt( + Tikhonov_L2zt( self.nF, self.nR, self.nV, self.nS, self.tikhonov_lambda, &v_in[0], &v_out[0] ) diff --git a/commit/operator/operator_noLUT.c b/commit/operator/operator_noLUT.c index 0e8da715..84803cff 100644 --- a/commit/operator/operator_noLUT.c +++ b/commit/operator/operator_noLUT.c @@ -186,61 +186,4 @@ void COMMIT_At( return; } -void COMMIT_L( - int nF, int nIC, int nV, int nS, double regterm, - double *vIN, double *vOUT) -{ - /*for(int r = 0; r < nIC-1; r++){ - for(int f = 0; f < nF; f++){ - vOUT[nV*nS + r] += regterm*( -vIN[r*nF + f] + vIN[(r+1)*nF + f] ); - } - }//*/ -} - -void COMMIT_Lt( - int nF, int nIC, int nV, int nS, double regterm, - double *vIN, double *vOUT) -{ - /*for(int f = 0; f < nF; f++){ - vOUT[f] = -vIN[nV*nS]; - vOUT[nF*(nIC-1) + f] = vIN[nV*nS + nIC-2]; - } - - for(int r = 0; r < nIC-2; r++){ - for(int f = 0; f < nF; f++){ - vOUT[nF*(r+1) + f] = vIN[nV*nS + r] + vIN[nV*nS + r+1]; - } - }//*/ -} - - -/*void COMMIT_L( - int nF, int nIC, int nV, int nS, double regterm, - double *vIN, double *vOUT) -{ - for(int f = 0; f < nF; f++){ - - vOUT[nV*nS] += regterm*( -2*vIN[f] + x[nF + f] ); - - for(int r = 1; r < nIC-1; r++){ - vOUT[nV*nS + r] += regterm*( vIN[(r-1)*nF + f] -2*vIN[r*nF + f] + vIN[(r+1)*nF + f] ); - } - - vOUT[nV*nS + nIC - 1] += regterm*( vIN[(nIC-2)*nF + f] - 2*vIN[(nIC-1)*nF + f] ); - } -} - -void COMMIT_Lt( - int nF, int nIC, int nV, int nS, double regterm, - double *vIN, double *vOUT) -{ - for(int f = 0; f < nF; f++){ - vOUT[f] += regterm*( -2*vIN[nV*nS] + vIN[nV*nS + 1] 
); - - for (int r = 0; r < nIC; r++){ - vOUT[r*nF + f] += regterm*( vIN[nV*nS + (r-1)] - 2*vIN[nV*nS + r] + vIN[nV*nS + (r+1)] ); - } - - vOUT[(nIC-1)*nF + f] += regterm*( vIN[nV*nS + (nIC-2)] - 2*vIN[nV*nS + (nIC-1)] ); - } -}//*/ \ No newline at end of file +//TODO: Add tikhonov regularization when no LUT is required \ No newline at end of file diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c index 297502fd..effe2f2e 100644 --- a/commit/operator/operator_withLUT.c +++ b/commit/operator/operator_withLUT.c @@ -2246,8 +2246,10 @@ void COMMIT_At( return; } -////////////////////////// L_1 ////////////////////////// -void COMMIT_L1( +// =============================================================================================== +// Compute L*x MATRIX-VECTOR product (L is first order derivative with free boundary conditions) +// =============================================================================================== +void Tikhonov_L1( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { @@ -2258,7 +2260,10 @@ void COMMIT_L1( } } -void COMMIT_L1t( +// ========================================================================================================== +// Compute Lt*y TRANSPOSE-MATRIX-VECTOR product (L is first order derivative with free boundary conditions) +// ========================================================================================================== +void Tikhonov_L1t( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { @@ -2272,8 +2277,10 @@ void COMMIT_L1t( } } -////////////////////////// L_2 ////////////////////////// -void COMMIT_L2( +// =============================================================================================== +// Compute L*x MATRIX-VECTOR product (L is second order derivative with free boundary conditions) +// =============================================================================================== +void Tikhonov_L2( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { @@ -2284,7 +2291,10 @@ void COMMIT_L2( } } -void COMMIT_L2t( +// ========================================================================================================== +// Compute Lt*y TRANSPOSE-MATRIX-VECTOR product (L is second order derivative with free boundary conditions) +// ========================================================================================================== +void Tikhonov_L2t( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { @@ -2303,8 +2313,10 @@ void COMMIT_L2t( } } -////////////////////////// L_1^z ////////////////////////// -void COMMIT_L1z( +// =============================================================================================== +// Compute L*x MATRIX-VECTOR product (L is first order derivative with zero boundary conditions) +// =============================================================================================== +void Tikhonov_L1z( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { @@ -2319,7 +2331,10 @@ void COMMIT_L1z( } } -void COMMIT_L1zt( +// ========================================================================================================== +// Compute Lt*y TRANSPOSE-MATRIX-VECTOR product (L is first order derivative with zero boundary conditions) +// ========================================================================================================== +void Tikhonov_L1zt( int _nF, int _nIC, int 
_nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { @@ -2330,8 +2345,10 @@ void COMMIT_L1zt( } } -////////////////////////// L_2^z ////////////////////////// -void COMMIT_L2z( +// =============================================================================================== +// Compute L*x MATRIX-VECTOR product (L is second order derivative with zero boundary conditions) +// =============================================================================================== +void Tikhonov_L2z( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) { @@ -2347,7 +2364,10 @@ void COMMIT_L2z( } } -void COMMIT_L2zt( +// ========================================================================================================== +// Compute Lt*y TRANSPOSE-MATRIX-VECTOR product (L is second order derivative with zero boundary conditions) +// ========================================================================================================== +void Tikhonov_L2zt( int _nF, int _nIC, int _nV, int _nS, double _lambda, double *_vIN, double *_vOUT) {