Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pydfc/dfc_methods/discrete_hmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def __init__(self, **params):
"measure_name",
"is_state_based",
"clstr_base_measure",
"clstr_distance",
"sw_method",
"tapered_window",
"dhmm_obs_state_ratio",
Expand Down
75 changes: 22 additions & 53 deletions pydfc/dfc_methods/sliding_window_clustr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,10 @@
import time

import numpy as np
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from pyclustering.cluster.kmeans import kmeans
from pyclustering.utils.metric import distance_metric, type_metric
from sklearn.cluster import KMeans

from ..dfc import DFC
from ..dfc_utils import dFC_mat2vec, dFC_vec2mat
from ..dfc_utils import KMeansCustom, dFC_mat2vec, dFC_vec2mat
from ..time_series import TIME_SERIES
from .base_dfc_method import BaseDFCMethod
from .sliding_window import SLIDING_WINDOW
Expand All @@ -36,20 +33,13 @@
Input signals.
dt : float
Sample spacing.

todo:
- pyclustering(manhattan) has a problem when using predict
"""


class SLIDING_WINDOW_CLUSTR(BaseDFCMethod):

def __init__(self, clstr_distance="euclidean", **params):
def __init__(self, **params):

assert (
clstr_distance == "euclidean" or clstr_distance == "manhattan"
), "Clustering distance not recognized. It must be either \
euclidean or manhattan."
self.logs_ = ""
self.TPM = []
self.FCS_ = []
Expand Down Expand Up @@ -91,7 +81,12 @@ def __init__(self, clstr_distance="euclidean", **params):

self.params["measure_name"] = "Clustering"
self.params["is_state_based"] = True
self.params["clstr_distance"] = clstr_distance

assert (
self.params["clstr_distance"] == "euclidean"
or self.params["clstr_distance"] == "manhattan"
), "Clustering distance not recognized. It must be either \
euclidean or manhattan."

assert (
self.params["clstr_base_measure"] in self.base_methods_name_lst
Expand All @@ -103,32 +98,9 @@ def measure_name(self):

def dFC_mat2vec(self, C_t):
return dFC_mat2vec(C_t)
# if len(C_t.shape)==2:
# assert C_t.shape[0]==C_t.shape[1],\
# 'C is not a square matrix'
# return C_t[np.triu_indices(C_t.shape[1], k=0)]

# F = list()
# for t in range(C_t.shape[0]):
# C = C_t[t, : , :]
# assert C.shape[0]==C.shape[1],\
# 'C is not a square matrix'
# F.append(C[np.triu_indices(C_t.shape[1], k=0)])

# F = np.array(F)
# return F

def dFC_vec2mat(self, F, N):
return dFC_vec2mat(F=F, N=N)
# C = list()
# iu = np.triu_indices(N, k=0)
# for i in range(F.shape[0]):
# K = np.zeros((N, N))
# K[iu] = F[i,:]
# K = K + np.multiply(K.T, 1-np.eye(N))
# C.append(K)
# C = np.array(C)
# return C

def clusters_lst2idx(self, clusters):
Z = np.zeros((self.F.shape[0],))
Expand All @@ -142,21 +114,18 @@ def cluster_FC(self, FCS_raw, n_clusters, n_regions):
F = self.dFC_mat2vec(FCS_raw)

if self.params["clstr_distance"] == "manhattan":
pass
# ########### Manhattan Clustering ##############
# # Prepare initial centers using K-Means++ method.
# initial_centers = kmeans_plusplus_initializer(F, self.n_states).initialize()
# # create metric that will be used for clustering
# manhattan_metric = distance_metric(type_metric.MANHATTAN)
# # Create instance of K-Means algorithm with prepared centers.
# kmeans_ = kmeans(F, initial_centers, metric=manhattan_metric)
# # Run cluster analysis and obtain results.
# kmeans_.process()
# Z = self.clusters_lst2idx(kmeans_.get_clusters())
# F_cent = np.array(kmeans_.get_centers())
########### Manhattan Clustering ##############
kmeans_ = KMeansCustom(
n_clusters=n_clusters,
n_init=500,
init="k-means++",
metric="manhattan",
).fit(F)
kmeans_.cluster_centers_ = kmeans_.cluster_centers_.astype(np.float32)
F_cent = kmeans_.cluster_centers_
else:
########### Euclidean Clustering ##############
kmeans_ = KMeans(n_clusters=n_clusters, n_init=500).fit(F)
kmeans_ = KMeans(n_clusters=n_clusters, n_init=500, init="k-means++").fit(F)
kmeans_.cluster_centers_ = kmeans_.cluster_centers_.astype(np.float32)
F_cent = kmeans_.cluster_centers_

Expand Down Expand Up @@ -265,11 +234,11 @@ def estimate_dFC(self, time_series):

F = self.dFC_mat2vec(dFC_raw.get_dFC_mat(TRs=dFC_raw.TR_array))

# The code below is similar for both clustering methods,
# but is kept this way for clarity.
if self.params["clstr_distance"] == "manhattan":
pass
# ########### Manhattan Clustering ##############
# self.kmeans_.predict(F)
# Z = self.clusters_lst2idx(self.kmeans_.get_clusters())
########### Manhattan Clustering ##############
Z = self.kmeans_.predict(F.astype(np.float32))
else:
########### Euclidean Clustering ##############
Z = self.kmeans_.predict(F.astype(np.float32))
Expand Down
75 changes: 75 additions & 0 deletions pydfc/dfc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import seaborn as sns
from nilearn.plotting import plot_markers
from scipy import signal, stats
from sklearn.cluster import kmeans_plusplus
from sklearn.metrics import pairwise_distances

# np.seterr(invalid='ignore')

Expand Down Expand Up @@ -438,6 +440,79 @@ def dFC_vec2mat(F, N):
return C


############################ K-means Clustering with Manhattan distance ############################


class KMeansCustom:
    """K-means clustering that supports arbitrary scikit-learn distance metrics.

    Mirrors the subset of the ``sklearn.cluster.KMeans`` interface used in this
    package (``fit``, ``predict``, ``labels_``, ``cluster_centers_``,
    ``inertia_``) but computes point-to-centroid distances with
    ``sklearn.metrics.pairwise_distances`` so non-Euclidean metrics
    (e.g. Manhattan) can be used.

    Parameters
    ----------
    n_clusters : int
        Number of clusters to form.
    max_iter : int
        Maximum assignment/update iterations per initialization.
    n_init : int
        Number of independent random restarts; the run with the lowest
        inertia is kept.
    init : {"k-means++", "random"}
        Centroid initialization strategy.
    metric : str
        Any metric string accepted by ``sklearn.metrics.pairwise_distances``.
    """

    def __init__(
        self, n_clusters, max_iter=300, n_init=100, init="k-means++", metric="manhattan"
    ):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.n_init = n_init
        self.init = init
        self.metric = metric
        # Populated by fit().
        self.labels_ = None
        self.cluster_centers_ = None
        self.inertia_ = None

    def _custom_distance(self, p1, p2):
        """Distance between two single points under ``self.metric``."""
        return pairwise_distances([p1], [p2], metric=self.metric)[0][0]

    def _distance_matrix(self, X, centroids):
        """(n_samples, n_clusters) distance matrix in one vectorized call."""
        return pairwise_distances(X, centroids, metric=self.metric)

    def _assign_clusters(self, X, centroids):
        """Return, for each sample, the index of its nearest centroid.

        One vectorized ``pairwise_distances`` call replaces the previous
        Python loop over every (sample, centroid) pair — same result,
        far fewer metric invocations.
        """
        return self._distance_matrix(X, centroids).argmin(axis=1).tolist()

    def _compute_centroids(self, X, labels, prev_centroids):
        """Mean of each cluster's members; empty clusters keep their centroid.

        Retaining the previous centroid for an empty cluster avoids the NaN
        centroid that ``points.mean(axis=0)`` would otherwise produce.
        """
        labels = np.asarray(labels)
        centroids = []
        for i in range(self.n_clusters):
            points = X[labels == i]
            if len(points) == 0:
                centroids.append(prev_centroids[i])
            else:
                centroids.append(points.mean(axis=0))
        return np.array(centroids)

    def fit(self, X):
        """Run ``n_init`` k-means restarts on ``X`` and keep the best run.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to cluster.

        Returns
        -------
        self
            Fitted estimator with ``labels_``, ``cluster_centers_`` and
            ``inertia_`` set.

        Raises
        ------
        ValueError
            If ``init`` is neither ``"k-means++"`` nor ``"random"``.
        """
        X = deepcopy(X)
        min_inertia = None
        best_centroids = None
        best_labels = None
        for _ in range(self.n_init):
            if self.init == "random":
                centroids = X[np.random.choice(len(X), self.n_clusters, replace=False)]
            elif self.init == "k-means++":
                centroids, _ = kmeans_plusplus(X, n_clusters=self.n_clusters)
            else:
                # Previously an unknown init fell through and raised a
                # confusing NameError on the unbound `centroids`.
                raise ValueError(
                    "init must be 'k-means++' or 'random', got %r" % (self.init,)
                )
            for _ in range(self.max_iter):
                labels = self._assign_clusters(X, centroids)
                new_centroids = self._compute_centroids(X, labels, centroids)
                if np.allclose(centroids, new_centroids, atol=1e-6):
                    break
                centroids = new_centroids
            # Inertia: sum of squared point-to-assigned-centroid distances
            # (squared even for non-Euclidean metrics, matching the original
            # convention used to compare restarts).
            dist = self._distance_matrix(X, centroids)
            inertia = np.sum(dist[np.arange(len(X)), labels] ** 2)
            if min_inertia is None or inertia < min_inertia:
                min_inertia = inertia
                best_centroids = centroids
                best_labels = labels

        self.labels_ = np.array(best_labels)
        self.cluster_centers_ = np.array(best_centroids)
        self.inertia_ = min_inertia
        return self

    def predict(self, X):
        """Assign each sample in ``X`` to its nearest fitted centroid."""
        X = deepcopy(X)
        return self._assign_clusters(X, self.cluster_centers_)


####################################################################################################


# test
def common_subj_lst(time_series_dict):
SUBJECTs = None
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ dependencies = [
'matplotlib',
'networkx',
'nilearn>=0.10.2,!=0.10.3',
'pyclustering',
'pycwt',
'seaborn',
'statsmodels'
Expand Down
Loading