diff --git a/EEMD.mat b/EEMD.mat new file mode 100644 index 0000000..3d25fb0 --- /dev/null +++ b/EEMD.mat @@ -0,0 +1,60 @@
% EEMD.mat -- NOTE(review): this is a MATLAB *script*; the .mat extension is
% normally reserved for binary data files. Consider renaming to EEMD.m.
%
% Decompose hippocampal voxel time series of HC (healthy control) rs-fMRI
% volumes with ensemble empirical mode decomposition (EEMD), write each
% voxel's IMFs to a spreadsheet, and save a stacked plot of signal + IMFs.
clc; clear;   % the original 'clear all;clear' was redundant

HC = dir('.\MRIdata\non_filtered\HC\*.nii');
nHC = length(HC);
fnameHC = '.\MRIdata\non_filtered\HC_eemd\HIPPO\';

% AAL atlas at 3 mm resolution: labels 37/38 = left/right hippocampus.
aal = load_nii('./MRIdata/aal/aal_3mm');
aal_1D = reshape(aal.img, 61*73*61, 1);
HIPPO = [find(aal_1D == 37); find(aal_1D == 38)];

for i = 169:169   % NOTE(review): processes subject 169 only; use 1:nHC for all subjects
    dataHC = load_nii(['.\MRIdata\non_filtered\HC\', HC(i).name]);
    HCimg = dataHC.img;
    HCimg(isnan(HCimg)) = 0;                   % zero-fill NaNs (voxels outside the brain)
    HCimg = reshape(HCimg, 61*73*61, 195);     % voxels x 195 timepoints
    z = zeros(size(HCimg));
    subj = extractBefore(HC(i).name, '.');     % subject id, hoisted from repeated calls
    mkdir(['./MRIdata/non_filtered/HC_eemd/HIPPO/', subj]);
    for ii = 1:length(HIPPO)
        z(HIPPO(ii), :) = HCimg(HIPPO(ii), :);
        Y = z(HIPPO(ii), :);
        n = num2str(HIPPO(ii));
        % EEMD with noise amplitude 0.1 and 10000 ensemble trials.
        allmode = eemd(Y, 0.1, 10000, 0);
        xlswrite([fnameHC, subj, '\', subj, '_v', n], allmode);
        nImf = size(allmode, 1);
        subplot(nImf + 1, 1, 1)
        plot(Y)
        title([subj, '-v', n]);
        ylabel('signal')
        % BUG FIX: the original hard-coded subplots for IMF1..IMF7 and would
        % error whenever eemd returned fewer than 7 modes (and silently
        % dropped extra modes when it returned more); plot all nImf modes.
        for k = 1:nImf
            subplot(nImf + 1, 1, k + 1)
            plot(allmode(k, :))
            ylabel(['IMF', num2str(k)])
        end
        saveas(gcf, [fnameHC, subj, '\', subj, '_v', n], 'm');
    end
end
diff --git a/README.md b/README.md index fc39a52..d15e262 100644 --- a/README.md +++ b/README.md @@ -1,103 +1,63 @@ --- type: "project" # DON'T TOUCH THIS ! :) -date: "2020-05-16" # Date you first upload your project. 
+date: "2023-06-22" # Date you first upload your project. # Title of your project (we like creative title) -title: "This is an example project page which serves as a template" +title: "Mechanism of delusion and hallucination in schizophrenia based on the quadripartite model" -# List the names of the collaborators within the [ ]. If alone, simple put your name within [] -names: [Samuel Guay, Pierre Bellec] +# Sole author +name: [Meng-Hua Kuo] # Your project GitHub repository URL github_repo: https://github.com/PSY6983-2021/project_template -# If you are working on a project that has website, indicate the full url including "https://" below or leave it empty. -website: - -# List +- 4 keywords that best describe your project within []. Note that the project summary also involves a number of key words. Those are listed on top of the [github repository](https://github.com/PSY6983-2021/project_template), click `manage topics`. +# List +- 4 keywords that best describe your project within []. Note that the project summary also involves a number of key words. Those are listed on top of the [github repository](https://github.com/brainhack-school2020/project_template), click `manage topics`. # Please only lowercase letters -tags: [project, github, markdown, brainhack] +tags: [res-fmri, schizophrenia, hallucinations, delusions] # Summarize your project in < ~75 words. This description will appear at the top of your page and on the list page with other projects.. -summary: "Each project repository should have a markdown file explaining the background and objectives of the project, as well as a summary of the results, and links to the different deliverables of the project. Project reports are incorporated in the BHS [website](https://psy6983.brainhackmtl.org/project)." - -# If you want to add a cover image (listpage and image in the right), add it to your directory and indicate the name -# below with the extension. 
-image: "" +summary: "A total of 341 HC and 213 SCZ patients were recruited in this study. Ensemble empirical mode decomposition was employed to decompose voxel-wised resting-state functional Magnetic Resonance Imaging (rs-fMRI) data into intrinsic mode functions (IMFs) within regions according to the quadripartite model. We trained multiple classifiers with the various features extracting from the IMFs, including instantaneous frequency, amplitude, and phase, as well as mean frequency, amplitude, standard deviation and sample entropy of rs-fMRI data." --- -## Project definition - -### Background - -Inspired by the [Recurse Centre](https://www.recurse.com/) initiative (formally known as the "hacker school"), Brainhack School was established in 2018 with the mission to train students from multidisciplinary backgrounds to a panoply of reproducible tools for neural data science, using a project-based approach. Following an initial 3-weeks long pilot, a 4th week was added with an intensive bootcamp, so that students could choose what tools to learn more deeply in their projects. As the course became integrated in standard curriculum at different universities, the formula seemed to be working. In order to streamline the different stages of the project, some standard template and milestones needed to be incorporated in a github-based workflow. The "project template" project (which is also our first BHS meta-project) aims at establishing such a standardized template. You can check the following [video](https://youtu.be/PTYs_JFKsHI) where Pierre Bellec gives an overview of the Brainhack school. +# Mechanism of delusion and hallucinations in schizophernia based on the quadripartite model +## Background - +Delusion and hallucination are common symptoms in schizophrenia (SCZ). Despite of different constructs composed with, false belief and false perception still share some similar features to an extent. 
For instance, they are both characterized by the breakdown of corollary discharge, deficit of monitoring control, and aberrant salience function. Several studies have also demonstrated a collaborative neural underpinning regarding delusions and hallucinations. The intrinsic networks based on the triple network theory, such as the salience network (SAL), default mode network (DMN) and central executive network (CEN), are recruited during delusional or hallucinatory phenomena, reflecting that some brain modules might overlap when the symptoms arise. A quadripartite model, which is supported by most of the current evidence and provides a storyline trying to explain the process during hallucinations, might also be interpretable for delusions due to its involvement of the aforementioned triple network and hippocampus. Nonetheless, it still falls short of explaining the underlying mechanisms of both hallucination and delusion. Moreover, the precise prediction of these two symptoms has not yet been achieved. We hypothesize that the crucial differences between delusions and hallucinations might serve as key components for machine-learning techniques to distinguish delusional or hallucinatory phenomena. -### Tools +## Tools The "project template" project will rely on the following technologies: - * Markdown, to structure the text. - * The Hugo website framework which is used by the BHS website. This makes it possible to easily add the markdown project description to the website. - * Adding the project to the website relies on github, through pull requests. + * fMRI + * Python + * Matlab -### Data +## Data -Ultimately, the project template will be used by all BHS participants. Data on the different projects will be aggregareted on the [following page](https://psy6983.brainhackmtl.org/project). This will serve as an additional example gallery in the years to come for future brainhack school students. 
Many reports from [BHS 2020](https://github.com/brainhack-school2020) already used this template. +In this study, ensemble empirical mode decomposition (EEMD) was employed on voxel-wised resting-state functional Magnetic Resonance Imaging (rs-fMRI) data with regions of interest (ROI) according to the quadripartite model. We then trained multiple classifiers with the extracting features from the intrinsic model functions (IMFs) in order to figure out the discrepancy of delusions and hallucinations among brain works for future analysis. -### Deliverables +## Deliverables At the end of this project, we will have: - - The current markdown document, completed and revised. - - A gallery of the student projects at Brainhack 2020. - - Instructions on the website about how to submit a pull request to the [brainhack school website](https://github.com/PSY6983-2021) in order to add the project description to the website. + - Whether the absence of disease, delusion or hallucination could be predicted by machine-learning approaches + - What fueatures are appropriate for predicting each target ## Results -### Progress overview - -The project was swiftly initiated by P Bellec, based on the existing template created in 2019 by Tristan Glatard and improved by different students. It was really not that hard. Community feedback is expected to lead to rapid further improvements of this first version. - -### Tools I learned during this project - - * **Meta-project** P Bellec learned how to do a meta project for the first time, which is developping a framework while using it at the same time. It felt really weird, but somehow quite fun as well. - * **Github workflow-** The successful use of this template approach will demonstrate that it is possible to incorporate dozens of students presentation on a website collaboratively over a few weeks. 
- * **Project content** Through the project reports generated using the template, it is possible to learn about what exactly the brainhack school students are working on. - -### Results + Only the analysis for the hippocampus ROI is currently completed. It is shown that whether one has schizophrenia or not could only be predicted by instantaneous amplitude and instantaneous frequency. The NaiveBayes model had the best performance for discriminating delusion and hallucination, with the original and IMF2 signals respectively. +![image](https://github.com/MengHuaKuo/KuoMengHua_project/assets/130176621/91413b75-ee47-43b4-9911-d3a7a4444841) -#### Deliverable 1: report template +## Tools I learned during this project -You are currently reading the report template! I will let you judge whether it is useful or not. If you think there is something that could be improved, please do not hesitate to open an issue [here](https://github.com/PSY6983-2021/project_template/issues/) and let us know. + * **Python** Most of the scripts were run in Python and lots of new functions were learned during the process, especially regarding machine learning techniques and statistical analysis. + * **Github workflow-** I was a complete beginner with GitHub, and many challenges occurred when I tried to apply it. Since it is a widely-used platform for sharing everyone's contributions and knowledge, I am trying to improve my skills with it. + * **VS code** VS Code helped me a lot when I tried to run both Jupyter notebooks and pure Python scripts, saving a lot of time switching between multiple screens. -#### Deliverable 2: project gallery - -You can check out the [2020 BrainHack School project gallery](https://psy6983.brainhackmtl.org/project/) - -##### ECG pupilometry pipeline by Marce Kauffmann - -The repository of this project can be found [here](https://github.com/mtl-brainhack-school-2019/ecg_pupillometry_pipeline_kaufmann). The objective was to create a processing pipeline for ECG and pupillometry data. 
The motivation behind this task is that Marcel's lab (MIST Lab @ Polytechnique Montreal) was conducting a Human-Robot-Interaction user study. The repo features: - * a [video introduction](http://www.youtube.com/watch/8ZVCNeX42_A) to the project. - * a presentation [made in a jupyter notebook](https://github.com/mtl-brainhack-school-2019/ecg_pupillometry_pipeline_kaufmann/blob/master/BrainHackPresentation.ipynb) on the results of the project. - * Notebooks for all analyses. - * Detailed requirements files, making it easy for others to replicate the environment of the notebook. - * An overview of the results in the markdown document. - -##### Other projects -Here are other good examples of repositories: -- [Learning to manipulate biosignals with python](https://github.com/mtl-brainhack-school-2019/franclespinas-biosignals) by François Lespinasse -- [Run multivariate anaylysis to relate behavioral and electropyhysiological data](https://github.com/mtl-brainhack-school-2019/PLS_PV_Behaviour) -- [PET pipeline automation and structural MRI exploration](https://github.com/mtl-brainhack-school-2019/rwickens-sMRI-PET) by Rebekah Wickens -- [Working with PSG [EEG] data from Parkinson's patients](https://github.com/mtl-brainhack-school-2019/Soraya-sleep-data-in-PD-patients) by Cryomatrix -- [Exploring Brain Functional Activation in Adolescents Who Attempted Suicide](https://github.com/mtl-brainhack-school-2019/Anthony-Gifuni-repo) by Anthony Gifuni - -#### Deliverable 3: Instructions +## Results - To be made available soon. +In hippocampus, the detection of SCZ could be well predicted via original voxel-based rs-fMRI signal of instantaneous amplitude and frequency with the F1-socre of 1 by multiple models (e.g., KNN, logistic and SVM models). 
Furthermore, the linear discriminant analysis model performed best in IMF4 of instantaneous frequency with an F1-score of 0.86 for detection of delusion, while the logistic model performed ideally in IMF2 of sample entropy with an F1-score of 0.95 for detection of hallucination. The Naive Bayes model with IMF2 of instantaneous frequency had an F1-score of 0.87 for predicting both symptoms. ## Conclusion and acknowledgement +Our study shows that a precise diagnosis for SCZ is feasible. In addition, the prediction of delusion and hallucination helps to clarify the influence from one brain network to another, which might indirectly shed light on the network dynamics and structural connectivity. -The BHS team hope you will find this template helpful in documenting your project. Developping this template was a group effort, and benefitted from the feedback and ideas of all BHS students over the years. - -You can also make submit your project to neurolibre https://neurolibre.org/. It is a preprint server for interactive data analyses. It is tailored for publishing interactive neuroscience notebooks that can seamlessly integrate data, text, code and figures.The submission instructions can be found here https://docs.neurolibre.org/en/latest/index.html and the jupyter book docs there https://jupyterbook.org/intro.html. +I am grateful for the efforts of all of our instructors and TAs. The project enhanced a lot of hands-on skills and also offered me a chance to put them into practice in a real project. 
# code.py -- train and evaluate classifiers on EEMD-derived rs-fMRI features.
#
# Pipeline per spreadsheet (7 per feature/network group):
#   read train/val Excel -> binarize labels from the 4th-from-last row ->
#   featurewiz feature selection on train -> standardize -> impute -> SMOTE ->
#   repeat 10-fold CV 1000x for 9 models -> average metrics -> write to Excel.
import os
import glob
from collections import Counter

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns; sns.set()
import tensorflow as tf

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
from sklearn import preprocessing, linear_model
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.model_selection import (KFold, cross_val_score, cross_validate,
                                     RepeatedStratifiedKFold)
from sklearn.feature_selection import (VarianceThreshold, SelectKBest,
                                       f_classif, SelectFromModel)
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import (LinearDiscriminantAnalysis,
                                           QuadraticDiscriminantAnalysis)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, f1_score,
                             precision_recall_fscore_support, roc_auc_score,
                             classification_report, cohen_kappa_score)
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import BorderlineSMOTE, SMOTE, RandomOverSampler
from featurewiz import featurewiz

RESULT_COLUMNS = ['File Name', 'Model',
                  'train_acc', 'train_F1', 'train_loss', 'train_roc_auc',
                  'test_acc', 'test_F1', 'test_loss', 'test_roc_auc',
                  'val_acc', 'val_F1', 'val_loss', 'val_kappa', 'val_roc_auc']


def _binarize_labels(data):
    """One-column frame 'H': 1 where the 4th-from-last row is non-zero, else 0.

    Replaces the original per-element concat loop with a vectorized comparison.
    """
    return (data.iloc[-4, :] != 0).astype(int).reset_index(drop=True).to_frame(name='H')


def _feature_frame(data):
    """Samples x voxels float32 frame (all rows except the 4 trailing label rows).

    Columns are named 'voxels[i]' -- str([i]) reproduces the original naming.
    """
    X = pd.DataFrame(data.iloc[:-4, :].T.values.astype('float32'))
    X.columns = ['voxels' + str([i]) for i in range(X.shape[1])]
    return X


os.chdir(r'G:\material')
path = r'G:\material'
val = os.path.join(path, os.listdir(path)[1])
train = os.path.join(path, os.listdir(path)[2])
features = os.listdir(val)         # 0: en, 1: ins, 2: mean, 3: SD
networks = os.listdir(os.path.join(val, features[2]))  # 0: DMN, 1: ECN, 2: HIPPO, 3: SAL
f_n = os.path.join(features[2], networks[0])
val_f_n = os.path.join(val, f_n)
train_f_n = os.path.join(train, f_n)

for u in range(7):  # each group ships 7 spreadsheets
    train_names = glob.glob(os.path.join(train_f_n, '*'))
    val_names = glob.glob(os.path.join(val_f_n, '*'))
    train_data = pd.read_excel(train_names[u], header=None)
    val_data = pd.read_excel(val_names[u], header=None)
    # BUG FIX: str.strip(train_f_n) strips any *characters* of the prefix from
    # both ends (mangling file names); basename extracts the name correctly.
    file_name = os.path.basename(train_names[u])

    # --- training side: label binarization + featurewiz selection -----------
    train_for_fw = pd.concat([_feature_frame(train_data),
                              _binarize_labels(train_data)], axis=1)
    selected, train_data_fw = featurewiz(dataname=train_for_fw, target='H',
                                         corr_limit=0.7, verbose=2, sep=",",
                                         header=0, test_data="",
                                         feature_engg="", category_encoders="")
    train_Y = train_data_fw.iloc[:, -1]
    train_X = StandardScaler().fit_transform(train_data_fw.iloc[:, :-1])

    # --- validation side: keep only the columns featurewiz retained ---------
    val_for_fw = pd.concat([_feature_frame(val_data),
                            _binarize_labels(val_data)], axis=1)
    val_for_fw.drop(columns=val_for_fw.columns.difference(train_data_fw.columns),
                    inplace=True)
    val_Y = val_for_fw.iloc[:, -1]
    val_X = StandardScaler().fit_transform(val_for_fw.iloc[:, :-1])

    # BUG FIX: impute NaNs and oversample once, *outside* the model loop --
    # the original re-ran BorderlineSMOTE for every model, re-growing the
    # already-resampled training set on each iteration.
    imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
    train_X = imp_mean.fit_transform(train_X)
    # NOTE(review): refitting the imputer on validation data mirrors the
    # original; fit-on-train/transform-on-val would avoid leakage -- confirm.
    val_X = imp_mean.fit_transform(val_X)
    train_X, train_Y = BorderlineSMOTE(sampling_strategy='all', random_state=42,
                                       k_neighbors=2).fit_resample(train_X, train_Y)
    # NOTE(review): oversampling the *validation* set inflates its scores -- confirm intent.
    val_X, val_Y = BorderlineSMOTE(sampling_strategy='all', random_state=42,
                                   k_neighbors=2).fit_resample(val_X, val_Y)

    models = [
        ('LinearDiscriminantAnalysis_svd', LinearDiscriminantAnalysis()),
        ('LinearDiscriminantAnalysis_lsqr', LinearDiscriminantAnalysis(solver='lsqr', shrinkage=0.2)),
        ('KNN', KNeighborsClassifier()),
        ('NaiveBayes', GaussianNB()),
        ('SVM_rbf', SVC(C=0.5, kernel='rbf')),
        ('SVM_sigmoid', SVC(C=0.5, kernel='sigmoid')),
        ('Tree', DecisionTreeClassifier()),
        ('RandomForest', RandomForestClassifier()),
        ('Logistic', LogisticRegression(penalty='elasticnet', solver='saga',
                                        l1_ratio=0.5, random_state=0)),
    ]

    # Per-repetition metric records; DataFrame.append() was removed in
    # pandas >= 2.0, so collect dicts and build one frame at the end.
    rows = []
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)

    for model_name, model in models:
        for r in range(1000):  # repeat (stratified) 10-fold CV 1000 times
            cv = cross_validate(model, train_X, train_Y, cv=10,
                                scoring=('accuracy', 'f1', 'roc_auc', 'neg_brier_score'),
                                return_train_score=True, return_estimator=True)

            # score every fold's fitted estimator on the held-out validation set
            val_acc, val_f1, val_loss, val_kappa, val_roc_auc = [], [], [], [], []
            for est in cv['estimator']:
                pred = est.predict(val_X)
                val_acc.append(accuracy_score(val_Y, pred))
                val_loss.append(bce(np.float32(val_Y), np.float32(pred)).numpy())
                val_f1.append(f1_score(val_Y, pred, average='weighted'))
                val_kappa.append(cohen_kappa_score(val_Y, pred))
                try:
                    val_roc_auc.append(roc_auc_score(val_Y, pred, average='weighted'))
                except ValueError:
                    pass  # AUC undefined when only one class is present

            rows.append({'File Name': file_name,
                         'Model': model_name,
                         'train_acc': cv['train_accuracy'].mean(),
                         'train_F1': cv['train_f1'].mean(),
                         'train_loss': cv['train_neg_brier_score'].mean(),
                         'train_roc_auc': cv['train_roc_auc'].mean(),
                         'test_acc': cv['test_accuracy'].mean(),
                         'test_F1': cv['test_f1'].mean(),
                         'test_loss': cv['test_neg_brier_score'].mean(),
                         'test_roc_auc': cv['test_roc_auc'].mean(),
                         'val_acc': np.mean(val_acc),
                         'val_F1': np.mean(val_f1),
                         'val_loss': np.mean(val_loss),
                         'val_kappa': np.mean(val_kappa),
                         'val_roc_auc': np.mean(val_roc_auc)})

    # average the 1000 repetitions per (file, model) and save
    df_results = pd.DataFrame(rows, columns=RESULT_COLUMNS)
    result1000 = df_results.groupby(['File Name', 'Model']).mean()
    # NOTE(review): 'reslut' looks like a typo for 'result', but the output
    # folder must exist under this exact name, so the path is kept as-is.
    result1000.to_excel(r'G:\material\reslut\meanFre\DMN\P' + file_name,
                        index=True, header=True)