From 481b020c6c9e3667fa0f093415fcf8434c4bf463 Mon Sep 17 00:00:00 2001
From: "Hyunmin Kim (Brandon)"
Date: Thu, 10 Dec 2020 15:05:21 +0900
Subject: [PATCH] Update deep_generative_model_LINCS.ipynb

the directly reading input excel
---
 deep_generative_model_LINCS.ipynb | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/deep_generative_model_LINCS.ipynb b/deep_generative_model_LINCS.ipynb
index d9d41bd..dc9fece 100644
--- a/deep_generative_model_LINCS.ipynb
+++ b/deep_generative_model_LINCS.ipynb
@@ -180,20 +180,21 @@
     }
    ],
    "source": [
-    "pclFileNameSM = \"../Data/YX_Data/pcl_information.csv\"\n",
+    "pclFileNameSM_df = pd.read_excel('./data/1-s2.0-S0092867417313090-mmc1.xlsx', sheet_name='Table S7 PCL Statistics', skiprows=1)\n",
     "pertClassDicSM = {}\n",
     "classDicSM = {}\n",
     "pertCountSM = 0\n",
-    "with open(pclFileNameSM, \"r\") as myfile:\n",
-    "    for line in myfile.readlines():\n",
-    "        if not len(line):\n",
-    "            continue\n",
+    "# Assumes column 0 is the PCL id and column 4 the '|'-separated members.\n",
+    "for _, row in pclFileNameSM_df.iterrows():\n",
+    "    PCL_ID = row.iloc[0]\n",
+    "    PCL_members = row.iloc[4]\n",
+    "    if not isinstance(PCL_members, str):\n",
+    "        continue  # blank cell (read as NaN): nothing to split\n",
+    "    for PCL_member in PCL_members.split('|'):\n",
     "        pertCountSM += 1\n",
-    "        line = line.strip()\n",
-    "        spline = line.split(',')\n",
-    "        pertClassDicSM[spline[0]] = spline[1]\n",
-    "        c = classDicSM.setdefault(spline[1], 0)\n",
-    "        classDicSM[spline[1]] = c + 1\n",
+    "        pertClassDicSM[PCL_member] = PCL_ID\n",
+    "        c = classDicSM.setdefault(PCL_ID, 0)\n",
+    "        classDicSM[PCL_ID] = c + 1\n",
     "print(\"The number of perturbagens with class information: \" + str(pertCountSM))\n",
     "print(\"The number of PCL: \" + str(len(classDicSM)))\n",
     "print(sorted(classDicSM.items(), key = operator.itemgetter(1), reverse = True)[0])"