From f1f28881c2afd5fbf907830f7b19829cd8599a43 Mon Sep 17 00:00:00 2001 From: Aurora Bunten Date: Thu, 23 Feb 2017 21:36:16 -0500 Subject: [PATCH 1/2] Turning in my Text Mining Project --- TextMiningWriteUp.pdf | Bin 0 -> 22372 bytes text_mining.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 TextMiningWriteUp.pdf create mode 100644 text_mining.py diff --git a/TextMiningWriteUp.pdf b/TextMiningWriteUp.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d289e7e569a7188add4dcbdca6f56b989f825756 GIT binary patch literal 22372 zcma&NV~j3b&@I@uZQHhO+qR!JPy4iO+qP|YpSEq=_x&a}lS%GOGXE;Iv;XX@)LONw zHo20R1U)kY8w~kS@lf|r>rfsH3lS5Mqlpa+A0MNfxr3#v6%p%y85KqeYdcqS7e)y? zV^?!Ab5ln%a{&PufUAqSu|14u_ObR2}e(&R+ z4s}oI9o&OX=xxYdRX$d&j2<@w23upS-!E7Je&4R%-3o>)MlIh$zmQsIM*KN$ED$DF zwLqgtQVY=b0ivB1R>Xma9bKJ*I$c;nh2TEFk)r0i&L?7!nTBv}$zDlxRIiZf^gtqW zGfzbrnpj3b5g2z;#ci;&k~atTb`ucXpdoFPXSvKE9)NYm^R^KND+nl8uU-B0c2$D= z-uo3+64SS86{Ajt2=3FR`F*lOT#;jrxpZl>WqnRL?QCCXh4I@L~y$H+{;%Sft z8@Z}HeL8+!VC8~kX=ZXn%*!^yAf=bfz;o=i5KgukP9Uu}B@ z$I~+(=lXIS2`vXbDMKN9$E7P~X6Pkoq@+C*rU3njhn9KGmhqSB7BjB~2kePi7vQksQAKd=IAK~ZNqaC$m(f0Z~Q$HdiE*f%|f)Hu`Qm zK>Fu3I_>XkGkJLV(fAqTKB%YFi`UUZ8e-R{bB|kVv@A^cEW_c(r>5QMf$X9r5)tRG z=&T;9-nbLi(zTi%y}3hX&H4C`DfuG5Q{<}ocA zr@LIZS!9NqDf65^{<>)RvVk%{zB?Hng+0f3SQ42fNkNNuO;MiPiCH%&fj-qMJU1+v z2M41$^m<-fOHi$$I{C{&r7efKE5j}xB-#ozU6*F|7A5;9lCGUuaM*#^i<_>26yY&AT{SRBf{0~X|Pr%I1 z!t_7K|5uXz|KJji{~ugZqSg_+$%J(Mj9v|@l@W|$Eks15s*>OU3`!m3%H0XnHS^;i zh9^^cY!>q@Ge2-RbH4*uApW?)ha2A|$#Nrga%}ndob;}0G0^?xpm17xsq2W>05mH& zeF_fW$o>1lfdt7*6b1^9_2>h_sM^049_;IP_Vq`ynMHJu=zyvX1Pg2Q?)$4GV{T?i zP(R4g`ZO^v5P|>{(WHJV%$FdtCK$KTCsJ&`|G|6;?MsLQ-dui1Q)@72nLX0F$%Ina z19zSV;Ugr`jBdI=gbKopxWwBw=#f0^MFOnS^_%k!mA?7zT;ARLRCfn<5t?_++Qnwk zGM42S3`LhX=EiCF)z|dfZ%g)I%5)JWS1F(Iqel;%PT7K;X zKRTw7Po%^g_LwAwRGV;v&yvw}%W@DpxX5)q+q^NgCtV$~e{4vd@Eb#Hk?W>4IQ84S zO%EiGNJIN)H9ysXMlMr#=<7nR2!+%X&y(|`vL`N*ldVn&p?kD=lr!03HyK$YHbNDi z&x1d5?5f<+4y0iuUXYs6Suts$Y2>B%;;Jbj+dkoH5a%_fj^Zr8R-0wRm$s1Ydle}b zgsPj|jU3T z122UBI>dvSXq)AqG}K{%*)#e$lDr?A4<7>uT*sQO@g6|MxEkEXALgJ<3PS z@~_H;{}(j=56u1tf$SVC|2G)9{tqy6|9^n-e+wUGA{KTw4yOO7%-QaS^;BI=Vd`8J zI6eF3YAhxtoN6SK^_Olnr^DUEItsoQH6j3kV|I@=ulEYJw<98xcSa!q9u&eA88^kD z7%H|-R2VK|JBQ^F<(ZLBn%B+D;E)&PEx!JOhMpmN|Kx0Fw2{G=2aSA+`8PSG9nUCl%%3}ZwVDN!G) z;!Ks;_02=NQ4FyPUB+##d+2f7w+z8zmK|$g5B>)AGi+QzYJ9>1DXkUm_dCDViT=eM z4YuWo-M;tPg#Fw1#2OvYXl!Nvo0cY+LUz;|$Mka?^X&omEA@z<8(+&D{mJp{h?gz+ zc4QoVSd_)f$Hv1U6toH@ZD4Xflm!_sc;l}nDJ3XDJq7wUCO`paqm}K_oxgDL1W`0$ z*AUr?*EK}l;d0F9(OcGk`G^S-BtAl3VqW0nC2n49BjeUw>TaX&H`F|O&N}{!C@=_o zK;~==(U55T{8@_likGndx#7bZP_>ZG zNt{G_M{+$PctA=0ARavVE-5Sr@6)IlQ0EldC5v15Hbdx%PLR}siBU?(ri>eMq4R?K z^xE@|D+?%?=Rb1zGLTz|uffs~9E_gxmwsCPLG^Lt%n-dHXPpScb@4^a-Gk!_$T@<> zTOzEiA2%!Hn5`+Yq{{I`A~htgx<7}!;^pt3A)^n1S3oUnX1LT@?C@L|91ePaJaM1S zz;|@Cwa-}QQ?-tqN#EhBDd=^uxlXQ>G8gxvh6co&%3&tiBHiIZCv_HM#%GWvHnj@!4U!&b+aTIxeb)n0e? z%2vMB$cr*MOgm6JI4sh|Wzc9iA6c|wvHuR5DkcyozoceKL8*|n$wpOA+4fh6rlG#7 zW#symv5G1$rx8r&9~p}*JxfVO%3`JX$4X;q(swWF@#zcyU67Op*-&X-A<*W2mZmV5-`BP8y)$G`QkHNY<8STIO2IwI?@rt|}y1i44N`Q-ESEr$6P@ zA?(SLIFsbwbTG)CtNZh%1>D#EGx77ou$`lqWDgbeQz2dMGfdYxM7j_b(BpK?`?+1w z3Yl6me1wypdTqB^7@O>8tf(1@(el3XL<;ts1@hzFw%cllLV3TM2#o-S02y1cJIB-e z3m#hCFJfg7sak8P2;s!Ri{WuCdoSJu2~xu~ z`uVAPNE@HK(og~QmwJlQA;8Emc4iexyY%3rUcCu&@nCkMbcPFESCQu{xAkwkf_gQuoyjzFNjB=>}NOy2@AIAwWhu47HnXp1nO987Ee0 zoxFOsZY902iKmMP1Cj^B-DBAjH&NPXNaK@Vc#^AWe`j;20kAnhgoTTi3lw z%PFEf`9)0~g?Jg-j}bsyeRm-suZ%Z0-pXPr7eBMLNoZ%)W{-`i!g8113*n6-p(+sO zLKycAP_)*_60S78pW>nM&g?0gz&5_eutI9-4k!?2$FOXmsFcQN#=a1x8h*!;Gl5W2Kup>dkfxCktK!ix;Q`ZBLDuT^I>>yax1pe1O z#tzs5%kzUOhjoLhdfE0zjq@TszyFaaYjLH#GFl~{vB-*deUi=|*|D4=me;Jzs?}>? z7poO2ewBHC*h!eVLJsgVmr?ihx_vPC5|Sr}52n|_FERpff48``Ch7#Z0lV?gvGuz_ zVnj0r^#`cgG{ zO}`69qGDrPkV`Yt_Ve+M4M;l7zNvSBop{7I{CFGwV&olX9|s*L ziWfC`K7KruCql|zS6%&^JvPY&M3!@oP|u*(MBr8JBr)Tr`Y?^fz55Bl7q`&6CD+d=J|&f3J7_ zpGO%LPiL^w`6;y-rJpCbn;I1S@MJ zY#B>V2MvrF8j@6~R1E;{uA^KTO*>9$fjG$^0o_YLVP8g{-jG=MMjj#2#2$Z38N(RD z&pzh$3%-dNu!1Z_9mCF=3U0#$;GM%ojo=1s_vb&5;HfG5&=IK(11JRg^KX>6hW)Pf zhYL?dF-8C3Ul@TuG5yO_lKtBVa;DJoCx%8xvRty}s39In#rxxco$k68sp?{8zKdA? zumMTQ5K3wsvl6C6P zdCQi~drF#ncyuZ*#gP7V(iuu&CKJ`taRj|G8dLboADS$Yie~W*rLkPwYAW7fmTU|v zT(W+sPF-^N@(401MB}vCDZ^jUeES86iROuv8|2n*V@r2_3vUWAlfA3naC{3fX~?_j zyc-$zH1rH>YyMVAj~8xuG`-W~9hT2(by4FmQSJNAyD9iy@A|&l$`CY0DtnvB`&wI4 z=?r?olO5;BK#ZG$lJnwEznpQ)shUF|v;yRL^^(Y<0>NBj*L|h69@}$Fp6)y()S%MYRg&-@Sn`U#P)Rcd)3rfhV@Ru_&jZYA*k8A{m%^IC<$D-v~xB9o$K1q#?9P< z-TG%_eTouW9L$5KGZ>K_cj$`eD07D1LteXzQ=_mcGU$zJAK8o1s{%g`Q?wve=ULL( z=JA+Zxl{jVYY?N{KB%1a0&Wd2nrZ!O;LB`R-vg23`?8Ni1AXR2r;Lq*m!qx}Q6u^5 z>ofu6oUk?GC0-+D#ISiyKuh!+g5d3J@% zbcwPKk5!TD__{g%qf%a9NWbQ->&K2M?jC>@?Rrcz9w{@qM2Z%RJDofA=;xFA8mWTd z41Z>xA$?*!R_VUef>nzK9(ox3A%u-Mgc*{VUPFSx6}~&yqOIxh#vi~MFUe2mYvfEQ zPFLoJt?ik)81CrtChWTSgt$I9URIY*@e_PB_D2Ca}=LeHbohoe2i`s|40fLcR&<^A=GpnE3l7ib|+HbWh{>gi22Vfkst{Qc&! zOW*blW7;tcM0LL4C!f0f_&i?E3+}bSzcRchx8vd!7zWra^DqY1kePakKYB1Ae_~OM z5*Ey^R-h1KB$$Rsat|FjGw>;rG6qSamKSG~A;*;eh!?7N=llVk+OY?zsTfwX+^{%! zZqwG(Jy)~MO0lV7u#HGzP3NjgVRgnK>7g6&Ab$%3i<7R`3^K89+(HXlzbF{!p_bCp1OLg9NPT>MQeFa8=@y4Nh`)IMA2i!n{H)%KGm z*P)Xamy1>RyqFJmZaFCA|2#y1W<&8M84!oovKCBiH%D=0Di6_apRtL37R|s*P`5o~ zlyRy9W1<@XnsU=F{45zOwj_5*e|8Wmj08i7cowOVrVnWe1_D~8f&+|k(zO{#-G-M`Mw-Q?(^?I z2Hdlkzem%0B>OjS^@D|4Eb5Yx#5hc*XNaB_lG7YnY-|7legmI|$S=3~~a%28ZwXE3>dXn# zhJP_eCw~8S@u5K<=_knH>3mRHKj;4MUCV38Sc0VI1gVTCyiiTAx}vRH4G+%@ALMh0 zFO$p8rdN!ps639~ZFnh8KvW*f%%>U^iYEJ=w;|KbBj~Ni+qt@+QfYqK*QrtjX7)A) z2^W_wVT<#%SKIMIckwI~PtmMN6mG&sy^p4M!lWLRXWi3_`8Z_}rAa^&YHo&piu^}V zrNX!gHh*F2E&#{Czt-jISaON5?DrJF_S%t1?vrR6_oR7uTLt)jx!&FCIg4|2X1BST z5%)=CXP@$59QJxGJ9VM{7xPsdiKhawD$y!gJ_^cfA8xN^8k{paBr$1htE{+)IZD4Q z!ybQeZ?o|yQ0&?1x!?kJS+gos?vJh#ym7>wBc(qweHyI4!RhNMCCpX|bQc9$sbnO zp%dpvV%AlY8#csr?u9E>)g8FBY0a6Z=J)RR!VFeco{Y4=O>Hi^Q&*mg(<~L*Rw>w? zO+$EWfSB%%(XF7+=nRiaY{sl-%;~i0LegaXNa<#oC(S|>#q%FqB5uKiG3%w+n%K0X2V~vG7Yw;F1hab3itJy~p;OZ# z7xJdm|ipgj~D*8(r zKfa(H_&`zBKMkd+rvH;*g=Q}6X7SLtRp+7No^OBOkq^ZRS1P?;xG)_cA)>dH^ody_ zVZr^}5QOa`+z?tg#6``&h2&O6Y;65e@*d!6sqoPy>hyBo_S^H=CK!OG;e093mFAGn zHN71nx5VENg`mJEcRRq8gzNn8TvF*4E}`Pcc%-ivYpb?CN~bto4B#tayOpGxAS%yo z)Czpp-!SoTX78+*)#*F*&t3l8lmBTsSOpO)ees4$nVvD2+(4zVA2{~lV9<#~Fi$Rq zflZ5CUgVZe4oO*^2^-P4ms)Mlc`cpv6o$27#E@8Ug6hU5Ai;y>XF!jed=;Lhm%6IW zR>{6DCQ#pYKO4bfxnD6wJ|dF-ym=cZ=2hPIyk?F*oySMoa2YtNPe84uu)N;?Ay+G+81!dA$=1z37cf}67n z?s#T|J* zj;ukMNwO8RnR=sC73vsMC2=4q@ab=S7wq*KqtqZDp6gG0e2$q``zSj7|QMFHX zDVI}I6P(V&shc}Qnfn~wMz%q~>md1i!+8rg@8YF|ME@}Ph*4lqzL);kFfv7`_3D8@ z%ia8SN<&-vqcp)Ojv!TH!Y z6avINSU|6n3jCch2i*JqWIJ!K5YRx%IA8-6g<2YG7>Kwxv@(y5rgtmZc{d-OP$P8S z4Masy90TgjP8+p1F>3$==90R6(8DGtX8j50qv_!@wZubgm7g08zox{B$`@W|bTL52 zCrT$VW@!~YltbL(+6wmHfMcugYp+Jq!e3JBZ>qL~ImSu!yNeCcDH%PI?;eF;h$Ts= zg>fmurH)>~Z@_y-#I&beo(O@fJKo&@@|t$^wy?rqc2-*3|wk^1RgfCi1$pd6c|a zJkHd?u9Cde#io+Fv_%z&{iuDHd&v`aF9odBDuGQH~uvk#6qvr^;qPB5)x#L{Nbt-$>X5Ge1!9CSZfl)nt_H!-ezqB zVS{Qg{|d$?PLVmY)36X~u%LeIz%O?2Qo|$jhZ!8g*xn9S^oWr-91nI7+J6?ua8Y2Oo^C^ueFb)T;3mRV?Cy@r150%1k4vlu?ZiZ5MhR1}M*^NQtd&f~CbY9FN(1x6bLuQ5qYjfFXR7%MQ5L+NV++a}~Qe4wZ(|>8y znJZN#ENw6&+$w8oCYUL?Lu(aN4RI!4Tv^@BEwhB^bm^)E^twnYF@UCwAIQLlciL1d6+uf zis0R_ym z;r|r9M{GKDDWkBDlopcVti1rK0rIqDhDuI!hZi2Mq$&=i@-M;&<0;T&=AaZ&1o`UH zpK-KROZQzzf#krn1k--cm)?GW{(=gkC;sHSh@gR>k)iv~6vkRNT zwSzuUYJxf$h#&v!rpw_K$lwf~3kuQ+C3d5!weZo4$&3}i2%rNp4a@jECT4Q!5&5dw`$RJ_pf@n;#B}gz&^9C;5kE zoEA7XRGV`YWUyHg)Cp%(Dh`|o%Lt!9HEQWEa6TDOuaG(sG8Q5pOfg6rh$bix^WM-| z2T-0Z$7ve?d&((WJM8@@WL5eBCEIRB`s|a=sNqcbc_vc(Q>g`^s!mO^aycgDN@bd1 z*2#UL@>1Kay1bbsqEI9qtqs!Vl0yG(RvNP#GM!MDJY|>f-OO`8bsgRQU6p3|;7iCW zcYiLxpM2)_w6ipst?sic=2>F9$^$!kCj-3JX}<<0@qL)p`6eI4GpOq+Ax-ga?VbCS7}kh8sGXo;$Ze1WPjk1C%)iS1-2 z^{e838B^gRX~@Y%^u0|frB-|em@Kl|ppbXDdl|tsz9S#XX)HFeLo}`>ISR3} z#$ayVHoC2n2Zlin#ySURlXkkLo*Qe}PzJrIMJ}_A1nbFabgc8qkf!%?wsEtgiypik zC4KfgQY$mE#byW23C;qB(7(KO|8&>*Wp$Spw~?e;7R{WfYgwYACstd^L%yJkiWW{g zU_jN+x!n4}7X^a=dT|lriUaR!{|D)msEasVIx6&Wz_l&=cfeAPAiYj?0BwEWyy419 zuD})CIELm>B=`0!J-uw+#@b=@bdCP1Dn5mJ!3wP^)y|qNCuh;9y~{cPal!%*N)}$drn6zB!p=tEZ^K!&`XZX{WTb zmx8egR>=aI5z5(wocl;HxgBCPVT4BKnLFN;!`di8n$69Y9ic{pw4X#y>LDoj6ub?p zaLz0$isUy01I=DUwn%5M86g~$;Sk3RV6cq-pK}>WF$4}dUwjyJSQ)IkA_KC-o0=Z)+eWJDQ0#UR^tddiX?3t9>_JU0mZZ@LL)`>@{oa5BGG@PB~SDB>s z1-+zYXSEtG>Cn0{0yxdr+#2dFQ$w#HsIwY<9C_#7`?|B~{HE&u0`+{%h)7es&1~<4 zeECK2*O>a^4){5d+tCCIRB+9hwZd|$Qu zD)-U>_#eD8c#iV*NC9(`j+4!*JJf`!-00_g2lEIWlBL~O@#nN26LsUwiUf()MQ|F} zwG%|NmIl#+j6>;g(%AA=5U-QTS_Em^%2#$-NV z#D|=#E0vomzV1`_JNI)0x?4@d%1BgB&HqkTlRMwk4(A^_Iv)9Q+_)S0B^f63HumVU z5;EduZEqLy3iNeke-xl+D|R&H`;L73rOTuN#k?hcD2!q`3x!mi{EiMgX%7VBhs#4e z3_aWi@l~9}qL?}Ng(N<>6_3lp_@pT`N42mQ9~ZfGhe}^KG4C%h#L0K7Xl-aq2PS<~ z6d^dKQTOc$kHqXa8C7&$fGQagvnV9vLgW_j`_*W5mX$SG7D+u8y?Fd0CH~diIydAwU8$1ZCyK0^a`&}XH~4TZTPpxv*>abAE8D4_CDMZKo3lakf|c6ME*;T) zTb)g1i{{9|+uJEd{q|ySi;l8M4aD}oKs}>TzI_@ej{UK|%6Jq?oJbAS; zW7=-{OC9JQi}GDVR=!~;zl2bXY2=OU^M3*Wk z_OD>0ShBe`D`R!#$Fi~893^1zN}b}XQh7Pzgl(}75r=`(5f2G{F1 zmflTM-hD@X#iKsD2}o%FZ#*8lAw!Tw$qG@tb)YBXHGe;Ssc+pp!Vbitkf=xS??BD5 zwXA>t9!;vAYBy>3CJ~Pz4J>gM6r4a~oV|zs=Dr=~X*|suTbb{N3z`A87Ty8T1z$Km z*nA+2$P#WaZwKBVr#KF*580P{{2Fm^k;uqf>HYinZy#<5X7HUIQY+*ugtelRU;Hf8 zEUbzL6JD_+SODf1*|;AmR2=xe5Yi0{2yL#6aKB2xmo-vt{S_nRc~eGmkIl#j(N`Uj z(L^8AAZ^mIByKRiumZv$u2$;?mA9r-K>UUFh4S>9#oacMA2(E)b}8vVSGD2m`_o;AGU zr1jPEcSVR_YN?8OF?6Lu$G6MfuVszVFv(G z?W%rE(x47dK1L-YO-xk^qHdgAqIfl#0H)?uOYU zB|}~mO>GG6`N3aVSLGhtFvw_v@rsA+N2P;%uS&ixJ4D(Jr3`dTzWEF71MH`(hdUy# zcAyEyJw50=J9NxUw)cyBwiBg>j=~;NyNdlqm72>LJ3skO%|X)r-o7(L=(sQRfN}W$ z`dCy6hna>ii>qk=EVs(;F#1Z3XF~Urvs8UYw-#2zDDOluMC@0;7c)Fodx}@awJ6@i zpU66|{mhl_#6c0u;oBP6Hg*kZuXD*Hrw^x(BuHcKwwtEhP{eA7;vfl6@^cyo{T-y! zux0%i;*b2!q5)UA!C8BfISlE)cxqzJ4{kiRwJAF7AU?qKMk@9=yc<}nA9?~bACoCA zM8}86&BAnoY6>Y2WD*D|Ws`F$hBT`m|IRvG1m@O$+@aL^{^DmyjVDqiKEOd37f3df zE7hsLie@0fag#LNHsTguFfc(kv6^3*X=`ZC-S|}QSS@3MEX=9@qGU4ITsuzJWa;Ff zZXj*HmPCB}5QFp!kMKXv_2@M16G~KSP;^D@UsQg}MtSx~bM7ym|HRZy*GQFJsoK~A zK=*$=9XHpvolRry)Zd9?q30a@wfk$(Tya;hU$EO98e`|lJB9J}2jN|km+nv$pEodB zwp5b*hG19yePNFKa+!OHAmFeODmb~Gyg@_7d3qSSb|r=D!a>lP$LyjmpR)&~w#ek# zMMxsKoOWX=55+>Dn?$HWUZ&9nnVlnr*C>EWOQ@ot9 z+yJp>aCm7<&>G^1LHy!%(_R#`i6K3Pn=+at`!!o&=6u z@db%Ts^|QGMGwmTY0-Gl%aD%ZO#%BsySZ@GCp$qy^tK zB|D+Pr}mrIj#ALlas@2|kN65L!>Zk8uaPl_*e9KCsJ7Pkehtg6?UP185JTgB#MD^3 zCnZ2KJ<7FN!WrQtcu6(nDiLskag1={BF-?{q%Iy!Z6*T^A=c0?Qt_udmqr-5Jb$kq zdR73`*djNOk>TY9x$7clf~xQ$XeRduu4*$@#JCRVdHqzab`3p*4e*4|B)yreW^#SBYwtDe3p)L!^a`2=2|+b^AA=;P;00ykC>5 z;{Cbw6tFv%uoU2eW97 zkff!PXHsjAJQH)!e%V`u8*nQ2WW<^dKrgjZoAX#6$TuUFJy-v@@z;5{tFj6slzBKw zgPs=qnLXq~E!v}zpbDO&2e>4;{uJW%p+9tp_2u?{J%VspX4m$}fzMslZq{~@>nm7& zc|G+eiQi~{9n#-qzZ{D$m40=v%m~^oexuWW#k^S8>|++3QDfzFXFaN^Z9-Sp`c$7x<$;;x)V!M~LF~3#RtQT0?h|pwVE8Iy#!7w#6SCRWM z(6+3yT|3Jp@nj=hIoi!r#6~UT;l{+^#&0BT+dOxw|Ff%e3|7Wk zx2Z#u8l=Lb=UtHe*z^t=6HG?>LoE3G$-YCDWlO2x&XDJJ&91L}s^fNP9JjuoG@bDf z#&4YPlLe)1b)vBPAkF@E$K`y! zAe6T=kKg$-?G$sd!`k2FqsqsFA-C=O1^Zx_aI3|YVJl~CK3nj9?&n2b3YO8?{w`uZ zg0Ef6i#L$&FmbwKE9M?Y#w?*B1y)-;u%kMWC4BmN**3bVL`h9A(Q8?joCH$E?8fWl zL55`%^tY|sd?-4&akx!gA?-dpN7c(Rk=fWtzTg=<%$syJrj~W|x_*X8pjxof%lGe< z(In5=-<=;e7&ZdTJ#)9I^4EtHTF%Dix%FktZ*x*km$n(z$}9GZWi)sBP!E3!$D2{vZLj zJXv4(tVmcZ2DknMNrH(tS?GOtXlD~5W^%Gj6pBnH_#<n}Z1o@!>|LENQ5*kYu7W zu^s(&pO}^x>XQpkEazT^#?2&NF7AlO)`28 zU&S@tb<8z;R>0<;x5WB&^p!Q9JMkgRVNA~jKAe*h?1hq+l|nO5_~&3t%Lcq+OnsCs zX25gMR=dkfc5j8k>Azm?CQXjXr?;i^IfbWwAlE^@GsR!Ok9W~NKfC?GF@K-4d0s_2 z?T2CB3xZX%06sGqYjQ=ZAs--jXQM;BE@j>lvvTi`_*LLnp5BXm6L!36|H4e3O8n*2 zpn~mA((E?C)xaZ=@^>?T{?TRpO5v{JHEjq*?&5D` z!nmwcS4;srHu9PJB2PCVi z#;-i}A=~?fUaPt3?`jHSLff^G=CsL{!){Emf#OAqcjZ6I%eZC3%F~`@Q3wgIV<|^< zh!G+9or{a{wHpyCI=Ttm7L@R_JLU@e<<#gM#(sCbzg!z!z4EKa>`guEB&MltZ#s@{ zWPA_rHS8?sN)XNbE^4N_&m@3K`q_hW+^W@}$n^rV)`XY27Fz=BwWk?-`KZ@C?S*@* zYVP>|7BTd%Nux-7zt(4G`KPLuXp?IzYL|!T5ohjOs{>7KbMbOppR`@BGg$MzDE8}r z{Og3-taV@Xxs>$KrTy8rk#ri19^W({T@b%N62P(XD{!G-iDE5@*9+*VY5E=b$y9Tw z|5HZZvs-xnb?72|2+Yff$LJU*;L=LaD&*;K1h57$4;iLc>T1FRwI1(?CqNU(04s%9 zYu7kfJ)DbO80?q%sbQQXTf5an{S=kkrvebxl1L(I7ETrHf1@EA)6{U!3~DQ+1Ge8= z19Jx0U!%DL+CMd2usCHBE+8`iLcNcww!w&oAxem|3CJ`-*SVGZp`O>Byq@i3(tyv) zD1!FWn{C8br1lqLz9HT6zqJ28=w;44+Hb4AkS-yPRJ)ItrquLWXjln+5_$y&@2$vg zsT6s-g&kwkST~5_VuT)n;Ml#`T`j@fHG`TOTN7vDfb%ZLOFCJ2v0Ri=n*T)LE0(T+ zI0B=?!pLo_TEt^31z3+RV<%6yGX-nRDBX>uxbrrMSf;2tQ)G9~c2uJA$nN*Ltmz=+ z9D^P=m*W+?tkDkQ-76$y4nL>}C~k718&&epr%872J)hjK#_@{%o7ukfXMct<5S!LI zxY&VMs*wx`5oIow|pO)ZKb1-l%Ll+Ou(*~;C2 zj~~*|dD(CZBf3a$t}f1Z08~h-E5orV5(;qCzgNVxklTNozP7mI+X)x`6|+5Vz20^c zc`TaZA;hWY_f zkM_!o{Bmx&vA^J7!Y@KL`+EObgOOYX0r%6f~ zqhVBkC>a8VwM~Fz)q?3MYVVhAg z>9QFyN@DfcjKIsR3zoEWG#alTY%E)a?_Q8^arqW_(D;T2`~uw%-GSvkNHV=-?7|~Q zQ3e#nB-hP@}gPI7_yF~ zM9Pw~#9%O%8O$)q(qajTCrPb)7lS zxu5&K&wb9loVl3i^EA6A_jwwU;+B)=#}Q-Ia?39xh)b1m7q+|Q1KLX^;my_O$G3_U z^;C#j-@N>nW#grHJcn*R3YTRl7U6E|@Aj}Byv`G?L8RDYRyxl{y7zpg=_5mzI)|GD zd~Zun-G5X6`DF4=S=8gXbQS&s)Iq;cWKnh|%j*__yLZ-T_zr_D6=b|9fz$R_9O2(b zn=9Vi%NdQm&2IgYli^uWHoZA8+H-q(WopAPU(0k>hUeWhV%b828+%JW@RH9qA$qJe0ZY5MfObt)Rgkzc`~;OD*mu9aU_el@zAH4*l2e5AWzR-k(F^ad zCE~-;due4+ZE{mWDD|8}{+dP_aPBCcEV0Plva~Zu?u!ide$P1Rjk2<&)F<2W-=ER< zXTWHrdD^&Xn(M=$mQD4#rR&N9vL)sjtn-SjHlcdhXp79;Rzl!v?royXe4J+e+D7G-a8d@zn* zC$`=i`FcF_pw6vGm8K_iqmeW9Q#w6$l^!k2{%Qr&OQb361I+`b76kR&Q_JCb!rLX3 zMY>A%2Hkx`+kJ>BxvvPFtX@iS2yLl08T0iVNPhS3s!-(9y<8yyior)p6ar;D*|(EN z2clN1jQbCUneV|Od`F(}nW?U{j`uZmeh%*|-zLFtN8Y&5!wOK%699gIran~zAJ{oj zYO1l~zp-cf<>yI}bln?aJC{v|8l9sT#=7Q{)=?d8xT0Az_RD4Kj5j-P2sn0%$ziyo zd&A2yNfQP3xG2A|{}(qeE&oq0%Zy4{4jXxQp^Ou8VM1Y7xPp%z_U#pY{%RHy`&S=- zNp6=nul&a2rOqg3+VO5N+shvBD`8i|kEsY&pXE3#sei-ZZcL1k>#M7oult1xKPF|l zA_5PT{OxJhFvjatSXU@Z*>PQ{QA*&cw0Y>tiG=;9ZwmHJXN_#v_2xwv?+JFs+d8== zjUI^&t?DR_xkwK0THoh6cpfgj^`Vl`braI_JFMwDox6y7-6yv-yj>k_Z;o%i5g;P= zEa{@yoB1la)}F7OgC3F`GA8F*UF+gpwKc?x(g>=;AuYt^f zuZ0~avR4(jr?#~nML`s|5EP4iI)kjQyspf=Ei%7lt7Gs3MeBC)8K~CVkntzi7r9zK zoe78y8HB)A)Z?YV)GJ%nr+R-JOP8V(aBiN6V(IA8lM3 zl*%=rL~hFxYuFMHv11Hz#y}6o(aFqm?y-w>tBu&yVCU3xba2;A+O0D=lp`xL*4HNQ zA;z`Vk9!||bsnF+niydO6=u`ig95=%bqax1>+FQ|Q3VjFd^ zSjSf5@H{So#qb}!Lr?L|pO7JHU<`^h>+j9nv#Ryz2tjY7EVMQ3oK|~P85^&+$Rjtf z`KqyK-jixE=;lC#;jWxLq4`fI61q7<`&AW|xi?GK$NCT2B}q|s4G07dx`?!5Hr-X4 zWFHdg;_H&Sz&UkU@(X52^4eeW75al{Qql96nJ)r~@rMGNo(JmY7$pJk=j3?6-=l>l z4;5jZ()I>E^7kGkRKJOSZxWb1*>$!F8|aN_O6h#4GG<%Lu-o7-RW8tOl-yQ+CE;}( z=hCOqY2Lh;lb23jik%Wta>KA*Z*kFh<vJ8CcI@?az12xo6s98x}@qhq&v4hO7QM}srsb~QLhbnpN&(*;h6=46KSfN^^RD> z$V49Js$KEG`>RR%srgP-9F{r8PE{iIuuS)32ghWbi-=-gngnc)tYFd=UeN+xwT=*@ zlC8Vza0!z05$>JZ_N}LxocYf?@sIY1ZtNfME+iT#So>h>FJ`93^G=;BQW6#1ug;JW zZI9y^bHwJrB)o(QiDHeD3+~5^zb?!b5~VYR=w0_Cd-ob9Qg^ci#hbzvKO9U}b>`)z z_Lce0BqMEyU!rV%%+0S+Ti%_5ztuQ8mhyy5x+NdD zjdt>FJb~yd_HFGL7wzfQK$H}OOa=5cXQfOv**v!Qq_I1@(~ZiXolWV;v&57H)P=7$ zp!F@bP(*}#zhqCmiSlv_9DX-=MDVJ~adV5~Q~o2lFW5$e`!m@c7cTHp^xkzFmBTJa z%LhfS7E~BHl&>0bmN$OhDU@T$^!<1PVvR5H&ylzoQPXZ`+Tqbv=1aF5H8|Si@^MDI zUZi{9zDpjjI^VTx^_8s_6jZdN8@biggx}MZ>Urpq{iR7NU#IuwWrC)pF2 z2Gf1c%0K@*0-Jxq-Rg0$hm7glZli4JG@&Yk+V&~dm6UoZLdnMBga${&<3{aq$bRWN zjxf0@hEirFONB>K>a63sKCf4?!XJf`jj)nOVd0(mBGjjIL$LE%1zLf8{U@`NuMiub zSvTC?aWLh83bO1}z5XZ59W047Wc8SSuU4TzUrKF2DDOpcHfe>1WJ8$ZiG_lbIO2%? zjzYny!qn_qL3c@cuifp;t&%CNmRM=0u!&HY$u2GQl(Z=wUmqc{kTZM7Q&-z89ws+bSb~1^};+0+!X8Mm!!t#zT zl?V=bDuz?sB~l97j(6tfUUjc)SXp2fQuby`FBa6WCh+OTtLr-9<4T!*&ZjLBEADLe z3*rdec7MQhe;{N)LVPi9b<*|f+(Nc<{wpqt>7a?_P1bFj>{iy%KScS#ykP#M`IiOq zB4=mR5EmI0v*TAro>2AsEWdvKl%E(^Y!}d(LNs+ zp`hT;UAc73kwLNU?XwhDKL3v{V_Q8gS>1e}m#OitweV)f1I4a;eoOqB_a49Od2?cr zsZUG%tMiz+KyIqVicg;87?hFtdPEQgDdV0jy;PEXqn+z5*F~-o>X~WAe8CNT6|H8- zv#94~+2m|MKJ!hFI636W%IGX#w^GpP#D>*kwLV|qFJav;r92F*PO}s>#!g7-UOQNw z;j@{maKlZ<1JwaB3$9B!sySZ@*Aq2fNj>HE{G>C~?^!ayHOkZ#le4)g%Qbf|O0bAZ zA~FnUAyEp$W5X)zCvbJXV#=DyYGRoq&ulVpJ0I6~UB?OGKJTD8of@})s<5R6IU_C} zS|!|jrcb7fORG*Ft-1BO;qLcGgvDUDZt**jUHK&)G4)S*D!;B9v{pCh6gtS8eM|wP zlKgm?&GCRnRQnfT6A-dw4Q2=TE6xrQ@e|v`%-@p)*P~MCIwYEdH`$X;1wh+?;US_2 z39hXLUh;-ydy+SiPNq^!N#0~92#D(DOoL*88-P<=o9bt!h{R!`if9BHibNokpm=4Z z4g3I|=uURfqBy&gpa=+Di{?P0(4oqBBm@pl1`ZTaNGt?SAbRd6k)2)Wpd18lN+)?7 zg(?Fgpn3pEj)a1OXe=fW09)k;RuB^Ta}D2t=P-Em|A3vdNob(-X!3-P2SCJL6QGf; z#yr9tyHqy@e&QRMUhhaA{p%b|E0HgFY0UGXV)NOUt{wR0{R#=AM27GpYylZN+M+mF zD)Ff6P0dp8qxlbeSNKU=89|Rj6IO~loT9d-oeb<1WT^2Bc|ExhpSIfLaaCgS^<7cp zv4fWa(^PSDRj}YirrP4vq`Fbs*ho0+(5*)oR2j4Cwkg9_G@nS4nuH3w)K@fGn!Acxl|^uG=FG1f|Cv*uYPE1NL&6WD6$;yR6|&Ws5yOr z+op4}p2e}M-C+~A^5R(9Hj=DUwlC7EOkcn=$w^QjHB?}3vKU}3DA3>eG)x$huazb1 zf?4aCnG1vek_^aSunPYPd$_5OJsr#eGjAW#8daM}BY}bUe@8#rn?@(N5WT^uHY9$( z#{!XSL3X6O(5%o{C8&}T7K%orp-Kn@6otb_L54AbHnl01qe!($LEIe|Ujs5b!zJz?9WO96oa+Ag_}{$DP#}?61tYRzi09f zEHD5otHzWDcZGNi|Kc&|EdYI@; zf*&S&gLMZ7ngur@(WpM&4kQ{B2~J@GHzGNbiC{Sc&Bg+j;P5CLxV|^l#}hEl6o?Tb zh2{yW0RAaUFa^q(FWG@)qOT1<0Hp)*qpl7o00sgx(4b(jnfy|)ep$r-hdCg&+OOt- zI{=;xz`Fi1hX(ntErI;p6F}gfO@RWgK1g527wD0IuC*xvyLAAd(>*#WCWPk7>k^izbu!8-`hgAZ~!H+rU-bCR0f;Sk}rsM#i+~D9QR4Pz1 z*5V%mKR|J!LXlv^m;eJE0JLWx;9LY=TML2F#whC`P(YlZwDhn#N(4Ow21n4+Q-}O- z7BKX6s15`dl7kz~#{&x2(IX&`I@$;VT1QC-jlk=H!J~`B>0p5fLn5`n0H)E2-t@I_ QKp~Y;$`BY#*H{noA6CgM@c;k- literal 0 HcmV?d00001 diff --git a/text_mining.py b/text_mining.py new file mode 100644 index 0000000..83cd38f --- /dev/null +++ b/text_mining.py @@ -0,0 +1,36 @@ +import requests +import re +from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer + +amontillado = requests.get('http://www.gutenberg.org/cache/epub/1063/pg1063.txt').text +raven = requests.get('http://www.gutenberg.org/cache/epub/17192/pg17192.txt').text +house = requests.get('http://www.gutenberg.org/cache/epub/932/pg932.txt').text + +def words(text): + #This removed all of the non-alphanumeric characters in a given string + return re.compile(r'\W+', re.UNICODE).split(text) + +def makeDict(wlist): + #This returns a dictionary of words and how frequently they appear in the story + wfreq = [wlist.count(p) for p in wlist] + return dict(zip(wlist,wfreq)) + +def sortDict(dfreq): + #This sorts the dictionary created in makeDict + sort = [(dfreq[key], key) for key in dfreq] + sort.sort() + sort.reverse() + return sort + +analyzer = SentimentIntensityAnalyzer() +analyzer2 = SentimentIntensityAnalyzer() +analyzer3 = SentimentIntensityAnalyzer() + + +if __name__ == "__main__": + print(sortDict(makeDict(words(amontillado)))) + print(sortDict(makeDict(words(raven)))) + print(sortDict(makeDict(words(house)))) + print(analyzer.polarity_scores(amontillado)) + print(analyzer2.polarity_scores(raven)) + print(analyzer3.polarity_scores(house)) From d8a77bc30092c6b0b8456cb6abc41dc9eeb53c74 Mon Sep 17 00:00:00 2001 From: Aurora Bunten Date: Thu, 23 Feb 2017 21:39:03 -0500 Subject: [PATCH 2/2] Turning in Text Mining Project --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 8cce527..15be262 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ # TextMining This is the base repo for the text mining and analysis project for Software Design at Olin College. + +Look for my pdf submitted in the general repository.