From e976b55a5c2a83f4f2ab1221ea228c9b8737db4f Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:32:09 -0500 Subject: [PATCH 1/6] delete ip logs... WARNING UNTESTED! --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index d81e9a77b..001291867 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -33,6 +33,7 @@ unzip rawdata.zip # 6. Repeat the above step for user logs and event logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rf -rf ./data # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed From ea20676d33161a6f4d0fcd3c4f7aa5360f0f4309 Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:55:44 -0500 Subject: [PATCH 2/6] initialize README file with company name --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 001291867..f2bfd22bd 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -11,6 +11,7 @@ set -x mkdir analysis output touch README.md +echo "# Project Name: DSI Consulting Inc." > README.md touch analysis/main.py # download client data From 162895f292e17a50c72dbeb88defef99f7a3ad20 Mon Sep 17 00:00:00 2001 From: gitau123 Date: Sun, 12 Apr 2026 14:08:48 -0400 Subject: [PATCH 3/6] add assignment.sh --- 02_activities/assignments/assignment.sh | 24 +++++++++++++++++------- 02_activities/assignments/rawdata.zip | Bin 45123 -> 0 bytes 2 files changed, 17 insertions(+), 7 deletions(-) delete mode 100644 02_activities/assignments/rawdata.zip diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 4b48cec8b..4b1057e5b 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -21,29 +21,39 @@ touch README.md touch analysis/main.py # download client data -curl -Lo rawdata.zip https://github.com/UofT-DSI/shell/raw/refs/heads/main/02_activities/assignments/rawdata.zip +curl --ssl-no-revoke -Lo rawdata.zip https://github.com/UofT-DSI/shell/raw/refs/heads/main/02_activities/assignments/rawdata.zip unzip -q rawdata.zip ########################################### # Complete assignment here # 1. Create a directory named data - +mkdir data # 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) - +mkdir ./data/raw/ +mv ./rawdata.zip ./data/raw/ +unzip ./data/raw/rawdata.zip -d ./data/raw/ # 3. List the contents of the ./data/raw directory +ls ./data/raw/rawdata/ -# 4. Create the directory ./data/processed, +# 4. Create the directory ./data/processed, +mkdir ./data/processed/ # then create the following sub-directories within it: server_logs, user_logs, and event_logs - +mkdir ./data/processed/server_logs/ +mkdir ./data/processed/user_logs/ +mkdir ./data/processed/event_logs/ # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs - +cp ./data/raw/rawdata/*server*.log ./data/processed/server_logs/ # 6. Repeat the above step for user logs and event logs +cp ./data/raw/rawdata/*user*.log ./data/processed/user_logs/ +cp ./data/raw/rawdata/*event*.log ./data/processed/event_logs/ # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rm ./data/raw/rawdata/*ipaddr*.log +rm ./data/processed/user_logs/*ipaddr*.log # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed - +find ./data/processed/ -type f > ./data/inventory.txt ########################################### diff --git a/02_activities/assignments/rawdata.zip b/02_activities/assignments/rawdata.zip deleted file mode 100644 index fe36b0faf63ec8dba8ae9f67a705ea07c357b944..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 45123 zcmbuI33OCN7KWpU>4VF>{NWf2Gv!VZQIL}efzMO*-dj0%DX z42qyEf?*sFASw`qz^HMQEy|Js2V@|qVQ2bfsN&w&_dToswC5n6c=~&{y6WrddN+S3 z_AGH<)gbF}WclPlMSr>TN9myYL8(bIMkl2uMGWX)A*j@iCEk<&yjE2VBqXxV%eb5*f z4QbS)H2PWaTPZLa(x^viv`g9CkuVz4s7Glubwu@1FdEXRM`^TqUSblAhBWF?8V%mM zXatOgH2RTR*9|TooB*RCjeexob@e6X$GfAQH43dQwX}x4Is6S&qtTE?y=u5`>`prF zUbC}CqalrY)o}0MxcV53hBWF`8eLR5KMzJjFSS>_)@z;)`4L7#8ucoT)}J)?2#kg_ z>Qx%8QEJ^$7!7IEt2CPM$@#-D8q%m&X>{hd;XlA=NTXS5U3bn*{T4<;8qHGcdfKf` z-?*clH405|HNTH8sI&;xXbhy$C^g(&Zf3Q2ui06nF_1>1)NrT1k=6l5LmG`z8g1Up z(+NgH8jVsK4eoTYBaDV-S(KV)O9t%h2BRU3Mk$SMXg8}XjD|ECr8HWY(WWzuhBO+b zH2T@Ot6g9;q|wc4UElm*O#+ODG`d->>#z2t^l?W!YZRJgYLcBl{mo)jqp^@iW7Tli zUt0DGS~E1g)$IOwaOP|7H9HSBG`ZE>el~L8t0)+m+MCo+@0?NgEfkDQ>`iK3&#Rlg z)E#WSRaPZO@8hN>jUJuqJMd3?GVPBJ{4bYJ?A|NBS&y*rf&9e3@50qy_o-!FR5bw` z332s#|5J@-_pV-4k=uE{P_GB#s`uv7QTZ|M^*Ri5G=x>}%k74Q#6s8@2&>+c|IvGF zB!rEHuu7HlL)Jw>STBTCw#r>{J{rQdgs{q0r!I$kAZ#lHt4)=+H8l>x#z9!+sh{&U zwQ|SWg)wQ|v{A#Yd!0tyoE8ZQIs4)d^vsC}D{_?rpKf>Z5qDu!^y!4&o1Bt1HaT_p zm~j)6eUkKDz#mF7Vqf>Le%z01Ged_}@;$Hh-12S72Yf%NMf^yqk%3~h9uJ>Mf3dRf znXP9JoEvbt`d;XT`CI4VTu;FDDlcz~E_ey&MhDDQVxBW7@@1SG6EIhqxn!T&3vh01 zz+9zfPvh+iajrLDu5$B)x315}xh(_cDmf3m6!!|wZ51e2-}X$(N`3+7#s$n(mK=EO zy}9mO7fS}beAVi`6gqLJyCq$y0WV&)cK5X3@Hk8jcwRHrpbJ)i?5ngpF43K8KOvo# zoSN?2))@DCl}iW~6 zVSl)i%7~J*gfuW7rA1~T&?mtkC&_r%hjw0%&%_U1lA<9?@{-A) z;v+a58+F|X*8SFZKj3gouDTU$c4m#kI2@C!ZU(#d>|@7pI3`!!4wiFv>2Vy6$yGOm zWez;{BM!&ps$0TVw|yuNhhuWpO<`|8-SImdj>%QGg}q;SPOdxLMXuONQ@fZJE%#P} z)5E-MI4UFRkx5$PdInU9quAmY`&>esJDCe zs0z!~tc%RgZ@P_aMU}Z-;kvP@t`5NV{#L%ad)+QWj!krR{B?1~!5T0cn&$eJLe3A1 zf?+f?$@P7OYab=nbw}5q0(03`0ovIs)k3liKq_h-%VoE>vF>rekieV)srS*mN zAH^@A7*0}3TDJV;K>PxVVI(D_-3|ZNAHRTN_(h_JAdH;!J0sroka+^-PWbOH_Cztn!_FpY2+8-8S%{Pa4xq4T) zF>lr$>k!F$`ar?vTS5X>&VO_3&b=tu-1r2nT)(UA)!iuAT(bnM1pE7lHeaG(bG;I< z66>_|v|T9JT&o1EgxWbc>oXK=u2TY5BCWgX;^!#XT%!c61R9#{`2q!->+>#H@8W7K zoScn<&9zCuY8}_l|6rdx*io3sGFGd&U&Z(4fx--6yLPu;y-(}#M0T!wy^f|f>?Psv z5IRsOy-wNhP%tvmx{D-YO|S1!Fd|IdK~nS3f`6f4M3}mJWclG;-=bhdn7VUh-IfyH zpkPFpx@+Weg)aX@!H6(*$HdufJpI@AUf{~Y4 zcZJMa>zRy#5n<|%kSjZ8O+dkjFm*S`q1>J0Q7|G*-3jvg_^V@4Fd|Id1+p@z%{UZ{ z2vc`}EbEps5(Oi|)ZHKJ!`6&)2RjNAnbhi2(@&|PuYkfd?EB!}(p?|R!&hH&uh&tS zhCLrPuoC9F&Pf+hFd|Id?Q!{9NC65)gsD3{zI-+RG73iKmhSTCFn8VWC>Rl@?(j%I zKeiABBf`|(9TEM)e?!5DFm-1~%~t1sMZt(Lbyr8@*_&>oU__X@qoYc-)LZUgM`0p! zOP%(7U-RsIP?(0j9Nb&FljBN!y?C%*WJ;?;rUmbASm<7_<1ix=S{*Tca$w>D5DQFa zeKP;t%aPB6SYR^i^Y|?%3ZA=*z31MlC9GFk{t|(rANc;b-NE{s`Bw`3|5??S3eePl z>q`aHBP*jTdDZ~GVj!_6htPa~E2Kj`vNCqc5TiqWJB3F1b4u%b?hX(s#pp!ePNAv& zobs_uG1@EGDKwy;QzGvbYcbjw*eNuNpHtS$6r){$okC;xIi*Pnu@)mO+bJ|*pHo)K z6eAhhDKt!^bFzOo2v>M%Q!7j8fu=L4u(v^_)^5 zQy{_6KzdG@R9dVB5)93r=ajQD1riL6ndg*IWyD$_!O%o`PRWxgkYH$dJf{pQE7k%D zh9<>x!Q_e*Xd=^0cwS2Ha#AT~hsbup(D-*w$(AXQU}(}ir*tYW)&dEJhPrdg4w(YI zE;PTLQ{pO!wLpTQQSF?vRi;3Kp{eYg65&fw^50uTLxQ0J?3}Vrra*$BS?iqA*q3T2 z)B*{H#;SA5J2C|l49!sIf~n(6@A0n%nqo99otKg+L!ggHG#H&z9`Jp77fOKyLo?7h z#fte8D3D-i+&QO|^qm+B6i6^M*_>16$Q0<+ry=H?a$TlCf}wfkoRaQ4bPz@i5)6$d z=afq_1riKRA?Fn9_N!0}Bp4bv&M9YP3M3esF3tsGeZ`Z1EzpZkbHsTm*0+!d5J)gI zG@O?*(D&PC{uF42(SUFcu~MG=At4CJGW`ST79&Sg_>~3B}sPA{lY(*Hwgxd);rkfL1 z`V)*2!R-W^$ju2geNEW58jQlf?F1UW%?S(r2}U{Jb^^`W=7jrwzt3T-!6?$(PN0$6 zoG{CuV3gi%C(!h4PWaWIU=-ACC(xj5PM8qvH%dmC+;##@#O6!xBq0P8zimg*_-p=- zF{JL@B535bNph{7Lc^}D$Vc_a%J|xWVic@x$IxtRj){LrF2*QF+m4}8)*Q3T0b`WA zYsb*!YL1DkCs$(>v}?!EplXiU?0_*!*0p13E;YwA^UVk46gtt2qIB&T8bi%7D;+RK z*|~NMO`YbLS`FlCjKXs57#cFoF|RpbjM8!K7@8@~F=ZZ>t1*hhwPR?AG)#^Q2JLaba3MFpz6#*e!QW zcdu9tG|_0PEidK^2MY8VissjHO#7B{F_3F$QZ2`9cfdfdp_#NC)4G*h4dfb{KFcxN z95B!npgFS~6B8#_1G$DK%5uzR2MpvInjOnA;jQIrAlJ~8SdLlmfPq{?^I$oqX&bp3 z$Tc+kmAht*jDcnV4SD6o)Nd;mWA>X!_G2L$=*o*(E@Pm1MzdQvrRJZ+VxVb8V_G?7 zu}pzx8BJv6l!|{AYk?*ija}t0|BHmeqE?YpK^m=UWw@$GRz}%&cZ*>d=q-)rr*h7G zCl2(UMnh6Lr&4=|dZ0HonuN+Zi=8;oyBdu<<(%pr9O{AI)@Y_F=PYsJKvtu{rJNJo z(V-qlI5eGO(!@@ViR>m< z1FcY+%gHgD9Wc-WrRkd-lv~5ds;824QkY#hy|nkY8x%C5OBqLLj@) zfJ+Vu>LpbIxrK&Va>z^(0-1#dS8_;!2!XspLn=9BLT_ofAgjec!nhusKfviGv2RUS= z2!URBnlZ>BwTDQRKvto7f*i6~gg{oI*?}BVeyCIlWEGkd$RW>*5XdSt6Oco0i4e#t zGy#xXWya&ea6zv-4g2GTTo4(MRcN#yFC=A{PzW@)XpSFe&KNLUr~{f= gG@*|(4oD2ciBrif=@1OtpKB;vYVg1+t0Cvu%DF6Tf From 25394a6ef0206e50027f3ac1cf8028a5da3be8ee Mon Sep 17 00:00:00 2001 From: gitau123 Date: Mon, 13 Apr 2026 15:57:33 -0400 Subject: [PATCH 4/6] fix directory structure --- 02_activities/assignments/assignment.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 4b1057e5b..d84c52296 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -33,8 +33,10 @@ mkdir data mkdir ./data/raw/ mv ./rawdata.zip ./data/raw/ unzip ./data/raw/rawdata.zip -d ./data/raw/ +mv ./data/raw/rawdata/* ./data/raw/ +rmdir ./data/raw/rawdata/ # 3. List the contents of the ./data/raw directory -ls ./data/raw/rawdata/ +ls ./data/raw/ # 4. Create the directory ./data/processed, mkdir ./data/processed/ @@ -43,13 +45,14 @@ mkdir ./data/processed/server_logs/ mkdir ./data/processed/user_logs/ mkdir ./data/processed/event_logs/ # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cp ./data/raw/rawdata/*server*.log ./data/processed/server_logs/ +cp ./data/raw/*server*.log ./data/processed/server_logs/ # 6. Repeat the above step for user logs and event logs -cp ./data/raw/rawdata/*user*.log ./data/processed/user_logs/ -cp ./data/raw/rawdata/*event*.log ./data/processed/event_logs/ +cp ./data/raw/*user*.log ./data/processed/user_logs/ +cp ./data/raw/*event*.log ./data/processed/event_logs/ # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -rm ./data/raw/rawdata/*ipaddr*.log +rm ./data/raw/*ipaddr*.log +rm ./data/raw/*ipaddr*.txt rm ./data/processed/user_logs/*ipaddr*.log # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed From 571470ea7d3b89399615266b441955b46d13f722 Mon Sep 17 00:00:00 2001 From: gitau123 Date: Mon, 13 Apr 2026 16:32:46 -0400 Subject: [PATCH 5/6] after accepting the merge I get a syntax error --- 02_activities/assignments/assignment.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 6fb94ee05..b9f8e5dfe 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -52,14 +52,9 @@ cp ./data/raw/*user*.log ./data/processed/user_logs/ cp ./data/raw/*event*.log ./data/processed/event_logs/ # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -<<<<<<< HEAD rm ./data/raw/*ipaddr*.log -rm ./data/raw/*ipaddr*.txt rm ./data/processed/user_logs/*ipaddr*.log -======= -rf -rf ./data ->>>>>>> ea20676d33161a6f4d0fcd3c4f7aa5360f0f4309 - +rm ./data/raw/*ipaddr*.txt # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed find ./data/processed/ -type f > ./data/inventory.txt From 13ffe330636f15dd70cd735e68fa0748689ec1cf Mon Sep 17 00:00:00 2001 From: gitau123 Date: Wed, 15 Apr 2026 17:23:56 -0400 Subject: [PATCH 6/6] moved raw data directory --- 02_activities/assignments/assignment.sh | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index b9f8e5dfe..0e509bda9 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -30,12 +30,8 @@ unzip -q rawdata.zip # 1. Create a directory named data mkdir data -# 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) -mkdir ./data/raw/ -mv ./rawdata.zip ./data/raw/ -unzip ./data/raw/rawdata.zip -d ./data/raw/ -mv ./data/raw/rawdata/* ./data/raw/ -rmdir ./data/raw/rawdata/ +# 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to +mv ./rawdata ./data/raw # 3. List the contents of the ./data/raw directory ls ./data/raw/