From e976b55a5c2a83f4f2ab1221ea228c9b8737db4f Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:32:09 -0500 Subject: [PATCH 1/8] delete ip logs... WARNING UNTESTED! --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index d81e9a77b..001291867 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -33,6 +33,7 @@ unzip rawdata.zip # 6. Repeat the above step for user logs and event logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rf -rf ./data # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed From ea20676d33161a6f4d0fcd3c4f7aa5360f0f4309 Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:55:44 -0500 Subject: [PATCH 2/8] initialize README file with company name --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 001291867..f2bfd22bd 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -11,6 +11,7 @@ set -x mkdir analysis output touch README.md +echo "# Project Name: DSI Consulting Inc." > README.md touch analysis/main.py # download client data From 6041d6d84164e7f664826e384f6c336f7a1f2657 Mon Sep 17 00:00:00 2001 From: Nicole Yeung Date: Thu, 6 Nov 2025 18:56:00 -0500 Subject: [PATCH 3/8] completed part 1 --- 02_activities/assignments/assignment.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 4b2baa7ae..87c776a7e 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -21,13 +21,14 @@ touch README.md touch analysis/main.py # download client data -curl -Lo rawdata.zip https://github.com/UofT-DSI/shell/raw/refs/heads/main/02_activities/assignments/rawdata.zip +curl --ssl-no-revoke -LO https://github.com/UofT-DSI/shell/raw/main/02_activities/assignments/rawdata.zip unzip -q rawdata.zip ########################################### # Complete assignment here # 1. Create a directory named data +mkdir data # 2. Move the ./rawdata directory to ./data/raw From 80450fcd5deb67522165ede648bfb17cef533783 Mon Sep 17 00:00:00 2001 From: Nicole Yeung Date: Fri, 7 Nov 2025 15:07:38 -0500 Subject: [PATCH 4/8] Completed all tasks --- 02_activities/assignments/assignment.sh | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 87c776a7e..c881ed692 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -31,19 +31,37 @@ unzip -q rawdata.zip mkdir data # 2. Move the ./rawdata directory to ./data/raw +mv ./rawdata ./data/raw # 3. List the contents of the ./data/raw directory +ls ./data/raw # 4. In ./data/processed, create the following directories: server_logs, user_logs, and event_logs +cd data +mkdir processed +cd processed +mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +cd ~/dsi3/shell/02_activities/assignments/newproject +cp ./data/raw/*server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs +cp ./data/raw/*server*.log ./data/processed/user_logs +cp ./data/raw/*server*.log ./data/processed/event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +cd ./data/raw/ +rm *ipaddr* -# 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed +cd ~/dsi3/shell/02_activities/assignments/newproject +cd ./data/processed/user_logs +rm *ipaddr* +# 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed +cd ~/dsi3/shell/02_activities/assignments/newproject/data +touch inventory.txt +ls ./processed/* >> inventory.txt ########################################### From 1cbe88c2e353d1baefaec1446d039500785973f9 Mon Sep 17 00:00:00 2001 From: Nicole Yeung Date: Fri, 7 Nov 2025 15:38:53 -0500 Subject: [PATCH 5/8] resubmit again #3 --- 02_activities/assignments/assignment.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index c881ed692..79abf3ceb 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -53,7 +53,6 @@ cp ./data/raw/*server*.log ./data/processed/event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs cd ./data/raw/ rm *ipaddr* - cd ~/dsi3/shell/02_activities/assignments/newproject cd ./data/processed/user_logs rm *ipaddr* From f8093bb3d45e3aad54c306865b2d72c0a3dd07f3 Mon Sep 17 00:00:00 2001 From: Nicole Yeung Date: Fri, 7 Nov 2025 16:28:48 -0500 Subject: [PATCH 6/8] completed all #4 --- 02_activities/assignments/assignment.sh | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 79abf3ceb..2c56f48bc 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -37,13 +37,10 @@ mv ./rawdata ./data/raw ls ./data/raw # 4. In ./data/processed, create the following directories: server_logs, user_logs, and event_logs -cd data -mkdir processed -cd processed -mkdir server_logs user_logs event_logs +mkdir -p data/dataprocessed +mkdir -p data/dataprocessed{server_logs,user_logs,event_logs} # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cd ~/dsi3/shell/02_activities/assignments/newproject cp ./data/raw/*server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs @@ -51,16 +48,12 @@ cp ./data/raw/*server*.log ./data/processed/user_logs cp ./data/raw/*server*.log ./data/processed/event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -cd ./data/raw/ -rm *ipaddr* -cd ~/dsi3/shell/02_activities/assignments/newproject -cd ./data/processed/user_logs -rm *ipaddr* +rm ./data/raw/*ipaddr* +rm ./data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -cd ~/dsi3/shell/02_activities/assignments/newproject/data -touch inventory.txt -ls ./processed/* >> inventory.txt +touch ./data/processed/inventory.txt +ls ./data/processed/* >> inventory.txt ########################################### From bb8ad1a4685604c880799facf7cdb13c13a3310b Mon Sep 17 00:00:00 2001 From: Nicole Yeung Date: Fri, 7 Nov 2025 16:31:15 -0500 Subject: [PATCH 7/8] 5 --- 02_activities/assignments/assignment.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 2c56f48bc..52dee1db4 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -37,8 +37,8 @@ mv ./rawdata ./data/raw ls ./data/raw # 4. In ./data/processed, create the following directories: server_logs, user_logs, and event_logs -mkdir -p data/dataprocessed -mkdir -p data/dataprocessed{server_logs,user_logs,event_logs} +mkdir -p .data/dataprocessed +mkdir -p .data/dataprocessed{server_logs,user_logs,event_logs} # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cp ./data/raw/*server*.log ./data/processed/server_logs From aa60d83784e874df8eacf2a2aa13106cbea21214 Mon Sep 17 00:00:00 2001 From: Nicole Yeung Date: Fri, 7 Nov 2025 16:48:01 -0500 Subject: [PATCH 8/8] 6 --- 02_activities/assignments/assignment.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 52dee1db4..25184358b 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -37,23 +37,23 @@ mv ./rawdata ./data/raw ls ./data/raw # 4. In ./data/processed, create the following directories: server_logs, user_logs, and event_logs -mkdir -p .data/dataprocessed -mkdir -p .data/dataprocessed{server_logs,user_logs,event_logs} +mkdir -p ./data/processed +mkdir -p ./data/processed/{server_logs,user_logs,event_logs} # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cp ./data/raw/*server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs -cp ./data/raw/*server*.log ./data/processed/user_logs -cp ./data/raw/*server*.log ./data/processed/event_logs +cp ./data/raw/*user*.log ./data/processed/user_logs +cp ./data/raw/*event*.log ./data/processed/event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs rm ./data/raw/*ipaddr* rm ./data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -touch ./data/processed/inventory.txt -ls ./data/processed/* >> inventory.txt +touch ./data/inventory.txt +ls ./data/processed/*/* > ./data/inventory.txt ###########################################