From f4a37ddc212448c47f76460fe579d4af4dac533f Mon Sep 17 00:00:00 2001
From: Joe Germuska
Date: Tue, 30 Jan 2024 13:56:32 -0600
Subject: [PATCH] proposed fix for jam values in table based files

---
Review note: the added jam-value hunks were rewritten during review (in-place
sed over a quoted glob instead of parsing `ls` and copying through a scratch
directory). The commit id above and the post-image blob ids on the `index`
lines predate that rewrite; regenerate them with `git format-patch` after
applying. Context-line whitespace was reconstructed from a flattened copy of
this patch, so `git apply --ignore-whitespace` may be needed.

 table_based/02_download_acs_2021_5yr.sh | 11 +++++++++++
 table_based/02_download_acs_2022_1yr.sh | 12 ++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/table_based/02_download_acs_2021_5yr.sh b/table_based/02_download_acs_2021_5yr.sh
index b9bf167..a519eca 100755
--- a/table_based/02_download_acs_2021_5yr.sh
+++ b/table_based/02_download_acs_2021_5yr.sh
@@ -13,3 +13,14 @@ curl -L "https://census-backup.b-cdn.net/programs-surveys/acs/summary_file/2021/
     -o ${DATA_DIR}/ACS20215YR_Table_Shells.txt
 
 unzip -q ${DATA_DIR}/AllTables.zip
+
+# table-based files have "jam values" (the codes listed in JAM_VALUES) which
+# should not be loaded into our database, so blank them out of every .dat file
+JAM_VALUES="-222222222|-333333333|-555555555|-666666666|-888888888|-999999999"
+for dat_file in "${DATA_DIR}"/*.dat; do
+    # when nothing matches, the glob stays literal; skip instead of failing
+    [ -e "${dat_file}" ] || continue
+    # edit in place: GNU sed writes a temporary file and renames it over the
+    # original only once the rewrite completes, so a failed run keeps the data
+    sed -i -E "s/${JAM_VALUES}//g" "${dat_file}"
+done
diff --git a/table_based/02_download_acs_2022_1yr.sh b/table_based/02_download_acs_2022_1yr.sh
index 5aadbe8..643c774 100755
--- a/table_based/02_download_acs_2022_1yr.sh
+++ b/table_based/02_download_acs_2022_1yr.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 DATA_DIR=/home/ubuntu/data/acs2022_1yr
 DATA_SERVER="https://census-backup.b-cdn.net"
+
 mkdir -p $DATA_DIR
 
 sudo apt-get -y install unzip
@@ -10,3 +11,14 @@ curl -L "${DATA_SERVER}/programs-surveys/acs/summary_file/2022/table-based-SF/do
 curl -L "${DATA_SERVER}/programs-surveys/acs/summary_file/2022/table-based-SF/documentation/ACS20221YR_Table_Shells.txt" -o ${DATA_DIR}/ACS20221YR_Table_Shells.txt
 
 unzip -q -j -d $DATA_DIR $DATA_DIR/All_Tables.zip
+
+# table-based files have "jam values" (the codes listed in JAM_VALUES) which
+# should not be loaded into our database, so blank them out of every .dat file
+JAM_VALUES="-222222222|-333333333|-555555555|-666666666|-888888888|-999999999"
+for dat_file in "${DATA_DIR}"/*.dat; do
+    # when nothing matches, the glob stays literal; skip instead of failing
+    [ -e "${dat_file}" ] || continue
+    # edit in place: GNU sed writes a temporary file and renames it over the
+    # original only once the rewrite completes, so a failed run keeps the data
+    sed -i -E "s/${JAM_VALUES}//g" "${dat_file}"
+done