diff --git a/scripts/insert-all-acknowledgements.py b/scripts/insert-all-acknowledgements.py index 0a65391..0aede0c 100644 --- a/scripts/insert-all-acknowledgements.py +++ b/scripts/insert-all-acknowledgements.py @@ -27,7 +27,7 @@ def run(): } start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/acknowledgement.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_acknowledgement.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() diff --git a/scripts/insert-all-cemeteries.py b/scripts/insert-all-cemeteries.py index 56d8d0e..8bc91af 100644 --- a/scripts/insert-all-cemeteries.py +++ b/scripts/insert-all-cemeteries.py @@ -27,7 +27,7 @@ def run(): } start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/cemetery.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_cemetery.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() @@ -35,7 +35,8 @@ def run(): reader = csv.DictReader(r.data.decode('utf-8').splitlines()) reader.fieldnames = [field.replace('.', '_') for field in reader.fieldnames] - # add a country model for each row in the csv file + # delete existing cemeteries before re-inserting + Cemetery.objects.all().delete() start_insert_time = time.time() for row in reader: if row['latitude'] == '': @@ -46,7 +47,7 @@ def run(): Cemetery.objects.create( id=row['id'], name=row['name'], - country_id=row['ccn3'], + country_id=row['country_id'], latitude=row['latitude'], longitude=row['longitude'] ) diff --git a/scripts/insert-all-companies.py b/scripts/insert-all-companies.py index 8f12d38..fb75564 100644 --- a/scripts/insert-all-companies.py +++ b/scripts/insert-all-companies.py @@ -31,7 +31,7 @@ def run(): title = sys.argv[2] start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/company.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_company.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() diff --git a/scripts/insert-all-countries.py b/scripts/insert-all-countries.py index 7c20eb6..adb8d56 100644 --- a/scripts/insert-all-countries.py +++ b/scripts/insert-all-countries.py @@ -19,7 +19,6 @@ def run(): github_token= str(f"{env('READ_PAT')}") - print() title = sys.argv[2] headers = { @@ -27,11 +26,10 @@ def run(): 'Authorization': f'Bearer {github_token}' } - print() title = sys.argv[2] start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/country.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_country.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() @@ -43,14 +41,14 @@ def run(): try: Country.objects.create( id = row['id'], - name = row['Name'], - alpha2 = row['Alpha2'], - alpha3 = row['Alpha3'], - country_number = row['CountryNumber'], - flag = "" + name = row['name'], + alpha2 = row['alpha2'], + alpha3 = row['alpha3'], + country_number = row['country_number'], + flag = row['flag'] ) except Exception as e: - print("Error with: " + row['Name']) + print("Error with: " + row['name']) raise e end_insert_time = time.time() diff --git a/scripts/insert-all-decorations.py b/scripts/insert-all-decorations.py index 521c85b..373b80f 100644 --- a/scripts/insert-all-decorations.py +++ b/scripts/insert-all-decorations.py @@ -30,7 +30,7 @@ def run(): title = sys.argv[2] start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/decoration.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_decoration.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() diff --git a/scripts/insert-all-pow-camps.py b/scripts/insert-all-pow-camps.py index 1566226..c3755cd 100644 --- a/scripts/insert-all-pow-camps.py +++ b/scripts/insert-all-pow-camps.py @@ -27,7 +27,7 @@ def run(): } start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/pow-camp.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_powcamp.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() @@ -41,11 +41,11 @@ def run(): try: PowCamp.objects.create( id=row['id'], - name=row['Name'], - country_id=row['PresentCountry_id'], - wartime_country=row['WartimeCountry'], - latitude=row['Latitude'], - longitude=row['Longitude'] + name=row['name'], + country_id=row['country_id'], + wartime_country=row['wartime_country'], + latitude=row['latitude'], + longitude=row['longitude'] ) except Exception as e: print(f"""💥row: ({row}) """) diff --git a/scripts/insert-all-ranks.py b/scripts/insert-all-ranks.py index 8fcd3c6..99065e8 100644 --- a/scripts/insert-all-ranks.py +++ b/scripts/insert-all-ranks.py @@ -27,7 +27,7 @@ def run(): } start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/rank.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_rank.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() @@ -41,8 +41,8 @@ def run(): Rank.objects.create( id=row['id'], name=row['name'], - abbreviation=row['abbr'], - rank_class=row['class'] + abbreviation=row['abbreviation'], + rank_class=row['rank_class'] ) except Exception as e: print(f"""💥row: ({row}) """) diff --git a/scripts/insert-all-soldier-deaths.py b/scripts/insert-all-soldier-deaths.py index 77b482e..85c6ed4 100644 --- a/scripts/insert-all-soldier-deaths.py +++ b/scripts/insert-all-soldier-deaths.py @@ -9,7 +9,6 @@ def run(): import csv import time from cmp.models import SoldierDeath - from cmp.models import Company os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') @@ -31,7 +30,7 @@ def run(): title = sys.argv[2] start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/soldier-death.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_soldierdeath.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() @@ -43,19 +42,15 @@ def run(): for row in reader: #print(f"""row: ({row['id']}) cwgc:({row['cwgc_id']})""") try: - company = Company.objects.filter(name=row['company_id']) - if company: - company = company.first() - else: - #print(f"""row: ({row['id']}) cwgc:({row['cwgc_id']})""") - company = Company.objects.filter(name="UNKNOWN").first() cwgc_id = row.get('cwgc_id', 90909) if row.get('cwgc_id') != '' else 90909 + date_value = row['date'] if row['date'] != '' else None + company_id = int(row['company_id']) if row['company_id'] != '' else None + cemetery_id = int(row['cemetery_id']) if row['cemetery_id'] != '' else None SoldierDeath.objects.create( - #id=int(row['id']), soldier_id = int(row['soldier_id']), - date =row['Date'], - company_id = company.id, - cemetery_id = row['cemetery_id'], + date = date_value, + company_id = company_id, + cemetery_id = cemetery_id, cwgc_id = cwgc_id ) except Exception as e: diff --git a/scripts/insert-all-soldier-decorations.py b/scripts/insert-all-soldier-decorations.py index 2129996..1a6fb38 100644 --- a/scripts/insert-all-soldier-decorations.py +++ b/scripts/insert-all-soldier-decorations.py @@ -10,9 +10,6 @@ def run(): import time from cmp.models import SoldierDecoration - from cmp.models import Company - from cmp.models import Country - from cmp.models import Soldier os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') @@ -32,51 +29,34 @@ def run(): } start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/soldier-decoration-utf-8.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_soldierdecoration.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() # load the response into a csv dictionary reader - reader = csv.DictReader(r.data.decode('ISO-8859-1').splitlines()) + reader = csv.DictReader(r.data.decode('utf-8').splitlines()) start_insert_time = time.time() for row in reader: - #print(f"""row: ({row['id']}) cwgc:({row['cwgc_id']})""") try: - company = Company.objects.filter(name=row['company_id']) - if company: - company = company.first() - else: - #print(f"""row: ({row['id']}) cwgc:({row['cwgc_id']})""") - company = Company.objects.filter(name="UNKNOWN").first() - country = Country.objects.filter(name=row['country_id']) - if country: - country = country.first() - else: - country = Country.objects.filter(name="UNKNOWN").first() - gazette_date = row.get('gazetteDate', None) + gazette_date = row.get('gazette_date', None) if gazette_date == "": gazette_date = None - - if int(row.get("id")) == 384: - print(f"""row: {row}""") - breakpoint() + + decoration_id = int(row['decoration_id']) if row.get('decoration_id') else None SoldierDecoration.objects.create( - #id,soldier_id,company_id,decoration_id,gazetteIssue,gazettePage,gazetteDate,citation,notes,country_id - # create the model id = int(row['id']), - #soldier = soldier soldier_id = int(row['soldier_id']), - company_id = company.id, - decoration_id = int(row['decoration_id']), - gazette_issue = row['gazetteIssue'], - gazette_page = row['gazettePage'], + company_id = row['company_id'], + decoration_id = decoration_id, + gazette_issue = row['gazette_issue'], + gazette_page = row['gazette_page'], gazette_date = gazette_date, citation = row['citation'], notes = row['notes'], - country_id = country.id + country_id = row['country_id'] ) except Exception as e: print(f"""💥row: {row}""") diff --git a/scripts/insert-all-soldier-imprisonments.py b/scripts/insert-all-soldier-imprisonments.py index 67d5410..1c0c827 100644 --- a/scripts/insert-all-soldier-imprisonments.py +++ b/scripts/insert-all-soldier-imprisonments.py @@ -29,7 +29,7 @@ def run(): start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/soldier-imprisonment.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_soldierimprisonment.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() @@ -43,11 +43,11 @@ def run(): SoldierImprisonment.objects.create( id = row['id'], soldier_id = row['soldier_id'], - legacy_company = row['company_id'], - pow_number = row['powNumber'], - pow_camp_id = row['powCamp_id'], - legacy_date_from = row['dateFrom'], - legacy_date_to = row['dateTo'], + legacy_company = row['legacy_company'], + pow_number = row['pow_number'], + pow_camp_id = row['pow_camp_id'], + legacy_date_from = row['legacy_date_from'], + legacy_date_to = row['legacy_date_to'], notes = row['notes'] ) except Exception as e: diff --git a/scripts/insert-all-soldiers.py b/scripts/insert-all-soldiers.py index 22da168..5c8df60 100644 --- a/scripts/insert-all-soldiers.py +++ b/scripts/insert-all-soldiers.py @@ -31,18 +31,13 @@ def run(): title = sys.argv[2] start_fetch_time = time.time() - ref_data_url = "https://api.github.com/repos/gm3dmo/old-cmp/contents/data/soldier.csv" + ref_data_url = "https://api.github.com/repos/gm3dmo/cmp-archive/contents/cmp_soldier.csv" http = urllib3.PoolManager() r = http.request('GET', ref_data_url, headers=headers) end_fetch_time = time.time() - - # Read raw data and split into lines while preserving line endings - raw_data = r.data.decode('utf-8') - lines = raw_data.splitlines(keepends=True) - - # Create CSV reader with the first line as header - reader = csv.DictReader(lines) + # load the response into a csv dictionary reader + reader = csv.DictReader(r.data.decode('utf-8').splitlines()) start_insert_time = time.time() for row in reader: @@ -58,8 +53,6 @@ def run(): notes = row['notes'] ) - # Debug print to verify what was saved - saved_soldier = Soldier.objects.get(id=row['id']) except Exception as e: print(f"""💥row: ({row}) """)