Skip to content

Commit b3c2b22

Browse files
committed
handle language codes in different formats
1 parent 4abe9a5 commit b3c2b22

2 files changed

Lines changed: 18 additions & 11 deletions

File tree

pygeometa/schemas/gbif_eml/__init__.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import re
22
from pathlib import Path
33

4+
import pycountry
45
from bs4 import BeautifulSoup
56
from pygeometa.schemas.base import BaseOutputSchema
67

@@ -91,7 +92,11 @@ def import_(self, metadata):
9192
mcf["metadata"]["identifier"] = text_or_null(identifier)
9293

9394
if language := dataset.find("language"):
94-
mcf["metadata"]["language"] = text_or_null(language)
95+
lang = text_or_null(language)
96+
if lang and pycountry.languages.get(alpha_3=lang):
97+
mcf["metadata"]["language"] = pycountry.languages.get(
98+
alpha_3=lang
99+
).alpha_2
95100

96101
idf = mcf["identification"]
97102

@@ -118,8 +123,8 @@ def import_(self, metadata):
118123
# )
119124

120125
idf["maintenancefrequency"] = (
121-
text_or_null(dataset.find("maintenanceUpdateFrequency")) or
122-
"unknown"
126+
text_or_null(dataset.find("maintenanceUpdateFrequency"))
127+
or "unknown"
123128
)
124129

125130
idf["dates"] = {"publication": text_or_null(dataset.find("pubDate"))}
@@ -137,8 +142,9 @@ def import_(self, metadata):
137142
]
138143

139144
spatial["crs"] = "4326"
140-
spatial["description"] = \
141-
text_or_null(dataset.find("geographicDescription"))
145+
spatial["description"] = text_or_null(
146+
dataset.find("geographicDescription")
147+
)
142148

143149
# temporal = idf["extents"]["temporal"]
144150
# temporal["begin"]
@@ -152,17 +158,17 @@ def import_(self, metadata):
152158
for r, obj in to_contact_role(dataset, "contact", "pointOfContact"):
153159
ct[r] = obj
154160

155-
for r, obj in to_contact_role(dataset,
156-
"metadataProvider",
157-
"distributor"):
161+
for r, obj in to_contact_role(
162+
dataset, "metadataProvider", "distributor"
163+
):
158164
ct[r] = obj
159165

160166
for r, obj in to_contact_role(dataset, "creator"):
161167
ct[r] = obj
162168

163-
for r, obj in to_contact_role(dataset,
164-
"personnel",
165-
"projectPersonnel"):
169+
for r, obj in to_contact_role(
170+
dataset, "personnel", "projectPersonnel"
171+
):
166172
ct[r] = obj
167173

168174
for idx, keyword_set in enumerate(dataset.find_all("keywordSet")):

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ lxml
55
OWSLib
66
pyyaml
77
beautifulsoup4
8+
pycountry

0 commit comments

Comments
 (0)