Skip to content

Commit 7cdd3d7

Browse files
author
Steve Baskauf
committed
Add UTF-8 encoding to all requests.post() HTTP POST bodies
1 parent d610240 commit 7cdd3d7

4 files changed

Lines changed: 29 additions & 12 deletions

File tree

vanderbot/README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ Here are some queries that can be run to explore the data:
3232

3333
[Number of clinical trials at Vanderbilt by principal investigator](https://w.wiki/XKK)
3434

35-
The current release is [v1.6.3](https://github.com/HeardLibrary/linked-data/releases/tag/v1.6.3).
35+
The current release is [v1.6.4](https://github.com/HeardLibrary/linked-data/releases/tag/v1.6.4).
3636

3737
## How it works
3838

@@ -179,5 +179,9 @@ Version 1.6.3 is a minor upgrade that adds an updated version of the HTML, Javas
179179

180180
The upgrade now supports monolingual string values and the complex value types globecoordinate and quantity. Other scripts were not affected.
181181

182+
## Release v1.6.4 (2021-01-27)
183+
184+
Version 1.6.4 contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Previously, the unencoded bodies caused problems if strings being sent as part of a SPARQL query contained non-Latin characters.
185+
182186
----
183-
Revised 2021-01-26
187+
Revised 2021-01-27

vanderbot/vb3_match_wikidata.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
# -----------------------------------------
2525
# Version 1.5 change notes (2020-09-08):
2626
# - no changes
27+
# -----------------------------------------
28+
# Version 1.6.4 change notes (2021-01-27):
29+
# - contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Unencoded bodies caused problems if strings being sent as
30+
# part of a SPARQL query contained non-Latin characters.
2731

2832
import requests # best library to manage HTTP transactions
2933
from bs4 import BeautifulSoup # web-scraping library
@@ -228,7 +232,7 @@ def searchNameAtWikidata(name):
228232
#print('searching for ', name)
229233
results = []
230234
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
231-
r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
235+
r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
232236
try:
233237
data = r.json()
234238
statements = data['results']['bindings']
@@ -264,7 +268,7 @@ def searchWikidataDescription(qId):
264268
}'''
265269
#print(query)
266270
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
267-
r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
271+
r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
268272
try:
269273
data = r.json()
270274
statements = data['results']['bindings']
@@ -310,7 +314,7 @@ def searchWikidataArticle(qId):
310314
}'''
311315
#print(query)
312316
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
313-
r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
317+
r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
314318
try:
315319
data = r.json()
316320
statements = data['results']['bindings']
@@ -1073,7 +1077,7 @@ def identifiedInCrossref(doi, employee):
10731077

10741078
# The endpoint defaults to returning XML, so the Accept: header is required
10751079
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers={'Accept' : 'application/json'})
1076-
r = requests.post(wikidataEndpointUrl, data=query, headers=requestHeaderDictionary)
1080+
r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
10771081

10781082
data = r.json()
10791083
#print(json.dumps(data,indent = 2))

vanderbot/vb6_upload_wikidata.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@
8383
# -----------------------------------------
8484
# Version 1.6.2 change notes (2020-12-01):
8585
# - Fixes a bug where an error was raised when a reference property did not have a value.
86+
# -----------------------------------------
87+
# Version 1.6.4 change notes (2021-01-27):
88+
# - contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Unencoded bodies caused problems if strings being sent as
89+
# part of a SPARQL query contained non-Latin characters.
8690

8791
import json
8892
import requests
@@ -203,7 +207,7 @@ def searchLabelsDescriptionsAtWikidata(qIds, labelType, language):
203207

204208
returnValue = []
205209
# r = requests.get(endpointUrl, params={'query' : query}, headers=requestHeaderDictionary)
206-
r = requests.post(endpointUrl, data=query, headers=requestHeaderDictionary)
210+
r = requests.post(endpointUrl, data=query.encode('utf-8'), headers=requestHeaderDictionary)
207211
data = r.json()
208212
results = data['results']['bindings']
209213
for result in results:

vanderbot/vb_common_code.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@
2222
# -----------------------------------------
2323
# Version 1.5 change notes (2020-09-08):
2424
# - no changes
25+
# -----------------------------------------
26+
# Version 1.6.4 change notes (2021-01-27):
27+
# - contains a bug fix that explicitly encodes all HTTP POST bodies as UTF-8. Unencoded bodies caused problems if strings being sent as
28+
# part of a SPARQL query contained non-Latin characters.
29+
2530

2631
import requests # best library to manage HTTP transactions
2732
from bs4 import BeautifulSoup # web-scraping library
@@ -237,7 +242,7 @@ def searchWikidataForQIdByOrcid(orcid, wikidataEndpointUrl, sparqlSleep):
237242
results = []
238243
acceptMediaType = 'application/json'
239244
# r = requests.get(wikidataEndpointUrl, params={'query' : query}, headers = generateHeaderDictionary(acceptMediaType))
240-
r = requests.post(wikidataEndpointUrl, data=query, headers = generateHeaderDictionary(acceptMediaType))
245+
r = requests.post(wikidataEndpointUrl, data=query.encode('utf-8'), headers = generateHeaderDictionary(acceptMediaType))
241246
try:
242247
data = r.json()
243248
statements = data['results']['bindings']
@@ -317,7 +322,7 @@ def __init__(self, **kwargs):
317322
# send a generic query and return a list of Q IDs
318323
def generic_query(self, query):
319324
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
320-
r = requests.post(self.endpoint, data=query, headers=self.requestheader)
325+
r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
321326
results_list = []
322327
try:
323328
#if 1==1: # replace try: to let errors occur, also comment out the except: clause
@@ -357,7 +362,7 @@ def single_property_values_for_item(self, qid):
357362
}'''
358363
#print(query)
359364
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
360-
r = requests.post(self.endpoint, data=query, headers=self.requestheader)
365+
r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
361366
results_list = []
362367
try:
363368
#if 1==1: # replace try: to let errors occur, also comment out the except: clause
@@ -423,7 +428,7 @@ def labels_descriptions(self, qids):
423428

424429
results_list = []
425430
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
426-
r = requests.post(self.endpoint, data=query, headers=self.requestheader)
431+
r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
427432
data = r.json()
428433
results = data['results']['bindings']
429434
for result in results:
@@ -482,7 +487,7 @@ def search_statement(self, qids, reference_property_list):
482487

483488
results_list = []
484489
# r = requests.get(self.endpoint, params={'query' : query}, headers=self.requestheader)
485-
r = requests.post(self.endpoint, data=query, headers=self.requestheader)
490+
r = requests.post(self.endpoint, data=query.encode('utf-8'), headers=self.requestheader)
486491
data = r.json()
487492
results = data['results']['bindings']
488493
# NOTE: There may be more than one reference per statement.

0 commit comments

Comments
 (0)