WCKBQuery/kbquery3 at master · bradley-benjamin26/WCKBQuery · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/python
# -*- coding: UTF-8 -*-

import csv
import re
import string
import xml.etree.ElementTree as ET
from urllib.parse import quote

import requests

# Column names of the KBART export (not referenced by the code below; kept for reference).
fieldNames = ["publication_title", "print_identifier", "online_identifier", "date_first_issue_online", "num_first_vol_online", "num_first_issue_online", "date_last_issue_online", "num_last_vol_online", "num_last_issue_online", "title_url", "first_author", "title_id", "embargo_info", "coverage_depth", "coverage_notes", "publisher_name", "location", "title_notes", "staff_notes", "vendor_id", "oclc_collection_name", "oclc_collection_id", "oclc_entry_id", "oclc_linkscheme", "oclc_number", "ACTION"]

#Add your API key below, assigned to the "key" variable

key = ''
#ns variable is used for namespaces for the XML that the KB API returns
ns = {'kb' : 'http://worldcat.org/kb', 'atom' : 'http://www.w3.org/2005/Atom', 'os' : 'http://a9.com/-/spec/opensearch/1.1/'}

SEARCH_URL = 'http://worldcat.org/webservices/kb/rest/entries/search'
COLLECTIONS_URL = 'http://worldcat.org/webservices/kb/rest/collections/'
# Seconds to wait on the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 60

# Query type typed by the user -> search parameter name; anything else uses "q".
SEARCH_PARAMS = {
    'ISSN': 'issn',
    'ISBN': 'isbn',
    'Title': 'title',
    'OCN': 'oclcnum',
}

# Query type -> KBART columns that are compared against the search term.
KBART_MATCH_COLUMNS = {
    'ISSN': ('online_identifier', 'print_identifier'),
    'ISBN': ('online_identifier', 'print_identifier'),
    'Title': ('publication_title',),
    'OCN': ('oclc_number',),
}

OUTPUT_HEADER = '\t'.join([
    'number', 'status', 'title', 'ISBN or ISSN', 'ocn',
    'Collection Name', 'KB ID', 'coverage', 'Search Term',
]) + '\t\n'


def build_search_url(raw_term, query_type, collection_id, wskey):
    """Return the entries-search URL for one search term.

    raw_term is the stripped input line. Title searches are sent as a quoted
    phrase. Every value is percent-encoded so that characters such as "&",
    spaces and quotes cannot break the query string. collection_id equal to
    'no' means "do not restrict the search to a collection".
    """
    value = '"' + raw_term + '"' if query_type == 'Title' else raw_term
    pairs = []
    if collection_id != 'no':
        pairs.append(('collection_uid', collection_id))
    pairs.append((SEARCH_PARAMS.get(query_type, 'q'), value))
    pairs.append(('wskey', wskey))
    query = '&'.join(name + '=' + quote(text, safe='') for name, text in pairs)
    return SEARCH_URL + '?' + query


def kbart_match_values(kbart_lines, query_type):
    """Collect the KBART cell values a term of query_type may match.

    kbart_lines is any iterable of tab-separated lines whose first line is
    the header row (an open file or a list of strings). Returns a set of the
    stripped, non-empty values found in the columns relevant to query_type;
    the set is empty for query types that have no KBART column.
    """
    columns = KBART_MATCH_COLUMNS.get(query_type, ())
    values = set()
    if not columns:
        return values
    for row in csv.DictReader(kbart_lines, delimiter='\t'):
        for column in columns:
            # .get(): short rows and files lacking the column yield None.
            cell = row.get(column)
            if cell and cell.strip():
                values.add(cell.strip())
    return values


def element_text(element, fallback):
    """Return element.text, or fallback when the element or its text is missing."""
    if element is None or element.text is None:
        return fallback
    return element.text


def format_entry_row(entry, numb, term):
    """Build the tab-separated output line for one Atom entry of a search result.

    entry is an ElementTree element, numb the row label ("<term>.<entry>")
    and term the search term echoed in the last column.
    """
    # Prefer the ISSN and fall back to the ISBN when there is no usable ISSN.
    standard_number = 'No Standard number found'
    for tag in ('kb:issn', 'kb:isbn'):
        found = entry.find(tag, ns)
        if found is not None and found.text:
            standard_number = found.text
            break
    fields = [
        numb,
        'selected',
        element_text(entry.find('atom:title', ns), 'No title'),
        standard_number,
        element_text(entry.find('kb:oclcnum', ns), 'no OCN'),
        element_text(entry.find('kb:collection_name', ns), 'no collection name'),
        element_text(entry.find('kb:entry_uid', ns), 'no KB ID'),
        element_text(entry.find('kb:coverage', ns), 'no coverage'),
        term,
    ]
    return '\t'.join(fields) + '\t\n'


def download_kbart(collection_id, wskey):
    """Download the collection's KBART file and return the local file name.

    The file is saved as "<collection_id>kbart.txt" in the current directory.
    Raises RuntimeError when the collection record has no KBART link.
    """
    collection_url = COLLECTIONS_URL + quote(collection_id, safe='') + '?wskey=' + quote(wskey, safe='')
    print(collection_url)
    collection_response = requests.get(collection_url, timeout=REQUEST_TIMEOUT)
    collection_root = ET.fromstring(collection_response.content)
    # NOTE(review): the original author was unsure whether this XPath is
    # right for every collection record -- confirm against the API output.
    kbart_link = collection_root.find('./atom:link[@title="kbart file"]', ns)
    href = kbart_link.get('href') if kbart_link is not None else None
    if not href:
        raise RuntimeError('No "kbart file" link found for collection ' + collection_id)
    full_link = href + '?wskey=' + wskey
    print(full_link)
    kbart_path = collection_id + 'kbart.txt'
    print("downloading: " + kbart_path)
    kbart_response = requests.get(full_link, timeout=REQUEST_TIMEOUT)
    # Store the raw bytes so no charset guess is applied to the download.
    with open(kbart_path, 'wb') as kbart_file:
        kbart_file.write(kbart_response.content)
    return kbart_path


def main():
    """Prompt for the run settings, then look up every line of the input file.

    One search is made per input line and the results are appended to the
    output file as tab-separated rows. When a collection ID was given and a
    term has no hits, the collection's KBART file is downloaded once and used
    to tell "not selected" apart from "not available".
    """
    inputFile = input("Input File: ")
    outputFile = input("Name of file to save results: ")
    queryType = input('What are you searching on? ISSN, ISBN, OCN, or Title: ')
    collectionID = input("To search in a particular collection, enter its ID otherwise type 'no': ")

    kbart_values = None  # loaded lazily, at most once per run
    with open(inputFile, 'r') as f, open(outputFile, 'a+') as o:
        o.write(OUTPUT_HEADER)
        for termCount, line in enumerate(f, 1):
            raw_term = line.strip()
            term = '"' + raw_term + '"' if queryType == 'Title' else raw_term
            print('Search number: ' + str(termCount) + '    Search term: ' + term)
            if not raw_term:
                # Blank input line: record it and skip the API call.
                o.write('blank' + '\n')
                print('no search term')
                continue

            url = build_search_url(raw_term, queryType, collectionID, key)
            print(url)
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            # Parse the raw bytes so the XML declaration decides the encoding.
            root = ET.fromstring(response.content)
            total = root.find('os:totalResults', ns)
            if total is None:
                raise RuntimeError('Unexpected response for ' + term + ': no totalResults element')

            if (total.text or '').strip() != '0':
                for entryNumber, entry in enumerate(root.findall('atom:entry', ns), 1):
                    data = format_entry_row(entry, str(termCount) + '.' + str(entryNumber), term)
                    print(data)
                    o.write(data)
                continue

            if collectionID == 'no':
                o.write(term + ' not found' + '\n')
                print(term + ' not found')
                continue

            # A collection was given but the search had no hits: check the
            # full KBART to see whether the title is in the collection at
            # all or simply not selected.
            if kbart_values is None:
                kbart_path = download_kbart(collectionID, key)
                # Assumes the KBART download is UTF-8 -- TODO confirm;
                # undecodable bytes are replaced rather than aborting the run.
                with open(kbart_path, 'r', encoding='utf-8-sig', errors='replace', newline='') as kbart_file:
                    kbart_values = kbart_match_values(kbart_file, queryType)
            numb = str(termCount)
            # Compare the unquoted term: KBART titles carry no phrase quotes.
            if raw_term in kbart_values:
                o.write(numb + '\t' + 'not selected but available in global KB' + '\t\t\t\t\t\t\t' + term + '\n')
                print(term + ' not selected')
            else:
                o.write(numb + '\t' + 'not available' + '\t\t\t\t\t\t\t' + term + '\n')
                print(term + ' not available')


if __name__ == "__main__":
    main()