-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcollecting_openaire.py
More file actions
54 lines (43 loc) · 1.82 KB
/
collecting_openaire.py
File metadata and controls
54 lines (43 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os
from xml.etree import ElementTree as et
import csv
from xml2csv_openaire import identifier, doc_type, langs, year, projects
# Root directory that the script walks recursively (os.walk); every file found
# below it is handed to extracting_data() as an XML input.
xml_path = "D:/diplomka/praktická_část/download_xml/oaire_data/"
def make_headers():
    """Create the output CSV if needed and write its header row once.

    ``OpenAIRE_complete.csv`` (semicolon-delimited, in the current working
    directory) is opened in append mode, so the file is created when missing
    and existing rows are never truncated.  The header row is written only
    when the file is missing or still empty; re-running the script therefore
    does not stack duplicate header lines on top of earlier output.
    """
    target = 'OpenAIRE_complete.csv'
    # Decide before opening: append mode creates the file as a side effect.
    header_missing = (not os.path.isfile(target)
                      or os.path.getsize(target) == 0)
    with open(target, 'a', newline='') as csvFile:
        if header_missing:
            writer = csv.writer(csvFile, delimiter=";")
            writer.writerow(["id", "doc_type", "langs", "year",
                             "projects", "no_of_projects"])
def extracting_data(file):
    """Parse one XML file and append one CSV row per result element.

    Every ``./results/result`` element under the document root becomes a
    semicolon-delimited row in ``OpenAIRE_complete.csv`` with the values
    returned by ``identifier``, ``doc_type``, ``langs``, ``year`` and
    ``projects`` (imported from ``xml2csv_openaire``), followed by
    ``len()`` of the projects value.

    Failures are handled best-effort so that one bad file does not stop the
    whole batch: any exception is printed and appended to
    ``errors_aire.txt``, one line per failure.  Rows written before the
    failure remain in the CSV.

    Args:
        file: Path of the XML file to process.
    """
    print(file)
    try:
        with open(file, encoding="utf8") as f:
            tree = et.parse(f)
        results = tree.getroot().findall("./results/result")
        print(len(results))
        # Open the output once per input file instead of once per row.
        with open('OpenAIRE_complete.csv', 'a', newline='') as csvFile:
            writer = csv.writer(csvFile, delimiter=";")
            for result in results:
                row = [identifier(result), doc_type(result),
                       langs(result), year(result)]
                # Evaluate projects() a single time and reuse it for both
                # the value column and its count (assumes the helper returns
                # the same value on repeated calls for the same element).
                result_projects = projects(result)
                row.extend([result_projects, len(result_projects)])
                writer.writerow(row)
    except Exception as err:
        print(err)
        with open("errors_aire.txt", "a") as error_file:
            # The trailing newline keeps each failure on its own line.
            error_file.write(f"{file}: {err}\n")
# PROGRAM START
# Write the CSV header, then convert every file found below xml_path,
# printing a running 1-based counter before each file is processed.
make_headers()
all_files = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(xml_path)
    for name in names
)
for position, path in enumerate(all_files, start=1):
    print(position)
    extracting_data(path)
# NOTE: the CSV is opened in append mode, so if the script crashes, the rows written before the crash stay in the file and a new run appends to them; repeated runs can therefore duplicate content in the CSV (e.g. the header ended up written 5 times).