-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathget_articles.py
More file actions
41 lines (30 loc) · 846 Bytes
/
get_articles.py
File metadata and controls
41 lines (30 loc) · 846 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Script to extract article text from a JSONL export into one plain-text file per article, for reference
import json
import random
import os
import codecs
import numpy as np
import re
from re import search
def save_articles_to_file(
    jsonl_source="data/microwave_limb_sounder/mls_pubs.jsonl",
    output_dir="data/microwave_limb_sounder/raw_text",
):
    """Write the text of every article in a JSONL file to its own ``.txt`` file.

    Each non-blank line of ``jsonl_source`` is parsed as a JSON object whose
    ``"_source"`` member carries the article ``"title"`` and ``"text"``. The
    text is written to ``<output_dir>/<title><line number>.txt``, where ``/``
    is removed from the title so it cannot introduce extra path components.
    The 1-based line number is appended so that articles sharing a title do
    not overwrite each other.

    Args:
        jsonl_source: Path of the JSON Lines file to read (UTF-8).
        output_dir: Directory that receives the ``.txt`` files. It is created
            if it does not exist yet.

    Returns:
        None. The number of lines read from ``jsonl_source`` is printed.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the source cannot be read or an output file cannot be
            written (for example when a title yields an over-long file name).
    """
    os.makedirs(output_dir, exist_ok=True)

    line_count = 0  # stays 0 for an empty source, so the final print still works
    with open(jsonl_source, encoding="utf-8") as source:
        for line_count, line in enumerate(source, start=1):
            if not line.strip():
                # Tolerate blank lines instead of failing in json.loads; the
                # line number still advances so file suffixes keep matching
                # the position of the record in the source file.
                continue

            record = json.loads(line).get("_source") or {}
            # A missing or null title/text is treated as an empty string
            # rather than raising AttributeError/TypeError mid-run.
            title = (record.get("title") or "").replace("/", "")
            text = record.get("text") or ""

            filename = os.path.join(output_dir, "%s%d.txt" % (title, line_count))
            # Explicit UTF-8 so non-ASCII article text is written the same way
            # regardless of the platform's locale encoding.
            with open(filename, "w", encoding="utf-8") as output:
                output.write(text)

    print(line_count)
# Script entry point: export the articles using the default paths when this
# file is executed directly; importing the module runs nothing.
if __name__ == "__main__":
    save_articles_to_file()