-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompile_tables.py
More file actions
73 lines (60 loc) · 2.21 KB
/
compile_tables.py
File metadata and controls
73 lines (60 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""compile_tables
This pywikibot script reads tables from the pages in a category on the wiki,
then creates a combined table using fields that are in common to those
tables.
OPTIONS:
-fieldnames:FIELDNAMES (required)
List of table headers to use. Separate multiple values with semicolons.
-target:PAGENAME (required)
Target wiki pagename
-category:CATEGORY (required)
The wiki category containing the pages with tables.
"""
import pywikibot
from pywikibot import pagegenerators
from bs4 import BeautifulSoup
def parse_table(html):
    """Collect header/value pairs from the wikitables in a chunk of HTML.

    Every ``<tr>`` found inside a ``<table>`` carrying the ``wikitable``
    class is read as one field: the text of the row's first ``<th>`` is the
    field name and the text of its first ``<td>`` is the field value.

    Args:
        html: HTML markup to scan.

    Returns:
        A dict mapping each stripped, lower-cased header to the text of the
        matching cell.  The value stored under ``'title'`` is wrapped in
        ``[[...]]``.  If the same header occurs more than once, the last
        occurrence wins.
    """
    soup = BeautifulSoup(html, 'html.parser')
    fields = {}
    for table in soup.find_all('table', {'class': 'wikitable'}):
        for row in table.find_all('tr'):
            header, cell = row.th, row.td
            # A row needs both a header cell and a data cell to describe a
            # field; skip anything else instead of failing on ``None``.
            if header is None or cell is None:
                continue
            # ``.string`` is None when the header holds several child nodes.
            key = (header.string or '').strip().lower()
            if not key:
                continue
            value = ''.join(cell.stripped_strings)
            if key == 'title':
                value = '[[{}]]'.format(value.strip())
            fields[key] = value
    return fields
def run(*args):
    """Combine the tables of every page in a category into one wiki table.

    Reads the ``fieldnames``, ``category`` and ``target`` options, prompting
    for any that are missing or empty, then parses each page of the category
    with ``parse_table``, builds a sortable wikitable with one row per page,
    and saves it as the text of the target page.

    Args:
        *args: Optional command-line style arguments (``-option:value``)
            handed to ``pywikibot.handle_args``.  When none are given,
            pywikibot falls back to the process command line.
    """
    local_args = pywikibot.handle_args(args)
    required = ['fieldnames', 'category', 'target']
    options = {}
    for arg in local_args:
        option, _, value = arg.partition(':')
        options[option.strip('-')] = value
    # Ask interactively for every required option that is absent or empty.
    for option in required:
        if not options.get(option):
            options[option] = pywikibot.input(
                'Please enter a value for ' + option)

    site = pywikibot.Site()
    category = pywikibot.Category(site, options['category'])
    pages = pagegenerators.CategorizedPageGenerator(category)

    fieldnames = [f.strip() for f in options['fieldnames'].split(';')]
    rows = [
        '{| class="wikitable sortable"',
        '! ' + ' !! '.join(fieldnames),
    ]
    for page in pages:
        data = parse_table(page.get_parsed_page())
        rows.append('|-')
        # parse_table lower-cases its keys, so look fields up in lower case;
        # a page that lacks a field contributes an empty cell.
        cells = [data.get(f.lower()) or '' for f in fieldnames]
        rows.append('| ' + ' || '.join(cells))
    rows.append('|}')

    target = pywikibot.Page(site, options['target'])
    target.text = '\r\n'.join(rows)
    target.save('Updated table based on contents of {}'.format(
        options['category']))
# Run the table compilation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()