annotated-papers/generate.py at master · jgamper/annotated-papers · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""Usage:
          generate.py

@ Jevgenij Gamper 2020
Generates readme, badges given the pdfs and md files stored in .annotated/
"""
import os
from docopt import docopt
from pybadges import badge
from collections import Counter
from pytablewriter import MarkdownTableWriter

INTRO = (
    "# Annotated papers :pencil:\n\nTracks research papers, articles and their topics read since July 2020 :+1:, and some "
    "that were read prior to July 2020\n"
    "<p align='center'>\n    <br>\n    <img src='figures/title_collage.jpg' width='800'/>\n    <br>\n<p>\n\n\n"
    "## Topics \n\nEvery badge is a topic with the count of articles associated with that topic. Colour represents "
    "if the number of read articles with that topic is below (red) or above (blue) average count. \n\n"
)


def parse_file_name(file_name):
    """
    Splits file name into author, date, title, and topics
    :param file_name:
    :return:
    """
    extracted_topics = file_name.split("[")[1].split("]")[0].split(", ")

    author, date, title = file_name.split(" [")[0].split(" - ")

    return author, date, title, extracted_topics


def enumerate_and_extract():
    """
    Enumerates articles in pdf directory and extracts author, date, file name and topics
    :return:
    """
    authors = []
    dates = []
    titles = []
    topics = []
    for root, dirs, files in os.walk("annotated/"):
        for file in files:
            if file.endswith(".md") or file.endswith(".pdf"):
                author, date, title, extracted_topics = parse_file_name(file)
                authors.append(author)
                dates.append(date)
                titles.append(title)
                topics.append(extracted_topics)

    return authors, dates, titles, topics


def get_table_string(authors, dates, titles, topics):
    """
    Returns an object ready to write a table
    :param authors:
    :param s:
    :param titles:
    :param topics:
    :return:
    """
    writer = MarkdownTableWriter(
        table_name="Completed Articles",
        headers=["Author", "Title", "Year", "Topics"],
        value_matrix=[
            [a, tit, d, ", ".join(top)]
            for a, d, tit, top in zip(authors, dates, titles, topics)
        ],
        margin=1,  # add a whitespace for both sides of each cell
    )
    return writer.dumps()


def generate_topic_badges(topic_counts):
    """
    Generate topic badges with count
    :param topic_counts:
    :return:
    """
    # Sort topic_counts
    topic_counts = {
        k: v
        for k, v in sorted(topic_counts.items(), key=lambda item: item[1], reverse=True)
    }

    html = ""
    mean_count = int(sum(topic_counts.values()) / len(topic_counts))
    for topic, count in topic_counts.items():
        # Generate badge
        color = "red" if count < mean_count else "blue"
        s = badge(left_text=topic, right_text="{}".format(count), right_color=color)
        # Save svg
        svg_name = "{}.svg".format(topic.replace(" ", "_"))
        svg_path = os.path.join("figures", "badges")
        os.makedirs(svg_path, exist_ok=True)
        svg_path = os.path.join(svg_path, svg_name)
        with open(svg_path, "w") as f:
            f.write(s)
        html += "<img src='figures/badges/{}'/> ".format(svg_name)
    return html


def main():
    authors, dates, titles, topics = enumerate_and_extract()

    table = get_table_string(authors, dates, titles, topics)

    flat_topics = [item for sublist in topics for item in sublist]

    topic_counts = Counter(flat_topics)

    html = generate_topic_badges(topic_counts)

    readme = INTRO + html + "\n\n" + table

    with open("README.md", "w") as f:
        f.write(readme)


if __name__ == "__main__":
    arguments = docopt(__doc__)
    main()