From a2a2d07a77d0de7c69bc360ac54c8db780fa77cf Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Sun, 10 Nov 2019 23:34:45 +0300 Subject: [PATCH 01/43] Added rss_reader Iteration 1. Working on the model. Description shows excess HTML tags and other. Does not support setting "--json". --- .gitignore | 44 +------------------- rss_reader.py | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 43 deletions(-) create mode 100644 rss_reader.py diff --git a/.gitignore b/.gitignore index 894a44c..fc450d7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,7 @@ -# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class - -# C extensions *.so - -# Distribution / packaging .Python build/ develop-eggs/ @@ -24,18 +19,10 @@ wheels/ .installed.cfg *.egg MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec - -# Installer logs pip-log.txt pip-delete-this-directory.txt - -# Unit test / coverage reports htmlcov/ .tox/ .coverage @@ -46,42 +33,20 @@ coverage.xml *.cover .hypothesis/ .pytest_cache/ - -# Translations *.mo *.pot - -# Django stuff: *.log local_settings.py db.sqlite3 - -# Flask stuff: instance/ .webassets-cache - -# Scrapy stuff: .scrapy - -# Sphinx documentation docs/_build/ - -# PyBuilder target/ - -# Jupyter Notebook .ipynb_checkpoints - -# pyenv .python-version - -# celery beat schedule file celerybeat-schedule - -# SageMath parsed files *.sage.py - -# Environments .env .venv env/ @@ -89,16 +54,9 @@ venv/ ENV/ env.bak/ venv.bak/ - -# Spyder project settings .spyderproject .spyproject - -# Rope project settings .ropeproject - -# mkdocs documentation /site - -# mypy .mypy_cache/ +.idea/ diff --git a/rss_reader.py b/rss_reader.py new file mode 100644 index 0000000..4561030 --- /dev/null +++ b/rss_reader.py @@ -0,0 +1,110 @@ +import feedparser +import html + +import argparse + +__version__ = '0.1' + + +class RSSReader: + def execute(self, source, verbose, limit, as_json): + response = feedparser.parse(source) + title = self._parse_title_(response) + articles = self._parse_articles_(response, limit) + + self.print_all_articles(articles, feed=title) + + def print_all_articles(self, articles, feed=None): + if feed is not None: + print(f"Feed: {feed['feed']}\n") + + for article in articles: + self.print_article(article) + print('--------------------------------------------------') + + def print_article(self, article): + description, links = self.get_description_and_links(article) + print(f"Title: {article.title}\n" + f"Date: {article.published}\n" + f"Link: {article.link}\n\n" + f"{description}\n\n" + f"Links:") + for link in links: + print(link) + + @staticmethod + def delete_all_tags(text: str, tag): + start_ind = text.find(f'<{tag}') + while start_ind != -1: + end_ind = text.find(f'{tag}>') + text = text[:start_ind:] + text[end_ind + 1:] + start_ind = text.find(f'<{tag}') + return text + + @staticmethod + def get_description_and_links(article): + description = html.unescape(article.description) + images = [] + links = [] + i = 0 + ind = 1 + if (i := article.description.find('href="', i)) != -1: + str_url = article.description[i + 6: article.description.find('"', i + 7)] + links.append(f"[{ind}]: {str_url}") + + while (i := article.description.find('', i + 2) + 1 + description = description[:img_start] + description[img_end:] + + ind = 2 + for media in article.media_content: + links.append(f"[{ind}]: {media['url']}") + ind += 1 + return description, links + + @staticmethod + def _parse_title_(response): + try: + return {'feed': response['feed']['title']} + except KeyError: + return None + + @staticmethod + def _parse_articles_(response, limit): + result = response.entries + if limit is not None: + return result[0:min(limit, len(result))] + else: + return result + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('source', action='store', type=str, help='RSS URL') + parser.add_argument('--version', action='store_true', help='Print version info') + parser.add_argument('--json', action='store_true', help='Print result as JSON in stdout') + parser.add_argument('--verbose', action='store_true', help='Outputs verbose status messages') + parser.add_argument('--limit', type=int, help='Limit news topics if this parameter provided') + + settings = parser.parse_args() + + if settings.version: + print(__version__) + + RSSReader().execute("https://news.yahoo.com/rss/", settings.verbose, settings.limit, settings.json) + + +if __name__ == '__main__': + main() From 78af7b2319f0972fa945ebd7eae2a6020c2661f2 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 14 Nov 2019 15:15:54 +0300 Subject: [PATCH 02/43] Completed Iteration #1 Added: * File htmlparser. Contain all parsing functionality of program. * Added docstrings in all files. * Added logging. To use write '--verbose' after filename. * Added converting to JSON. Also now available output all news articles in JSON format. Change: * Functionality of class RSSReader. All parsing work removed into htmlparser module. Fixed all files with PEP8 --- htmlparser.py | 298 ++++++++++++++++++++++++++++++++++++++++++++++++++ rss_reader.py | 178 +++++++++++++++++++----------- 2 files changed, 411 insertions(+), 65 deletions(-) create mode 100644 htmlparser.py diff --git a/htmlparser.py b/htmlparser.py new file mode 100644 index 0000000..db43aef --- /dev/null +++ b/htmlparser.py @@ -0,0 +1,298 @@ +from abc import ABC, abstractmethod +from json import dumps + +__all__ = ['Parser'] + + +class Tag(ABC): + """ + Abstract class for working with tags as a class structure. + + """ + + def __init__(self, **kwargs): + for arg, val in kwargs.items(): + self.__setattr__(arg, val) + + @abstractmethod + def link(self, ind): + """ + Get string line for output in links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string line for output in links section + :rtype: str + """ + pass + + @abstractmethod + def __str__(self): + """ + Get string line for output tag in description section. + + :return: string line for output tag in description section + """ + pass + + +class A(Tag): + """ + Class for work with tag a (link) as a class struct. + """ + href = None + + def __str__(self): + """ + Get string line for output tag in description section. + + :return: string line for output tag in description section + """ + return "[link %d]" + + def link(self, ind): + """ + Get string line for output in links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string line for output in links section + :rtype: str + """ + return "[%d]: %s (link)" % (ind, self.href) + + +class Img(Tag): + """ + Class for work with tag img (image) as a class struct. + """ + src = None + alt = None + + def __str__(self): + """ + Get string line for output tag in description section. + + :return: string line for output tag in description section + """ + return f"[Image %d: {self.alt or 'None'}] " + + def link(self, ind): + """ + Get string line for output in links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string line for output in links section + :rtype: str + """ + return "[%d]: %s (image)" % (ind, self.src) + + +class HTMLParser: + """ + A class for parse news articles from response struct of module "feedparser". + Methods return JSON format of news articles or dict with info about given article. + """ + _table = { + 'a': A, + 'img': Img, + } + + def __init__(self): + self._stack = [] + + def get_json(self, articles, title): + """ + Method for converting given articles and title of RSS Source to JSON format. + + :param articles: articles for convert to JSON format + :param title: title of RSS Source + :return: JSON format of given articles with title of RSS Source + :rtype: str + """ + result = { + 'title': title['feed'], + 'articles': { + str(i): self._article_to_dict(articles[i]) for i in range(len(articles)) + } + } + return dumps(result) + + def parse_article(self, article): + """ + Method for converting article to dict with given article info in specified format + :param article: article for converting to dict in specified format + :type article: dict + :return: dict with article info in specified format + """ + description, links = self._process_text(article.description, True) + return {'title': article.title, + 'description': description, + 'link': article.link, + 'pubDate': article.published + }, links + + @staticmethod + def _get_next_tag(line): + """ + Method for getting startpos and endpos of tag in given string line + :param line: line with html tag + :type line: str + :return: (startpos, endpos) is a position of next tag in line if line have a tag, else None + :rtype: tuple or None + """ + if line.find('<') != -1: + startpos = line.find('<') + endpos = line.find('>', startpos) + 1 + return startpos, endpos + else: + return None + + def _create_tag(self, params): + """ + Method for creating Tag struct class from params. + + :param params: info for creating tag. + :type params: dict + :return: tag object if creating was successful, else None + :rtype: Tag or None + """ + try: + tag_type = next(iter(params)) + params.pop(tag_type) + return self._table[tag_type](**params) + except KeyError: + return None + + def _parse_params_from_line(self, tag_line): + """ + Method for getting all parameters from html tag string line. + If parameter have a value params save value. Else value is True. + + :param tag_line: line with tag parameters. + :type tag_line: str + :return: dict with parsed parameters. + :rtype: dict + """ + params = {} + tag_line = tag_line.strip('<>') + strings, tag_line = self._get_all_strings(tag_line) + words = tag_line.split() + for param in words: + pair = param.split('=') + if len(pair) == 1: + params.update({pair[0]: True}) + else: + params.update({pair[0]: strings.pop(0)}) + + return params + + @staticmethod + def _get_all_strings(tag_line): + """ + Method of cutting all string in quotes \"...\". + + :param tag_line: line with tag info and strings. + :type tag_line: str + :return: tuple (strings, tag_line). + strings is a list with all cutting strings. + tag_line is a given string parameter without cutting strings. + :rtype: tuple + """ + strings = [] + while (start_ind := tag_line.find('"')) != -1: + end_ind = tag_line.find('"', start_ind + 1) + 1 + strings.append(tag_line[start_ind + 1: end_ind - 1]) + tag_line = tag_line[:start_ind] + tag_line[end_ind:] + return strings, tag_line + + def _get_desc_only(self, line): + """ + Method for getting description on news article without inserts links. + + :param line: description with tags and useless links + :type line: str + :return: description on news article without useless info. Text description only + :rtype: str + """ + description, _ = self._process_text(line, False) + return description + + def _get_images_from_article(self, article): + """ + Method for parsing info about all images in given article. + + :param article: article for parse info about all images + :type article: dict + :return: list of tag Image objects info about images + :rtype: list + """ + if self._stack is []: + self._process_text(article.description, False) + return [obj for obj in self._stack if isinstance(obj, Img)] + + def _process_text(self, description, fill_desc): + """ + Method processing + + :param description: description of news article with useless info and tags + :param fill_desc: adding formatted links in description or not + :type description: str + :type fill_desc: True + :return: tuple (description, links) + description is description without useless info and tags. With inserts links or not + links is list with formatted strings with links from all created tag objects + :rtype: tuple + """ + self._stack.clear() + index_of_tag = 1 + links = [] + while (pos_tag := self._get_next_tag(description)) is not None: + first_quotes, last_quotes = pos_tag + full_tag_line = description[first_quotes: last_quotes] + parameters = self._parse_params_from_line(full_tag_line) + obj_tag = self._create_tag(parameters) + if obj_tag is not None: + self._stack.append(obj_tag) + if fill_desc: + description = description[:first_quotes] + (str(obj_tag) % index_of_tag) + description[last_quotes:] + else: + description = description[:first_quotes] + description[last_quotes:] + links.append(obj_tag.link(index_of_tag)) + index_of_tag += 1 + else: + description = description[:first_quotes] + description[last_quotes:] + + return description, links + + def _article_to_dict(self, article): + """ + Method for converting article info into dict of specific format. + + :param article: article for converting into dict of specific format + :type article: dict + :return: dict of specific format + :rtype: dict + """ + + def images_from_article_to_dict(art): + content = self._get_images_from_article(art) + return { + str(i): { + 'src': content[i].src, + 'alt': content[i].alt + } for i in range(len(content)) + } + + result = {'title': article.title, + 'description': self._get_desc_only(article.description), + 'link': article.link, + 'pubDate': article.published, + 'media': images_from_article_to_dict(article) + } + + return result + + +Parser = HTMLParser() diff --git a/rss_reader.py b/rss_reader.py index 4561030..b535f53 100644 --- a/rss_reader.py +++ b/rss_reader.py @@ -1,92 +1,140 @@ -import feedparser -import html +""" +Python RSS reader v0.8 + +Designed to download news from the entered url. + +Opportunities: + * Get version + * Conversion to JSON + * Logging + * Limiting articles + +For information enter + "python rss_reader.py --help" +in terminal to find more information. + +""" + +__version__ = "v0.8" +import logging +import feedparser import argparse -__version__ = '0.1' +from htmlparser import * class RSSReader: def execute(self, source, verbose, limit, as_json): + """ + Procedure executing program. Get additional setting parameters and running + + :param source: URL for downloading news articles + :param verbose: Output the logs of program + :param limit: Limit of output news articles + :param as_json: Show news articles as JSON + :type source: str + :type verbose: bool + :type limit: int + :type as_json: bool + """ + if verbose: + logging.basicConfig(level=logging.INFO) + logging.info("Logging enabled") + logging.info("Getting response from %s" % source) response = feedparser.parse(source) - title = self._parse_title_(response) - articles = self._parse_articles_(response, limit) - self.print_all_articles(articles, feed=title) + if response.status in range(200, 300): + logging.info("Status code %d. Getting articles from %s was successful" % (response.status, source)) + else: + logging.info("Status code %d. Getting articles from %s was unsuccessful" % (response.status, source)) - def print_all_articles(self, articles, feed=None): - if feed is not None: - print(f"Feed: {feed['feed']}\n") + title = self.parse_title(response) + articles = self.parse_articles(response, limit) - for article in articles: - self.print_article(article) - print('--------------------------------------------------') - - def print_article(self, article): - description, links = self.get_description_and_links(article) - print(f"Title: {article.title}\n" - f"Date: {article.published}\n" - f"Link: {article.link}\n\n" - f"{description}\n\n" - f"Links:") - for link in links: - print(link) + if as_json: + self.json_print(articles, title) + else: + self.sample_print(articles, title) @staticmethod - def delete_all_tags(text: str, tag): - start_ind = text.find(f'<{tag}') - while start_ind != -1: - end_ind = text.find(f'{tag}>') - text = text[:start_ind:] + text[end_ind + 1:] - start_ind = text.find(f'<{tag}') - return text + def json_print(articles, title): + """ + Procedure for output articles in JSON format. + + :param articles: articles for converting to JSON + :param title: header of RSS Source + :type articles: dict + :type title: dict + """ + logging.info("Start creating JSON format of feeds") + data = Parser.get_json(articles, title) + logging.info("Completed. Printing..") + print(data) @staticmethod - def get_description_and_links(article): - description = html.unescape(article.description) - images = [] - links = [] - i = 0 - ind = 1 - if (i := article.description.find('href="', i)) != -1: - str_url = article.description[i + 6: article.description.find('"', i + 7)] - links.append(f"[{ind}]: {str_url}") - - while (i := article.description.find('', i + 2) + 1 - description = description[:img_start] + description[img_end:] - - ind = 2 - for media in article.media_content: - links.append(f"[{ind}]: {media['url']}") - ind += 1 - return description, links + def sample_print(articles, title): + """ + Procedure for sample output of news articles. + + :param articles: articles for output + :param title: header of RSS Source + :type articles: dict + :type title: dict + """ + logging.info("Start creating readable format of feeds") + if title is not None: + print(f"Feed: {title['feed']}\n") + + for article in articles: + logging.info("Parsing article..") + art, links = Parser.parse_article(article) + print(f"Title: {art['title']}\n" + f"Date: {art['pubDate']}\n" + f"Link: {art['link']}\n\n" + f"{art['description']}\n\n" + f"Links:") + for link in links: + print(link) + print('################################################################################') @staticmethod - def _parse_title_(response): + def parse_title(response): + """ + Static method for parsing header of RSS Source. + + :param response: response struct for parse + :type response: dict + :return: header of RSS Source if parsing was successful, else None + :rtype: dict or None + """ try: - return {'feed': response['feed']['title']} + logging.info("Successfully get Header of RSS Source: %s" % response.feed.title) + return {'feed': response.feed.title} except KeyError: + logging.info("Getting header of RSS Source was unsuccessful") return None @staticmethod - def _parse_articles_(response, limit): + def parse_articles(response, limit): + """ + Parse articles from response struct. + If limit is None return articles given length, else return all available articles. + + :param response: response struct for parse + :param limit: limit of output news articles + :type response: dict + :type limit: int or None + :return: news articles of limited length + :rtype: dict + """ + logging.info("Start loading articles. Limit: %d" % limit or "None") result = response.entries if limit is not None: + logging.info("Completed. Loaded %d articles" % min(limit, len(result))) return result[0:min(limit, len(result))] else: + logging.info("Completed. Loaded all articles") return result @@ -101,9 +149,9 @@ def main(): settings = parser.parse_args() if settings.version: - print(__version__) + print(f'RSS Reader {__version__}') - RSSReader().execute("https://news.yahoo.com/rss/", settings.verbose, settings.limit, settings.json) + RSSReader().execute(settings.source, settings.verbose, settings.limit, settings.json) if __name__ == '__main__': From ec86d621ee1ac876dbeb40781e45811a5df02736 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Sat, 16 Nov 2019 22:21:35 +0300 Subject: [PATCH 03/43] Completed Iteration 2 Added: * Added __init__.py * Added setup.py * Added README.md * Added package `rss-reader` with project files Change: * Project structure * In file `rss_reader.py` change import module `htmlparser` -> `.htmlparser` Fixed all files with PEP8 --- README.md | 26 +++++++++++++++++++++ __init__.py | 1 + rss-reader/__init__.py | 1 + htmlparser.py => rss-reader/htmlparser.py | 0 rss_reader.py => rss-reader/rss_reader.py | 2 +- setup.py | 28 +++++++++++++++++++++++ 6 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 README.md create mode 100644 __init__.py create mode 100644 rss-reader/__init__.py rename htmlparser.py => rss-reader/htmlparser.py (100%) rename rss_reader.py => rss-reader/rss_reader.py (99%) create mode 100644 setup.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..d660c31 --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +# Welcome +This project was created for the EPAM Python Courses 2019. + +## Installing + +### Download: + +`git clone https://github.com/TeRRoRlsT/PythonHomework.git` + +### Setup: +Go to repository **PythonHomework** and execute the command: + +`python3 setup.py install` + +## Running +To view the help for running project go to **PythonHomework** folder and execute the command: + +`python3 rss_reader.py --help` + +## Tests +For run unittest go to **PythonHomework** folder and execute the command: + +`python3 -m unittests test*` + + ## Authors +* Sergey Pivovar - BSUIR 2019 \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/__init__.py @@ -0,0 +1 @@ + diff --git a/rss-reader/__init__.py b/rss-reader/__init__.py new file mode 100644 index 0000000..26421e1 --- /dev/null +++ b/rss-reader/__init__.py @@ -0,0 +1 @@ +__version__ = "0.8" diff --git a/htmlparser.py b/rss-reader/htmlparser.py similarity index 100% rename from htmlparser.py rename to rss-reader/htmlparser.py diff --git a/rss_reader.py b/rss-reader/rss_reader.py similarity index 99% rename from rss_reader.py rename to rss-reader/rss_reader.py index b535f53..12097a8 100644 --- a/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -21,7 +21,7 @@ import feedparser import argparse -from htmlparser import * +from .htmlparser import * class RSSReader: diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..7b0ec7d --- /dev/null +++ b/setup.py @@ -0,0 +1,28 @@ +from setuptools import setup, find_packages + +with open('README.md') as file: + LONG_DESCRIPTION = file.read() + +PACKAGE = 'rss-reader' + +setup( + name=PACKAGE, + version=__import__(PACKAGE).__version__, + description="RSS News Reader for EPAM Python Courses", + long_description=LONG_DESCRIPTION, + long_description_content_type='text/markdown', + author="Pivovar Sergey", + author_email="pivovar-ser-leon@inbox.ru", + url="https://github.com/TeRRoRlsT/PythonHomework.git", + + packages=find_packages(), + + python_requires='>=3.8', + install_requires=['argparse', 'logging', 'feedparser', 'htmlparser', 'json'], + + classifiers=[ + "Intended Audience :: Developers", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + ], +) From 63e4a49584d879d000cc347c957516a885e08812 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Sun, 17 Nov 2019 01:12:38 +0300 Subject: [PATCH 04/43] Fix rss_reader In file `rss_reader.py` fix local import '.htmlparser' -> 'htmlparser' --- rss-reader/rss_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rss-reader/rss_reader.py b/rss-reader/rss_reader.py index 12097a8..b535f53 100644 --- a/rss-reader/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -21,7 +21,7 @@ import feedparser import argparse -from .htmlparser import * +from htmlparser import * class RSSReader: From 424df94d2e5680294ef899519f70ecdc08a20baf Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Sun, 17 Nov 2019 10:37:36 +0300 Subject: [PATCH 05/43] Added error handling --- rss-reader/rss_reader.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/rss-reader/rss_reader.py b/rss-reader/rss_reader.py index b535f53..b4501f7 100644 --- a/rss-reader/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -41,13 +41,15 @@ def execute(self, source, verbose, limit, as_json): if verbose: logging.basicConfig(level=logging.INFO) logging.info("Logging enabled") - logging.info("Getting response from %s" % source) - response = feedparser.parse(source) + logging.info(f"Getting response from {source}") + if 'status' not in (response := feedparser.parse(source.strip())) or len(response.entries) == 0: + print(f"Error: Impossible parse RSS Feeds from url '{source}'") + exit(0) if response.status in range(200, 300): - logging.info("Status code %d. Getting articles from %s was successful" % (response.status, source)) + logging.info(f"Status code {response.status}. Getting articles from {source} was successful") else: - logging.info("Status code %d. Getting articles from %s was unsuccessful" % (response.status, source)) + logging.info(f"Status code {response.status}. Getting articles from {source} was unsuccessful") title = self.parse_title(response) articles = self.parse_articles(response, limit) @@ -109,7 +111,7 @@ def parse_title(response): :rtype: dict or None """ try: - logging.info("Successfully get Header of RSS Source: %s" % response.feed.title) + logging.info(f"Successfully get Header of RSS Source: {response.feed.title}") return {'feed': response.feed.title} except KeyError: logging.info("Getting header of RSS Source was unsuccessful") @@ -128,10 +130,10 @@ def parse_articles(response, limit): :return: news articles of limited length :rtype: dict """ - logging.info("Start loading articles. Limit: %d" % limit or "None") + logging.info(f"Start loading articles. Limit: {limit or 'None'}") result = response.entries if limit is not None: - logging.info("Completed. Loaded %d articles" % min(limit, len(result))) + logging.info(f"Completed. Loaded {min(limit, len(result))} articles") return result[0:min(limit, len(result))] else: logging.info("Completed. Loaded all articles") @@ -151,6 +153,10 @@ def main(): if settings.version: print(f'RSS Reader {__version__}') + if settings.limit < 1: + print(f"Error: Impossible parse 0 and less RSS Feeds") + exit(0) + RSSReader().execute(settings.source, settings.verbose, settings.limit, settings.json) From 96b1110a2f5db1b9ee1c0c56c0d43af13e5283a3 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Sun, 17 Nov 2019 17:48:10 +0300 Subject: [PATCH 06/43] Rebuilt the project Change: * In file `rss_reader` change struct. Remove all parse functions to `htmlparser`. * In file `htmlparser` added model Article. Rewrite all method of class HTMLParser for work with model Article. * In all files fix docstrings with PEP8. Fix all files with PEP8. --- rss-reader/htmlparser.py | 264 +++++++++++++++++++++++++++++---------- rss-reader/rss_reader.py | 89 ++++--------- 2 files changed, 221 insertions(+), 132 deletions(-) diff --git a/rss-reader/htmlparser.py b/rss-reader/htmlparser.py index db43aef..4bd28a6 100644 --- a/rss-reader/htmlparser.py +++ b/rss-reader/htmlparser.py @@ -7,21 +7,29 @@ class Tag(ABC): """ Abstract class for working with tags as a class structure. - """ - def __init__(self, **kwargs): for arg, val in kwargs.items(): self.__setattr__(arg, val) @abstractmethod - def link(self, ind): + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str """ - Get string line for output in links section. + pass + + @abstractmethod + def format_link(self, ind): + """ + Get formatted link to output in the links section. :param ind: Sequence number in the queue :type ind: int - :return: string line for output in links section + :return: string to output in links section :rtype: str """ pass @@ -29,34 +37,43 @@ def link(self, ind): @abstractmethod def __str__(self): """ - Get string line for output tag in description section. + Get string to output tag in the description section. - :return: string line for output tag in description section + :return: string to output tag in the description section """ pass class A(Tag): """ - Class for work with tag a (link) as a class struct. + Class for work with tag `a` (link) as a class struct. """ href = None def __str__(self): """ - Get string line for output tag in description section. + Get string to output tag in description section. - :return: string line for output tag in description section + :return: string to output tag in the description section """ return "[link %d]" - def link(self, ind): + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str """ - Get string line for output in links section. + return self.href + + def format_link(self, ind): + """ + Get formatted link to output in the links section. :param ind: Sequence number in the queue :type ind: int - :return: string line for output in links section + :return: string to output in the links section :rtype: str """ return "[%d]: %s (link)" % (ind, self.href) @@ -71,24 +88,84 @@ class Img(Tag): def __str__(self): """ - Get string line for output tag in description section. + Get string to output tag in description section. - :return: string line for output tag in description section + :return: string to output tag in the description section """ return f"[Image %d: {self.alt or 'None'}] " - def link(self, ind): + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + return self.src + + def format_link(self, ind): """ - Get string line for output in links section. + Get formatted link to output in the links section. :param ind: Sequence number in the queue :type ind: int - :return: string line for output in links section + :return: string to output in the links section :rtype: str """ return "[%d]: %s (image)" % (ind, self.src) +class Article: + """ + News feed object oriented model. Use for work with articles like a class. + """ + def __init__(self): + self.title = None + self.description = None + self.link = None + self.pubDate = None + self.media = None + self.links = None + + @classmethod + def from_dict(cls, fields: dict): + """ + An alternative constructor for creating an article model from the dict of the parsed article. + + :param fields: dict with all needed fields for current article + :type fields: dict + :return: article object with needed data in fields + :rtype: Article + """ + obj = cls() + for f, v in fields.items(): + setattr(obj, f, v) + + return obj + + def to_dict(self, fields=None): + """ + Method for getting a dictionary with all fields of self object. + You can customize fields by giving a list with needing you fields. + + :param fields: optional parameter to change the content of returned dict + :return: dict with all fields + """ + if fields is None: + _fields = ( + 'title', + 'description', + 'link', + 'pubDate', + 'media', + 'links', + ) + else: + _fields = fields + + return {f: getattr(self, f, None) for f in _fields} + + class HTMLParser: """ A class for parse news articles from response struct of module "feedparser". @@ -102,41 +179,89 @@ class HTMLParser: def __init__(self): self._stack = [] - def get_json(self, articles, title): + def get_json(self, response, limit): """ Method for converting given articles and title of RSS Source to JSON format. - :param articles: articles for convert to JSON format - :param title: title of RSS Source + :param response: response struct for parse + :param limit: required number of articles to show + :type response: dict + :type limit: int :return: JSON format of given articles with title of RSS Source :rtype: str """ + title, articles = self.parse_all(response, limit, False, False) + result = { 'title': title['feed'], 'articles': { - str(i): self._article_to_dict(articles[i]) for i in range(len(articles)) + i: articles[i].to_dict(None) for i in range(len(articles)) } } return dumps(result) - def parse_article(self, article): + def parse_all(self, response, limit, fill_desc=True, nice_links=True): """ - Method for converting article to dict with given article info in specified format - :param article: article for converting to dict in specified format - :type article: dict - :return: dict with article info in specified format + A method of parsing news articles and creating object models for easy access. + + :param response: response struct for parse + :param limit: required number of articles to show + :param fill_desc: adding formatted links in description or not + :param nice_links: return formatted links or not + :type response: dict + :type limit: int + :type fill_desc: bool + :type nice_links: bool + :return: return a tuple (title, articles). + Title is header of RSS Source. + Articles is a list of object of type Article was created from parsed feeds + :rtype: tuple + """ + raw_articles = self._get_limited_articles(response, limit) + nice_articles = [self._article_to_dict(article, fill_desc, nice_links) for article in raw_articles] + articles = [Article.from_dict(article) for article in nice_articles] + title = self._get_title(response) + return title, articles + + @staticmethod + def _get_title(response): + """ + Static method for parsing header of RSS Source. + + :param response: response struct for parse + :type response: dict + :return: header of RSS Source if parsing was successful, else None + :rtype: dict or None + """ + try: + return {'feed': response.feed.title} + except KeyError: + return None + + @staticmethod + def _get_limited_articles(response, limit): + """ + Method of limiting parsing articles from response struct. + If limit is None return articles given length, else return all available articles. + + :param response: response struct for parse + :param limit: limit of output news articles + :type response: dict + :type limit: int or None + :return: news articles of limited length + :rtype: dict """ - description, links = self._process_text(article.description, True) - return {'title': article.title, - 'description': description, - 'link': article.link, - 'pubDate': article.published - }, links + result = response.entries + if limit is not None: + return result[0:min(limit, len(result))] + else: + return result @staticmethod def _get_next_tag(line): """ - Method for getting startpos and endpos of tag in given string line + Method for getting startpos and endpos of tag in given string line. + :param line: line with html tag :type line: str :return: (startpos, endpos) is a position of next tag in line if line have a tag, else None @@ -153,7 +278,7 @@ def _create_tag(self, params): """ Method for creating Tag struct class from params. - :param params: info for creating tag. + :param params: info for creating tag :type params: dict :return: tag object if creating was successful, else None :rtype: Tag or None @@ -165,14 +290,14 @@ def _create_tag(self, params): except KeyError: return None - def _parse_params_from_line(self, tag_line): + def _get_params_from_line(self, tag_line): """ Method for getting all parameters from html tag string line. If parameter have a value params save value. Else value is True. - :param tag_line: line with tag parameters. + :param tag_line: line with tag parameters :type tag_line: str - :return: dict with parsed parameters. + :return: dict with parsed parameters :rtype: dict """ params = {} @@ -193,11 +318,11 @@ def _get_all_strings(tag_line): """ Method of cutting all string in quotes \"...\". - :param tag_line: line with tag info and strings. + :param tag_line: line with tag info and strings :type tag_line: str :return: tuple (strings, tag_line). strings is a list with all cutting strings. - tag_line is a given string parameter without cutting strings. + tag_line is a given string parameter without cutting strings :rtype: tuple """ strings = [] @@ -207,18 +332,6 @@ def _get_all_strings(tag_line): tag_line = tag_line[:start_ind] + tag_line[end_ind:] return strings, tag_line - def _get_desc_only(self, line): - """ - Method for getting description on news article without inserts links. - - :param line: description with tags and useless links - :type line: str - :return: description on news article without useless info. Text description only - :rtype: str - """ - description, _ = self._process_text(line, False) - return description - def _get_images_from_article(self, article): """ Method for parsing info about all images in given article. @@ -229,19 +342,23 @@ def _get_images_from_article(self, article): :rtype: list """ if self._stack is []: - self._process_text(article.description, False) + self._process_description(article.description, False, False) return [obj for obj in self._stack if isinstance(obj, Img)] - def _process_text(self, description, fill_desc): + def _process_description(self, description, fill_desc, nice_links): """ - Method processing + Method processing description. Use flags to control result. + Flag `fill_desc` adding formatted links in description. + Flag `nice_links` return formatted links. :param description: description of news article with useless info and tags :param fill_desc: adding formatted links in description or not + :param nice_links: return formatted links or not :type description: str - :type fill_desc: True - :return: tuple (description, links) - description is description without useless info and tags. With inserts links or not + :type fill_desc: bool + :type nice_links: bool + :return: tuple (description, links). + description is description without useless info and tags. With inserts links or not. links is list with formatted strings with links from all created tag objects :rtype: tuple """ @@ -251,7 +368,7 @@ def _process_text(self, description, fill_desc): while (pos_tag := self._get_next_tag(description)) is not None: first_quotes, last_quotes = pos_tag full_tag_line = description[first_quotes: last_quotes] - parameters = self._parse_params_from_line(full_tag_line) + parameters = self._get_params_from_line(full_tag_line) obj_tag = self._create_tag(parameters) if obj_tag is not None: self._stack.append(obj_tag) @@ -259,19 +376,28 @@ def _process_text(self, description, fill_desc): description = description[:first_quotes] + (str(obj_tag) % index_of_tag) + description[last_quotes:] else: description = description[:first_quotes] + description[last_quotes:] - links.append(obj_tag.link(index_of_tag)) + + if nice_links: + links.append(obj_tag.format_link(index_of_tag)) + else: + links.append(obj_tag.link()) + index_of_tag += 1 else: description = description[:first_quotes] + description[last_quotes:] return description, links - def _article_to_dict(self, article): + def _article_to_dict(self, article, fill_desc, nice_links): """ Method for converting article info into dict of specific format. :param article: article for converting into dict of specific format + :param fill_desc: adding formatted links in description or not + :param nice_links: return formatted links or not :type article: dict + :type fill_desc: bool + :type nice_links: bool :return: dict of specific format :rtype: dict """ @@ -285,12 +411,16 @@ def images_from_article_to_dict(art): } for i in range(len(content)) } - result = {'title': article.title, - 'description': self._get_desc_only(article.description), - 'link': article.link, - 'pubDate': article.published, - 'media': images_from_article_to_dict(article) - } + description, links = self._process_description(article.description, fill_desc, nice_links) + + result = { + 'title': article.title, + 'description': description, + 'link': article.link, + 'pubDate': article.published, + 'media': images_from_article_to_dict(article), + 'links': links, + } return result diff --git a/rss-reader/rss_reader.py b/rss-reader/rss_reader.py index b4501f7..1b42a9c 100644 --- a/rss-reader/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -27,12 +27,12 @@ class RSSReader: def execute(self, source, verbose, limit, as_json): """ - Procedure executing program. Get additional setting parameters and running + Procedure executing program. Get additional setting parameters and running. :param source: URL for downloading news articles - :param verbose: Output the logs of program - :param limit: Limit of output news articles - :param as_json: Show news articles as JSON + :param verbose: output the logs of program + :param limit: limit of output news articles + :param as_json: show news articles as JSON :type source: str :type verbose: bool :type limit: int @@ -51,94 +51,53 @@ def execute(self, source, verbose, limit, as_json): else: logging.info(f"Status code {response.status}. Getting articles from {source} was unsuccessful") - title = self.parse_title(response) - articles = self.parse_articles(response, limit) - if as_json: - self.json_print(articles, title) + self.json_print(response, limit) else: - self.sample_print(articles, title) + self.sample_print(response, limit) @staticmethod - def json_print(articles, title): + def json_print(response, limit): """ Procedure for output articles in JSON format. - :param articles: articles for converting to JSON - :param title: header of RSS Source - :type articles: dict - :type title: dict + :param response: response struct for parse + :param limit: required number of articles to show + :type response: dict + :type limit: int """ logging.info("Start creating JSON format of feeds") - data = Parser.get_json(articles, title) + data = Parser.get_json(response, limit) logging.info("Completed. Printing..") print(data) @staticmethod - def sample_print(articles, title): + def sample_print(response, limit): """ Procedure for sample output of news articles. - :param articles: articles for output - :param title: header of RSS Source - :type articles: dict - :type title: dict + :param response: response struct for parse + :param limit: required number of articles to show + :type response: dict + :type limit: int """ + title, articles = Parser.parse_all(response, limit) logging.info("Start creating readable format of feeds") if title is not None: print(f"Feed: {title['feed']}\n") for article in articles: logging.info("Parsing article..") - art, links = Parser.parse_article(article) - print(f"Title: {art['title']}\n" - f"Date: {art['pubDate']}\n" - f"Link: {art['link']}\n\n" - f"{art['description']}\n\n" + + print(f"Title: {article.title}\n" + f"Date: {article.pubDate}\n" + f"Link: {article.link}\n\n" + f"{article.description}\n\n" f"Links:") - for link in links: + for link in article.links: print(link) print('################################################################################') - @staticmethod - def parse_title(response): - """ - Static method for parsing header of RSS Source. - - :param response: response struct for parse - :type response: dict - :return: header of RSS Source if parsing was successful, else None - :rtype: dict or None - """ - try: - logging.info(f"Successfully get Header of RSS Source: {response.feed.title}") - return {'feed': response.feed.title} - except KeyError: - logging.info("Getting header of RSS Source was unsuccessful") - return None - - @staticmethod - def parse_articles(response, limit): - """ - Parse articles from response struct. - If limit is None return articles given length, else return all available articles. - - :param response: response struct for parse - :param limit: limit of output news articles - :type response: dict - :type limit: int or None - :return: news articles of limited length - :rtype: dict - """ - logging.info(f"Start loading articles. Limit: {limit or 'None'}") - result = response.entries - if limit is not None: - logging.info(f"Completed. Loaded {min(limit, len(result))} articles") - return result[0:min(limit, len(result))] - else: - logging.info("Completed. Loaded all articles") - return result - def main(): parser = argparse.ArgumentParser() From 4da92c86a03715f5cea57b7cab895edb82d5c674 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Sun, 24 Nov 2019 23:41:29 +0300 Subject: [PATCH 07/43] Change signature Change: * Added in module `rss_reader` caching articles. Rewrite logging. Rewrite docstrings. Rewrite getting version of application * From module `htmlparser` remove model Article. Change docstrings. Change method HTMLParser.parse, now it return list of dicts of articles * Update version to `0.9` Fix all files with PEP8 --- .gitignore | 1 + rss-reader/__init__.py | 2 +- rss-reader/htmlparser.py | 213 +++++++++++---------------------------- rss-reader/rss_reader.py | 137 +++++++++++++++---------- 4 files changed, 142 insertions(+), 211 deletions(-) diff --git a/.gitignore b/.gitignore index fc450d7..bfa446e 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,4 @@ venv.bak/ /site .mypy_cache/ .idea/ +*.sqlite3 diff --git a/rss-reader/__init__.py b/rss-reader/__init__.py index 26421e1..824978d 100644 --- a/rss-reader/__init__.py +++ b/rss-reader/__init__.py @@ -1 +1 @@ -__version__ = "0.8" +__version__ = "0.9" diff --git a/rss-reader/htmlparser.py b/rss-reader/htmlparser.py index 4bd28a6..d83025a 100644 --- a/rss-reader/htmlparser.py +++ b/rss-reader/htmlparser.py @@ -1,5 +1,6 @@ +import logging from abc import ABC, abstractmethod -from json import dumps +import html __all__ = ['Parser'] @@ -8,6 +9,7 @@ class Tag(ABC): """ Abstract class for working with tags as a class structure. """ + def __init__(self, **kwargs): for arg, val in kwargs.items(): self.__setattr__(arg, val) @@ -20,7 +22,6 @@ def link(self): :return: media object source URL :rtype: str """ - pass @abstractmethod def format_link(self, ind): @@ -32,7 +33,6 @@ def format_link(self, ind): :return: string to output in links section :rtype: str """ - pass @abstractmethod def __str__(self): @@ -41,7 +41,6 @@ def __str__(self): :return: string to output tag in the description section """ - pass class A(Tag): @@ -56,7 +55,7 @@ def __str__(self): :return: string to output tag in the description section """ - return "[link %d]" + return "[link {}]" def link(self): """ @@ -76,7 +75,7 @@ def format_link(self, ind): :return: string to output in the links section :rtype: str """ - return "[%d]: %s (link)" % (ind, self.href) + return f"[{ind}]: {self.href} (link)" class Img(Tag): @@ -92,7 +91,7 @@ def __str__(self): :return: string to output tag in the description section """ - return f"[Image %d: {self.alt or 'None'}] " + return "[Image {}: {} ] ".format('{}', self.alt) def link(self): """ @@ -112,58 +111,7 @@ def format_link(self, ind): :return: string to output in the links section :rtype: str """ - return "[%d]: %s (image)" % (ind, self.src) - - -class Article: - """ - News feed object oriented model. Use for work with articles like a class. - """ - def __init__(self): - self.title = None - self.description = None - self.link = None - self.pubDate = None - self.media = None - self.links = None - - @classmethod - def from_dict(cls, fields: dict): - """ - An alternative constructor for creating an article model from the dict of the parsed article. - - :param fields: dict with all needed fields for current article - :type fields: dict - :return: article object with needed data in fields - :rtype: Article - """ - obj = cls() - for f, v in fields.items(): - setattr(obj, f, v) - - return obj - - def to_dict(self, fields=None): - """ - Method for getting a dictionary with all fields of self object. - You can customize fields by giving a list with needing you fields. - - :param fields: optional parameter to change the content of returned dict - :return: dict with all fields - """ - if fields is None: - _fields = ( - 'title', - 'description', - 'link', - 'pubDate', - 'media', - 'links', - ) - else: - _fields = fields - - return {f: getattr(self, f, None) for f in _fields} + return f"[{ind}]: {self.src} (image)" class HTMLParser: @@ -177,66 +125,47 @@ class HTMLParser: } def __init__(self): - self._stack = [] - - def get_json(self, response, limit): - """ - Method for converting given articles and title of RSS Source to JSON format. - - :param response: response struct for parse - :param limit: required number of articles to show - :type response: dict - :type limit: int - :return: JSON format of given articles with title of RSS Source - :rtype: str - """ - title, articles = self.parse_all(response, limit, False, False) - - result = { - 'title': title['feed'], - 'articles': { - i: articles[i].to_dict(None) for i in range(len(articles)) - } - } - return dumps(result) + self._tags = [] - def parse_all(self, response, limit, fill_desc=True, nice_links=True): + def parse(self, response, limit): """ A method of parsing news articles and creating object models for easy access. :param response: response struct for parse :param limit: required number of articles to show - :param fill_desc: adding formatted links in description or not - :param nice_links: return formatted links or not :type response: dict :type limit: int - :type fill_desc: bool - :type nice_links: bool - :return: return a tuple (title, articles). + :return: return a dict {'title': str, 'articles': list). Title is header of RSS Source. - Articles is a list of object of type Article was created from parsed feeds - :rtype: tuple + Articles is a list of dicts with articles info which was created from parsed feeds + :rtype: dict """ + logging.info("Getting list of limited articles") raw_articles = self._get_limited_articles(response, limit) - nice_articles = [self._article_to_dict(article, fill_desc, nice_links) for article in raw_articles] - articles = [Article.from_dict(article) for article in nice_articles] - title = self._get_title(response) - return title, articles + + logging.info("Completed. Converting each article to dict") + nice_articles = [self._article_to_dict(article) for article in raw_articles] + + logging.info("Completed. Clear articles from HTML escapes") + articles = [self._clear_from_html(article) for article in nice_articles] + + logging.info("Getting a RSS source title") + title = response.feed.title + + return {'title': title, 'articles': articles} @staticmethod - def _get_title(response): + def _clear_from_html(article): """ - Static method for parsing header of RSS Source. + Method to clear html escapes from all fields of article. - :param response: response struct for parse - :type response: dict - :return: header of RSS Source if parsing was successful, else None - :rtype: dict or None + :param article: article to clear from HTML escapes + :return: clean article """ - try: - return {'feed': response.feed.title} - except KeyError: - return None + for k, v in article.items(): + article[k] = html.unescape(v) + + return article @staticmethod def _get_limited_articles(response, limit): @@ -253,8 +182,10 @@ def _get_limited_articles(response, limit): """ result = response.entries if limit is not None: + logging.info(f"Completed. Loaded {min(limit, len(result))} articles with limit {limit}") return result[0:min(limit, len(result))] else: + logging.info(f"Completed. Loaded {len(result)} articles without any limit") return result @staticmethod @@ -332,93 +263,63 @@ def _get_all_strings(tag_line): tag_line = tag_line[:start_ind] + tag_line[end_ind:] return strings, tag_line - def _get_images_from_article(self, article): - """ - Method for parsing info about all images in given article. - - :param article: article for parse info about all images - :type article: dict - :return: list of tag Image objects info about images - :rtype: list - """ - if self._stack is []: - self._process_description(article.description, False, False) - return [obj for obj in self._stack if isinstance(obj, Img)] - - def _process_description(self, description, fill_desc, nice_links): + def _process_description(self, desc): """ - Method processing description. Use flags to control result. - Flag `fill_desc` adding formatted links in description. - Flag `nice_links` return formatted links. + Method processing description. Return description of specific format. - :param description: description of news article with useless info and tags - :param fill_desc: adding formatted links in description or not - :param nice_links: return formatted links or not - :type description: str - :type fill_desc: bool - :type nice_links: bool + :param desc: description of news article with useless info and tags + :type desc: str :return: tuple (description, links). description is description without useless info and tags. With inserts links or not. links is list with formatted strings with links from all created tag objects :rtype: tuple """ - self._stack.clear() + self._tags.clear() index_of_tag = 1 links = [] - while (pos_tag := self._get_next_tag(description)) is not None: + while (pos_tag := self._get_next_tag(desc)) is not None: first_quotes, last_quotes = pos_tag - full_tag_line = description[first_quotes: last_quotes] + full_tag_line = desc[first_quotes: last_quotes] parameters = self._get_params_from_line(full_tag_line) obj_tag = self._create_tag(parameters) if obj_tag is not None: - self._stack.append(obj_tag) - if fill_desc: - description = description[:first_quotes] + (str(obj_tag) % index_of_tag) + description[last_quotes:] - else: - description = description[:first_quotes] + description[last_quotes:] - - if nice_links: - links.append(obj_tag.format_link(index_of_tag)) - else: - links.append(obj_tag.link()) + self._tags.append(obj_tag) + desc = desc[:first_quotes] + (str(obj_tag).format(index_of_tag)) + desc[last_quotes:] + links.append(obj_tag.format_link(index_of_tag)) index_of_tag += 1 else: - description = description[:first_quotes] + description[last_quotes:] + desc = desc[:first_quotes] + desc[last_quotes:] - return description, links + return desc, links - def _article_to_dict(self, article, fill_desc, nice_links): + def _article_to_dict(self, article): """ Method for converting article info into dict of specific format. :param article: article for converting into dict of specific format - :param fill_desc: adding formatted links in description or not - :param nice_links: return formatted links or not :type article: dict - :type fill_desc: bool - :type nice_links: bool :return: dict of specific format :rtype: dict """ - def images_from_article_to_dict(art): - content = self._get_images_from_article(art) - return { - str(i): { - 'src': content[i].src, - 'alt': content[i].alt - } for i in range(len(content)) - } + description, links = self._process_description(article.description) - description, links = self._process_description(article.description, fill_desc, nice_links) + images = [obj for obj in self._tags if isinstance(obj, Img)] + + media = { + str(i): { + 'src': images[i].src, + 'alt': images[i].alt + } for i in range(len(images)) + } result = { 'title': article.title, 'description': description, 'link': article.link, 'pubDate': article.published, - 'media': images_from_article_to_dict(article), + 'media': media, 'links': links, } diff --git a/rss-reader/rss_reader.py b/rss-reader/rss_reader.py index 1b42a9c..9add4ad 100644 --- a/rss-reader/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -1,5 +1,5 @@ """ -Python RSS reader v0.8 +Python RSS reader Designed to download news from the entered url. @@ -8,96 +8,121 @@ * Conversion to JSON * Logging * Limiting articles + * Caching news feeds in SQLite database For information enter - "python rss_reader.py --help" + "python rss_reader.py -h" in terminal to find more information. """ +__package__ = 'rss-reader' -__version__ = "v0.8" - +import datetime +import json import logging import feedparser import argparse from htmlparser import * +from storage_controller import * class RSSReader: - def execute(self, source, verbose, limit, as_json): + def __call__(self, source, limit, as_json, date): """ Procedure executing program. Get additional setting parameters and running. :param source: URL for downloading news articles - :param verbose: output the logs of program - :param limit: limit of output news articles + :param limit: limit news topics if this parameter provided :param as_json: show news articles as JSON + :param date: print cached articles by date :type source: str - :type verbose: bool :type limit: int :type as_json: bool + :type date: str """ - if verbose: - logging.basicConfig(level=logging.INFO) - logging.info("Logging enabled") - logging.info(f"Getting response from {source}") - if 'status' not in (response := feedparser.parse(source.strip())) or len(response.entries) == 0: - print(f"Error: Impossible parse RSS Feeds from url '{source}'") + if limit and limit < 1: + print(f"Error: Impossible parse 0 and less RSS Feeds") exit(0) - if response.status in range(200, 300): - logging.info(f"Status code {response.status}. Getting articles from {source} was successful") + if not date: + logging.info("Start loading articles from RSS source") + articles = self._get_articles_from_url(source, limit) + logging.info("Completed. Saving articles in cache") + count = StorageController().save(source, articles['articles'], articles['title']) + logging.info(f"Completed. {count} articles was saved in cache") else: - logging.info(f"Status code {response.status}. Getting articles from {source} was unsuccessful") + logging.info("Start loading from cache") + try: + logging.info("Check date format") + datetime.datetime.strptime(date, "%Y%m%d") + except ValueError: + print(f"Error format date {date}. Need '%Y%m%d'") + exit(0) + logging.info("Date is correct. Start loading by date") + articles = StorageController().load(source, date, limit) + + logging.info("All articles was successfully loaded") if as_json: - self.json_print(response, limit) + self.json_print(articles) else: - self.sample_print(response, limit) + self.sample_print(articles) @staticmethod - def json_print(response, limit): - """ - Procedure for output articles in JSON format. + def _get_articles_from_url(source, limit): + logging.info("Completed. Check the availability of URL.") - :param response: response struct for parse - :param limit: required number of articles to show - :type response: dict - :type limit: int - """ - logging.info("Start creating JSON format of feeds") - data = Parser.get_json(response, limit) - logging.info("Completed. Printing..") - print(data) + if 'status' not in (response := feedparser.parse(source.strip())) or len(response.entries) == 0: + print(f"Error: Impossible parse RSS Feeds from url '{source}'") + exit(0) + + logging.info("Completed. Check status code of response.") + + if response.status in range(200, 300): + logging.info(f"Status code {response.status}. Getting articles from '{source}' was successful") + else: + print(f"Error connecting with URL '{source.strip()}' with status code {response.status}.") + exit(0) + + return Parser.parse(response, limit) @staticmethod - def sample_print(response, limit): + def sample_print(articles): """ Procedure for sample output of news articles. - :param response: response struct for parse - :param limit: required number of articles to show - :type response: dict - :type limit: int + :param articles: dict with title and list of news articles + :type articles: dict """ - title, articles = Parser.parse_all(response, limit) - logging.info("Start creating readable format of feeds") - if title is not None: - print(f"Feed: {title['feed']}\n") - - for article in articles: - logging.info("Parsing article..") - - print(f"Title: {article.title}\n" - f"Date: {article.pubDate}\n" - f"Link: {article.link}\n\n" - f"{article.description}\n\n" + logging.info("Start sample output") + + if (title := articles.get('title', None)) is not None: + print(f"Feed: {title}\n") + + for article in articles['articles']: + print(f"Title: {article['title']}\n" + f"Date: {article['pubDate']}\n" + f"Link: {article['link']}\n\n" + f"{article['description']}\n\n" f"Links:") - for link in article.links: + for link in article['links']: print(link) print('################################################################################') + @staticmethod + def json_print(articles): + """ + Procedure for output articles in JSON format. + + :param articles: dict with title and list of news articles + :type articles: dict + """ + logging.info("Converting all articles to JSON") + data = json.dumps(articles) + logging.info("Completed. Output JSON") + print(data) + def main(): parser = argparse.ArgumentParser() @@ -106,17 +131,21 @@ def main(): parser.add_argument('--json', action='store_true', help='Print result as JSON in stdout') parser.add_argument('--verbose', action='store_true', help='Outputs verbose status messages') parser.add_argument('--limit', type=int, help='Limit news topics if this parameter provided') + parser.add_argument('--date', type=str, help='Print cached articles by date') settings = parser.parse_args() if settings.version: - print(f'RSS Reader {__version__}') + print(f'RSS Reader {__import__(__package__).__version__}') - if settings.limit < 1: - print(f"Error: Impossible parse 0 and less RSS Feeds") - exit(0) + if settings.verbose: + logging.basicConfig(level=logging.INFO) + logging.info("Logging enabled") - RSSReader().execute(settings.source, settings.verbose, settings.limit, settings.json) + RSSReader()(settings.source, + settings.limit, + settings.json, + settings.date) if __name__ == '__main__': From 8337f862bea21897581e2c910f4075c90c4fbb38 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Sun, 24 Nov 2019 23:48:56 +0300 Subject: [PATCH 08/43] Added Caching Added: * `models` with ORM models for working with db * `/managers/` in module with managers of relevant models * `controller` with storage controller for work with db Fix all files with PEP8 --- rss-reader/storage_controller/__init__.py | 1 + rss-reader/storage_controller/controller.py | 130 ++++++++++++++++++ .../storage_controller/managers/__init__.py | 2 + .../managers/articlemanager.py | 30 ++++ .../managers/sourcemanager.py | 51 +++++++ rss-reader/storage_controller/models.py | 125 +++++++++++++++++ 6 files changed, 339 insertions(+) create mode 100644 rss-reader/storage_controller/__init__.py create mode 100644 rss-reader/storage_controller/controller.py create mode 100644 rss-reader/storage_controller/managers/__init__.py create mode 100644 rss-reader/storage_controller/managers/articlemanager.py create mode 100644 rss-reader/storage_controller/managers/sourcemanager.py create mode 100644 rss-reader/storage_controller/models.py diff --git a/rss-reader/storage_controller/__init__.py b/rss-reader/storage_controller/__init__.py new file mode 100644 index 0000000..323031d --- /dev/null +++ b/rss-reader/storage_controller/__init__.py @@ -0,0 +1 @@ +from storage_controller.controller import * diff --git a/rss-reader/storage_controller/controller.py b/rss-reader/storage_controller/controller.py new file mode 100644 index 0000000..d3ee9db --- /dev/null +++ b/rss-reader/storage_controller/controller.py @@ -0,0 +1,130 @@ +""" +Module with controllers for work with db. + +""" +import json +import logging +import datetime + +from peewee import PeeweeException + +from storage_controller.managers import * + +__all__ = ['StorageController'] + + +class StorageController: + """ + Controller for loading and saving articles in database. + """ + + def __init__(self): + try: + DB_HANDLE.connect() + self.articles = ArticleManager() + self.sources = SourceManager() + except PeeweeException as e: + print(e) + return + + def load(self, url, date, limit): + """ + Method for loading limited articles from database + + :param url: source URL for getting articles from db + :param date: date from which need to load articles in string + :param limit: limit of articles for loading + :type url: str + :type date: str + :type limit: int + :return: list of dicts of articles with date after a given date + :rtype: list + """ + + clr_url = url.strip('/\\') + logging.info(f"Start loading articles from storage") + articles = self.sources.get_articles_with_data_from(clr_url, date) + + logging.info(f"Completed. Cutting list of articles") + if limit is not None: + articles['articles'] = [article for i, article in enumerate(articles['articles']) if i < limit] + + logging.info(f"Completed. Converting date each article") + articles['articles'] = [self._convert_date(article) for article in articles['articles']] + + logging.info(f"Completed. Load from JSON links each article") + articles['articles'] = [self._load_links(article) for article in articles['articles']] + + logging.info(f"Completed. Load from JSON media each article") + articles['articles'] = [self._load_media(article) for article in articles['articles']] + + logging.info(f"Completed. Convert to dict each article") + articles['articles'] = [article.to_dict() for article in articles['articles']] + + return articles + + def save(self, url, articles, title): + """ + Method for saving parsed articles. + + :param url: string URL of RSS source + :param articles: parsed articles + :param title: title of RSS source + :type url: str + :type articles: list + :type title: str + :return: count of new created articles in db + :rtype: int + """ + clr_url = url.strip('/\\') + + logging.info(f"Getting source model") + source = self.sources.get_or_create(clr_url, title) + + logging.info(f"Completed. Saving articles in chosen source model") + count = self.articles.create_and_return(articles, source) + return count + + @staticmethod + def _load_media(article): + """ + Method for converting media of a given article from JSON. + + :param article: article with media in JSON + :type article: Article + :return: article with correct media + :rtype: Article + """ + article.media = json.loads(article.media) + return article + + @staticmethod + def _load_links(article): + """ + Method for converting links of a given article from JSON. + + :param article: article with links in JSON + :type article: Article + :return: article with correct links + :rtype: Article + """ + article.links = json.loads(article.links) + return article + + @staticmethod + def _convert_date(article, from_fmt="%Y-%m-%d", to_fmt="%a, %d %b %Y"): + """ + Method for converting date of a given article to specific format. + + :param article: article with incorrect format of date + :param from_fmt: optional parameter. Format to convert from + :param to_fmt: optional parameter. Format to convert to + :type article: Article + :type from_fmt: str + :type to_fmt: str + :return: Article object with correct format of date + :rtype: Article + """ + _date = datetime.datetime.strptime(article.pubDate, from_fmt) + article.pubDate = datetime.datetime.strftime(_date, to_fmt) + return article diff --git a/rss-reader/storage_controller/managers/__init__.py b/rss-reader/storage_controller/managers/__init__.py new file mode 100644 index 0000000..7ff6ec0 --- /dev/null +++ b/rss-reader/storage_controller/managers/__init__.py @@ -0,0 +1,2 @@ +from .articlemanager import ArticleManager, DB_HANDLE +from .sourcemanager import SourceManager, DB_HANDLE diff --git a/rss-reader/storage_controller/managers/articlemanager.py b/rss-reader/storage_controller/managers/articlemanager.py new file mode 100644 index 0000000..5bda5e8 --- /dev/null +++ b/rss-reader/storage_controller/managers/articlemanager.py @@ -0,0 +1,30 @@ +""" +Module manager of database model Article. + +""" +from storage_controller.models import Article, DB_HANDLE + +__all__ = ['ArticleManager'] + + +class ArticleManager: + def __init__(self): + Article.create_table() + + @staticmethod + def create_and_return(structs, source): + """ + Method for creating articles in list in db. Return count of created objects + + :param structs: list of articles structs + :param source: model Source object of feeds source + :type structs: list + :type source: Source + :return: count of new created objects + :rtype: int + """ + count = 0 + for struct in structs: + if Article.from_dict(struct, source) is not None: + count += 1 + return count diff --git a/rss-reader/storage_controller/managers/sourcemanager.py b/rss-reader/storage_controller/managers/sourcemanager.py new file mode 100644 index 0000000..4e2272b --- /dev/null +++ b/rss-reader/storage_controller/managers/sourcemanager.py @@ -0,0 +1,51 @@ +""" +Module manager of database model Article. + +""" +import logging + +from storage_controller.models import Source, DB_HANDLE + +__all__ = ['SourceManager'] + + +class SourceManager: + def __init__(self): + Source.create_table() + + @staticmethod + def get_or_create(url, title): + """ + Method for safe getting a Source model object. + + :param url: string link for init object + :param title: title of feeds source + :type url: str + :type title: str + :return: Source object. If object with such data is founded return it, + else created new object and return it. + :rtype: Source + """ + return Source.get_or_create(url, title=title) + + @staticmethod + def get_articles_with_data_from(url, date): + """ + Method to getting articles with date after a given date. + + :param url: URL-key for getting Source object + :param date: date for query + :type url: str + :type date: str + :return: dict with title of a rss source and founded articles + :rtype dict + """ + logging.info(f"Getting source model") + source = Source.get_or_create(url) + + logging.info(f"Completed. Getting articles from source") + articles = source.sort_by_date(date) + return { + 'title': source.title, + 'articles': articles, + } diff --git a/rss-reader/storage_controller/models.py b/rss-reader/storage_controller/models.py new file mode 100644 index 0000000..766bcb5 --- /dev/null +++ b/rss-reader/storage_controller/models.py @@ -0,0 +1,125 @@ +""" +Module with description of models in database. +Used SQLite3 database and peewee module for work with it. + +""" + +import datetime + +import peewee +import json + +__all__ = ['DB_HANDLE', 'Source', 'Article'] + +STORAGE_DATABASE = 'storage.sqlite3' + +DB_HANDLE = peewee.SqliteDatabase(STORAGE_DATABASE) + + +class Source(peewee.Model): + """ + Model for containing rss feed sources in SQLite database. + + Fields: + title: title of article + url: absolute URL to RSS source + """ + title = peewee.TextField(null=True) + url = peewee.TextField(unique=True) + + class Meta: + database = DB_HANDLE + db_table = "sources" + + @classmethod + def get_or_create(cls, url, title=None): + """ + Rewriten base method of safe getting Source model object. + + :param url: string link for init object + :param title: title of feeds source + :type url: str + :type title: str + :return: Source object. If object with such data is founded return it, + else created new object and return it. + :rtype: Source + """ + try: + return super().get(Source.url == url) + except peewee.DoesNotExist: + return cls.create(url=url, title=title) + + def sort_by_date(self, date): + """ + Method for get list of articles with a date after the given date. + + :param date: datetime for searching articles in string + :type date: str + :return: a list with of articles with a date after the given date + :rtype: list + """ + return self.articles.select().where(Article.pubDate >= date) + + +class Article(peewee.Model): + """ + Model for containing rss feed article in SQLite database. + All objects of this model ordered by pubDate field. + + Fields: + title: title of article + description: description of article + link: absolute URL to article + pubDate: date of publication article + media: all media objects from article + source: absolute URL to containing RSS source + links: all links from article in specified format + """ + title = peewee.TextField() + description = peewee.TextField() + link = peewee.CharField(unique=True) + pubDate = peewee.DateField(formats=["%Y%m%d", ]) + media = peewee.TextField() + source = peewee.ForeignKeyField(Source, backref='articles') + links = peewee.TextField() + + class Meta: + database = DB_HANDLE + db_table = "articles" + order_by = ('-pubDate',) + + @classmethod + def from_dict(cls, struct, source): + """ + Class method for creating Article model object from given dict. + + :param struct: dictionary with info about article + :param source: Source object of source feeds. Used for connect sources with articles + :type struct: dict + :type source: Source + :return: return Article object if no objects in db with such link. Else None + :rtype: Article or None + """ + try: + return cls.create( + title=struct['title'], + description=struct['description'], + link=struct['link'], + pubDate=datetime.datetime.strptime(struct['pubDate'], "%a, %d %b %Y %H:%M:%S %z"), + media=json.dumps(struct['media']), + source=source, + links=json.dumps(struct['links']), + ) + except peewee.IntegrityError: + return None + + def to_dict(self): + return { + 'title': self.title, + 'description': self.description, + 'link': self.link, + 'pubDate': self.pubDate, + 'media': self.media, + 'source': self.source.url, + 'links': self.links, + } From 6c1822c01c5b63f7175648ac138f8684ea14821c Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Mon, 25 Nov 2019 19:18:23 +0300 Subject: [PATCH 09/43] Update version to `1.0` + added args `--to-pdf TO_PDF` and `--to-html TO_HTML` + change signature of main RSSReader method. Update docstrings --- rss-reader/__init__.py | 2 +- rss-reader/rss_reader.py | 67 +++++++++++++--------------------------- 2 files changed, 22 insertions(+), 47 deletions(-) diff --git a/rss-reader/__init__.py b/rss-reader/__init__.py index 824978d..4802e90 100644 --- a/rss-reader/__init__.py +++ b/rss-reader/__init__.py @@ -1 +1 @@ -__version__ = "0.9" +__version__ = "1.0" diff --git a/rss-reader/rss_reader.py b/rss-reader/rss_reader.py index 9add4ad..30135c1 100644 --- a/rss-reader/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -9,6 +9,7 @@ * Logging * Limiting articles * Caching news feeds in SQLite database + * Converting to PDF and HTML formats For information enter "python rss_reader.py -h" @@ -18,28 +19,33 @@ __package__ = 'rss-reader' import datetime -import json import logging import feedparser import argparse from htmlparser import * from storage_controller import * +from output_controller import * class RSSReader: - def __call__(self, source, limit, as_json, date): + def __call__(self, source, limit, date, **kwargs): """ Procedure executing program. Get additional setting parameters and running. :param source: URL for downloading news articles :param limit: limit news topics if this parameter provided - :param as_json: show news articles as JSON :param date: print cached articles by date + :param kwargs: optional parameter for control behavior of method. + Use one from this parameters: + * to_json: bool - output in JSON or not + * to_pdf: str - string filename for output + * to_html: str - string filename for output + Default sample output :type source: str :type limit: int - :type as_json: bool :type date: str + :type kwargs: dict """ if limit and limit < 1: print(f"Error: Impossible parse 0 and less RSS Feeds") @@ -64,10 +70,7 @@ def __call__(self, source, limit, as_json, date): logging.info("All articles was successfully loaded") - if as_json: - self.json_print(articles) - else: - self.sample_print(articles) + OutputController.print(articles, **kwargs) @staticmethod def _get_articles_from_url(source, limit): @@ -87,42 +90,6 @@ def _get_articles_from_url(source, limit): return Parser.parse(response, limit) - @staticmethod - def sample_print(articles): - """ - Procedure for sample output of news articles. - - :param articles: dict with title and list of news articles - :type articles: dict - """ - logging.info("Start sample output") - - if (title := articles.get('title', None)) is not None: - print(f"Feed: {title}\n") - - for article in articles['articles']: - print(f"Title: {article['title']}\n" - f"Date: {article['pubDate']}\n" - f"Link: {article['link']}\n\n" - f"{article['description']}\n\n" - f"Links:") - for link in article['links']: - print(link) - print('################################################################################') - - @staticmethod - def json_print(articles): - """ - Procedure for output articles in JSON format. - - :param articles: dict with title and list of news articles - :type articles: dict - """ - logging.info("Converting all articles to JSON") - data = json.dumps(articles) - logging.info("Completed. Output JSON") - print(data) - def main(): parser = argparse.ArgumentParser() @@ -132,9 +99,17 @@ def main(): parser.add_argument('--verbose', action='store_true', help='Outputs verbose status messages') parser.add_argument('--limit', type=int, help='Limit news topics if this parameter provided') parser.add_argument('--date', type=str, help='Print cached articles by date') + parser.add_argument('--to-pdf', type=str, help='Print result as PDF in entered file') + parser.add_argument('--to-html', type=str, help='Print result as HTML in entered file') settings = parser.parse_args() + output = { + 'to_json': settings.json, + 'to_pdf': settings.to_pdf, + 'to_html': settings.to_html, + } + if settings.version: print(f'RSS Reader {__import__(__package__).__version__}') @@ -144,8 +119,8 @@ def main(): RSSReader()(settings.source, settings.limit, - settings.json, - settings.date) + settings.date, + **output) if __name__ == '__main__': From 4f0ad7cc0a18661e8bcab650fea7658af018fe0e Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Mon, 25 Nov 2019 19:19:15 +0300 Subject: [PATCH 10/43] Update .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index bfa446e..ec0f11c 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,5 @@ venv.bak/ .mypy_cache/ .idea/ *.sqlite3 +*.html +*.pdf From 708c9200c79dce47d063413da0d98d0106f28398 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Mon, 25 Nov 2019 20:12:48 +0300 Subject: [PATCH 11/43] Change returned info about images in articles in `htmlparser` module + fix bugs + fix imports in `rss_reader` --- rss-reader/htmlparser.py | 34 ++++++++++++++++++++++------------ rss-reader/rss_reader.py | 2 +- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/rss-reader/htmlparser.py b/rss-reader/htmlparser.py index d83025a..1f93b68 100644 --- a/rss-reader/htmlparser.py +++ b/rss-reader/htmlparser.py @@ -84,6 +84,8 @@ class Img(Tag): """ src = None alt = None + width = None + height = None def __str__(self): """ @@ -91,7 +93,7 @@ def __str__(self): :return: string to output tag in the description section """ - return "[Image {}: {} ] ".format('{}', self.alt) + return "[Image {}: {}] ".format('{}', self.alt) def link(self): """ @@ -263,7 +265,7 @@ def _get_all_strings(tag_line): tag_line = tag_line[:start_ind] + tag_line[end_ind:] return strings, tag_line - def _process_description(self, desc): + def _process_description(self, desc, fill_desc=True, fill_links=True): """ Method processing description. Return description of specific format. @@ -284,9 +286,14 @@ def _process_description(self, desc): obj_tag = self._create_tag(parameters) if obj_tag is not None: self._tags.append(obj_tag) - desc = desc[:first_quotes] + (str(obj_tag).format(index_of_tag)) + desc[last_quotes:] - links.append(obj_tag.format_link(index_of_tag)) - + if fill_desc: + desc = desc[:first_quotes] + str(obj_tag).format(index_of_tag) + desc[last_quotes:] + else: + desc = desc[:first_quotes] + desc[last_quotes:] + if fill_links: + links.append(obj_tag.format_link(index_of_tag)) + else: + links.append(obj_tag.link()) index_of_tag += 1 else: desc = desc[:first_quotes] + desc[last_quotes:] @@ -303,24 +310,27 @@ def _article_to_dict(self, article): :rtype: dict """ - description, links = self._process_description(article.description) + dec_description, dec_links = self._process_description(article.description) + description, links = self._process_description(article.description, False, False) images = [obj for obj in self._tags if isinstance(obj, Img)] - media = { - str(i): { - 'src': images[i].src, - 'alt': images[i].alt - } for i in range(len(images)) - } + media = [ + {"src": image.src, + "alt": image.alt, + "width": image.width, + "height": image.height} for image in images + ] result = { 'title': article.title, 'description': description, + 'dec_description': dec_description, 'link': article.link, 'pubDate': article.published, 'media': media, 'links': links, + 'dec_links': dec_links, } return result diff --git a/rss-reader/rss_reader.py b/rss-reader/rss_reader.py index 30135c1..3f26f0f 100644 --- a/rss-reader/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -25,7 +25,7 @@ from htmlparser import * from storage_controller import * -from output_controller import * +from outputcontroller import * class RSSReader: From ccb6b018f113407483bc48cddf1593bf02adbf95 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Mon, 25 Nov 2019 20:14:33 +0300 Subject: [PATCH 12/43] Added converting articles to HTML format + added OutputController + added SamplePrintController + added JSONPrintController. All extends abstract BaseController --- rss-reader/outputcontroller.py | 119 +++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 rss-reader/outputcontroller.py diff --git a/rss-reader/outputcontroller.py b/rss-reader/outputcontroller.py new file mode 100644 index 0000000..ed78206 --- /dev/null +++ b/rss-reader/outputcontroller.py @@ -0,0 +1,119 @@ +import json +import logging +import abc + +import requests + +__all__ = ['OutputController'] + + +class BaseController(abc.ABC): + def print_to(self, articles, **kwargs): + """ + Method of processing articles and title. Without work with files + """ + + def _print_article(self, title): + pass + + def _print_title(self, article): + pass + + +class SamplePrintController(BaseController): + def print_to(self, articles, **kwargs): + """ + Procedure for sample output of news articles. + + :param articles: dict with title and list of news articles + :type articles: dict + """ + logging.info("Start sample output") + if (title := articles.get('title', None)) is not None: + self._print_title(title) + + for article in articles['articles']: + self._print_article(article) + + def _print_article(self, article): + print(f"Title: {article['title']}\n" + f"Date: {article['pubDate']}\n" + f"Link: {article['link']}\n\n" + f"{article['dec_description']}\n\n" + f"Links:") + for link in article['dec_links']: + print(link) + print('################################################################################') + + def _print_title(self, title): + print(f"Feed: {title}\n") + + +class JSONPrintController(BaseController): + def print_to(self, articles, **kwargs): + """ + Procedure for output articles in JSON format. + + :param articles: dict with title and list of news articles + :type articles: dict + """ + logging.info("Converting all articles to JSON") + data = json.dumps(articles) + logging.info("Completed. Output JSON") + print(data) + + +class PDFPrintController(BaseController): + pass + + +class HTMLPrintController(BaseController): + extension = '.html' + + def print_to(self, articles, **kwargs): + print_to = kwargs.get('filename', None) + if print_to is not None and not print_to.endswith(self.extension): + print_to += self.extension + + html_text = f"RSS Feeds" \ + f"

{articles['title']}

" \ + f"{''.join([self._print_article(art) for art in articles['articles']])}" + + with open(print_to, 'w') as file: + file.write(html_text) + + def _print_article(self, article): + result = "

" + result += '{}(Link to original' \ + '

'.format(article['link'], article['title']) + for image in article['media']: + attrs = " ".join([f"{k}=\"{v}\"" for k, v in image.items()]) + result += "
".format(attrs) + result += f'

Published: {article["pubDate"]}

' + result += f'

{article["description"]}


Links:

' + for i in range(len(article['dec_links'])): + result += f'{i + 1}) ' \ + f'{article["dec_links"][i]}
' + result += "
" + return result + + @staticmethod + def _download_to(link, filename): + img_data = requests.get(link).content + with open(filename, 'wb') as handler: + handler.write(img_data) + return filename + + +class OutputController: + @staticmethod + def print(articles, to_pdf=None, to_html=None, to_json=False): + if to_html is not None: + HTMLPrintController().print_to(articles, filename=to_html) + if to_pdf is not None: + PDFPrintController().print_to(articles, filename=to_pdf) + + if to_json: + JSONPrintController().print_to(articles) + else: + SamplePrintController().print_to(articles) From d67280ede71e379c646743be3cba321635be5bdc Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Tue, 26 Nov 2019 18:41:00 +0300 Subject: [PATCH 13/43] Added info about db in README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index d660c31..7f88e00 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,10 @@ To view the help for running project go to **PythonHomework** folder and execute `python3 rss_reader.py --help` +### SQLite3 +This application uses SQLite3 database to cache all downloaded news articles. +If you use '--date YYYYMMDD' the application will load news articles from the DB with the date after the given date. + ## Tests For run unittest go to **PythonHomework** folder and execute the command: From 09545d0d7ea7f3bbe4766f044ae04e76bb786a9f Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Tue, 26 Nov 2019 18:42:08 +0300 Subject: [PATCH 14/43] Added and fixed all output controllers + write all docstrings --- rss-reader/outputcontroller.py | 284 +++++++++++++++++++++++++++++---- 1 file changed, 257 insertions(+), 27 deletions(-) diff --git a/rss-reader/outputcontroller.py b/rss-reader/outputcontroller.py index ed78206..63732f7 100644 --- a/rss-reader/outputcontroller.py +++ b/rss-reader/outputcontroller.py @@ -1,31 +1,57 @@ import json import logging import abc - +import os +import fpdf import requests __all__ = ['OutputController'] class BaseController(abc.ABC): + """ + Abstract base class for all output controllers. Using such interface for all controllers. + """ + def print_to(self, articles, **kwargs): """ - Method of processing articles and title. Without work with files + Procedure for output of news articles. + + :param articles: dict with title and list of news articles + :param kwargs: optional params. Use to extend a count given params in base method + :type articles: dict + """ + + def _print_article(self, article, **kwargs): """ + Method for output given articles in given PDF file. - def _print_article(self, title): - pass + :param article: article to output + :param kwargs: optional params. Use to extend a count given params in base method + :type article: dict + """ + + def _print_title(self, title, **kwargs): + """ + Method for output given title. - def _print_title(self, article): - pass + :param title: title to output + :param kwargs: optional params. Use to extend a count given params in base method + :type title: str + """ class SamplePrintController(BaseController): + """ + Class controller for sample output in standard out. + """ + def print_to(self, articles, **kwargs): """ Procedure for sample output of news articles. :param articles: dict with title and list of news articles + :param kwargs: optional params. Use to extend a count given params in base method :type articles: dict """ logging.info("Start sample output") @@ -35,7 +61,13 @@ def print_to(self, articles, **kwargs): for article in articles['articles']: self._print_article(article) - def _print_article(self, article): + def _print_article(self, article, **kwargs): + """ + Method for output given articles in given PDF file. + + :param article: articles to output + :param kwargs: optional params. Use to extend a count given params in base method + """ print(f"Title: {article['title']}\n" f"Date: {article['pubDate']}\n" f"Link: {article['link']}\n\n" @@ -45,16 +77,28 @@ def _print_article(self, article): print(link) print('################################################################################') - def _print_title(self, title): + def _print_title(self, title, **kwargs): + """ + Method for output given articles in given PDF file. + + :param title: title to output + :param kwargs: optional params. Use to extend a count given params in base method + :type title: str + """ print(f"Feed: {title}\n") class JSONPrintController(BaseController): + """ + Class controller for output JSON form of articles in standard out. + """ + def print_to(self, articles, **kwargs): """ Procedure for output articles in JSON format. :param articles: dict with title and list of news articles + :param kwargs: optional params. Use to extend a count given params in base method :type articles: dict """ logging.info("Converting all articles to JSON") @@ -64,50 +108,236 @@ def print_to(self, articles, **kwargs): class PDFPrintController(BaseController): - pass + """ + Class controller for output given articles in PDF in file. + """ + image_ext = 'jpg' + cache_folder = "__cache__" + extension = '.pdf' + delimiter_before = "_" * 59 + delimiter_after = "%d".rjust(55, '_').ljust(59, '_') + + def print_to(self, articles, **kwargs): + """ + Method for output given articles in given PDF file. + + :param articles: articles to output + :param kwargs: optional params. Use to extend a count given params in base method. + filename - name of output file + :type articles: dict + """ + print_to = kwargs.get('filename', None) + if print_to is not None and not print_to.endswith(self.extension): + print_to += self.extension + + writer = fpdf.FPDF() + writer.add_page() + self._print_title(articles['title'], writer=writer) + + for i, article in enumerate(articles['articles']): + self._print_article(article, writer=writer, ind=i) + + writer.output(print_to) + + def _print_title(self, title, **kwargs): + """ + Method for output given articles in given PDF file. + + :param title: title to output + :param kwargs: optional params. Use to extend a count given params in base method + writer - FPDF object for output in PDF file + :type title: str + """ + writer = kwargs['writer'] + writer.set_font('Courier', 'B', 20) + writer.multi_cell(0, 30, title, align='C') + + def _print_article(self, article, **kwargs): + """ + Method for output one article in PDF. + + :param article: article to output + :param kwargs: optional params. Use to extend a count given params in base method + writer - FPDF object for output in PDF file + ind - sequence number of article + :type article: dict + """ + writer = kwargs['writer'] + ind = kwargs['ind'] + + article = self._clean_each_elem_article(article) + + writer.set_font("Courier", 'B', 15) + writer.multi_cell(0, 10, self.delimiter_before) + + writer.set_font("Courier", "B", 13) + writer.multi_cell(0, 7, f"Title: {article['title']}", align="L") + + writer.set_font("Courier", "BI", 11) + writer.multi_cell(0, 10, f"Date: {article['pubDate']}", align='R') + + for img in article['media']: + self._draw_image(writer, img) + + writer.set_font("Courier", size=12) + writer.multi_cell(0, 5, article['description'], align='L') + + writer.set_font("Courier", "BI", size=9) + writer.multi_cell(0, 10, f"Link: {article['link']}", align='L') + + writer.set_font("Courier", 'B', 15) + writer.multi_cell(0, 10, self.delimiter_after % (ind + 1)) + + def _clean_each_elem_article(self, elem): + """ + Recursive method for cleaning errors with encoding 'latin-1' for output ready text in PDF file. + Go throw all elements of given objects and remove error with encoding 'latin-1'. + + :param elem: current element for checking and removing errors with encoding + :return: recursive call this method if givn object is collection, else string + """ + if type(elem) == str: + return elem.encode('latin-1', 'replace').decode('latin-1') + elif type(elem) == dict: + return {k: self._clean_each_elem_article(v) for k, v in elem.items()} + elif type(elem) == list: + return [self._clean_each_elem_article(el) for el in elem] + + def _draw_image(self, writer, image): + """ + Method for draw image in file by given FPDF writer. + + :param writer: FPDF object for drawing in file + :param image: dict with info about image + :type writer: fpdf.FPDF + :type image: dict + """ + try: + image_name = f"{image['src'].split('/')[-1]}.{self.image_ext}" + image_path = self._download_to(image['src'], image_name) + writer.image(image_path, type=self.image_ext, link=image['src'], x=(writer.w - 50) // 2) + except (ValueError, TypeError, RuntimeError): + writer.set_font("Courier", 'B', 10) + writer.multi_cell(0, 3, f"NO IMAGE: {image['alt']}", align='C') + + def _download_to(self, link, filename): + """ + Method for downloading image by link in given file. Return path to downloaded image. + + :param link: link to image + :param filename: name of file, such will be rewriten. + :type link: str + :type filename: str + :return: absolute path to downloaded image + :rtype: str + """ + if not os.path.exists(os.path.join(self.cache_folder)): + os.mkdir(os.path.join(self.cache_folder)) + img_data = requests.get(link).content + ready_image_path = os.path.join(self.cache_folder, filename) + with open(ready_image_path, 'wb') as handler: + handler.write(img_data) + + return ready_image_path class HTMLPrintController(BaseController): + """ + Class controller for output given articles using HTML in file. + """ extension = '.html' def print_to(self, articles, **kwargs): + """ + Method for output given articles in given file with HTML. + + :param articles: articles to output + :param kwargs: optional params. Use to extend a count given params in base method. + filename - name of output file + :type articles: dict + """ print_to = kwargs.get('filename', None) if print_to is not None and not print_to.endswith(self.extension): print_to += self.extension - html_text = f"RSS Feeds" \ - f"

{articles['title']}

" \ - f"{''.join([self._print_article(art) for art in articles['articles']])}" + html_text = f"" \ + f"" \ + f"" \ + f"" \ + f"RSS Feeds" \ + f"" \ + f"" \ + f"

{articles['title']}

" \ + f"{''.join([self._print_article(art) for art in articles['articles']])}" \ + f"" \ + f"" with open(print_to, 'w') as file: file.write(html_text) - def _print_article(self, article): - result = "

" - result += '{}(Link to original' \ + def _print_article(self, article, **kwargs): + """ + Method for output one article use HTML. + + :param article: article to output + :param kwargs: optional params. Use to extend a count given params in base method + :type article: dict + :return: string with html version of given article + :rtype: str + """ + result = "
" \ + "

" + result += '' \ + '{} (Link to original)' \ + '' \ '

'.format(article['link'], article['title']) for image in article['media']: attrs = " ".join([f"{k}=\"{v}\"" for k, v in image.items()]) - result += "
".format(attrs) - result += f'

Published: {article["pubDate"]}

' - result += f'

{article["description"]}


Links:

' + result += "
" \ + "".format(attrs) + result += f'

' \ + f'Published: {article["pubDate"]}' \ + f'

' + result += f'

' \ + f'{article["description"]}' \ + f'

' \ + f'
' \ + f'

' \ + f'Links:' \ + f'

' for i in range(len(article['dec_links'])): - result += f'{i + 1}) ' \ - f'{article["dec_links"][i]}
' + result += f'' \ + f'{article["dec_links"][i]}' \ + f'' \ + f'
' result += "
" return result - @staticmethod - def _download_to(link, filename): - img_data = requests.get(link).content - with open(filename, 'wb') as handler: - handler.write(img_data) - return filename - class OutputController: + """ + Main OutputController class such working with all controllers in this module. + """ + @staticmethod def print(articles, to_pdf=None, to_html=None, to_json=False): + """ + Method for the choice and run procedure of output given articles. + The output method depends on a given parameters. + HTML output - to_html = 'filename' + PDF output - to_pdf = 'filename' + JSON output - to_json = True + Default start sample output. + + :param articles: articles for output + :param to_pdf: filename for output in PDF + :param to_html: filename for output using HTML + :param to_json: Output a given articles in JSON format + :type articles: dict + :type to_pdf: str + :type to_html: str + :type to_json: bool + """ if to_html is not None: HTMLPrintController().print_to(articles, filename=to_html) if to_pdf is not None: From d902a68d4d809ab59c6adc39add9951ef39af448 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Tue, 26 Nov 2019 18:43:35 +0300 Subject: [PATCH 15/43] Clear __init__.py with PEP8 + added cache folder in gitignore --- .gitignore | 1 + __init__.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ec0f11c..94a37c8 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,4 @@ venv.bak/ *.sqlite3 *.html *.pdf +__cache__/ diff --git a/__init__.py b/__init__.py index 8b13789..e69de29 100644 --- a/__init__.py +++ b/__init__.py @@ -1 +0,0 @@ - From 4ae0a61330918cdfa17533eb031d617719795cdc Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Tue, 26 Nov 2019 18:45:06 +0300 Subject: [PATCH 16/43] Optimize method `create_and_return` in articlemanager --- rss-reader/storage_controller/managers/articlemanager.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/rss-reader/storage_controller/managers/articlemanager.py b/rss-reader/storage_controller/managers/articlemanager.py index 5bda5e8..346b8e7 100644 --- a/rss-reader/storage_controller/managers/articlemanager.py +++ b/rss-reader/storage_controller/managers/articlemanager.py @@ -23,8 +23,4 @@ def create_and_return(structs, source): :return: count of new created objects :rtype: int """ - count = 0 - for struct in structs: - if Article.from_dict(struct, source) is not None: - count += 1 - return count + return len([art for struct in structs if (art := Article.from_struct(struct, source)) is not None]) From 906d985e97d7422e6ec65f92feefc70f4cdd88bb Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Tue, 26 Nov 2019 18:49:21 +0300 Subject: [PATCH 17/43] Change behavior of Article model. Changes: * Moved loading/saving data control logic to Article model from controller * Extend model Article with 2 fields `dec_description` and `dec_links` * Rewrite docstrings with PEP8 --- rss-reader/storage_controller/controller.py | 56 --------------------- rss-reader/storage_controller/models.py | 35 ++++++++++--- 2 files changed, 28 insertions(+), 63 deletions(-) diff --git a/rss-reader/storage_controller/controller.py b/rss-reader/storage_controller/controller.py index d3ee9db..5f810cd 100644 --- a/rss-reader/storage_controller/controller.py +++ b/rss-reader/storage_controller/controller.py @@ -2,10 +2,7 @@ Module with controllers for work with db. """ -import json import logging -import datetime - from peewee import PeeweeException from storage_controller.managers import * @@ -49,15 +46,6 @@ def load(self, url, date, limit): if limit is not None: articles['articles'] = [article for i, article in enumerate(articles['articles']) if i < limit] - logging.info(f"Completed. Converting date each article") - articles['articles'] = [self._convert_date(article) for article in articles['articles']] - - logging.info(f"Completed. Load from JSON links each article") - articles['articles'] = [self._load_links(article) for article in articles['articles']] - - logging.info(f"Completed. Load from JSON media each article") - articles['articles'] = [self._load_media(article) for article in articles['articles']] - logging.info(f"Completed. Convert to dict each article") articles['articles'] = [article.to_dict() for article in articles['articles']] @@ -84,47 +72,3 @@ def save(self, url, articles, title): logging.info(f"Completed. Saving articles in chosen source model") count = self.articles.create_and_return(articles, source) return count - - @staticmethod - def _load_media(article): - """ - Method for converting media of a given article from JSON. - - :param article: article with media in JSON - :type article: Article - :return: article with correct media - :rtype: Article - """ - article.media = json.loads(article.media) - return article - - @staticmethod - def _load_links(article): - """ - Method for converting links of a given article from JSON. - - :param article: article with links in JSON - :type article: Article - :return: article with correct links - :rtype: Article - """ - article.links = json.loads(article.links) - return article - - @staticmethod - def _convert_date(article, from_fmt="%Y-%m-%d", to_fmt="%a, %d %b %Y"): - """ - Method for converting date of a given article to specific format. - - :param article: article with incorrect format of date - :param from_fmt: optional parameter. Format to convert from - :param to_fmt: optional parameter. Format to convert to - :type article: Article - :type from_fmt: str - :type to_fmt: str - :return: Article object with correct format of date - :rtype: Article - """ - _date = datetime.datetime.strptime(article.pubDate, from_fmt) - article.pubDate = datetime.datetime.strftime(_date, to_fmt) - return article diff --git a/rss-reader/storage_controller/models.py b/rss-reader/storage_controller/models.py index 766bcb5..2c787ab 100644 --- a/rss-reader/storage_controller/models.py +++ b/rss-reader/storage_controller/models.py @@ -69,19 +69,23 @@ class Article(peewee.Model): Fields: title: title of article description: description of article + dec_description: decorated description of article link: absolute URL to article pubDate: date of publication article media: all media objects from article source: absolute URL to containing RSS source - links: all links from article in specified format + links: all links from article without any formatting + dec_links: decorated links from article in special format """ title = peewee.TextField() description = peewee.TextField() + dec_description = peewee.TextField() link = peewee.CharField(unique=True) - pubDate = peewee.DateField(formats=["%Y%m%d", ]) + pubDate = peewee.DateTimeField() media = peewee.TextField() source = peewee.ForeignKeyField(Source, backref='articles') links = peewee.TextField() + dec_links = peewee.TextField() class Meta: database = DB_HANDLE @@ -89,9 +93,11 @@ class Meta: order_by = ('-pubDate',) @classmethod - def from_dict(cls, struct, source): + def from_struct(cls, struct, source): """ Class method for creating Article model object from given dict. + Object creating with safe load a pub date. If RSS feed have no pub date, + the article will be saved with the date of adding to the db. :param struct: dictionary with info about article :param source: Source object of source feeds. Used for connect sources with articles @@ -101,25 +107,40 @@ def from_dict(cls, struct, source): :rtype: Article or None """ try: + if struct['pubDate'] != 'None': + date = datetime.datetime.strptime(struct['pubDate'], "%a, %d %b %Y %H:%M") + else: + date = datetime.datetime.now() + return cls.create( title=struct['title'], description=struct['description'], + dec_description=struct['dec_description'], link=struct['link'], - pubDate=datetime.datetime.strptime(struct['pubDate'], "%a, %d %b %Y %H:%M:%S %z"), + pubDate=date, media=json.dumps(struct['media']), source=source, links=json.dumps(struct['links']), + dec_links=json.dumps(struct['dec_links']) ) except peewee.IntegrityError: return None def to_dict(self): + """ + Method for converting model objects to dict with all info. + + :return: dict with article info + :rtype: dict + """ return { 'title': self.title, 'description': self.description, + 'dec_description': self.dec_description, 'link': self.link, - 'pubDate': self.pubDate, - 'media': self.media, + 'pubDate': self.pubDate.strftime("%a, %d %b %Y %H:%M"), + 'media': json.loads(self.media), 'source': self.source.url, - 'links': self.links, + 'links': json.loads(self.links), + 'dec_links': json.loads(self.dec_links), } From a15613aaa128ee2f42f7599e7f0823e7c101ec9e Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Tue, 26 Nov 2019 18:55:44 +0300 Subject: [PATCH 18/43] Added new fields of Article model in htmlparser + optimized processing sequence + change method work with field pubDate --- rss-reader/htmlparser.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/rss-reader/htmlparser.py b/rss-reader/htmlparser.py index 1f93b68..dc0ccf2 100644 --- a/rss-reader/htmlparser.py +++ b/rss-reader/htmlparser.py @@ -1,6 +1,7 @@ import logging from abc import ABC, abstractmethod import html +import datetime __all__ = ['Parser'] @@ -156,18 +157,19 @@ def parse(self, response, limit): return {'title': title, 'articles': articles} - @staticmethod - def _clear_from_html(article): + def _clear_from_html(self, elem): """ Method to clear html escapes from all fields of article. - :param article: article to clear from HTML escapes + :param elem: article to clear from HTML escapes :return: clean article """ - for k, v in article.items(): - article[k] = html.unescape(v) - - return article + if type(elem) == str: + return html.unescape(elem) + elif type(elem) == dict: + return {k: self._clear_from_html(v) for k, v in elem.items()} + elif type(elem) == list: + return [self._clear_from_html(el) for el in elem] @staticmethod def _get_limited_articles(response, limit): @@ -322,12 +324,17 @@ def _article_to_dict(self, article): "height": image.height} for image in images ] + try: + date = datetime.datetime(*article.published_parsed[:6]).strftime("%a, %d %b %Y %H:%M") + except (AttributeError, ValueError): + date = 'None' + result = { 'title': article.title, 'description': description, 'dec_description': dec_description, 'link': article.link, - 'pubDate': article.published, + 'pubDate': date, 'media': media, 'links': links, 'dec_links': dec_links, From 4957b1c13cd7d6843c29f890ec8fb09774040e63 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Tue, 26 Nov 2019 18:57:30 +0300 Subject: [PATCH 19/43] Completed Iteration 4 Fixed docstring in rss_reader --- rss-reader/rss_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rss-reader/rss_reader.py b/rss-reader/rss_reader.py index 3f26f0f..7150099 100644 --- a/rss-reader/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -36,12 +36,12 @@ def __call__(self, source, limit, date, **kwargs): :param source: URL for downloading news articles :param limit: limit news topics if this parameter provided :param date: print cached articles by date - :param kwargs: optional parameter for control behavior of method. + :param kwargs: optional parameter for control behavior of output method. Use one from this parameters: * to_json: bool - output in JSON or not * to_pdf: str - string filename for output * to_html: str - string filename for output - Default sample output + Default start sample output :type source: str :type limit: int :type date: str From cb4eb466fd5576796b3d57d089b3308f471299fe Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Wed, 27 Nov 2019 01:06:48 +0300 Subject: [PATCH 20/43] Fix setup.py --- setup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 7b0ec7d..ff89ca3 100644 --- a/setup.py +++ b/setup.py @@ -13,15 +13,14 @@ long_description_content_type='text/markdown', author="Pivovar Sergey", author_email="pivovar-ser-leon@inbox.ru", - url="https://github.com/TeRRoRlsT/PythonHomework.git", + url="https://github.com/TeRRoRlsT/Pygit sthonHomework.git", packages=find_packages(), python_requires='>=3.8', - install_requires=['argparse', 'logging', 'feedparser', 'htmlparser', 'json'], + install_requires=['feedparser', 'requests', 'fpdf', 'peewee'], classifiers=[ - "Intended Audience :: Developers", "Operating System :: OS Independent", "Programming Language :: Python :: 3", ], From 7c4d68fc08601da5bdcf055756ee081a9b92e6e9 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Wed, 27 Nov 2019 01:11:56 +0300 Subject: [PATCH 21/43] fix setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ff89ca3..25c9967 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ long_description_content_type='text/markdown', author="Pivovar Sergey", author_email="pivovar-ser-leon@inbox.ru", - url="https://github.com/TeRRoRlsT/Pygit sthonHomework.git", + url="https://github.com/TeRRoRlsT/PythonHomework.git", packages=find_packages(), From 0a7e75284e7d06093adbcc167681dc6556269707 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Wed, 27 Nov 2019 18:24:05 +0300 Subject: [PATCH 22/43] Added param `--colorize` with description --- rss-reader/rss_reader.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rss-reader/rss_reader.py b/rss-reader/rss_reader.py index 7150099..eb4aebf 100644 --- a/rss-reader/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -10,6 +10,7 @@ * Limiting articles * Caching news feeds in SQLite database * Converting to PDF and HTML formats + * Print in colorize mode For information enter "python rss_reader.py -h" @@ -41,6 +42,7 @@ def __call__(self, source, limit, date, **kwargs): * to_json: bool - output in JSON or not * to_pdf: str - string filename for output * to_html: str - string filename for output + * colorize: bool - print the result in colorized mode Default start sample output :type source: str :type limit: int @@ -101,10 +103,12 @@ def main(): parser.add_argument('--date', type=str, help='Print cached articles by date') parser.add_argument('--to-pdf', type=str, help='Print result as PDF in entered file') parser.add_argument('--to-html', type=str, help='Print result as HTML in entered file') + parser.add_argument('--colorize', action='store_true', help='Print the result of the utility in colorized mode') settings = parser.parse_args() output = { + 'colorize': settings.colorize, 'to_json': settings.json, 'to_pdf': settings.to_pdf, 'to_html': settings.to_html, From b67a53720522d45e55917d7e6ed68f3ac48881f9 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Wed, 27 Nov 2019 18:34:44 +0300 Subject: [PATCH 23/43] Added implementation for standard output in colorized mode + update docstrings with PEP8 --- rss-reader/outputcontroller.py | 84 ++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 20 deletions(-) diff --git a/rss-reader/outputcontroller.py b/rss-reader/outputcontroller.py index 63732f7..2ee16ef 100644 --- a/rss-reader/outputcontroller.py +++ b/rss-reader/outputcontroller.py @@ -1,14 +1,28 @@ +""" +Module for output the result of the utility and printing in different formats. +Recommend use only class OutputController with parameters: + * to_json: bool - output in JSON or not + * to_pdf: str - string filename for output + * to_html: str - string filename for output + * colorize: bool - print the result in colorized mode +Default start sample output. + +""" +import os import json import logging -import abc -import os -import fpdf -import requests +from abc import ABC +from fpdf import FPDF +from requests import get +from colorama import init, Style, Fore + +# Initialization colorama for colorized output +init() __all__ = ['OutputController'] -class BaseController(abc.ABC): +class BaseController(ABC): """ Abstract base class for all output controllers. Using such interface for all controllers. """ @@ -52,29 +66,53 @@ def print_to(self, articles, **kwargs): :param articles: dict with title and list of news articles :param kwargs: optional params. Use to extend a count given params in base method + colorize: bool - print the result of the utility in colorized mode :type articles: dict """ logging.info("Start sample output") if (title := articles.get('title', None)) is not None: - self._print_title(title) + self._print_title(title, colorize=kwargs.get('colorize', False)) for article in articles['articles']: - self._print_article(article) + self._print_article(article, colorize=kwargs.get('colorize', False)) def _print_article(self, article, **kwargs): """ - Method for output given articles in given PDF file. + Method for output given articles in standard out. :param article: articles to output :param kwargs: optional params. Use to extend a count given params in base method + colorize: bool - print the result of the utility in colorized mode + :type article: dict """ - print(f"Title: {article['title']}\n" - f"Date: {article['pubDate']}\n" - f"Link: {article['link']}\n\n" - f"{article['dec_description']}\n\n" - f"Links:") + + if kwargs.get('colorize', False): + output = "{}Title:{} %s\n" \ + "{}Date: {}%s{}\n" \ + "{}Link: {}%s{}\n" \ + "\n" \ + "%s\n" \ + "\n" \ + "{}Links:{}".format(Fore.BLUE, Style.RESET_ALL, + Fore.BLUE, Fore.LIGHTYELLOW_EX, Style.RESET_ALL, + Fore.BLUE, Fore.YELLOW, Style.RESET_ALL, + Fore.BLUE, Style.RESET_ALL) + format_link = "{}%s{}" + else: + output = "Title: %s\n" \ + "Date: %s\n" \ + "Link: %s\n" \ + "\n" \ + "%s\n" \ + "\n" \ + "Links:" + format_link = "%s" + + print(output % (article['title'], article['pubDate'], article['link'], article['dec_description'])) + for link in article['dec_links']: - print(link) + params = (Fore.YELLOW, Style.RESET_ALL) if link.endswith('(link)') else (Fore.GREEN, Style.RESET_ALL) + print((format_link % link).format(*params)) print('################################################################################') def _print_title(self, title, **kwargs): @@ -83,9 +121,13 @@ def _print_title(self, title, **kwargs): :param title: title to output :param kwargs: optional params. Use to extend a count given params in base method + colorize: bool - print the result of the utility in colorized mode :type title: str """ - print(f"Feed: {title}\n") + if kwargs.get('colorize', False): + print(f"%sFeed: %s{title}%s\n" % (Fore.BLUE, Fore.LIGHTMAGENTA_EX, Style.RESET_ALL)) + else: + print(f"Feed: {title}\n") class JSONPrintController(BaseController): @@ -130,7 +172,7 @@ def print_to(self, articles, **kwargs): if print_to is not None and not print_to.endswith(self.extension): print_to += self.extension - writer = fpdf.FPDF() + writer = FPDF() writer.add_page() self._print_title(articles['title'], writer=writer) @@ -233,7 +275,7 @@ def _download_to(self, link, filename): """ if not os.path.exists(os.path.join(self.cache_folder)): os.mkdir(os.path.join(self.cache_folder)) - img_data = requests.get(link).content + img_data = get(link).content ready_image_path = os.path.join(self.cache_folder, filename) with open(ready_image_path, 'wb') as handler: handler.write(img_data) @@ -320,7 +362,7 @@ class OutputController: """ @staticmethod - def print(articles, to_pdf=None, to_html=None, to_json=False): + def print(articles, to_pdf=None, to_html=None, to_json=False, colorize=False): """ Method for the choice and run procedure of output given articles. The output method depends on a given parameters. @@ -332,11 +374,13 @@ def print(articles, to_pdf=None, to_html=None, to_json=False): :param articles: articles for output :param to_pdf: filename for output in PDF :param to_html: filename for output using HTML - :param to_json: Output a given articles in JSON format + :param to_json: Print given articles in JSON format + :param colorize: Print result in colorized mode :type articles: dict :type to_pdf: str :type to_html: str :type to_json: bool + :type colorize: bool """ if to_html is not None: HTMLPrintController().print_to(articles, filename=to_html) @@ -346,4 +390,4 @@ def print(articles, to_pdf=None, to_html=None, to_json=False): if to_json: JSONPrintController().print_to(articles) else: - SamplePrintController().print_to(articles) + SamplePrintController().print_to(articles, colorize=colorize) From 71b35707c63a252643226353514daa6b5f7e6387 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Wed, 27 Nov 2019 18:35:35 +0300 Subject: [PATCH 24/43] Added package `colorama` in setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 25c9967..b4d51a2 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ packages=find_packages(), python_requires='>=3.8', - install_requires=['feedparser', 'requests', 'fpdf', 'peewee'], + install_requires=['feedparser', 'requests', 'fpdf', 'peewee', 'colorama'], classifiers=[ "Operating System :: OS Independent", From 57bd2d2be66e1da304a3c22f8e6372469ad97235 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Wed, 27 Nov 2019 18:36:24 +0300 Subject: [PATCH 25/43] update version to `1.1` --- rss-reader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rss-reader/__init__.py b/rss-reader/__init__.py index 4802e90..f901408 100644 --- a/rss-reader/__init__.py +++ b/rss-reader/__init__.py @@ -1 +1 @@ -__version__ = "1.0" +__version__ = "1.1" From 78530dbbb9eee0e8d36722232833f1cd983f1f59 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Wed, 27 Nov 2019 20:53:47 +0300 Subject: [PATCH 26/43] Complete Iteration 5 + Optimize all imports --- rss-reader/htmlparser.py | 4 ++-- rss-reader/outputcontroller.py | 5 +++-- rss-reader/rss_reader.py | 5 +++-- rss-reader/storage_controller/controller.py | 1 + rss-reader/storage_controller/models.py | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/rss-reader/htmlparser.py b/rss-reader/htmlparser.py index dc0ccf2..4fae2ec 100644 --- a/rss-reader/htmlparser.py +++ b/rss-reader/htmlparser.py @@ -1,7 +1,7 @@ +import datetime +import html import logging from abc import ABC, abstractmethod -import html -import datetime __all__ = ['Parser'] diff --git a/rss-reader/outputcontroller.py b/rss-reader/outputcontroller.py index 2ee16ef..2c0cf04 100644 --- a/rss-reader/outputcontroller.py +++ b/rss-reader/outputcontroller.py @@ -8,13 +8,14 @@ Default start sample output. """ -import os import json import logging +import os from abc import ABC + +from colorama import init, Style, Fore from fpdf import FPDF from requests import get -from colorama import init, Style, Fore # Initialization colorama for colorized output init() diff --git a/rss-reader/rss_reader.py b/rss-reader/rss_reader.py index eb4aebf..43a9e34 100644 --- a/rss-reader/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -19,14 +19,15 @@ """ __package__ = 'rss-reader' +import argparse import datetime import logging + import feedparser -import argparse from htmlparser import * -from storage_controller import * from outputcontroller import * +from storage_controller import * class RSSReader: diff --git a/rss-reader/storage_controller/controller.py b/rss-reader/storage_controller/controller.py index 5f810cd..211ae60 100644 --- a/rss-reader/storage_controller/controller.py +++ b/rss-reader/storage_controller/controller.py @@ -3,6 +3,7 @@ """ import logging + from peewee import PeeweeException from storage_controller.managers import * diff --git a/rss-reader/storage_controller/models.py b/rss-reader/storage_controller/models.py index 2c787ab..d971463 100644 --- a/rss-reader/storage_controller/models.py +++ b/rss-reader/storage_controller/models.py @@ -5,9 +5,9 @@ """ import datetime +import json import peewee -import json __all__ = ['DB_HANDLE', 'Source', 'Article'] From a3bcf80b688f6bcd65596f47b4f0ce00490c9cfd Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Wed, 27 Nov 2019 22:26:08 +0300 Subject: [PATCH 27/43] Rename modules and packages with PEP8 --- rss-reader/{htmlparser.py => html_parser.py} | 0 rss-reader/{outputcontroller.py => output_controller.py} | 0 rss-reader/rss_reader.py | 6 +++--- rss-reader/storage/__init__.py | 1 + rss-reader/{storage_controller => storage}/controller.py | 2 +- rss-reader/storage/managers/__init__.py | 2 ++ .../managers/article_manager.py} | 2 +- .../sourcemanager.py => storage/managers/source_manager.py} | 2 +- rss-reader/{storage_controller => storage}/models.py | 0 rss-reader/storage_controller/__init__.py | 1 - rss-reader/storage_controller/managers/__init__.py | 2 -- 11 files changed, 9 insertions(+), 9 deletions(-) rename rss-reader/{htmlparser.py => html_parser.py} (100%) rename rss-reader/{outputcontroller.py => output_controller.py} (100%) create mode 100644 rss-reader/storage/__init__.py rename rss-reader/{storage_controller => storage}/controller.py (98%) create mode 100644 rss-reader/storage/managers/__init__.py rename rss-reader/{storage_controller/managers/articlemanager.py => storage/managers/article_manager.py} (92%) rename rss-reader/{storage_controller/managers/sourcemanager.py => storage/managers/source_manager.py} (95%) rename rss-reader/{storage_controller => storage}/models.py (100%) delete mode 100644 rss-reader/storage_controller/__init__.py delete mode 100644 rss-reader/storage_controller/managers/__init__.py diff --git a/rss-reader/htmlparser.py b/rss-reader/html_parser.py similarity index 100% rename from rss-reader/htmlparser.py rename to rss-reader/html_parser.py diff --git a/rss-reader/outputcontroller.py b/rss-reader/output_controller.py similarity index 100% rename from rss-reader/outputcontroller.py rename to rss-reader/output_controller.py diff --git a/rss-reader/rss_reader.py b/rss-reader/rss_reader.py index 43a9e34..39a6851 100644 --- a/rss-reader/rss_reader.py +++ b/rss-reader/rss_reader.py @@ -25,9 +25,9 @@ import feedparser -from htmlparser import * -from outputcontroller import * -from storage_controller import * +from html_parser import * +from output_controller import * +from storage import * class RSSReader: diff --git a/rss-reader/storage/__init__.py b/rss-reader/storage/__init__.py new file mode 100644 index 0000000..999c7ba --- /dev/null +++ b/rss-reader/storage/__init__.py @@ -0,0 +1 @@ +from storage.controller import * diff --git a/rss-reader/storage_controller/controller.py b/rss-reader/storage/controller.py similarity index 98% rename from rss-reader/storage_controller/controller.py rename to rss-reader/storage/controller.py index 211ae60..51f547f 100644 --- a/rss-reader/storage_controller/controller.py +++ b/rss-reader/storage/controller.py @@ -6,7 +6,7 @@ from peewee import PeeweeException -from storage_controller.managers import * +from storage.managers import * __all__ = ['StorageController'] diff --git a/rss-reader/storage/managers/__init__.py b/rss-reader/storage/managers/__init__.py new file mode 100644 index 0000000..26d7a36 --- /dev/null +++ b/rss-reader/storage/managers/__init__.py @@ -0,0 +1,2 @@ +from .article_manager import ArticleManager, DB_HANDLE +from .source_manager import SourceManager, DB_HANDLE diff --git a/rss-reader/storage_controller/managers/articlemanager.py b/rss-reader/storage/managers/article_manager.py similarity index 92% rename from rss-reader/storage_controller/managers/articlemanager.py rename to rss-reader/storage/managers/article_manager.py index 346b8e7..9d8808a 100644 --- a/rss-reader/storage_controller/managers/articlemanager.py +++ b/rss-reader/storage/managers/article_manager.py @@ -2,7 +2,7 @@ Module manager of database model Article. """ -from storage_controller.models import Article, DB_HANDLE +from storage.models import Article, DB_HANDLE __all__ = ['ArticleManager'] diff --git a/rss-reader/storage_controller/managers/sourcemanager.py b/rss-reader/storage/managers/source_manager.py similarity index 95% rename from rss-reader/storage_controller/managers/sourcemanager.py rename to rss-reader/storage/managers/source_manager.py index 4e2272b..ef37d36 100644 --- a/rss-reader/storage_controller/managers/sourcemanager.py +++ b/rss-reader/storage/managers/source_manager.py @@ -4,7 +4,7 @@ """ import logging -from storage_controller.models import Source, DB_HANDLE +from storage.models import Source, DB_HANDLE __all__ = ['SourceManager'] diff --git a/rss-reader/storage_controller/models.py b/rss-reader/storage/models.py similarity index 100% rename from rss-reader/storage_controller/models.py rename to rss-reader/storage/models.py diff --git a/rss-reader/storage_controller/__init__.py b/rss-reader/storage_controller/__init__.py deleted file mode 100644 index 323031d..0000000 --- a/rss-reader/storage_controller/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from storage_controller.controller import * diff --git a/rss-reader/storage_controller/managers/__init__.py b/rss-reader/storage_controller/managers/__init__.py deleted file mode 100644 index 7ff6ec0..0000000 --- a/rss-reader/storage_controller/managers/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .articlemanager import ArticleManager, DB_HANDLE -from .sourcemanager import SourceManager, DB_HANDLE From 09814348b4e73571093be150697ac229d080a755 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 00:58:21 +0300 Subject: [PATCH 28/43] Changed way to access fields in the response struct --- rss-reader/html_parser.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/rss-reader/html_parser.py b/rss-reader/html_parser.py index 4fae2ec..77b44ce 100644 --- a/rss-reader/html_parser.py +++ b/rss-reader/html_parser.py @@ -94,7 +94,7 @@ def __str__(self): :return: string to output tag in the description section """ - return "[Image {}: {}] ".format('{}', self.alt) + return "[Image {}: %s] " % self.alt def link(self): """ @@ -153,7 +153,7 @@ def parse(self, response, limit): articles = [self._clear_from_html(article) for article in nice_articles] logging.info("Getting a RSS source title") - title = response.feed.title + title = response['feed']['title'] return {'title': title, 'articles': articles} @@ -184,7 +184,7 @@ def _get_limited_articles(response, limit): :return: news articles of limited length :rtype: dict """ - result = response.entries + result = response['entries'] if limit is not None: logging.info(f"Completed. Loaded {min(limit, len(result))} articles with limit {limit}") return result[0:min(limit, len(result))] @@ -312,8 +312,8 @@ def _article_to_dict(self, article): :rtype: dict """ - dec_description, dec_links = self._process_description(article.description) - description, links = self._process_description(article.description, False, False) + dec_description, dec_links = self._process_description(article['description']) + description, links = self._process_description(article['description'], False, False) images = [obj for obj in self._tags if isinstance(obj, Img)] @@ -325,15 +325,15 @@ def _article_to_dict(self, article): ] try: - date = datetime.datetime(*article.published_parsed[:6]).strftime("%a, %d %b %Y %H:%M") + date = datetime.datetime(*article['published_parsed'][:6]).strftime("%a, %d %b %Y %H:%M") except (AttributeError, ValueError): date = 'None' result = { - 'title': article.title, + 'title': article['title'], 'description': description, 'dec_description': dec_description, - 'link': article.link, + 'link': article['link'], 'pubDate': date, 'media': media, 'links': links, From 1417f036d91da6f5e23cda6f40911821e913aab7 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 02:24:20 +0300 Subject: [PATCH 29/43] Update and optimize methods of module `html_parser` --- rss-reader/html_parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rss-reader/html_parser.py b/rss-reader/html_parser.py index 77b44ce..9f2b8cf 100644 --- a/rss-reader/html_parser.py +++ b/rss-reader/html_parser.py @@ -167,9 +167,11 @@ def _clear_from_html(self, elem): if type(elem) == str: return html.unescape(elem) elif type(elem) == dict: - return {k: self._clear_from_html(v) for k, v in elem.items()} + return {self._clear_from_html(k): self._clear_from_html(v) for k, v in elem.items()} elif type(elem) == list: return [self._clear_from_html(el) for el in elem] + else: + return elem @staticmethod def _get_limited_articles(response, limit): @@ -202,10 +204,8 @@ def _get_next_tag(line): :return: (startpos, endpos) is a position of next tag in line if line have a tag, else None :rtype: tuple or None """ - if line.find('<') != -1: - startpos = line.find('<') - endpos = line.find('>', startpos) + 1 - return startpos, endpos + if (startpos := line.find('<')) != -1 and (endpos := line.find('>')) != -1: + return startpos, endpos + 1 else: return None From b918ffb9f7dacdbef10b489207b2db3700d07a10 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 12:21:20 +0300 Subject: [PATCH 30/43] Update to v1.2 Updates: * Update README with PEP8 and update code lines * All imports are rewritten to absolute paths from packages. Some imports remained relative to the package * Renamed Source Root to `rssreader` * Fix docstring with PEP8 * Fix setup.py. Now it creates a utility `rss-reader` --- README.md | 14 ++++++++++---- rss-reader/__init__.py | 1 - rss-reader/storage/__init__.py | 1 - rssreader/__init__.py | 1 + {rss-reader => rssreader}/html_parser.py | 0 {rss-reader => rssreader}/output_controller.py | 0 {rss-reader => rssreader}/rss_reader.py | 10 +++++----- rssreader/storage/__init__.py | 1 + {rss-reader => rssreader}/storage/controller.py | 2 +- .../storage/managers/__init__.py | 0 .../storage/managers/article_manager.py | 2 +- .../storage/managers/source_manager.py | 2 +- {rss-reader => rssreader}/storage/models.py | 0 setup.py | 11 +++++++---- 14 files changed, 27 insertions(+), 18 deletions(-) delete mode 100644 rss-reader/__init__.py delete mode 100644 rss-reader/storage/__init__.py create mode 100644 rssreader/__init__.py rename {rss-reader => rssreader}/html_parser.py (100%) rename {rss-reader => rssreader}/output_controller.py (100%) rename {rss-reader => rssreader}/rss_reader.py (96%) create mode 100644 rssreader/storage/__init__.py rename {rss-reader => rssreader}/storage/controller.py (98%) rename {rss-reader => rssreader}/storage/managers/__init__.py (100%) rename {rss-reader => rssreader}/storage/managers/article_manager.py (92%) rename {rss-reader => rssreader}/storage/managers/source_manager.py (96%) rename {rss-reader => rssreader}/storage/models.py (100%) diff --git a/README.md b/README.md index 7f88e00..d2fe208 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ This project was created for the EPAM Python Courses 2019. ## Installing +First, this app runs on Python version >=3.8. + ### Download: `git clone https://github.com/TeRRoRlsT/PythonHomework.git` @@ -10,12 +12,16 @@ This project was created for the EPAM Python Courses 2019. ### Setup: Go to repository **PythonHomework** and execute the command: -`python3 setup.py install` +`python3.8 -m pip install .` + +or + +`pip install .` ## Running -To view the help for running project go to **PythonHomework** folder and execute the command: +To view the help for running project go to **PythonHomework/rssreader** folder and execute the command: -`python3 rss_reader.py --help` +`python3.8 rss_reader.py --help` ### SQLite3 This application uses SQLite3 database to cache all downloaded news articles. @@ -24,7 +30,7 @@ If you use '--date YYYYMMDD' the application will load news articles from the DB ## Tests For run unittest go to **PythonHomework** folder and execute the command: -`python3 -m unittests test*` +`python3.8 -m unittest tests` ## Authors * Sergey Pivovar - BSUIR 2019 \ No newline at end of file diff --git a/rss-reader/__init__.py b/rss-reader/__init__.py deleted file mode 100644 index f901408..0000000 --- a/rss-reader/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "1.1" diff --git a/rss-reader/storage/__init__.py b/rss-reader/storage/__init__.py deleted file mode 100644 index 999c7ba..0000000 --- a/rss-reader/storage/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from storage.controller import * diff --git a/rssreader/__init__.py b/rssreader/__init__.py new file mode 100644 index 0000000..b2a95f9 --- /dev/null +++ b/rssreader/__init__.py @@ -0,0 +1 @@ +__version__ = "1.2" diff --git a/rss-reader/html_parser.py b/rssreader/html_parser.py similarity index 100% rename from rss-reader/html_parser.py rename to rssreader/html_parser.py diff --git a/rss-reader/output_controller.py b/rssreader/output_controller.py similarity index 100% rename from rss-reader/output_controller.py rename to rssreader/output_controller.py diff --git a/rss-reader/rss_reader.py b/rssreader/rss_reader.py similarity index 96% rename from rss-reader/rss_reader.py rename to rssreader/rss_reader.py index 39a6851..ba15b7d 100644 --- a/rss-reader/rss_reader.py +++ b/rssreader/rss_reader.py @@ -13,11 +13,11 @@ * Print in colorize mode For information enter - "python rss_reader.py -h" + "python3.8 rss_reader -h" in terminal to find more information. """ -__package__ = 'rss-reader' +__package__ = 'rssreader' import argparse import datetime @@ -25,9 +25,9 @@ import feedparser -from html_parser import * -from output_controller import * -from storage import * +from rssreader.html_parser import * +from rssreader.output_controller import * +from rssreader.storage import * class RSSReader: diff --git a/rssreader/storage/__init__.py b/rssreader/storage/__init__.py new file mode 100644 index 0000000..038fb33 --- /dev/null +++ b/rssreader/storage/__init__.py @@ -0,0 +1 @@ +from .controller import * diff --git a/rss-reader/storage/controller.py b/rssreader/storage/controller.py similarity index 98% rename from rss-reader/storage/controller.py rename to rssreader/storage/controller.py index 51f547f..a318ef9 100644 --- a/rss-reader/storage/controller.py +++ b/rssreader/storage/controller.py @@ -6,7 +6,7 @@ from peewee import PeeweeException -from storage.managers import * +from .managers import * __all__ = ['StorageController'] diff --git a/rss-reader/storage/managers/__init__.py b/rssreader/storage/managers/__init__.py similarity index 100% rename from rss-reader/storage/managers/__init__.py rename to rssreader/storage/managers/__init__.py diff --git a/rss-reader/storage/managers/article_manager.py b/rssreader/storage/managers/article_manager.py similarity index 92% rename from rss-reader/storage/managers/article_manager.py rename to rssreader/storage/managers/article_manager.py index 9d8808a..766ca8d 100644 --- a/rss-reader/storage/managers/article_manager.py +++ b/rssreader/storage/managers/article_manager.py @@ -2,7 +2,7 @@ Module manager of database model Article. """ -from storage.models import Article, DB_HANDLE +from rssreader.storage.models import Article, DB_HANDLE __all__ = ['ArticleManager'] diff --git a/rss-reader/storage/managers/source_manager.py b/rssreader/storage/managers/source_manager.py similarity index 96% rename from rss-reader/storage/managers/source_manager.py rename to rssreader/storage/managers/source_manager.py index ef37d36..03749c1 100644 --- a/rss-reader/storage/managers/source_manager.py +++ b/rssreader/storage/managers/source_manager.py @@ -4,7 +4,7 @@ """ import logging -from storage.models import Source, DB_HANDLE +from rssreader.storage.models import Source, DB_HANDLE __all__ = ['SourceManager'] diff --git a/rss-reader/storage/models.py b/rssreader/storage/models.py similarity index 100% rename from rss-reader/storage/models.py rename to rssreader/storage/models.py diff --git a/setup.py b/setup.py index b4d51a2..38b06c4 100644 --- a/setup.py +++ b/setup.py @@ -3,11 +3,9 @@ with open('README.md') as file: LONG_DESCRIPTION = file.read() -PACKAGE = 'rss-reader' - setup( - name=PACKAGE, - version=__import__(PACKAGE).__version__, + name='rss-reader', + version=__import__('rssreader').__version__, description="RSS News Reader for EPAM Python Courses", long_description=LONG_DESCRIPTION, long_description_content_type='text/markdown', @@ -20,6 +18,11 @@ python_requires='>=3.8', install_requires=['feedparser', 'requests', 'fpdf', 'peewee', 'colorama'], + entry_points={ + 'console_scripts': [ + 'rss-reader=rssreader.rss_reader:main', + ] + }, classifiers=[ "Operating System :: OS Independent", "Programming Language :: Python :: 3", From 297dbb8048d5b411cb2558babf675d68243190c2 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 13:47:58 +0300 Subject: [PATCH 31/43] Changed way to access fields in the response struct in `rss_reader` --- rssreader/rss_reader.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/rssreader/rss_reader.py b/rssreader/rss_reader.py index ba15b7d..082523f 100644 --- a/rssreader/rss_reader.py +++ b/rssreader/rss_reader.py @@ -79,16 +79,17 @@ def __call__(self, source, limit, date, **kwargs): def _get_articles_from_url(source, limit): logging.info("Completed. Check the availability of URL.") - if 'status' not in (response := feedparser.parse(source.strip())) or len(response.entries) == 0: + if 'status' not in (response := feedparser.parse(source.strip())) or len(response['entries']) == 0: print(f"Error: Impossible parse RSS Feeds from url '{source}'") exit(0) logging.info("Completed. Check status code of response.") - if response.status in range(200, 300): - logging.info(f"Status code {response.status}. Getting articles from '{source}' was successful") + if response['status'] in range(200, 300): + print(response) + logging.info(f"Status code {response['status']}. Getting articles from '{source}' was successful") else: - print(f"Error connecting with URL '{source.strip()}' with status code {response.status}.") + print(f"Error connecting with URL '{source.strip()}' with status code {response['status']}.") exit(0) return Parser.parse(response, limit) From 54969ff4de032f7cae583dd6cf57d324a1e4d777 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 15:07:59 +0300 Subject: [PATCH 32/43] Add return in `RSSReader.__call__` + fix dosctring + optimize module --- rssreader/rss_reader.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rssreader/rss_reader.py b/rssreader/rss_reader.py index 082523f..532fd41 100644 --- a/rssreader/rss_reader.py +++ b/rssreader/rss_reader.py @@ -46,8 +46,8 @@ def __call__(self, source, limit, date, **kwargs): * colorize: bool - print the result in colorized mode Default start sample output :type source: str - :type limit: int - :type date: str + :type limit: int or None + :type date: str or None :type kwargs: dict """ if limit and limit < 1: @@ -73,7 +73,7 @@ def __call__(self, source, limit, date, **kwargs): logging.info("All articles was successfully loaded") - OutputController.print(articles, **kwargs) + return OutputController.print(articles, **kwargs) @staticmethod def _get_articles_from_url(source, limit): @@ -86,7 +86,6 @@ def _get_articles_from_url(source, limit): logging.info("Completed. Check status code of response.") if response['status'] in range(200, 300): - print(response) logging.info(f"Status code {response['status']}. Getting articles from '{source}' was successful") else: print(f"Error connecting with URL '{source.strip()}' with status code {response['status']}.") From 3020dd41415690ca22eebb1a1dd240b9572ccb7c Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 17:43:47 +0300 Subject: [PATCH 33/43] Added unittests --- rssreader/html_parser_tests.py | 306 ++++++++++++++++++ rssreader/output_controller_tests.py | 214 ++++++++++++ rssreader/rss_reader_tests.py | 92 ++++++ rssreader/storage/controller_tests.py | 14 + .../storage/managers/article_manager_tests.py | 14 + .../storage/managers/source_manager_tests.py | 14 + rssreader/storage/models_tests.py | 19 ++ rssreader/storage/tests.py | 9 + rssreader/tests.py | 9 + tests.py | 6 + 10 files changed, 697 insertions(+) create mode 100644 rssreader/html_parser_tests.py create mode 100644 rssreader/output_controller_tests.py create mode 100644 rssreader/rss_reader_tests.py create mode 100644 rssreader/storage/controller_tests.py create mode 100644 rssreader/storage/managers/article_manager_tests.py create mode 100644 rssreader/storage/managers/source_manager_tests.py create mode 100644 rssreader/storage/models_tests.py create mode 100644 rssreader/storage/tests.py create mode 100644 rssreader/tests.py create mode 100644 tests.py diff --git a/rssreader/html_parser_tests.py b/rssreader/html_parser_tests.py new file mode 100644 index 0000000..9785ae1 --- /dev/null +++ b/rssreader/html_parser_tests.py @@ -0,0 +1,306 @@ +import html +import time +from unittest import TestCase, main + +from html_parser import A, Img, HTMLParser + +__all__ = ['TestA', 'TestImg', 'TestHTMLParser'] + + +def equals_classes_a(obj1: A, obj2: A): + return obj1.href == obj2.href + + +def equals_classes_img(obj1: Img, obj2: Img): + return obj1.src == obj2.src and \ + obj1.alt == obj2.alt and \ + obj1.width == obj2.width and \ + obj1.height == obj2.height + + +class TestImg(TestCase): + def setUp(self): + self.fields = { + 'src': 'some_src', + 'alt': 'some_alt', + 'style': 'some_style', + 'width': 'some_width', + 'height': 'some_height', + } + self.img = Img(**self.fields) + + def test_create_obj(self): + self.assertTrue(issubclass(type(self.img), Img)) + + self.assertFalse(self.img is Img) + + def test_creating_fields(self): + self.assertEqual(self.img.src, self.fields['src']) + + self.assertEqual(self.img.alt, self.fields['alt']) + + self.assertEqual(self.img.width, self.fields['width']) + + self.assertEqual(self.img.height, self.fields['height']) + + def test_method_link(self): + self.assertEqual(self.img.link(), self.img.src) + + self.assertEqual(self.img.link(), self.fields['src']) + + self.assertNotEqual(self.img.link(), 'src') + + def test_method_format_link(self): + ind = 1 + self.assertEqual(self.img.format_link(ind), f"[{ind}]: {self.img.src} (image)") + + self.assertEqual(self.img.format_link(2), "[2]: some_src (image)") + + self.assertNotEqual(self.img.format_link(3), "[1]: some_src (image)") + + def test_method_str(self): + self.assertEqual(str(self.img), "[Image {}: %s] " % self.img.alt) + + self.assertNotEqual(str(self.img), "[Image {}: {}]") + + +class TestA(TestCase): + def setUp(self): + self.fields = { + 'href': 'some_href', + 'style': 'some_style', + 'align': 'some_align', + } + self.a = A(**self.fields) + + def test_create_obj(self): + self.assertTrue(issubclass(type(self.a), A)) + + self.assertFalse(self.a is A) + + def test_creating_fields(self): + self.assertEqual(self.a.href, self.fields['href']) + + self.assertNotEqual(self.a.href, 'href') + + self.assertEqual(self.a.style, self.fields['style']) + + def test_method_link(self): + self.assertEqual(self.a.link(), self.a.href) + + self.assertEqual(self.a.link(), self.fields['href']) + + self.assertNotEqual(self.a.link(), 'href') + + def test_method_format_link(self): + ind = 1 + self.assertEqual(self.a.format_link(ind), f"[{ind}]: {self.a.href} (link)") + + self.assertEqual(self.a.format_link(2), "[2]: some_href (link)") + + self.assertNotEqual(self.a.format_link(3), "[1]: some_href (link)") + + def test_method_str(self): + self.assertEqual(str(self.a), "[link {}]") + + self.assertNotEqual(str(self.a), "[link ]") + + +class TestHTMLParser(TestCase): + def setUp(self): + date = time.struct_time((2019, 11, 26, 20, 53, 11, 1, 330, 0)) + self.response = { + 'feed': { + 'title': 'Yahoo News - Latest News & Headlines', + }, + 'entries': [{ + 'title': 'Some title', + 'description': '

Alt of image 2Some long description


', + 'link': 'some long link', + 'published_parsed': date, + }] + } + self.article = { + 'title': 'Some title', + 'description': '

Alt of image 2Some long description


', + 'link': 'some long link', + 'published_parsed': date, + } + self.article_parsed = { + 'title': 'Some title', + 'description': 'Some long description', + 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description', + 'link': 'some long link', + 'pubDate': 'Tue, 26 Nov 2019 20:53', + 'media': [{ + 'src': 'some long link to source of image 2', + 'alt': 'Alt of image 2', + 'width': '130', + 'height': '86' + }], + 'links': [ + 'some long link', + 'some long link to source of image 2' + ], + 'dec_links': [ + '[1]: some long link (link)', + '[2]: some long link to source of image 2 (image)' + ] + } + self.response_parsed = { + 'title': 'Yahoo News - Latest News & Headlines', + 'articles': [{ + 'title': 'Some title', + 'description': 'Some long description', + 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description', + 'link': 'some long link', + 'pubDate': 'Tue, 26 Nov 2019 20:53', + 'media': [{ + 'src': 'some long link to source of image 2', + 'alt': 'Alt of image 2', + 'width': '130', + 'height': '86' + }], + 'links': [ + 'some long link', + 'some long link to source of image 2' + ], + 'dec_links': [ + '[1]: some long link (link)', + '[2]: some long link to source of image 2 (image)' + ] + }] + } + + self.parser = HTMLParser() + + def test_parse(self): + self.assertEqual(self.parser.parse(self.response, 1), self.response_parsed) + + def test_clear_from_html(self): + string = 'some_string' + self.assertEqual(self.parser._clear_from_html(html.escape(string)), string) + + dict_with_html_string = { + 1: html.escape('st&ri`ng'), + 'a': html.escape('s"tr>>i@ng\''), + html.escape('s"tr>>i@ng\''): html.escape('s"tr>>i@ng\'') + } + result = { + 1: 'st&ri`ng', + 'a': 's"tr>>i@ng\'', + 's"tr>>i@ng\'': 's"tr>>i@ng\'' + } + self.assertEqual(self.parser._clear_from_html(dict_with_html_string), result) + + list_with_html_string = [ + 1, + html.escape('st&ri`ng'), + 'a', + html.escape('s"tr>>i@ng\'') + ] + result = [ + 1, + 'st&ri`ng', + 'a', + 's"tr>>i@ng\'', + ] + self.assertEqual(self.parser._clear_from_html(list_with_html_string), result) + + def test_get_limited_articles(self): + self.assertEqual(self.parser._get_limited_articles(self.response, 1), [self.article]) + + def test_get_next_tag(self): + line_with_tags = '' + self.assertEqual(self.parser._get_next_tag(line_with_tags), (0, len(line_with_tags))) + + line_with_tags = 'some_alt' + self.assertEqual(self.parser._get_next_tag(line_with_tags), (0, len(line_with_tags))) + + line_with_tags = 'Alt of image 2Some long description


', + 'link': 'some long link', + 'published_parsed': time.struct_time((2019, 11, 26, 20, 53, 11, 1, 330, 0)), + }] + } + self.response_parsed = { + 'title': 'Yahoo News - Latest News & Headlines', + 'articles': [{ + 'title': 'Some title', + 'description': 'Some long description', + 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description', + 'link': 'some long link', + 'pubDate': 'Tue, 26 Nov 2019 20:53', + 'media': [{ + 'src': 'some long link to source of image 2', + 'alt': 'Alt of image 2', + 'width': '130', + 'height': '86' + }], + 'links': [ + 'some long link', + 'some long link to source of image 2' + ], + 'dec_links': [ + '[1]: some long link (link)', + '[2]: some long link to source of image 2 (image)' + ] + }] + } + + def test_get_articles_from_url(self): + limit = 1 + with patch('rssreader.html_parser.Parser.parse') as rssreader_mock: + rssreader_mock.return_value = 'Successful' + with patch('feedparser.parse') as feedparser_mock: + feedparser_mock.return_value = self.response + self.assertEqual(self.reader._get_articles_from_url(self.source, limit), 'Successful') + + feedparser_mock.assert_called_with(self.source.strip()) + rssreader_mock.assert_called_with(self.response, limit) + + def test_call_save(self): + limit = 1 + with patch('rssreader.output_controller.OutputController.print') as print_mock: + print_mock.return_value = True + with patch('rssreader.storage.controller.StorageController.save') as ctrl_save_mock: + ctrl_save_mock.return_value = 1 + with patch('rss_reader.RSSReader._get_articles_from_url') as reader_mock: + reader_mock.return_value = self.response_parsed + self.assertTrue(self.reader(self.source, limit, None)) + + print_mock.assert_called_with(self.response_parsed) + ctrl_save_mock.assert_called_with(self.source, + self.response_parsed['articles'], + self.response_parsed['title']) + reader_mock.assert_called_with(self.source, limit) + + def test_call_load(self): + limit = 1 + date = '20191122' + with patch('rssreader.output_controller.OutputController.print') as print_mock: + print_mock.return_value = True + with patch('rssreader.storage.controller.StorageController.load') as ctrl_load_mock: + ctrl_load_mock.return_value = self.response_parsed + self.assertTrue(self.reader(self.source, limit, date)) + + print_mock.assert_called_with(self.response_parsed) + ctrl_load_mock.assert_called_with(self.source, date, limit) + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/storage/controller_tests.py b/rssreader/storage/controller_tests.py new file mode 100644 index 0000000..d67f533 --- /dev/null +++ b/rssreader/storage/controller_tests.py @@ -0,0 +1,14 @@ +import unittest + +from .controller import StorageController + +__all__ = ['StorageController'] + + +class TestStorageController(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/storage/managers/article_manager_tests.py b/rssreader/storage/managers/article_manager_tests.py new file mode 100644 index 0000000..481eff3 --- /dev/null +++ b/rssreader/storage/managers/article_manager_tests.py @@ -0,0 +1,14 @@ +import unittest + +from .article_manager import ArticleManager + +__all__ = ['TestArticleManager'] + + +class TestArticleManager(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/storage/managers/source_manager_tests.py b/rssreader/storage/managers/source_manager_tests.py new file mode 100644 index 0000000..1fb75cc --- /dev/null +++ b/rssreader/storage/managers/source_manager_tests.py @@ -0,0 +1,14 @@ +import unittest + +from .source_manager import SourceManager + +__all__ = ['TestSourceManager'] + + +class TestSourceManager(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/storage/models_tests.py b/rssreader/storage/models_tests.py new file mode 100644 index 0000000..79ec14d --- /dev/null +++ b/rssreader/storage/models_tests.py @@ -0,0 +1,19 @@ +import unittest + +from .models import Source, Source + +__all__ = ['TestSourceModel', 'TestArticleModel'] + + +class TestSourceModel(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +class TestArticleModel(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/storage/tests.py b/rssreader/storage/tests.py new file mode 100644 index 0000000..40a9623 --- /dev/null +++ b/rssreader/storage/tests.py @@ -0,0 +1,9 @@ +import unittest + +from .models_tests import * +from .controller_tests import * +from .managers.article_manager_tests import * +from .managers.source_manager_tests import * + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/tests.py b/rssreader/tests.py new file mode 100644 index 0000000..1a831ef --- /dev/null +++ b/rssreader/tests.py @@ -0,0 +1,9 @@ +import unittest + +from .storage.tests import * +from .html_parser_tests import * +from .output_controller_tests import * +from .rss_reader_tests import * + +if __name__ == '__main__': + unittest.main() diff --git a/tests.py b/tests.py new file mode 100644 index 0000000..20355ce --- /dev/null +++ b/tests.py @@ -0,0 +1,6 @@ +import unittest + +from rssreader.tests import * + +if __name__ == '__main__': + unittest.main() From 75e4f94dc0ee537267e9350651128379ad8246ff Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 19:13:55 +0300 Subject: [PATCH 34/43] Added unittest OutputController --- rssreader/output_controller_tests.py | 57 ++++++++++++---------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/rssreader/output_controller_tests.py b/rssreader/output_controller_tests.py index 073c60a..13c3bd9 100644 --- a/rssreader/output_controller_tests.py +++ b/rssreader/output_controller_tests.py @@ -2,13 +2,11 @@ import unittest from unittest.mock import patch, MagicMock -from output_controller import ( - SamplePrintController, - JSONPrintController, - PDFPrintController, - HTMLPrintController, - OutputController -) +from rssreader.output_controller import (SamplePrintController, + JSONPrintController, + PDFPrintController, + HTMLPrintController, + OutputController) class TestSamplePrintController(unittest.TestCase): @@ -135,12 +133,12 @@ def test_print_to(self): print_mock.assert_called_with(json.dumps(self.articles)) -# class TestPDFPrintController(unittest.TestCase): -# pass +class TestPDFPrintController(unittest.TestCase): + pass -# class TestHTMLPrintController(unittest.TestCase): -# pass +class TestHTMLPrintController(unittest.TestCase): + pass class TestOutputController(unittest.TestCase): @@ -175,39 +173,34 @@ def setUp(self): self.filename = 'filename' def test_print(self): + # Sample with patch('rssreader.output_controller.SamplePrintController.print_to') as chosen_printer: self.assertIsNone(self.controller.print(self.articles)) - self.assertEqual(chosen_printer.print_to.call_count, 1) # (self.articles, colorize=False) - del chosen_printer - - with patch('rssreader.output_controller.JSONPrintController.print_to') as chosen_printer: - self.assertIsNone(self.controller.print(self.articles)) - del chosen_printer - - with patch('rssreader.output_controller.PDFPrintController.print_to') as chosen_printer: - self.assertIsNone(self.controller.print(self.articles)) - del chosen_printer - - with patch('rssreader.output_controller.HTMLPrintController.print_to') as chosen_printer: - self.assertIsNone(self.controller.print(self.articles)) - del chosen_printer + chosen_printer.assert_called_once_with(self.articles, colorize=False) with patch('rssreader.output_controller.SamplePrintController.print_to') as chosen_printer: self.assertIsNone(self.controller.print(self.articles, colorize=True)) - del chosen_printer + chosen_printer.assert_called_once_with(self.articles, colorize=True) + + # JSON with patch('rssreader.output_controller.JSONPrintController.print_to') as chosen_printer: - self.assertIsNone(self.controller.print(self.articles, colorize=True)) - del chosen_printer + self.assertIsNone(self.controller.print(self.articles, to_json=True)) + + chosen_printer.assert_called_once_with(self.articles) + # PDF with patch('rssreader.output_controller.PDFPrintController.print_to') as chosen_printer: - self.assertIsNone(self.controller.print(self.articles, colorize=True)) - del chosen_printer + self.assertIsNone(self.controller.print(self.articles, to_pdf='filename')) + + chosen_printer.assert_called_once_with(self.articles, filename='filename') + # HTML with patch('rssreader.output_controller.HTMLPrintController.print_to') as chosen_printer: - self.assertIsNone(self.controller.print(self.articles, colorize=True)) - del chosen_printer + self.assertIsNone(self.controller.print(self.articles, to_html='filename')) + + chosen_printer.assert_called_once_with(self.articles, filename='filename') if __name__ == '__main__': From 394edba733efc4304ba4b9b95b685e89f03ef4d9 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 19:41:29 +0300 Subject: [PATCH 35/43] Remove return value from RSSReader.__call__ --- rssreader/rss_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rssreader/rss_reader.py b/rssreader/rss_reader.py index 532fd41..60c0ee9 100644 --- a/rssreader/rss_reader.py +++ b/rssreader/rss_reader.py @@ -73,7 +73,7 @@ def __call__(self, source, limit, date, **kwargs): logging.info("All articles was successfully loaded") - return OutputController.print(articles, **kwargs) + OutputController.print(articles, **kwargs) @staticmethod def _get_articles_from_url(source, limit): From 9e85c65efcc8ffc1437b9661b27b029c416ed6d0 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 19:46:10 +0300 Subject: [PATCH 36/43] Complete unittests for output_controller module --- rssreader/output_controller_tests.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/rssreader/output_controller_tests.py b/rssreader/output_controller_tests.py index 13c3bd9..ef1c1c1 100644 --- a/rssreader/output_controller_tests.py +++ b/rssreader/output_controller_tests.py @@ -4,8 +4,6 @@ from rssreader.output_controller import (SamplePrintController, JSONPrintController, - PDFPrintController, - HTMLPrintController, OutputController) @@ -133,14 +131,6 @@ def test_print_to(self): print_mock.assert_called_with(json.dumps(self.articles)) -class TestPDFPrintController(unittest.TestCase): - pass - - -class TestHTMLPrintController(unittest.TestCase): - pass - - class TestOutputController(unittest.TestCase): def setUp(self): self.controller = OutputController() @@ -192,14 +182,18 @@ def test_print(self): # PDF with patch('rssreader.output_controller.PDFPrintController.print_to') as chosen_printer: - self.assertIsNone(self.controller.print(self.articles, to_pdf='filename')) + with patch('rssreader.output_controller.SamplePrintController.print_to') as standard_printer: + self.assertIsNone(self.controller.print(self.articles, to_pdf='filename')) + standard_printer.assert_called_once_with(self.articles, colorize=False) chosen_printer.assert_called_once_with(self.articles, filename='filename') # HTML with patch('rssreader.output_controller.HTMLPrintController.print_to') as chosen_printer: - self.assertIsNone(self.controller.print(self.articles, to_html='filename')) + with patch('rssreader.output_controller.SamplePrintController.print_to') as standard_printer: + self.assertIsNone(self.controller.print(self.articles, to_html='filename')) + standard_printer.assert_called_once_with(self.articles, colorize=False) chosen_printer.assert_called_once_with(self.articles, filename='filename') From 2b130ccc7f3909dfc24fb3e3c762aa53d9ad3f3e Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 21:56:44 +0300 Subject: [PATCH 37/43] fix imports in unittests html_parser_tests --- rssreader/html_parser_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rssreader/html_parser_tests.py b/rssreader/html_parser_tests.py index 9785ae1..f2f96b3 100644 --- a/rssreader/html_parser_tests.py +++ b/rssreader/html_parser_tests.py @@ -2,7 +2,7 @@ import time from unittest import TestCase, main -from html_parser import A, Img, HTMLParser +from rssreader.html_parser import A, Img, HTMLParser __all__ = ['TestA', 'TestImg', 'TestHTMLParser'] From b009b728146c4b9a8020955564ee487e37f2d6a3 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Thu, 28 Nov 2019 22:57:29 +0300 Subject: [PATCH 38/43] Completed unittests rss_reader_tests --- rssreader/rss_reader_tests.py | 49 +++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/rssreader/rss_reader_tests.py b/rssreader/rss_reader_tests.py index 2200ef1..e7675f0 100644 --- a/rssreader/rss_reader_tests.py +++ b/rssreader/rss_reader_tests.py @@ -1,14 +1,15 @@ import time import unittest -from unittest.mock import patch +from unittest.mock import patch, MagicMock -from rss_reader import RSSReader +from rssreader.rss_reader import RSSReader __all__ = ['TestRSSReader'] class TestRSSReader(unittest.TestCase): def setUp(self): + date = time.struct_time((2019, 11, 26, 20, 53, 11, 1, 330, 0)) self.reader = RSSReader() self.source = "https://news.yahoo.com/rss/" self.response = { @@ -20,7 +21,7 @@ def setUp(self): 'title': 'Some title', 'description': '

Alt of image 2Some long description


', 'link': 'some long link', - 'published_parsed': time.struct_time((2019, 11, 26, 20, 53, 11, 1, 330, 0)), + 'published_parsed': date, }] } self.response_parsed = { @@ -50,42 +51,46 @@ def setUp(self): def test_get_articles_from_url(self): limit = 1 - with patch('rssreader.html_parser.Parser.parse') as rssreader_mock: - rssreader_mock.return_value = 'Successful' + with patch('rssreader.html_parser.Parser.parse') as html_parser_mock: + html_parser_mock.return_value = 'Successful' with patch('feedparser.parse') as feedparser_mock: feedparser_mock.return_value = self.response self.assertEqual(self.reader._get_articles_from_url(self.source, limit), 'Successful') feedparser_mock.assert_called_with(self.source.strip()) - rssreader_mock.assert_called_with(self.response, limit) + html_parser_mock.assert_called_with(self.response, limit) def test_call_save(self): limit = 1 + self.reader._get_articles_from_url = MagicMock(return_value=self.response_parsed) with patch('rssreader.output_controller.OutputController.print') as print_mock: - print_mock.return_value = True - with patch('rssreader.storage.controller.StorageController.save') as ctrl_save_mock: - ctrl_save_mock.return_value = 1 - with patch('rss_reader.RSSReader._get_articles_from_url') as reader_mock: - reader_mock.return_value = self.response_parsed - self.assertTrue(self.reader(self.source, limit, None)) + + # Time for crutches :) + with patch('rssreader.storage.controller.StorageController.__init__') as crutch: + crutch.return_value = None # remove creating db file + # How to replace a class object on MagicMock without crutch? + + with patch('rssreader.storage.controller.StorageController.save') as storage_mock: + storage_mock.return_value = 1 + self.assertIsNone(self.reader(self.source, limit, None)) print_mock.assert_called_with(self.response_parsed) - ctrl_save_mock.assert_called_with(self.source, - self.response_parsed['articles'], - self.response_parsed['title']) - reader_mock.assert_called_with(self.source, limit) def test_call_load(self): limit = 1 date = '20191122' with patch('rssreader.output_controller.OutputController.print') as print_mock: - print_mock.return_value = True - with patch('rssreader.storage.controller.StorageController.load') as ctrl_load_mock: - ctrl_load_mock.return_value = self.response_parsed - self.assertTrue(self.reader(self.source, limit, date)) + # Time for crutches :) + with patch('rssreader.storage.controller.StorageController.__init__') as crutch: + crutch.return_value = None # remove creating db file + # How to replace a class object on MagicMock without crutch? - print_mock.assert_called_with(self.response_parsed) - ctrl_load_mock.assert_called_with(self.source, date, limit) + with patch('rssreader.storage.controller.StorageController.load') as storage_mock: + storage_mock.return_value = self.response_parsed + self.assertIsNone(self.reader(self.source, limit, date)) + + print_mock.assert_called_once_with(self.response_parsed) + storage_mock.assert_called_with(self.source, date, limit) if __name__ == '__main__': From 976ad9fc5b71e37fe061a8899a327c9f70b3a765 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Sat, 30 Nov 2019 17:37:48 +0300 Subject: [PATCH 39/43] Rename module `html_parser` to `feed_parser` --- rssreader/{html_parser.py => feed_parser.py} | 0 rssreader/{html_parser_tests.py => feed_parser_tests.py} | 2 +- rssreader/rss_reader.py | 2 +- rssreader/rss_reader_tests.py | 6 +++--- rssreader/tests.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) rename rssreader/{html_parser.py => feed_parser.py} (100%) rename rssreader/{html_parser_tests.py => feed_parser_tests.py} (99%) diff --git a/rssreader/html_parser.py b/rssreader/feed_parser.py similarity index 100% rename from rssreader/html_parser.py rename to rssreader/feed_parser.py diff --git a/rssreader/html_parser_tests.py b/rssreader/feed_parser_tests.py similarity index 99% rename from rssreader/html_parser_tests.py rename to rssreader/feed_parser_tests.py index f2f96b3..6ecf1d2 100644 --- a/rssreader/html_parser_tests.py +++ b/rssreader/feed_parser_tests.py @@ -2,7 +2,7 @@ import time from unittest import TestCase, main -from rssreader.html_parser import A, Img, HTMLParser +from rssreader.feed_parser import A, Img, HTMLParser __all__ = ['TestA', 'TestImg', 'TestHTMLParser'] diff --git a/rssreader/rss_reader.py b/rssreader/rss_reader.py index 60c0ee9..641d266 100644 --- a/rssreader/rss_reader.py +++ b/rssreader/rss_reader.py @@ -25,7 +25,7 @@ import feedparser -from rssreader.html_parser import * +from rssreader.feed_parser import * from rssreader.output_controller import * from rssreader.storage import * diff --git a/rssreader/rss_reader_tests.py b/rssreader/rss_reader_tests.py index e7675f0..514b2a8 100644 --- a/rssreader/rss_reader_tests.py +++ b/rssreader/rss_reader_tests.py @@ -51,14 +51,14 @@ def setUp(self): def test_get_articles_from_url(self): limit = 1 - with patch('rssreader.html_parser.Parser.parse') as html_parser_mock: - html_parser_mock.return_value = 'Successful' + with patch('rssreader.feed_parser.Parser.parse') as feed_parser_mock: + feed_parser_mock.return_value = 'Successful' with patch('feedparser.parse') as feedparser_mock: feedparser_mock.return_value = self.response self.assertEqual(self.reader._get_articles_from_url(self.source, limit), 'Successful') feedparser_mock.assert_called_with(self.source.strip()) - html_parser_mock.assert_called_with(self.response, limit) + feed_parser_mock.assert_called_with(self.response, limit) def test_call_save(self): limit = 1 diff --git a/rssreader/tests.py b/rssreader/tests.py index 1a831ef..4b2349c 100644 --- a/rssreader/tests.py +++ b/rssreader/tests.py @@ -1,7 +1,7 @@ import unittest from .storage.tests import * -from .html_parser_tests import * +from .feed_parser_tests import * from .output_controller_tests import * from .rss_reader_tests import * From 3a47a075c489bd5cc6056c4108b5adb37265cb2d Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Sun, 1 Dec 2019 21:36:39 +0300 Subject: [PATCH 40/43] Added part of iteration 6 Added server wrapper for implementation of `rss-reader` utility --- rssreader_server | 1 + 1 file changed, 1 insertion(+) create mode 160000 rssreader_server diff --git a/rssreader_server b/rssreader_server new file mode 160000 index 0000000..5c3b2aa --- /dev/null +++ b/rssreader_server @@ -0,0 +1 @@ +Subproject commit 5c3b2aa8dc7db01a06b6eabb1f4230b4e632e357 From 5608d1672c9f73c3c10f4f267e71119e56966473 Mon Sep 17 00:00:00 2001 From: PivovarSergey Date: Sun, 1 Dec 2019 21:44:44 +0300 Subject: [PATCH 41/43] Fix commited iteration 6 --- rssreader_server | 1 - rssreader_server/api_v1/__init__.py | 0 rssreader_server/api_v1/admin.py | 5 + rssreader_server/api_v1/apps.py | 5 + rssreader_server/api_v1/converters.py | 301 +++++++++++++++ rssreader_server/api_v1/feed_parser.py | 346 ++++++++++++++++++ rssreader_server/api_v1/managers/__init__.py | 2 + .../api_v1/managers/article_manager.py | 19 + .../api_v1/managers/source_manager.py | 40 ++ .../api_v1/migrations/__init__.py | 0 rssreader_server/api_v1/models/__init__.py | 2 + rssreader_server/api_v1/models/article.py | 54 +++ rssreader_server/api_v1/models/source.py | 19 + rssreader_server/api_v1/storage_controller.py | 49 +++ rssreader_server/api_v1/urls.py | 25 ++ rssreader_server/api_v1/view_controller.py | 97 +++++ rssreader_server/api_v1/views.py | 122 ++++++ rssreader_server/manage.py | 21 ++ rssreader_server/rssreader_server/__init__.py | 0 rssreader_server/rssreader_server/settings.py | 83 +++++ rssreader_server/rssreader_server/urls.py | 7 + rssreader_server/rssreader_server/wsgi.py | 16 + 22 files changed, 1213 insertions(+), 1 deletion(-) delete mode 160000 rssreader_server create mode 100644 rssreader_server/api_v1/__init__.py create mode 100644 rssreader_server/api_v1/admin.py create mode 100644 rssreader_server/api_v1/apps.py create mode 100644 rssreader_server/api_v1/converters.py create mode 100644 rssreader_server/api_v1/feed_parser.py create mode 100644 rssreader_server/api_v1/managers/__init__.py create mode 100644 rssreader_server/api_v1/managers/article_manager.py create mode 100644 rssreader_server/api_v1/managers/source_manager.py create mode 100644 rssreader_server/api_v1/migrations/__init__.py create mode 100644 rssreader_server/api_v1/models/__init__.py create mode 100644 rssreader_server/api_v1/models/article.py create mode 100644 rssreader_server/api_v1/models/source.py create mode 100644 rssreader_server/api_v1/storage_controller.py create mode 100644 rssreader_server/api_v1/urls.py create mode 100644 rssreader_server/api_v1/view_controller.py create mode 100644 rssreader_server/api_v1/views.py create mode 100755 rssreader_server/manage.py create mode 100644 rssreader_server/rssreader_server/__init__.py create mode 100644 rssreader_server/rssreader_server/settings.py create mode 100644 rssreader_server/rssreader_server/urls.py create mode 100644 rssreader_server/rssreader_server/wsgi.py diff --git a/rssreader_server b/rssreader_server deleted file mode 160000 index 5c3b2aa..0000000 --- a/rssreader_server +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5c3b2aa8dc7db01a06b6eabb1f4230b4e632e357 diff --git a/rssreader_server/api_v1/__init__.py b/rssreader_server/api_v1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rssreader_server/api_v1/admin.py b/rssreader_server/api_v1/admin.py new file mode 100644 index 0000000..0ea393a --- /dev/null +++ b/rssreader_server/api_v1/admin.py @@ -0,0 +1,5 @@ +from django.contrib import admin +from .models import * + +admin.site.register(Source) +admin.site.register(Article) diff --git a/rssreader_server/api_v1/apps.py b/rssreader_server/api_v1/apps.py new file mode 100644 index 0000000..9e01182 --- /dev/null +++ b/rssreader_server/api_v1/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class RssreaderAppConfig(AppConfig): + name = 'api_v1' diff --git a/rssreader_server/api_v1/converters.py b/rssreader_server/api_v1/converters.py new file mode 100644 index 0000000..88ac959 --- /dev/null +++ b/rssreader_server/api_v1/converters.py @@ -0,0 +1,301 @@ +""" +Module for output the result of the utility and printing in different formats. +Recommend use only class OutputController with parameters: + * to_json: bool - output in JSON or not + * to_pdf: str - string filename for output + * to_html: str - string filename for output +Default start sample output. + +""" +import json +import os +from abc import ABC + +from fpdf import FPDF +from requests import get + +__all__ = ['SamplePrintResponseConverter', 'JSONPrintResponseConverter', + 'PDFPrintResponseConverter', 'HTMLPrintResponseConverter'] + + +class BaseResponseConverter(ABC): + cache_folder = '__cache__' + + def print(self, articles, filename, **kwargs): + """ + Procedure for output of news articles. + + :param articles: dict with title and list of news articles + :param filename: name of the file output + :param kwargs: optional params. Use to extend a count given params in base method + :type articles: dict + """ + + def _print_article(self, article, **kwargs): + """ + Method for output given articles in given PDF file. + + :param article: article to output + :param kwargs: optional params. Use to extend a count given params in base method + :type article: dict + """ + + def _print_title(self, title, **kwargs): + """ + Method for output given title. + + :param title: title to output + :param kwargs: optional params. Use to extend a count given params in base method + :type title: str + """ + + +class SamplePrintResponseConverter(BaseResponseConverter): + """ + Class controller for sample output in standard out. + """ + delimiter = "#" * 80 + + def print(self, articles, filename, **kwargs): + """ + Method for output of given articles if given filename. + + :param articles: articles for output in file + :param filename: name of file to output + :return: path to file with result + :rtype: str + """ + if (title := articles.get('title', None)) is not None: + response_result = f"Feed: {title}\n" + + for article in articles['articles']: + response_result += self._print_article(article) + + with open(os.path.join(self.cache_folder, filename), 'w') as file: + file.write(response_result) + + return filename + + def _print_article(self, article, **kwargs): + """ + Method for output articles in PDF format. + + :param article: current dict with article info for output + :type dict + """ + response_result = f"Title: {article['title']}\n" \ + f"Date: {article['pubDate']}\n" \ + f"Link: {article['link']}\n" \ + f"\n" \ + f"{article['dec_description']}\n" \ + f"\n" \ + f"Links:" + + for link in article['dec_links']: + response_result += f"\n{link}" + response_result += f"\n{self.delimiter}" + + return response_result + + +class JSONPrintResponseConverter(BaseResponseConverter): + """ + Class controller for output JSON form of articles in standard out. + """ + + def print(self, articles, filename, **kwargs): + """ + Method for output of given articles if given filename. + + :param articles: articles for output in file + :param filename: name of file to output + :return: path to file with result + :rtype: str + """ + with open(os.path.join(self.cache_folder, filename), 'w') as file: + file.write(json.dumps(articles)) + + return filename + + +class PDFPrintResponseConverter(BaseResponseConverter): + """ + Class controller for output given articles in PDF in file. + """ + image_ext = 'jpg' + cache_folder = "__cache__" + extension = '.pdf' + delimiter_before = "_" * 59 + delimiter_after = "%d".rjust(55, '_').ljust(59, '_') + + def print(self, articles, filename, **kwargs): + """ + Method for output of given articles if given filename. + + :param articles: articles for output in file + :param filename: name of file to output + :return: path to file with result + :rtype: str + """ + writer = FPDF() + writer.add_page() + self._print_title(articles['title'], writer=writer) + + for i, article in enumerate(articles['articles']): + self._print_article(article, writer=writer, ind=i) + + writer.output(os.path.join(self.cache_folder, filename)) + + # os.removedirs(self.cache_folder) + + return filename + + def _print_title(self, title, **kwargs): + """ + Method for output title of RSS Feeds. + + :param title: title of RSS Feed + :rtype: dict + """ + writer = kwargs['writer'] + writer.set_font('Courier', 'B', 20) + writer.multi_cell(0, 30, title, align='C') + + def _print_article(self, article, **kwargs): + """ + Method for output articles in PDF format. + + :param article: current dict with article info for output + :type dict + """ + writer = kwargs['writer'] + ind = kwargs['ind'] + + article = self._clean_each_elem_article(article) + + writer.set_font("Courier", 'B', 15) + writer.multi_cell(0, 10, self.delimiter_before) + + writer.set_font("Courier", "B", 13) + writer.multi_cell(0, 7, f"Title: {article['title']}", align="L") + + writer.set_font("Courier", "BI", 11) + writer.multi_cell(0, 10, f"Date: {article['pubDate']}", align='R') + + for img in article['media']: + self._draw_image(writer, img) + + writer.set_font("Courier", size=12) + writer.multi_cell(0, 5, article['description'], align='L') + + writer.set_font("Courier", "BI", size=9) + writer.multi_cell(0, 10, f"Link: {article['link']}", align='L') + + writer.set_font("Courier", 'B', 15) + writer.multi_cell(0, 10, self.delimiter_after % (ind + 1)) + + def _clean_each_elem_article(self, elem): + """ + Recursive method for cleaning errors with encoding 'latin-1' for output ready text in PDF file. + Go throw all elements of given objects and remove error with encoding 'latin-1'. + + :param elem: current element for checking and removing errors with encoding + :return: recursive call this method if givn object is collection, else string + """ + if type(elem) == str: + return elem.encode('latin-1', 'replace').decode('latin-1') + elif type(elem) == dict: + return {k: self._clean_each_elem_article(v) for k, v in elem.items()} + elif type(elem) == list: + return [self._clean_each_elem_article(el) for el in elem] + + def _draw_image(self, writer, image): + """ + Method for draw image in file by given FPDF writer. + + :param writer: FPDF object for drawing in file + :param image: dict with info about image + :type writer: fpdf.FPDF + :type image: dict + """ + try: + image_name = f"{image['src'].split('/')[-1]}.{self.image_ext}" + image_path = self._download_to(image['src'], image_name) + writer.image(image_path, type=self.image_ext, link=image['src'], x=(writer.w - 50) // 2) + except (ValueError, TypeError, RuntimeError): + writer.set_font("Courier", 'B', 10) + writer.multi_cell(0, 3, f"NO IMAGE: {image['alt']}", align='C') + + def _download_to(self, link, filename): + """ + Method for downloading image by link in given file. Return path to downloaded image. + + :param link: link to image + :param filename: name of file, such will be rewriten. + :type link: str + :type filename: str + :return: absolute path to downloaded image + :rtype: str + """ + if not os.path.exists(os.path.join(self.cache_folder)): + os.mkdir(os.path.join(self.cache_folder)) + img_data = get(link).content + ready_image_path = os.path.join(self.cache_folder, filename) + with open(ready_image_path, 'wb') as handler: + handler.write(img_data) + + return ready_image_path + + +class HTMLPrintResponseConverter(BaseResponseConverter): + """ + Class controller for output given articles using HTML in file. + """ + extension = '.html' + + def print(self, articles, filename, **kwargs): + html_text = f"" \ + f"" \ + f"" \ + f"" \ + f"RSS Feeds" \ + f"" \ + f"" \ + f"

{articles['title']}

" \ + f"{''.join([self._print_article(art) for art in articles['articles']])}" \ + f"" \ + f"" + + with open(os.path.join(self.cache_folder, filename), 'w') as file: + file.write(html_text) + + return filename + + def _print_article(self, article, **kwargs): + result = "
" \ + "

" + result += '' \ + '{} (Link to original)' \ + '' \ + '

'.format(article['link'], article['title']) + for image in article['media']: + attrs = " ".join([f"{k}=\"{v}\"" for k, v in image.items()]) + result += "
" \ + "".format(attrs) + result += f'

' \ + f'Published: {article["pubDate"]}' \ + f'

' + result += f'

' \ + f'{article["description"]}' \ + f'

' \ + f'
' \ + f'

' \ + f'Links:' \ + f'

' + for i in range(len(article['dec_links'])): + result += f'' \ + f'{article["dec_links"][i]}' \ + f'' \ + f'
' + result += "
" + return result diff --git a/rssreader_server/api_v1/feed_parser.py b/rssreader_server/api_v1/feed_parser.py new file mode 100644 index 0000000..9f2b8cf --- /dev/null +++ b/rssreader_server/api_v1/feed_parser.py @@ -0,0 +1,346 @@ +import datetime +import html +import logging +from abc import ABC, abstractmethod + +__all__ = ['Parser'] + + +class Tag(ABC): + """ + Abstract class for working with tags as a class structure. + """ + + def __init__(self, **kwargs): + for arg, val in kwargs.items(): + self.__setattr__(arg, val) + + @abstractmethod + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + + @abstractmethod + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in links section + :rtype: str + """ + + @abstractmethod + def __str__(self): + """ + Get string to output tag in the description section. + + :return: string to output tag in the description section + """ + + +class A(Tag): + """ + Class for work with tag `a` (link) as a class struct. + """ + href = None + + def __str__(self): + """ + Get string to output tag in description section. + + :return: string to output tag in the description section + """ + return "[link {}]" + + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + return self.href + + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in the links section + :rtype: str + """ + return f"[{ind}]: {self.href} (link)" + + +class Img(Tag): + """ + Class for work with tag img (image) as a class struct. + """ + src = None + alt = None + width = None + height = None + + def __str__(self): + """ + Get string to output tag in description section. + + :return: string to output tag in the description section + """ + return "[Image {}: %s] " % self.alt + + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + return self.src + + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in the links section + :rtype: str + """ + return f"[{ind}]: {self.src} (image)" + + +class HTMLParser: + """ + A class for parse news articles from response struct of module "feedparser". + Methods return JSON format of news articles or dict with info about given article. + """ + _table = { + 'a': A, + 'img': Img, + } + + def __init__(self): + self._tags = [] + + def parse(self, response, limit): + """ + A method of parsing news articles and creating object models for easy access. + + :param response: response struct for parse + :param limit: required number of articles to show + :type response: dict + :type limit: int + :return: return a dict {'title': str, 'articles': list). + Title is header of RSS Source. + Articles is a list of dicts with articles info which was created from parsed feeds + :rtype: dict + """ + logging.info("Getting list of limited articles") + raw_articles = self._get_limited_articles(response, limit) + + logging.info("Completed. Converting each article to dict") + nice_articles = [self._article_to_dict(article) for article in raw_articles] + + logging.info("Completed. Clear articles from HTML escapes") + articles = [self._clear_from_html(article) for article in nice_articles] + + logging.info("Getting a RSS source title") + title = response['feed']['title'] + + return {'title': title, 'articles': articles} + + def _clear_from_html(self, elem): + """ + Method to clear html escapes from all fields of article. + + :param elem: article to clear from HTML escapes + :return: clean article + """ + if type(elem) == str: + return html.unescape(elem) + elif type(elem) == dict: + return {self._clear_from_html(k): self._clear_from_html(v) for k, v in elem.items()} + elif type(elem) == list: + return [self._clear_from_html(el) for el in elem] + else: + return elem + + @staticmethod + def _get_limited_articles(response, limit): + """ + Method of limiting parsing articles from response struct. + If limit is None return articles given length, else return all available articles. + + :param response: response struct for parse + :param limit: limit of output news articles + :type response: dict + :type limit: int or None + :return: news articles of limited length + :rtype: dict + """ + result = response['entries'] + if limit is not None: + logging.info(f"Completed. Loaded {min(limit, len(result))} articles with limit {limit}") + return result[0:min(limit, len(result))] + else: + logging.info(f"Completed. Loaded {len(result)} articles without any limit") + return result + + @staticmethod + def _get_next_tag(line): + """ + Method for getting startpos and endpos of tag in given string line. + + :param line: line with html tag + :type line: str + :return: (startpos, endpos) is a position of next tag in line if line have a tag, else None + :rtype: tuple or None + """ + if (startpos := line.find('<')) != -1 and (endpos := line.find('>')) != -1: + return startpos, endpos + 1 + else: + return None + + def _create_tag(self, params): + """ + Method for creating Tag struct class from params. + + :param params: info for creating tag + :type params: dict + :return: tag object if creating was successful, else None + :rtype: Tag or None + """ + try: + tag_type = next(iter(params)) + params.pop(tag_type) + return self._table[tag_type](**params) + except KeyError: + return None + + def _get_params_from_line(self, tag_line): + """ + Method for getting all parameters from html tag string line. + If parameter have a value params save value. Else value is True. + + :param tag_line: line with tag parameters + :type tag_line: str + :return: dict with parsed parameters + :rtype: dict + """ + params = {} + tag_line = tag_line.strip('<>') + strings, tag_line = self._get_all_strings(tag_line) + words = tag_line.split() + for param in words: + pair = param.split('=') + if len(pair) == 1: + params.update({pair[0]: True}) + else: + params.update({pair[0]: strings.pop(0)}) + + return params + + @staticmethod + def _get_all_strings(tag_line): + """ + Method of cutting all string in quotes \"...\". + + :param tag_line: line with tag info and strings + :type tag_line: str + :return: tuple (strings, tag_line). + strings is a list with all cutting strings. + tag_line is a given string parameter without cutting strings + :rtype: tuple + """ + strings = [] + while (start_ind := tag_line.find('"')) != -1: + end_ind = tag_line.find('"', start_ind + 1) + 1 + strings.append(tag_line[start_ind + 1: end_ind - 1]) + tag_line = tag_line[:start_ind] + tag_line[end_ind:] + return strings, tag_line + + def _process_description(self, desc, fill_desc=True, fill_links=True): + """ + Method processing description. Return description of specific format. + + :param desc: description of news article with useless info and tags + :type desc: str + :return: tuple (description, links). + description is description without useless info and tags. With inserts links or not. + links is list with formatted strings with links from all created tag objects + :rtype: tuple + """ + self._tags.clear() + index_of_tag = 1 + links = [] + while (pos_tag := self._get_next_tag(desc)) is not None: + first_quotes, last_quotes = pos_tag + full_tag_line = desc[first_quotes: last_quotes] + parameters = self._get_params_from_line(full_tag_line) + obj_tag = self._create_tag(parameters) + if obj_tag is not None: + self._tags.append(obj_tag) + if fill_desc: + desc = desc[:first_quotes] + str(obj_tag).format(index_of_tag) + desc[last_quotes:] + else: + desc = desc[:first_quotes] + desc[last_quotes:] + if fill_links: + links.append(obj_tag.format_link(index_of_tag)) + else: + links.append(obj_tag.link()) + index_of_tag += 1 + else: + desc = desc[:first_quotes] + desc[last_quotes:] + + return desc, links + + def _article_to_dict(self, article): + """ + Method for converting article info into dict of specific format. + + :param article: article for converting into dict of specific format + :type article: dict + :return: dict of specific format + :rtype: dict + """ + + dec_description, dec_links = self._process_description(article['description']) + description, links = self._process_description(article['description'], False, False) + + images = [obj for obj in self._tags if isinstance(obj, Img)] + + media = [ + {"src": image.src, + "alt": image.alt, + "width": image.width, + "height": image.height} for image in images + ] + + try: + date = datetime.datetime(*article['published_parsed'][:6]).strftime("%a, %d %b %Y %H:%M") + except (AttributeError, ValueError): + date = 'None' + + result = { + 'title': article['title'], + 'description': description, + 'dec_description': dec_description, + 'link': article['link'], + 'pubDate': date, + 'media': media, + 'links': links, + 'dec_links': dec_links, + } + + return result + + +Parser = HTMLParser() diff --git a/rssreader_server/api_v1/managers/__init__.py b/rssreader_server/api_v1/managers/__init__.py new file mode 100644 index 0000000..ffe7dcc --- /dev/null +++ b/rssreader_server/api_v1/managers/__init__.py @@ -0,0 +1,2 @@ +from .source_manager import SourceManager +from .article_manager import ArticleManager diff --git a/rssreader_server/api_v1/managers/article_manager.py b/rssreader_server/api_v1/managers/article_manager.py new file mode 100644 index 0000000..7d86dea --- /dev/null +++ b/rssreader_server/api_v1/managers/article_manager.py @@ -0,0 +1,19 @@ +from api_v1.models import Article + +__all__ = ['ArticleManager'] + + +class ArticleManager: + @staticmethod + def create_and_return(structs, source): + """ + Method for creating articles in list in db. Return count of created objects + + :param structs: list of articles structs + :param source: model Source object of feeds source + :type structs: list + :type source: Source + :return: count of new created objects + :rtype: int + """ + return len([art for struct in structs if (art := Article.from_struct(struct, source)) is not None]) diff --git a/rssreader_server/api_v1/managers/source_manager.py b/rssreader_server/api_v1/managers/source_manager.py new file mode 100644 index 0000000..4c01f83 --- /dev/null +++ b/rssreader_server/api_v1/managers/source_manager.py @@ -0,0 +1,40 @@ +from api_v1.models import Source + +__all__ = ['SourceManager'] + + +class SourceManager: + @staticmethod + def get_or_create(url, title): + """ + Method for safe getting a Source model object. + + :param url: string link for init object + :param title: title of feeds source + :type url: str + :type title: str + :return: Source object. If object with such data is founded return it, + else created new object and return it. + :rtype: Source + """ + return Source.get_or_create(url, title=title) + + @staticmethod + def get_articles_with_data_from(url, date): + """ + Method to getting articles with date after a given date. + + :param url: URL-key for getting Source object + :param date: date for query + :type url: str + :type date: str + :return: dict with title of a rss source and founded articles + :rtype dict + """ + source = Source.get_or_create(url) + + articles = source.sort_by_date(date) + return { + 'title': source.title, + 'articles': articles, + } diff --git a/rssreader_server/api_v1/migrations/__init__.py b/rssreader_server/api_v1/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rssreader_server/api_v1/models/__init__.py b/rssreader_server/api_v1/models/__init__.py new file mode 100644 index 0000000..2f5afed --- /dev/null +++ b/rssreader_server/api_v1/models/__init__.py @@ -0,0 +1,2 @@ +from .article import Article +from .source import Source diff --git a/rssreader_server/api_v1/models/article.py b/rssreader_server/api_v1/models/article.py new file mode 100644 index 0000000..abb2f2c --- /dev/null +++ b/rssreader_server/api_v1/models/article.py @@ -0,0 +1,54 @@ +import datetime +import json + +from django.db import models, IntegrityError + + +class Article(models.Model): + title = models.TextField() + description = models.TextField() + dec_description = models.TextField() + link = models.TextField(unique=True) + pubDate = models.DateTimeField() + media = models.TextField() + source = models.ForeignKey('Source', on_delete=models.CASCADE, related_name='articles') + links = models.TextField() + dec_links = models.TextField() + + class Meta: + ordering = ['pubDate'] + + @classmethod + def from_struct(cls, struct, source): + try: + if struct['pubDate'] != 'None': + date = datetime.datetime.strptime(struct['pubDate'], "%a, %d %b %Y %H:%M") + else: + date = datetime.datetime.now() + + return cls.objects.create( + title=struct['title'], + description=struct['description'], + dec_description=struct['dec_description'], + link=struct['link'], + pubDate=date, + media=json.dumps(struct['media']), + source=source, + links=json.dumps(struct['links']), + dec_links=json.dumps(struct['dec_links']) + ) + except IntegrityError: + return None + + def to_dict(self): + return { + 'title': self.title, + 'description': self.description, + 'dec_description': self.dec_description, + 'link': self.link, + 'pubDate': self.pubDate.strftime("%a, %d %b %Y %H:%M"), + 'media': json.loads(self.media), + 'source': self.source.url, + 'links': json.loads(self.links), + 'dec_links': json.loads(self.dec_links), + } diff --git a/rssreader_server/api_v1/models/source.py b/rssreader_server/api_v1/models/source.py new file mode 100644 index 0000000..ebbe1be --- /dev/null +++ b/rssreader_server/api_v1/models/source.py @@ -0,0 +1,19 @@ +from django.db import models + + +class Source(models.Model): + title = models.TextField(null=True) + url = models.TextField(unique=True) + + class Meta: + ordering = ['title', ] + + @classmethod + def get_or_create(cls, url, title=None): + try: + return cls.objects.get(url=url) + except cls.DoesNotExist: + return cls.objects.create(url=url, title=title) + + def sort_by_date(self, date): + return self.articles.filter(pubDate__gte=date) diff --git a/rssreader_server/api_v1/storage_controller.py b/rssreader_server/api_v1/storage_controller.py new file mode 100644 index 0000000..19e77b8 --- /dev/null +++ b/rssreader_server/api_v1/storage_controller.py @@ -0,0 +1,49 @@ +from .managers import ArticleManager, SourceManager + + +class StorageController: + """ + Static controller for loading and saving articles in database. + """ + + @staticmethod + def load(url, date, limit): + """ + Method for loading limited articles from database + + :param url: source URL for getting articles from db + :param date: date from which need to load articles in string + :param limit: limit of articles for loading + :type url: str + :type date: str + :type limit: int + :return: list of dicts of articles with date after a given date + :rtype: list + """ + clr_url = url.strip('/\\') + articles = SourceManager.get_articles_with_data_from(clr_url, date) + + if limit is not None: + articles['articles'] = [article for i, article in enumerate(articles['articles']) if i < limit] + + articles['articles'] = [article.to_dict() for article in articles['articles']] + return articles + + @staticmethod + def save(url, articles, title): + """ + Method for saving parsed articles. + + :param url: string URL of RSS source + :param articles: parsed articles + :param title: title of RSS source + :type url: str + :type articles: list + :type title: str + :return: count of new created articles in db + :rtype: int + """ + clr_url = url.strip('/\\') + source = SourceManager.get_or_create(clr_url, title) + + return ArticleManager.create_and_return(articles, source) diff --git a/rssreader_server/api_v1/urls.py b/rssreader_server/api_v1/urls.py new file mode 100644 index 0000000..a22dfc1 --- /dev/null +++ b/rssreader_server/api_v1/urls.py @@ -0,0 +1,25 @@ +from django.urls import path + +from .views import * + +""" + API v1.0 + + /news/ + * method GET -> return news: + Method GET using for take parameters. + Optional parameters: + * url=URL RSS URL + * limit=LIMIT Limit news topics if this parameter provided + * date=DATE Print cached articles by date + * to_json Print result as JSON in browser + * to-pdf=TO_PDF Print result as PDF in file `TO_PDF` + * to-html=TO_HTML Print result as HTML in file `TO_PDF` + /help/ + * all methods -> return info about + +""" +urlpatterns = [ + path('news/', LoaderNews().download_result), + path('help/', show_help_view) +] diff --git a/rssreader_server/api_v1/view_controller.py b/rssreader_server/api_v1/view_controller.py new file mode 100644 index 0000000..975f8cb --- /dev/null +++ b/rssreader_server/api_v1/view_controller.py @@ -0,0 +1,97 @@ +import os + +from django.http import FileResponse + +from .converters import * + + +class ResponseBuilder: + """ + Base class with base logic of converting and load file with result. + """ + from_folder = '__cache__' + extension = None + converter = None + + def load(self, articles, filename): + """ + Base method of loading file with result of executing program. + + :param articles: dict with articles for converting and output + :param filename: name of file for output + :type articles: dict + :type filename: str + :return: + """ + result_response = self.load_result(articles, filename) + result_response = os.path.join(self.from_folder, result_response) + return FileResponse(open(result_response, 'rb'), + filename=result_response, + content_type='application/txt') + + def load_result(self, articles, filename): + """ + Method of converting result. Return name of file with result. + + :param articles: dict with articles for converting and output + :param filename: name of file for output + :type articles: dict + :type filename: str + :return: name of file for output, if process if successful + :rtype: str + """ + return self.converter().print(articles, filename=self.this_filename(filename)) + + def this_filename(self, filename): + """ + Method for correct name of file for current format. + + :param filename: + :return: + """ + return filename + self.extension if not filename.endswith(self.extension) else filename + + +class PDFResponse(ResponseBuilder): + """ + Class processing request articles to PDF format + """ + extension = '.pdf' + converter = PDFPrintResponseConverter + + +class JSONResponse(ResponseBuilder): + """ + Class processing request articles to JSON format + """ + extension = '.json' + converter = JSONPrintResponseConverter + + +class HTMLResponse(ResponseBuilder): + """ + Class processing request articles to HTML format + """ + extension = '.html' + converter = HTMLPrintResponseConverter + + +class SampleResponse(ResponseBuilder): + """ + Class processing request articles without any format + """ + extension = '.txt' + converter = SamplePrintResponseConverter + + +class ResponseController: + @staticmethod + def load_result_into_file(articles, to_pdf=None, to_html=None, to_json=None, to_sample=None): + if to_html is not None: + return HTMLResponse().load(articles, filename=to_html) + if to_pdf is not None: + return PDFResponse().load(articles, filename=to_pdf) + if to_json is not None: + return JSONResponse().load(articles, filename=to_json) + + return SampleResponse().load(articles, filename=to_sample) diff --git a/rssreader_server/api_v1/views.py b/rssreader_server/api_v1/views.py new file mode 100644 index 0000000..6660dea --- /dev/null +++ b/rssreader_server/api_v1/views.py @@ -0,0 +1,122 @@ +import datetime + +import feedparser +from django.http import HttpResponse, Http404 + +from api_v1.feed_parser import Parser +from api_v1.storage_controller import StorageController +from api_v1.view_controller import ResponseController + + +class LoaderNews: + def download_result(self, request): + """ + Method of process request + + :param request: request struct for processing + :type: dict + :return: response of process URL + :rtype: HttpResponse + """ + if request.method == 'GET': + kwargs = { + 'url': request.GET.get('url', None), + 'date': request.GET.get('date', None), + 'limit': request.GET.get('limit', None), + 'to_pdf': request.GET.get('to_pdf', None), + 'to_json': request.GET.get('to_json', None), + 'to_html': request.GET.get('to_html', None), + } + return self._execute(**kwargs) + else: + return Http404() + + def _execute(self, url, limit, date, **kwargs): + """ + Main method of processing request. + + :param url: URL RSS + :param limit: count of output articles, if given + :param date: datetime, need for load caching from storage + :param kwargs: optional parameters + :type url: str + :type limit: str + :type date: str + :type kwargs: dict + :return: http response of processing request + :rtype: HttpResponse + """ + if limit: + try: + limit = int(limit) + if limit < 1: + return HttpResponse(f"Bad given value limit '{limit}'", status=404) + except (ValueError, TypeError): + return HttpResponse(f"Bad given value limit '{limit}'", status=404) + + if not date: + articles = self._get_articles_from_url(url, limit) + + StorageController.save(url, articles['articles'], articles['title']) + else: + try: + datetime.datetime.strptime(date, "%Y%m%d") + except ValueError: + return HttpResponse(f"Error format date {date}. Need '%Y%m%d'", status=404) + articles = StorageController.load(url, datetime.datetime.strptime(date, "%Y%m%d"), limit) + + return ResponseController.load_result_into_file(articles, + to_html=kwargs.get('to_html', None), + to_json=kwargs.get('to_json', None), + to_pdf=kwargs.get('to_pdf', None), + to_sample=datetime.datetime.now().strftime("%d%m%Y%H%M%S")) + + @staticmethod + def _get_articles_from_url(url, limit): + """ + Method for downloading articles from given URL. + + :param url: RSS URL + :param limit: count of output articles, if given + :type url: str + :type limit: int + :return: dict with parsed articles + :rtype: dict + """ + if 'status' not in (response := feedparser.parse(url.strip())) or len(response['entries']) == 0: + return HttpResponse(f"Error: Impossible parse RSS Feeds from url '{url}'", status=404) + + if response['status'] in range(200, 300): + pass + else: + return HttpResponse(f"Error connecting with URL '{url.strip()}' with status code {response['status']}.", + status=404) + + return Parser.parse(response, limit) + + +def show_help_view(request): + """ + Method for output info about. + + :return: http response with info about API of current app + :rtype: HttpResponse + """ + html_result = f"" \ + f"" \ + f"" \ + f"" \ + f"RSS Feeds" \ + f"" \ + f"" \ + f"Method GET using for take parameters." \ + f"Optional parameters:" \ + f"\n\t* url=URL RSS URL" \ + f"\n\t* limit=LIMIT Limit news topics if this parameter provided" \ + f"\n\t* date=DATE Print cached articles by date" \ + f"\n\t* to_json Print result as JSON in browser" \ + f"\n\t* to-pdf=TO_PDF Print result as PDF in file `TO_PDF`" \ + f"\n\t* to-html=TO_HTML Print result as HTML in file `TO_PDF`</plaintext>" \ + f"</body>" \ + f"</html>" + return HttpResponse(html_result) diff --git a/rssreader_server/manage.py b/rssreader_server/manage.py new file mode 100755 index 0000000..e209c73 --- /dev/null +++ b/rssreader_server/manage.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rssreader_server.settings') + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/rssreader_server/rssreader_server/__init__.py b/rssreader_server/rssreader_server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rssreader_server/rssreader_server/settings.py b/rssreader_server/rssreader_server/settings.py new file mode 100644 index 0000000..2d065dd --- /dev/null +++ b/rssreader_server/rssreader_server/settings.py @@ -0,0 +1,83 @@ +import os + +BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +SECRET_KEY = 'ny#t%v)^98(0afy8#+$dbrp^strv6#+#_nif)w8o2%dbffx5ft' + +DEBUG = True + +ALLOWED_HOSTS = [] + +INSTALLED_APPS = [ + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', + 'api_v1', +] + +MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', +] + +ROOT_URLCONF = 'rssreader_server.urls' + +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + }, + }, +] + +WSGI_APPLICATION = 'rssreader_server.wsgi.application' + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), + } +} + +AUTH_PASSWORD_VALIDATORS = [ + { + 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + }, +] + +LANGUAGE_CODE = 'en-us' + +TIME_ZONE = 'UTC' + +USE_I18N = True + +USE_L10N = True + +USE_TZ = True + +STATIC_URL = '/static/' diff --git a/rssreader_server/rssreader_server/urls.py b/rssreader_server/rssreader_server/urls.py new file mode 100644 index 0000000..38d08ab --- /dev/null +++ b/rssreader_server/rssreader_server/urls.py @@ -0,0 +1,7 @@ +from django.contrib import admin +from django.urls import path, include + +urlpatterns = [ + path('admin/', admin.site.urls), + path('api/v1/', include('api_v1.urls')) +] diff --git a/rssreader_server/rssreader_server/wsgi.py b/rssreader_server/rssreader_server/wsgi.py new file mode 100644 index 0000000..8ba396b --- /dev/null +++ b/rssreader_server/rssreader_server/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for rssreader_server project. + +It exposes the WSGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/2.2/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'rssreader_server.settings') + +application = get_wsgi_application() From 764224c652daaad3a3ab5227fb2e397397baae7f Mon Sep 17 00:00:00 2001 From: PivovarSergey <pivovar-ser-leon@inbox.ru> Date: Sun, 1 Dec 2019 22:26:35 +0300 Subject: [PATCH 42/43] Added return if no articles to show --- rssreader/rss_reader.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rssreader/rss_reader.py b/rssreader/rss_reader.py index 641d266..53f8273 100644 --- a/rssreader/rss_reader.py +++ b/rssreader/rss_reader.py @@ -71,6 +71,10 @@ def __call__(self, source, limit, date, **kwargs): logging.info("Date is correct. Start loading by date") articles = StorageController().load(source, date, limit) + if len(articles['articles']) < 1: + print(f"No news articles for output") + exit(0) + logging.info("All articles was successfully loaded") OutputController.print(articles, **kwargs) From 8a3ae96ca94ef750e6da0615e6467d80a2b40ac2 Mon Sep 17 00:00:00 2001 From: PivovarSergey <pivovar-ser-leon@inbox.ru> Date: Sun, 1 Dec 2019 22:40:59 +0300 Subject: [PATCH 43/43] Disable DEBUG mode in Django server and remove django admin --- rssreader_server/rssreader_server/settings.py | 3 +-- rssreader_server/rssreader_server/urls.py | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/rssreader_server/rssreader_server/settings.py b/rssreader_server/rssreader_server/settings.py index 2d065dd..20e879c 100644 --- a/rssreader_server/rssreader_server/settings.py +++ b/rssreader_server/rssreader_server/settings.py @@ -4,12 +4,11 @@ SECRET_KEY = 'ny#t%v)^98(0afy8#+$dbrp^strv6#+#_nif)w8o2%dbffx5ft' -DEBUG = True +DEBUG = False ALLOWED_HOSTS = [] INSTALLED_APPS = [ - 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', diff --git a/rssreader_server/rssreader_server/urls.py b/rssreader_server/rssreader_server/urls.py index 38d08ab..21f33c3 100644 --- a/rssreader_server/rssreader_server/urls.py +++ b/rssreader_server/rssreader_server/urls.py @@ -1,7 +1,5 @@ -from django.contrib import admin from django.urls import path, include urlpatterns = [ - path('admin/', admin.site.urls), path('api/v1/', include('api_v1.urls')) ]