introduction-to-python-bsuir-2019 · el0ny · Nov 9, 2019 · Nov 12, 2019 · Nov 14, 2019 · Nov 14, 2019
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Elia Onishchouk
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1 @@
+include requirements.txt
diff --git a/README.md b/README.md
@@ -0,0 +1,31 @@
+# Introduction to Python. Hometask
+
+RSS reader is a command-line utility which receives an [RSS](https://en.wikipedia.org/wiki/RSS) URL and prints the results in a human-readable format.
+
+
+Utility provides the following interface:
+```shell
+usage: rss_reader.py [-h] [--version] [--json] [--verbose] [--limit LIMIT]
+                     source
+
+Pure Python command-line RSS reader.
+
+positional arguments:
+  source         RSS URL
+
+optional arguments:
+  -h, --help     show this help message and exit
+  --version      Print version info
+  --json         Print result as JSON in stdout
+  --verbose      Outputs verbose status messages
+  --limit LIMIT  Limit news topics if this parameter provided
+
+```
+
+With the argument `--json` the program converts the news into [JSON](https://en.wikipedia.org/wiki/JSON) format.
+
+With the argument `--limit` the program prints only the given number of news items.
+
+With the argument `--verbose` the program prints all logs to stdout.
+
+With the argument `--version` the program prints its current version to stdout and exits.
diff --git a/json_schema.json b/json_schema.json
@@ -0,0 +1,91 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "title": "News feed json schema",
+  "required": [
+        "Feed",
+        "Items"
+      ],
+  "properties": {
+	"Feed": {
+	  "type": "string",
+	  "title": "Feed",
+	  "description": "The title of the feed"
+	},
+	"Items": {
+	  "type": "array",
+	  "title": "News",
+	  "items": {
+		"type": "object",
+		"title": "news",
+		"required": [
+             "title",
+              "description",
+              "link",
+              "pubDate",
+              "source"
+            ],
+		"properties": {
+		  "title": {
+			"type": "string",
+			"title": "Title",
+			"description": "The title of the news"
+		  },
+		  "description": {
+			"type": "string",
+			"title": "Description",
+			"description": "The description of the news"
+		  },
+		  "link": {
+			"type": "string",
+			"title": "Link",
+			"description": "The origin link of the news"
+		  },
+		  "pubDate": {
+			"type": "string",
+			"title": "Date",
+			"description": "The date this news was published"
+		  },
+		  "source": {
+			"type": "object",
+			"title": "Links inside the description",
+			"required": [
+                  "images_links",
+                  "href_links",
+                  "video_links"
+                ],
+			"properties": {
+			  "images_links": {
+				"type": "array",
+				"title": "Images links",
+				"items": {
+				  "type": "string",
+				  "title": "Image link",
+				  "description": "The source of the image"
+				}
+			 },
+			  "href_links": {
+				"type": "array",
+				"title": "Hyper references",
+				"items": {
+				  "type": "string",
+				  "title": "URL link",
+				  "description": "The source of the hyper reference"
+				}
+			  },
+			  "video_links": {
+				"type": "array",
+				"title": "Video links",
+				"items": {
+				  "type": "string",
+				  "title": "Video link",
+				  "description": "The source of the video"
+				}
+			  }
+			}
+		  }
+		}
+	  }
+	}
+  }
+}
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+feedparser
+bs4
diff --git a/rss_reader/__init__.py b/rss_reader/__init__.py
diff --git a/rss_reader/rss_reader.py b/rss_reader/rss_reader.py
@@ -0,0 +1,171 @@
+import argparse
+import feedparser
+import logging
+import html
+import json
+from bs4 import BeautifulSoup
+# from tqdm import tqdm
+from rss_reader import version as vers
+# import version as vers
+
+
+class News_Feed:
+    """A feed title together with the news items that belong to it."""
+
+    def __init__(self, feed_title, items):
+        self.feed_title = feed_title
+        self.items = items
+
+    def print_to_json(self):
+        """Print the whole feed to stdout as one JSON document."""
+        logging.info('Printing news in json format')
+        payload = {
+            "Feed": self.feed_title,
+            "Items": [entry.return_item() for entry in self.items],
+        }
+        print(json.dumps(payload))
+
+    def print_to_console(self):
+        """Print the feed title, then let every item print itself."""
+        logging.info('Printing news in console format')
+        print('Feed: {0}'.format(self.feed_title))
+        for entry in self.items:
+            entry.print_to_console()
+
+    def print_feed(self, json):
+        """Print the feed as JSON when *json* is truthy, as plain text otherwise."""
+        # The parameter shadows the json module only inside this method;
+        # print_to_json still resolves the module-level name.
+        printer = self.print_to_json if json else self.print_to_console
+        printer()
+
+
+class Item:
+    """One news entry plus the links extracted from its description."""
+
+    def __init__(self, title, date, link, description, links):
+        self.title = title
+        self.date = date
+        self.link = link
+        self.description = description
+        # Dict read with the keys 'href_links', 'images_links' and 'video_links'.
+        self.links = links
+
+    def print_to_console(self):
+        """Print the entry in human-readable form, followed by a separator line."""
+        print('\nTitle: {0}'.format(self.title))
+        print('Date: {0}'.format(self.date))
+        print('Link: {0} \n'.format(self.link))
+        print(self.description)
+        print()
+
+        # (key in self.links, heading) in the order the sections are printed.
+        sections = (
+            ('href_links', '\nLinks:'),
+            ('images_links', '\nImages:'),
+            ('video_links', '\nVideos:'),
+        )
+        for key, heading in sections:
+            references = self.links[key]
+            if not references:
+                continue  # empty sections are omitted entirely
+            print(heading)
+            for reference in references:
+                print(reference)
+        print('\n//////////////////////////////////////////////////////////////////////////')
+
+    def return_item(self):
+        """Return the entry as a dict ready for JSON serialisation."""
+        as_dict = {}
+        as_dict["title"] = self.title
+        as_dict["description"] = self.description
+        as_dict["link"] = self.link
+        as_dict["pubDate"] = self.date
+        as_dict["source"] = self.links
+        return as_dict
+
+
+def set_argparse():
+    """Build the command-line interface and return the parsed arguments."""
+    cli = argparse.ArgumentParser(description='Pure Python command-line RSS reader.')
+    cli.add_argument('source', type=str, help='RSS URL')
+
+    cli.add_argument(
+        '--version',
+        action='version',
+        version='%(prog)s v' + vers.__version__,
+        help='Print version info',
+    )
+    # The two boolean switches share the same shape, so register them together.
+    switches = (
+        ('--json', 'Print result as JSON in stdout'),
+        ('--verbose', 'Outputs verbose status messages'),
+    )
+    for flag, description in switches:
+        cli.add_argument(flag, action='store_true', help=description)
+    # -1 is the "no limit given" default.
+    cli.add_argument('--limit', type=int, default=-1, help='Limit news topics if this parameter provided')
+    return cli.parse_args()
+
+
+def find_images(args, soup):
+    """Replace every ``<img>`` that has a ``src`` with a numbered text placeholder.
+
+    Images without a ``src`` attribute are left untouched and are not
+    numbered, mirroring how find_href and find_videos treat tags that lack
+    their link attribute (previously such an image raised KeyError).
+
+    :param args: parsed command-line arguments (currently unused).
+    :param soup: BeautifulSoup tree of one news description; modified in place.
+    :return: list of ``'[N]: <src>'`` strings, N matching the placeholder numbers.
+    """
+    logging.info('Starting image finding')
+    images_links = []
+    for img in soup.find_all('img'):
+        if 'src' not in img.attrs:
+            continue
+        number = len(images_links) + 1
+        alt = img.get('alt')
+        if alt:
+            replaced_data = ' [image {0} | {1}] '.format(number, alt)
+        else:
+            # Trailing space added so the placeholder does not run into the
+            # following text, consistent with the other placeholders.
+            replaced_data = ' [image {0}] '.format(number)
+        images_links.append('[{0}]: {1}'.format(number, img['src']))
+        # Replace this exact tag instead of searching the tree again.
+        img.replace_with(replaced_data)
+
+    logging.info('Image finding finished. Found %s images', len(images_links))
+    return images_links
+
+
+def find_href(args, soup):
+    """Replace every ``<a>`` that has an ``href`` with a numbered text placeholder.
+
+    Anchors without an ``href`` attribute are left untouched and are not
+    numbered.
+
+    :param args: parsed command-line arguments (currently unused).
+    :param soup: BeautifulSoup tree of one news description; modified in place.
+    :return: list of ``'[N]: <url>'`` strings, N matching the placeholder numbers.
+    """
+    logging.info('Starting link finding')
+    href_links = []
+    for anchor in soup.find_all('a'):
+        if 'href' not in anchor.attrs:
+            continue
+        number = len(href_links) + 1
+        if anchor.text != '':
+            replaced_data = ' [link {0} | {1}] '.format(number, anchor.text)
+        else:
+            replaced_data = ' [link {0}] '.format(number)
+        href_links.append('[{0}]: {1}'.format(number, anchor['href']))
+        # Replace this exact tag. Looking up the first <a> in the tree again
+        # would hit an earlier anchor that was skipped for lacking href.
+        anchor.replace_with(replaced_data)
+    logging.info('Link finding finished. Found %s links', len(href_links))
+    return href_links
+
+
+def find_videos(args, soup):
+    """Replace every ``<iframe>`` that has a ``src`` with a numbered text placeholder.
+
+    Iframes without a ``src`` attribute are left untouched and are not
+    numbered.
+
+    :param args: parsed command-line arguments (currently unused).
+    :param soup: BeautifulSoup tree of one news description; modified in place.
+    :return: list of ``'[N]: <src>'`` strings, N matching the placeholder numbers.
+    """
+    logging.info('Starting video finding')
+    video_links = []
+    for video in soup.find_all('iframe'):
+        if 'src' not in video.attrs:
+            continue
+        number = len(video_links) + 1
+        replaced_data = ' [video {0}] '.format(number)
+        video_links.append('[{0}]: {1}'.format(number, video['src']))
+        # The original passed an undefined name here (NameError) and replaced
+        # the first <iframe> in the tree rather than the one being processed.
+        video.replace_with(replaced_data)
+    logging.info('Video finding finished. Found %s videos', len(video_links))
+    return video_links
+
+
+def main():
+    """Entry point: fetch the feed named on the command line and print it.
+
+    Parses the arguments, downloads and parses the feed with feedparser,
+    turns each entry's description into plain text with numbered link,
+    image and video placeholders, and prints the result as text or JSON.
+
+    Any Exception raised on the way is caught and its message printed; the
+    full traceback is logged at debug level, so it is visible with --verbose.
+    """
+    try:
+        args = set_argparse()
+        if args.verbose:
+            logging.basicConfig(format='%(asctime)s %(funcName)s %(message)s', datefmt='%I:%M:%S', level=logging.DEBUG)
+
+        logging.info('Application started. RSS source is %s', args.source)
+        parsed = feedparser.parse(args.source)
+        if parsed.bozo == 1:
+            # Keep the user-facing message, but record what feedparser saw.
+            logging.debug('feedparser reported: %r', parsed.get('bozo_exception'))
+            raise Exception('The feed is not well-formed XML')
+
+        entries = parsed.entries
+        # A negative limit (the default) or one that is too large means "all".
+        limit = args.limit
+        if limit < 0 or limit > len(entries):
+            limit = len(entries)
+
+        news = []
+        logging.info('Begin processing each news')
+        for number, entry in enumerate(entries[:limit], start=1):
+            logging.info('Parsing news number %s', number)
+            # Entries may lack optional fields; fall back to empty strings
+            # instead of failing the whole run with a bare KeyError message.
+            # html.unescape() works on strings, so it is applied to the
+            # extracted text below, not to the soup object.
+            soup = BeautifulSoup(entry.get('summary', ''), "html.parser")
+            # Order matters: images inside links are turned into text first,
+            # so the link placeholder includes the image placeholder.
+            images_links = find_images(args, soup)
+            href_links = find_href(args, soup)
+            video_links = find_videos(args, soup)
+            links = {'images_links': images_links, 'href_links': href_links, 'video_links': video_links}
+            news.append(Item(
+                html.unescape(entry.get('title', '')),
+                entry.get('published', ''),
+                entry.get('link', ''),
+                html.unescape(soup.text),
+                links,
+            ))
+            logging.info('News number %s has parsed', number)
+
+        feed = News_Feed(parsed.feed.get('title', ''), news)
+        feed.print_feed(args.json)
+        logging.info('Application completed')
+
+    except Exception as e:
+        logging.debug('Unhandled error', exc_info=True)
+        print(e)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/rss_reader/version.py b/rss_reader/version.py
@@ -0,0 +1 @@
+# Package version; read by setup.py and by the --version command-line flag.
+__version__ = "1.3"
diff --git a/setup.py b/setup.py
@@ -0,0 +1,33 @@
+"""Packaging script for the rss-reader command-line utility."""
+import setuptools
+
+from rss_reader import version
+
+# Decode explicitly as UTF-8 so the build does not depend on the platform's
+# default encoding.
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name="rss-reader",
+    version=version.__version__,
+    author="Elia Onishchouk",
+    author_email="elias0n@mail.ru",
+    description="A simple command-line RSS reader",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/el0ny/PythonHomework",
+    packages=setuptools.find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    entry_points={
+        # Installs an `rss_reader` command that calls rss_reader.rss_reader.main().
+        'console_scripts': [
+            'rss_reader = rss_reader.rss_reader:main',
+        ],
+    },
+    # "bs4" on PyPI is only a dummy wrapper; depend on the real distribution
+    # that provides the `bs4` module.
+    install_requires=['feedparser', 'beautifulsoup4'],
+    python_requires='>=3.6',
+)