Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Elia Onishchouk

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include requirements.txt
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Introduction to Python. Hometask

RSS reader is a command-line utility which receives an [RSS](https://en.wikipedia.org/wiki/RSS) URL and prints the results in a human-readable format.


Utility provides the following interface:
```shell
usage: rss_reader.py [-h] [--version] [--json] [--verbose] [--limit LIMIT]
source

Pure Python command-line RSS reader.

positional arguments:
source RSS URL

optional arguments:
-h, --help show this help message and exit
--version Print version info
--json Print result as JSON in stdout
--verbose Outputs verbose status messages
--limit LIMIT Limit news topics if this parameter provided

```

With the argument `--json` the program converts the news into [JSON](https://en.wikipedia.org/wiki/JSON) format.

With the argument `--limit` the program prints at most the given number of news items.

With the argument `--verbose` the program prints its log messages to stderr.

With the argument `--version` the program prints its current version to stdout and exits.
91 changes: 91 additions & 0 deletions json_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"title": "News feed json schema",
"required": [
"Feed",
"Items"
],
"properties": {
"Feed": {
"type": "string",
"title": "Feed",
"description": "The title of the feed"
},
"Items": {
"type": "array",
"title": "News",
"items": {
"type": "object",
"title": "news",
"required": [
"title",
"description",
"link",
"pubDate",
"source"
],
"properties": {
"title": {
"type": "string",
"title": "Title",
"description": "The title of the news"
},
"description": {
"type": "string",
"title": "Description",
"description": "The description of the news"
},
"link": {
"type": "string",
"title": "Link",
"description": "The origin link of the news"
},
"pubDate": {
"type": "string",
"title": "Date",
"description": "The date this news was published"
},
"source": {
"type": "object",
"title": "Links inside the description",
"required": [
"images_links",
"href_links",
"video_links"
],
"properties": {
"images_links": {
"type": "array",
"title": "Images links",
"items": {
"type": "string",
"title": "Image link",
"description": "The source of the image"
}
},
"href_links": {
"type": "array",
"title": "Hyper references",
"items": {
"type": "string",
"title": "URL link",
"description": "The source of the hyper reference"
}
},
"video_links": {
"type": "array",
"title": "Video links",
"items": {
"type": "string",
"title": "Video link",
"description": "The source of the video"
}
}
}
}
}
}
}
}
}
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
feedparser
bs4
Empty file added rss_reader/__init__.py
Empty file.
171 changes: 171 additions & 0 deletions rss_reader/rss_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import argparse
import feedparser
import logging
import html
import json
from bs4 import BeautifulSoup
# from tqdm import tqdm
Comment thread
el0ny marked this conversation as resolved.
Outdated
from rss_reader import version as vers
# import version as vers


class News_Feed:
    """A feed title together with the news items that belong to it.

    ``items`` is expected to hold objects offering ``return_item()`` and
    ``print_to_console()``; both are called by the printing methods below.
    """

    def __init__(self, feed_title, items):
        self.feed_title = feed_title
        self.items = items

    def print_to_json(self):
        """Write the whole feed to stdout as a single JSON document."""
        logging.info('Printing news in json format')
        payload = {
            "Feed": self.feed_title,
            "Items": [entry.return_item() for entry in self.items],
        }
        print(json.dumps(payload))

    def print_to_console(self):
        """Write the feed title, then every item, as plain text to stdout."""
        logging.info('Printing news in console format')
        print('Feed: {0}'.format(self.feed_title))
        for entry in self.items:
            entry.print_to_console()

    def print_feed(self, json):
        """Print the feed as JSON when ``json`` is truthy, as text otherwise.

        The parameter shadows the ``json`` module inside this method only;
        the actual serialisation happens in ``print_to_json``.
        """
        printer = self.print_to_json if json else self.print_to_console
        printer()


class Item:
    """A single news entry plus the links extracted from its description.

    ``links`` is a dict with the keys ``'href_links'``, ``'images_links'``
    and ``'video_links'``, each mapping to a list of printable entries.
    """

    def __init__(self, title, date, link, description, links):
        self.title, self.date, self.link = title, date, link
        self.description = description
        self.links = links

    def print_to_console(self):
        """Print the entry as text: header, description, link sections, separator."""
        print('\nTitle: {0}'.format(self.title))
        print('Date: {0}'.format(self.date))
        print('Link: {0} \n'.format(self.link))
        print(self.description)
        print()

        # Each non-empty link category gets its own heading, in this order.
        sections = (
            ('\nLinks:', 'href_links'),
            ('\nImages:', 'images_links'),
            ('\nVideos:', 'video_links'),
        )
        for heading, key in sections:
            found = self.links[key]
            if not found:
                continue
            print(heading)
            for url in found:
                print(url)
        print('\n//////////////////////////////////////////////////////////////////////////')

    def return_item(self):
        """Return the entry as a plain dict suitable for JSON serialisation."""
        return dict(
            title=self.title,
            description=self.description,
            link=self.link,
            pubDate=self.date,
            source=self.links,
        )


def set_argparse():
    """Build the command-line interface and parse ``sys.argv``.

    Returns:
        argparse.Namespace with ``source`` (str), ``json`` (bool),
        ``verbose`` (bool) and ``limit`` (int, ``-1`` when not given).
    """
    cli = argparse.ArgumentParser(description='Pure Python command-line RSS reader.')
    cli.add_argument('source', type=str, help='RSS URL')

    cli.add_argument(
        '--version',
        action='version',
        version='%(prog)s v' + vers.__version__,
        help='Print version info',
    )
    # The two boolean switches share the same shape; only flag and help differ.
    switches = (
        ('--json', 'Print result as JSON in stdout'),
        ('--verbose', 'Outputs verbose status messages'),
    )
    for flag, description in switches:
        cli.add_argument(flag, action='store_true', help=description)
    cli.add_argument(
        '--limit',
        type=int,
        default=-1,
        help='Limit news topics if this parameter provided',
    )
    return cli.parse_args()


def find_images(args, soup):
    """Swap every ``<img>`` in *soup* for a text placeholder and list the sources.

    Images that carry a ``src`` attribute are numbered in document order,
    starting at 1. Each one is replaced in place by `` [image N | alt] ``
    (or `` [image N]`` when ``alt`` is missing or empty) and recorded as
    ``'[N]: src'``. An ``<img>`` without ``src`` is skipped instead of
    raising ``KeyError``, the same way ``find_href`` and ``find_videos``
    skip tags that lack their URL attribute.

    Args:
        args: Parsed command-line arguments; unused, kept for call compatibility.
        soup: Parsed description markup; modified in place.

    Returns:
        list of str: one ``'[N]: src'`` entry per replaced image.
    """
    logging.info('Starting image finding')
    images_links = []
    for img in soup.findAll('img'):
        if 'src' not in img.attrs:
            continue
        number = len(images_links) + 1
        alt = img.attrs.get('alt')
        if alt:
            placeholder = ' [image {0} | {1}] '.format(number, alt)
        else:
            placeholder = ' [image {0}]'.format(number)
        images_links.append('[{0}]: {1}'.format(number, img.attrs['src']))
        # Replace the tag we are looking at rather than searching the tree again.
        img.replace_with(placeholder)

    logging.info('Image finding finished. Found %s images', len(images_links))
    return images_links


def find_href(args, soup):
    """Swap every ``<a href=...>`` in *soup* for a text placeholder and list the URLs.

    Anchors that carry an ``href`` attribute are numbered in document order,
    starting at 1. Each one is replaced in place by `` [link N | text] ``
    (or `` [link N] `` when the anchor has no text) and recorded as
    ``'[N]: href'``. Anchors without ``href`` are left untouched.

    Args:
        args: Parsed command-line arguments; unused, kept for call compatibility.
        soup: Parsed description markup; modified in place.

    Returns:
        list of str: one ``'[N]: href'`` entry per replaced anchor.
    """
    logging.info('Starting link finding')
    href_links = []
    for anchor in soup.findAll('a'):
        if 'href' not in anchor.attrs:
            continue
        number = len(href_links) + 1
        if anchor.text != '':
            placeholder = ' [link {0} | {1}] '.format(number, anchor.text)
        else:
            placeholder = ' [link {0}] '.format(number)
        href_links.append('[{0}]: {1}'.format(number, anchor.attrs['href']))
        # Replace this very anchor. Looking up the first <a> in the tree would
        # hit an earlier href-less anchor and leave the real link in the text.
        anchor.replace_with(placeholder)
    logging.info('Link finding finished. Found %s links', len(href_links))
    return href_links


def find_videos(args, soup):
    """Swap every ``<iframe src=...>`` in *soup* for a text placeholder and list the URLs.

    Iframes that carry a ``src`` attribute are numbered in document order,
    starting at 1. Each one is replaced in place by `` [video N] `` and
    recorded as ``'[N]: src'``. Iframes without ``src`` are left untouched.

    Args:
        args: Parsed command-line arguments; unused, kept for call compatibility.
        soup: Parsed description markup; modified in place.

    Returns:
        list of str: one ``'[N]: src'`` entry per replaced iframe.
    """
    logging.info('Starting video finding')
    video_links = []
    for video in soup.findAll('iframe'):
        if 'src' not in video.attrs:
            continue
        number = len(video_links) + 1
        video_links.append('[{0}]: {1}'.format(number, video.attrs['src']))
        # Replace this very iframe with its placeholder text.
        video.replace_with(' [video {0}] '.format(number))
    logging.info('Video finding finished. Found %s videos', len(video_links))
    return video_links


def main():
    """Run the RSS reader: parse arguments, fetch the feed and print it.

    Steps: read the command line, enable DEBUG logging when ``--verbose`` is
    given, parse the feed at ``args.source``, turn up to ``args.limit``
    entries into ``Item`` objects (a negative limit means "all entries") and
    print them as JSON or text depending on ``args.json``.

    Any ``Exception`` raised along the way is reported with ``print`` and
    the function returns normally. ``SystemExit`` from argparse (``--help``,
    ``--version``, usage errors) is not intercepted.
    """
    try:
        args = set_argparse()
        if args.verbose:
            logging.basicConfig(format='%(asctime)s %(funcName)s %(message)s', datefmt='%I:%M:%S', level=logging.DEBUG)

        logging.info('Application started. RSS source is %s', args.source)
        parsed = feedparser.parse(args.source)
        if parsed.bozo == 1:
            raise Exception('The feed is not well-formed XML')

        # A negative limit means "no limit". Slicing already copes with a
        # limit larger than the number of entries.
        entries = parsed.entries if args.limit < 0 else parsed.entries[:args.limit]

        news = []
        logging.info('Begin processing each news')
        for number, entry in enumerate(entries, start=1):
            logging.info('Parsing news number %s', number)
            # html.unescape() works on str, not on a soup object, so the
            # markup is parsed directly. Optional entry fields fall back to
            # '' so one incomplete entry does not abort the whole run.
            soup = BeautifulSoup(entry.get('summary', ''), "html.parser")
            # Order matters: each finder rewrites the soup in place, and the
            # link text picked up by find_href may contain image placeholders.
            links = {
                'images_links': find_images(args, soup),
                'href_links': find_href(args, soup),
                'video_links': find_videos(args, soup),
            }
            news.append(Item(
                html.unescape(entry.get('title', '')),
                entry.get('published', ''),
                entry.get('link', ''),
                html.unescape(soup.text),
                links,
            ))
            logging.info('News number %s has parsed', number)

        news_feed = News_Feed(parsed.feed.get('title', ''), news)
        news_feed.print_feed(args.json)
        logging.info('Application completed')

    except Exception as e:
        # Top-level boundary: report the problem instead of a traceback.
        print(e)


if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions rss_reader/version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Package version string; read by setup.py and shown by the --version flag.
__version__="1.3"
33 changes: 33 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Packaging script for the rss-reader command-line utility."""
import setuptools

from rss_reader import version

# README.md is used as the long description. It is read as UTF-8 explicitly
# so the build does not depend on the platform's default encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="rss-reader",
    version=version.__version__,
    author="Elia Onishchouk",
    author_email="elias0n@mail.ru",
    description="A simple command-line RSS reader",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/el0ny/PythonHomework",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # Installs an `rss_reader` command that calls rss_reader.rss_reader.main().
    entry_points={
        'console_scripts': [
            'rss_reader = rss_reader.rss_reader:main',
        ],
    },
    install_requires=['feedparser', 'bs4'],
    python_requires='>=3.6',
)