diff --git a/podfox/__init__.py b/podfox/__init__.py
index 12120b3..9b2ab09 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -7,7 +7,7 @@
     podfox.py update [<shortname>] [-c=<path>]
     podfox.py feeds [-c=<path>]
     podfox.py episodes <shortname> [-c=<path>]
-    podfox.py download [<shortname> --how-many=<n>] [-c=<path>]
+    podfox.py download [<shortname> --how-many=<n>] [--rename-files] [-c=<path>]
     podfox.py rename <shortname> <newname> [-c=<path>]
     podfox.py prune [<shortname> --maxage-days=<n>]
 
@@ -21,7 +21,9 @@
 from colorama import Fore, Back, Style
 from docopt import docopt
 from os.path import expanduser
+from urllib.parse import urlparse
 from sys import exit
+from tqdm import tqdm
 import colorama
 import datetime
 import feedparser
@@ -31,6 +33,10 @@
 import requests
 import sys
 import re
+import concurrent.futures
+import threading
+import logging
+logging.basicConfig(level=logging.WARNING)
 
 # RSS datetimes follow RFC 2822, same as email headers.
 # this is the chain of stackoverflow posts that led me to believe this is true.
@@ -40,11 +46,12 @@
 # how-to-parse-a-rfc-2822-date-time-into-a-python-datetime
 from email.utils import parsedate
-from time import mktime
+from time import mktime, localtime, strftime
 
 
 CONFIGURATION_DEFAULTS = {
     "podcast-directory": "~/Podcasts",
     "maxnum": 5000,
+    "maxage-days": 0,
     "mimetypes": [ "audio/aac",
                    "audio/ogg",
                    "audio/mpeg",
@@ -219,30 +226,60 @@ def episodes_from_feed(d):
     return episodes
 
 
-def download_multiple(feed, maxnum):
-    for episode in feed['episodes']:
-        if maxnum == 0:
-            break
-        if not episode['downloaded'] and not episode_too_old(episode, CONFIGURATION['maxage-days']):
-            episode['filename'] = download_single(feed['shortname'], episode['url'])
-            episode['downloaded'] = True
-            maxnum -= 1
+def download_multiple(feed, maxnum, rename):
+    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+        # queue up to maxnum of the not-yet-downloaded episodes
+        future_to_episodes = {}
+        for episode in list(filter(lambda ep: not ep['downloaded'] and not episode_too_old(ep, CONFIGURATION['maxage-days']), feed['episodes']))[:maxnum]:
+            filename = ""
+
+            if rename:
+                title = episode['title']
+                for c in '<>\"|*%?\\/':
+                    title = title.replace(c, "")
+                title = title.replace(" ", "_").replace("’", "'").replace("—", "-").replace(":", ".")
+                # Shorten the title to max 120 characters
+                title = title[:120]
+                extension = os.path.splitext(urlparse(episode['url'])[2])[1]
+                filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])),
+                                            title, extension)
+
+
+            future_to_episodes[executor.submit(download_single, feed['shortname'], episode['url'], filename)] = episode
+
+        for future in concurrent.futures.as_completed(future_to_episodes):
+            episode = future_to_episodes[future]
+            try:
+                filename = future.result()
+                episode['filename'] = filename if filename else ''
+                episode['downloaded'] = True if filename else False
+            except Exception as exc:
+                print('%r generated an exception: %s' % (episode['title'], exc))
     overwrite_config(feed)
 
-def download_single(folder, url):
-    print(url)
+
+def download_single(folder, url, filename=""):
+    logging.info("{}: Parsing URL {}".format(threading.current_thread().name, url))
     base = CONFIGURATION['podcast-directory']
     r = requests.get(url.strip(), stream=True)
-    try:
-        filename = re.findall('filename="([^"]+)', r.headers['content-disposition'])[0]
-    except:
-        filename = get_filename_from_url(url)
-    print_green("{:s} downloading".format(filename))
-    with open(os.path.join(base, folder, filename), 'wb') as f:
-        for chunk in r.iter_content(chunk_size=1024**2):
-            f.write(chunk)
-    print("done.")
+    if not filename:
+        try:
+            filename = re.findall('filename="([^"]+)', r.headers['content-disposition'])[0]
+        except:
+            filename = get_filename_from_url(url)
+    logging.info("{}: {:s} downloading".format(threading.current_thread().name, filename))
+    try:
+        with open(os.path.join(base, folder, filename), 'wb') as f:
+            pbar = tqdm(total=int(r.headers['Content-Length']), unit='B', unit_scale=True, unit_divisor=1024)
+            pbar.set_description(filename if len(filename) < 20 else filename[:20])
+            for chunk in r.iter_content(chunk_size=1024**2):
+                f.write(chunk)
+                pbar.update(len(chunk))
+    except EnvironmentError:
+        print_err("{}: Error while writing {}".format(threading.current_thread().name, filename))
+        return ''
+    logging.info("{}: done.".format(threading.current_thread().name))
     return filename
 
 
 def available_feeds():
@@ -393,11 +430,12 @@ def main():
             maxnum = int(arguments['--how-many'])
         else:
             maxnum = CONFIGURATION['maxnum']
+        rename_files = bool(arguments['--rename-files'])
         #download episodes for a specific feed
         if arguments['<shortname>']:
             feed = find_feed(arguments['<shortname>'])
             if feed:
-                download_multiple(feed, maxnum)
+                download_multiple(feed, maxnum, rename_files)
                 exit(0)
             else:
                 print_err("feed {} not found".format(arguments['<shortname>']))
@@ -405,7 +443,7 @@
         #download episodes for all feeds.
         else:
             for feed in available_feeds():
-                download_multiple(feed, maxnum)
+                download_multiple(feed, maxnum, rename_files)
             exit(0)
     if arguments['rename']:
         rename(arguments['<shortname>'], arguments['<newname>'])
diff --git a/requirements.txt b/requirements.txt
index 4c0d9cb..f945a93 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ colorama==0.3.7
 docopt==0.6.2
 feedparser==6.0.2
 requests==2.20.0
+tqdm==4.48.2
diff --git a/setup.py b/setup.py
index 1b51b6d..dd2c23a 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
 required = f.read().splitlines()
 
 setup(name='podfox',
-      version='0.1.1',
+      version='0.1.2',
       description='Podcatcher for the terminal',
       url='http://github.com/brtmr/podfox',
       author='Bastian Reitemeier',