From 437cfb5eb5b170dffc07f0c8b4f0e9c8dc6619d5 Mon Sep 17 00:00:00 2001 From: antfred <42848457+antfred@users.noreply.github.com> Date: Thu, 30 Aug 2018 22:53:35 +0200 Subject: [PATCH 01/17] Added option to override file names * Added option to override file names Some podcasts provide always the same filename, or meaningless file names. I added an option --rename-files to the episodes download function so that filenames are generated per episode using the schema: %Y-%m-%d_<title>.<ext> In addition, the title gets stripped of characters which would be illegal in some filesystems to prevent issues --- podfox/__init__.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 9452bbe..cbba3ff 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -7,7 +7,7 @@ podfox.py update [<shortname>] [-c=<path>] podfox.py feeds [-c=<path>] podfox.py episodes <shortname> [-c=<path>] - podfox.py download [<shortname> --how-many=<n>] [-c=<path>] + podfox.py download [<shortname> --how-many=<n>] [--rename-files] [-c=<path>] podfox.py rename <shortname> <newname> [-c=<path>] Options: @@ -20,6 +20,7 @@ from colorama import Fore, Back, Style from docopt import docopt from os.path import expanduser +from urllib.parse import urlparse from sys import exit import colorama import feedparser @@ -38,7 +39,7 @@ # how-to-parse-a-rfc-2822-date-time-into-a-python-datetime from email.utils import parsedate -from time import mktime +from time import mktime, localtime, strftime CONFIGURATION = {} @@ -189,26 +190,37 @@ def episodes_from_feed(d): return episodes -def download_multiple(feed, maxnum): +def download_multiple(feed, maxnum, rename): for episode in feed['episodes']: if maxnum == 0: break if not episode['downloaded']: - download_single(feed['shortname'], episode['url']) + if rename: + title = episode['title'] + for c in '<>\"|*%?\\/': + title = title.replace(c, "") + title = title.replace(" ", 
"_").replace("’", "'").replace("—", "-").replace(":", ".") + extension = os.path.splitext(urlparse(episode['url'])[2])[1] + filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])), + title, extension) + download_single(feed['shortname'], episode['url'], filename) + else: + download_single(feed['shortname'], episode['url']) episode['downloaded'] = True maxnum -= 1 overwrite_config(feed) -def download_single(folder, url): +def download_single(folder, url, filename=""): print(url) base = CONFIGURATION['podcast-directory'] r = requests.get(url.strip(), stream=True) - try: - filename=re.findall('filename="([^"]+)',r.headers['content-disposition'])[0] - except: - filename = url.split('/')[-1] - filename = filename.split('?')[0] + if not filename: + try: + filename=re.findall('filename="([^"]+)',r.headers['content-disposition'])[0] + except: + filename = url.split('/')[-1] + filename = filename.split('?')[0] print_green("{:s} downloading".format(filename)) with open(os.path.join(base, folder, filename), 'wb') as f: for chunk in r.iter_content(chunk_size=1024**2): @@ -335,11 +347,12 @@ def main(): maxnum = int(arguments['--how-many']) else: maxnum = CONFIGURATION['maxnum'] + rename_files = bool(arguments['--rename-files']) #download episodes for a specific feed if arguments['<shortname>']: feed = find_feed(arguments['<shortname>']) if feed: - download_multiple(feed, maxnum) + download_multiple(feed, maxnum, rename_files) exit(0) else: print_err("feed {} not found".format(arguments['<shortname>'])) @@ -347,7 +360,7 @@ def main(): #download episodes for all feeds. 
else: for feed in available_feeds(): - download_multiple(feed, maxnum) + download_multiple(feed, maxnum, rename_files) exit(0) if arguments['rename']: rename(arguments['<shortname>'], arguments['<newname>']) From 0dc074e7ced7e3482ddbc397837ba361c21c83f4 Mon Sep 17 00:00:00 2001 From: antfred <42848457+antfred@users.noreply.github.com> Date: Thu, 30 Aug 2018 22:53:35 +0200 Subject: [PATCH 02/17] Added option to override file names * Added option to override file names Some podcasts provide always the same filename, or meaningless file names. I added an option --rename-files to the episodes download function so that filenames are generated per episode using the schema: %Y-%m-%d_<title>.<ext> In addition, the title gets stripped of characters which would be illegal in some filesystems to prevent issues --- podfox/__init__.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 9452bbe..cbba3ff 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -7,7 +7,7 @@ podfox.py update [<shortname>] [-c=<path>] podfox.py feeds [-c=<path>] podfox.py episodes <shortname> [-c=<path>] - podfox.py download [<shortname> --how-many=<n>] [-c=<path>] + podfox.py download [<shortname> --how-many=<n>] [--rename-files] [-c=<path>] podfox.py rename <shortname> <newname> [-c=<path>] Options: @@ -20,6 +20,7 @@ from colorama import Fore, Back, Style from docopt import docopt from os.path import expanduser +from urllib.parse import urlparse from sys import exit import colorama import feedparser @@ -38,7 +39,7 @@ # how-to-parse-a-rfc-2822-date-time-into-a-python-datetime from email.utils import parsedate -from time import mktime +from time import mktime, localtime, strftime CONFIGURATION = {} @@ -189,26 +190,37 @@ def episodes_from_feed(d): return episodes -def download_multiple(feed, maxnum): +def download_multiple(feed, maxnum, rename): for episode in feed['episodes']: if maxnum == 0: break 
if not episode['downloaded']: - download_single(feed['shortname'], episode['url']) + if rename: + title = episode['title'] + for c in '<>\"|*%?\\/': + title = title.replace(c, "") + title = title.replace(" ", "_").replace("’", "'").replace("—", "-").replace(":", ".") + extension = os.path.splitext(urlparse(episode['url'])[2])[1] + filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])), + title, extension) + download_single(feed['shortname'], episode['url'], filename) + else: + download_single(feed['shortname'], episode['url']) episode['downloaded'] = True maxnum -= 1 overwrite_config(feed) -def download_single(folder, url): +def download_single(folder, url, filename=""): print(url) base = CONFIGURATION['podcast-directory'] r = requests.get(url.strip(), stream=True) - try: - filename=re.findall('filename="([^"]+)',r.headers['content-disposition'])[0] - except: - filename = url.split('/')[-1] - filename = filename.split('?')[0] + if not filename: + try: + filename=re.findall('filename="([^"]+)',r.headers['content-disposition'])[0] + except: + filename = url.split('/')[-1] + filename = filename.split('?')[0] print_green("{:s} downloading".format(filename)) with open(os.path.join(base, folder, filename), 'wb') as f: for chunk in r.iter_content(chunk_size=1024**2): @@ -335,11 +347,12 @@ def main(): maxnum = int(arguments['--how-many']) else: maxnum = CONFIGURATION['maxnum'] + rename_files = bool(arguments['--rename-files']) #download episodes for a specific feed if arguments['<shortname>']: feed = find_feed(arguments['<shortname>']) if feed: - download_multiple(feed, maxnum) + download_multiple(feed, maxnum, rename_files) exit(0) else: print_err("feed {} not found".format(arguments['<shortname>'])) @@ -347,7 +360,7 @@ def main(): #download episodes for all feeds. 
else: for feed in available_feeds(): - download_multiple(feed, maxnum) + download_multiple(feed, maxnum, rename_files) exit(0) if arguments['rename']: rename(arguments['<shortname>'], arguments['<newname>']) From d848909bc57e58a75a2d1b19d45d864776a75213 Mon Sep 17 00:00:00 2001 From: Antonio Frediani <antonio.frediani@gmail.com> Date: Sat, 8 Sep 2018 18:38:39 +0200 Subject: [PATCH 03/17] Added multithreading to the download --- podfox/__init__.py | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index cbba3ff..7cf913c 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -30,6 +30,8 @@ import requests import sys import re +import concurrent.futures +import threading # RSS datetimes follow RFC 2822, same as email headers. # this is the chain of stackoverflow posts that led me to believe this is true. @@ -191,10 +193,12 @@ def episodes_from_feed(d): def download_multiple(feed, maxnum, rename): - for episode in feed['episodes']: - if maxnum == 0: - break - if not episode['downloaded']: + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + # parse up to maxnum of the not downloaded episodes + future_to_episodes = {} + for episode in list(filter(lambda ep: not ep['downloaded'], feed['episodes']))[:maxnum]: + filename = "" + if rename: title = episode['title'] for c in '<>\"|*%?\\/': @@ -203,16 +207,21 @@ def download_multiple(feed, maxnum, rename): extension = os.path.splitext(urlparse(episode['url'])[2])[1] filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])), title, extension) - download_single(feed['shortname'], episode['url'], filename) - else: - download_single(feed['shortname'], episode['url']) - episode['downloaded'] = True - maxnum -= 1 + + + future_to_episodes[executor.submit(download_single, feed['shortname'], episode['url'], filename)]=episode + + for future in 
concurrent.futures.as_completed(future_to_episodes): + episode = future_to_episodes[future] + try: + episode['downloaded'] = future.result() + except Exception as exc: + print('%r generated an exception: %s' % (episode['title'], exc)) overwrite_config(feed) def download_single(folder, url, filename=""): - print(url) + print("{}: Parsing URL {}".format(threading.current_thread().name, url)) base = CONFIGURATION['podcast-directory'] r = requests.get(url.strip(), stream=True) if not filename: @@ -221,11 +230,17 @@ def download_single(folder, url, filename=""): except: filename = url.split('/')[-1] filename = filename.split('?')[0] - print_green("{:s} downloading".format(filename)) - with open(os.path.join(base, folder, filename), 'wb') as f: - for chunk in r.iter_content(chunk_size=1024**2): - f.write(chunk) - print("done.") + print_green("{}: {:s} downloading".format(threading.current_thread().name, filename)) + + try: + with open(os.path.join(base, folder, filename), 'wb') as f: + for chunk in r.iter_content(chunk_size=1024**2): + f.write(chunk) + except EnvironmentError: + print_err("{}: Error while writing {}".format(threading.current_thread().name, filename)) + return False + print("{}: done.".format(threading.current_thread().name)) + return True def available_feeds(): From d3f9add8e02116a12ba09c28256f56e23b3c49a7 Mon Sep 17 00:00:00 2001 From: Antonio Frediani <antonio.frediani@gmail.com> Date: Sat, 8 Sep 2018 18:38:39 +0200 Subject: [PATCH 04/17] Added multithreading to the download --- podfox/__init__.py | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index cbba3ff..7cf913c 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -30,6 +30,8 @@ import requests import sys import re +import concurrent.futures +import threading # RSS datetimes follow RFC 2822, same as email headers. 
# this is the chain of stackoverflow posts that led me to believe this is true. @@ -191,10 +193,12 @@ def episodes_from_feed(d): def download_multiple(feed, maxnum, rename): - for episode in feed['episodes']: - if maxnum == 0: - break - if not episode['downloaded']: + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + # parse up to maxnum of the not downloaded episodes + future_to_episodes = {} + for episode in list(filter(lambda ep: not ep['downloaded'], feed['episodes']))[:maxnum]: + filename = "" + if rename: title = episode['title'] for c in '<>\"|*%?\\/': @@ -203,16 +207,21 @@ def download_multiple(feed, maxnum, rename): extension = os.path.splitext(urlparse(episode['url'])[2])[1] filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])), title, extension) - download_single(feed['shortname'], episode['url'], filename) - else: - download_single(feed['shortname'], episode['url']) - episode['downloaded'] = True - maxnum -= 1 + + + future_to_episodes[executor.submit(download_single, feed['shortname'], episode['url'], filename)]=episode + + for future in concurrent.futures.as_completed(future_to_episodes): + episode = future_to_episodes[future] + try: + episode['downloaded'] = future.result() + except Exception as exc: + print('%r generated an exception: %s' % (episode['title'], exc)) overwrite_config(feed) def download_single(folder, url, filename=""): - print(url) + print("{}: Parsing URL {}".format(threading.current_thread().name, url)) base = CONFIGURATION['podcast-directory'] r = requests.get(url.strip(), stream=True) if not filename: @@ -221,11 +230,17 @@ def download_single(folder, url, filename=""): except: filename = url.split('/')[-1] filename = filename.split('?')[0] - print_green("{:s} downloading".format(filename)) - with open(os.path.join(base, folder, filename), 'wb') as f: - for chunk in r.iter_content(chunk_size=1024**2): - f.write(chunk) - print("done.") + print_green("{}: {:s} 
downloading".format(threading.current_thread().name, filename)) + + try: + with open(os.path.join(base, folder, filename), 'wb') as f: + for chunk in r.iter_content(chunk_size=1024**2): + f.write(chunk) + except EnvironmentError: + print_err("{}: Error while writing {}".format(threading.current_thread().name, filename)) + return False + print("{}: done.".format(threading.current_thread().name)) + return True def available_feeds(): From 2c12ec037a299a08a4b91a8bf399dcffebd9ccf6 Mon Sep 17 00:00:00 2001 From: n0trax <github@n0trax.io> Date: Sun, 31 Mar 2019 17:04:07 +0200 Subject: [PATCH 05/17] Add default configuration --- podfox/__init__.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 9452bbe..00d8ec5 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -40,6 +40,16 @@ from email.utils import parsedate from time import mktime +CONFIGURATION_DEFAULTS = { + "podcast-directory": "~/Podcasts", + "maxnum": 5000, + "mimetypes": [ "audio/aac", + "audio/ogg", + "audio/mpeg", + "audio/mp3", + "audio/mp4", + "video/mp4" ] +} CONFIGURATION = {} mimetypes = [ @@ -165,6 +175,8 @@ def overwrite_config(feed): def episodes_from_feed(d): + mimetypes = CONFIGURATION['mimetypes'] + episodes = [] for entry in d.entries: # convert publishing time to unix time, so that we can sort @@ -290,12 +302,20 @@ def main(): configfile = expanduser(arguments["--config"]) - with open(configfile) as conf_file: - try: - CONFIGURATION = json.load(conf_file) - except ValueError: - print("invalid json in configuration file.") - exit(-1) + try: + with open(configfile) as conf_file: + try: + userconf = json.load(conf_file) + except ValueError: + print("invalid json in configuration file.") + exit(-1) + except FileNotFoundError: + userconf = {} + + CONFIGURATION = CONFIGURATION_DEFAULTS.copy() + CONFIGURATION.update(userconf) + CONFIGURATION['podcast-directory'] = 
os.path.expanduser(CONFIGURATION['podcast-directory']) + #handle the commands if arguments['import']: if arguments['<shortname>'] is None: From 01449900490e27efe2eaf6a42771cf5025c82955 Mon Sep 17 00:00:00 2001 From: Tim Broder <timothy.broder@gmail.com> Date: Fri, 19 Apr 2019 09:09:05 -0400 Subject: [PATCH 06/17] Add support for audio/x-m4a --- podfox/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 9452bbe..afef322 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -45,7 +45,8 @@ mimetypes = [ 'audio/ogg', 'audio/mpeg', - 'video/mp4' + 'video/mp4', + 'audio/x-m4a' ] def print_err(err): From 3196062ff3bb836ed1eb6ea9997aeece46ee2f1a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 2 Oct 2019 22:21:20 +0000 Subject: [PATCH 07/17] Bump requests from 2.11.1 to 2.20.0 Bumps [requests](https://github.com/requests/requests) from 2.11.1 to 2.20.0. - [Release notes](https://github.com/requests/requests/releases) - [Changelog](https://github.com/psf/requests/blob/master/HISTORY.md) - [Commits](https://github.com/requests/requests/compare/v2.11.1...v2.20.0) Signed-off-by: dependabot[bot] <support@github.com> --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 392be50..257aef5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ colorama==0.3.7 docopt==0.6.2 feedparser==5.2.1 -requests==2.11.1 +requests==2.20.0 diff --git a/setup.py b/setup.py index c5f1d5a..1b51b6d 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,6 @@ 'colorama==0.3.7', 'docopt==0.6.2', 'feedparser==5.2.1', - 'requests==2.11.1', + 'requests==2.20.0', ], ) From e7599d8fc0b851778f9aa9d1ba5b2fd9455bbcbc Mon Sep 17 00:00:00 2001 From: Hermann Wiesner <hermann.wiesner@posteo.de> Date: Sun, 27 Sep 2020 19:41:20 +0200 Subject: [PATCH 08/17] Added mimetype 
"audio/x-mpeg" and check for troubling or missing publishing date. Example: https://www.rbb-online.de/rbbkultur/podcasts/hannah-arendt-endlich-verstehen-podcast.xml/feed=podcast.xml --- podfox/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index cfe27a5..14393a5 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -46,6 +46,7 @@ "mimetypes": [ "audio/aac", "audio/ogg", "audio/mpeg", + "audio/x-mpeg", "audio/mp3", "audio/mp4", "video/mp4" ] @@ -55,6 +56,7 @@ mimetypes = [ 'audio/ogg', 'audio/mpeg', + 'audio/x-mpeg', 'video/mp4', 'audio/x-m4a' ] @@ -182,7 +184,10 @@ def episodes_from_feed(d): for entry in d.entries: # convert publishing time to unix time, so that we can sort # this should be unix time, barring any timezone shenanigans - date = mktime(parsedate(entry.published)) + try: + date = mktime(parsedate(entry.published)) + except TypeError: + continue if hasattr(entry, 'links'): for link in entry.links: if not hasattr(link, 'type'): From 5b072701b5a377a45924b1ffec434513f1570855 Mon Sep 17 00:00:00 2001 From: Isak Rubin <dot5productions@gmail.com> Date: Thu, 17 Dec 2020 07:48:54 +0000 Subject: [PATCH 09/17] Allow usage of later versions of feedparser --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 257aef5..a4c7509 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ colorama==0.3.7 docopt==0.6.2 -feedparser==5.2.1 +feedparser>=5.2.1 requests==2.20.0 From 23c354d48fe4af92923a5dc33521abc628385e64 Mon Sep 17 00:00:00 2001 From: Antonio Frediani <antonio.frediani@gmail.com> Date: Sun, 17 Jan 2021 12:02:36 +0100 Subject: [PATCH 10/17] Added a progress bar while downloading episodes --- podfox/__init__.py | 15 ++++++++++++--- requirements.txt | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 89f6759..2cc5370 100755 --- 
a/podfox/__init__.py +++ b/podfox/__init__.py @@ -22,6 +22,7 @@ from os.path import expanduser from urllib.parse import urlparse from sys import exit +from tqdm import tqdm import colorama import feedparser import json @@ -32,6 +33,8 @@ import re import concurrent.futures import threading +import logging +logging.basicConfig(level=logging.WARNING) # RSS datetimes follow RFC 2822, same as email headers. # this is the chain of stackoverflow posts that led me to believe this is true. @@ -234,7 +237,7 @@ def download_multiple(feed, maxnum, rename): def download_single(folder, url, filename=""): - print("{}: Parsing URL {}".format(threading.current_thread().name, url)) + logging.info("{}: Parsing URL {}".format(threading.current_thread().name, url)) base = CONFIGURATION['podcast-directory'] r = requests.get(url.strip(), stream=True) if not filename: @@ -243,16 +246,19 @@ def download_single(folder, url, filename=""): except: filename = url.split('/')[-1] filename = filename.split('?')[0] - print_green("{}: {:s} downloading".format(threading.current_thread().name, filename)) + logging.info("{}: {:s} downloading".format(threading.current_thread().name, filename)) try: with open(os.path.join(base, folder, filename), 'wb') as f: + pbar = tqdm(total=int(r.headers['Content-Length'])) + pbar.set_description(filename if len(filename)<20 else filename[:20]) for chunk in r.iter_content(chunk_size=1024**2): f.write(chunk) + pbar.update(len(chunk)) except EnvironmentError: print_err("{}: Error while writing {}".format(threading.current_thread().name, filename)) return False - print("{}: done.".format(threading.current_thread().name)) + logging.info("{}: done.".format(threading.current_thread().name)) return True @@ -344,6 +350,9 @@ def main(): CONFIGURATION.update(userconf) CONFIGURATION['podcast-directory'] = os.path.expanduser(CONFIGURATION['podcast-directory']) + # Check if we should use the progress bar + + #handle the commands if arguments['import']: if 
arguments['<shortname>'] is None: diff --git a/requirements.txt b/requirements.txt index 257aef5..71f594f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ colorama==0.3.7 docopt==0.6.2 feedparser==5.2.1 requests==2.20.0 +tqdm==4.48.2 \ No newline at end of file From 6574d0f1a18d210b02a56975102746f3de6c55a4 Mon Sep 17 00:00:00 2001 From: Antonio Frediani <antonio.frediani@gmail.com> Date: Sun, 17 Jan 2021 12:02:36 +0100 Subject: [PATCH 11/17] Added a progress bar while downloading episodes --- podfox/__init__.py | 15 ++++++++++++--- requirements.txt | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 89f6759..2cc5370 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -22,6 +22,7 @@ from os.path import expanduser from urllib.parse import urlparse from sys import exit +from tqdm import tqdm import colorama import feedparser import json @@ -32,6 +33,8 @@ import re import concurrent.futures import threading +import logging +logging.basicConfig(level=logging.WARNING) # RSS datetimes follow RFC 2822, same as email headers. # this is the chain of stackoverflow posts that led me to believe this is true. 
@@ -234,7 +237,7 @@ def download_multiple(feed, maxnum, rename): def download_single(folder, url, filename=""): - print("{}: Parsing URL {}".format(threading.current_thread().name, url)) + logging.info("{}: Parsing URL {}".format(threading.current_thread().name, url)) base = CONFIGURATION['podcast-directory'] r = requests.get(url.strip(), stream=True) if not filename: @@ -243,16 +246,19 @@ def download_single(folder, url, filename=""): except: filename = url.split('/')[-1] filename = filename.split('?')[0] - print_green("{}: {:s} downloading".format(threading.current_thread().name, filename)) + logging.info("{}: {:s} downloading".format(threading.current_thread().name, filename)) try: with open(os.path.join(base, folder, filename), 'wb') as f: + pbar = tqdm(total=int(r.headers['Content-Length'])) + pbar.set_description(filename if len(filename)<20 else filename[:20]) for chunk in r.iter_content(chunk_size=1024**2): f.write(chunk) + pbar.update(len(chunk)) except EnvironmentError: print_err("{}: Error while writing {}".format(threading.current_thread().name, filename)) return False - print("{}: done.".format(threading.current_thread().name)) + logging.info("{}: done.".format(threading.current_thread().name)) return True @@ -344,6 +350,9 @@ def main(): CONFIGURATION.update(userconf) CONFIGURATION['podcast-directory'] = os.path.expanduser(CONFIGURATION['podcast-directory']) + # Check if we should use the progress bar + + #handle the commands if arguments['import']: if arguments['<shortname>'] is None: diff --git a/requirements.txt b/requirements.txt index 257aef5..71f594f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ colorama==0.3.7 docopt==0.6.2 feedparser==5.2.1 requests==2.20.0 +tqdm==4.48.2 \ No newline at end of file From f681340b917376ebc6c6b830dcb5eb42aa5d1394 Mon Sep 17 00:00:00 2001 From: Antonio Frediani <antonio.frediani@gmail.com> Date: Sun, 17 Jan 2021 12:27:12 +0100 Subject: [PATCH 12/17] Progress bar now autoscales --- 
podfox/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 2cc5370..5796eec 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -250,7 +250,7 @@ def download_single(folder, url, filename=""): try: with open(os.path.join(base, folder, filename), 'wb') as f: - pbar = tqdm(total=int(r.headers['Content-Length'])) + pbar = tqdm(total=int(r.headers['Content-Length']), unit='B', unit_scale=True, unit_divisor=1024) pbar.set_description(filename if len(filename)<20 else filename[:20]) for chunk in r.iter_content(chunk_size=1024**2): f.write(chunk) From e728c082f6344aa5a9ec8445ba49d8e832f06dd4 Mon Sep 17 00:00:00 2001 From: Antonio Frediani <antonio.frediani@gmail.com> Date: Sun, 17 Jan 2021 12:27:12 +0100 Subject: [PATCH 13/17] Progress bar now autoscales --- podfox/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 2cc5370..5796eec 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -250,7 +250,7 @@ def download_single(folder, url, filename=""): try: with open(os.path.join(base, folder, filename), 'wb') as f: - pbar = tqdm(total=int(r.headers['Content-Length'])) + pbar = tqdm(total=int(r.headers['Content-Length']), unit='B', unit_scale=True, unit_divisor=1024) pbar.set_description(filename if len(filename)<20 else filename[:20]) for chunk in r.iter_content(chunk_size=1024**2): f.write(chunk) From caac8b20959f0e0138471fea74df02fcc8afed98 Mon Sep 17 00:00:00 2001 From: Fred Thomsen <me@fredthomsen.net> Date: Sun, 16 May 2021 10:00:42 -0400 Subject: [PATCH 14/17] Add prune command Add ability to prune old episodes via prune command. Max age in days can be set via configuration or passed in via command line. This requires tracking of the filename used to store the podcast episode, so this has been added to the feed json. 
Pulling the filename out of the url can be used as a fallback; however, if the name has been pulled out of the HTTP headers, then this feature will not be backwards compatible for episodes named in this way and thus those will not be pruned. --- README.md | 6 +++++ podfox/__init__.py | 64 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d8bdab0..ea38c31 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ In podfox, every podcast is identified with its own `shortname`, which is restri podfox.py feeds podfox.py episodes <shortname> podfox.py download [<shortname> --how-many=<n>] + podfox.py prune [<shortname> --maxage-days=<n>] ``` ### Import @@ -107,3 +108,8 @@ Extortion Startups | TechSNAP 229 | Not Downloaded `podfox download ts --how-many=3` will download the 3 newest techsnap podcasts that have not yet been downloaded. (Skipping newer, but already downloaded ones). If the `--how-many` parameter is omitted, the `maxnum` parameter from the configuration file is used instead. + +### Pruning + +`podfox prune` will clean up episodes that are too old. If a max age is not set via parameter to command or in the configuration, then no pruning is done. 
+ diff --git a/podfox/__init__.py b/podfox/__init__.py index 14393a5..12120b3 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -9,6 +9,7 @@ podfox.py episodes <shortname> [-c=<path>] podfox.py download [<shortname> --how-many=<n>] [-c=<path>] podfox.py rename <shortname> <newname> [-c=<path>] + podfox.py prune [<shortname> --maxage-days=<n>] Options: -c --config=<path> Specify an alternate config file [default: ~/.podfox.json] @@ -22,6 +23,7 @@ from os.path import expanduser from sys import exit import colorama +import datetime import feedparser import json import os @@ -79,6 +81,16 @@ def get_feed_file(shortname): return os.path.join(get_folder(shortname), 'feed.json') +def get_filename_from_url(url): + return url.split('/')[-1].split('?')[0] + + +def episode_too_old(episode, maxage): + now = datetime.datetime.utcnow() + dt_published = datetime.datetime.fromtimestamp(episode["published"]) + return maxage and (now - dt_published > datetime.timedelta(days=maxage)) + + def sort_feed(feed): feed['episodes'] = sorted(feed['episodes'], key=lambda k: k['published'], reverse=True) @@ -211,28 +223,27 @@ def download_multiple(feed, maxnum): for episode in feed['episodes']: if maxnum == 0: break - if not episode['downloaded']: - download_single(feed['shortname'], episode['url']) + if not episode['downloaded'] and not episode_too_old(episode, CONFIGURATION['maxage-days']): + episode['filename'] = download_single(feed['shortname'], episode['url']) episode['downloaded'] = True maxnum -= 1 overwrite_config(feed) - def download_single(folder, url): print(url) base = CONFIGURATION['podcast-directory'] r = requests.get(url.strip(), stream=True) try: - filename=re.findall('filename="([^"]+)',r.headers['content-disposition'])[0] + filename = re.findall('filename="([^"]+)', r.headers['content-disposition'])[0] except: - filename = url.split('/')[-1] - filename = filename.split('?')[0] + filename = get_filename_from_url(url) print_green("{:s} downloading".format(filename)) 
with open(os.path.join(base, folder, filename), 'wb') as f: for chunk in r.iter_content(chunk_size=1024**2): f.write(chunk) print("done.") + return filename def available_feeds(): ''' @@ -277,6 +288,27 @@ def rename(shortname, newname): feed['shortname'] = newname overwrite_config(feed) +def prune(feed, maxage=0): + shortname = feed['shortname'] + episodes = feed['episodes'] + + print(shortname) + for i, episode in enumerate(episodes): + if episode['downloaded'] and episode_too_old(episode, maxage): + episode_path = os.path.join( + get_folder(shortname), + episode.get("filename", get_filename_from_url(episode['url'])) + ) + try: + os.remove(episode_path) + except OSError: + print("Unable to remove file (%s) for episode: %s" % (episode_path, episode["title"],)) + else: + episodes[i]["downloaded"] = False + print("done.") + + overwrite_config(feed) + def pretty_print_feeds(feeds): format_str = Fore.GREEN + '{0:45.45} |' format_str += Fore.BLUE + ' {1:40}' + Fore.RESET + Back.RESET @@ -377,3 +409,23 @@ def main(): exit(0) if arguments['rename']: rename(arguments['<shortname>'], arguments['<newname>']) + + if arguments['prune']: + if arguments['--maxage-days']: + maxage = int(arguments['--maxage-days']) + else: + maxage = CONFIGURATION.get('maxage-days', 0) + + if arguments['<shortname>']: + feed = find_feed(arguments['<shortname>']) + if feed: + print_green('pruning {}'.format(feed['title'])) + prune(feed, maxage) + exit(0) + else: + print_err("feed {} not found".format(arguments['<shortname>'])) + exit(-1) + else: + for feed in available_feeds(): + print_green('pruning {}'.format(feed['title'])) + prune(feed, maxage) From bf2eb08b7361154ece8854bfc3d0e53399b750d6 Mon Sep 17 00:00:00 2001 From: Fred Thomsen <me@fredthomsen.net> Date: Mon, 7 Jun 2021 19:26:52 -0400 Subject: [PATCH 15/17] Update feedparser version for py39 Python3.9 removes the `base64.encodestring` and `base64.decodestring` after being deprecated in an earlier version. 
Update feedparser to a version that doesn't reference these functions so that we don't crash on python3.9. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 257aef5..4c0d9cb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ colorama==0.3.7 docopt==0.6.2 -feedparser==5.2.1 +feedparser==6.0.2 requests==2.20.0 From 9bd6bc654ebd9f2d974ef29232d6e6ceeb81b754 Mon Sep 17 00:00:00 2001 From: Eddie <42848457+antfred@users.noreply.github.com> Date: Sat, 26 Feb 2022 14:43:15 +0100 Subject: [PATCH 16/17] Limit file name length to 120 characters --- podfox/__init__.py | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 5796eec..7209602 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -220,6 +220,8 @@ def download_multiple(feed, maxnum, rename): for c in '<>\"|*%?\\/': title = title.replace(c, "") title = title.replace(" ", "_").replace("’", "'").replace("—", "-").replace(":", ".") + # Shorten the title to max 120 characters + title = title[:120] extension = os.path.splitext(urlparse(episode['url'])[2])[1] filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])), title, extension) diff --git a/setup.py b/setup.py index 1b51b6d..dd2c23a 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ required = f.read().splitlines() setup(name='podfox', - version='0.1.1', + version='0.1.2', description='Podcatcher for the terminal', url='http://github.com/brtmr/podfox', author='Bastian Reitemeier', From 5f6e9a9de4bfb484a6d3cccbd35d69ec09bfb58f Mon Sep 17 00:00:00 2001 From: Eddie <42848457+antfred@users.noreply.github.com> Date: Sat, 26 Feb 2022 14:43:15 +0100 Subject: [PATCH 17/17] Limit file name length to 120 characters --- podfox/__init__.py | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/podfox/__init__.py b/podfox/__init__.py index 
5796eec..7209602 100755 --- a/podfox/__init__.py +++ b/podfox/__init__.py @@ -220,6 +220,8 @@ def download_multiple(feed, maxnum, rename): for c in '<>\"|*%?\\/': title = title.replace(c, "") title = title.replace(" ", "_").replace("’", "'").replace("—", "-").replace(":", ".") + # Shorten the title to max 120 characters + title = title[:120] extension = os.path.splitext(urlparse(episode['url'])[2])[1] filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])), title, extension) diff --git a/setup.py b/setup.py index 1b51b6d..dd2c23a 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ required = f.read().splitlines() setup(name='podfox', - version='0.1.1', + version='0.1.2', description='Podcatcher for the terminal', url='http://github.com/brtmr/podfox', author='Bastian Reitemeier',