From 437cfb5eb5b170dffc07f0c8b4f0e9c8dc6619d5 Mon Sep 17 00:00:00 2001
From: antfred <42848457+antfred@users.noreply.github.com>
Date: Thu, 30 Aug 2018 22:53:35 +0200
Subject: [PATCH 01/17] Added option to override file names
* Added option to override file names
Some podcasts always provide the same filename, or meaningless file names. I added an option --rename-files to the episode download function so that filenames are generated per episode using the schema:
%Y-%m-%d_{title}.{extension}
In addition, the title gets stripped of characters which would be illegal in some filesystems to prevent issues.
---
podfox/__init__.py | 37 +++++++++++++++++++++++++------------
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 9452bbe..cbba3ff 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -7,7 +7,7 @@
podfox.py update [] [-c=]
podfox.py feeds [-c=]
podfox.py episodes [-c=]
- podfox.py download [ --how-many=] [-c=]
+ podfox.py download [ --how-many=] [--rename-files] [-c=]
podfox.py rename [-c=]
Options:
@@ -20,6 +20,7 @@
from colorama import Fore, Back, Style
from docopt import docopt
from os.path import expanduser
+from urllib.parse import urlparse
from sys import exit
import colorama
import feedparser
@@ -38,7 +39,7 @@
# how-to-parse-a-rfc-2822-date-time-into-a-python-datetime
from email.utils import parsedate
-from time import mktime
+from time import mktime, localtime, strftime
CONFIGURATION = {}
@@ -189,26 +190,37 @@ def episodes_from_feed(d):
return episodes
-def download_multiple(feed, maxnum):
+def download_multiple(feed, maxnum, rename):
for episode in feed['episodes']:
if maxnum == 0:
break
if not episode['downloaded']:
- download_single(feed['shortname'], episode['url'])
+ if rename:
+ title = episode['title']
+ for c in '<>\"|*%?\\/':
+ title = title.replace(c, "")
+ title = title.replace(" ", "_").replace("’", "'").replace("—", "-").replace(":", ".")
+ extension = os.path.splitext(urlparse(episode['url'])[2])[1]
+ filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])),
+ title, extension)
+ download_single(feed['shortname'], episode['url'], filename)
+ else:
+ download_single(feed['shortname'], episode['url'])
episode['downloaded'] = True
maxnum -= 1
overwrite_config(feed)
-def download_single(folder, url):
+def download_single(folder, url, filename=""):
print(url)
base = CONFIGURATION['podcast-directory']
r = requests.get(url.strip(), stream=True)
- try:
- filename=re.findall('filename="([^"]+)',r.headers['content-disposition'])[0]
- except:
- filename = url.split('/')[-1]
- filename = filename.split('?')[0]
+ if not filename:
+ try:
+ filename=re.findall('filename="([^"]+)',r.headers['content-disposition'])[0]
+ except:
+ filename = url.split('/')[-1]
+ filename = filename.split('?')[0]
print_green("{:s} downloading".format(filename))
with open(os.path.join(base, folder, filename), 'wb') as f:
for chunk in r.iter_content(chunk_size=1024**2):
@@ -335,11 +347,12 @@ def main():
maxnum = int(arguments['--how-many'])
else:
maxnum = CONFIGURATION['maxnum']
+ rename_files = bool(arguments['--rename-files'])
#download episodes for a specific feed
if arguments['']:
feed = find_feed(arguments[''])
if feed:
- download_multiple(feed, maxnum)
+ download_multiple(feed, maxnum, rename_files)
exit(0)
else:
print_err("feed {} not found".format(arguments['']))
@@ -347,7 +360,7 @@ def main():
#download episodes for all feeds.
else:
for feed in available_feeds():
- download_multiple(feed, maxnum)
+ download_multiple(feed, maxnum, rename_files)
exit(0)
if arguments['rename']:
rename(arguments[''], arguments[''])
From 0dc074e7ced7e3482ddbc397837ba361c21c83f4 Mon Sep 17 00:00:00 2001
From: antfred <42848457+antfred@users.noreply.github.com>
Date: Thu, 30 Aug 2018 22:53:35 +0200
Subject: [PATCH 02/17] Added option to override file names
* Added option to override file names
Some podcasts always provide the same filename, or meaningless file names. I added an option --rename-files to the episode download function so that filenames are generated per episode using the schema:
%Y-%m-%d_{title}.{extension}
In addition, the title gets stripped of characters which would be illegal in some filesystems to prevent issues.
---
podfox/__init__.py | 37 +++++++++++++++++++++++++------------
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 9452bbe..cbba3ff 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -7,7 +7,7 @@
podfox.py update [] [-c=]
podfox.py feeds [-c=]
podfox.py episodes [-c=]
- podfox.py download [ --how-many=] [-c=]
+ podfox.py download [ --how-many=] [--rename-files] [-c=]
podfox.py rename [-c=]
Options:
@@ -20,6 +20,7 @@
from colorama import Fore, Back, Style
from docopt import docopt
from os.path import expanduser
+from urllib.parse import urlparse
from sys import exit
import colorama
import feedparser
@@ -38,7 +39,7 @@
# how-to-parse-a-rfc-2822-date-time-into-a-python-datetime
from email.utils import parsedate
-from time import mktime
+from time import mktime, localtime, strftime
CONFIGURATION = {}
@@ -189,26 +190,37 @@ def episodes_from_feed(d):
return episodes
-def download_multiple(feed, maxnum):
+def download_multiple(feed, maxnum, rename):
for episode in feed['episodes']:
if maxnum == 0:
break
if not episode['downloaded']:
- download_single(feed['shortname'], episode['url'])
+ if rename:
+ title = episode['title']
+ for c in '<>\"|*%?\\/':
+ title = title.replace(c, "")
+ title = title.replace(" ", "_").replace("’", "'").replace("—", "-").replace(":", ".")
+ extension = os.path.splitext(urlparse(episode['url'])[2])[1]
+ filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])),
+ title, extension)
+ download_single(feed['shortname'], episode['url'], filename)
+ else:
+ download_single(feed['shortname'], episode['url'])
episode['downloaded'] = True
maxnum -= 1
overwrite_config(feed)
-def download_single(folder, url):
+def download_single(folder, url, filename=""):
print(url)
base = CONFIGURATION['podcast-directory']
r = requests.get(url.strip(), stream=True)
- try:
- filename=re.findall('filename="([^"]+)',r.headers['content-disposition'])[0]
- except:
- filename = url.split('/')[-1]
- filename = filename.split('?')[0]
+ if not filename:
+ try:
+ filename=re.findall('filename="([^"]+)',r.headers['content-disposition'])[0]
+ except:
+ filename = url.split('/')[-1]
+ filename = filename.split('?')[0]
print_green("{:s} downloading".format(filename))
with open(os.path.join(base, folder, filename), 'wb') as f:
for chunk in r.iter_content(chunk_size=1024**2):
@@ -335,11 +347,12 @@ def main():
maxnum = int(arguments['--how-many'])
else:
maxnum = CONFIGURATION['maxnum']
+ rename_files = bool(arguments['--rename-files'])
#download episodes for a specific feed
if arguments['']:
feed = find_feed(arguments[''])
if feed:
- download_multiple(feed, maxnum)
+ download_multiple(feed, maxnum, rename_files)
exit(0)
else:
print_err("feed {} not found".format(arguments['']))
@@ -347,7 +360,7 @@ def main():
#download episodes for all feeds.
else:
for feed in available_feeds():
- download_multiple(feed, maxnum)
+ download_multiple(feed, maxnum, rename_files)
exit(0)
if arguments['rename']:
rename(arguments[''], arguments[''])
From d848909bc57e58a75a2d1b19d45d864776a75213 Mon Sep 17 00:00:00 2001
From: Antonio Frediani
Date: Sat, 8 Sep 2018 18:38:39 +0200
Subject: [PATCH 03/17] Added multithreading to the download
---
podfox/__init__.py | 45 ++++++++++++++++++++++++++++++---------------
1 file changed, 30 insertions(+), 15 deletions(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index cbba3ff..7cf913c 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -30,6 +30,8 @@
import requests
import sys
import re
+import concurrent.futures
+import threading
# RSS datetimes follow RFC 2822, same as email headers.
# this is the chain of stackoverflow posts that led me to believe this is true.
@@ -191,10 +193,12 @@ def episodes_from_feed(d):
def download_multiple(feed, maxnum, rename):
- for episode in feed['episodes']:
- if maxnum == 0:
- break
- if not episode['downloaded']:
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+ # parse up to maxnum of the not downloaded episodes
+ future_to_episodes = {}
+ for episode in list(filter(lambda ep: not ep['downloaded'], feed['episodes']))[:maxnum]:
+ filename = ""
+
if rename:
title = episode['title']
for c in '<>\"|*%?\\/':
@@ -203,16 +207,21 @@ def download_multiple(feed, maxnum, rename):
extension = os.path.splitext(urlparse(episode['url'])[2])[1]
filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])),
title, extension)
- download_single(feed['shortname'], episode['url'], filename)
- else:
- download_single(feed['shortname'], episode['url'])
- episode['downloaded'] = True
- maxnum -= 1
+
+
+ future_to_episodes[executor.submit(download_single, feed['shortname'], episode['url'], filename)]=episode
+
+ for future in concurrent.futures.as_completed(future_to_episodes):
+ episode = future_to_episodes[future]
+ try:
+ episode['downloaded'] = future.result()
+ except Exception as exc:
+ print('%r generated an exception: %s' % (episode['title'], exc))
overwrite_config(feed)
def download_single(folder, url, filename=""):
- print(url)
+ print("{}: Parsing URL {}".format(threading.current_thread().name, url))
base = CONFIGURATION['podcast-directory']
r = requests.get(url.strip(), stream=True)
if not filename:
@@ -221,11 +230,17 @@ def download_single(folder, url, filename=""):
except:
filename = url.split('/')[-1]
filename = filename.split('?')[0]
- print_green("{:s} downloading".format(filename))
- with open(os.path.join(base, folder, filename), 'wb') as f:
- for chunk in r.iter_content(chunk_size=1024**2):
- f.write(chunk)
- print("done.")
+ print_green("{}: {:s} downloading".format(threading.current_thread().name, filename))
+
+ try:
+ with open(os.path.join(base, folder, filename), 'wb') as f:
+ for chunk in r.iter_content(chunk_size=1024**2):
+ f.write(chunk)
+ except EnvironmentError:
+ print_err("{}: Error while writing {}".format(threading.current_thread().name, filename))
+ return False
+ print("{}: done.".format(threading.current_thread().name))
+ return True
def available_feeds():
From d3f9add8e02116a12ba09c28256f56e23b3c49a7 Mon Sep 17 00:00:00 2001
From: Antonio Frediani
Date: Sat, 8 Sep 2018 18:38:39 +0200
Subject: [PATCH 04/17] Added multithreading to the download
---
podfox/__init__.py | 45 ++++++++++++++++++++++++++++++---------------
1 file changed, 30 insertions(+), 15 deletions(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index cbba3ff..7cf913c 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -30,6 +30,8 @@
import requests
import sys
import re
+import concurrent.futures
+import threading
# RSS datetimes follow RFC 2822, same as email headers.
# this is the chain of stackoverflow posts that led me to believe this is true.
@@ -191,10 +193,12 @@ def episodes_from_feed(d):
def download_multiple(feed, maxnum, rename):
- for episode in feed['episodes']:
- if maxnum == 0:
- break
- if not episode['downloaded']:
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+ # parse up to maxnum of the not downloaded episodes
+ future_to_episodes = {}
+ for episode in list(filter(lambda ep: not ep['downloaded'], feed['episodes']))[:maxnum]:
+ filename = ""
+
if rename:
title = episode['title']
for c in '<>\"|*%?\\/':
@@ -203,16 +207,21 @@ def download_multiple(feed, maxnum, rename):
extension = os.path.splitext(urlparse(episode['url'])[2])[1]
filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])),
title, extension)
- download_single(feed['shortname'], episode['url'], filename)
- else:
- download_single(feed['shortname'], episode['url'])
- episode['downloaded'] = True
- maxnum -= 1
+
+
+ future_to_episodes[executor.submit(download_single, feed['shortname'], episode['url'], filename)]=episode
+
+ for future in concurrent.futures.as_completed(future_to_episodes):
+ episode = future_to_episodes[future]
+ try:
+ episode['downloaded'] = future.result()
+ except Exception as exc:
+ print('%r generated an exception: %s' % (episode['title'], exc))
overwrite_config(feed)
def download_single(folder, url, filename=""):
- print(url)
+ print("{}: Parsing URL {}".format(threading.current_thread().name, url))
base = CONFIGURATION['podcast-directory']
r = requests.get(url.strip(), stream=True)
if not filename:
@@ -221,11 +230,17 @@ def download_single(folder, url, filename=""):
except:
filename = url.split('/')[-1]
filename = filename.split('?')[0]
- print_green("{:s} downloading".format(filename))
- with open(os.path.join(base, folder, filename), 'wb') as f:
- for chunk in r.iter_content(chunk_size=1024**2):
- f.write(chunk)
- print("done.")
+ print_green("{}: {:s} downloading".format(threading.current_thread().name, filename))
+
+ try:
+ with open(os.path.join(base, folder, filename), 'wb') as f:
+ for chunk in r.iter_content(chunk_size=1024**2):
+ f.write(chunk)
+ except EnvironmentError:
+ print_err("{}: Error while writing {}".format(threading.current_thread().name, filename))
+ return False
+ print("{}: done.".format(threading.current_thread().name))
+ return True
def available_feeds():
From 2c12ec037a299a08a4b91a8bf399dcffebd9ccf6 Mon Sep 17 00:00:00 2001
From: n0trax
Date: Sun, 31 Mar 2019 17:04:07 +0200
Subject: [PATCH 05/17] Add default configuration
---
podfox/__init__.py | 32 ++++++++++++++++++++++++++------
1 file changed, 26 insertions(+), 6 deletions(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 9452bbe..00d8ec5 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -40,6 +40,16 @@
from email.utils import parsedate
from time import mktime
+CONFIGURATION_DEFAULTS = {
+ "podcast-directory": "~/Podcasts",
+ "maxnum": 5000,
+ "mimetypes": [ "audio/aac",
+ "audio/ogg",
+ "audio/mpeg",
+ "audio/mp3",
+ "audio/mp4",
+ "video/mp4" ]
+}
CONFIGURATION = {}
mimetypes = [
@@ -165,6 +175,8 @@ def overwrite_config(feed):
def episodes_from_feed(d):
+ mimetypes = CONFIGURATION['mimetypes']
+
episodes = []
for entry in d.entries:
# convert publishing time to unix time, so that we can sort
@@ -290,12 +302,20 @@ def main():
configfile = expanduser(arguments["--config"])
- with open(configfile) as conf_file:
- try:
- CONFIGURATION = json.load(conf_file)
- except ValueError:
- print("invalid json in configuration file.")
- exit(-1)
+ try:
+ with open(configfile) as conf_file:
+ try:
+ userconf = json.load(conf_file)
+ except ValueError:
+ print("invalid json in configuration file.")
+ exit(-1)
+ except FileNotFoundError:
+ userconf = {}
+
+ CONFIGURATION = CONFIGURATION_DEFAULTS.copy()
+ CONFIGURATION.update(userconf)
+ CONFIGURATION['podcast-directory'] = os.path.expanduser(CONFIGURATION['podcast-directory'])
+
#handle the commands
if arguments['import']:
if arguments[''] is None:
From 01449900490e27efe2eaf6a42771cf5025c82955 Mon Sep 17 00:00:00 2001
From: Tim Broder
Date: Fri, 19 Apr 2019 09:09:05 -0400
Subject: [PATCH 06/17] Add support for audio/x-m4a
---
podfox/__init__.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 9452bbe..afef322 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -45,7 +45,8 @@
mimetypes = [
'audio/ogg',
'audio/mpeg',
- 'video/mp4'
+ 'video/mp4',
+ 'audio/x-m4a'
]
def print_err(err):
From 3196062ff3bb836ed1eb6ea9997aeece46ee2f1a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 2 Oct 2019 22:21:20 +0000
Subject: [PATCH 07/17] Bump requests from 2.11.1 to 2.20.0
Bumps [requests](https://github.com/requests/requests) from 2.11.1 to 2.20.0.
- [Release notes](https://github.com/requests/requests/releases)
- [Changelog](https://github.com/psf/requests/blob/master/HISTORY.md)
- [Commits](https://github.com/requests/requests/compare/v2.11.1...v2.20.0)
Signed-off-by: dependabot[bot]
---
requirements.txt | 2 +-
setup.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/requirements.txt b/requirements.txt
index 392be50..257aef5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
colorama==0.3.7
docopt==0.6.2
feedparser==5.2.1
-requests==2.11.1
+requests==2.20.0
diff --git a/setup.py b/setup.py
index c5f1d5a..1b51b6d 100644
--- a/setup.py
+++ b/setup.py
@@ -21,6 +21,6 @@
'colorama==0.3.7',
'docopt==0.6.2',
'feedparser==5.2.1',
- 'requests==2.11.1',
+ 'requests==2.20.0',
],
)
From e7599d8fc0b851778f9aa9d1ba5b2fd9455bbcbc Mon Sep 17 00:00:00 2001
From: Hermann Wiesner
Date: Sun, 27 Sep 2020 19:41:20 +0200
Subject: [PATCH 08/17] Added mimetype "audio/x-mpeg" and check for troubling
or missing publishing date. Example:
https://www.rbb-online.de/rbbkultur/podcasts/hannah-arendt-endlich-verstehen-podcast.xml/feed=podcast.xml
---
podfox/__init__.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index cfe27a5..14393a5 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -46,6 +46,7 @@
"mimetypes": [ "audio/aac",
"audio/ogg",
"audio/mpeg",
+ "audio/x-mpeg",
"audio/mp3",
"audio/mp4",
"video/mp4" ]
@@ -55,6 +56,7 @@
mimetypes = [
'audio/ogg',
'audio/mpeg',
+ 'audio/x-mpeg',
'video/mp4',
'audio/x-m4a'
]
@@ -182,7 +184,10 @@ def episodes_from_feed(d):
for entry in d.entries:
# convert publishing time to unix time, so that we can sort
# this should be unix time, barring any timezone shenanigans
- date = mktime(parsedate(entry.published))
+ try:
+ date = mktime(parsedate(entry.published))
+ except TypeError:
+ continue
if hasattr(entry, 'links'):
for link in entry.links:
if not hasattr(link, 'type'):
From 5b072701b5a377a45924b1ffec434513f1570855 Mon Sep 17 00:00:00 2001
From: Isak Rubin
Date: Thu, 17 Dec 2020 07:48:54 +0000
Subject: [PATCH 09/17] Allow usage of later versions of feedparser
---
requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements.txt b/requirements.txt
index 257aef5..a4c7509 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
colorama==0.3.7
docopt==0.6.2
-feedparser==5.2.1
+feedparser>=5.2.1
requests==2.20.0
From 23c354d48fe4af92923a5dc33521abc628385e64 Mon Sep 17 00:00:00 2001
From: Antonio Frediani
Date: Sun, 17 Jan 2021 12:02:36 +0100
Subject: [PATCH 10/17] Added a progress bar while downloading episodes
---
podfox/__init__.py | 15 ++++++++++++---
requirements.txt | 1 +
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 89f6759..2cc5370 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -22,6 +22,7 @@
from os.path import expanduser
from urllib.parse import urlparse
from sys import exit
+from tqdm import tqdm
import colorama
import feedparser
import json
@@ -32,6 +33,8 @@
import re
import concurrent.futures
import threading
+import logging
+logging.basicConfig(level=logging.WARNING)
# RSS datetimes follow RFC 2822, same as email headers.
# this is the chain of stackoverflow posts that led me to believe this is true.
@@ -234,7 +237,7 @@ def download_multiple(feed, maxnum, rename):
def download_single(folder, url, filename=""):
- print("{}: Parsing URL {}".format(threading.current_thread().name, url))
+ logging.info("{}: Parsing URL {}".format(threading.current_thread().name, url))
base = CONFIGURATION['podcast-directory']
r = requests.get(url.strip(), stream=True)
if not filename:
@@ -243,16 +246,19 @@ def download_single(folder, url, filename=""):
except:
filename = url.split('/')[-1]
filename = filename.split('?')[0]
- print_green("{}: {:s} downloading".format(threading.current_thread().name, filename))
+ logging.info("{}: {:s} downloading".format(threading.current_thread().name, filename))
try:
with open(os.path.join(base, folder, filename), 'wb') as f:
+ pbar = tqdm(total=int(r.headers['Content-Length']))
+ pbar.set_description(filename if len(filename)<20 else filename[:20])
for chunk in r.iter_content(chunk_size=1024**2):
f.write(chunk)
+ pbar.update(len(chunk))
except EnvironmentError:
print_err("{}: Error while writing {}".format(threading.current_thread().name, filename))
return False
- print("{}: done.".format(threading.current_thread().name))
+ logging.info("{}: done.".format(threading.current_thread().name))
return True
@@ -344,6 +350,9 @@ def main():
CONFIGURATION.update(userconf)
CONFIGURATION['podcast-directory'] = os.path.expanduser(CONFIGURATION['podcast-directory'])
+ # Check if we should use the progress bar
+
+
#handle the commands
if arguments['import']:
if arguments[''] is None:
diff --git a/requirements.txt b/requirements.txt
index 257aef5..71f594f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ colorama==0.3.7
docopt==0.6.2
feedparser==5.2.1
requests==2.20.0
+tqdm==4.48.2
\ No newline at end of file
From 6574d0f1a18d210b02a56975102746f3de6c55a4 Mon Sep 17 00:00:00 2001
From: Antonio Frediani
Date: Sun, 17 Jan 2021 12:02:36 +0100
Subject: [PATCH 11/17] Added a progress bar while downloading episodes
---
podfox/__init__.py | 15 ++++++++++++---
requirements.txt | 1 +
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 89f6759..2cc5370 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -22,6 +22,7 @@
from os.path import expanduser
from urllib.parse import urlparse
from sys import exit
+from tqdm import tqdm
import colorama
import feedparser
import json
@@ -32,6 +33,8 @@
import re
import concurrent.futures
import threading
+import logging
+logging.basicConfig(level=logging.WARNING)
# RSS datetimes follow RFC 2822, same as email headers.
# this is the chain of stackoverflow posts that led me to believe this is true.
@@ -234,7 +237,7 @@ def download_multiple(feed, maxnum, rename):
def download_single(folder, url, filename=""):
- print("{}: Parsing URL {}".format(threading.current_thread().name, url))
+ logging.info("{}: Parsing URL {}".format(threading.current_thread().name, url))
base = CONFIGURATION['podcast-directory']
r = requests.get(url.strip(), stream=True)
if not filename:
@@ -243,16 +246,19 @@ def download_single(folder, url, filename=""):
except:
filename = url.split('/')[-1]
filename = filename.split('?')[0]
- print_green("{}: {:s} downloading".format(threading.current_thread().name, filename))
+ logging.info("{}: {:s} downloading".format(threading.current_thread().name, filename))
try:
with open(os.path.join(base, folder, filename), 'wb') as f:
+ pbar = tqdm(total=int(r.headers['Content-Length']))
+ pbar.set_description(filename if len(filename)<20 else filename[:20])
for chunk in r.iter_content(chunk_size=1024**2):
f.write(chunk)
+ pbar.update(len(chunk))
except EnvironmentError:
print_err("{}: Error while writing {}".format(threading.current_thread().name, filename))
return False
- print("{}: done.".format(threading.current_thread().name))
+ logging.info("{}: done.".format(threading.current_thread().name))
return True
@@ -344,6 +350,9 @@ def main():
CONFIGURATION.update(userconf)
CONFIGURATION['podcast-directory'] = os.path.expanduser(CONFIGURATION['podcast-directory'])
+ # Check if we should use the progress bar
+
+
#handle the commands
if arguments['import']:
if arguments[''] is None:
diff --git a/requirements.txt b/requirements.txt
index 257aef5..71f594f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ colorama==0.3.7
docopt==0.6.2
feedparser==5.2.1
requests==2.20.0
+tqdm==4.48.2
\ No newline at end of file
From f681340b917376ebc6c6b830dcb5eb42aa5d1394 Mon Sep 17 00:00:00 2001
From: Antonio Frediani
Date: Sun, 17 Jan 2021 12:27:12 +0100
Subject: [PATCH 12/17] Progress bar now autoscales
---
podfox/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 2cc5370..5796eec 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -250,7 +250,7 @@ def download_single(folder, url, filename=""):
try:
with open(os.path.join(base, folder, filename), 'wb') as f:
- pbar = tqdm(total=int(r.headers['Content-Length']))
+ pbar = tqdm(total=int(r.headers['Content-Length']), unit='B', unit_scale=True, unit_divisor=1024)
pbar.set_description(filename if len(filename)<20 else filename[:20])
for chunk in r.iter_content(chunk_size=1024**2):
f.write(chunk)
From e728c082f6344aa5a9ec8445ba49d8e832f06dd4 Mon Sep 17 00:00:00 2001
From: Antonio Frediani
Date: Sun, 17 Jan 2021 12:27:12 +0100
Subject: [PATCH 13/17] Progress bar now autoscales
---
podfox/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 2cc5370..5796eec 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -250,7 +250,7 @@ def download_single(folder, url, filename=""):
try:
with open(os.path.join(base, folder, filename), 'wb') as f:
- pbar = tqdm(total=int(r.headers['Content-Length']))
+ pbar = tqdm(total=int(r.headers['Content-Length']), unit='B', unit_scale=True, unit_divisor=1024)
pbar.set_description(filename if len(filename)<20 else filename[:20])
for chunk in r.iter_content(chunk_size=1024**2):
f.write(chunk)
From caac8b20959f0e0138471fea74df02fcc8afed98 Mon Sep 17 00:00:00 2001
From: Fred Thomsen
Date: Sun, 16 May 2021 10:00:42 -0400
Subject: [PATCH 14/17] Add prune command
Add ability to prune old episodes via prune command. Max age in days
can be set via configuration or passed in via command line. This
requires tracking of the filename used to store the podcast episode,
so this has been added to the feed json. Pulling the filename out of
the url can be used as a fallback; however, if the name has been pulled
out of the HTTP headers, then this feature will not be backwards
compatible for episodes named in this way and thus those will not be
pruned.
---
README.md | 6 +++++
podfox/__init__.py | 64 +++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 64 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index d8bdab0..ea38c31 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,7 @@ In podfox, every podcast is identified with its own `shortname`, which is restri
podfox.py feeds
podfox.py episodes
podfox.py download [ --how-many=]
+ podfox.py prune [ --maxage-days=]
```
### Import
@@ -107,3 +108,8 @@ Extortion Startups | TechSNAP 229 | Not Downloaded
`podfox download ts --how-many=3` will download the 3 newest techsnap podcasts that have not yet been downloaded. (Skipping newer, but already downloaded ones). If the `--how-many` parameter is omitted, the `maxnum` parameter from the configuration file is used instead.
+
+### Pruning
+
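+`podfox prune` deletes the files of downloaded episodes that are older than the maximum age in days and marks those episodes as not downloaded. If no maximum age is given, either via the `--maxage-days` parameter or the `maxage-days` configuration entry, nothing is pruned.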
+
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 14393a5..12120b3 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -9,6 +9,7 @@
podfox.py episodes [-c=]
podfox.py download [ --how-many=] [-c=]
podfox.py rename [-c=]
+ podfox.py prune [ --maxage-days=]
Options:
-c --config= Specify an alternate config file [default: ~/.podfox.json]
@@ -22,6 +23,7 @@
from os.path import expanduser
from sys import exit
import colorama
+import datetime
import feedparser
import json
import os
@@ -79,6 +81,16 @@ def get_feed_file(shortname):
return os.path.join(get_folder(shortname), 'feed.json')
+def get_filename_from_url(url):
+ return url.split('/')[-1].split('?')[0]
+
+
+def episode_too_old(episode, maxage):
+ now = datetime.datetime.utcnow()
+ dt_published = datetime.datetime.fromtimestamp(episode["published"])
+ return maxage and (now - dt_published > datetime.timedelta(days=maxage))
+
+
def sort_feed(feed):
feed['episodes'] = sorted(feed['episodes'], key=lambda k: k['published'],
reverse=True)
@@ -211,28 +223,27 @@ def download_multiple(feed, maxnum):
for episode in feed['episodes']:
if maxnum == 0:
break
- if not episode['downloaded']:
- download_single(feed['shortname'], episode['url'])
+ if not episode['downloaded'] and not episode_too_old(episode, CONFIGURATION['maxage-days']):
+ episode['filename'] = download_single(feed['shortname'], episode['url'])
episode['downloaded'] = True
maxnum -= 1
overwrite_config(feed)
-
def download_single(folder, url):
print(url)
base = CONFIGURATION['podcast-directory']
r = requests.get(url.strip(), stream=True)
try:
- filename=re.findall('filename="([^"]+)',r.headers['content-disposition'])[0]
+ filename = re.findall('filename="([^"]+)', r.headers['content-disposition'])[0]
except:
- filename = url.split('/')[-1]
- filename = filename.split('?')[0]
+ filename = get_filename_from_url(url)
print_green("{:s} downloading".format(filename))
with open(os.path.join(base, folder, filename), 'wb') as f:
for chunk in r.iter_content(chunk_size=1024**2):
f.write(chunk)
print("done.")
+ return filename
def available_feeds():
'''
@@ -277,6 +288,27 @@ def rename(shortname, newname):
feed['shortname'] = newname
overwrite_config(feed)
+def prune(feed, maxage=0):
+ shortname = feed['shortname']
+ episodes = feed['episodes']
+
+ print(shortname)
+ for i, episode in enumerate(episodes):
+ if episode['downloaded'] and episode_too_old(episode, maxage):
+ episode_path = os.path.join(
+ get_folder(shortname),
+ episode.get("filename", get_filename_from_url(episode['url']))
+ )
+ try:
+ os.remove(episode_path)
+ except OSError:
+ print("Unable to remove file (%s) for episode: %s" % (episode_path, episode["title"],))
+ else:
+ episodes[i]["downloaded"] = False
+ print("done.")
+
+ overwrite_config(feed)
+
def pretty_print_feeds(feeds):
format_str = Fore.GREEN + '{0:45.45} |'
format_str += Fore.BLUE + ' {1:40}' + Fore.RESET + Back.RESET
@@ -377,3 +409,23 @@ def main():
exit(0)
if arguments['rename']:
rename(arguments[''], arguments[''])
+
+ if arguments['prune']:
+ if arguments['--maxage-days']:
+ maxage = int(arguments['--maxage-days'])
+ else:
+ maxage = CONFIGURATION.get('maxage-days', 0)
+
+ if arguments['']:
+ feed = find_feed(arguments[''])
+ if feed:
+ print_green('pruning {}'.format(feed['title']))
+ prune(feed, maxage)
+ exit(0)
+ else:
+ print_err("feed {} not found".format(arguments['']))
+ exit(-1)
+ else:
+ for feed in available_feeds():
+ print_green('pruning {}'.format(feed['title']))
+ prune(feed, maxage)
From bf2eb08b7361154ece8854bfc3d0e53399b750d6 Mon Sep 17 00:00:00 2001
From: Fred Thomsen
Date: Mon, 7 Jun 2021 19:26:52 -0400
Subject: [PATCH 15/17] Update feedparser version for py39
Python 3.9 removes the `base64.encodestring` and `base64.decodestring`
functions after they were deprecated in earlier versions. Update feedparser
to a version that doesn't reference these functions so that we don't crash on
Python 3.9.
---
requirements.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/requirements.txt b/requirements.txt
index 257aef5..4c0d9cb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
colorama==0.3.7
docopt==0.6.2
-feedparser==5.2.1
+feedparser==6.0.2
requests==2.20.0
From 9bd6bc654ebd9f2d974ef29232d6e6ceeb81b754 Mon Sep 17 00:00:00 2001
From: Eddie <42848457+antfred@users.noreply.github.com>
Date: Sat, 26 Feb 2022 14:43:15 +0100
Subject: [PATCH 16/17] Limit file name length to 120 characters
---
podfox/__init__.py | 2 ++
setup.py | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 5796eec..7209602 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -220,6 +220,8 @@ def download_multiple(feed, maxnum, rename):
for c in '<>\"|*%?\\/':
title = title.replace(c, "")
title = title.replace(" ", "_").replace("’", "'").replace("—", "-").replace(":", ".")
+ # Shorten the title to max 120 characters
+ title = title[:120]
extension = os.path.splitext(urlparse(episode['url'])[2])[1]
filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])),
title, extension)
diff --git a/setup.py b/setup.py
index 1b51b6d..dd2c23a 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
required = f.read().splitlines()
setup(name='podfox',
- version='0.1.1',
+ version='0.1.2',
description='Podcatcher for the terminal',
url='http://github.com/brtmr/podfox',
author='Bastian Reitemeier',
From 5f6e9a9de4bfb484a6d3cccbd35d69ec09bfb58f Mon Sep 17 00:00:00 2001
From: Eddie <42848457+antfred@users.noreply.github.com>
Date: Sat, 26 Feb 2022 14:43:15 +0100
Subject: [PATCH 17/17] Limit file name length to 120 characters
---
podfox/__init__.py | 2 ++
setup.py | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/podfox/__init__.py b/podfox/__init__.py
index 5796eec..7209602 100755
--- a/podfox/__init__.py
+++ b/podfox/__init__.py
@@ -220,6 +220,8 @@ def download_multiple(feed, maxnum, rename):
for c in '<>\"|*%?\\/':
title = title.replace(c, "")
title = title.replace(" ", "_").replace("’", "'").replace("—", "-").replace(":", ".")
+ # Shorten the title to max 120 characters
+ title = title[:120]
extension = os.path.splitext(urlparse(episode['url'])[2])[1]
filename = "{}_{}{}".format(strftime('%Y-%m-%d', localtime(episode['published'])),
title, extension)
diff --git a/setup.py b/setup.py
index 1b51b6d..dd2c23a 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@
required = f.read().splitlines()
setup(name='podfox',
- version='0.1.1',
+ version='0.1.2',
description='Podcatcher for the terminal',
url='http://github.com/brtmr/podfox',
author='Bastian Reitemeier',