Spodcast v0.5.0 which:

- fixes #13 (Cannot download episodes anymore)
 - uses _librespot-python_ interfaces instead of raw web API access (needed to fix #13)
 - cannot yet determine the decrypted file size for Spotify-hosted episodes (which used to work), so it only looks at the file name to decide whether an episode has already been downloaded. To retry a corrupted download, remove the partially downloaded file and try again.
This commit is contained in:
Frank de Lange 2022-06-30 16:52:41 +00:00
parent c0f76f5290
commit 0877f04cb1
8 changed files with 143 additions and 152 deletions

View file

@ -1 +0,0 @@
https://github.com/kokarare1212/librespot-python/archive/refs/heads/rewrite.zip

View file

@ -1,6 +1,6 @@
[metadata] [metadata]
name = spodcast name = spodcast
version = 0.4.9 version = 0.5.0
description = A caching Spotify podcast to RSS proxy. description = A caching Spotify podcast to RSS proxy.
long_description = file:README.md long_description = file:README.md
long_description_content_type = text/markdown long_description_content_type = text/markdown
@ -20,7 +20,8 @@ platforms = any
packages = packages =
spodcast spodcast
install_requires = install_requires =
librespot >= 0.0.1 librespot >= 0.0.5
pybase62
ffmpeg-python ffmpeg-python
setuptools setuptools
include_package_data = include_package_data =

View file

@ -2,8 +2,9 @@ import logging
from itertools import islice from itertools import islice
from librespot.audio.decoders import AudioQuality from librespot.audio.decoders import AudioQuality
from librespot.metadata import ShowId, EpisodeId
from spodcast.podcast import download_episode, get_show_episodes from spodcast.podcast import download_episode, get_episodes
from spodcast.utils import regex_input_for_urls from spodcast.utils import regex_input_for_urls
from spodcast.spodcast import Spodcast from spodcast.spodcast import Spodcast
@ -15,10 +16,15 @@ def client(args) -> None:
if args.urls: if args.urls:
for spotify_url in args.urls: for spotify_url in args.urls:
episode_id, show_id = regex_input_for_urls(spotify_url) episode_id_str, show_id_str = regex_input_for_urls(spotify_url)
log.debug(f"episode_id {episode_id}. show_id {show_id}") log.debug(f"episode_id_str {episode_id_str}. show_id_str {show_id_str}")
if episode_id is not None: if episode_id_str is not None:
episode_id = EpisodeId.from_base62(episode_id_str)
log.debug("episode_id: %s", episode_id)
download_episode(episode_id)
elif show_id_str is not None:
show_id = ShowId.from_base62(show_id_str)
log.debug("show_id: %s", show_id)
for episode_id in islice(get_episodes(show_id), Spodcast.CONFIG.get_max_episodes()):
log.debug("episode_id: %s", episode_id)
download_episode(episode_id) download_episode(episode_id)
elif show_id is not None:
for episode in islice(get_show_episodes(show_id), Spodcast.CONFIG.get_max_episodes()):
download_episode(episode)

View file

@ -1,22 +1,8 @@
ERROR = 'error' TYPE = 'type'
ITEMS = 'items'
NAME = 'name'
DESCRIPTION = "description"
ID = 'id'
URL = 'url'
URI = 'uri'
EXTERNAL_URLS = 'external_urls'
SPOTIFY = 'spotify'
RELEASE_DATE = 'release_date'
IMAGES = 'images'
LIMIT = 'limit' LIMIT = 'limit'
OFFSET = 'offset' OFFSET = 'offset'
CREDENTIALS_PREFIX = 'spodcast-cred' CREDENTIALS_PREFIX = 'spodcast-cred'
AUTHORIZATION = 'Authorization'
DURATION_MS = 'duration_ms'
SHOW = 'show'
TYPE = 'type'
USER_READ_EMAIL = 'user-read-email' USER_READ_EMAIL = 'user-read-email'
PLAYLIST_READ_PRIVATE = 'playlist-read-private'
USER_LIBRARY_READ = 'user-library-read'
FILE_EXISTS = -1 FILE_EXISTS = -1
OPEN_SPOTIFY_URL = 'open.spotify.com'
IMAGE_CDN = lambda image_id_hex: f'https://i.scdn.co/image/{image_id_hex}'

View file

@ -6,68 +6,61 @@ from datetime import datetime
from html import escape from html import escape
import urllib.parse import urllib.parse
from librespot.metadata import EpisodeId import base62
from base62 import CHARSET_INVERTED
import ffmpeg import ffmpeg
from spodcast.const import ERROR, ID, ITEMS, NAME, SHOW, DURATION_MS, DESCRIPTION, RELEASE_DATE, URI, URL, EXTERNAL_URLS, IMAGES, SPOTIFY, FILE_EXISTS from librespot import util
from librespot.metadata import ShowId, EpisodeId
from librespot.core import ApiClient
from spodcast.const import FILE_EXISTS, IMAGE_CDN
from spodcast.feedgenerator import RSS_FEED_CODE, RSS_FEED_FILE_NAME, RSS_FEED_SHOW_INDEX, RSS_FEED_INFO_EXTENSION, RSS_FEED_SHOW_IMAGE, RSS_FEED_VERSION, get_index_version from spodcast.feedgenerator import RSS_FEED_CODE, RSS_FEED_FILE_NAME, RSS_FEED_SHOW_INDEX, RSS_FEED_INFO_EXTENSION, RSS_FEED_SHOW_IMAGE, RSS_FEED_VERSION, get_index_version
from spodcast.spotapi import EPISODE_INFO_URL, SHOWS_URL, EPISODE_DOWNLOAD_URL, ANON_PODCAST_DOMAIN from spodcast.utils import clean_filename, uri_to_url
from spodcast.utils import clean_filename
from spodcast.spodcast import Spodcast from spodcast.spodcast import Spodcast
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def get_info(episode_id_str, target="episode"):
def hex_to_spotify_id(hex_id):
return base62.encodebytes(util.hex_to_bytes(hex_id), CHARSET_INVERTED)
def get_show_info(show_id_hex):
log.info("Fetching show information...")
show_id = ShowId.from_hex(show_id_hex)
uri = f'spotify:show:{hex_to_spotify_id(show_id_hex)}'
info = Spodcast.SESSION.api().get_metadata_4_show(show_id)
link = uri_to_url(uri)
description = info.description
image = IMAGE_CDN(util.bytes_to_hex(info.cover_image.image[1].file_id))
return link, description, image
def get_episode_info(episode_id_hex):
log.info("Fetching episode information...") log.info("Fetching episode information...")
(raw, info) = Spodcast.invoke_url(f'{EPISODE_INFO_URL}/{episode_id_str}') episode_id = EpisodeId.from_hex(episode_id_hex)
if not info: uri = f'spotify:episode:{hex_to_spotify_id(episode_id_hex)}'
log.error('INVALID EPISODE ID') info = Spodcast.SESSION.api().get_metadata_4_episode(episode_id)
podcast_name = info.show.name
podcast_id = util.bytes_to_hex(info.show.gid)
episode_name = info.name
duration_ms = info.duration
description = info.description
external_url = info.external_url if info.external_url else None
pt = info.publish_time
release_date = f'{pt.year}-{pt.month}-{pt.day}T{pt.hour}:{pt.minute}:00Z'
log.debug("episode info: %s", info) return podcast_name, podcast_id, duration_ms, episode_name, description, release_date, uri, external_url
if ERROR in info:
return None, None
if target == "episode":
podcast_name = info[SHOW][NAME]
episode_name = info[NAME]
duration_ms = info[DURATION_MS]
description = info[DESCRIPTION]
release_date = info[RELEASE_DATE]
uri = info[URI]
return podcast_name, duration_ms, episode_name, description, release_date, uri
elif target == "show":
podcast_name = info[SHOW][NAME]
link = info[SHOW][EXTERNAL_URLS][SPOTIFY]
description = info[SHOW][DESCRIPTION]
image = info[SHOW][IMAGES][0][URL]
return podcast_name, link, description, image
def get_show_episodes(show_id_str) -> list: def get_episodes(show_id):
episodes = [] info = Spodcast.SESSION.api().get_metadata_4_show(show_id)
offset = 0 episodes = info.episode
limit = 50 episodes.sort(key = lambda x: datetime.strptime(f'{x.publish_time.year}-{x.publish_time.month}-{x.publish_time.day}T{x.publish_time.hour}:{x.publish_time.minute}:00Z', "%Y-%m-%dT%H:%M:%SZ"), reverse=True)
log.info("Fetching episodes...") return [util.bytes_to_hex(episode.gid) for episode in episodes]
while True:
resp = Spodcast.invoke_url_with_params(
f'{SHOWS_URL}/{show_id_str}/episodes', limit=limit, offset=offset)
offset += limit
for episode in resp[ITEMS]:
episodes.append([episode[ID], episode[RELEASE_DATE]])
if len(resp[ITEMS]) < limit:
break
# some shows list episodes in the wrong order so reverse sort them by release date
episodes.sort(key=lambda x: datetime.strptime(x[1], "%Y-%m-%d"), reverse=True)
return [episode[0] for episode in episodes]
def download_file(url, filepath): def download_file(url, filepath):
@ -101,14 +94,24 @@ def download_file(url, filepath):
return filepath, os.path.getsize(filepath), mimetype return filepath, os.path.getsize(filepath), mimetype
def download_stream(stream, filepath): def download_stream(stream, filepath):
size = stream.input_stream.size size = stream.input_stream.size
mp3_filepath = os.path.splitext(filepath)[0] + ".mp3" mp3_filepath = os.path.splitext(filepath)[0] + ".mp3"
mimetype = "audio/ogg" mimetype = "audio/ogg"
if ( if (
((os.path.isfile(filepath) # "FILE SIZE CHECK TEMPORARILY OUT OF ORDER"
and abs(size - os.path.getsize(filepath)) < 1000) # Need to find a way to get decrypted content size
# from Spotify to enable file size checks, for now
# this only checks for the presence of a file with
# the same name. To recover from failed downloads
# simply remove incomplete files
#
#((os.path.isfile(filepath)
#and abs(size - os.path.getsize(filepath)) < 1000)
(os.path.isfile(filepath)
or (Spodcast.CONFIG.get_transcode() or (Spodcast.CONFIG.get_transcode()
and os.path.isfile(mp3_filepath))) and os.path.isfile(mp3_filepath)))
and Spodcast.CONFIG.get_skip_existing_files() and Spodcast.CONFIG.get_skip_existing_files()
@ -145,7 +148,8 @@ def download_stream(stream, filepath):
def download_episode(episode_id) -> None: def download_episode(episode_id) -> None:
podcast_name, duration_ms, episode_name, description, release_date, uri = get_info(episode_id, "episode") try:
podcast_name, podcast_id, duration_ms, episode_name, description, release_date, uri, download_url = get_episode_info(episode_id)
if podcast_name is None: if podcast_name is None:
log.warning('Skipping episode (podcast NOT FOUND)') log.warning('Skipping episode (podcast NOT FOUND)')
@ -153,14 +157,11 @@ def download_episode(episode_id) -> None:
log.warning('Skipping episode (episode NOT FOUND)') log.warning('Skipping episode (episode NOT FOUND)')
else: else:
filename = clean_filename(podcast_name + ' - ' + episode_name) filename = clean_filename(podcast_name + ' - ' + episode_name)
log.debug(Spodcast.invoke_url(EPISODE_DOWNLOAD_URL(episode_id)))
download_url = Spodcast.invoke_url(EPISODE_DOWNLOAD_URL(episode_id))[1]["data"]["episode"]["audio"]["items"][-1]["url"]
log.debug(f"download_url: {download_url}")
show_directory = os.path.realpath(os.path.join(Spodcast.CONFIG.get_root_path(), clean_filename(podcast_name) + '/')) show_directory = os.path.realpath(os.path.join(Spodcast.CONFIG.get_root_path(), clean_filename(podcast_name) + '/'))
os.makedirs(show_directory, exist_ok=True) os.makedirs(show_directory, exist_ok=True)
if ANON_PODCAST_DOMAIN in download_url: if download_url is None:
episode_stream_id = EpisodeId.from_base62(episode_id) episode_stream_id = EpisodeId.from_hex(episode_id)
stream = Spodcast.get_content_stream(episode_stream_id, Spodcast.DOWNLOAD_QUALITY) stream = Spodcast.get_content_stream(episode_stream_id, Spodcast.DOWNLOAD_QUALITY)
basename = f"{filename}.ogg" basename = f"{filename}.ogg"
filepath = os.path.join(show_directory, basename) filepath = os.path.join(show_directory, basename)
@ -181,7 +182,7 @@ def download_episode(episode_id) -> None:
"mimetype": mimetype, "mimetype": mimetype,
"medium": "audio", "medium": "audio",
"duration": int(duration_ms/1000), "duration": int(duration_ms/1000),
"date": time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(release_date, "%Y-%m-%d")), "date": time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(release_date, "%Y-%m-%dT%H:%M:%SZ")),
"title": escape(episode_name), "guid": uri, "description": escape(description), "title": escape(episode_name), "guid": uri, "description": escape(description),
"filename": urllib.parse.quote(basename), "filename": urllib.parse.quote(basename),
"size": int(size) } "size": int(size) }
@ -192,7 +193,7 @@ def download_episode(episode_id) -> None:
if Spodcast.CONFIG.get_rss_feed(): if Spodcast.CONFIG.get_rss_feed():
show_index_file_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_INDEX}.{RSS_FEED_INFO_EXTENSION}") show_index_file_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_INDEX}.{RSS_FEED_INFO_EXTENSION}")
if not os.path.isfile(show_index_file_name) or int(get_index_version(show_index_file_name)) < Spodcast.CONFIG.get_version_int(): if not os.path.isfile(show_index_file_name) or int(get_index_version(show_index_file_name)) < Spodcast.CONFIG.get_version_int():
podcast_name, link, description, image = get_info(episode_id, "show") podcast_link, podcast_description, podcast_image = get_show_info(podcast_id)
show_info = {} show_info = {}
if os.path.isfile(show_index_file_name): if os.path.isfile(show_index_file_name):
with open(show_index_file_name, encoding='utf-8') as file: with open(show_index_file_name, encoding='utf-8') as file:
@ -200,8 +201,8 @@ def download_episode(episode_id) -> None:
file.close() file.close()
show_info["version"] = str(RSS_FEED_VERSION + Spodcast.CONFIG.get_version_str()) show_info["version"] = str(RSS_FEED_VERSION + Spodcast.CONFIG.get_version_str())
show_info["title"] = escape(podcast_name) show_info["title"] = escape(podcast_name)
show_info["link"] = link show_info["link"] = podcast_link
show_info["description"] = escape(description) show_info["description"] = escape(podcast_description)
show_info["image"] = RSS_FEED_SHOW_IMAGE show_info["image"] = RSS_FEED_SHOW_IMAGE
show_index_file = open(show_index_file_name, "w") show_index_file = open(show_index_file_name, "w")
show_index_file.write(json.dumps(show_info)) show_index_file.write(json.dumps(show_info))
@ -209,7 +210,7 @@ def download_episode(episode_id) -> None:
show_image_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_IMAGE}") show_image_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_IMAGE}")
if not os.path.isfile(show_image_name): if not os.path.isfile(show_image_name):
download_file(image, show_image_name) download_file(podcast_image, show_image_name)
rss_file_name = os.path.join(show_directory, RSS_FEED_FILE_NAME) rss_file_name = os.path.join(show_directory, RSS_FEED_FILE_NAME)
if not os.path.isfile(rss_file_name) or int(get_index_version(rss_file_name)) < Spodcast.CONFIG.get_version_int(): if not os.path.isfile(rss_file_name) or int(get_index_version(rss_file_name)) < Spodcast.CONFIG.get_version_int():
@ -217,3 +218,5 @@ def download_episode(episode_id) -> None:
rss_file.write(RSS_FEED_CODE(Spodcast.CONFIG.get_version_str())) rss_file.write(RSS_FEED_CODE(Spodcast.CONFIG.get_version_str()))
rss_file.close() rss_file.close()
except ApiClient.StatusCodeException as status:
log.warning("episode %s, StatusCodeException: %s", episode_id, status)

View file

@ -137,6 +137,7 @@ class Spodcast:
@classmethod @classmethod
def invoke_url(cls, url, tryCount=0): def invoke_url(cls, url, tryCount=0):
headers = cls.get_auth_header() headers = cls.get_auth_header()
Spodcast.LOG.debug(headers)
response = requests.get(url, headers=headers) response = requests.get(url, headers=headers)
responsetext = response.text responsetext = response.text
responsejson = response.json() responsejson = response.json()

View file

@ -1,9 +0,0 @@
EPISODE_INFO_URL = 'https://api.spotify.com/v1/episodes'
SHOWS_URL = 'https://api.spotify.com/v1/shows'
EPISODE_DOWNLOAD_URL = lambda episode_id: f'https://api-partner.spotify.com/pathfinder/v1/query?operationName=getEpisode&variables={{"uri":"spotify:episode:{episode_id}"}}&extensions={{"persistedQuery":{{"version":1,"sha256Hash":"224ba0fd89fcfdfb3a15fa2d82a6112d3f4e2ac88fba5c6713de04d1b72cf482"}}}}'
ANON_PODCAST_DOMAIN = 'anon-podcast.scdn.co'

View file

@ -5,6 +5,7 @@ from enum import Enum
from typing import List, Tuple from typing import List, Tuple
from spodcast.spodcast import Spodcast from spodcast.spodcast import Spodcast
from spodcast.const import OPEN_SPOTIFY_URL
valid_filename_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) valid_filename_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
@ -47,3 +48,6 @@ def clean_filename(filename, whitelist=valid_filename_chars, replace=' '):
cleaned_filename = ''.join(c for c in cleaned_filename if c in whitelist) cleaned_filename = ''.join(c for c in cleaned_filename if c in whitelist)
return cleaned_filename return cleaned_filename
def uri_to_url(spotify_id):
(spotify,sp_type,sp_id) = spotify_id.split(':')
return f'https://{OPEN_SPOTIFY_URL}/{sp_type}/{sp_id}'