diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 787a1d2..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -https://github.com/kokarare1212/librespot-python/archive/refs/heads/rewrite.zip diff --git a/setup.cfg b/setup.cfg index 2015a57..f2bc7c2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = spodcast -version = 0.4.9 +version = 0.5.0 description = A caching Spotify podcast to RSS proxy. long_description = file:README.md long_description_content_type = text/markdown @@ -20,7 +20,8 @@ platforms = any packages = spodcast install_requires = - librespot >= 0.0.1 + librespot >= 0.0.5 + pybase62 ffmpeg-python setuptools include_package_data = diff --git a/spodcast/app.py b/spodcast/app.py index b6f684d..b4bf3dd 100644 --- a/spodcast/app.py +++ b/spodcast/app.py @@ -2,8 +2,9 @@ import logging from itertools import islice from librespot.audio.decoders import AudioQuality +from librespot.metadata import ShowId, EpisodeId -from spodcast.podcast import download_episode, get_show_episodes +from spodcast.podcast import download_episode, get_episodes from spodcast.utils import regex_input_for_urls from spodcast.spodcast import Spodcast @@ -15,10 +16,15 @@ def client(args) -> None: if args.urls: for spotify_url in args.urls: - episode_id, show_id = regex_input_for_urls(spotify_url) - log.debug(f"episode_id {episode_id}. show_id {show_id}") - if episode_id is not None: + episode_id_str, show_id_str = regex_input_for_urls(spotify_url) + log.debug(f"episode_id_str {episode_id_str}. show_id_str {show_id_str}") + if episode_id_str is not None: + episode_id = EpisodeId.from_base62(episode_id_str) + log.debug("episode_id: %s", episode_id) download_episode(episode_id) - elif show_id is not None: - for episode in islice(get_show_episodes(show_id), Spodcast.CONFIG.get_max_episodes()): - download_episode(episode) + elif show_id_str is not None: + show_id = ShowId.from_base62(show_id_str) + log.debug("show_id: %s", show_id) + for episode_id in islice(get_episodes(show_id), Spodcast.CONFIG.get_max_episodes()): + log.debug("episode_id: %s", episode_id) + download_episode(episode_id) diff --git a/spodcast/const.py b/spodcast/const.py index b311a6e..4f2f77c 100644 --- a/spodcast/const.py +++ b/spodcast/const.py @@ -1,22 +1,8 @@ -ERROR = 'error' -ITEMS = 'items' -NAME = 'name' -DESCRIPTION = "description" -ID = 'id' -URL = 'url' -URI = 'uri' -EXTERNAL_URLS = 'external_urls' -SPOTIFY = 'spotify' -RELEASE_DATE = 'release_date' -IMAGES = 'images' +TYPE = 'type' LIMIT = 'limit' OFFSET = 'offset' CREDENTIALS_PREFIX = 'spodcast-cred' -AUTHORIZATION = 'Authorization' -DURATION_MS = 'duration_ms' -SHOW = 'show' -TYPE = 'type' USER_READ_EMAIL = 'user-read-email' -PLAYLIST_READ_PRIVATE = 'playlist-read-private' -USER_LIBRARY_READ = 'user-library-read' FILE_EXISTS = -1 +OPEN_SPOTIFY_URL = 'open.spotify.com' +IMAGE_CDN = lambda image_id_hex: f'https://i.scdn.co/image/{image_id_hex}' diff --git a/spodcast/podcast.py b/spodcast/podcast.py index 572bb6f..edb3370 100644 --- a/spodcast/podcast.py +++ b/spodcast/podcast.py @@ -6,68 +6,61 @@ from datetime import datetime from html import escape import urllib.parse -from librespot.metadata import EpisodeId - +import base62 +from base62 import CHARSET_INVERTED import ffmpeg -from spodcast.const import ERROR, ID, ITEMS, NAME, SHOW, DURATION_MS, DESCRIPTION, RELEASE_DATE, URI, URL, EXTERNAL_URLS, IMAGES, SPOTIFY, FILE_EXISTS +from librespot import util +from librespot.metadata import ShowId, EpisodeId +from librespot.core import ApiClient + +from spodcast.const import FILE_EXISTS, IMAGE_CDN from spodcast.feedgenerator import RSS_FEED_CODE, RSS_FEED_FILE_NAME, RSS_FEED_SHOW_INDEX, RSS_FEED_INFO_EXTENSION, RSS_FEED_SHOW_IMAGE, RSS_FEED_VERSION, get_index_version -from spodcast.spotapi import EPISODE_INFO_URL, SHOWS_URL, EPISODE_DOWNLOAD_URL, ANON_PODCAST_DOMAIN -from spodcast.utils import clean_filename +from spodcast.utils import clean_filename, uri_to_url from spodcast.spodcast import Spodcast log = logging.getLogger(__name__) -def get_info(episode_id_str, target="episode"): + +def hex_to_spotify_id(hex_id): + return base62.encodebytes(util.hex_to_bytes(hex_id), CHARSET_INVERTED) + + +def get_show_info(show_id_hex): + log.info("Fetching show information...") + show_id = ShowId.from_hex(show_id_hex) + uri = f'spotify:show:{hex_to_spotify_id(show_id_hex)}' + info = Spodcast.SESSION.api().get_metadata_4_show(show_id) + link = uri_to_url(uri) + description = info.description + image = IMAGE_CDN(util.bytes_to_hex(info.cover_image.image[1].file_id)) + + return link, description, image + + +def get_episode_info(episode_id_hex): log.info("Fetching episode information...") - (raw, info) = Spodcast.invoke_url(f'{EPISODE_INFO_URL}/{episode_id_str}') - if not info: - log.error('INVALID EPISODE ID') + episode_id = EpisodeId.from_hex(episode_id_hex) + uri = f'spotify:episode:{hex_to_spotify_id(episode_id_hex)}' + info = Spodcast.SESSION.api().get_metadata_4_episode(episode_id) + podcast_name = info.show.name + podcast_id = util.bytes_to_hex(info.show.gid) + episode_name = info.name + duration_ms = info.duration + description = info.description + external_url = info.external_url if info.external_url else None + pt = info.publish_time + release_date = f'{pt.year}-{pt.month}-{pt.day}T{pt.hour}:{pt.minute}:00Z' - log.debug("episode info: %s", info) - - if ERROR in info: - return None, None - - if target == "episode": - - podcast_name = info[SHOW][NAME] - episode_name = info[NAME] - duration_ms = info[DURATION_MS] - description = info[DESCRIPTION] - release_date = info[RELEASE_DATE] - uri = info[URI] - - return podcast_name, duration_ms, episode_name, description, release_date, uri - - elif target == "show": - podcast_name = info[SHOW][NAME] - link = info[SHOW][EXTERNAL_URLS][SPOTIFY] - description = info[SHOW][DESCRIPTION] - image = info[SHOW][IMAGES][0][URL] - - return podcast_name, link, description, image + return podcast_name, podcast_id, duration_ms, episode_name, description, release_date, uri, external_url -def get_show_episodes(show_id_str) -> list: - episodes = [] - offset = 0 - limit = 50 +def get_episodes(show_id): + info = Spodcast.SESSION.api().get_metadata_4_show(show_id) + episodes = info.episode + episodes.sort(key = lambda x: datetime.strptime(f'{x.publish_time.year}-{x.publish_time.month}-{x.publish_time.day}T{x.publish_time.hour}:{x.publish_time.minute}:00Z', "%Y-%m-%dT%H:%M:%SZ"), reverse=True) - log.info("Fetching episodes...") - while True: - resp = Spodcast.invoke_url_with_params( - f'{SHOWS_URL}/{show_id_str}/episodes', limit=limit, offset=offset) - offset += limit - for episode in resp[ITEMS]: - episodes.append([episode[ID], episode[RELEASE_DATE]]) - if len(resp[ITEMS]) < limit: - break - - # some shows list episodes in the wrong order so reverse sort them by release date - episodes.sort(key=lambda x: datetime.strptime(x[1], "%Y-%m-%d"), reverse=True) - - return [episode[0] for episode in episodes] + return [util.bytes_to_hex(episode.gid) for episode in episodes] def download_file(url, filepath): @@ -101,14 +94,24 @@ def download_file(url, filepath): return filepath, os.path.getsize(filepath), mimetype + def download_stream(stream, filepath): size = stream.input_stream.size + mp3_filepath = os.path.splitext(filepath)[0] + ".mp3" mimetype = "audio/ogg" if ( - ((os.path.isfile(filepath) - and abs(size - os.path.getsize(filepath)) < 1000) + # "FILE SIZE CHECK TEMPORARILY OUT OF ORDER" + # Need to find a way to get decrypted content size + # from Spotify to enable file size checks, for now + # this only checks for the presence of a file with + # the same name. To recover from failed downloads + # simply remove incomplete files + # + #((os.path.isfile(filepath) + #and abs(size - os.path.getsize(filepath)) < 1000) + (os.path.isfile(filepath) or (Spodcast.CONFIG.get_transcode() and os.path.isfile(mp3_filepath))) and Spodcast.CONFIG.get_skip_existing_files() @@ -145,75 +148,75 @@ def download_stream(stream, filepath): def download_episode(episode_id) -> None: - podcast_name, duration_ms, episode_name, description, release_date, uri = get_info(episode_id, "episode") + try: + podcast_name, podcast_id, duration_ms, episode_name, description, release_date, uri, download_url = get_episode_info(episode_id) - if podcast_name is None: - log.warning('Skipping episode (podcast NOT FOUND)') - elif episode_name is None: - log.warning('Skipping episode (episode NOT FOUND)') - else: - filename = clean_filename(podcast_name + ' - ' + episode_name) - log.debug(Spodcast.invoke_url(EPISODE_DOWNLOAD_URL(episode_id))) - download_url = Spodcast.invoke_url(EPISODE_DOWNLOAD_URL(episode_id))[1]["data"]["episode"]["audio"]["items"][-1]["url"] - log.debug(f"download_url: {download_url}") - show_directory = os.path.realpath(os.path.join(Spodcast.CONFIG.get_root_path(), clean_filename(podcast_name) + '/')) - os.makedirs(show_directory, exist_ok=True) - - if ANON_PODCAST_DOMAIN in download_url: - episode_stream_id = EpisodeId.from_base62(episode_id) - stream = Spodcast.get_content_stream(episode_stream_id, Spodcast.DOWNLOAD_QUALITY) - basename = f"{filename}.ogg" - filepath = os.path.join(show_directory, basename) - path, size, mimetype = download_stream(stream, filepath) - basename = os.path.basename(path) # may have changed due to transcoding + if podcast_name is None: + log.warning('Skipping episode (podcast NOT FOUND)') + elif episode_name is None: + log.warning('Skipping episode (episode NOT FOUND)') else: - basename=f"{filename}.mp3" - filepath = os.path.join(show_directory, basename) - path, size, mimetype = download_file(download_url, filepath) + filename = clean_filename(podcast_name + ' - ' + episode_name) + show_directory = os.path.realpath(os.path.join(Spodcast.CONFIG.get_root_path(), clean_filename(podcast_name) + '/')) + os.makedirs(show_directory, exist_ok=True) - if size == FILE_EXISTS: - log.info(f"Skipped {podcast_name}: {episode_name}") - else: - log.warning(f"Downloaded {podcast_name}: {episode_name}") + if download_url is None: + episode_stream_id = EpisodeId.from_hex(episode_id) + stream = Spodcast.get_content_stream(episode_stream_id, Spodcast.DOWNLOAD_QUALITY) + basename = f"{filename}.ogg" + filepath = os.path.join(show_directory, basename) + path, size, mimetype = download_stream(stream, filepath) + basename = os.path.basename(path) # may have changed due to transcoding + else: + basename=f"{filename}.mp3" + filepath = os.path.join(show_directory, basename) + path, size, mimetype = download_file(download_url, filepath) + + if size == FILE_EXISTS: + log.info(f"Skipped {podcast_name}: {episode_name}") + else: + log.warning(f"Downloaded {podcast_name}: {episode_name}") + + if Spodcast.CONFIG.get_rss_feed(): + episode_info = { + "mimetype": mimetype, + "medium": "audio", + "duration": int(duration_ms/1000), + "date": time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(release_date, "%Y-%m-%dT%H:%M:%SZ")), + "title": escape(episode_name), "guid": uri, "description": escape(description), + "filename": urllib.parse.quote(basename), + "size": int(size) } + info_file = open(os.path.join(show_directory, f"{basename}.{RSS_FEED_INFO_EXTENSION}"), "w") + info_file.write(json.dumps(episode_info)) + info_file.close() if Spodcast.CONFIG.get_rss_feed(): - episode_info = { - "mimetype": mimetype, - "medium": "audio", - "duration": int(duration_ms/1000), - "date": time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(release_date, "%Y-%m-%d")), - "title": escape(episode_name), "guid": uri, "description": escape(description), - "filename": urllib.parse.quote(basename), - "size": int(size) } - info_file = open(os.path.join(show_directory, f"{basename}.{RSS_FEED_INFO_EXTENSION}"), "w") - info_file.write(json.dumps(episode_info)) - info_file.close() + show_index_file_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_INDEX}.{RSS_FEED_INFO_EXTENSION}") + if not os.path.isfile(show_index_file_name) or int(get_index_version(show_index_file_name)) < Spodcast.CONFIG.get_version_int(): + podcast_link, podcast_description, podcast_image = get_show_info(podcast_id) + show_info = {} + if os.path.isfile(show_index_file_name): + with open(show_index_file_name, encoding='utf-8') as file: + show_info = json.load(file) + file.close() + show_info["version"] = str(RSS_FEED_VERSION + Spodcast.CONFIG.get_version_str()) + show_info["title"] = escape(podcast_name) + show_info["link"] = podcast_link + show_info["description"] = escape(podcast_description) + show_info["image"] = RSS_FEED_SHOW_IMAGE + show_index_file = open(show_index_file_name, "w") + show_index_file.write(json.dumps(show_info)) + show_index_file.close() - if Spodcast.CONFIG.get_rss_feed(): - show_index_file_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_INDEX}.{RSS_FEED_INFO_EXTENSION}") - if not os.path.isfile(show_index_file_name) or int(get_index_version(show_index_file_name)) < Spodcast.CONFIG.get_version_int(): - podcast_name, link, description, image = get_info(episode_id, "show") - show_info = {} - if os.path.isfile(show_index_file_name): - with open(show_index_file_name, encoding='utf-8') as file: - show_info = json.load(file) - file.close() - show_info["version"] = str(RSS_FEED_VERSION + Spodcast.CONFIG.get_version_str()) - show_info["title"] = escape(podcast_name) - show_info["link"] = link - show_info["description"] = escape(description) - show_info["image"] = RSS_FEED_SHOW_IMAGE - show_index_file = open(show_index_file_name, "w") - show_index_file.write(json.dumps(show_info)) - show_index_file.close() + show_image_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_IMAGE}") + if not os.path.isfile(show_image_name): + download_file(podcast_image, show_image_name) - show_image_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_IMAGE}") - if not os.path.isfile(show_image_name): - download_file(image, show_image_name) - - rss_file_name = os.path.join(show_directory, RSS_FEED_FILE_NAME) - if not os.path.isfile(rss_file_name) or int(get_index_version(rss_file_name)) < Spodcast.CONFIG.get_version_int(): - rss_file = open(rss_file_name, "w") - rss_file.write(RSS_FEED_CODE(Spodcast.CONFIG.get_version_str())) - rss_file.close() + rss_file_name = os.path.join(show_directory, RSS_FEED_FILE_NAME) + if not os.path.isfile(rss_file_name) or int(get_index_version(rss_file_name)) < Spodcast.CONFIG.get_version_int(): + rss_file = open(rss_file_name, "w") + rss_file.write(RSS_FEED_CODE(Spodcast.CONFIG.get_version_str())) + rss_file.close() + except ApiClient.StatusCodeException as status: + log.warning("episode %s, StatusCodeException: %s", episode_id, status) diff --git a/spodcast/spodcast.py b/spodcast/spodcast.py index 5ab2564..b939ff0 100755 --- a/spodcast/spodcast.py +++ b/spodcast/spodcast.py @@ -137,6 +137,7 @@ class Spodcast: @classmethod def invoke_url(cls, url, tryCount=0): headers = cls.get_auth_header() + Spodcast.LOG.debug(headers) response = requests.get(url, headers=headers) responsetext = response.text responsejson = response.json() diff --git a/spodcast/spotapi.py b/spodcast/spotapi.py deleted file mode 100644 index 692ed52..0000000 --- a/spodcast/spotapi.py +++ /dev/null @@ -1,9 +0,0 @@ -EPISODE_INFO_URL = 'https://api.spotify.com/v1/episodes' - -SHOWS_URL = 'https://api.spotify.com/v1/shows' - -EPISODE_DOWNLOAD_URL = lambda episode_id: f'https://api-partner.spotify.com/pathfinder/v1/query?operationName=getEpisode&variables={{"uri":"spotify:episode:{episode_id}"}}&extensions={{"persistedQuery":{{"version":1,"sha256Hash":"224ba0fd89fcfdfb3a15fa2d82a6112d3f4e2ac88fba5c6713de04d1b72cf482"}}}}' - -ANON_PODCAST_DOMAIN = 'anon-podcast.scdn.co' - - diff --git a/spodcast/utils.py b/spodcast/utils.py index ecba4f0..f31c6c9 100644 --- a/spodcast/utils.py +++ b/spodcast/utils.py @@ -5,6 +5,7 @@ from enum import Enum from typing import List, Tuple from spodcast.spodcast import Spodcast +from spodcast.const import OPEN_SPOTIFY_URL valid_filename_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) @@ -47,3 +48,6 @@ def clean_filename(filename, whitelist=valid_filename_chars, replace=' '): cleaned_filename = ''.join(c for c in cleaned_filename if c in whitelist) return cleaned_filename +def uri_to_url(spotify_id): + (spotify,sp_type,sp_id) = spotify_id.split(':') + return f'https://{OPEN_SPOTIFY_URL}/{sp_type}/{sp_id}'