Spodcast/spodcast/podcast.py
Frank de Lange 0877f04cb1 Spodcast v0.5.0 which:
- fixes #13 (Cannot download episodes anymore)
 - uses _librespot-python_ interfaces instead of raw web API access (needed to fix #13)
 - can not yet determine decrypted file size for Spotify-hosted episodes (which used to work) so will only look at the file name to determine whether an episode has already been downloaded. To retry corrupted downloads just remove the partially downloaded file and try again.
2022-06-30 16:52:41 +00:00

222 lines
9.3 KiB
Python

import json
import logging
import os
import time
from datetime import datetime
from html import escape
import urllib.parse
import base62
from base62 import CHARSET_INVERTED
import ffmpeg
from librespot import util
from librespot.metadata import ShowId, EpisodeId
from librespot.core import ApiClient
from spodcast.const import FILE_EXISTS, IMAGE_CDN
from spodcast.feedgenerator import RSS_FEED_CODE, RSS_FEED_FILE_NAME, RSS_FEED_SHOW_INDEX, RSS_FEED_INFO_EXTENSION, RSS_FEED_SHOW_IMAGE, RSS_FEED_VERSION, get_index_version
from spodcast.utils import clean_filename, uri_to_url
from spodcast.spodcast import Spodcast
# Module-level logger shared by every function in this file; it is named after
# the module via __name__.
log = logging.getLogger(__name__)
def hex_to_spotify_id(hex_id):
    """Convert a hexadecimal identifier into its base62 text form.

    The result is what this module embeds in ``spotify:show:`` and
    ``spotify:episode:`` URIs.

    Args:
        hex_id: identifier as a hexadecimal string.

    Returns:
        The identifier encoded with ``base62`` using ``CHARSET_INVERTED``.
    """
    raw_id = util.hex_to_bytes(hex_id)
    return base62.encodebytes(raw_id, CHARSET_INVERTED)
def get_show_info(show_id_hex):
    """Fetch the link, description and cover image location of a show.

    Args:
        show_id_hex: show identifier as a hexadecimal string.

    Returns:
        A ``(link, description, image)`` tuple, where ``link`` comes from
        ``uri_to_url`` applied to the show URI, ``description`` is taken from
        the show metadata and ``image`` is ``IMAGE_CDN`` applied to the hex
        file id of the second entry in the show's cover image list.
    """
    log.info("Fetching show information...")
    show_ref = ShowId.from_hex(show_id_hex)
    show_uri = f'spotify:show:{hex_to_spotify_id(show_id_hex)}'
    metadata = Spodcast.SESSION.api().get_metadata_4_show(show_ref)

    # Index 1 of the cover image list is used, exactly as before.
    cover = metadata.cover_image.image[1]
    return (
        uri_to_url(show_uri),
        metadata.description,
        IMAGE_CDN(util.bytes_to_hex(cover.file_id)),
    )
def get_episode_info(episode_id_hex):
    """Fetch the metadata needed to download and describe one episode.

    Args:
        episode_id_hex: episode identifier as a hexadecimal string.

    Returns:
        An 8-tuple ``(podcast_name, podcast_id, duration_ms, episode_name,
        description, release_date, uri, external_url)``. ``podcast_id`` is the
        show's gid as a hex string, ``release_date`` is formatted as
        ``Y-M-DTH:M:00Z`` (numbers are not zero-padded, seconds are always
        ``00``) and ``external_url`` is ``None`` when the metadata carries no
        external URL.
    """
    log.info("Fetching episode information...")
    episode_ref = EpisodeId.from_hex(episode_id_hex)
    episode_uri = f'spotify:episode:{hex_to_spotify_id(episode_id_hex)}'
    metadata = Spodcast.SESSION.api().get_metadata_4_episode(episode_ref)

    show = metadata.show
    published = metadata.publish_time
    release_date = (
        f'{published.year}-{published.month}-{published.day}'
        f'T{published.hour}:{published.minute}:00Z'
    )
    return (
        show.name,
        util.bytes_to_hex(show.gid),
        metadata.duration,
        metadata.name,
        metadata.description,
        release_date,
        episode_uri,
        # An empty/falsy external URL is reported as None.
        metadata.external_url or None,
    )
def get_episodes(show_id):
    """Return the hex ids of all episodes of a show, newest first.

    Episodes are ordered by publish time down to the minute. The sort key is
    a plain tuple of the publish-time fields rather than a formatted and
    re-parsed date string: the ordering is the same for valid dates, but an
    episode whose publish time is missing or malformed no longer aborts the
    whole listing with a ``ValueError`` from ``strptime``.

    Args:
        show_id: show identifier passed straight to the metadata API
            (presumably a librespot ``ShowId`` -- confirm against callers).

    Returns:
        A list of episode gids as hexadecimal strings, most recent first.
    """
    info = Spodcast.SESSION.api().get_metadata_4_show(show_id)

    def published_at(episode):
        stamp = episode.publish_time
        return (stamp.year, stamp.month, stamp.day, stamp.hour, stamp.minute)

    ordered = sorted(info.episode, key=published_at, reverse=True)
    return [util.bytes_to_hex(episode.gid) for episode in ordered]
def download_file(url, filepath):
    """Download ``url`` to ``filepath`` over HTTP.

    Args:
        url: location of the file to fetch; redirects are followed.
        filepath: destination path on disk.

    Returns:
        ``(filepath, size, mimetype)`` where ``size`` is the number of bytes
        on disk after the download, or ``FILE_EXISTS`` when skipping existing
        files is enabled and a file is already present whose size is within
        1000 bytes of the announced ``Content-Length``. ``mimetype`` is always
        ``"audio/mpeg"``. Returns ``None`` when the server answers with a
        non-200 status that is not an HTTP error.

    Raises:
        requests.HTTPError: when the server answers with a 4xx or 5xx status.
    """
    import functools
    import shutil

    import requests

    mimetype = "audio/mpeg"

    # Using the response as a context manager releases the streamed
    # connection on every exit path, including the early returns below where
    # the body is never read.
    with requests.get(url, stream=True, allow_redirects=True) as r:
        if r.status_code != 200:
            # raise_for_status() raises for 4xx and 5xx; anything else that is
            # not a plain 200 is logged and reported to the caller as None.
            r.raise_for_status()
            log.error(f"Request to {url} returned status code {r.status_code}")
            return None

        file_size = int(r.headers.get('Content-Length', 0))
        if (
            os.path.isfile(filepath)
            and abs(file_size - os.path.getsize(filepath)) < 1000
            and Spodcast.CONFIG.get_skip_existing_files()
        ):
            return filepath, FILE_EXISTS, mimetype

        log.info("Downloading file")
        # Make the raw stream undo any transfer encoding (gzip, deflate) so
        # the bytes written to disk are the decoded payload.
        r.raw.read = functools.partial(r.raw.read, decode_content=True)
        with open(filepath, "wb") as file:
            shutil.copyfileobj(r.raw, file)

    return filepath, os.path.getsize(filepath), mimetype
def download_stream(stream, filepath, duration_ms=None):
    """Write a Spotify-hosted episode stream to disk, optionally transcoding.

    Args:
        stream: content stream object exposing ``input_stream.size`` and
            ``input_stream.stream().read(n)``.
        filepath: destination path for the downloaded (ogg) audio.
        duration_ms: playing time of the episode in milliseconds. It is only
            used to pace the download when real-time downloading is enabled;
            when it is missing (or the stream size is 0) the download is not
            throttled.

    Returns:
        ``(filepath, size, mimetype)``. ``size`` is ``FILE_EXISTS`` when the
        download was skipped; otherwise it is the number of bytes written, or
        the size of the mp3 file when transcoding is enabled (in which case
        the returned path and mimetype refer to the mp3 and the ogg file is
        removed).
    """
    size = stream.input_stream.size
    mp3_filepath = os.path.splitext(filepath)[0] + ".mp3"
    mimetype = "audio/ogg"

    # "FILE SIZE CHECK TEMPORARILY OUT OF ORDER"
    # Need to find a way to get decrypted content size from Spotify to enable
    # file size checks. For now this only checks for the presence of a file
    # with the same name (or its transcoded .mp3 counterpart). To recover from
    # failed downloads simply remove incomplete files.
    already_present = os.path.isfile(filepath) or (
        Spodcast.CONFIG.get_transcode() and os.path.isfile(mp3_filepath)
    )
    if already_present and Spodcast.CONFIG.get_skip_existing_files():
        return filepath, FILE_EXISTS, mimetype

    log.info("Downloading stream")
    chunk_size = Spodcast.CONFIG.get_chunk_size()
    real_time = Spodcast.CONFIG.get_download_real_time()
    # Pacing needs both a duration and a non-zero size to compute how far
    # along the download should be; without them, download at full speed.
    throttle = bool(real_time and duration_ms and size)
    if real_time and not throttle:
        log.debug("realtime enabled but duration or size unknown, not throttling")

    time_start = time.time()
    downloaded = 0
    with open(filepath, 'wb') as file:
        for _ in range(int(size / chunk_size) + 1):
            data = stream.input_stream.stream().read(chunk_size)
            file.write(data)
            downloaded += len(data)
            if throttle:
                delta_real = time.time() - time_start
                # Playing time corresponding to the fraction downloaded so far.
                delta_want = (downloaded / size) * (duration_ms / 1000)
                if delta_want > delta_real:
                    wait = delta_want - delta_real
                    log.debug(f"realtime enabled, waiting for {wait} seconds...")
                    time.sleep(wait)

    if Spodcast.CONFIG.get_transcode():
        log.info("transcoding ogg->mp3")
        transcoder = ffmpeg.input(filepath)
        transcoder = ffmpeg.output(transcoder, mp3_filepath)
        ffmpeg.run(transcoder, quiet=True)
        # The ogg file was closed by the with-block above; only the mp3 is kept.
        os.unlink(filepath)
        filepath = mp3_filepath
        downloaded = os.path.getsize(filepath)
        mimetype = "audio/mpeg"

    return filepath, downloaded, mimetype


def download_episode(episode_id) -> None:
    """Download one episode and, if enabled, update the show's RSS feed files.

    The episode is fetched from its external URL when it has one, otherwise
    from the Spotify content stream. When the RSS feed option is on, an info
    file is written next to each freshly downloaded episode, and the show
    index, show image and feed script are created or refreshed when they are
    missing or older than the running version.

    Args:
        episode_id: episode identifier as a hexadecimal string.

    ``ApiClient.StatusCodeException`` raised along the way is logged as a
    warning and swallowed.
    """
    try:
        (podcast_name, podcast_id, duration_ms, episode_name, description,
         release_date, uri, download_url) = get_episode_info(episode_id)

        if podcast_name is None:
            log.warning('Skipping episode (podcast NOT FOUND)')
            return
        if episode_name is None:
            log.warning('Skipping episode (episode NOT FOUND)')
            return

        filename = clean_filename(podcast_name + ' - ' + episode_name)
        show_directory = os.path.realpath(os.path.join(Spodcast.CONFIG.get_root_path(), clean_filename(podcast_name) + '/'))
        os.makedirs(show_directory, exist_ok=True)

        if download_url is None:
            episode_stream_id = EpisodeId.from_hex(episode_id)
            stream = Spodcast.get_content_stream(episode_stream_id, Spodcast.DOWNLOAD_QUALITY)
            basename = f"{filename}.ogg"
            filepath = os.path.join(show_directory, basename)
            # The duration lets download_stream pace real-time downloads.
            path, size, mimetype = download_stream(stream, filepath, duration_ms)
            basename = os.path.basename(path)  # may have changed due to transcoding
        else:
            basename = f"{filename}.mp3"
            filepath = os.path.join(show_directory, basename)
            result = download_file(download_url, filepath)
            if result is None:
                # download_file already logged the unexpected status code.
                log.warning(f"Skipping {podcast_name}: {episode_name} (download failed)")
                return
            path, size, mimetype = result

        if size == FILE_EXISTS:
            log.info(f"Skipped {podcast_name}: {episode_name}")
        else:
            log.warning(f"Downloaded {podcast_name}: {episode_name}")

            if Spodcast.CONFIG.get_rss_feed():
                episode_info = {
                    "mimetype": mimetype,
                    "medium": "audio",
                    "duration": int(duration_ms/1000),
                    "date": time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(release_date, "%Y-%m-%dT%H:%M:%SZ")),
                    "title": escape(episode_name),
                    "guid": uri,
                    "description": escape(description),
                    "filename": urllib.parse.quote(basename),
                    "size": int(size),
                }
                info_file_name = os.path.join(show_directory, f"{basename}.{RSS_FEED_INFO_EXTENSION}")
                with open(info_file_name, "w") as info_file:
                    info_file.write(json.dumps(episode_info))

        if Spodcast.CONFIG.get_rss_feed():
            show_index_file_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_INDEX}.{RSS_FEED_INFO_EXTENSION}")
            if not os.path.isfile(show_index_file_name) or int(get_index_version(show_index_file_name)) < Spodcast.CONFIG.get_version_int():
                podcast_link, podcast_description, podcast_image = get_show_info(podcast_id)
                # Start from the existing index, if any, so keys this code
                # does not set are carried over.
                show_info = {}
                if os.path.isfile(show_index_file_name):
                    with open(show_index_file_name, encoding='utf-8') as file:
                        show_info = json.load(file)
                show_info["version"] = str(RSS_FEED_VERSION + Spodcast.CONFIG.get_version_str())
                show_info["title"] = escape(podcast_name)
                show_info["link"] = podcast_link
                show_info["description"] = escape(podcast_description)
                show_info["image"] = RSS_FEED_SHOW_IMAGE
                with open(show_index_file_name, "w") as show_index_file:
                    show_index_file.write(json.dumps(show_info))

                show_image_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_IMAGE}")
                if not os.path.isfile(show_image_name):
                    download_file(podcast_image, show_image_name)

            rss_file_name = os.path.join(show_directory, RSS_FEED_FILE_NAME)
            if not os.path.isfile(rss_file_name) or int(get_index_version(rss_file_name)) < Spodcast.CONFIG.get_version_int():
                with open(rss_file_name, "w") as rss_file:
                    rss_file.write(RSS_FEED_CODE(Spodcast.CONFIG.get_version_str()))

    except ApiClient.StatusCodeException as status:
        log.warning("episode %s, StatusCodeException: %s", episode_id, status)