Spodcast/spodcast/podcast.py

224 lines
9.3 KiB
Python

import json
import logging
import os
import time
from datetime import datetime
from html import escape
import urllib.parse
import base62
from base62 import CHARSET_INVERTED
import ffmpeg
from librespot import util
from librespot.metadata import ShowId, EpisodeId
from librespot.core import ApiClient
from spodcast.const import FILE_EXISTS, IMAGE_CDN
from spodcast.feedgenerator import RSS_FEED_CODE, RSS_FEED_FILE_NAME, RSS_FEED_SHOW_INDEX, RSS_FEED_INFO_EXTENSION, RSS_FEED_SHOW_IMAGE, RSS_FEED_VERSION, get_index_version
from spodcast.utils import clean_filename, uri_to_url
from spodcast.spodcast import Spodcast
log = logging.getLogger(__name__)
def hex_to_spotify_id(hex_id):
    """Return the base62 form of a hexadecimal id.

    The hex string is first converted to bytes, which are then base62-encoded
    with the inverted character set.
    """
    raw_id = util.hex_to_bytes(hex_id)
    return base62.encodebytes(raw_id, CHARSET_INVERTED)
def get_show_info(show_id_hex):
    """Look up the metadata of a show.

    Args:
        show_id_hex: hexadecimal id of the show.

    Returns:
        A ``(link, description, image)`` tuple: the URL derived from the
        show's ``spotify:show:`` URI, the show description, and the image CDN
        URL built from the second entry of the show's cover image list.
    """
    log.info("Fetching show information...")
    show_id = ShowId.from_hex(show_id_hex)
    spotify_id = hex_to_spotify_id(show_id_hex)
    metadata = Spodcast.SESSION.api().get_metadata_4_show(show_id)

    show_url = uri_to_url(f'spotify:show:{spotify_id}')
    show_description = metadata.description
    cover_file_id = metadata.cover_image.image[1].file_id
    cover_url = IMAGE_CDN(util.bytes_to_hex(cover_file_id))
    return show_url, show_description, cover_url
def get_episode_info(episode_id_hex):
    """Look up the metadata of an episode.

    Args:
        episode_id_hex: hexadecimal id of the episode.

    Returns:
        An 8-tuple ``(podcast_name, podcast_id, duration_ms, episode_name,
        description, release_date, uri, external_url)``. ``podcast_id`` is the
        show gid as hex, ``release_date`` is assembled from the publish time
        (year to minute, seconds fixed to ``00``, fields not zero-padded) and
        ``external_url`` is ``None`` when the metadata carries no external URL.
    """
    log.info("Fetching episode information...")
    episode_id = EpisodeId.from_hex(episode_id_hex)
    uri = f'spotify:episode:{hex_to_spotify_id(episode_id_hex)}'
    episode = Spodcast.SESSION.api().get_metadata_4_episode(episode_id)

    published = episode.publish_time
    release_date = f'{published.year}-{published.month}-{published.day}T{published.hour}:{published.minute}:00Z'

    return (
        episode.show.name,
        util.bytes_to_hex(episode.show.gid),
        episode.duration,
        episode.name,
        episode.description,
        release_date,
        uri,
        # An empty external URL is reported as None.
        episode.external_url or None,
    )
def get_episodes(show_id):
    """Return the hex gids of a show's episodes, most recently published first.

    The episode list of the fetched show metadata is sorted in place by
    publish time (year to minute) in descending order.
    """
    def published_at(episode):
        # Round-trip through strptime so ordering is by a real datetime.
        stamp = episode.publish_time
        return datetime.strptime(
            f'{stamp.year}-{stamp.month}-{stamp.day}T{stamp.hour}:{stamp.minute}:00Z',
            "%Y-%m-%dT%H:%M:%SZ",
        )

    show = Spodcast.SESSION.api().get_metadata_4_show(show_id)
    show_episodes = show.episode
    show_episodes.sort(key=published_at, reverse=True)
    return [util.bytes_to_hex(item.gid) for item in show_episodes]
def download_file(url, filepath):
    """Download *url* to *filepath* over HTTP.

    Args:
        url: address of the file to fetch (redirects are followed).
        filepath: destination path on disk.

    Returns:
        ``(filepath, size, mimetype)`` where ``size`` is the size of the file
        on disk after the download, or ``FILE_EXISTS`` when an existing file
        whose size is within 1000 bytes of the reported ``Content-Length`` was
        kept because skipping existing files is enabled. ``mimetype`` is
        always ``"audio/mpeg"``.
        ``None`` when the server answers with a non-200 status that is not an
        HTTP error (callers must check for this before unpacking).

    Raises:
        requests.HTTPError: when the server answers with a 4xx or 5xx status.
    """
    import functools
    import shutil
    import requests

    mimetype = "audio/mpeg"

    # The context manager releases the streamed connection on every exit
    # path (error, skip, or completed download).
    with requests.get(url, stream=True, allow_redirects=True) as r:
        if r.status_code != 200:
            # Log first: raise_for_status() raises for 4xx and 5xx, so
            # anything placed after it only runs for other non-200 codes.
            log.error(f"Request to {url} returned status code {r.status_code}")
            r.raise_for_status()
            return None

        file_size = int(r.headers.get('Content-Length', 0))
        if (
            os.path.isfile(filepath)
            and abs(file_size - os.path.getsize(filepath)) < 1000
            and Spodcast.CONFIG.get_skip_existing_files()
        ):
            return filepath, FILE_EXISTS, mimetype

        log.info("Downloading file")
        # Have urllib3 undo any content encoding while copying the raw body.
        r.raw.read = functools.partial(r.raw.read, decode_content=True)
        with open(filepath, "wb") as file:
            shutil.copyfileobj(r.raw, file)

    return filepath, os.path.getsize(filepath), mimetype
def download_stream(stream, filepath, duration_ms=None):
    """Write a content stream to *filepath*, optionally transcoding to mp3.

    Args:
        stream: loaded content stream; ``stream.input_stream`` must provide
            ``size`` and ``stream()``.
        filepath: destination path of the downloaded (ogg) file.
        duration_ms: playback duration in milliseconds. Only used to pace the
            download when real-time downloading is enabled. When omitted the
            duration is looked up on ``stream.episode.duration`` if available;
            if no duration (or no stream size) is known the download runs
            unthrottled.

    Returns:
        ``(path, size, mimetype)``. ``size`` is ``FILE_EXISTS`` when an
        existing file was kept; otherwise it is the number of bytes written,
        or the size of the mp3 file when transcoding is enabled (in which
        case ``path`` is the mp3 path and the ogg file is removed).
    """
    size = stream.input_stream.size
    mp3_filepath = os.path.splitext(filepath)[0] + ".mp3"
    mimetype = "audio/ogg"

    # "FILE SIZE CHECK TEMPORARILY OUT OF ORDER"
    # Need to find a way to get decrypted content size from Spotify to
    # enable file size checks, for now this only checks for the presence of
    # a file with the same name. To recover from failed downloads simply
    # remove incomplete files.
    already_present = os.path.isfile(filepath) or (
        Spodcast.CONFIG.get_transcode() and os.path.isfile(mp3_filepath)
    )
    if already_present and Spodcast.CONFIG.get_skip_existing_files():
        return filepath, FILE_EXISTS, mimetype

    realtime = Spodcast.CONFIG.get_download_real_time()
    if realtime and duration_ms is None:
        # NOTE(review): presumably the loaded stream carries the episode
        # metadata as ``stream.episode`` - confirm against librespot.
        duration_ms = getattr(getattr(stream, "episode", None), "duration", None)
    if realtime and (not duration_ms or not size):
        # Without both values the pacing target cannot be computed (and a
        # zero size would divide by zero), so fall back to full speed.
        log.warning("realtime download requested but duration or size is unknown, downloading at full speed")
        realtime = False

    log.info("Downloading stream")
    chunk_size = Spodcast.CONFIG.get_chunk_size()
    time_start = time.time()
    downloaded = 0
    with open(filepath, 'wb') as file:
        # An empty read ends the loop.
        while data := stream.input_stream.stream().read(chunk_size):
            file.write(data)
            downloaded += len(data)
            if realtime:
                # Sleep until wall-clock time catches up with the playback
                # position that corresponds to the bytes fetched so far.
                elapsed = time.time() - time_start
                target = (downloaded / size) * (duration_ms / 1000)
                if target > elapsed:
                    wait = target - elapsed
                    log.debug("realtime enabled, waiting for %s seconds...", wait)
                    time.sleep(wait)

    # The ogg file is closed at this point, so ffmpeg reads complete data.
    if Spodcast.CONFIG.get_transcode():
        log.info("transcoding ogg->mp3")
        transcoder = ffmpeg.input(filepath)
        transcoder = ffmpeg.output(transcoder, mp3_filepath)
        ffmpeg.run(transcoder, quiet=True)
        os.unlink(filepath)
        filepath = mp3_filepath
        downloaded = os.path.getsize(filepath)
        mimetype = "audio/mpeg"

    return filepath, downloaded, mimetype
def _write_text_file(path, text):
    """Write *text* to *path*, closing the file even if the write fails."""
    with open(path, "w") as handle:
        handle.write(text)


def download_episode(episode_id) -> None:
    """Download one episode and, when enabled, maintain the show's RSS files.

    The episode is stored in a per-show directory below the configured root
    path. Episodes with an external URL are fetched with ``download_file``;
    all others are fetched with ``download_stream``. When the RSS feed option
    is enabled, an info file is written for every newly downloaded episode,
    and the show index, show image and RSS feed file are created or refreshed
    when missing or older than the configured version.

    Args:
        episode_id: hexadecimal id of the episode.

    An ``ApiClient.StatusCodeException`` raised along the way is logged as a
    warning and not propagated.
    """
    try:
        (podcast_name, podcast_id, duration_ms, episode_name, description,
         release_date, uri, download_url) = get_episode_info(episode_id)

        if podcast_name is None:
            log.warning('Skipping episode (podcast NOT FOUND)')
            return
        if episode_name is None:
            log.warning('Skipping episode (episode NOT FOUND)')
            return

        filename = clean_filename(podcast_name + ' - ' + episode_name)
        show_directory = os.path.realpath(os.path.join(Spodcast.CONFIG.get_root_path(), clean_filename(podcast_name) + '/'))
        os.makedirs(show_directory, exist_ok=True)

        if download_url is None:
            episode_stream_id = EpisodeId.from_hex(episode_id)
            stream = Spodcast.get_content_stream(episode_stream_id, Spodcast.DOWNLOAD_QUALITY)
            basename = f"{filename}.ogg"
            filepath = os.path.join(show_directory, basename)
            path, size, mimetype = download_stream(stream, filepath)
            basename = os.path.basename(path)  # may have changed due to transcoding
        else:
            basename = f"{filename}.mp3"
            filepath = os.path.join(show_directory, basename)
            result = download_file(download_url, filepath)
            if result is None:
                # download_file gives no result for an unexpected HTTP
                # status; skip the episode instead of failing on unpacking.
                log.warning("Skipping %s: %s (download of %s failed)", podcast_name, episode_name, download_url)
                return
            path, size, mimetype = result

        rss_enabled = Spodcast.CONFIG.get_rss_feed()

        if size == FILE_EXISTS:
            log.info(f"Skipped {podcast_name}: {episode_name}")
        else:
            log.warning(f"Downloaded {podcast_name}: {episode_name}")
            if rss_enabled:
                episode_info = {
                    "mimetype": mimetype,
                    "medium": "audio",
                    "duration": int(duration_ms/1000),
                    "date": time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.strptime(release_date, "%Y-%m-%dT%H:%M:%SZ")),
                    "title": escape(episode_name),
                    "guid": uri,
                    "description": escape(description),
                    "filename": urllib.parse.quote(basename),
                    "size": int(size),
                }
                _write_text_file(
                    os.path.join(show_directory, f"{basename}.{RSS_FEED_INFO_EXTENSION}"),
                    json.dumps(episode_info),
                )

        if not rss_enabled:
            return

        # Show index: (re)written when missing or produced by an older version.
        show_index_file_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_INDEX}.{RSS_FEED_INFO_EXTENSION}")
        if not os.path.isfile(show_index_file_name) or int(get_index_version(show_index_file_name)) < Spodcast.CONFIG.get_version_int():
            podcast_link, podcast_description, podcast_image = get_show_info(podcast_id)
            show_info = {}
            if os.path.isfile(show_index_file_name):
                # Start from the existing index so keys not set below survive.
                with open(show_index_file_name, encoding='utf-8') as file:
                    show_info = json.load(file)
            show_info["version"] = str(RSS_FEED_VERSION + Spodcast.CONFIG.get_version_str())
            show_info["title"] = escape(podcast_name)
            show_info["link"] = podcast_link
            show_info["description"] = escape(podcast_description)
            show_info["image"] = RSS_FEED_SHOW_IMAGE
            _write_text_file(show_index_file_name, json.dumps(show_info))

            show_image_name = os.path.join(show_directory, f"{RSS_FEED_SHOW_IMAGE}")
            if not os.path.isfile(show_image_name):
                download_file(podcast_image, show_image_name)

        # Feed file: same missing-or-outdated rule as the show index.
        rss_file_name = os.path.join(show_directory, RSS_FEED_FILE_NAME)
        if not os.path.isfile(rss_file_name) or int(get_index_version(rss_file_name)) < Spodcast.CONFIG.get_version_int():
            _write_text_file(rss_file_name, RSS_FEED_CODE(Spodcast.CONFIG.get_version_str()))

    except ApiClient.StatusCodeException as status:
        log.warning("episode %s, StatusCodeException: %s", episode_id, status)