mirror of
https://code.eliotberriot.com/funkwhale/funkwhale.git
synced 2025-10-03 19:29:15 +02:00
Import trust source
This commit is contained in:
parent
ad7e6a97e5
commit
1bee3a4675
14 changed files with 872 additions and 429 deletions
|
@ -1,9 +1,10 @@
|
|||
import collections
|
||||
import logging
|
||||
import os
|
||||
|
||||
from django.utils import timezone
|
||||
from django.db import transaction
|
||||
from django.db.models import F
|
||||
from django.db.models import F, Q
|
||||
from django.dispatch import receiver
|
||||
|
||||
from musicbrainzngs import ResponseError
|
||||
|
@ -14,7 +15,6 @@ from funkwhale_api.common import preferences
|
|||
from funkwhale_api.federation import activity, actors, routes
|
||||
from funkwhale_api.federation import library as lb
|
||||
from funkwhale_api.federation import library as federation_serializers
|
||||
from funkwhale_api.providers.acoustid import get_acoustid_client
|
||||
from funkwhale_api.taskapp import celery
|
||||
|
||||
from . import lyrics as lyrics_utils
|
||||
|
@ -26,102 +26,32 @@ from . import serializers
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@celery.app.task(name="acoustid.set_on_upload")
@celery.require_instance(models.Upload, "upload")
def set_acoustid_on_upload(upload):
    """
    Look up the AcoustID best match for an upload's audio file and store it.

    The task is registered under the name ``acoustid.set_on_upload``.

    :param upload: upload whose ``audio_file.path`` is submitted to the
        AcoustID client (presumably resolved from a primary key by
        ``require_instance`` -- confirm against that decorator).
    :returns: the matched AcoustID track id, or ``None`` when the client
        reports no match (nothing is saved in that case).
    """
    best_match = get_acoustid_client().get_best_match(upload.audio_file.path)
    if not best_match:
        return None

    acoustid_track_id = best_match["id"]
    upload.acoustid_track_id = acoustid_track_id
    # only touch the column we changed
    upload.save(update_fields=["acoustid_track_id"])
    return acoustid_track_id
|
||||
|
||||
|
||||
def import_track_from_remote(metadata):
    """
    Return the track described by federation ``metadata``, creating it if needed.

    Lookups are attempted from the most to the least reliable identifier:

    1. ``metadata["recording"]["musicbrainz_id"]``: the track is fetched
       through ``models.Track.get_or_create_from_api``;
    2. ``metadata["release"]["musicbrainz_id"]``: the album is fetched from
       its MBID and the track is matched by title inside that album;
    3. ``metadata["artist"]["musicbrainz_id"]``: the artist is fetched from
       its MBID, album and track are matched by title;
    4. otherwise artist, album and track are all matched by name/title.

    :param metadata: mapping holding the keys listed above plus ``title``,
        ``album_title`` and ``artist_name`` for the title-based fallbacks.
    :returns: the first element of the tuple returned by the relevant
        ``get_or_create_*`` helper.
    :raises KeyError: when a title/name needed by a fallback is missing.
    """

    def mbid_of(section):
        # A missing section, a null section and a null/empty id all mean
        # "no usable MBID". Explicit checks are used instead of ``assert``
        # because assertions are stripped when Python runs with -O.
        try:
            return metadata[section]["musicbrainz_id"] or None
        except (KeyError, TypeError):
            return None

    track_mbid = mbid_of("recording")
    if track_mbid:
        return models.Track.get_or_create_from_api(mbid=track_mbid)[0]

    album_mbid = mbid_of("release")
    if album_mbid:
        album, _ = models.Album.get_or_create_from_api(mbid=album_mbid)
        return models.Track.get_or_create_from_title(
            metadata["title"], artist=album.artist, album=album
        )[0]

    artist_mbid = mbid_of("artist")
    if artist_mbid:
        artist, _ = models.Artist.get_or_create_from_api(mbid=artist_mbid)
    else:
        # worst case scenario, we have absolutely no way to link to a
        # musicbrainz resource, we rely on the name/titles
        artist, _ = models.Artist.get_or_create_from_name(metadata["artist_name"])

    album, _ = models.Album.get_or_create_from_title(
        metadata["album_title"], artist=artist
    )
    return models.Track.get_or_create_from_title(
        metadata["title"], artist=artist, album=album
    )[0]
|
||||
|
||||
|
||||
def update_album_cover(album, upload, replace=False):
|
||||
def update_album_cover(album, source=None, cover_data=None, replace=False):
|
||||
if album.cover and not replace:
|
||||
return
|
||||
|
||||
if upload:
|
||||
# maybe the file has a cover embedded?
|
||||
if cover_data:
|
||||
return album.get_image(data=cover_data)
|
||||
|
||||
if source and source.startswith("file://"):
|
||||
# let's look for a cover in the same directory
|
||||
path = os.path.dirname(source.replace("file://", "", 1))
|
||||
logger.info("[Album %s] scanning covers from %s", album.pk, path)
|
||||
cover = get_cover_from_fs(path)
|
||||
if cover:
|
||||
return album.get_image(data=cover)
|
||||
if album.mbid:
|
||||
try:
|
||||
metadata = upload.get_metadata()
|
||||
except FileNotFoundError:
|
||||
metadata = None
|
||||
if metadata:
|
||||
cover = metadata.get_picture("cover_front")
|
||||
if cover:
|
||||
# best case scenario, cover is embedded in the track
|
||||
logger.info("[Album %s] Using cover embedded in file", album.pk)
|
||||
return album.get_image(data=cover)
|
||||
if upload.source and upload.source.startswith("file://"):
|
||||
# let's look for a cover in the same directory
|
||||
path = os.path.dirname(upload.source.replace("file://", "", 1))
|
||||
logger.info("[Album %s] scanning covers from %s", album.pk, path)
|
||||
cover = get_cover_from_fs(path)
|
||||
if cover:
|
||||
return album.get_image(data=cover)
|
||||
if not album.mbid:
|
||||
return
|
||||
try:
|
||||
logger.info(
|
||||
"[Album %s] Fetching cover from musicbrainz release %s",
|
||||
album.pk,
|
||||
str(album.mbid),
|
||||
)
|
||||
return album.get_image()
|
||||
except ResponseError as exc:
|
||||
logger.warning(
|
||||
"[Album %s] cannot fetch cover from musicbrainz: %s", album.pk, str(exc)
|
||||
)
|
||||
logger.info(
|
||||
"[Album %s] Fetching cover from musicbrainz release %s",
|
||||
album.pk,
|
||||
str(album.mbid),
|
||||
)
|
||||
return album.get_image()
|
||||
except ResponseError as exc:
|
||||
logger.warning(
|
||||
"[Album %s] cannot fetch cover from musicbrainz: %s", album.pk, str(exc)
|
||||
)
|
||||
|
||||
|
||||
IMAGE_TYPES = [("jpg", "image/jpeg"), ("png", "image/png")]
|
||||
|
@ -244,15 +174,15 @@ def scan_library_page(library_scan, page_url):
|
|||
scan_library_page.delay(library_scan_id=library_scan.pk, page_url=next_page)
|
||||
|
||||
|
||||
def getter(data, *keys, default=None):
    """
    Follow ``keys`` through nested containers and return the value found.

    :param data: outermost container; when it is falsy (``None``, ``{}``...)
        ``default`` is returned immediately.
    :param keys: successive keys (or indexes) to look up, outermost first.
    :param default: value returned when the path cannot be followed.
    :returns: the value stored at the end of the path, or ``default``.

    Example::

        getter({"funkwhale": {"config": {}}}, "funkwhale", "config",
               "broadcast", default=True)  # -> True
    """
    if not data:
        return default

    value = data
    for key in keys:
        try:
            value = value[key]
        except (KeyError, IndexError, TypeError):
            # missing key, out-of-range index, or an intermediate value that
            # cannot be subscripted (e.g. None): the path does not exist
            return default

    return value
|
||||
|
||||
|
@ -269,12 +199,17 @@ def fail_import(upload, error_code):
|
|||
upload.import_details = {"error_code": error_code}
|
||||
upload.import_date = timezone.now()
|
||||
upload.save(update_fields=["import_details", "import_status", "import_date"])
|
||||
signals.upload_import_status_updated.send(
|
||||
old_status=old_status,
|
||||
new_status=upload.import_status,
|
||||
upload=upload,
|
||||
sender=None,
|
||||
|
||||
broadcast = getter(
|
||||
upload.import_metadata, "funkwhale", "config", "broadcast", default=True
|
||||
)
|
||||
if broadcast:
|
||||
signals.upload_import_status_updated.send(
|
||||
old_status=old_status,
|
||||
new_status=upload.import_status,
|
||||
upload=upload,
|
||||
sender=None,
|
||||
)
|
||||
|
||||
|
||||
@celery.app.task(name="music.process_upload")
|
||||
|
@ -285,22 +220,29 @@ def fail_import(upload, error_code):
|
|||
"upload",
|
||||
)
|
||||
def process_upload(upload):
|
||||
data = upload.import_metadata or {}
|
||||
import_metadata = upload.import_metadata or {}
|
||||
old_status = upload.import_status
|
||||
audio_file = upload.get_audio_file()
|
||||
try:
|
||||
track = get_track_from_import_metadata(upload.import_metadata or {})
|
||||
if not track and upload.audio_file:
|
||||
# easy ways did not work. Now we have to be smart and use
|
||||
# metadata from the file itself if any
|
||||
track = import_track_data_from_file(upload.audio_file.file, hints=data)
|
||||
if not track and upload.metadata:
|
||||
# we can try to import using federation metadata
|
||||
track = import_track_from_remote(upload.metadata)
|
||||
additional_data = {}
|
||||
if not audio_file:
|
||||
# we can only rely on user provided data
|
||||
final_metadata = import_metadata
|
||||
else:
|
||||
# we use user provided data and data from the file itself
|
||||
m = metadata.Metadata(audio_file)
|
||||
file_metadata = m.all()
|
||||
final_metadata = collections.ChainMap(
|
||||
additional_data, import_metadata, file_metadata
|
||||
)
|
||||
additional_data["cover_data"] = m.get_picture("cover_front")
|
||||
additional_data["upload_source"] = upload.source
|
||||
track = get_track_from_import_metadata(final_metadata)
|
||||
except UploadImportError as e:
|
||||
return fail_import(upload, e.code)
|
||||
except Exception:
|
||||
fail_import(upload, "unknown_error")
|
||||
raise
|
||||
return fail_import(upload, "unknown_error")
|
||||
|
||||
# under some situations, we want to skip the import (
|
||||
# for instance if the user already owns the files)
|
||||
owned_duplicates = get_owned_duplicates(upload, track)
|
||||
|
@ -342,33 +284,69 @@ def process_upload(upload):
|
|||
"bitrate",
|
||||
]
|
||||
)
|
||||
signals.upload_import_status_updated.send(
|
||||
old_status=old_status,
|
||||
new_status=upload.import_status,
|
||||
upload=upload,
|
||||
sender=None,
|
||||
broadcast = getter(
|
||||
import_metadata, "funkwhale", "config", "broadcast", default=True
|
||||
)
|
||||
routes.outbox.dispatch(
|
||||
{"type": "Create", "object": {"type": "Audio"}}, context={"upload": upload}
|
||||
if broadcast:
|
||||
signals.upload_import_status_updated.send(
|
||||
old_status=old_status,
|
||||
new_status=upload.import_status,
|
||||
upload=upload,
|
||||
sender=None,
|
||||
)
|
||||
dispatch_outbox = getter(
|
||||
import_metadata, "funkwhale", "config", "dispatch_outbox", default=True
|
||||
)
|
||||
if not track.album.cover:
|
||||
update_album_cover(track.album, upload)
|
||||
if dispatch_outbox:
|
||||
routes.outbox.dispatch(
|
||||
{"type": "Create", "object": {"type": "Audio"}}, context={"upload": upload}
|
||||
)
|
||||
|
||||
|
||||
def get_track_from_import_metadata(data):
|
||||
track_mbid = getter(data, "track", "mbid")
|
||||
track_uuid = getter(data, "track", "uuid")
|
||||
def federation_audio_track_to_metadata(payload):
|
||||
"""
|
||||
Given a valid payload as returned by federation.serializers.TrackSerializer.validated_data,
|
||||
returns a correct metadata payload for use with get_track_from_import_metadata.
|
||||
"""
|
||||
musicbrainz_recordingid = payload.get("musicbrainzId")
|
||||
musicbrainz_artistid = payload["artists"][0].get("musicbrainzId")
|
||||
musicbrainz_albumartistid = payload["album"]["artists"][0].get("musicbrainzId")
|
||||
musicbrainz_albumid = payload["album"].get("musicbrainzId")
|
||||
|
||||
if track_mbid:
|
||||
# easiest case: there is a MBID provided in the import_metadata
|
||||
return models.Track.get_or_create_from_api(mbid=track_mbid)[0]
|
||||
if track_uuid:
|
||||
# another easy case, we have a reference to a uuid of a track that
|
||||
# already exists in our database
|
||||
try:
|
||||
return models.Track.objects.get(uuid=track_uuid)
|
||||
except models.Track.DoesNotExist:
|
||||
raise UploadImportError(code="track_uuid_not_found")
|
||||
new_data = {
|
||||
"title": payload["name"],
|
||||
"album": payload["album"]["name"],
|
||||
"track_number": payload["position"],
|
||||
"artist": payload["artists"][0]["name"],
|
||||
"album_artist": payload["album"]["artists"][0]["name"],
|
||||
"date": payload["album"].get("released"),
|
||||
# musicbrainz
|
||||
"musicbrainz_recordingid": str(musicbrainz_recordingid)
|
||||
if musicbrainz_recordingid
|
||||
else None,
|
||||
"musicbrainz_artistid": str(musicbrainz_artistid)
|
||||
if musicbrainz_artistid
|
||||
else None,
|
||||
"musicbrainz_albumartistid": str(musicbrainz_albumartistid)
|
||||
if musicbrainz_albumartistid
|
||||
else None,
|
||||
"musicbrainz_albumid": str(musicbrainz_albumid)
|
||||
if musicbrainz_albumid
|
||||
else None,
|
||||
# federation
|
||||
"fid": payload["id"],
|
||||
"artist_fid": payload["artists"][0]["id"],
|
||||
"album_artist_fid": payload["album"]["artists"][0]["id"],
|
||||
"album_fid": payload["album"]["id"],
|
||||
"fdate": payload["published"],
|
||||
"album_fdate": payload["album"]["published"],
|
||||
"album_artist_fdate": payload["album"]["artists"][0]["published"],
|
||||
"artist_fdate": payload["artists"][0]["published"],
|
||||
}
|
||||
cover = payload["album"].get("cover")
|
||||
if cover:
|
||||
new_data["cover_data"] = {"mimetype": cover["mediaType"], "url": cover["href"]}
|
||||
return new_data
|
||||
|
||||
|
||||
def get_owned_duplicates(upload, track):
|
||||
|
@ -385,45 +363,191 @@ def get_owned_duplicates(upload, track):
|
|||
)
|
||||
|
||||
|
||||
def get_best_candidate_or_create(model, query, defaults, sort_fields):
    """
    Variant of ``get_or_create()`` that tolerates several matching rows.

    :param model: model class exposing ``objects.filter`` / ``objects.create``.
    :param query: filter expression handed to ``model.objects.filter``.
    :param defaults: keyword arguments used to create the object when
        nothing matches ``query``.
    :param sort_fields: field names forwarded to ``sort_candidates`` to rank
        the matches.
    :returns: an ``(instance, created)`` tuple; ``created`` is ``False`` when
        an existing match was picked and ``True`` when a new object was made.
    """
    matches = model.objects.filter(query)
    if not matches:
        new_instance = model.objects.create(**defaults)
        return new_instance, True

    # several rows may match: keep the best-ranked one
    best_match = sort_candidates(matches, sort_fields)[0]
    return best_match, False
|
||||
|
||||
|
||||
def sort_candidates(candidates, important_fields):
    """
    Given a list of objects and a list of fields,
    will return a sorted list of those objects by score.

    Score is higher for objects that have a non-empty attribute
    that is also present in important fields::

        artist1 = Artist(mbid=None, fid=None)
        artist2 = Artist(mbid="something", fid=None)

        # artist2 has a mbid, so is sorted first
        assert sort_candidates([artist1, artist2], ['mbid'])[0] == artist2

    Fields listed first weigh more than fields listed later, so with
    ``["mbid", "fid"]`` an object having only a ``mbid`` outranks an object
    having only a ``fid``.

    Only supports string fields.

    :param candidates: iterable of objects to rank.
    :param important_fields: attribute names, most important first.
    :returns: a new list of the candidates, best score first.
    """
    # Map each field to its weight following the caller's order: the first
    # field gets the highest weight, the last one gets 1. (Weights used to be
    # derived from the alphabetical order of the names, which only matched
    # the intended priority by coincidence.)
    fields = list(important_fields)
    fields_scores = {
        field: len(fields) - index for index, field in enumerate(fields)
    }

    def score(candidate):
        # a missing attribute counts the same as an empty one
        return sum(
            weight
            for field, weight in fields_scores.items()
            if getattr(candidate, field, "")
        )

    # Ascending stable sort followed by a reversal: this keeps the historical
    # ordering of candidates that share the same score.
    ranked = sorted(candidates, key=score)
    ranked.reverse()
    return ranked
|
||||
|
||||
|
||||
@transaction.atomic
|
||||
def import_track_data_from_file(file, hints={}):
|
||||
data = metadata.Metadata(file)
|
||||
album = None
|
||||
def get_track_from_import_metadata(data):
|
||||
track_uuid = getter(data, "funkwhale", "track", "uuid")
|
||||
|
||||
if track_uuid:
|
||||
# easy case, we have a reference to a uuid of a track that
|
||||
# already exists in our database
|
||||
try:
|
||||
track = models.Track.objects.get(uuid=track_uuid)
|
||||
except models.Track.DoesNotExist:
|
||||
raise UploadImportError(code="track_uuid_not_found")
|
||||
|
||||
if not track.album.cover:
|
||||
update_album_cover(
|
||||
track.album,
|
||||
source=data.get("upload_source"),
|
||||
cover_data=data.get("cover_data"),
|
||||
)
|
||||
return track
|
||||
|
||||
from_activity_id = data.get("from_activity_id", None)
|
||||
track_mbid = data.get("musicbrainz_recordingid", None)
|
||||
album_mbid = data.get("musicbrainz_albumid", None)
|
||||
track_fid = getter(data, "fid")
|
||||
|
||||
query = None
|
||||
|
||||
if album_mbid and track_mbid:
|
||||
# to gain performance and avoid additional mb lookups,
|
||||
# we import from the release data, which is already cached
|
||||
return models.Track.get_or_create_from_release(album_mbid, track_mbid)[0]
|
||||
elif track_mbid:
|
||||
return models.Track.get_or_create_from_api(track_mbid)[0]
|
||||
elif album_mbid:
|
||||
album = models.Album.get_or_create_from_api(album_mbid)[0]
|
||||
query = Q(mbid=track_mbid, album__mbid=album_mbid)
|
||||
|
||||
artist = album.artist if album else None
|
||||
if track_fid:
|
||||
query = query | Q(fid=track_fid) if query else Q(fid=track_fid)
|
||||
|
||||
if query:
|
||||
# second easy case: we have a (track_mbid, album_mbid) pair or
|
||||
# a federation uuid we can check on
|
||||
try:
|
||||
return sort_candidates(models.Track.objects.filter(query), ["mbid", "fid"])[
|
||||
0
|
||||
]
|
||||
except IndexError:
|
||||
pass
|
||||
|
||||
# get / create artist and album artist
|
||||
artist_mbid = data.get("musicbrainz_artistid", None)
|
||||
if not artist:
|
||||
if artist_mbid:
|
||||
artist = models.Artist.get_or_create_from_api(artist_mbid)[0]
|
||||
else:
|
||||
artist = models.Artist.objects.get_or_create(
|
||||
name__iexact=data.get("artist"), defaults={"name": data.get("artist")}
|
||||
)[0]
|
||||
artist_fid = data.get("artist_fid", None)
|
||||
artist_name = data["artist"]
|
||||
query = Q(name__iexact=artist_name)
|
||||
if artist_mbid:
|
||||
query |= Q(mbid=artist_mbid)
|
||||
if artist_fid:
|
||||
query |= Q(fid=artist_fid)
|
||||
defaults = {
|
||||
"name": artist_name,
|
||||
"mbid": artist_mbid,
|
||||
"fid": artist_fid,
|
||||
"from_activity_id": from_activity_id,
|
||||
}
|
||||
if data.get("artist_fdate"):
|
||||
defaults["creation_date"] = data.get("artist_fdate")
|
||||
|
||||
release_date = data.get("date", default=None)
|
||||
if not album:
|
||||
album = models.Album.objects.get_or_create(
|
||||
title__iexact=data.get("album"),
|
||||
artist=artist,
|
||||
defaults={"title": data.get("album"), "release_date": release_date},
|
||||
)[0]
|
||||
position = data.get("track_number", default=None)
|
||||
track = models.Track.objects.get_or_create(
|
||||
title__iexact=data.get("title"),
|
||||
album=album,
|
||||
defaults={"title": data.get("title"), "position": position},
|
||||
artist = get_best_candidate_or_create(
|
||||
models.Artist, query, defaults=defaults, sort_fields=["mbid", "fid"]
|
||||
)[0]
|
||||
|
||||
album_artist_name = data.get("album_artist", artist_name)
|
||||
if album_artist_name == artist_name:
|
||||
album_artist = artist
|
||||
else:
|
||||
query = Q(name__iexact=album_artist_name)
|
||||
album_artist_mbid = data.get("musicbrainz_albumartistid", None)
|
||||
album_artist_fid = data.get("album_artist_fid", None)
|
||||
if album_artist_mbid:
|
||||
query |= Q(mbid=album_artist_mbid)
|
||||
if album_artist_fid:
|
||||
query |= Q(fid=album_artist_fid)
|
||||
defaults = {
|
||||
"name": album_artist_name,
|
||||
"mbid": album_artist_mbid,
|
||||
"fid": album_artist_fid,
|
||||
"from_activity_id": from_activity_id,
|
||||
}
|
||||
if data.get("album_artist_fdate"):
|
||||
defaults["creation_date"] = data.get("album_artist_fdate")
|
||||
|
||||
album_artist = get_best_candidate_or_create(
|
||||
models.Artist, query, defaults=defaults, sort_fields=["mbid", "fid"]
|
||||
)[0]
|
||||
|
||||
# get / create album
|
||||
album_title = data["album"]
|
||||
album_fid = data.get("album_fid", None)
|
||||
query = Q(title__iexact=album_title, artist=album_artist)
|
||||
if album_mbid:
|
||||
query |= Q(mbid=album_mbid)
|
||||
if album_fid:
|
||||
query |= Q(fid=album_fid)
|
||||
defaults = {
|
||||
"title": album_title,
|
||||
"artist": album_artist,
|
||||
"mbid": album_mbid,
|
||||
"release_date": data.get("date"),
|
||||
"fid": album_fid,
|
||||
"from_activity_id": from_activity_id,
|
||||
}
|
||||
if data.get("album_fdate"):
|
||||
defaults["creation_date"] = data.get("album_fdate")
|
||||
|
||||
album = get_best_candidate_or_create(
|
||||
models.Album, query, defaults=defaults, sort_fields=["mbid", "fid"]
|
||||
)[0]
|
||||
if not album.cover:
|
||||
update_album_cover(
|
||||
album, source=data.get("upload_source"), cover_data=data.get("cover_data")
|
||||
)
|
||||
|
||||
# get / create track
|
||||
track_title = data["title"]
|
||||
track_number = data.get("track_number", 1)
|
||||
query = Q(title__iexact=track_title, artist=artist, album=album)
|
||||
if track_mbid:
|
||||
query |= Q(mbid=track_mbid)
|
||||
if track_fid:
|
||||
query |= Q(fid=track_fid)
|
||||
defaults = {
|
||||
"title": track_title,
|
||||
"album": album,
|
||||
"mbid": track_mbid,
|
||||
"artist": artist,
|
||||
"position": track_number,
|
||||
"fid": track_fid,
|
||||
"from_activity_id": from_activity_id,
|
||||
}
|
||||
if data.get("fdate"):
|
||||
defaults["creation_date"] = data.get("fdate")
|
||||
|
||||
track = get_best_candidate_or_create(
|
||||
models.Track, query, defaults=defaults, sort_fields=["mbid", "fid"]
|
||||
)[0]
|
||||
|
||||
return track
|
||||
|
||||
|
||||
|
@ -432,6 +556,7 @@ def broadcast_import_status_update_to_owner(old_status, new_status, upload, **kw
|
|||
user = upload.library.actor.get_user()
|
||||
if not user:
|
||||
return
|
||||
|
||||
group = "user.{}.imports".format(user.pk)
|
||||
channels.group_send(
|
||||
group,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue