Fix #799: Removed broken/unstable lyrics feature

This commit is contained in:
Eliot Berriot 2019-04-23 10:31:29 +02:00
parent a414461f49
commit 0b94227782
No known key found for this signature in database
GPG key ID: DD6965E2476E5C27
16 changed files with 34 additions and 384 deletions

View file

@ -39,22 +39,6 @@ class ImportJobAdmin(admin.ModelAdmin):
list_filter = ["status"]
@admin.register(models.Work)
class WorkAdmin(admin.ModelAdmin):
    """Admin configuration for ``Work`` records."""

    # Columns shown in the changelist, in display order.
    list_display = ["title", "mbid", "language", "nature"]
    # Sidebar filters and the field matched by the search box.
    list_filter = ["language", "nature"]
    search_fields = ["title"]
    list_select_related = True
@admin.register(models.Lyrics)
class LyricsAdmin(admin.ModelAdmin):
    """Admin configuration for ``Lyrics`` records."""

    # "url" used to be listed twice here, which made the changelist render
    # the very same column two times.
    list_display = ["url", "id"]
    list_select_related = True
    # Search by the lyrics URL or by the title of the related work.
    search_fields = ["url", "work__title"]
    list_filter = ["work__language"]
@admin.register(models.Upload)
class UploadAdmin(admin.ModelAdmin):
list_display = [

View file

@ -164,27 +164,6 @@ class UploadVersionFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
model = "music.UploadVersion"
@registry.register
class WorkFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
    """Test factory for ``music.Work``.

    ``language`` and ``nature`` are fixed values; ``mbid`` and ``title`` are
    generated with Faker.
    """

    class Meta:
        model = "music.Work"

    mbid = factory.Faker("uuid4")
    language = "eng"
    nature = "song"
    title = factory.Faker("sentence", nb_words=3)
@registry.register
class LyricsFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
    """Test factory for ``music.Lyrics``, attached to a generated work."""

    work = factory.SubFactory(WorkFactory)
    url = factory.Faker("url")
    # The "paragraphs" provider returns a *list* of strings; assigning that to
    # a text field would store the list's repr. "paragraph" returns one string.
    content = factory.Faker("paragraph", nb_sentences=4)

    class Meta:
        model = "music.Lyrics"
@registry.register
class TagFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
name = factory.SelfAttribute("slug")

View file

@ -47,4 +47,4 @@ class Mapping(object):
)
registry = {"Artist": Importer, "Track": Importer, "Album": Importer, "Work": Importer}
registry = {"Artist": Importer, "Track": Importer, "Album": Importer}

View file

@ -1,31 +0,0 @@
import urllib.request
from bs4 import BeautifulSoup
def _get_html(url, timeout=30):
    """Download ``url`` and return the response body decoded to text.

    Args:
        url: Address to fetch; passed as-is to ``urllib.request.urlopen``.
        timeout: Seconds to wait on blocking network operations, so that an
            unresponsive remote host cannot stall the caller forever.

    Returns:
        The body as ``str``, decoded with the charset announced by the
        response, or UTF-8 when none is announced or it is unknown.

    Raises:
        urllib.error.URLError: If the resource cannot be fetched.
        UnicodeDecodeError: If the body is not valid in the chosen charset.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        raw = response.read()
        # Honour the charset from the Content-Type header when present.
        charset = response.headers.get_content_charset() or "utf-8"
    try:
        return raw.decode(charset)
    except LookupError:
        # The server announced a codec name Python does not know about.
        return raw.decode("utf-8")
def extract_content(html):
    """Return the child nodes of the first ``div.lyricbox`` found in ``html``.

    Raises:
        IndexError: If the document contains no ``div`` with class
            ``lyricbox``.
    """
    document = BeautifulSoup(html, "html.parser")
    lyric_boxes = document.find_all("div", class_="lyricbox")
    first_box = lyric_boxes[0]
    return first_box.contents
def clean_content(contents):
    """Flatten parsed lyric nodes into a single plain-text string.

    Args:
        contents: Iterable of nodes. Items may be tag-like objects exposing
            ``name``/``text`` or plain strings.

    Returns:
        The concatenated text: bare ``"\\n"`` items and ``script`` nodes are
        dropped, ``br`` nodes become a newline, every other node contributes
        its ``text`` (or ``str(node)`` when it has no ``text`` attribute).
    """
    parts = []
    for node in contents:
        if node == "\n":
            continue
        # Plain strings have no ``name``; treat them as untagged text instead
        # of failing before the ``str()`` fallback below can handle them.
        tag = getattr(node, "name", None)
        if tag == "script":
            continue
        if tag == "br":
            parts.append("\n")
            continue
        try:
            parts.append(node.text)
        except AttributeError:
            parts.append(str(node))
    return "".join(parts)

View file

@ -0,0 +1,31 @@
# Generated by Django 2.1.7 on 2019-04-23 08:20
from django.db import migrations
class Migration(migrations.Migration):
    """Remove the ``Lyrics`` and ``Work`` models.

    Also removes the ``lyrics.work``, ``work.from_activity`` and
    ``track.work`` fields.
    """

    dependencies = [("music", "0038_attributed_to")]

    # Field removals are listed before the model deletions.
    operations = [
        migrations.RemoveField(model_name="lyrics", name="work"),
        migrations.RemoveField(model_name="work", name="from_activity"),
        migrations.RemoveField(model_name="track", name="work"),
        migrations.DeleteModel(name="Lyrics"),
        migrations.DeleteModel(name="Work"),
    ]

View file

@ -6,7 +6,6 @@ import tempfile
import urllib.parse
import uuid
import markdown
import pendulum
import pydub
from django.conf import settings
@ -379,77 +378,6 @@ def import_album(v):
return a
def link_recordings(instance, cleaned_data, raw_data):
    """Point existing tracks at ``instance`` based on its recording relations.

    Args:
        instance: The work that matching tracks get linked to.
        cleaned_data: Unused; part of the import hook signature.
        raw_data: Raw API payload for the work. Its optional
            ``"recording-relation-list"`` entries provide the track MBIDs
            through their ``"target"`` key.

    Returns:
        None.
    """
    # A payload without recording relations is tolerated, mirroring how
    # import_lyrics treats a missing relation list.
    relations = raw_data.get("recording-relation-list", [])
    track_mbids = [relation["target"] for relation in relations]
    if not track_mbids:
        # Nothing to link: skip the pointless UPDATE query.
        return
    Track.objects.filter(mbid__in=track_mbids).update(work=instance)
def import_lyrics(instance, cleaned_data, raw_data):
    """Get or create the ``Lyrics`` row for the work's first lyrics URL.

    Args:
        instance: The work the lyrics belong to.
        cleaned_data: Unused; part of the import hook signature.
        raw_data: Raw API payload; ``"url-relation-list"`` entries whose
            ``"type"`` is ``"lyrics"`` are considered.

    Returns:
        The ``Lyrics`` object, or None when the payload has no usable
        lyrics relation.
    """
    try:
        lyrics_relations = []
        for relation in raw_data["url-relation-list"]:
            if relation["type"] == "lyrics":
                lyrics_relations.append(relation)
        lyrics_url = lyrics_relations[0]["target"]
    except (IndexError, KeyError):
        return None
    lyrics, _created = Lyrics.objects.get_or_create(work=instance, url=lyrics_url)
    return lyrics
class Work(APIModelMixin):
    """A work imported through the ``musicbrainz.api.works`` API."""

    language = models.CharField(max_length=20)
    nature = models.CharField(max_length=50)
    title = models.CharField(max_length=255)

    # API binding used when importing a work.
    api = musicbrainz.api.works
    api_includes = ["url-rels", "recording-rels"]
    musicbrainz_model = "work"
    federation_namespace = "works"
    # Model field -> payload field (with an optional value converter).
    musicbrainz_mapping = {
        "mbid": {"musicbrainz_field_name": "id"},
        "title": {"musicbrainz_field_name": "title"},
        "language": {"musicbrainz_field_name": "language"},
        "nature": {
            "musicbrainz_field_name": "type",
            "converter": lambda v: v.lower(),
        },
    }
    import_hooks = [import_lyrics, link_recordings]

    def fetch_lyrics(self):
        """Return the first stored lyrics, importing them from the API if none.

        May return None when the API payload has no lyrics URL.
        """
        stored = self.lyrics.first()
        if not stored:
            payload = self.api.get(self.mbid, includes=["url-rels"])
            return import_lyrics(self, {}, payload["work"])
        return stored

    def get_federation_id(self):
        """Return ``fid`` when it is set, otherwise None."""
        return self.fid if self.fid else None
class Lyrics(models.Model):
    """Lyrics text fetched from ``url`` and optionally attached to a work."""

    uuid = models.UUIDField(unique=True, db_index=True, default=uuid.uuid4)
    work = models.ForeignKey(
        Work, related_name="lyrics", null=True, blank=True, on_delete=models.CASCADE
    )
    url = models.URLField(unique=True)
    # Nullable: a row can exist before its content has been fetched.
    content = models.TextField(null=True, blank=True)

    @property
    def content_rendered(self):
        """Return ``content`` rendered from Markdown to HTML.

        Returns an empty string when there is no content yet, instead of
        handing ``None`` to the Markdown renderer.
        """
        if not self.content:
            return ""
        # NOTE(review): ``safe_mode`` and ``enable_attributes`` were deprecated
        # in Python-Markdown 2.5 and removed in 3.0 -- confirm against the
        # installed version that raw HTML in ``content`` is still neutralized.
        return markdown.markdown(
            self.content,
            safe_mode=True,
            enable_attributes=False,
            extensions=["markdown.extensions.nl2br"],
        )
class TrackQuerySet(common_models.LocalFromFidQuerySet, models.QuerySet):
def for_nested_serialization(self):
return self.select_related().select_related("album__artist", "artist")
@ -499,9 +427,6 @@ class Track(APIModelMixin):
album = models.ForeignKey(
Album, related_name="tracks", null=True, blank=True, on_delete=models.CASCADE
)
work = models.ForeignKey(
Work, related_name="tracks", null=True, blank=True, on_delete=models.CASCADE
)
license = models.ForeignKey(
License,
null=True,
@ -523,7 +448,7 @@ class Track(APIModelMixin):
federation_namespace = "tracks"
musicbrainz_model = "recording"
api = musicbrainz.api.recordings
api_includes = ["artist-credits", "releases", "media", "tags", "work-rels"]
api_includes = ["artist-credits", "releases", "media", "tags"]
musicbrainz_mapping = {
"mbid": {"musicbrainz_field_name": "id"},
"title": {"musicbrainz_field_name": "title"},
@ -552,20 +477,6 @@ class Track(APIModelMixin):
self.artist = self.album.artist
super().save(**kwargs)
def get_work(self):
    """Return the linked work, or resolve one from the recording's relations.

    When no work is linked yet, the recording is fetched with its
    ``work-rels`` and the first related work is obtained through
    ``Work.get_or_create_from_api``. Returns None when the payload carries
    no work relation.
    """
    linked = self.work
    if linked:
        return linked
    payload = self.api.get(self.mbid, includes=["work-rels"])
    try:
        relations = payload["recording"]["work-relation-list"]
        related_work = relations[0]["work"]
    except (IndexError, KeyError):
        return None
    work, _created = Work.get_or_create_from_api(mbid=related_work["id"])
    return work
def get_lyrics_url(self):
    """Return the reversed ``api:v1:tracks-lyrics`` URL for this track."""
    route_kwargs = {"pk": self.pk}
    return reverse("api:v1:tracks-lyrics", kwargs=route_kwargs)
@property
def full_name(self):
try:

View file

@ -190,7 +190,6 @@ class TrackUploadSerializer(serializers.ModelSerializer):
class TrackSerializer(serializers.ModelSerializer):
artist = ArtistSimpleSerializer(read_only=True)
album = TrackAlbumSerializer(read_only=True)
lyrics = serializers.SerializerMethodField()
uploads = serializers.SerializerMethodField()
listen_url = serializers.SerializerMethodField()
@ -206,7 +205,6 @@ class TrackSerializer(serializers.ModelSerializer):
"creation_date",
"position",
"disc_number",
"lyrics",
"uploads",
"listen_url",
"copyright",
@ -214,9 +212,6 @@ class TrackSerializer(serializers.ModelSerializer):
"is_local",
)
def get_lyrics(self, obj):
    """Return the value for the ``lyrics`` field: the object's lyrics URL."""
    lyrics_url = obj.get_lyrics_url()
    return lyrics_url
def get_listen_url(self, obj):
return obj.listen_url
@ -377,12 +372,6 @@ class SimpleAlbumSerializer(serializers.ModelSerializer):
fields = ("id", "mbid", "title", "release_date", "cover")
class LyricsSerializer(serializers.ModelSerializer):
    """Serialize ``Lyrics`` with both raw and rendered content."""

    class Meta:
        fields = ("id", "work", "content", "content_rendered")
        model = models.Lyrics
class TrackActivitySerializer(activity_serializers.ModelSerializer):
type = serializers.SerializerMethodField()
name = serializers.CharField(source="title")

View file

@ -17,7 +17,6 @@ from funkwhale_api.federation import library as lb
from funkwhale_api.taskapp import celery
from . import licenses
from . import lyrics as lyrics_utils
from . import models
from . import metadata
from . import signals
@ -70,16 +69,6 @@ def get_cover_from_fs(dir_path):
return {"mimetype": m, "content": c.read()}
@celery.app.task(name="Lyrics.fetch_content")
@celery.require_instance(models.Lyrics, "lyrics")
def fetch_content(lyrics):
    """Download the page at ``lyrics.url`` and store its cleaned text.

    Only the ``content`` column is written back.
    """
    page = lyrics_utils._get_html(lyrics.url)
    lyric_nodes = lyrics_utils.extract_content(page)
    lyrics.content = lyrics_utils.clean_content(lyric_nodes)
    lyrics.save(update_fields=["content"])
@celery.app.task(name="music.start_library_scan")
@celery.require_instance(
models.LibraryScan.objects.select_related().filter(status="pending"), "library_scan"

View file

@ -219,31 +219,6 @@ class TrackViewSet(
)
return queryset
@action(methods=["get"], detail=True)
@transaction.non_atomic_requests
def lyrics(self, request, *args, **kwargs):
    """Return the serialized lyrics of the track identified by ``pk``.

    Responds with 404 when the track does not exist, when no work can be
    resolved for it, or when the work has no lyrics. Lyrics without content
    get their content fetched before being serialized.
    """
    try:
        track = models.Track.objects.get(pk=kwargs["pk"])
    except models.Track.DoesNotExist:
        return Response(status=404)

    work = track.work or track.get_work()
    if not work:
        return Response({"error": "unavailable work"}, status=404)

    lyrics = work.fetch_lyrics()
    # Explicit check instead of catching AttributeError around the fetch:
    # the broad handler also hid genuine errors raised while fetching.
    if lyrics is None:
        return Response({"error": "unavailable lyrics"}, status=404)

    if not lyrics.content:
        tasks.fetch_content(lyrics_id=lyrics.pk)
        lyrics.refresh_from_db()
    serializer = serializers.LyricsSerializer(lyrics)
    return Response(serializer.data)
libraries = action(methods=["get"], detail=True)(
get_libraries(filter_uploads=lambda o, uploads: uploads.filter(track=o))
)

View file

@ -40,10 +40,6 @@ class API(object):
_api.get_recording_by_id, max_age=settings.MUSICBRAINZ_CACHE_DURATION
)
class works(object):
    """Work search and lookup, wrapped with ``memo`` using the configured cache duration."""

    search = memo(
        _api.search_works,
        max_age=settings.MUSICBRAINZ_CACHE_DURATION,
    )
    get = memo(
        _api.get_work_by_id,
        max_age=settings.MUSICBRAINZ_CACHE_DURATION,
    )
class releases(object):
search = memo(_api.search_releases, max_age=settings.MUSICBRAINZ_CACHE_DURATION)
get = memo(_api.get_release_by_id, max_age=settings.MUSICBRAINZ_CACHE_DURATION)