Upgraded celery to 4.1, added endpoint logic for fingerprinting audio files

This commit is contained in:
Eliot Berriot 2017-12-26 21:12:37 +01:00
parent 4834b9e450
commit 5d2dbbc828
No known key found for this signature in database
GPG key ID: DD6965E2476E5C27
25 changed files with 345 additions and 71 deletions

View file

@ -0,0 +1,18 @@
# Generated by Django 2.0 on 2017-12-26 16:39
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a nullable ``acoustid_track_id`` UUID field to the ``trackfile`` model."""

    # Applied on top of migration 0015 of the ``music`` app.
    dependencies = [('music', '0015_bind_track_file_to_import_job')]

    operations = [
        migrations.AddField(
            model_name='trackfile',
            name='acoustid_track_id',
            # Optional on both the form and the database side.
            field=models.UUIDField(null=True, blank=True),
        ),
    ]

View file

@ -15,11 +15,9 @@ from django.utils import timezone
from taggit.managers import TaggableManager
from versatileimagefield.fields import VersatileImageField
from funkwhale_api.taskapp import celery
from funkwhale_api import downloader
from funkwhale_api import musicbrainz
from . import importers
from . import lyrics as lyrics_utils
class APIModelMixin(models.Model):
@ -255,14 +253,6 @@ class Lyrics(models.Model):
url = models.URLField(unique=True)
content = models.TextField(null=True, blank=True)
# Fetch the lyrics page found at ``self.url``, run it through the
# ``lyrics_utils`` extraction and cleaning helpers, then store the result in
# ``self.content`` and save the instance.
# NOTE(review): this is the method-as-task form (``filter=celery.task_method``).
# The same task name, 'Lyrics.fetch_content', is also registered by a
# function-based task elsewhere in this change -- confirm only one of the two
# definitions is meant to remain.
@celery.app.task(name='Lyrics.fetch_content', filter=celery.task_method)
def fetch_content(self):
# Raw HTML of the lyrics page.
html = lyrics_utils._get_html(self.url)
# Lyrics extracted from the page, then cleaned before being stored.
content = lyrics_utils.extract_content(html)
cleaned_content = lyrics_utils.clean_content(content)
self.content = cleaned_content
# Saves the whole instance (no ``update_fields`` restriction).
self.save()
@property
def content_rendered(self):
return markdown.markdown(
@ -362,6 +352,7 @@ class TrackFile(models.Model):
audio_file = models.FileField(upload_to='tracks/%Y/%m/%d', max_length=255)
source = models.URLField(null=True, blank=True)
duration = models.IntegerField(null=True, blank=True)
acoustid_track_id = models.UUIDField(null=True, blank=True)
def download_file(self):
# import the track file, since there is not any
@ -429,26 +420,3 @@ class ImportJob(models.Model):
class Meta:
ordering = ('id', )
# Run this import job: resolve the track for ``self.mbid``, download the audio
# file from ``self.source`` into a TrackFile and mark the job as finished.
#
# replace: when true, the first existing file of the track is reused and
#     downloaded again; when false and the track already has a file, the
#     method returns early without touching the job.
# Returns the primary key of the track, or None on the early return.
# NOTE(review): this is the method-as-task form (``filter=celery.task_method``).
# The same task name, 'ImportJob.run', is also registered by a function-based
# task elsewhere in this change -- confirm only one definition is meant to remain.
@celery.app.task(name='ImportJob.run', filter=celery.task_method)
def run(self, replace=False):
try:
track, created = Track.get_or_create_from_api(mbid=self.mbid)
track_file = None
if replace:
# May be None when the track has no file yet; a new one is built below.
track_file = track.files.first()
elif track.files.count() > 0:
# A file already exists and no replacement was requested.
return
track_file = track_file or TrackFile(
track=track, source=self.source)
track_file.download_file()
track_file.save()
self.status = 'finished'
self.track_file = track_file
self.save()
return track.pk
except Exception as exc:
# Outside of DEBUG the failure is turned into a task retry (30 second
# countdown, at most 3 retries); in DEBUG the original exception propagates.
if not settings.DEBUG:
raise ImportJob.run.retry(args=[self], exc=exc, countdown=30, max_retries=3)
raise

View file

@ -0,0 +1,56 @@
from funkwhale_api.taskapp import celery
from funkwhale_api.providers.acoustid import get_acoustid_client
from django.conf import settings
from . import models
from . import lyrics as lyrics_utils
@celery.app.task(name='acoustid.set_on_track_file')
@celery.require_instance(models.TrackFile, 'track_file')
def set_acoustid_on_track_file(track_file):
    """Look up the audio file on AcoustID and store the matching id.

    The file at ``track_file.audio_file.path`` is submitted through the
    AcoustID client. When a match is returned, its ``id`` is written to
    ``track_file.acoustid_track_id`` (only that column is saved) and
    returned. Without a match nothing is modified and None is returned.
    """
    match = get_acoustid_client().get_best_match(track_file.audio_file.path)
    if not match:
        return None

    acoustid_id = match['id']
    track_file.acoustid_track_id = acoustid_id
    track_file.save(update_fields=['acoustid_track_id'])
    return acoustid_id
@celery.app.task(name='ImportJob.run', bind=True)
@celery.require_instance(models.ImportJob, 'import_job')
def import_job_run(self, import_job, replace=False):
    """Import the track described by an import job and attach its audio file.

    The task is called with ``import_job_id=<pk>``; ``require_instance``
    turns that into the ``import_job`` argument.

    Args:
        self: the bound task instance (``bind=True``), used for retries.
        import_job: the ImportJob to process.
        replace: when true, the first existing file of the track is reused
            and downloaded again; when false and the track already has a
            file, the task stops without touching the job.

    Returns:
        The primary key of the imported track, or None when the track
        already had a file and ``replace`` was false.

    Raises:
        Outside of DEBUG, any failure is converted into a Celery retry
        (30 second countdown, at most 3 retries). In DEBUG the original
        exception propagates unchanged.
    """
    try:
        track, _ = models.Track.get_or_create_from_api(mbid=import_job.mbid)
        track_file = None
        if replace:
            # May be None when the track has no file yet; one is built below.
            track_file = track.files.first()
        elif track.files.count() > 0:
            return None
        track_file = track_file or models.TrackFile(
            track=track, source=import_job.source)
        track_file.download_file()
        track_file.save()
        import_job.status = 'finished'
        import_job.track_file = track_file
        import_job.save()
        return track.pk
    except Exception as exc:
        if not settings.DEBUG:
            # Retry with the arguments of the current request. Passing
            # ``args=[self]`` would replace them with the task instance
            # itself, which is neither a valid argument nor serialisable.
            raise self.retry(exc=exc, countdown=30, max_retries=3)
        raise
@celery.app.task(name='Lyrics.fetch_content')
@celery.require_instance(models.Lyrics, 'lyrics')
def fetch_content(lyrics):
    """Download, extract and clean the lyrics found at ``lyrics.url``.

    The cleaned text is stored in ``lyrics.content``; only that column is
    written back to the database.
    """
    raw_html = lyrics_utils._get_html(lyrics.url)
    extracted = lyrics_utils.extract_content(raw_html)
    lyrics.content = lyrics_utils.clean_content(extracted)
    lyrics.save(update_fields=['content'])

View file

@ -22,6 +22,7 @@ from . import models
from . import serializers
from . import importers
from . import filters
from . import tasks
from . import utils
@ -129,7 +130,8 @@ class TrackViewSet(TagViewSetMixin, SearchMixin, viewsets.ReadOnlyModelViewSet):
lyrics = work.fetch_lyrics()
try:
if not lyrics.content:
lyrics.fetch_content()
tasks.fetch_content(lyrics_id=lyrics.pk)
lyrics.refresh_from_db()
except AttributeError:
return Response({'error': 'unavailable lyrics'}, status=404)
serializer = serializers.LyricsSerializer(lyrics)
@ -244,7 +246,7 @@ class SubmitViewSet(viewsets.ViewSet):
pass
batch = models.ImportBatch.objects.create(submitted_by=request.user)
job = models.ImportJob.objects.create(mbid=request.POST['mbid'], batch=batch, source=request.POST['import_url'])
job.run.delay()
tasks.import_job_run.delay(import_job_id=job.pk)
serializer = serializers.ImportBatchSerializer(batch)
return Response(serializer.data)
@ -272,7 +274,7 @@ class SubmitViewSet(viewsets.ViewSet):
models.TrackFile.objects.get(track__mbid=row['mbid'])
except models.TrackFile.DoesNotExist:
job = models.ImportJob.objects.create(mbid=row['mbid'], batch=batch, source=row['source'])
job.run.delay()
tasks.import_job_run.delay(import_job_id=job.pk)
serializer = serializers.ImportBatchSerializer(batch)
return serializer.data, batch

View file

@ -0,0 +1,27 @@
import acoustid
from dynamic_preferences.registries import global_preferences_registry
class Client(object):
    """Small wrapper around the ``acoustid`` module bound to one API key."""

    def __init__(self, api_key):
        self.api_key = api_key

    def match(self, file_path):
        """Return the unparsed (``parse=False``) lookup response for a file."""
        return acoustid.match(self.api_key, file_path, parse=False)

    def get_best_match(self, file_path):
        """Return the first result row scoring at least 0.8, or None.

        None is also returned when the response has no ``results`` key.
        """
        threshold = 0.8
        response = self.match(file_path=file_path)
        try:
            candidates = response['results']
        except KeyError:
            return None
        # Rows are examined in response order; the first good one wins.
        return next(
            (row for row in candidates if row['score'] >= threshold),
            None,
        )
def get_acoustid_client():
    """Build a Client using the ``providers_acoustid__api_key`` global preference."""
    preferences = global_preferences_registry.manager()
    api_key = preferences['providers_acoustid__api_key']
    return Client(api_key=api_key)

View file

@ -0,0 +1,13 @@
from dynamic_preferences.types import StringPreference, Section
from dynamic_preferences.registries import global_preferences_registry
# Preference section holding the AcoustID provider settings.
acoustid = Section('providers_acoustid')


@global_preferences_registry.register
class APIKey(StringPreference):
    """Global string preference storing the AcoustID API key (empty by default)."""

    section = acoustid
    name = 'api_key'
    verbose_name = 'Acoustid API key'
    help_text = 'The API key used to query AcoustID. Get one at https://acoustid.org/new-application.'
    default = ''

View file

@ -1,20 +1,20 @@
import acoustid
import os
import datetime
from django.core.files import File
from funkwhale_api.taskapp import celery
from funkwhale_api.providers.acoustid import get_acoustid_client
from funkwhale_api.music import models, metadata
@celery.app.task(name='audiofile.from_path')
def from_path(path):
def import_metadata_without_musicbrainz(path):
data = metadata.Metadata(path)
artist = models.Artist.objects.get_or_create(
name__iexact=data.get('artist'),
defaults={
'name': data.get('artist'),
'mbid': data.get('musicbrainz_artistid', None),
},
)[0]
@ -39,11 +39,33 @@ def from_path(path):
'mbid': data.get('musicbrainz_recordingid', None),
},
)[0]
return track
def import_metadata_with_musicbrainz(path):
    """Placeholder with no implementation yet: ignores ``path`` and returns None."""
    return None
@celery.app.task(name='audiofile.from_path')
def from_path(path):
acoustid_track_id = None
try:
client = get_acoustid_client()
result = client.get_best_match(path)
acoustid_track_id = result['id']
except acoustid.WebServiceError:
track = import_metadata_without_musicbrainz(path)
except (TypeError, KeyError):
track = import_metadata_without_musicbrainz(path)
else:
track, created = models.Track.get_or_create_from_api(
mbid=result['recordings'][0]['id']
)
if track.files.count() > 0:
raise ValueError('File already exists for track {}'.format(track.pk))
track_file = models.TrackFile(track=track)
track_file = models.TrackFile(
track=track, acoustid_track_id=acoustid_track_id)
track_file.audio_file.save(
os.path.basename(path),
File(open(path, 'rb'))

View file

@ -1,10 +1,12 @@
from __future__ import absolute_import
import os
import functools
from celery import Celery
from django.apps import AppConfig
from django.conf import settings
from celery.contrib.methods import task_method
if not settings.configured:
# set the default Django settings module for the 'celery' program.
@ -21,12 +23,20 @@ class CeleryConfig(AppConfig):
# Configure the Celery app from the Django settings and register the tasks of
# the installed Django apps.
def ready(self):
# Using a string here means the worker will not have to
# pickle the object when using Windows.
# NOTE(review): two config_from_object calls appear back to back; this looks
# like the old and the new version of the same line -- confirm that only the
# namespaced call is meant to remain.
app.config_from_object('django.conf:settings')
# namespace='CELERY': per the Celery documentation, Celery options are then
# read from the settings carrying the ``CELERY_`` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')
# The app list is passed as a callable; force=True presumably makes the
# discovery run immediately instead of lazily -- verify against the Celery docs.
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS, force=True)
@app.task(bind=True)
def debug_task(self):
    """Print the request of the bound task."""
    request_line = 'Request: {0!r}'.format(self.request)  # pragma: no cover
    print(request_line)  # pragma: no cover
def require_instance(model_or_qs, parameter_name):
    """Decorator factory swapping a ``<parameter_name>_id`` keyword for an instance.

    The wrapped function is called with ``<parameter_name>_id=<pk>``. The
    wrapper removes that keyword, fetches the matching object and passes it
    on as ``<parameter_name>``; other arguments are forwarded untouched.

    ``model_or_qs`` is used directly when it offers ``get`` (a queryset);
    if that attempt raises AttributeError, its ``objects`` attribute is
    used instead (a model class).

    Raises:
        KeyError: when the ``<parameter_name>_id`` keyword is missing.
    """
    def decorator(function):
        @functools.wraps(function)
        def inner(*args, **kwargs):
            id_keyword = '_'.join((parameter_name, 'id'))
            lookup_value = kwargs.pop(id_keyword)
            try:
                found = model_or_qs.get(pk=lookup_value)
            except AttributeError:
                found = model_or_qs.objects.get(pk=lookup_value)
            kwargs[parameter_name] = found
            return function(*args, **kwargs)
        return inner
    return decorator