Simplify cleanString function to prepare for Python 3 compatibility

2025-10-04 18:09:16 +02:00 · 2019-03-09 11:34:57 +01:00 · 2019-03-09 11:34:57 +01:00 · 2f40ef1826
commit 2f40ef1826
parent 3797c9a9f0
1 changed files with 5 additions and 12 deletions
--- a/lib/utils.py
+++ b/lib/utils.py
@ -3,9 +3,10 @@

 from ConfigParser import RawConfigParser, NoOptionError, NoSectionError
 from os.path import dirname, splitext, basename, isfile
+import re
 from os import devnull
 from subprocess import check_call, CalledProcessError, STDOUT
-import unicodedata
+import unidecode
 import logging

 ### CATEGORIES ###
@ -195,16 +196,8 @@ def upcaseFirstLetter(s):


 def cleanString(toclean):
-    toclean = toclean.split(' ')
-    cleaned = ''
-    for s in toclean:
-        if s == '':
-            continue
-        strtoclean = unicodedata.normalize('NFKD', unicode (s, 'utf-8')).encode('ASCII', 'ignore')
-        strtoclean = ''.join(e for e in strtoclean if e.isalnum())
-        if strtoclean == '':
-            continue
-        strtoclean = upcaseFirstLetter(strtoclean)
-        cleaned = cleaned + strtoclean
+    toclean = toclean.decode('utf-8')
+    toclean = unidecode.unidecode(toclean)
+    cleaned = re.sub('[^A-Za-z0-9]+', '', toclean)

    return cleaned