Simplify cleanString function to prepare for Python 3 compatibility

This commit is contained in:
LecygneNoir 2019-03-09 11:34:57 +01:00
parent 3797c9a9f0
commit 2f40ef1826

View file

@ -3,9 +3,10 @@
from ConfigParser import RawConfigParser, NoOptionError, NoSectionError
from os.path import dirname, splitext, basename, isfile
import re
from os import devnull
from subprocess import check_call, CalledProcessError, STDOUT
import unicodedata
import unidecode
import logging
### CATEGORIES ###
@ -195,16 +196,8 @@ def upcaseFirstLetter(s):
def cleanString(toclean):
    """Return ``toclean`` reduced to plain ASCII letters and digits.

    The text is transliterated to ASCII with ``unidecode`` and every
    character outside ``A-Za-z0-9`` (spaces and punctuation included) is
    then removed.

    Args:
        toclean: the string to clean, either a UTF-8 encoded byte string
            or an already-decoded text string.

    Returns:
        The cleaned string; empty when nothing alphanumeric remains.
    """
    # Decode byte strings only: text strings have no decode() on Python 3,
    # and on Python 2 calling decode() on a unicode object first encodes it
    # implicitly as ASCII, which raises on any accented character.
    if isinstance(toclean, bytes):
        toclean = toclean.decode('utf-8')
    toclean = unidecode.unidecode(toclean)
    # Whatever is still not a letter or digit after transliteration is dropped.
    return re.sub('[^A-Za-z0-9]+', '', toclean)