mirror of
https://git.lecygnenoir.info/LecygneNoir/prismedia.git
synced 2025-10-04 18:09:16 +02:00
Simplify cleanString function to prepare python3 compatibility
This commit is contained in:
parent
3797c9a9f0
commit
2f40ef1826
1 changed file with 5 additions and 12 deletions
17
lib/utils.py
17
lib/utils.py
|
@ -3,9 +3,10 @@
|
|||
|
||||
from ConfigParser import RawConfigParser, NoOptionError, NoSectionError
|
||||
from os.path import dirname, splitext, basename, isfile
|
||||
import re
|
||||
from os import devnull
|
||||
from subprocess import check_call, CalledProcessError, STDOUT
|
||||
import unicodedata
|
||||
import unidecode
|
||||
import logging
|
||||
|
||||
### CATEGORIES ###
|
||||
|
@ -195,16 +196,8 @@ def upcaseFirstLetter(s):
|
|||
|
||||
|
||||
def cleanString(toclean):
    """Return ``toclean`` reduced to ASCII letters and digits only.

    The input is first transliterated to ASCII with ``unidecode``; then
    every run of characters outside ``A-Za-z0-9`` (spaces and punctuation
    included) is removed.

    Args:
        toclean: Text to clean, either a UTF-8 encoded byte string
            (a plain ``str`` on Python 2) or an already-decoded text string.

    Returns:
        The cleaned string; empty when nothing alphanumeric remains.

    Raises:
        UnicodeDecodeError: If ``toclean`` is a byte string that is not
            valid UTF-8.
    """
    # Only byte strings need decoding. A Python 3 ``str`` has no ``decode``
    # method, and calling ``decode`` on a Python 2 ``unicode`` object first
    # encodes it implicitly as ASCII, which fails on non-ASCII text.
    if isinstance(toclean, bytes):
        toclean = toclean.decode('utf-8')
    toclean = unidecode.unidecode(toclean)
    cleaned = re.sub('[^A-Za-z0-9]+', '', toclean)

    return cleaned
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue