From d73f2d6ae43a4bcafdbcc2cb24503001ef41769e Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Sun, 30 Dec 2012 21:46:32 +0100 Subject: Removed strip_accents function. --- source/utils.py | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'source') diff --git a/source/utils.py b/source/utils.py index f6cd93d1..b37ecb8b 100755 --- a/source/utils.py +++ b/source/utils.py @@ -116,19 +116,6 @@ def unescape(text): return text # leave as is return re.sub("&#?\w+;", fixup, text) -def not_combining(char): - return unicodedata.category(char) != 'Mn' - -def strip_accents(text, encoding): - """ - Strip accents. - - >>> print strip_accents("déjà", "utf-8") - deja - """ - unicode_text= unicodedata.normalize('NFD', text) - return filter(not_combining, unicode_text) - def normalize_filename(name): """ Normalize a file name. @@ -137,7 +124,6 @@ def normalize_filename(name): file_name = re.sub("[\s.]", "_", file_name) file_name = file_name.strip('_') file_name = file_name.strip('.') - #file_name = strip_accents(file_name, "utf-8") return os.path.normpath(file_name) def load_stop_words(): -- cgit