aboutsummaryrefslogtreecommitdiff
path: root/pyaggr3g470r/duplicate.py
diff options
context:
space:
mode:
Diffstat (limited to 'pyaggr3g470r/duplicate.py')
-rw-r--r-- pyaggr3g470r/duplicate.py 4
1 file changed, 3 insertions, 1 deletion
diff --git a/pyaggr3g470r/duplicate.py b/pyaggr3g470r/duplicate.py
index 23f4adc0..d63cb2e7 100644
--- a/pyaggr3g470r/duplicate.py
+++ b/pyaggr3g470r/duplicate.py
@@ -11,6 +11,8 @@ def compare_documents(feed):
"""
duplicates = []
for pair in itertools.combinations(feed.articles, 2):
- if pair[0].content != "" and pair[0].content == pair[1].content:
+ if pair[0].content != "" and \
+ (utils.clear_string(pair[0].title) == utils.clear_string(pair[1].title) or \
+ utils.clear_string(pair[0].content) == utils.clear_string(pair[1].content)):
duplicates.append(pair)
return duplicates
\ No newline at end of file
bgstack15