diff options
Diffstat (limited to 'pyaggr3g470r/duplicate.py')
-rw-r--r-- | pyaggr3g470r/duplicate.py | 20 |
1 files changed, 0 insertions, 20 deletions
diff --git a/pyaggr3g470r/duplicate.py b/pyaggr3g470r/duplicate.py
deleted file mode 100644
index d4c6e31a..00000000
--- a/pyaggr3g470r/duplicate.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-import itertools
-from datetime import timedelta
-
-from pyaggr3g470r import utils
-
-def compare_documents(feed):
-    """
-    Compare a list of documents by pair.
-    """
-    duplicates = []
-    for pair in itertools.combinations(feed.articles, 2):
-        date1 = pair[0].date
-        date2 = pair[1].date
-        if utils.clear_string(pair[0].title) == utils.clear_string(pair[1].title) and \
-            (date1 - date2) < timedelta(days = 1):
-            duplicates.append(pair)
-    return duplicates
\ No newline at end of file |