diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2015-03-08 12:07:36 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2015-03-08 12:07:36 +0100 |
commit | 2378de49ba37116c5bf93054fd6aed65fa44022a (patch) | |
tree | 90bb6efddb1d8fc4772c74fcf5dda4dccef74b1a /pyaggr3g470r/duplicate.py | |
parent | Better handling of the error logging in the crawler. (diff) | |
download | newspipe-2378de49ba37116c5bf93054fd6aed65fa44022a.tar.gz newspipe-2378de49ba37116c5bf93054fd6aed65fa44022a.tar.bz2 newspipe-2378de49ba37116c5bf93054fd6aed65fa44022a.zip |
Moved duplicate() function to utils.py. Some minor cosmetic changes.
Diffstat (limited to 'pyaggr3g470r/duplicate.py')
-rw-r--r-- | pyaggr3g470r/duplicate.py | 20 |
1 files changed, 0 insertions, 20 deletions
diff --git a/pyaggr3g470r/duplicate.py b/pyaggr3g470r/duplicate.py
deleted file mode 100644
index d4c6e31a..00000000
--- a/pyaggr3g470r/duplicate.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#! /usr/bin/env python
-#-*- coding: utf-8 -*-
-
-import itertools
-from datetime import timedelta
-
-from pyaggr3g470r import utils
-
-def compare_documents(feed):
-    """
-    Compare a list of documents by pair.
-    """
-    duplicates = []
-    for pair in itertools.combinations(feed.articles, 2):
-        date1 = pair[0].date
-        date2 = pair[1].date
-        if utils.clear_string(pair[0].title) == utils.clear_string(pair[1].title) and \
-                (date1 - date2) < timedelta(days = 1):
-            duplicates.append(pair)
-    return duplicates
\ No newline at end of file |