aboutsummaryrefslogtreecommitdiff
path: root/pyaggr3g470r/duplicate.py
blob: 6f78fd8117a1672f8409a7ade4aeca8052a26d36 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#! /usr/bin/env python
#-*- coding: utf-8 -*-

from datetime import timedelta

import itertools
import utils


def compare_documents(feed):
    """
    Find likely duplicate articles in a feed by comparing them pairwise.

    Two articles are reported as a duplicate pair when all of the
    following hold:

    * the first article of the pair has non-empty content (only the first
      one is checked);
    * both titles are equal once passed through ``utils.clear_string``;
    * their dates are less than one day apart, in either direction.

    :param feed: object exposing an ``articles`` iterable. Each article
        must provide ``title``, ``content`` and ``date`` attributes;
        subtracting two ``date`` values must yield a ``timedelta``.
    :return: list of ``(article_a, article_b)`` tuples, in the order
        produced by ``itertools.combinations``.
    """
    max_gap = timedelta(days=1)
    duplicates = []
    for first, second in itertools.combinations(feed.articles, 2):
        # Cheap rejection first: no title normalisation is done for a pair
        # whose first article has no content.
        if first.content == "":
            continue
        if utils.clear_string(first.title) != utils.clear_string(second.title):
            continue
        # Use the absolute gap: a signed difference is negative whenever
        # ``first`` is older than ``second`` and would then always be
        # "less than one day", making the result depend on article order.
        if abs(first.date - second.date) < max_gap:
            duplicates.append((first, second))
    return duplicates
bgstack15