blob: f34e284eadc41878e78bd0081985156ce7dfdc41 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
#! /usr/bin/env python
#-*- coding: utf-8 -*-
import itertools
from datetime import timedelta
import utils
def compare_documents(feed):
    """
    Compare the articles of a feed pairwise and collect likely duplicates.

    Every unordered pair taken from ``feed.articles`` is examined. A pair is
    reported as a duplicate when both titles are equal once passed through
    ``utils.clear_string`` and the two ``date`` values are less than one day
    apart, regardless of which article is the more recent one.

    :param feed: object exposing an iterable ``articles`` attribute; each
        article must provide ``title`` and ``date`` attributes, and the
        difference of two ``date`` values must be a ``timedelta``.
    :return: list of 2-tuples ``(article_a, article_b)``, in the order the
        pairs are produced by ``itertools.combinations``.
    """
    max_gap = timedelta(days=1)
    duplicates = []
    for first, second in itertools.combinations(feed.articles, 2):
        # Titles are compared first so that dates are only subtracted for
        # pairs that already match on title.
        if utils.clear_string(first.title) != utils.clear_string(second.title):
            continue
        # abs() is required: a signed difference is negative whenever
        # ``first`` is older than ``second``, and a negative timedelta is
        # always smaller than one day, which would flag articles published
        # arbitrarily far apart.
        if abs(first.date - second.date) < max_gap:
            duplicates.append((first, second))
    return duplicates
|