From 32c475582d7bf3ff316728aa1bfac6fb87d532bc Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Fri, 28 Nov 2014 15:23:54 +0100 Subject: Speed improvements: combinations generates fewer pairs than product... --- pyaggr3g470r/compare.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'pyaggr3g470r') diff --git a/pyaggr3g470r/compare.py b/pyaggr3g470r/compare.py index 26ac57ab..80f3d694 100644 --- a/pyaggr3g470r/compare.py +++ b/pyaggr3g470r/compare.py @@ -39,8 +39,7 @@ def compare_documents(feed): """ nltk.download("punkt") duplicates = [] - for pair in [(elem[0], elem[1]) for elem in itertools.product(feed.articles, repeat=2) - if elem[0].id != elem[1].id]: + for pair in itertools.combinations(feed.articles, 2): try: result = cosine_sim(*pair) if abs(result.item() - 1.0) < 1e-10: -- cgit