aboutsummaryrefslogtreecommitdiff
path: root/pyaggr3g470r/compare.py
diff options
context:
space:
mode:
authorCédric Bonhomme <cedric@cedricbonhomme.org>2014-11-28 15:23:54 +0100
committerCédric Bonhomme <cedric@cedricbonhomme.org>2014-11-28 15:23:54 +0100
commit32c475582d7bf3ff316728aa1bfac6fb87d532bc (patch)
treeae07a8dc5c22e439ddc90050f2914eec5994a3b4 /pyaggr3g470r/compare.py
parentCompare files with cosine. (diff)
downloadnewspipe-32c475582d7bf3ff316728aa1bfac6fb87d532bc.tar.gz
newspipe-32c475582d7bf3ff316728aa1bfac6fb87d532bc.tar.bz2
newspipe-32c475582d7bf3ff316728aa1bfac6fb87d532bc.zip
Speed improvements: combinations generates fewer pairs than product...
Diffstat (limited to 'pyaggr3g470r/compare.py')
-rw-r--r--pyaggr3g470r/compare.py3
1 files changed, 1 insertions, 2 deletions
diff --git a/pyaggr3g470r/compare.py b/pyaggr3g470r/compare.py
index 26ac57ab..80f3d694 100644
--- a/pyaggr3g470r/compare.py
+++ b/pyaggr3g470r/compare.py
@@ -39,8 +39,7 @@ def compare_documents(feed):
"""
nltk.download("punkt")
duplicates = []
- for pair in [(elem[0], elem[1]) for elem in itertools.product(feed.articles, repeat=2)
- if elem[0].id != elem[1].id]:
+ for pair in itertools.combinations(feed.articles, 2):
try:
result = cosine_sim(*pair)
if abs(result.item() - 1.0) < 1e-10:
bgstack15