diff options
author | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-11-28 15:23:54 +0100 |
---|---|---|
committer | Cédric Bonhomme <cedric@cedricbonhomme.org> | 2014-11-28 15:23:54 +0100 |
commit | 32c475582d7bf3ff316728aa1bfac6fb87d532bc (patch) | |
tree | ae07a8dc5c22e439ddc90050f2914eec5994a3b4 | |
parent | Compare files with cosine. (diff) | |
download | newspipe-32c475582d7bf3ff316728aa1bfac6fb87d532bc.tar.gz newspipe-32c475582d7bf3ff316728aa1bfac6fb87d532bc.tar.bz2 newspipe-32c475582d7bf3ff316728aa1bfac6fb87d532bc.zip |
Speed improvements: combinations generates fewer pairs than product...
-rw-r--r-- | pyaggr3g470r/compare.py | 3 |
1 file changed, 1 insertion, 2 deletions
diff --git a/pyaggr3g470r/compare.py b/pyaggr3g470r/compare.py index 26ac57ab..80f3d694 100644 --- a/pyaggr3g470r/compare.py +++ b/pyaggr3g470r/compare.py @@ -39,8 +39,7 @@ def compare_documents(feed): """ nltk.download("punkt") duplicates = [] - for pair in [(elem[0], elem[1]) for elem in itertools.product(feed.articles, repeat=2) - if elem[0].id != elem[1].id]: + for pair in itertools.combinations(feed.articles, 2): try: result = cosine_sim(*pair) if abs(result.item() - 1.0) < 1e-10: |