Test with tanimoto distance.

author: Cédric Bonhomme <kimble.mandel@gmail.com> 2012-11-18 17:31:21 +0100
committer: Cédric Bonhomme <kimble.mandel@gmail.com> 2012-11-18 17:31:21 +0100
commit: 0d59de5f07abde759b86fd1f587dda0ddea0a029 (patch)
tree: fb4f3daeb8477ff1a826a0a5c4bb21f04b14105e
parent: Test draw function. Set K to 8. (diff)
download: newspipe-0d59de5f07abde759b86fd1f587dda0ddea0a029.tar.gz
newspipe-0d59de5f07abde759b86fd1f587dda0ddea0a029.tar.bz2
newspipe-0d59de5f07abde759b86fd1f587dda0ddea0a029.zip
2 files changed, 13 insertions, 2 deletions
diff --git a/source/clusters.py b/source/clusters.py
index 7122c55d..bdfebe6e 100755
--- a/source/clusters.py
+++ b/source/clusters.py
@@ -38,6 +38,17 @@ def pearson(v1,v2):
 
   return 1.0-num/den
 
+def tanimoto(v1, v2):
+    """Tanimoto distance: 1 - shared/union of non-zero positions (0.0 if none)."""
+    c1, c2, shr = 0, 0, 0
+    for i in range(len(v1)):
+        in1, in2 = v1[i] != 0, v2[i] != 0
+        c1 += in1 # in v1
+        c2 += in2 # in v2
+        shr += in1 and in2 # in both
+    union = c1 + c2 - shr # 0 only when neither vector has a non-zero entry
+    return (1.0 - float(shr) / union) if union else 0.0
+
 def kcluster(rows,distance=pearson,k=4):
   # Determine the minimum and maximum values for each point
   ranges=[(min([row[i] for row in rows]),max([row[i] for row in rows])) 
diff --git a/source/testclusters.py b/source/testclusters.py
index ea6406b1..a16d3492 100644
--- a/source/testclusters.py
+++ b/source/testclusters.py
@@ -2,14 +2,14 @@
 
 import clusters
 
-K = 8
+K = 7
 
 blognames,words,data = clusters.readfile("blogdata1.txt")
 
 coords = clusters.scaledown(data)
 
 print "Generating clusters..."
-kclust = clusters.kcluster(data, k=K)
+kclust = clusters.kcluster(data, k=K, distance=clusters.pearson)
 print
 print "Clusters:"
 for i in range(K):
author	Cédric Bonhomme <kimble.mandel@gmail.com>	2012-11-18 17:31:21 +0100
committer	Cédric Bonhomme <kimble.mandel@gmail.com>	2012-11-18 17:31:21 +0100
commit	0d59de5f07abde759b86fd1f587dda0ddea0a029 (patch)
tree	fb4f3daeb8477ff1a826a0a5c4bb21f04b14105e
parent	Test draw function. Set K to 8. (diff)
download	newspipe-0d59de5f07abde759b86fd1f587dda0ddea0a029.tar.gz newspipe-0d59de5f07abde759b86fd1f587dda0ddea0a029.tar.bz2 newspipe-0d59de5f07abde759b86fd1f587dda0ddea0a029.zip