diff options
author | Cédric Bonhomme <kimble.mandel@gmail.com> | 2012-11-18 17:31:21 +0100 |
---|---|---|
committer | Cédric Bonhomme <kimble.mandel@gmail.com> | 2012-11-18 17:31:21 +0100 |
commit | 0d59de5f07abde759b86fd1f587dda0ddea0a029 (patch) | |
tree | fb4f3daeb8477ff1a826a0a5c4bb21f04b14105e /source | |
parent | Test draw function. Set K to 8. (diff) | |
download | newspipe-0d59de5f07abde759b86fd1f587dda0ddea0a029.tar.gz newspipe-0d59de5f07abde759b86fd1f587dda0ddea0a029.tar.bz2 newspipe-0d59de5f07abde759b86fd1f587dda0ddea0a029.zip |
Test with tanimoto distance.
Diffstat (limited to 'source')
-rwxr-xr-x | source/clusters.py | 11 | ||||
-rw-r--r-- | source/testclusters.py | 4 |
2 files changed, 13 insertions, 2 deletions
diff --git a/source/clusters.py b/source/clusters.py index 7122c55d..bdfebe6e 100755 --- a/source/clusters.py +++ b/source/clusters.py @@ -38,6 +38,17 @@ def pearson(v1,v2): return 1.0-num/den
def tanimoto(v1, v2):
    """Return the Tanimoto distance between two vectors.

    An entry counts as "present" when it is non-zero. The distance is
    1.0 minus the ratio of positions present in both vectors to positions
    present in at least one of them: 0.0 means the same positions are
    present in both, 1.0 means no position is shared.

    v2 is read by index over the length of v1, so v2 must be at least as
    long as v1 (IndexError otherwise); trailing entries of a longer v2
    are ignored.

    If neither vector has any non-zero entry the ratio is undefined; the
    vectors are then treated as identical and 0.0 is returned instead of
    raising ZeroDivisionError.
    """
    c1, c2, shr = 0, 0, 0
    for i, a in enumerate(v1):
        b = v2[i]  # indexing keeps the IndexError for a too-short v2
        if a != 0:
            c1 += 1  # in v1
        if b != 0:
            c2 += 1  # in v2
        if a != 0 and b != 0:
            shr += 1  # in both

    union = c1 + c2 - shr
    if union == 0:
        # Both vectors are empty / all-zero: nothing distinguishes them.
        return 0.0
    # float() keeps true division under Python 2 integer semantics.
    return 1.0 - (float(shr) / union)
+
def kcluster(rows,distance=pearson,k=4):
# Determine the minimum and maximum values for each point
ranges=[(min([row[i] for row in rows]),max([row[i] for row in rows]))
diff --git a/source/testclusters.py b/source/testclusters.py index ea6406b1..a16d3492 100644 --- a/source/testclusters.py +++ b/source/testclusters.py @@ -2,14 +2,14 @@ import clusters -K = 8 +K = 7 blognames,words,data = clusters.readfile("blogdata1.txt") coords = clusters.scaledown(data) print "Generating clusters..." -kclust = clusters.kcluster(data, k=K) +kclust = clusters.kcluster(data, k=K, distance=clusters.pearson) print print "Clusters:" for i in range(K): |