diff options
author | Cédric Bonhomme <kimble.mandel@gmail.com> | 2012-11-18 17:58:18 +0100 |
---|---|---|
committer | Cédric Bonhomme <kimble.mandel@gmail.com> | 2012-11-18 17:58:18 +0100 |
commit | 55e82439302ee81656f796c4a64af6e6d51bccc8 (patch) | |
tree | 962c50f62748a08f25ec22f4dfb6fea465eae2f1 /source | |
parent | mb:68e2ad40-1fd7-4939-953f-fd4cdf19803b (diff) | |
download | newspipe-55e82439302ee81656f796c4a64af6e6d51bccc8.tar.gz newspipe-55e82439302ee81656f796c4a64af6e6d51bccc8.tar.bz2 newspipe-55e82439302ee81656f796c4a64af6e6d51bccc8.zip |
Test with Euclidean distance.
Diffstat (limited to 'source')
-rwxr-xr-x | source/clusters.py | 7 | ||||
-rw-r--r-- | source/testclusters.py | 2 |
2 files changed, 8 insertions, 1 deletions
diff --git a/source/clusters.py b/source/clusters.py index 02766bdf..e53fac9b 100755 --- a/source/clusters.py +++ b/source/clusters.py @@ -1,6 +1,7 @@ #! /usr/bin/env python
#-*- coding: utf-8 -*-
+import math
import random
from math import sqrt
@@ -51,6 +52,12 @@ def tanimoto(v1, v2): shr += 1 # in both
return 1.0 - (float(shr) / (c1 + c2 - shr))
def euclidian(v1, v2):
    """Return the Euclidean distance between two numeric vectors.

    v1 and v2 are equal-length sequences of numbers. The result is the
    square root of the sum of the squared component-wise differences and
    is always a float (0.0 for two empty vectors).

    Raises ValueError when the vectors differ in length. A mismatch is
    rejected rather than truncated so that the result never depends on
    the argument order.
    """
    if len(v1) != len(v2):
        raise ValueError(
            "vectors must have the same length (%d != %d)"
            % (len(v1), len(v2)))
    # Seed the sum with 0.0 so the accumulation happens in floating point.
    return math.sqrt(sum(((a - b) ** 2 for a, b in zip(v1, v2)), 0.0))
+
def kcluster(rows,distance=pearson,k=4):
# Determine the minimum and maximum values for each point
ranges=[(min([row[i] for row in rows]),max([row[i] for row in rows]))
diff --git a/source/testclusters.py b/source/testclusters.py index 728e9c1b..6feabfc8 100644 --- a/source/testclusters.py +++ b/source/testclusters.py @@ -10,7 +10,7 @@ blognames,words,data = clusters.readfile("blogdata1.txt") coords = clusters.scaledown(data) print "Generating clusters..." -kclust = clusters.kcluster(data, k=K, distance=clusters.pearson) +kclust = clusters.kcluster(data, k=K, distance=clusters.euclidian) print print "Clusters:" for i in range(K): |