def tanimoto(v1, v2):
    """Return the Tanimoto distance between two vectors.

    Each vector is read as a set membership mask: position ``i`` belongs
    to the set when the entry at ``i`` is non-zero.  The result is
    ``1.0 - |intersection| / |union|``, so 0.0 means both vectors are
    non-zero at exactly the same positions and 1.0 means they share no
    non-zero position.

    Only the first ``len(v1)`` positions are examined; an ``IndexError``
    is raised if ``v2`` is shorter than ``v1``.

    When neither vector has a non-zero entry the union is empty and the
    ratio is undefined.  The two vectors are then indistinguishable, so
    0.0 is returned instead of raising ``ZeroDivisionError``.
    """
    c1, c2, shr = 0, 0, 0
    for i, a in enumerate(v1):
        in_v1 = a != 0
        in_v2 = v2[i] != 0
        if in_v1:
            c1 += 1  # in v1
        if in_v2:
            c2 += 1  # in v2
        if in_v1 and in_v2:
            shr += 1  # in both

    # Inclusion-exclusion: number of positions that are non-zero in
    # at least one of the two vectors.
    union = c1 + c2 - shr
    if union == 0:
        # Nothing is set in either vector: avoid dividing by zero.
        return 0.0
    return 1.0 - (float(shr) / union)