third_party/WebKit/Source/build/scripts/cluster.py - chromium/src.git - Git at Google

 import math
 import random as rand


 def l2_pairwise_distance(v1, v2):
     """Euclidean distance from each point in v1 to each point in v2

     Args:
         v1: list of point 1
         v2: list of point 2

     Returns:
         distance matrix between each point in v1 and v2
     """
     nrow = len(v1)
     ncol = len(v2)
     dist_mat = [[0 for _ in range(ncol)] for _ in range(nrow)]
     for i in range(nrow):
         for j in range(ncol):
             dist_mat[i][j] = math.sqrt((v1[i] - v2[j]) ** 2)
     return dist_mat


 def calculate_error(k_means_matrix):
     """Calculate the sum of distance from each point to its nearest cluster center

     Args:
         k_means_matrix: distance matrix of point to cluster center

     Returns:
         Sum of distance from each point to its nearest cluster center
     """
     return sum([min(dist) for dist in k_means_matrix])


 def k_means(x_input, n_cluster=3, n_iter=100, n_tries=10):
     """Perform 1-D k-means clustering on a list of numbers x_input

     Args:
         x_input: list of numbers
         n_cluster: number of clusters
         n_iter: number of iterations

     Returns:
         centers: list of n_cluster elements containing the cluster centers
         min_dist_idx: list of len(x_input) elements containing the nearest cluster center's id
         error_value: sum of all distance from each point to its nearest cluster center
     """
     results = []
     for _ in range(n_tries):
         error_value = 0
         rand.seed(None)
         centers = sorted([rand.uniform(0.0, 100.0) for i in range(n_cluster)])
         min_dist_idx = [0] * len(x_input)
         i = 0
         while i < n_iter:
             failed = False
             dist_mat = l2_pairwise_distance(x_input, centers)
             error_value = calculate_error(dist_mat)
             min_dist_idx = [dist.index(min(dist)) for dist in dist_mat]
             centers = [0] * n_cluster
             count = [0] * n_cluster
             for j in range(len(x_input)):
                 centers[min_dist_idx[j]] += x_input[j]
                 count[min_dist_idx[j]] += 1

             for j in range(n_cluster):
                 if count[j] == 0:
                     centers = sorted([rand.uniform(0.0, 100.0) for i in range(n_cluster)])
                     failed = True
                     break

             if failed:
                 i = 0
                 continue

             for j in range(n_cluster):
                 centers[j] = centers[j] / count[j]
             i += 1

         results.append((centers, min_dist_idx, error_value))

     return min(results, key=lambda x: x[2])
	import math
	import random as rand


	def l2_pairwise_distance(v1, v2):
	"""Euclidean distance from each point in v1 to each point in v2

	Args:
	v1: list of point 1
	v2: list of point 2

	Returns:
	distance matrix between each point in v1 and v2
	"""
	nrow = len(v1)
	ncol = len(v2)
	dist_mat = [[0 for _ in range(ncol)] for _ in range(nrow)]
	for i in range(nrow):
	for j in range(ncol):
	dist_mat[i][j] = math.sqrt((v1[i] - v2[j]) ** 2)
	return dist_mat


	def calculate_error(k_means_matrix):
	"""Calculate the sum of distance from each point to its nearest cluster center

	Args:
	k_means_matrix: distance matrix of point to cluster center

	Returns:
	Sum of distance from each point to its nearest cluster center
	"""
	return sum([min(dist) for dist in k_means_matrix])


	def k_means(x_input, n_cluster=3, n_iter=100, n_tries=10):
	"""Perform 1-D k-means clustering on a list of numbers x_input

	Args:
	x_input: list of numbers
	n_cluster: number of clusters
	n_iter: number of iterations

	Returns:
	centers: list of n_cluster elements containing the cluster centers
	min_dist_idx: list of len(x_input) elements containing the nearest cluster center's id
	error_value: sum of all distance from each point to its nearest cluster center
	"""
	results = []
	for _ in range(n_tries):
	error_value = 0
	rand.seed(None)
	centers = sorted([rand.uniform(0.0, 100.0) for i in range(n_cluster)])
	min_dist_idx = [0] * len(x_input)
	i = 0
	while i < n_iter:
	failed = False
	dist_mat = l2_pairwise_distance(x_input, centers)
	error_value = calculate_error(dist_mat)
	min_dist_idx = [dist.index(min(dist)) for dist in dist_mat]
	centers = [0] * n_cluster
	count = [0] * n_cluster
	for j in range(len(x_input)):
	centers[min_dist_idx[j]] += x_input[j]
	count[min_dist_idx[j]] += 1

	for j in range(n_cluster):
	if count[j] == 0:
	centers = sorted([rand.uniform(0.0, 100.0) for i in range(n_cluster)])
	failed = True
	break

	if failed:
	i = 0
	continue

	for j in range(n_cluster):
	centers[j] = centers[j] / count[j]
	i += 1

	results.append((centers, min_dist_idx, error_value))

	return min(results, key=lambda x: x[2])