export type Point = number[]; export interface Cluster { centroid: Point; points: Point[]; } export interface KMeansOptions { batchSize?: number; maxIterations?: number; tolerance?: number; } export interface KMeansResult { clusters: Cluster[]; iterations: number; converged: boolean; } export class KMeans { private readonly k: number; private readonly batchSize: number; private readonly maxIterations: number; private readonly tolerance: number; private readonly data: Point[]; private centroids: Point[] = []; constructor(data: Point[], k: number, options: KMeansOptions = {}) { this.data = data; this.k = k; this.batchSize = options.batchSize ?? 32; this.maxIterations = options.maxIterations ?? 100; this.tolerance = options.tolerance ?? 0.0001; } private static euclideanDistance(p1: Point, p2: Point): number { return Math.sqrt(p1.reduce((sum, val, i) => sum + (val - p2[i]) ** 2, 0)); } private initializeCentroids(): void { const dataCopy = [...this.data]; for (let i = 0; i < this.k; i++) { const randomIndex = Math.floor(Math.random() * dataCopy.length); this.centroids.push([...dataCopy[randomIndex]]); dataCopy.splice(randomIndex, 1); } } /** * Creates a random sample of the data. */ private createMiniBatch(): Point[] { const miniBatch: Point[] = []; const dataCopy = [...this.data]; for (let i = 0; i < this.batchSize && dataCopy.length > 0; i++) { const randomIndex = Math.floor(Math.random() * dataCopy.length); miniBatch.push(dataCopy[randomIndex]); dataCopy.splice(randomIndex, 1); } return miniBatch; } /** * Assigns all points in the full dataset to the final centroids. */ private assignFinalClusters(): Cluster[] { const clusters: Cluster[] = this.centroids.map(c => ({ centroid: c, points: [] })); for (const point of this.data) { let minDistance = Infinity; let closestClusterIndex = -1; for (let i = 0; i < this.centroids.length; i++) { const distance = KMeans.euclideanDistance(point, this.centroids[i]); if (distance < minDistance) { minDistance = distance; closestClusterIndex = i; } } if (closestClusterIndex !== -1) { clusters[closestClusterIndex].points.push(point); } } return clusters; } public run(): KMeansResult { this.initializeCentroids(); const clusterPointCounts = new Array(this.k).fill(0); let converged = false; let iterations = 0; for (let i = 0; i < this.maxIterations; i++) { iterations = i + 1; const miniBatch = this.createMiniBatch(); const previousCentroids = this.centroids.map(c => [...c]); // Assign points in the batch and update centroids gradually for (const point of miniBatch) { let minDistance = Infinity; let closestClusterIndex = -1; for (let j = 0; j < this.k; j++) { const distance = KMeans.euclideanDistance(point, this.centroids[j]); if (distance < minDistance) { minDistance = distance; closestClusterIndex = j; } } if (closestClusterIndex !== -1) { clusterPointCounts[closestClusterIndex]++; const learningRate = 1 / clusterPointCounts[closestClusterIndex]; const centroidToUpdate = this.centroids[closestClusterIndex]; // Move the centroid slightly towards the new point for (let dim = 0; dim < centroidToUpdate.length; dim++) { centroidToUpdate[dim] = (1 - learningRate) * centroidToUpdate[dim] + learningRate * point[dim]; } } } // Check for convergence let totalMovement = 0; for(let j = 0; j < this.k; j++) { totalMovement += KMeans.euclideanDistance(previousCentroids[j], this.centroids[j]); } if (totalMovement < this.tolerance) { converged = true; break; } } // After training, assign all points to the final centroids const finalClusters = this.assignFinalClusters(); return { clusters: finalClusters, iterations, converged }; } }