analytics-api/kmeans.ts
raymond faa546d474 modify kmeans, add retailing functions
modify the kmeans to mini-batch k-means "ml/kmeans"
add PI値 "retail/purchase-index", 平均客単価 "retail/average-spend"
2025-09-02 07:04:53 +00:00

144 lines
No EOL
4.9 KiB
TypeScript

export type Point = number[];
export interface Cluster {
centroid: Point;
points: Point[];
}
export interface KMeansOptions {
batchSize?: number;
maxIterations?: number;
tolerance?: number;
}
export interface KMeansResult {
clusters: Cluster[];
iterations: number;
converged: boolean;
}
export class KMeans {
private readonly k: number;
private readonly batchSize: number;
private readonly maxIterations: number;
private readonly tolerance: number;
private readonly data: Point[];
private centroids: Point[] = [];
constructor(data: Point[], k: number, options: KMeansOptions = {}) {
this.data = data;
this.k = k;
this.batchSize = options.batchSize ?? 32;
this.maxIterations = options.maxIterations ?? 100;
this.tolerance = options.tolerance ?? 0.0001;
}
private static euclideanDistance(p1: Point, p2: Point): number {
return Math.sqrt(p1.reduce((sum, val, i) => sum + (val - p2[i]) ** 2, 0));
}
private initializeCentroids(): void {
const dataCopy = [...this.data];
for (let i = 0; i < this.k; i++) {
const randomIndex = Math.floor(Math.random() * dataCopy.length);
this.centroids.push([...dataCopy[randomIndex]]);
dataCopy.splice(randomIndex, 1);
}
}
/**
* Creates a random sample of the data.
*/
private createMiniBatch(): Point[] {
const miniBatch: Point[] = [];
const dataCopy = [...this.data];
for (let i = 0; i < this.batchSize && dataCopy.length > 0; i++) {
const randomIndex = Math.floor(Math.random() * dataCopy.length);
miniBatch.push(dataCopy[randomIndex]);
dataCopy.splice(randomIndex, 1);
}
return miniBatch;
}
/**
* Assigns all points in the full dataset to the final centroids.
*/
private assignFinalClusters(): Cluster[] {
const clusters: Cluster[] = this.centroids.map(c => ({ centroid: c, points: [] }));
for (const point of this.data) {
let minDistance = Infinity;
let closestClusterIndex = -1;
for (let i = 0; i < this.centroids.length; i++) {
const distance = KMeans.euclideanDistance(point, this.centroids[i]);
if (distance < minDistance) {
minDistance = distance;
closestClusterIndex = i;
}
}
if (closestClusterIndex !== -1) {
clusters[closestClusterIndex].points.push(point);
}
}
return clusters;
}
public run(): KMeansResult {
this.initializeCentroids();
const clusterPointCounts = new Array(this.k).fill(0);
let converged = false;
let iterations = 0;
for (let i = 0; i < this.maxIterations; i++) {
iterations = i + 1;
const miniBatch = this.createMiniBatch();
const previousCentroids = this.centroids.map(c => [...c]);
// Assign points in the batch and update centroids gradually
for (const point of miniBatch) {
let minDistance = Infinity;
let closestClusterIndex = -1;
for (let j = 0; j < this.k; j++) {
const distance = KMeans.euclideanDistance(point, this.centroids[j]);
if (distance < minDistance) {
minDistance = distance;
closestClusterIndex = j;
}
}
if (closestClusterIndex !== -1) {
clusterPointCounts[closestClusterIndex]++;
const learningRate = 1 / clusterPointCounts[closestClusterIndex];
const centroidToUpdate = this.centroids[closestClusterIndex];
// Move the centroid slightly towards the new point
for (let dim = 0; dim < centroidToUpdate.length; dim++) {
centroidToUpdate[dim] = (1 - learningRate) * centroidToUpdate[dim] + learningRate * point[dim];
}
}
}
// Check for convergence
let totalMovement = 0;
for(let j = 0; j < this.k; j++) {
totalMovement += KMeans.euclideanDistance(previousCentroids[j], this.centroids[j]);
}
if (totalMovement < this.tolerance) {
converged = true;
break;
}
}
// After training, assign all points to the final centroids
const finalClusters = this.assignFinalClusters();
return {
clusters: finalClusters,
iterations,
converged
};
}
}