Modify k-means; add retail functions
Convert the k-means implementation ("ml/kmeans") to mini-batch k-means; add PI値 (purchase index, "retail/purchase-index") and 平均客単価 (average customer spend, "retail/average-spend").
This commit is contained in:
parent
93d192a995
commit
faa546d474
2 changed files with 124 additions and 71 deletions
140
kmeans.ts
140
kmeans.ts
|
|
@ -1,15 +1,16 @@
|
|||
// kmeans.ts - K-Means clustering algorithm
|
||||
|
||||
export interface Point {
|
||||
x: number;
|
||||
y: number;
|
||||
}
|
||||
/** A data point: an array of numbers, one entry per dimension. */
export type Point = number[];
|
||||
|
||||
/**
 * A cluster: one centroid together with the points assigned to it.
 */
export interface Cluster {
  /** The centroid of this cluster. */
  centroid: Point;
  /** The points assigned to this cluster. */
  points: Point[];
}
|
||||
|
||||
/**
 * Optional tuning parameters for the KMeans constructor.
 * Every field may be omitted.
 */
export interface KMeansOptions {
  /** Number of points sampled for each mini-batch. */
  batchSize?: number;
  /** Upper bound on the number of training iterations. */
  maxIterations?: number;
  /** Total centroid movement below which training is treated as converged. */
  tolerance?: number;
}
|
||||
|
||||
export interface KMeansResult {
|
||||
clusters: Cluster[];
|
||||
iterations: number;
|
||||
|
|
@ -18,99 +19,124 @@ export interface KMeansResult {
|
|||
|
||||
export class KMeans {
|
||||
private readonly k: number;
|
||||
private readonly batchSize: number;
|
||||
private readonly maxIterations: number;
|
||||
private readonly tolerance: number;
|
||||
private readonly data: Point[];
|
||||
private clusters: Cluster[] = [];
|
||||
private centroids: Point[] = [];
|
||||
|
||||
/**
 * Creates a KMeans instance.
 *
 * @param data - Points to cluster; the array is stored by reference, not copied.
 * @param k - Number of clusters.
 * @param options - Optional tuning parameters. Unset fields fall back to
 *   batchSize 32, maxIterations 100 and tolerance 0.0001.
 */
constructor(data: Point[], k: number, options: KMeansOptions = {}) {
  const { batchSize, maxIterations, tolerance } = options;

  this.k = k;
  this.data = data;
  // `??` (not `||`) so an explicit 0 is kept rather than replaced by the default.
  this.tolerance = tolerance ?? 0.0001;
  this.maxIterations = maxIterations ?? 100;
  this.batchSize = batchSize ?? 32;
}
|
||||
|
||||
/**
 * Computes the Euclidean distance between two points.
 *
 * Walks the entries of `p1` and pairs each with the entry at the same index
 * in `p2`.
 *
 * @param p1 - First point.
 * @param p2 - Second point.
 * @returns The square root of the summed squared per-dimension differences.
 */
private static euclideanDistance(p1: Point, p2: Point): number {
  let squaredSum = 0;
  p1.forEach((coordinate, dim) => {
    squaredSum += (coordinate - p2[dim]) ** 2;
  });
  return Math.sqrt(squaredSum);
}
|
||||
|
||||
/**
 * Seeds `this.centroids` with copies of `k` data points picked at random
 * without replacement.
 *
 * Centroids left over from an earlier call are discarded first, so calling
 * this method again does not keep growing the centroid list.
 *
 * @throws RangeError if `k` exceeds the number of data points; there would be
 *   nothing left to pick from once the data is exhausted.
 */
private initializeCentroids(): void {
  if (this.k > this.data.length) {
    throw new RangeError(
      `Cannot pick ${this.k} centroids from ${this.data.length} data points`
    );
  }

  // Start from a clean slate instead of appending to earlier centroids.
  this.centroids = [];

  const candidates = [...this.data];
  for (let i = 0; i < this.k; i++) {
    const randomIndex = Math.floor(Math.random() * candidates.length);
    // splice removes the pick so the same data point is never chosen twice.
    const [picked] = candidates.splice(randomIndex, 1);
    // Copy the point: centroids are updated in place during training and must
    // not alias entries of the input data.
    this.centroids.push([...picked]);
  }
}
|
||||
|
||||
private assignClusters(pointAssignments: number[]): boolean {
|
||||
let hasChanged = false;
|
||||
|
||||
for (const cluster of this.clusters) {
|
||||
cluster.points = [];
|
||||
/**
 * Draws a random sample of the data, without replacement.
 *
 * The sample holds at most `batchSize` points and never more than the dataset
 * contains. Points are returned by reference, not copied.
 *
 * Uses a partial Fisher-Yates shuffle over a virtual index array, so the cost
 * is proportional to the batch size rather than to the size of the dataset.
 *
 * @returns The sampled points, in the order they were drawn.
 */
private createMiniBatch(): Point[] {
  const total = this.data.length;
  const size = Math.min(this.batchSize, total);

  // Sparse record of the virtual array [0, 1, ..., total - 1]: only positions
  // that have been overwritten are stored; any other position holds its own index.
  const moved = new Map<number, number>();
  const miniBatch: Point[] = [];

  for (let i = 0; i < size; i++) {
    // Pick a position among the not-yet-drawn tail [i, total).
    const j = i + Math.floor(Math.random() * (total - i));
    const pickedIndex = moved.get(j) ?? j;
    // Move the value at position i into slot j so it stays available to draw.
    moved.set(j, moved.get(i) ?? i);
    miniBatch.push(this.data[pickedIndex]);
  }

  return miniBatch;
}
|
||||
|
||||
/**
 * Builds the final clusters by assigning every point of the full dataset to
 * its nearest centroid.
 *
 * Ties go to the centroid with the lowest index. A point for which no
 * centroid yields a distance below Infinity (for example when there are no
 * centroids) is left unassigned.
 *
 * @returns One cluster per centroid, each holding the points closest to it.
 */
private assignFinalClusters(): Cluster[] {
  const result: Cluster[] = this.centroids.map(
    (centroid): Cluster => ({ centroid, points: [] })
  );

  for (let p = 0; p < this.data.length; p++) {
    const point = this.data[p];
    let nearest = -1;
    let nearestDistance = Infinity;

    for (const [index, centroid] of this.centroids.entries()) {
      const candidate = KMeans.euclideanDistance(point, centroid);
      // Strict comparison keeps the first centroid on ties.
      if (candidate < nearestDistance) {
        nearestDistance = candidate;
        nearest = index;
      }
    }

    if (nearest === -1) {
      continue;
    }
    result[nearest].points.push(point);
  }

  return result;
}
|
||||
|
||||
public run(): KMeansResult {
|
||||
this.initializeCentroids();
|
||||
|
||||
const pointAssignments = new Array(this.data.length).fill(-1);
|
||||
|
||||
let iterations = 0;
|
||||
const clusterPointCounts = new Array(this.k).fill(0);
|
||||
let converged = false;
|
||||
|
||||
let iterations = 0;
|
||||
|
||||
for (let i = 0; i < this.maxIterations; i++) {
|
||||
iterations = i + 1;
|
||||
const hasChanged = this.assignClusters(pointAssignments);
|
||||
this.updateCentroids();
|
||||
const miniBatch = this.createMiniBatch();
|
||||
const previousCentroids = this.centroids.map(c => [...c]);
|
||||
|
||||
if (!hasChanged) {
|
||||
// Assign points in the batch and update centroids gradually
|
||||
for (const point of miniBatch) {
|
||||
let minDistance = Infinity;
|
||||
let closestClusterIndex = -1;
|
||||
|
||||
for (let j = 0; j < this.k; j++) {
|
||||
const distance = KMeans.euclideanDistance(point, this.centroids[j]);
|
||||
if (distance < minDistance) {
|
||||
minDistance = distance;
|
||||
closestClusterIndex = j;
|
||||
}
|
||||
}
|
||||
|
||||
if (closestClusterIndex !== -1) {
|
||||
clusterPointCounts[closestClusterIndex]++;
|
||||
const learningRate = 1 / clusterPointCounts[closestClusterIndex];
|
||||
const centroidToUpdate = this.centroids[closestClusterIndex];
|
||||
|
||||
// Move the centroid slightly towards the new point
|
||||
for (let dim = 0; dim < centroidToUpdate.length; dim++) {
|
||||
centroidToUpdate[dim] = (1 - learningRate) * centroidToUpdate[dim] + learningRate * point[dim];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for convergence
|
||||
let totalMovement = 0;
|
||||
for(let j = 0; j < this.k; j++) {
|
||||
totalMovement += KMeans.euclideanDistance(previousCentroids[j], this.centroids[j]);
|
||||
}
|
||||
|
||||
if (totalMovement < this.tolerance) {
|
||||
converged = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// After training, assign all points to the final centroids
|
||||
const finalClusters = this.assignFinalClusters();
|
||||
|
||||
return {
|
||||
clusters: this.clusters,
|
||||
clusters: finalClusters,
|
||||
iterations,
|
||||
converged
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue