modify kmeans, add retailing functions

Convert the k-means implementation to mini-batch k-means ("ml/kmeans").
Add PI値 (purchase index, "retail/purchase-index") and 平均客単価 (average spend per customer, "retail/average-spend").
This commit is contained in:
raymond 2025-09-02 07:04:53 +00:00
parent 93d192a995
commit faa546d474
2 changed files with 124 additions and 71 deletions

138
kmeans.ts
View file

@ -1,15 +1,16 @@
// kmeans.ts - Mini-batch K-Means clustering algorithm
export interface Point {
x: number;
y: number;
}
export type Point = number[];
/** One cluster of a k-means result: a centroid plus the points assigned to it. */
export interface Cluster {
/** Centre of the cluster. */
centroid: Point;
/** Points assigned to this centroid. */
points: Point[];
}
/** Optional tuning values accepted by the `KMeans` constructor. */
export interface KMeansOptions {
/** Maximum number of points sampled for each training iteration. Defaults to 32. */
batchSize?: number;
/** Upper bound on the number of training iterations. Defaults to 100. */
maxIterations?: number;
/**
 * Training stops early once the summed centroid movement of one iteration
 * drops below this value. Defaults to 0.0001.
 */
tolerance?: number;
}
export interface KMeansResult {
clusters: Cluster[];
iterations: number;
@ -18,99 +19,124 @@ export interface KMeansResult {
export class KMeans {
private readonly k: number;
private readonly batchSize: number;
private readonly maxIterations: number;
private readonly tolerance: number;
private readonly data: Point[];
private clusters: Cluster[] = [];
private centroids: Point[] = [];
constructor(data: Point[], k: number, maxIterations: number = 50) {
this.k = k;
this.maxIterations = maxIterations;
/**
 * Creates a mini-batch k-means runner.
 *
 * @param data - Points to cluster. The array is stored by reference, not copied.
 * @param k - Number of centroids to fit.
 * @param options - Optional tuning values. Any missing field falls back to its
 *   default (batchSize 32, maxIterations 100, tolerance 0.0001). An explicit
 *   `null` is treated the same as an omitted argument.
 */
constructor(data: Point[], k: number, options: KMeansOptions = {}) {
  // The `= {}` default only replaces `undefined`. Untyped callers (for example
  // a JSON request body) can still hand over `null`, which would otherwise
  // throw on the property reads below.
  const settings: KMeansOptions = options ?? {};
  this.data = data;
  this.k = k;
  this.batchSize = settings.batchSize ?? 32;
  this.maxIterations = settings.maxIterations ?? 100;
  this.tolerance = settings.tolerance ?? 0.0001;
}
private static euclideanDistance(p1: Point, p2: Point): number {
const dx = p2.x - p1.x;
const dy = p2.y - p1.y;
return Math.sqrt(dx * dx + dy * dy);
return Math.sqrt(p1.reduce((sum, val, i) => sum + (val - p2[i]) ** 2, 0));
}
private initializeCentroids(): void {
const centroids: Point[] = [];
const dataCopy = [...this.data];
for (let i = 0; i < this.k && dataCopy.length > 0; i++) {
for (let i = 0; i < this.k; i++) {
const randomIndex = Math.floor(Math.random() * dataCopy.length);
const centroid = { ...dataCopy[randomIndex] };
centroids.push(centroid);
this.centroids.push([...dataCopy[randomIndex]]);
dataCopy.splice(randomIndex, 1);
}
this.clusters = centroids.map(c => ({ centroid: c, points: [] }));
}
private assignClusters(pointAssignments: number[]): boolean {
let hasChanged = false;
for (const cluster of this.clusters) {
cluster.points = [];
/**
 * Draws a random sample of the dataset without replacement.
 *
 * @returns Up to `batchSize` points (fewer when the dataset is smaller). The
 *   returned entries are the same point objects held in `this.data`.
 */
private createMiniBatch(): Point[] {
  const pool = [...this.data];
  const sample: Point[] = [];
  let drawn = 0;
  while (drawn < this.batchSize && pool.length > 0) {
    const pick = Math.floor(Math.random() * pool.length);
    // splice() removes the chosen point from the pool, so it cannot be drawn twice.
    sample.push(...pool.splice(pick, 1));
    drawn++;
  }
  return sample;
}
this.data.forEach((point, pointIndex) => {
/**
* Assigns all points in the full dataset to the final centroids.
*/
private assignFinalClusters(): Cluster[] {
const clusters: Cluster[] = this.centroids.map(c => ({ centroid: c, points: [] }));
for (const point of this.data) {
let minDistance = Infinity;
let closestClusterIndex = -1;
this.clusters.forEach((cluster, clusterIndex) => {
const distance = KMeans.euclideanDistance(point, cluster.centroid);
for (let i = 0; i < this.centroids.length; i++) {
const distance = KMeans.euclideanDistance(point, this.centroids[i]);
if (distance < minDistance) {
minDistance = distance;
closestClusterIndex = clusterIndex;
closestClusterIndex = i;
}
});
if (pointAssignments[pointIndex] !== closestClusterIndex) {
hasChanged = true;
pointAssignments[pointIndex] = closestClusterIndex;
}
if (closestClusterIndex !== -1) {
this.clusters[closestClusterIndex].points.push(point);
clusters[closestClusterIndex].points.push(point);
}
});
return hasChanged;
}
private updateCentroids(): void {
for (const cluster of this.clusters) {
if (cluster.points.length === 0) continue;
const sumX = cluster.points.reduce((sum, p) => sum + p.x, 0);
const sumY = cluster.points.reduce((sum, p) => sum + p.y, 0);
cluster.centroid.x = sumX / cluster.points.length;
cluster.centroid.y = sumY / cluster.points.length;
}
return clusters;
}
public run(): KMeansResult {
this.initializeCentroids();
const pointAssignments = new Array(this.data.length).fill(-1);
let iterations = 0;
const clusterPointCounts = new Array(this.k).fill(0);
let converged = false;
let iterations = 0;
for (let i = 0; i < this.maxIterations; i++) {
iterations = i + 1;
const hasChanged = this.assignClusters(pointAssignments);
this.updateCentroids();
const miniBatch = this.createMiniBatch();
const previousCentroids = this.centroids.map(c => [...c]);
if (!hasChanged) {
// Assign points in the batch and update centroids gradually
for (const point of miniBatch) {
let minDistance = Infinity;
let closestClusterIndex = -1;
for (let j = 0; j < this.k; j++) {
const distance = KMeans.euclideanDistance(point, this.centroids[j]);
if (distance < minDistance) {
minDistance = distance;
closestClusterIndex = j;
}
}
if (closestClusterIndex !== -1) {
clusterPointCounts[closestClusterIndex]++;
const learningRate = 1 / clusterPointCounts[closestClusterIndex];
const centroidToUpdate = this.centroids[closestClusterIndex];
// Move the centroid slightly towards the new point
for (let dim = 0; dim < centroidToUpdate.length; dim++) {
centroidToUpdate[dim] = (1 - learningRate) * centroidToUpdate[dim] + learningRate * point[dim];
}
}
}
// Check for convergence
let totalMovement = 0;
for(let j = 0; j < this.k; j++) {
totalMovement += KMeans.euclideanDistance(previousCentroids[j], this.centroids[j]);
}
if (totalMovement < this.tolerance) {
converged = true;
break;
}
}
// After training, assign all points to the final centroids
const finalClusters = this.assignFinalClusters();
return {
clusters: this.clusters,
clusters: finalClusters,
iterations,
converged
};

View file

@ -6,7 +6,7 @@
import express from 'express';
import * as math from 'mathjs';
import * as _ from 'lodash';
import { KMeans, Point } from './kmeans';
import { KMeans, KMeansOptions } from './kmeans';
import { getWeekNumber, getSameWeekDayLastYear } from './time-helper';
const app = express();
@ -223,18 +223,19 @@ class AnalyticsEngine {
}
// K-means wrapper (uses imported KMeans class)
/**
 * K-means wrapper restricted to two-column matrices.
 *
 * Converts every row into an `{ x, y }` point, runs `KMeans` with `nClusters`,
 * and converts the resulting centroids and member points back into `[x, y]` pairs.
 *
 * NOTE(review): a second `kmeans` definition that accepts an `options` argument
 * follows directly below; this one looks like the superseded side of the diff.
 * Confirm which definition is live before relying on it.
 *
 * @throws Error if the first row of `matrix.data` does not have exactly 2 columns.
 */
kmeans(matrix: DataMatrix, nClusters: number): { clusters: number[][][], centroids: number[][] } {
validateMatrix(matrix);
// Only the first row's width is checked here.
if (matrix.data[0].length !== 2) {
throw new Error('K-means implementation currently only supports 2D data.');
}
const points = matrix.data.map(row => ({ x: row[0], y: row[1] }));
const kmeans = new KMeans(points, nClusters);
const result = kmeans.run();
const centroids = result.clusters.map(c => [c.centroid.x, c.centroid.y]);
const clusters = result.clusters.map(c => c.points.map(p => [p.x, p.y]));
return { clusters, centroids };
}
/**
 * Clusters the rows of `matrix` with the imported `KMeans` class.
 *
 * @param matrix - Input matrix; every row of `matrix.data` is used as one point.
 * @param nClusters - Number of clusters. Must be a positive integer that does
 *   not exceed the number of rows.
 * @param options - Optional settings forwarded to `KMeans`. `null` and
 *   `undefined` both mean "use the defaults".
 * @returns The points of each cluster and the matching centroids; both arrays
 *   are in the same cluster order.
 * @throws Error if `nClusters` is not a positive integer or is larger than the
 *   number of rows.
 */
kmeans(matrix: DataMatrix, nClusters: number, options: KMeansOptions = {}): { clusters: number[][][], centroids: number[][] } {
  validateMatrix(matrix);
  const points: number[][] = matrix.data;

  // Reject cluster counts the algorithm cannot satisfy, so the caller gets a
  // clear message instead of an obscure TypeError/RangeError from deep inside
  // the clustering code.
  if (!Number.isInteger(nClusters) || nClusters < 1) {
    throw new Error('Number of clusters must be a positive integer');
  }
  if (nClusters > points.length) {
    throw new Error('Number of clusters cannot exceed the number of data points');
  }

  // The default parameter only covers `undefined`; a `null` options value
  // (possible when it comes from a JSON body) is normalised here as well.
  const kmeans = new KMeans(points, nClusters, options ?? {});
  const result = kmeans.run();

  const centroids = result.clusters.map(c => c.centroid);
  const clusters = result.clusters.map(c => c.points);
  return { clusters, centroids };
}
// Time helper wrapper functions
getWeekNumber(dateString: string): number {
@ -266,6 +267,20 @@ class AnalyticsEngine {
if (salePrice === 0) throw new Error('Sale price cannot be zero');
return (salePrice - cost) / salePrice;
}
/**
 * Average spend per customer: total revenue divided by the number of customers.
 *
 * @param totalRevenue - Revenue to distribute over the customers.
 * @param numberOfCustomers - Customer count used as the divisor.
 * @returns `totalRevenue / numberOfCustomers`.
 * @throws Error when `numberOfCustomers` is exactly 0.
 */
averageSpendPerCustomer(totalRevenue: number, numberOfCustomers: number): number {
  if (numberOfCustomers !== 0) {
    return totalRevenue / numberOfCustomers;
  }
  throw new Error('Number of customers cannot be zero');
}
/**
 * Purchase index: items sold per customer, scaled by 1000.
 *
 * @param totalItemsSold - Number of items sold.
 * @param numberOfCustomers - Customer count used as the divisor.
 * @returns `(totalItemsSold / numberOfCustomers) * 1000`.
 * @throws Error when `numberOfCustomers` is exactly 0.
 */
purchaseIndex(totalItemsSold: number, numberOfCustomers: number): number {
  if (numberOfCustomers !== 0) {
    const itemsPerCustomer = totalItemsSold / numberOfCustomers;
    return itemsPerCustomer * 1000;
  }
  throw new Error('Number of customers cannot be zero');
}
}
// Initialize analytics engine
@ -325,7 +340,9 @@ createRoute(app, 'post', '/api/series/rolling', (req) => {
});
// Machine learning routes
createRoute(app, 'post', '/api/ml/kmeans', (req) => analytics.kmeans(req.body.matrix, req.body.nClusters));
// POST /api/ml/kmeans: reads `matrix`, `nClusters` and `options` from the request body.
createRoute(app, 'post', '/api/ml/kmeans', (req) => {
  const { matrix, nClusters, options } = req.body;
  return analytics.kmeans(matrix, nClusters, options);
});
// Time helper routes
createRoute(app, 'post', '/api/time/week-number', (req) => {
@ -344,6 +361,16 @@ createRoute(app, 'post', '/api/retail/lift-value', (req) => analytics.liftValue(
createRoute(app, 'post', '/api/retail/cost-ratio', (req) => analytics.costRatio(req.body.cost, req.body.salePrice));
createRoute(app, 'post', '/api/retail/gross-margin', (req) => analytics.grossMarginRate(req.body.salePrice, req.body.cost));
// POST /api/retail/average-spend: reads `totalRevenue` and `numberOfCustomers` from the request body.
createRoute(app, 'post', '/api/retail/average-spend', (req) =>
  analytics.averageSpendPerCustomer(req.body.totalRevenue, req.body.numberOfCustomers));
// POST /api/retail/purchase-index: reads `totalItemsSold` and `numberOfCustomers` from the request body.
createRoute(app, 'post', '/api/retail/purchase-index', (req) =>
  analytics.purchaseIndex(req.body.totalItemsSold, req.body.numberOfCustomers));
// ========================================
// ERROR HANDLING
// ========================================