From faa546d474a1f29e02d6b340a70a16682e53e3eb Mon Sep 17 00:00:00 2001 From: raymond Date: Tue, 2 Sep 2025 07:04:53 +0000 Subject: [PATCH] modify kmeans, add retailing functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit modify the kmeans to mini-batch k-means "ml/kmeans" add PI値 "retail/purchase-index", 平均客単価 "retail/average-spend" --- kmeans.ts | 140 ++++++++++++++++++++++++++++++++---------------------- server.ts | 55 +++++++++++++++------ 2 files changed, 124 insertions(+), 71 deletions(-) diff --git a/kmeans.ts b/kmeans.ts index 0f837c1..12b85e2 100644 --- a/kmeans.ts +++ b/kmeans.ts @@ -1,15 +1,16 @@ -// kmeans.ts - K-Means clustering algorithm - -export interface Point { - x: number; - y: number; -} +export type Point = number[]; export interface Cluster { centroid: Point; points: Point[]; } +export interface KMeansOptions { + batchSize?: number; + maxIterations?: number; + tolerance?: number; +} + export interface KMeansResult { clusters: Cluster[]; iterations: number; @@ -18,99 +19,124 @@ export interface KMeansResult { export class KMeans { private readonly k: number; + private readonly batchSize: number; private readonly maxIterations: number; + private readonly tolerance: number; private readonly data: Point[]; - private clusters: Cluster[] = []; + private centroids: Point[] = []; - constructor(data: Point[], k: number, maxIterations: number = 50) { - this.k = k; - this.maxIterations = maxIterations; + constructor(data: Point[], k: number, options: KMeansOptions = {}) { this.data = data; + this.k = k; + this.batchSize = options.batchSize ?? 32; + this.maxIterations = options.maxIterations ?? 100; + this.tolerance = options.tolerance ?? 
0.0001; } private static euclideanDistance(p1: Point, p2: Point): number { - const dx = p2.x - p1.x; - const dy = p2.y - p1.y; - return Math.sqrt(dx * dx + dy * dy); + return Math.sqrt(p1.reduce((sum, val, i) => sum + (val - p2[i]) ** 2, 0)); } private initializeCentroids(): void { - const centroids: Point[] = []; const dataCopy = [...this.data]; - for (let i = 0; i < this.k && dataCopy.length > 0; i++) { + for (let i = 0; i < this.k; i++) { const randomIndex = Math.floor(Math.random() * dataCopy.length); - const centroid = { ...dataCopy[randomIndex] }; - centroids.push(centroid); + this.centroids.push([...dataCopy[randomIndex]]); dataCopy.splice(randomIndex, 1); } - this.clusters = centroids.map(c => ({ centroid: c, points: [] })); } - private assignClusters(pointAssignments: number[]): boolean { - let hasChanged = false; - - for (const cluster of this.clusters) { - cluster.points = []; + /** + * Creates a random sample of the data. + */ + private createMiniBatch(): Point[] { + const miniBatch: Point[] = []; + const dataCopy = [...this.data]; + for (let i = 0; i < this.batchSize && dataCopy.length > 0; i++) { + const randomIndex = Math.floor(Math.random() * dataCopy.length); + miniBatch.push(dataCopy[randomIndex]); + dataCopy.splice(randomIndex, 1); } + return miniBatch; + } + + /** + * Assigns all points in the full dataset to the final centroids. 
+ */ + private assignFinalClusters(): Cluster[] { + const clusters: Cluster[] = this.centroids.map(c => ({ centroid: c, points: [] })); - this.data.forEach((point, pointIndex) => { + for (const point of this.data) { let minDistance = Infinity; let closestClusterIndex = -1; - - this.clusters.forEach((cluster, clusterIndex) => { - const distance = KMeans.euclideanDistance(point, cluster.centroid); + for (let i = 0; i < this.centroids.length; i++) { + const distance = KMeans.euclideanDistance(point, this.centroids[i]); if (distance < minDistance) { minDistance = distance; - closestClusterIndex = clusterIndex; + closestClusterIndex = i; } - }); - - if (pointAssignments[pointIndex] !== closestClusterIndex) { - hasChanged = true; - pointAssignments[pointIndex] = closestClusterIndex; } - if (closestClusterIndex !== -1) { - this.clusters[closestClusterIndex].points.push(point); + clusters[closestClusterIndex].points.push(point); } - }); - - return hasChanged; - } - - private updateCentroids(): void { - for (const cluster of this.clusters) { - if (cluster.points.length === 0) continue; - - const sumX = cluster.points.reduce((sum, p) => sum + p.x, 0); - const sumY = cluster.points.reduce((sum, p) => sum + p.y, 0); - - cluster.centroid.x = sumX / cluster.points.length; - cluster.centroid.y = sumY / cluster.points.length; } + return clusters; } public run(): KMeansResult { this.initializeCentroids(); - const pointAssignments = new Array(this.data.length).fill(-1); - - let iterations = 0; + const clusterPointCounts = new Array(this.k).fill(0); let converged = false; - + let iterations = 0; + for (let i = 0; i < this.maxIterations; i++) { iterations = i + 1; - const hasChanged = this.assignClusters(pointAssignments); - this.updateCentroids(); + const miniBatch = this.createMiniBatch(); + const previousCentroids = this.centroids.map(c => [...c]); - if (!hasChanged) { + // Assign points in the batch and update centroids gradually + for (const point of miniBatch) { + let 
minDistance = Infinity; + let closestClusterIndex = -1; + + for (let j = 0; j < this.k; j++) { + const distance = KMeans.euclideanDistance(point, this.centroids[j]); + if (distance < minDistance) { + minDistance = distance; + closestClusterIndex = j; + } + } + + if (closestClusterIndex !== -1) { + clusterPointCounts[closestClusterIndex]++; + const learningRate = 1 / clusterPointCounts[closestClusterIndex]; + const centroidToUpdate = this.centroids[closestClusterIndex]; + + // Move the centroid slightly towards the new point + for (let dim = 0; dim < centroidToUpdate.length; dim++) { + centroidToUpdate[dim] = (1 - learningRate) * centroidToUpdate[dim] + learningRate * point[dim]; + } + } + } + + // Check for convergence + let totalMovement = 0; + for(let j = 0; j < this.k; j++) { + totalMovement += KMeans.euclideanDistance(previousCentroids[j], this.centroids[j]); + } + + if (totalMovement < this.tolerance) { converged = true; break; } } + // After training, assign all points to the final centroids + const finalClusters = this.assignFinalClusters(); + return { - clusters: this.clusters, + clusters: finalClusters, iterations, converged }; diff --git a/server.ts b/server.ts index 99769ec..b686305 100644 --- a/server.ts +++ b/server.ts @@ -6,7 +6,7 @@ import express from 'express'; import * as math from 'mathjs'; import * as _ from 'lodash'; -import { KMeans, Point } from './kmeans'; +import { KMeans, KMeansOptions } from './kmeans'; import { getWeekNumber, getSameWeekDayLastYear } from './time-helper'; const app = express(); @@ -223,18 +223,19 @@ class AnalyticsEngine { } // K-means wrapper (uses imported KMeans class) - kmeans(matrix: DataMatrix, nClusters: number): { clusters: number[][][], centroids: number[][] } { - validateMatrix(matrix); - if (matrix.data[0].length !== 2) { - throw new Error('K-means implementation currently only supports 2D data.'); - } - const points = matrix.data.map(row => ({ x: row[0], y: row[1] })); - const kmeans = new KMeans(points, 
nClusters); - const result = kmeans.run(); - const centroids = result.clusters.map(c => [c.centroid.x, c.centroid.y]); - const clusters = result.clusters.map(c => c.points.map(p => [p.x, p.y])); - return { clusters, centroids }; - } + kmeans(matrix: DataMatrix, nClusters: number, options: KMeansOptions = {}): { clusters: number[][][], centroids: number[][] } { + validateMatrix(matrix); + const points: number[][] = matrix.data; + + // Cluster with the imported KMeans class (mini-batch k-means) + const kmeans = new KMeans(points, nClusters, options); + const result = kmeans.run(); + + const centroids = result.clusters.map(c => c.centroid); + const clusters = result.clusters.map(c => c.points); + + return { clusters, centroids }; +} // Time helper wrapper functions getWeekNumber(dateString: string): number { @@ -266,6 +267,20 @@ class AnalyticsEngine { if (salePrice === 0) throw new Error('Sale price cannot be zero'); return (salePrice - cost) / salePrice; } + + averageSpendPerCustomer(totalRevenue: number, numberOfCustomers: number): number { + if (numberOfCustomers === 0) { + throw new Error('Number of customers cannot be zero'); + } + return totalRevenue / numberOfCustomers; + } + + purchaseIndex(totalItemsSold: number, numberOfCustomers: number): number { + if (numberOfCustomers === 0) { + throw new Error('Number of customers cannot be zero'); + } + return (totalItemsSold / numberOfCustomers) * 1000; + } } // Initialize analytics engine @@ -325,7 +340,9 @@ createRoute(app, 'post', '/api/series/rolling', (req) => { }); // Machine learning routes -createRoute(app, 'post', '/api/ml/kmeans', (req) => analytics.kmeans(req.body.matrix, req.body.nClusters)); +createRoute(app, 'post', '/api/ml/kmeans', (req) => { + return analytics.kmeans(req.body.matrix, req.body.nClusters, req.body.options); +}); // Time helper routes createRoute(app, 'post', '/api/time/week-number', (req) => { @@ -344,6 +361,16 @@ createRoute(app, 'post', '/api/retail/lift-value', (req) => analytics.liftValue( createRoute(app, 
'post', '/api/retail/cost-ratio', (req) => analytics.costRatio(req.body.cost, req.body.salePrice)); createRoute(app, 'post', '/api/retail/gross-margin', (req) => analytics.grossMarginRate(req.body.salePrice, req.body.cost)); +createRoute(app, 'post', '/api/retail/average-spend', (req) => { + const { totalRevenue, numberOfCustomers } = req.body; + return analytics.averageSpendPerCustomer(totalRevenue, numberOfCustomers); +}); + +createRoute(app, 'post', '/api/retail/purchase-index', (req) => { + const { totalItemsSold, numberOfCustomers } = req.body; + return analytics.purchaseIndex(totalItemsSold, numberOfCustomers); +}); + // ======================================== // ERROR HANDLING // ========================================