analytics-api/services/analytics_engine.ts
2025-09-25 16:28:20 +09:00

208 lines
7.9 KiB
TypeScript

import * as math from 'mathjs';
import * as _ from 'lodash';
import { DataSeries, DataMatrix, Condition, ApiResponse } from '../types/index';
import { RollingWindow } from './rolling_window';
import { KMeans, KMeansOptions } from './kmeans';
import { getWeekNumber, getSameWeekDayLastYear } from './time-helper';
import { calculateLinearRegression, generateForecast, calculatePredictionIntervals, ForecastResult } from './prediction';
/**
 * Turns an arbitrary thrown value into a displayable message.
 *
 * @param error - Whatever was caught; not assumed to be an `Error`.
 * @returns The `message` of an `Error` instance, otherwise the fixed
 *   fallback text `'Unknown error'`.
 */
export const handleError = (error: unknown): string => {
  if (error instanceof Error) {
    return error.message;
  }
  return 'Unknown error';
};
/**
 * Guards against unusable series input.
 *
 * @param series - Series to check.
 * @throws Error if `series` is falsy, or if `series.values` is not an array
 *   or is an empty array.
 */
export const validateSeries = (series: DataSeries): void => {
  const candidate = series ? series.values : undefined;
  const hasValues = Array.isArray(candidate) && candidate.length > 0;
  if (!hasValues) {
    throw new Error('Series must contain at least one value');
  }
};
/**
 * Guards against unusable matrix input.
 *
 * @param matrix - Matrix to check.
 * @throws Error if `matrix` is falsy, or if `matrix.data` is not an array
 *   or is an empty array.
 */
export const validateMatrix = (matrix: DataMatrix): void => {
  const rows = matrix ? matrix.data : undefined;
  const hasRows = Array.isArray(rows) && rows.length > 0;
  if (!hasRows) {
    throw new Error('Matrix must contain at least one row');
  }
};
/**
 * Statistics, rolling-window, clustering, calendar and forecasting operations
 * over `DataSeries` / `DataMatrix` inputs.
 *
 * Every series-based method validates its input with `validateSeries` and
 * throws an `Error` when the series is missing or empty.
 *
 * NOTE: `conditions` parameters are accepted throughout but are not applied
 * yet; see `applyConditions`.
 */
export class AnalyticsEngine {
  /**
   * Selects the values of `series` that satisfy `conditions`.
   *
   * Filtering is not implemented yet: `conditions` is ignored and the series'
   * own `values` array is returned (the same reference, not a copy).
   */
  private applyConditions(series: DataSeries, conditions: Condition[] = []): number[] {
    if (conditions.length === 0) return series.values;
    return series.values; // TODO: Implement filtering
  }

  /**
   * Validates `series`, applies `conditions`, and insists on a non-empty result.
   *
   * @throws Error if the series is invalid or no value survives the conditions.
   */
  private requireValues(series: DataSeries, conditions: Condition[]): number[] {
    validateSeries(series);
    const values = this.applyConditions(series, conditions);
    if (values.length === 0) throw new Error('No data points match conditions');
    return values;
  }

  // Basic statistical functions

  /** Returns the series values with duplicates removed (lodash `uniq`). */
  unique(series: DataSeries): number[] {
    validateSeries(series);
    return _.uniq(series.values);
  }

  /**
   * Arithmetic mean of the selected values, computed by mathjs `mean`.
   *
   * @throws Error if the series is invalid or no value matches.
   */
  mean(series: DataSeries, conditions: Condition[] = []): number {
    return Number(math.mean(this.requireValues(series, conditions)));
  }

  /**
   * Number of selected values.
   *
   * @throws Error if the series is invalid or no value matches.
   */
  count(series: DataSeries, conditions: Condition[] = []): number {
    return this.requireValues(series, conditions).length;
  }

  /** Number of distinct selected values (after lodash `uniq`). */
  distinctCount(series: DataSeries, conditions: Condition[] = []): number {
    validateSeries(series);
    return _.uniq(this.applyConditions(series, conditions)).length;
  }

  /**
   * Variance of the selected values, computed by mathjs `variance` with its
   * default normalization (presumably the unbiased/sample form; confirm
   * against the mathjs documentation).
   *
   * @throws Error if the series is invalid or no value matches.
   */
  variance(series: DataSeries, conditions: Condition[] = []): number {
    return Number(math.variance(this.requireValues(series, conditions)));
  }

  /**
   * Standard deviation of the selected values, computed by mathjs `std` with
   * its default normalization.
   *
   * @throws Error if the series is invalid or no value matches.
   */
  standardDeviation(series: DataSeries, conditions: Condition[] = []): number {
    return Number(math.std(this.requireValues(series, conditions)));
  }

  /**
   * Percentile by linear interpolation between the two nearest ranks.
   *
   * @param percent - Position in the sorted values, from 0 to 100 inclusive.
   * @param ascending - When `false`, values are ranked from largest to
   *   smallest, so `percent` counts down from the maximum.
   * @throws Error if the series is invalid, no value matches, or `percent`
   *   is not a finite number within [0, 100].
   */
  percentile(series: DataSeries, percent: number, ascending: boolean = true, conditions: Condition[] = []): number {
    const values = this.requireValues(series, conditions);
    // Out-of-range percents would index outside the array and yield NaN.
    if (!Number.isFinite(percent) || percent < 0 || percent > 100) {
      throw new Error('Percent must be a finite number between 0 and 100');
    }
    const sorted = _.sortBy(values);
    if (!ascending) sorted.reverse();
    const index = (percent / 100) * (sorted.length - 1);
    const lower = Math.floor(index);
    const upper = Math.ceil(index);
    // Exact hit: return the element itself so an infinite value is not
    // turned into NaN by a multiplication with a zero weight.
    if (lower === upper) return sorted[lower];
    const weight = index - lower;
    return sorted[lower] * (1 - weight) + sorted[upper] * weight;
  }

  /** Median, i.e. the ascending 50th percentile. */
  median(series: DataSeries, conditions: Condition[] = []): number {
    return this.percentile(series, 50, true, conditions);
  }

  /**
   * Most frequent value(s); every value tied for the highest count is returned.
   *
   * Values are tallied by lodash `countBy`, which keys them as strings; the
   * keys are converted back with `Number`.
   */
  mode(series: DataSeries, conditions: Condition[] = []): number[] {
    validateSeries(series);
    const frequency = _.countBy(this.applyConditions(series, conditions));
    // Fold instead of spreading into Math.max: spreading a very large array
    // as call arguments can exceed the engine's argument limit.
    let maxFreq = -Infinity;
    for (const tally of Object.values(frequency)) {
      maxFreq = Math.max(maxFreq, tally);
    }
    return Object.keys(frequency)
      .filter(key => frequency[key] === maxFreq)
      .map(Number);
  }

  /**
   * Largest selected value (NaN if any value is NaN, as with `Math.max`).
   *
   * @throws Error if the series is invalid or no value matches.
   */
  max(series: DataSeries, conditions: Condition[] = []): number {
    const values = this.requireValues(series, conditions);
    // Pairwise fold keeps Math.max semantics without the argument-count
    // limit that `Math.max(...values)` hits on large arrays.
    let largest = -Infinity;
    for (const value of values) {
      largest = Math.max(largest, value);
    }
    return largest;
  }

  /**
   * Smallest selected value (NaN if any value is NaN, as with `Math.min`).
   *
   * @throws Error if the series is invalid or no value matches.
   */
  min(series: DataSeries, conditions: Condition[] = []): number {
    const values = this.requireValues(series, conditions);
    // See `max` for why this is a fold rather than a spread call.
    let smallest = Infinity;
    for (const value of values) {
      smallest = Math.min(smallest, value);
    }
    return smallest;
  }

  /**
   * Pearson correlation coefficient of two equally long series.
   *
   * @returns A value in [-1, 1], or NaN when the coefficient is undefined
   *   because at least one series has no spread (for example a constant or
   *   single-element series).
   * @throws Error if either series is invalid or their lengths differ.
   */
  correlation(series1: DataSeries, series2: DataSeries): number {
    validateSeries(series1);
    validateSeries(series2);
    if (series1.values.length !== series2.values.length) {
      throw new Error('Series must have same length for correlation');
    }
    const x = series1.values;
    const y = series2.values;
    const n = x.length;
    let sumX = 0;
    let sumY = 0;
    let sumXY = 0;
    let sumX2 = 0;
    let sumY2 = 0;
    for (let i = 0; i < n; i++) {
      const xi = x[i];
      const yi = y[i];
      sumX += xi;
      sumY += yi;
      sumXY += xi * yi;
      sumX2 += xi * xi;
      sumY2 += yi * yi;
    }
    const spreadX = n * sumX2 - sumX * sumX;
    const spreadY = n * sumY2 - sumY * sumY;
    // Zero (or rounding-induced negative / NaN) spread leaves the coefficient
    // undefined; report NaN instead of dividing by zero.
    if (!(spreadX > 0) || !(spreadY > 0)) return NaN;
    const r = (n * sumXY - sumX * sumY) / Math.sqrt(spreadX * spreadY);
    // Rounding can push a perfect correlation marginally past +/-1.
    return Math.max(-1, Math.min(1, r));
  }

  // Rolling window functions

  /**
   * Builds every contiguous window of `windowSize` values, advancing one
   * position at a time, and wraps them in a `RollingWindow`.
   *
   * @returns A `RollingWindow` over the windows; it is built from an empty
   *   list when the series is shorter than `windowSize`.
   * @throws Error if the series is invalid or `windowSize` is not a positive
   *   number (NaN included).
   */
  rolling(series: DataSeries, windowSize: number): RollingWindow {
    validateSeries(series);
    // `!(x > 0)` also rejects NaN, which `x <= 0` would let through.
    if (!(windowSize > 0)) {
      throw new Error('Window size must be a positive number.');
    }
    if (series.values.length < windowSize) {
      return new RollingWindow([]);
    }
    const windows: number[][] = [];
    for (let start = 0; start <= series.values.length - windowSize; start++) {
      windows.push(series.values.slice(start, start + windowSize));
    }
    return new RollingWindow(windows);
  }

  /** Mean of each rolling window, as returned by `RollingWindow.mean()`. */
  movingAverage(series: DataSeries, windowSize: number): number[] {
    return this.rolling(series, windowSize).mean();
  }

  // K-means wrapper (uses imported KMeans class)

  /**
   * Clusters the rows of `matrix` with the imported `KMeans` class.
   *
   * @param nClusters - Cluster count passed to `KMeans`.
   * @param options - Options forwarded to `KMeans` unchanged.
   * @returns The `points` and `centroid` read from each resulting cluster, in
   *   matching order.
   * @throws Error if the matrix is missing or has no rows.
   */
  kmeans(matrix: DataMatrix, nClusters: number, options: KMeansOptions = {}): { clusters: number[][][], centroids: number[][] } {
    validateMatrix(matrix);
    const points: number[][] = matrix.data;
    const kmeans = new KMeans(points, nClusters, options);
    const result = kmeans.run();
    // The cluster element type is not visible here, hence the `any` casts.
    const centroids = result.clusters.map(c => (c as any).centroid);
    const clusters = result.clusters.map(c => (c as any).points);
    return { clusters, centroids };
  }

  // Time helper wrapper functions

  /** Delegates to the imported `getWeekNumber` helper. */
  getWeekNumber(dateString: string): number {
    return getWeekNumber(dateString);
  }

  /** Delegates to the imported `getSameWeekDayLastYear` helper. */
  getSameWeekDayLastYear(dateString: string): string {
    return getSameWeekDayLastYear(dateString);
  }

  // ========================================
  // Prediction functions
  // ========================================

  /**
   * Produces a forecast from a model fitted to the series values.
   *
   * The model comes from `calculateLinearRegression`, the forecast from
   * `generateForecast` (given the model, the series length and
   * `forecastPeriods`), and the intervals from `calculatePredictionIntervals`.
   *
   * @returns The forecast, its prediction intervals, and the model's slope
   *   and intercept.
   * @throws Error if the series is invalid.
   */
  timeSeriesForecast(series: DataSeries, forecastPeriods: number): ForecastResult {
    validateSeries(series);
    const model = calculateLinearRegression(series.values);
    const forecast = generateForecast(model, series.values.length, forecastPeriods);
    const predictionIntervals = calculatePredictionIntervals(series.values, model, forecast);
    return {
      forecast,
      predictionIntervals,
      modelParameters: {
        slope: model.slope,
        intercept: model.intercept,
      },
    };
  }
}
// Shared AnalyticsEngine instance, created once when this module is loaded.
export const analytics = new AnalyticsEngine();