208 lines
7.9 KiB
TypeScript
208 lines
7.9 KiB
TypeScript
import * as math from 'mathjs';
|
|
import * as _ from 'lodash';
|
|
import { DataSeries, DataMatrix, Condition, ApiResponse } from '../types/index';
|
|
import { RollingWindow } from './rolling_window';
|
|
import { KMeans, KMeansOptions } from './kmeans';
|
|
import { getWeekNumber, getSameWeekDayLastYear } from './time-helper';
|
|
import { calculateLinearRegression, generateForecast, calculatePredictionIntervals, ForecastResult } from './prediction';
|
|
|
|
/**
 * Converts an arbitrary thrown value into a displayable message.
 *
 * @param error - Whatever was caught; may be of any type.
 * @returns The `message` of an `Error` instance, or the fixed text
 *   `'Unknown error'` for every other kind of value.
 */
export const handleError = (error: unknown): string => {
  if (error instanceof Error) {
    return error.message;
  }
  return 'Unknown error';
};
|
|
/**
 * Guards against unusable series input.
 *
 * @param series - Series to check.
 * @throws Error when `series` is falsy, when `series.values` is not an array,
 *   or when that array is empty.
 */
export const validateSeries = (series: DataSeries): void => {
  const hasValues =
    Boolean(series) && Array.isArray(series.values) && series.values.length > 0;
  if (hasValues) {
    return;
  }
  throw new Error('Series must contain at least one value');
};
|
|
|
|
/**
 * Guards against unusable matrix input.
 *
 * @param matrix - Matrix to check.
 * @throws Error when `matrix` is falsy, when `matrix.data` is not an array,
 *   or when that array has no rows.
 */
export const validateMatrix = (matrix: DataMatrix): void => {
  const hasRows =
    Boolean(matrix) && Array.isArray(matrix.data) && matrix.data.length > 0;
  if (hasRows) {
    return;
  }
  throw new Error('Matrix must contain at least one row');
};
|
|
|
|
/**
 * Descriptive statistics, rolling windows, clustering and forecasting over
 * `DataSeries` / `DataMatrix` inputs.
 *
 * Methods that accept `conditions` pass them to `applyConditions`, which does
 * not filter yet: conditions are accepted but currently have no effect.
 */
export class AnalyticsEngine {
  /**
   * Returns the values of `series` selected by `conditions`.
   *
   * Filtering is not implemented: every value is returned regardless of the
   * conditions supplied.
   */
  private applyConditions(series: DataSeries, conditions: Condition[] = []): number[] {
    if (conditions.length === 0) return series.values;
    return series.values; // TODO: Implement filtering
  }

  /**
   * Shared preamble for the aggregations: validates the series, applies the
   * conditions and guarantees that at least one value remains.
   *
   * @throws Error when the series is invalid or no value matches.
   */
  private selectValues(series: DataSeries, conditions: Condition[]): number[] {
    validateSeries(series);
    const values = this.applyConditions(series, conditions);
    if (values.length === 0) throw new Error('No data points match conditions');
    return values;
  }

  // Basic statistical functions

  /**
   * Distinct values of the series, as produced by `_.uniq`.
   *
   * @throws Error when the series is invalid.
   */
  unique(series: DataSeries): number[] {
    validateSeries(series);
    return _.uniq(series.values);
  }

  /**
   * Arithmetic mean of the selected values (delegates to `math.mean`).
   *
   * @throws Error when the series is invalid or no value matches.
   */
  mean(series: DataSeries, conditions: Condition[] = []): number {
    return Number(math.mean(this.selectValues(series, conditions)));
  }

  /**
   * Number of selected values.
   *
   * @throws Error when the series is invalid or no value matches.
   */
  count(series: DataSeries, conditions: Condition[] = []): number {
    return this.selectValues(series, conditions).length;
  }

  /**
   * Number of distinct selected values. Unlike `count`, an empty selection
   * yields `0` instead of throwing.
   *
   * @throws Error when the series is invalid.
   */
  distinctCount(series: DataSeries, conditions: Condition[] = []): number {
    validateSeries(series);
    return _.uniq(this.applyConditions(series, conditions)).length;
  }

  /**
   * Variance of the selected values, computed by `math.variance` with its
   * default options (mathjs documents the default normalization as
   * 'unbiased').
   *
   * @throws Error when the series is invalid or no value matches.
   */
  variance(series: DataSeries, conditions: Condition[] = []): number {
    return Number(math.variance(this.selectValues(series, conditions)));
  }

  /**
   * Standard deviation of the selected values, computed by `math.std` with
   * its default options.
   *
   * @throws Error when the series is invalid or no value matches.
   */
  standardDeviation(series: DataSeries, conditions: Condition[] = []): number {
    return Number(math.std(this.selectValues(series, conditions)));
  }

  /**
   * Linearly interpolated percentile of the selected values.
   *
   * @param percent - Rank to look up, from 0 to 100 inclusive.
   * @param ascending - When `false` the values are ranked from largest to
   *   smallest, so `percent` is measured from the top.
   * @throws Error when the series is invalid, no value matches, or `percent`
   *   is not a finite number within 0-100.
   */
  percentile(series: DataSeries, percent: number, ascending: boolean = true, conditions: Condition[] = []): number {
    const values = this.selectValues(series, conditions);

    // Out-of-range ranks would index past the array and silently yield NaN.
    if (!Number.isFinite(percent) || percent < 0 || percent > 100) {
      throw new Error('Percent must be a number between 0 and 100');
    }

    const sorted = ascending ? _.sortBy(values) : _.sortBy(values).reverse();
    const index = (percent / 100) * (sorted.length - 1);
    const lower = Math.floor(index);
    const upper = Math.ceil(index);

    // Exact hit: skip interpolation so an infinite value is not multiplied by 0.
    if (lower === upper) return sorted[lower];

    const weight = index - lower;
    return sorted[lower] * (1 - weight) + sorted[upper] * weight;
  }

  /**
   * Median of the selected values (the 50th ascending percentile).
   *
   * @throws Error when the series is invalid or no value matches.
   */
  median(series: DataSeries, conditions: Condition[] = []): number {
    return this.percentile(series, 50, true, conditions);
  }

  /**
   * Most frequent value(s) among the selected values; several values are
   * returned when they tie for the highest frequency.
   *
   * @throws Error when the series is invalid.
   */
  mode(series: DataSeries, conditions: Condition[] = []): number[] {
    validateSeries(series);
    const frequency = _.countBy(this.applyConditions(series, conditions));

    // Fold rather than spread into Math.max: spreading a very large array
    // can exceed the engine's argument limit.
    const maxFreq = Object.values(frequency).reduce(
      (best, occurrences) => Math.max(best, occurrences),
      -Infinity,
    );

    // countBy keys are strings, so convert the winners back to numbers.
    return Object.keys(frequency)
      .filter(key => frequency[key] === maxFreq)
      .map(Number);
  }

  /**
   * Largest selected value.
   *
   * Computed with a fold instead of `Math.max(...values)` so that very large
   * series do not overflow the call stack.
   *
   * @throws Error when the series is invalid or no value matches.
   */
  max(series: DataSeries, conditions: Condition[] = []): number {
    return this.selectValues(series, conditions).reduce(
      (best, value) => Math.max(best, value),
      -Infinity,
    );
  }

  /**
   * Smallest selected value.
   *
   * Computed with a fold instead of `Math.min(...values)` so that very large
   * series do not overflow the call stack.
   *
   * @throws Error when the series is invalid or no value matches.
   */
  min(series: DataSeries, conditions: Condition[] = []): number {
    return this.selectValues(series, conditions).reduce(
      (best, value) => Math.min(best, value),
      Infinity,
    );
  }

  /**
   * Pearson correlation coefficient between two equally long series.
   *
   * Uses mean-centred sums (two passes), which avoids the catastrophic
   * cancellation of the raw sum-of-products formula on data with a large
   * offset, and clamps rounding overshoot into [-1, 1].
   *
   * @returns A value in [-1, 1], or `NaN` when either series has zero
   *   variance (the coefficient is undefined in that case).
   * @throws Error when either series is invalid or their lengths differ.
   */
  correlation(series1: DataSeries, series2: DataSeries): number {
    validateSeries(series1);
    validateSeries(series2);

    if (series1.values.length !== series2.values.length) {
      throw new Error('Series must have same length for correlation');
    }

    const x = series1.values;
    const y = series2.values;
    const n = x.length;

    let sumX = 0;
    let sumY = 0;
    for (let i = 0; i < n; i++) {
      sumX += x[i];
      sumY += y[i];
    }
    const meanX = sumX / n;
    const meanY = sumY / n;

    let covariance = 0;
    let spreadX = 0;
    let spreadY = 0;
    for (let i = 0; i < n; i++) {
      const dx = x[i] - meanX;
      const dy = y[i] - meanY;
      covariance += dx * dy;
      spreadX += dx * dx;
      spreadY += dy * dy;
    }

    const r = covariance / Math.sqrt(spreadX * spreadY);

    // NaN passes through both Math.min and Math.max unchanged.
    return Math.max(-1, Math.min(1, r));
  }

  // Rolling window functions

  /**
   * Builds every contiguous window of `windowSize` values over the series.
   *
   * @returns A `RollingWindow` over the windows; it wraps an empty list when
   *   the series is shorter than `windowSize`.
   * @throws Error when the series is invalid or `windowSize` is not a
   *   positive integer.
   */
  rolling(series: DataSeries, windowSize: number): RollingWindow {
    validateSeries(series);
    if (windowSize <= 0) {
      throw new Error('Window size must be a positive number.');
    }
    // Fractional or NaN sizes would otherwise produce wrong-sized or no windows.
    if (!Number.isInteger(windowSize)) {
      throw new Error('Window size must be an integer.');
    }

    const values = series.values;
    if (values.length < windowSize) {
      return new RollingWindow([]);
    }

    const windows: number[][] = [];
    for (let start = 0; start + windowSize <= values.length; start++) {
      windows.push(values.slice(start, start + windowSize));
    }
    return new RollingWindow(windows);
  }

  /**
   * Mean of each rolling window (delegates to `RollingWindow.mean`).
   *
   * @throws Error under the same conditions as `rolling`.
   */
  movingAverage(series: DataSeries, windowSize: number): number[] {
    return this.rolling(series, windowSize).mean();
  }

  // K-means wrapper (uses imported KMeans class)

  /**
   * Clusters the rows of `matrix.data` with the imported `KMeans` class.
   *
   * @param nClusters - Cluster count passed to `KMeans`.
   * @param options - Passed through to `KMeans` unchanged.
   * @returns The `points` and `centroid` of each cluster in `run()`'s result,
   *   in the same order.
   * @throws Error when the matrix is invalid.
   */
  kmeans(matrix: DataMatrix, nClusters: number, options: KMeansOptions = {}): { clusters: number[][][], centroids: number[][] } {
    validateMatrix(matrix);
    const points: number[][] = matrix.data;

    const result = new KMeans(points, nClusters, options).run();

    // The cluster element type is not visible from this file, so the
    // `centroid` / `points` fields are read through `any`.
    const centroids = result.clusters.map(c => (c as any).centroid);
    const clusters = result.clusters.map(c => (c as any).points);

    return { clusters, centroids };
  }

  // Time helper wrapper functions

  /** Delegates to `getWeekNumber` from './time-helper'. */
  getWeekNumber(dateString: string): number {
    return getWeekNumber(dateString);
  }

  /** Delegates to `getSameWeekDayLastYear` from './time-helper'. */
  getSameWeekDayLastYear(dateString: string): string {
    return getSameWeekDayLastYear(dateString);
  }

  // ========================================
  // Prediction functions
  // ========================================

  /**
   * Produces a forecast for the series using the './prediction' helpers.
   *
   * The model comes from `calculateLinearRegression(series.values)`. The
   * forecast comes from `generateForecast`, called with the model, the series
   * length and `forecastPeriods`. The intervals come from
   * `calculatePredictionIntervals`.
   *
   * @param forecastPeriods - Forwarded to `generateForecast`; presumably the
   *   number of future points to produce (confirm against './prediction').
   * @returns The forecast, its prediction intervals and the model's slope and
   *   intercept.
   * @throws Error when the series is invalid.
   */
  timeSeriesForecast(series: DataSeries, forecastPeriods: number): ForecastResult {
    validateSeries(series);

    const model = calculateLinearRegression(series.values);
    const forecast = generateForecast(model, series.values.length, forecastPeriods);
    const predictionIntervals = calculatePredictionIntervals(series.values, model, forecast);

    return {
      forecast,
      predictionIntervals,
      modelParameters: {
        slope: model.slope,
        intercept: model.intercept,
      },
    };
  }
}
|
|
|
|
/** Ready-made `AnalyticsEngine` instance, constructed once when this module is loaded. */
export const analytics = new AnalyticsEngine();
|
|
|