From ca8bded9495343ee243ec890e74a355dea3aeab6 Mon Sep 17 00:00:00 2001 From: RaymondHung-datacom Date: Thu, 25 Sep 2025 16:28:20 +0900 Subject: [PATCH] reconstruct --- api-documentation.html | 46 -- package.json | 35 + server.ts | 400 ++-------- .../analysis_pipelines.ts | 266 +++---- services/analytics_engine.ts | 208 ++++++ convolution.ts => services/convolution.ts | 0 kmeans.ts => services/kmeans.ts | 286 ++++---- services/pivot_table.ts | 36 + prediction.ts => services/prediction.ts | 200 ++--- services/retail_metrics.ts | 77 ++ services/rolling_window.ts | 30 + .../signal_processing_convolution.ts | 0 time-helper.ts => services/time-helper.ts | 44 +- timeseries.ts => services/timeseries.ts | 692 +++++++++--------- tests/analyticsEngine.test.ts | 21 + tsconfig.json | 15 + types/index.ts | 22 + 17 files changed, 1268 insertions(+), 1110 deletions(-) delete mode 100644 api-documentation.html create mode 100644 package.json rename analysis_pipelines.ts => services/analysis_pipelines.ts (96%) create mode 100644 services/analytics_engine.ts rename convolution.ts => services/convolution.ts (100%) rename kmeans.ts => services/kmeans.ts (97%) create mode 100644 services/pivot_table.ts rename prediction.ts => services/prediction.ts (94%) create mode 100644 services/retail_metrics.ts create mode 100644 services/rolling_window.ts rename signal_processing_convolution.ts => services/signal_processing_convolution.ts (100%) rename time-helper.ts => services/time-helper.ts (91%) rename timeseries.ts => services/timeseries.ts (97%) create mode 100644 tests/analyticsEngine.test.ts create mode 100644 tsconfig.json create mode 100644 types/index.ts diff --git a/api-documentation.html b/api-documentation.html deleted file mode 100644 index 67e690a..0000000 --- a/api-documentation.html +++ /dev/null @@ -1,46 +0,0 @@ - - - - - - - API Documentation - - - - -
- - - - - \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..09be592 --- /dev/null +++ b/package.json @@ -0,0 +1,35 @@ +{ + "name": "analytics-api", + "version": "1.0.0", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC", + "description": "", + "dependencies": { + "cors": "^2.8.5", + "date-fns": "^4.1.0", + "express": "^4.21.2", + "lodash": "^4.17.21", + "mathjs": "^14.6.0", + "swagger-ui-express": "^5.0.1" + }, + "devDependencies": { + "@types/cors": "^2.8.19", + "@types/express": "^4.17.23", + "@types/jest": "^30.0.0", + "@types/lodash": "^4.17.20", + "@types/node": "^24.3.0", + "@types/swagger-jsdoc": "^6.0.4", + "@types/swagger-ui-express": "^4.1.8", + "concurrently": "^9.2.1", + "jest": "^30.1.3", + "swagger-jsdoc": "^6.2.8", + "ts-jest": "^29.4.4", + "ts-node": "^10.9.2", + "typescript": "^5.9.2" + } +} diff --git a/server.ts b/server.ts index 4fefdd5..1c071f0 100644 --- a/server.ts +++ b/server.ts @@ -6,30 +6,25 @@ import express from 'express'; import swaggerJsdoc from 'swagger-jsdoc'; import swaggerUi from 'swagger-ui-express'; -import * as math from 'mathjs'; -import * as _ from 'lodash'; -import cors from 'cors'; // <-- 1. 
IMPORT THE CORS PACKAGE +import cors from 'cors'; // Assuming these files exist in the same directory // import { KMeans, KMeansOptions } from './kmeans'; // import { getWeekNumber, getSameWeekDayLastYear } from './time-helper'; // import { calculateLinearRegression, generateForecast, calculatePredictionIntervals, ForecastResult } from './prediction'; -import { SignalProcessor, SmoothingOptions, EdgeDetectionOptions } from './signal_processing_convolution'; -import { TimeSeriesAnalyzer, ARIMAOptions } from './timeseries'; -import { AnalysisPipelines } from './analysis_pipelines'; -import { convolve1D, convolve2D, ConvolutionKernels } from './convolution'; - -// Dummy interfaces/classes if the files are not present, to prevent compile errors -interface KMeansOptions {} -class KMeans { constructor(p: any, n: any, o: any) {}; run = () => ({ clusters: [] }) } -const getWeekNumber = (d: string) => 1; -const getSameWeekDayLastYear = (d: string) => new Date().toISOString(); -interface ForecastResult {} -const calculateLinearRegression = (v: any) => ({slope: 1, intercept: 0}); -const generateForecast = (m: any, l: any, p: any) => []; -const calculatePredictionIntervals = (v: any, m: any, f: any) => []; - +import { SignalProcessor, SmoothingOptions, EdgeDetectionOptions } from './services/signal_processing_convolution'; +import { TimeSeriesAnalyzer, ARIMAOptions } from './services/timeseries'; +import { AnalysisPipelines } from './services/analysis_pipelines'; +import { convolve1D, convolve2D, ConvolutionKernels } from './services/convolution'; +import { DataSeries, DataMatrix, Condition, ApiResponse } from './types/index'; +import { handleError, validateSeries, validateMatrix } from './services/analytics_engine'; +import { ForecastResult } from './services/prediction'; +import { analytics } from './services/analytics_engine'; +import { purchaseRate, liftValue, costRatio, grossMarginRate, averageSpendPerCustomer, purchaseIndex } from './services/retail_metrics'; +import { 
RollingWindow } from './services/rolling_window'; +import { pivotTable, PivotOptions } from './services/pivot_table'; +// Initialize Express app const app = express(); app.use(express.json()); app.use(cors()); // <-- 2. ENABLE CORS FOR ALL ROUTES @@ -56,301 +51,6 @@ const swaggerSpec = swaggerJsdoc(swaggerOptions); app.use('/api-docs', swaggerUi.serve, swaggerUi.setup(swaggerSpec)); -// ======================================== -// TYPE DEFINITIONS -// ======================================== - -interface DataSeries { - values: number[]; - labels?: string[]; -} - -interface DataMatrix { - data: number[][]; - columns?: string[]; - rows?: string[]; -} -interface Condition { - field: string; - operator: '>' | '<' | '=' | '>=' | '<=' | '!='; - value: number | string; -} - -interface ApiResponse { - success: boolean; - data?: T; - error?: string; -} - -// ======================================== -// HELPER FUNCTIONS -// ======================================== - -const handleError = (error: unknown): string => { - return error instanceof Error ? error.message : 'Unknown error'; -}; -const validateSeries = (series: DataSeries): void => { - if (!series || !Array.isArray(series.values) || series.values.length === 0) { - throw new Error('Series must contain at least one value'); - } -}; - -const validateMatrix = (matrix: DataMatrix): void => { - if (!matrix || !Array.isArray(matrix.data) || matrix.data.length === 0) { - throw new Error('Matrix must contain at least one row'); - } -}; - -/** - * A helper class to provide a fluent API for rolling window calculations. 
- */ -class RollingWindow { - private windows: number[][]; - - constructor(windows: number[][]) { - this.windows = windows; - } - - mean(): number[] { - return this.windows.map(window => Number(math.mean(window))); - } - - sum(): number[] { - return this.windows.map(window => _.sum(window)); - } - - min(): number[] { - return this.windows.map(window => Math.min(...window)); - } - - max(): number[] { - return this.windows.map(window => Math.max(...window)); - } - - toArray(): number[][] { - return this.windows; - } -} - -// ======================================== -// ANALYTICS ENGINE (Simplified) -// ======================================== - -class AnalyticsEngine { - - private applyConditions(series: DataSeries, conditions: Condition[] = []): number[] { - if (conditions.length === 0) return series.values; - return series.values; // TODO: Implement filtering - } - - // Basic statistical functions - unique(series: DataSeries): number[] { - validateSeries(series); - return _.uniq(series.values); - } - - mean(series: DataSeries, conditions: Condition[] = []): number { - validateSeries(series); - const filteredValues = this.applyConditions(series, conditions); - if (filteredValues.length === 0) throw new Error('No data points match conditions'); - return Number(math.mean(filteredValues)); - } - - count(series: DataSeries, conditions: Condition[] = []): number { - validateSeries(series); - const filteredValues = this.applyConditions(series, conditions); - if (filteredValues.length === 0) throw new Error('No data points match conditions'); - return filteredValues.length; - } - - variance(series: DataSeries, conditions: Condition[] = []): number { - validateSeries(series); - const filteredValues = this.applyConditions(series, conditions); - if (filteredValues.length === 0) throw new Error('No data points match conditions'); - return Number(math.variance(filteredValues)); - } - - standardDeviation(series: DataSeries, conditions: Condition[] = []): number { - 
validateSeries(series); - const filteredValues = this.applyConditions(series, conditions); - if (filteredValues.length === 0) throw new Error('No data points match conditions'); - return Number(math.std(filteredValues)); - } - - percentile(series: DataSeries, percent: number, ascending: boolean = true, conditions: Condition[] = []): number { - validateSeries(series); - const filteredValues = this.applyConditions(series, conditions); - if (filteredValues.length === 0) throw new Error('No data points match conditions'); - - const sorted = ascending ? _.sortBy(filteredValues) : _.sortBy(filteredValues).reverse(); - const index = (percent / 100) * (sorted.length - 1); - const lower = Math.floor(index); - const upper = Math.ceil(index); - const weight = index % 1; - - return sorted[lower] * (1 - weight) + sorted[upper] * weight; - } - - median(series: DataSeries, conditions: Condition[] = []): number { - return this.percentile(series, 50, true, conditions); - } - - mode(series: DataSeries, conditions: Condition[] = []): number[] { - validateSeries(series); - const filteredValues = this.applyConditions(series, conditions); - const frequency = _.countBy(filteredValues); - const maxFreq = Math.max(...Object.values(frequency)); - - return Object.keys(frequency) - .filter(key => frequency[key] === maxFreq) - .map(Number); - } - - max(series: DataSeries, conditions: Condition[] = []): number { - validateSeries(series); - const filteredValues = this.applyConditions(series, conditions); - if (filteredValues.length === 0) throw new Error('No data points match conditions'); - return Math.max(...filteredValues); - } - - min(series: DataSeries, conditions: Condition[] = []): number { - validateSeries(series); - const filteredValues = this.applyConditions(series, conditions); - if (filteredValues.length === 0) throw new Error('No data points match conditions'); - return Math.min(...filteredValues); - } - - correlation(series1: DataSeries, series2: DataSeries): number { - 
validateSeries(series1); - validateSeries(series2); - - if (series1.values.length !== series2.values.length) { - throw new Error('Series must have same length for correlation'); - } - - const x = series1.values; - const y = series2.values; - const n = x.length; - - const sumX = _.sum(x); - const sumY = _.sum(y); - const sumXY = _.sum(x.map((xi, i) => xi * y[i])); - const sumX2 = _.sum(x.map(xi => xi * xi)); - const sumY2 = _.sum(y.map(yi => yi * yi)); - - const numerator = n * sumXY - sumX * sumY; - const denominator = Math.sqrt((n * sumX2 - sumX * sumX) * (n * sumY2 - sumY * sumY)); - - return numerator / denominator; - } - - // Rolling window functions - rolling(series: DataSeries, windowSize: number): RollingWindow { - validateSeries(series); - if (windowSize <= 0) { - throw new Error('Window size must be a positive number.'); - } - if (series.values.length < windowSize) { - return new RollingWindow([]); - } - - const windows: number[][] = []; - for (let i = 0; i <= series.values.length - windowSize; i++) { - const window = series.values.slice(i, i + windowSize); - windows.push(window); - } - return new RollingWindow(windows); - } - - movingAverage(series: DataSeries, windowSize: number): number[] { - return this.rolling(series, windowSize).mean(); - } - - // K-means wrapper (uses imported KMeans class) - kmeans(matrix: DataMatrix, nClusters: number, options: KMeansOptions = {}): { clusters: number[][][], centroids: number[][] } { - validateMatrix(matrix); - const points: number[][] = matrix.data; - - // Use the new MiniBatchKMeans class - const kmeans = new KMeans(points, nClusters, options); - const result = kmeans.run(); - - const centroids = result.clusters.map(c => (c as any).centroid); - const clusters = result.clusters.map(c => (c as any).points); - - return { clusters, centroids }; - } - - // Time helper wrapper functions - getWeekNumber(dateString: string): number { - return getWeekNumber(dateString); - } - - getSameWeekDayLastYear(dateString: string): 
string { - return getSameWeekDayLastYear(dateString); - } - - // Retail functions - purchaseRate(productPurchases: number, totalTransactions: number): number { - if (totalTransactions === 0) throw new Error('Total transactions cannot be zero'); - return (productPurchases / totalTransactions) * 100; - } - - liftValue(jointPurchaseRate: number, productAPurchaseRate: number, productBPurchaseRate: number): number { - const expectedJointRate = productAPurchaseRate * productBPurchaseRate; - if (expectedJointRate === 0) throw new Error('Expected joint rate cannot be zero'); - return jointPurchaseRate / expectedJointRate; - } - - costRatio(cost: number, salePrice: number): number { - if (salePrice === 0) throw new Error('Sale price cannot be zero'); - return cost / salePrice; - } - - grossMarginRate(salePrice: number, cost: number): number { - if (salePrice === 0) throw new Error('Sale price cannot be zero'); - return (salePrice - cost) / salePrice; - } - - averageSpendPerCustomer(totalRevenue: number, numberOfCustomers: number): number { - if (numberOfCustomers === 0) { - throw new Error('Number of customers cannot be zero'); - } - return totalRevenue / numberOfCustomers; - } - - purchaseIndex(totalItemsSold: number, numberOfCustomers: number): number { - if (numberOfCustomers === 0) { - throw new Error('Number of customers cannot be zero'); - } - return (totalItemsSold / numberOfCustomers) * 1000; - } - - // ======================================== - // Prediction functions - // ======================================== - - timeSeriesForecast(series: DataSeries, forecastPeriods: number): ForecastResult { - validateSeries(series); - - const model = calculateLinearRegression(series.values); - const forecast = generateForecast(model, series.values.length, forecastPeriods); - const predictionIntervals = calculatePredictionIntervals(series.values, model, forecast); - - return { - forecast, - predictionIntervals, - modelParameters: { - slope: model.slope, - intercept: 
model.intercept, - }, - }; - } -} - -// Initialize analytics engine -const analytics = new AnalyticsEngine(); - // ======================================== // API ROUTES // ======================================== @@ -779,6 +479,45 @@ app.post('/api/correlation', (req, res) => { } }); +/** + * @swagger + * /api/pivot-table: + * post: + * summary: Generate a pivot table from records + * description: Returns a pivot table based on the provided data and options + * tags: [Data Transformation] + * requestBody: + * required: true + * content: + * application/json: + * schema: + * type: object + * properties: + * data: + * type: array + * items: + * type: object + * description: Array of records to pivot + * options: + * $ref: '#/components/schemas/PivotOptions' + * responses: + * '200': + * description: Pivot table generated successfully + * '400': + * description: Invalid input data + */ +app.post('/api/pivot-table', (req, res) => { + try { + const { data, options } = req.body; + // You can pass analytics.mean, analytics.count, etc. 
as options.aggFunc if needed + const result = pivotTable(data, options); + res.status(200).json({ success: true, data: result }); + } catch (error) { + const errorMessage = handleError(error); + res.status(400).json({ success: false, error: errorMessage }); + } +}); + /** * @swagger * /api/series/moving-average: @@ -1150,7 +889,7 @@ app.post('/api/time/same-day-last-year', (req, res) => { */ app.post('/api/retail/purchase-rate', (req, res) => { try { - const result = analytics.purchaseRate(req.body.productPurchases, req.body.totalTransactions); + const result = purchaseRate(req.body.productPurchases, req.body.totalTransactions); res.status(200).json({ success: true, data: result } as ApiResponse); } catch (error) { const errorMessage = handleError(error); @@ -1192,7 +931,7 @@ app.post('/api/retail/purchase-rate', (req, res) => { */ app.post('/api/retail/lift-value', (req, res) => { try { - const result = analytics.liftValue(req.body.jointPurchaseRate, req.body.productAPurchaseRate, req.body.productBPurchaseRate); + const result = liftValue(req.body.jointPurchaseRate, req.body.productAPurchaseRate, req.body.productBPurchaseRate); res.status(200).json({ success: true, data: result } as ApiResponse); } catch (error) { const errorMessage = handleError(error); @@ -1230,7 +969,7 @@ app.post('/api/retail/lift-value', (req, res) => { */ app.post('/api/retail/cost-ratio', (req, res) => { try { - const result = analytics.costRatio(req.body.cost, req.body.salePrice); + const result = costRatio(req.body.cost, req.body.salePrice); res.status(200).json({ success: true, data: result } as ApiResponse); } catch (error) { const errorMessage = handleError(error); @@ -1268,7 +1007,7 @@ app.post('/api/retail/cost-ratio', (req, res) => { */ app.post('/api/retail/gross-margin', (req, res) => { try { - const result = analytics.grossMarginRate(req.body.salePrice, req.body.cost); + const result = grossMarginRate(req.body.salePrice, req.body.cost); res.status(200).json({ success: true, data: 
result } as ApiResponse); } catch (error) { const errorMessage = handleError(error); @@ -1307,7 +1046,7 @@ app.post('/api/retail/gross-margin', (req, res) => { app.post('/api/retail/average-spend', (req, res) => { try { const { totalRevenue, numberOfCustomers } = req.body; - const result = analytics.averageSpendPerCustomer(totalRevenue, numberOfCustomers); + const result = averageSpendPerCustomer(totalRevenue, numberOfCustomers); res.status(200).json({ success: true, data: result } as ApiResponse); } catch (error) { const errorMessage = handleError(error); @@ -1346,7 +1085,7 @@ app.post('/api/retail/average-spend', (req, res) => { app.post('/api/retail/purchase-index', (req, res) => { try { const { totalItemsSold, numberOfCustomers } = req.body; - const result = analytics.purchaseIndex(totalItemsSold, numberOfCustomers); + const result = purchaseIndex(totalItemsSold, numberOfCustomers); res.status(200).json({ success: true, data: result } as ApiResponse); } catch (error) { const errorMessage = handleError(error); @@ -1826,6 +1565,29 @@ app.get('/api/kernels/:name', (req, res) => { * s: * type: integer * description: The seasonal period length (e.g., 7 for weekly). 
+ * PivotOptions: + * type: object + * required: + * - index + * - columns + * - values + * properties: + * index: + * type: array + * items: + * type: string + * description: Keys to use as row labels + * columns: + * type: array + * items: + * type: string + * description: Keys to use as column labels + * values: + * type: string + * description: Key to aggregate + * aggFunc: + * type: string + * description: Aggregation function name (e.g., "sum", "mean", "count") * ApiResponse: * type: object * properties: diff --git a/analysis_pipelines.ts b/services/analysis_pipelines.ts similarity index 96% rename from analysis_pipelines.ts rename to services/analysis_pipelines.ts index a35ee7a..54b277f 100644 --- a/analysis_pipelines.ts +++ b/services/analysis_pipelines.ts @@ -1,133 +1,133 @@ -// analysis_pipelines.ts - High-level workflows for common analysis tasks. - -import { SignalProcessor } from './signal_processing_convolution'; -import { TimeSeriesAnalyzer, STLDecomposition } from './timeseries'; - -/** - * The comprehensive result of a denoise and detrend operation. - */ -export interface DenoiseAndDetrendResult { - original: number[]; - smoothed: number[]; - decomposition: STLDecomposition; -} - -/** - * The result of an automatic SARIMA parameter search. - */ -export interface AutoArimaResult { - bestModel: { - p: number; - d: number; - q: number; - P: number; - D: number; - Q: number; - s: number; // Correctly included - aic: number; - }; - searchLog: { p: number; d: number; q: number; P: number; D: number; Q: number; s: number; aic: number }[]; -} - - -/** - * A class containing high-level analysis pipelines that combine - * functions from various processing libraries. - */ -export class AnalysisPipelines { - - /** - * A full pipeline to take a raw signal, smooth it to remove noise, - * and then decompose it into trend, seasonal, and residual components. - * @param series The original time series data. 
- * @param period The seasonal period for STL decomposition. - * @param smoothWindow The window size for the initial smoothing (denoising) pass. - * @returns An object containing the original, smoothed, and decomposed series. - */ - static denoiseAndDetrend(series: number[], period: number, smoothWindow: number = 5): DenoiseAndDetrendResult { - // Ensure window is odd for symmetry - if (smoothWindow > 1 && smoothWindow % 2 === 0) { - smoothWindow++; - } - const smoothed = SignalProcessor.smooth(series, { - method: 'gaussian', - windowSize: smoothWindow - }); - - const decomposition = TimeSeriesAnalyzer.stlDecomposition(smoothed, period); - - return { - original: series, - smoothed: smoothed, - decomposition: decomposition, - }; - } - - /** - * [FINAL CORRECTED VERSION] Performs a full grid search to find the optimal SARIMA parameters. - * This version now correctly includes 's' in the final result object. - * @param series The original time series data. - * @param seasonalPeriod The seasonal period of the data (e.g., 7 for weekly, 12 for monthly). - * @returns An object containing the best model parameters and a log of the search. 
- */ - static findBestArimaParameters( - series: number[], - seasonalPeriod: number, - maxD: number = 1, - maxP: number = 2, - maxQ: number = 2, - maxSeasonalD: number = 1, - maxSeasonalP: number = 2, - maxSeasonalQ: number = 2 - ): AutoArimaResult { - - const searchLog: any[] = []; - let bestModel: any = { aic: Infinity }; - - const calculateAIC = (residuals: number[], numParams: number): number => { - const n = residuals.length; - if (n === 0) return Infinity; - const sse = residuals.reduce((sum, r) => sum + r * r, 0); - if (sse < 1e-9) return -Infinity; // Perfect fit - const logLikelihood = -n / 2 * (Math.log(2 * Math.PI) + Math.log(sse / n)) - n / 2; - return 2 * numParams - 2 * logLikelihood; - }; - - // Grid search over all parameter combinations - for (let d = 0; d <= maxD; d++) { - for (let p = 0; p <= maxP; p++) { - for (let q = 0; q <= maxQ; q++) { - for (let D = 0; D <= maxSeasonalD; D++) { - for (let P = 0; P <= maxSeasonalP; P++) { - for (let Q = 0; Q <= maxSeasonalQ; Q++) { - // Skip trivial models where nothing is done - if (p === 0 && d === 0 && q === 0 && P === 0 && D === 0 && Q === 0) continue; - - const options = { p, d, q, P, D, Q, s: seasonalPeriod }; - try { - const { residuals } = TimeSeriesAnalyzer.arimaForecast(series, options, 0); - const numParams = p + q + P + Q; - const aic = calculateAIC(residuals, numParams); - - // Construct the full model info object, ensuring 's' is included - const modelInfo = { p, d, q, P, D, Q, s: seasonalPeriod, aic }; - searchLog.push(modelInfo); - - if (modelInfo.aic < bestModel.aic) { - bestModel = modelInfo; - } - } catch (error) { - // Skip invalid parameter combinations that cause errors - } - } } } } } } - - if (bestModel.aic === Infinity) { - throw new Error("Could not find a suitable SARIMA model. 
The data may be too short or complex."); - } - - // Sort the log by AIC for easier reading - searchLog.sort((a, b) => a.aic - b.aic); - - return { bestModel, searchLog }; - } -} +// analysis_pipelines.ts - High-level workflows for common analysis tasks. + +import { SignalProcessor } from './signal_processing_convolution'; +import { TimeSeriesAnalyzer, STLDecomposition } from './timeseries'; + +/** + * The comprehensive result of a denoise and detrend operation. + */ +export interface DenoiseAndDetrendResult { + original: number[]; + smoothed: number[]; + decomposition: STLDecomposition; +} + +/** + * The result of an automatic SARIMA parameter search. + */ +export interface AutoArimaResult { + bestModel: { + p: number; + d: number; + q: number; + P: number; + D: number; + Q: number; + s: number; + aic: number; + }; + searchLog: { p: number; d: number; q: number; P: number; D: number; Q: number; s: number; aic: number }[]; +} + + +/** + * A class containing high-level analysis pipelines that combine + * functions from various processing libraries. + */ +export class AnalysisPipelines { + + /** + * A full pipeline to take a raw signal, smooth it to remove noise, + * and then decompose it into trend, seasonal, and residual components. + * @param series The original time series data. + * @param period The seasonal period for STL decomposition. + * @param smoothWindow The window size for the initial smoothing (denoising) pass. + * @returns An object containing the original, smoothed, and decomposed series. 
+ */ + static denoiseAndDetrend(series: number[], period: number, smoothWindow: number = 5): DenoiseAndDetrendResult { + // Ensure window is odd for symmetry + if (smoothWindow > 1 && smoothWindow % 2 === 0) { + smoothWindow++; + } + const smoothed = SignalProcessor.smooth(series, { + method: 'gaussian', + windowSize: smoothWindow + }); + + const decomposition = TimeSeriesAnalyzer.stlDecomposition(smoothed, period); + + return { + original: series, + smoothed: smoothed, + decomposition: decomposition, + }; + } + + /** + * [FINAL CORRECTED VERSION] Performs a full grid search to find the optimal SARIMA parameters. + * This version now correctly includes 's' in the final result object. + * @param series The original time series data. + * @param seasonalPeriod The seasonal period of the data (e.g., 7 for weekly, 12 for monthly). + * @returns An object containing the best model parameters and a log of the search. + */ + static findBestArimaParameters( + series: number[], + seasonalPeriod: number, + maxD: number = 1, + maxP: number = 2, + maxQ: number = 2, + maxSeasonalD: number = 1, + maxSeasonalP: number = 2, + maxSeasonalQ: number = 2 + ): AutoArimaResult { + + const searchLog: any[] = []; + let bestModel: any = { aic: Infinity }; + + const calculateAIC = (residuals: number[], numParams: number): number => { + const n = residuals.length; + if (n === 0) return Infinity; + const sse = residuals.reduce((sum, r) => sum + r * r, 0); + if (sse < 1e-9) return -Infinity; // Perfect fit + const logLikelihood = -n / 2 * (Math.log(2 * Math.PI) + Math.log(sse / n)) - n / 2; + return 2 * numParams - 2 * logLikelihood; + }; + + // Grid search over all parameter combinations + for (let d = 0; d <= maxD; d++) { + for (let p = 0; p <= maxP; p++) { + for (let q = 0; q <= maxQ; q++) { + for (let D = 0; D <= maxSeasonalD; D++) { + for (let P = 0; P <= maxSeasonalP; P++) { + for (let Q = 0; Q <= maxSeasonalQ; Q++) { + // Skip trivial models where nothing is done + if (p === 0 && d === 0 
&& q === 0 && P === 0 && D === 0 && Q === 0) continue; + + const options = { p, d, q, P, D, Q, s: seasonalPeriod }; + try { + const { residuals } = TimeSeriesAnalyzer.arimaForecast(series, options, 0); + const numParams = p + q + P + Q; + const aic = calculateAIC(residuals, numParams); + + // Construct the full model info object, ensuring 's' is included + const modelInfo = { p, d, q, P, D, Q, s: seasonalPeriod, aic }; + searchLog.push(modelInfo); + + if (modelInfo.aic < bestModel.aic) { + bestModel = modelInfo; + } + } catch (error) { + // Skip invalid parameter combinations that cause errors + } + } } } } } } + + if (bestModel.aic === Infinity) { + throw new Error("Could not find a suitable SARIMA model. The data may be too short or complex."); + } + + // Sort the log by AIC for easier reading + searchLog.sort((a, b) => a.aic - b.aic); + + return { bestModel, searchLog }; + } +} diff --git a/services/analytics_engine.ts b/services/analytics_engine.ts new file mode 100644 index 0000000..88979d3 --- /dev/null +++ b/services/analytics_engine.ts @@ -0,0 +1,208 @@ +import * as math from 'mathjs'; +import * as _ from 'lodash'; +import { DataSeries, DataMatrix, Condition, ApiResponse } from '../types/index'; +import { RollingWindow } from './rolling_window'; +import { KMeans, KMeansOptions } from './kmeans'; +import { getWeekNumber, getSameWeekDayLastYear } from './time-helper'; +import { calculateLinearRegression, generateForecast, calculatePredictionIntervals, ForecastResult } from './prediction'; + +export const handleError = (error: unknown): string => { + return error instanceof Error ? 
error.message : 'Unknown error'; +}; +export const validateSeries = (series: DataSeries): void => { + if (!series || !Array.isArray(series.values) || series.values.length === 0) { + throw new Error('Series must contain at least one value'); + } +}; + +export const validateMatrix = (matrix: DataMatrix): void => { + if (!matrix || !Array.isArray(matrix.data) || matrix.data.length === 0) { + throw new Error('Matrix must contain at least one row'); + } +}; + +export class AnalyticsEngine { + + private applyConditions(series: DataSeries, conditions: Condition[] = []): number[] { + if (conditions.length === 0) return series.values; + return series.values; // TODO: Implement filtering + } + + // Basic statistical functions + unique(series: DataSeries): number[] { + validateSeries(series); + return _.uniq(series.values); + } + + mean(series: DataSeries, conditions: Condition[] = []): number { + validateSeries(series); + const filteredValues = this.applyConditions(series, conditions); + if (filteredValues.length === 0) throw new Error('No data points match conditions'); + return Number(math.mean(filteredValues)); + } + + count(series: DataSeries, conditions: Condition[] = []): number { + validateSeries(series); + const filteredValues = this.applyConditions(series, conditions); + if (filteredValues.length === 0) throw new Error('No data points match conditions'); + return filteredValues.length; + } + + distinctCount(series: DataSeries, conditions: Condition[] = []): number { + validateSeries(series); + const filteredValues = this.applyConditions(series, conditions); + const uniqueValues = _.uniq(filteredValues); + return uniqueValues.length; + } + + variance(series: DataSeries, conditions: Condition[] = []): number { + validateSeries(series); + const filteredValues = this.applyConditions(series, conditions); + if (filteredValues.length === 0) throw new Error('No data points match conditions'); + return Number(math.variance(filteredValues)); + } + + standardDeviation(series: 
DataSeries, conditions: Condition[] = []): number { + validateSeries(series); + const filteredValues = this.applyConditions(series, conditions); + if (filteredValues.length === 0) throw new Error('No data points match conditions'); + return Number(math.std(filteredValues)); + } + + percentile(series: DataSeries, percent: number, ascending: boolean = true, conditions: Condition[] = []): number { + validateSeries(series); + const filteredValues = this.applyConditions(series, conditions); + if (filteredValues.length === 0) throw new Error('No data points match conditions'); + + const sorted = ascending ? _.sortBy(filteredValues) : _.sortBy(filteredValues).reverse(); + const index = (percent / 100) * (sorted.length - 1); + const lower = Math.floor(index); + const upper = Math.ceil(index); + const weight = index % 1; + + return sorted[lower] * (1 - weight) + sorted[upper] * weight; + } + + median(series: DataSeries, conditions: Condition[] = []): number { + return this.percentile(series, 50, true, conditions); + } + + mode(series: DataSeries, conditions: Condition[] = []): number[] { + validateSeries(series); + const filteredValues = this.applyConditions(series, conditions); + const frequency = _.countBy(filteredValues); + const maxFreq = Math.max(...Object.values(frequency)); + + return Object.keys(frequency) + .filter(key => frequency[key] === maxFreq) + .map(Number); + } + + max(series: DataSeries, conditions: Condition[] = []): number { + validateSeries(series); + const filteredValues = this.applyConditions(series, conditions); + if (filteredValues.length === 0) throw new Error('No data points match conditions'); + return Math.max(...filteredValues); + } + + min(series: DataSeries, conditions: Condition[] = []): number { + validateSeries(series); + const filteredValues = this.applyConditions(series, conditions); + if (filteredValues.length === 0) throw new Error('No data points match conditions'); + return Math.min(...filteredValues); + } + + correlation(series1: 
DataSeries, series2: DataSeries): number { + validateSeries(series1); + validateSeries(series2); + + if (series1.values.length !== series2.values.length) { + throw new Error('Series must have same length for correlation'); + } + + const x = series1.values; + const y = series2.values; + const n = x.length; + + const sumX = _.sum(x); + const sumY = _.sum(y); + const sumXY = _.sum(x.map((xi, i) => xi * y[i])); + const sumX2 = _.sum(x.map(xi => xi * xi)); + const sumY2 = _.sum(y.map(yi => yi * yi)); + + const numerator = n * sumXY - sumX * sumY; + const denominator = Math.sqrt((n * sumX2 - sumX * sumX) * (n * sumY2 - sumY * sumY)); + + return numerator / denominator; + } + + // Rolling window functions + rolling(series: DataSeries, windowSize: number): RollingWindow { + validateSeries(series); + if (windowSize <= 0) { + throw new Error('Window size must be a positive number.'); + } + if (series.values.length < windowSize) { + return new RollingWindow([]); + } + + const windows: number[][] = []; + for (let i = 0; i <= series.values.length - windowSize; i++) { + const window = series.values.slice(i, i + windowSize); + windows.push(window); + } + return new RollingWindow(windows); + } + + movingAverage(series: DataSeries, windowSize: number): number[] { + return this.rolling(series, windowSize).mean(); + } + + // K-means wrapper (uses imported KMeans class) + kmeans(matrix: DataMatrix, nClusters: number, options: KMeansOptions = {}): { clusters: number[][][], centroids: number[][] } { + validateMatrix(matrix); + const points: number[][] = matrix.data; + + // Use the new MiniBatchKMeans class + const kmeans = new KMeans(points, nClusters, options); + const result = kmeans.run(); + + const centroids = result.clusters.map(c => (c as any).centroid); + const clusters = result.clusters.map(c => (c as any).points); + + return { clusters, centroids }; + } + + // Time helper wrapper functions + getWeekNumber(dateString: string): number { + return getWeekNumber(dateString); + } + + 
getSameWeekDayLastYear(dateString: string): string { + return getSameWeekDayLastYear(dateString); + } + + // ======================================== + // Prediction functions + // ======================================== + + timeSeriesForecast(series: DataSeries, forecastPeriods: number): ForecastResult { + validateSeries(series); + + const model = calculateLinearRegression(series.values); + const forecast = generateForecast(model, series.values.length, forecastPeriods); + const predictionIntervals = calculatePredictionIntervals(series.values, model, forecast); + + return { + forecast, + predictionIntervals, + modelParameters: { + slope: model.slope, + intercept: model.intercept, + }, + }; + } +} + +export const analytics = new AnalyticsEngine(); + diff --git a/convolution.ts b/services/convolution.ts similarity index 100% rename from convolution.ts rename to services/convolution.ts diff --git a/kmeans.ts b/services/kmeans.ts similarity index 97% rename from kmeans.ts rename to services/kmeans.ts index 12b85e2..a0ae502 100644 --- a/kmeans.ts +++ b/services/kmeans.ts @@ -1,144 +1,144 @@ -export type Point = number[]; - -export interface Cluster { - centroid: Point; - points: Point[]; -} - -export interface KMeansOptions { - batchSize?: number; - maxIterations?: number; - tolerance?: number; -} - -export interface KMeansResult { - clusters: Cluster[]; - iterations: number; - converged: boolean; -} - -export class KMeans { - private readonly k: number; - private readonly batchSize: number; - private readonly maxIterations: number; - private readonly tolerance: number; - private readonly data: Point[]; - private centroids: Point[] = []; - - constructor(data: Point[], k: number, options: KMeansOptions = {}) { - this.data = data; - this.k = k; - this.batchSize = options.batchSize ?? 32; - this.maxIterations = options.maxIterations ?? 100; - this.tolerance = options.tolerance ?? 
0.0001; - } - - private static euclideanDistance(p1: Point, p2: Point): number { - return Math.sqrt(p1.reduce((sum, val, i) => sum + (val - p2[i]) ** 2, 0)); - } - - private initializeCentroids(): void { - const dataCopy = [...this.data]; - for (let i = 0; i < this.k; i++) { - const randomIndex = Math.floor(Math.random() * dataCopy.length); - this.centroids.push([...dataCopy[randomIndex]]); - dataCopy.splice(randomIndex, 1); - } - } - - /** - * Creates a random sample of the data. - */ - private createMiniBatch(): Point[] { - const miniBatch: Point[] = []; - const dataCopy = [...this.data]; - for (let i = 0; i < this.batchSize && dataCopy.length > 0; i++) { - const randomIndex = Math.floor(Math.random() * dataCopy.length); - miniBatch.push(dataCopy[randomIndex]); - dataCopy.splice(randomIndex, 1); - } - return miniBatch; - } - - /** - * Assigns all points in the full dataset to the final centroids. - */ - private assignFinalClusters(): Cluster[] { - const clusters: Cluster[] = this.centroids.map(c => ({ centroid: c, points: [] })); - - for (const point of this.data) { - let minDistance = Infinity; - let closestClusterIndex = -1; - for (let i = 0; i < this.centroids.length; i++) { - const distance = KMeans.euclideanDistance(point, this.centroids[i]); - if (distance < minDistance) { - minDistance = distance; - closestClusterIndex = i; - } - } - if (closestClusterIndex !== -1) { - clusters[closestClusterIndex].points.push(point); - } - } - return clusters; - } - - public run(): KMeansResult { - this.initializeCentroids(); - - const clusterPointCounts = new Array(this.k).fill(0); - let converged = false; - let iterations = 0; - - for (let i = 0; i < this.maxIterations; i++) { - iterations = i + 1; - const miniBatch = this.createMiniBatch(); - const previousCentroids = this.centroids.map(c => [...c]); - - // Assign points in the batch and update centroids gradually - for (const point of miniBatch) { - let minDistance = Infinity; - let closestClusterIndex = -1; - - for 
(let j = 0; j < this.k; j++) { - const distance = KMeans.euclideanDistance(point, this.centroids[j]); - if (distance < minDistance) { - minDistance = distance; - closestClusterIndex = j; - } - } - - if (closestClusterIndex !== -1) { - clusterPointCounts[closestClusterIndex]++; - const learningRate = 1 / clusterPointCounts[closestClusterIndex]; - const centroidToUpdate = this.centroids[closestClusterIndex]; - - // Move the centroid slightly towards the new point - for (let dim = 0; dim < centroidToUpdate.length; dim++) { - centroidToUpdate[dim] = (1 - learningRate) * centroidToUpdate[dim] + learningRate * point[dim]; - } - } - } - - // Check for convergence - let totalMovement = 0; - for(let j = 0; j < this.k; j++) { - totalMovement += KMeans.euclideanDistance(previousCentroids[j], this.centroids[j]); - } - - if (totalMovement < this.tolerance) { - converged = true; - break; - } - } - - // After training, assign all points to the final centroids - const finalClusters = this.assignFinalClusters(); - - return { - clusters: finalClusters, - iterations, - converged - }; - } +export type Point = number[]; + +export interface Cluster { + centroid: Point; + points: Point[]; +} + +export interface KMeansOptions { + batchSize?: number; + maxIterations?: number; + tolerance?: number; +} + +export interface KMeansResult { + clusters: Cluster[]; + iterations: number; + converged: boolean; +} + +export class KMeans { + private readonly k: number; + private readonly batchSize: number; + private readonly maxIterations: number; + private readonly tolerance: number; + private readonly data: Point[]; + private centroids: Point[] = []; + + constructor(data: Point[], k: number, options: KMeansOptions = {}) { + this.data = data; + this.k = k; + this.batchSize = options.batchSize ?? 32; + this.maxIterations = options.maxIterations ?? 100; + this.tolerance = options.tolerance ?? 
0.0001; + } + + private static euclideanDistance(p1: Point, p2: Point): number { + return Math.sqrt(p1.reduce((sum, val, i) => sum + (val - p2[i]) ** 2, 0)); + } + + private initializeCentroids(): void { + const dataCopy = [...this.data]; + for (let i = 0; i < this.k; i++) { + const randomIndex = Math.floor(Math.random() * dataCopy.length); + this.centroids.push([...dataCopy[randomIndex]]); + dataCopy.splice(randomIndex, 1); + } + } + + /** + * Creates a random sample of the data. + */ + private createMiniBatch(): Point[] { + const miniBatch: Point[] = []; + const dataCopy = [...this.data]; + for (let i = 0; i < this.batchSize && dataCopy.length > 0; i++) { + const randomIndex = Math.floor(Math.random() * dataCopy.length); + miniBatch.push(dataCopy[randomIndex]); + dataCopy.splice(randomIndex, 1); + } + return miniBatch; + } + + /** + * Assigns all points in the full dataset to the final centroids. + */ + private assignFinalClusters(): Cluster[] { + const clusters: Cluster[] = this.centroids.map(c => ({ centroid: c, points: [] })); + + for (const point of this.data) { + let minDistance = Infinity; + let closestClusterIndex = -1; + for (let i = 0; i < this.centroids.length; i++) { + const distance = KMeans.euclideanDistance(point, this.centroids[i]); + if (distance < minDistance) { + minDistance = distance; + closestClusterIndex = i; + } + } + if (closestClusterIndex !== -1) { + clusters[closestClusterIndex].points.push(point); + } + } + return clusters; + } + + public run(): KMeansResult { + this.initializeCentroids(); + + const clusterPointCounts = new Array(this.k).fill(0); + let converged = false; + let iterations = 0; + + for (let i = 0; i < this.maxIterations; i++) { + iterations = i + 1; + const miniBatch = this.createMiniBatch(); + const previousCentroids = this.centroids.map(c => [...c]); + + // Assign points in the batch and update centroids gradually + for (const point of miniBatch) { + let minDistance = Infinity; + let closestClusterIndex = -1; + + for 
(let j = 0; j < this.k; j++) { + const distance = KMeans.euclideanDistance(point, this.centroids[j]); + if (distance < minDistance) { + minDistance = distance; + closestClusterIndex = j; + } + } + + if (closestClusterIndex !== -1) { + clusterPointCounts[closestClusterIndex]++; + const learningRate = 1 / clusterPointCounts[closestClusterIndex]; + const centroidToUpdate = this.centroids[closestClusterIndex]; + + // Move the centroid slightly towards the new point + for (let dim = 0; dim < centroidToUpdate.length; dim++) { + centroidToUpdate[dim] = (1 - learningRate) * centroidToUpdate[dim] + learningRate * point[dim]; + } + } + } + + // Check for convergence + let totalMovement = 0; + for(let j = 0; j < this.k; j++) { + totalMovement += KMeans.euclideanDistance(previousCentroids[j], this.centroids[j]); + } + + if (totalMovement < this.tolerance) { + converged = true; + break; + } + } + + // After training, assign all points to the final centroids + const finalClusters = this.assignFinalClusters(); + + return { + clusters: finalClusters, + iterations, + converged + }; + } } \ No newline at end of file diff --git a/services/pivot_table.ts b/services/pivot_table.ts new file mode 100644 index 0000000..515c5e0 --- /dev/null +++ b/services/pivot_table.ts @@ -0,0 +1,36 @@ +import { analytics } from './analytics_engine'; // Import your analytics engine + +export interface PivotOptions { + index: string[]; + columns: string[]; + values: string; + aggFunc?: (items: number[]) => number; // Aggregation function (e.g., analytics.mean) +} + +export function pivotTable( + data: Record<string, any>[], + options: PivotOptions +): Record<string, Record<string, number>> { + const { index, columns, values, aggFunc = arr => arr.reduce((a, b) => a + b, 0) } = options; + const cellMap: Record<string, Record<string, number[]>> = {}; + + data.forEach(row => { + const rowKey = index.map(k => row[k]).join('|'); + const colKey = columns.map(k => row[k]).join('|'); + + if (!cellMap[rowKey]) cellMap[rowKey] = {}; + if (!cellMap[rowKey][colKey]) cellMap[rowKey][colKey] = 
[]; + cellMap[rowKey][colKey].push(row[values]); + }); + + // Apply aggregation function to each cell + const result: Record<string, Record<string, number>> = {}; + Object.entries(cellMap).forEach(([rowKey, cols]) => { + result[rowKey] = {}; + Object.entries(cols).forEach(([colKey, valuesArr]) => { + result[rowKey][colKey] = aggFunc(valuesArr); + }); + }); + + return result; +} \ No newline at end of file diff --git a/prediction.ts b/services/prediction.ts similarity index 94% rename from prediction.ts rename to services/prediction.ts index eb46525..799c5db 100644 --- a/prediction.ts +++ b/services/prediction.ts @@ -1,101 +1,101 @@ -import * as math from 'mathjs'; - -// The structure for the returned regression model -export interface LinearRegressionModel { - slope: number; - intercept: number; - predict: (x: number) => number; -} - -// The structure for the full forecast output -export interface ForecastResult { - forecast: number[]; - predictionIntervals: { - upperBound: number[]; - lowerBound: number[]; - }; - modelParameters: { - slope: number; - intercept: number; - }; -} - -/** - * Calculates the linear regression model from a time series. - * @param yValues The historical data points (e.g., sales per month). - * @returns {LinearRegressionModel} An object containing the model's parameters and a predict function. 
- */ -export function calculateLinearRegression(yValues: number[]): LinearRegressionModel { - if (yValues.length < 2) { - throw new Error('At least two data points are required for linear regression.'); - } - - const xValues = Array.from({ length: yValues.length }, (_, i) => i); - - const meanX = Number(math.mean(xValues)); - const meanY = Number(math.mean(yValues)); - const stdDevX = Number(math.std(xValues, 'uncorrected')); - const stdDevY = Number(math.std(yValues, 'uncorrected')); - - // Ensure stdDevX is not zero to avoid division by zero - if (stdDevX === 0) { - // This happens if all xValues are the same, which is impossible in this time series context, - // but it's good practice to handle. A vertical line has an infinite slope. - // For simplicity, we can return a model with zero slope. - return { slope: 0, intercept: meanY, predict: (x: number) => meanY }; - } - - // Cast the result of math.sum to a Number - const correlationNumerator = Number(math.sum(xValues.map((x, i) => (x - meanX) * (yValues[i] - meanY)))); - - const correlation = correlationNumerator / ((xValues.length - 1) * stdDevX * stdDevY); - - const slope = correlation * (stdDevY / stdDevX); - const intercept = meanY - slope * meanX; - - const predict = (x: number): number => slope * x + intercept; - - return { slope, intercept, predict }; -} - -/** - * Generates a forecast for a specified number of future periods. - * @param model The calculated linear regression model. - * @param historicalDataLength The number of historical data points. - * @param forecastPeriods The number of future periods to predict. - * @returns {number[]} An array of forecasted values. 
- */ -export function generateForecast(model: LinearRegressionModel, historicalDataLength: number, forecastPeriods: number): number[] { - const forecast: number[] = []; - const startPeriod = historicalDataLength; - - for (let i = 0; i < forecastPeriods; i++) { - const futureX = startPeriod + i; - forecast.push(model.predict(futureX)); - } - return forecast; -} - -/** - * Calculates prediction intervals to show the range of uncertainty. - * @param yValues The original historical data. - * @param model The calculated linear regression model. - * @param forecast The array of forecasted values. - * @returns An object with upperBound and lowerBound arrays. - */ -export function calculatePredictionIntervals(yValues: number[], model: LinearRegressionModel, forecast: number[]) { - const n = yValues.length; - const residualsSquaredSum = yValues.reduce((sum, y, i) => { - const predictedY = model.predict(i); - return sum + (y - predictedY) ** 2; - }, 0); - const stdError = Math.sqrt(residualsSquaredSum / (n - 2)); - - const zScore = 1.96; // For a 95% confidence level - const marginOfError = zScore * stdError; - - const upperBound = forecast.map(val => val + marginOfError); - const lowerBound = forecast.map(val => val - marginOfError); - - return { upperBound, lowerBound }; +import * as math from 'mathjs'; + +// The structure for the returned regression model +export interface LinearRegressionModel { + slope: number; + intercept: number; + predict: (x: number) => number; +} + +// The structure for the full forecast output +export interface ForecastResult { + forecast: number[]; + predictionIntervals: { + upperBound: number[]; + lowerBound: number[]; + }; + modelParameters: { + slope: number; + intercept: number; + }; +} + +/** + * Calculates the linear regression model from a time series. + * @param yValues The historical data points (e.g., sales per month). + * @returns {LinearRegressionModel} An object containing the model's parameters and a predict function. 
+ */ +export function calculateLinearRegression(yValues: number[]): LinearRegressionModel { + if (yValues.length < 2) { + throw new Error('At least two data points are required for linear regression.'); + } + + const xValues = Array.from({ length: yValues.length }, (_, i) => i); + + const meanX = Number(math.mean(xValues)); + const meanY = Number(math.mean(yValues)); + const stdDevX = Number(math.std(xValues, 'uncorrected')); + const stdDevY = Number(math.std(yValues, 'uncorrected')); + + // Ensure stdDevX is not zero to avoid division by zero + if (stdDevX === 0) { + // This happens if all xValues are the same, which is impossible in this time series context, + // but it's good practice to handle. A vertical line has an infinite slope. + // For simplicity, we can return a model with zero slope. + return { slope: 0, intercept: meanY, predict: (x: number) => meanY }; + } + + // Cast the result of math.sum to a Number + const correlationNumerator = Number(math.sum(xValues.map((x, i) => (x - meanX) * (yValues[i] - meanY)))); + + const correlation = correlationNumerator / ((xValues.length) * stdDevX * stdDevY); + + const slope = correlation * (stdDevY / stdDevX); + const intercept = meanY - slope * meanX; + + const predict = (x: number): number => slope * x + intercept; + + return { slope, intercept, predict }; +} + +/** + * Generates a forecast for a specified number of future periods. + * @param model The calculated linear regression model. + * @param historicalDataLength The number of historical data points. + * @param forecastPeriods The number of future periods to predict. + * @returns {number[]} An array of forecasted values. 
+ */ +export function generateForecast(model: LinearRegressionModel, historicalDataLength: number, forecastPeriods: number): number[] { + const forecast: number[] = []; + const startPeriod = historicalDataLength; + + for (let i = 0; i < forecastPeriods; i++) { + const futureX = startPeriod + i; + forecast.push(model.predict(futureX)); + } + return forecast; +} + +/** + * Calculates prediction intervals to show the range of uncertainty. + * @param yValues The original historical data. + * @param model The calculated linear regression model. + * @param forecast The array of forecasted values. + * @returns An object with upperBound and lowerBound arrays. + */ +export function calculatePredictionIntervals(yValues: number[], model: LinearRegressionModel, forecast: number[]) { + const n = yValues.length; + const residualsSquaredSum = yValues.reduce((sum, y, i) => { + const predictedY = model.predict(i); + return sum + (y - predictedY) ** 2; + }, 0); + const stdError = Math.sqrt(residualsSquaredSum / (n - 2)); + + const zScore = 1.96; // For a 95% confidence level + const marginOfError = zScore * stdError; + + const upperBound = forecast.map(val => val + marginOfError); + const lowerBound = forecast.map(val => val - marginOfError); + + return { upperBound, lowerBound }; } \ No newline at end of file diff --git a/services/retail_metrics.ts b/services/retail_metrics.ts new file mode 100644 index 0000000..08b0b78 --- /dev/null +++ b/services/retail_metrics.ts @@ -0,0 +1,77 @@ + export function purchaseIndex(totalItemsSold: number, numberOfCustomers: number): number { + if (numberOfCustomers === 0) { + throw new Error('Number of customers cannot be zero'); + } + return (totalItemsSold / numberOfCustomers) * 1000; + } + + export function purchaseRate(productPurchases: number, totalTransactions: number): number; + export function purchaseRate(productPurchases: number[], totalTransactions: number[]): number[]; + export function purchaseRate(productPurchases: number | number[], 
totalTransactions: number | number[]): number | number[] { + if (Array.isArray(productPurchases) && Array.isArray(totalTransactions)) { + if (productPurchases.length !== totalTransactions.length) throw new Error('Arrays must have the same length'); + return productPurchases.map((pp, i) => purchaseRate(pp, totalTransactions[i])); + } + if (typeof productPurchases === 'number' && typeof totalTransactions === 'number') { + if (totalTransactions === 0) throw new Error('Total transactions cannot be zero'); + return (productPurchases / totalTransactions) * 100; + } + throw new Error('Input types must match'); + } + + export function liftValue(jointPurchaseRate: number, productAPurchaseRate: number, productBPurchaseRate: number): number; + export function liftValue(jointPurchaseRate: number[], productAPurchaseRate: number[], productBPurchaseRate: number[]): number[]; + export function liftValue(jointPurchaseRate: number | number[], productAPurchaseRate: number | number[], productBPurchaseRate: number | number[]): number | number[] { + if (Array.isArray(jointPurchaseRate) && Array.isArray(productAPurchaseRate) && Array.isArray(productBPurchaseRate)) { + if (jointPurchaseRate.length !== productAPurchaseRate.length || jointPurchaseRate.length !== productBPurchaseRate.length) throw new Error('Arrays must have the same length'); + return jointPurchaseRate.map((jpr, i) => liftValue(jpr, productAPurchaseRate[i], productBPurchaseRate[i])); + } + if (typeof jointPurchaseRate === 'number' && typeof productAPurchaseRate === 'number' && typeof productBPurchaseRate === 'number') { + const expectedJointRate = productAPurchaseRate * productBPurchaseRate; + if (expectedJointRate === 0) throw new Error('Expected joint rate cannot be zero'); + return jointPurchaseRate / expectedJointRate; + } + throw new Error('Input types must match'); + } + + export function costRatio(cost: number, salePrice: number): number; + export function costRatio(cost: number[], salePrice: number[]): number[]; + 
export function costRatio(cost: number | number[], salePrice: number | number[]): number | number[] { + if (Array.isArray(cost) && Array.isArray(salePrice)) { + if (cost.length !== salePrice.length) throw new Error('Arrays must have the same length'); + return cost.map((c, i) => costRatio(c, salePrice[i])); + } + if (typeof cost === 'number' && typeof salePrice === 'number') { + if (salePrice === 0) throw new Error('Sale price cannot be zero'); + return cost / salePrice; + } + throw new Error('Input types must match'); + } + + export function grossMarginRate(salePrice: number, cost: number): number; + export function grossMarginRate(salePrice: number[], cost: number[]): number[]; + export function grossMarginRate(salePrice: number | number[], cost: number | number[]): number | number[] { + if (Array.isArray(salePrice) && Array.isArray(cost)) { + if (salePrice.length !== cost.length) throw new Error('Arrays must have the same length'); + return salePrice.map((sp, i) => grossMarginRate(sp, cost[i])); + } + if (typeof salePrice === 'number' && typeof cost === 'number') { + if (salePrice === 0) throw new Error('Sale price cannot be zero'); + return (salePrice - cost) / salePrice; + } + throw new Error('Input types must match'); + } + + export function averageSpendPerCustomer(totalRevenue: number, numberOfCustomers: number): number; + export function averageSpendPerCustomer(totalRevenue: number[], numberOfCustomers: number[]): number[]; + export function averageSpendPerCustomer(totalRevenue: number | number[], numberOfCustomers: number | number[]): number | number[] { + if (Array.isArray(totalRevenue) && Array.isArray(numberOfCustomers)) { + if (totalRevenue.length !== numberOfCustomers.length) throw new Error('Arrays must have the same length'); + return totalRevenue.map((tr, i) => averageSpendPerCustomer(tr, numberOfCustomers[i])); + } + if (typeof totalRevenue === 'number' && typeof numberOfCustomers === 'number') { + if (numberOfCustomers === 0) throw new 
Error('Number of customers cannot be zero'); + return totalRevenue / numberOfCustomers; + } + throw new Error('Input types must match'); + } \ No newline at end of file diff --git a/services/rolling_window.ts b/services/rolling_window.ts new file mode 100644 index 0000000..2e11e1e --- /dev/null +++ b/services/rolling_window.ts @@ -0,0 +1,30 @@ +import * as math from 'mathjs'; +import * as _ from 'lodash'; + +export class RollingWindow { + private windows: number[][]; + + constructor(windows: number[][]) { + this.windows = windows; + } + + mean(): number[] { + return this.windows.map(window => Number(math.mean(window))); + } + + sum(): number[] { + return this.windows.map(window => _.sum(window)); + } + + min(): number[] { + return this.windows.map(window => Math.min(...window)); + } + + max(): number[] { + return this.windows.map(window => Math.max(...window)); + } + + toArray(): number[][] { + return this.windows; + } +} \ No newline at end of file diff --git a/signal_processing_convolution.ts b/services/signal_processing_convolution.ts similarity index 100% rename from signal_processing_convolution.ts rename to services/signal_processing_convolution.ts diff --git a/time-helper.ts b/services/time-helper.ts similarity index 91% rename from time-helper.ts rename to services/time-helper.ts index b7acecc..06faa9f 100644 --- a/time-helper.ts +++ b/services/time-helper.ts @@ -1,24 +1,22 @@ -// time-helpers.ts - Date and time utility functions - -import { getISOWeek, getISODay, subYears, setISOWeek, setISODay, isValid } from 'date-fns'; - -export const getWeekNumber = (dateString: string): number => { - const date = new Date(dateString); - if (!isValid(date)) { - throw new Error('Invalid date string provided.'); - } - return getISOWeek(date); -}; - -export const getSameWeekDayLastYear = (dateString: string): string => { - const baseDate = new Date(dateString); - if (!isValid(baseDate)) { - throw new Error('Invalid date string provided.'); - } - const originalWeek = 
getISOWeek(baseDate); - const originalDayOfWeek = getISODay(baseDate); - const lastYearDate = subYears(baseDate, 1); - const dateWithWeekSet = setISOWeek(lastYearDate, originalWeek); - const finalDate = setISODay(dateWithWeekSet, originalDayOfWeek); - return finalDate.toISOString().split('T')[0]; // Return as YYYY-MM-DD +import { getISOWeek, getISODay, subYears, setISOWeek, setISODay, isValid } from 'date-fns'; + +export const getWeekNumber = (dateString: string): number => { + const date = new Date(dateString); + if (!isValid(date)) { + throw new Error('Invalid date string provided.'); + } + return getISOWeek(date); +}; + +export const getSameWeekDayLastYear = (dateString: string): string => { + const baseDate = new Date(dateString); + if (!isValid(baseDate)) { + throw new Error('Invalid date string provided.'); + } + const originalWeek = getISOWeek(baseDate); + const originalDayOfWeek = getISODay(baseDate); + const lastYearDate = subYears(baseDate, 1); + const dateWithWeekSet = setISOWeek(lastYearDate, originalWeek); + const finalDate = setISODay(dateWithWeekSet, originalDayOfWeek); + return finalDate.toISOString().split('T')[0]; // Return as YYYY-MM-DD }; \ No newline at end of file diff --git a/timeseries.ts b/services/timeseries.ts similarity index 97% rename from timeseries.ts rename to services/timeseries.ts index 077c81f..3ceac50 100644 --- a/timeseries.ts +++ b/services/timeseries.ts @@ -1,346 +1,346 @@ -// timeseries.ts - A library for time series analysis, focusing on ARIMA. - -// ======================================== -// TYPE DEFINITIONS -// ======================================== - -/** - * Defines the parameters for an ARIMA model. - * (p, d, q) are the non-seasonal components. - * (P, D, Q, s) are the optional seasonal components for SARIMA. 
- */ -export interface ARIMAOptions { - p: number; // AutoRegressive (AR) order - d: number; // Differencing (I) order - q: number; // Moving Average (MA) order - P?: number; // Seasonal AR order - D?: number; // Seasonal Differencing order - Q?: number; // Seasonal MA order - s?: number; // Seasonal period length -} - -/** - * The result object from an ARIMA forecast. - */ -export interface ARIMAForecastResult { - forecast: number[]; // The predicted future values - residuals: number[]; // The errors of the model fit on the original data - model: ARIMAOptions; // The model parameters used -} - -/** - * The result object from an STL decomposition. - */ -export interface STLDecomposition { - seasonal: number[]; // The seasonal component of the series - trend: number[]; // The trend component of the series - residual: number[]; // The remainder/residual component - original: number[]; // The original series, for comparison -} - - -/** - * A class for performing time series analysis, including identification and forecasting. - */ -export class TimeSeriesAnalyzer { - - // ======================================== - // 1. IDENTIFICATION METHODS - // ======================================== - - /** - * Calculates the difference of a time series. - * This is the 'I' (Integrated) part of ARIMA, used to make a series stationary. - * @param series The input data series. - * @param lag The lag to difference by (usually 1). - * @returns A new, differenced time series. - */ - static difference(series: number[], lag: number = 1): number[] { - if (lag < 1 || !Number.isInteger(lag)) { - throw new Error('Lag must be a positive integer.'); - } - if (series.length <= lag) { - return []; - } - - const differenced: number[] = []; - for (let i = lag; i < series.length; i++) { - differenced.push(series[i] - series[i - lag]); - } - return differenced; - } - - /** - * Helper function to calculate the autocovariance of a series at a given lag. 
- */ - private static autocovariance(series: number[], lag: number): number { - const n = series.length; - if (lag >= n) return 0; - const mean = series.reduce((a, b) => a + b) / n; - let sum = 0; - for (let i = lag; i < n; i++) { - sum += (series[i] - mean) * (series[i - lag] - mean); - } - return sum / n; - } - - /** - * Calculates the Autocorrelation Function (ACF) for a time series. - * ACF helps in determining the 'q' parameter for an ARIMA model. - * @param series The input data series. - * @param maxLag The maximum number of lags to calculate. - * @returns An array of correlation values from lag 1 to maxLag. - */ - static calculateACF(series: number[], maxLag: number): number[] { - if (series.length < 2) return []; - - const variance = this.autocovariance(series, 0); - if (variance === 0) { - return new Array(maxLag).fill(1); - } - - const acf: number[] = []; - for (let lag = 1; lag <= maxLag; lag++) { - acf.push(this.autocovariance(series, lag) / variance); - } - return acf; - } - - /** - * Calculates the Partial Autocorrelation Function (PACF) for a time series. - * This now uses the Durbin-Levinson algorithm for an accurate calculation. - * PACF helps in determining the 'p' parameter for an ARIMA model. - * @param series The input data series. - * @param maxLag The maximum number of lags to calculate. - * @returns An array of partial correlation values from lag 1 to maxLag. 
- */ - static calculatePACF(series: number[], maxLag: number): number[] { - const acf = this.calculateACF(series, maxLag); - const pacf: number[] = []; - - if (acf.length === 0) return []; - - pacf.push(acf[0]); // PACF at lag 1 is the same as ACF at lag 1 - - for (let k = 2; k <= maxLag; k++) { - let numerator = acf[k - 1]; - let denominator = 1; - - const phi = new Array(k + 1).fill(0).map(() => new Array(k + 1).fill(0)); - - for(let i=1; i<=k; i++) { - phi[i][i] = acf[i-1]; - } - - for (let j = 1; j < k; j++) { - const factor = pacf[j - 1]; - numerator -= factor * acf[k - j - 1]; - denominator -= factor * acf[j - 1]; - } - - if (Math.abs(denominator) < 1e-9) { // Avoid division by zero - pacf.push(0); - continue; - } - - const pacf_k = numerator / denominator; - pacf.push(pacf_k); - } - - return pacf; - } - - /** - * Decomposes a time series using the robust Classical Additive method. - * This version correctly isolates trend, seasonal, and residual components. - * @param series The input data series. - * @param period The seasonal period (e.g., 7 for daily data with a weekly cycle). - * @returns An object containing the seasonal, trend, and residual series. - */ - static stlDecomposition(series: number[], period: number): STLDecomposition { - if (series.length < 2 * period) { - throw new Error("Series must be at least twice the length of the seasonal period."); - } - - // Helper for a centered moving average - const movingAverage = (data: number[], window: number) => { - const result = []; - const halfWindow = Math.floor(window / 2); - for (let i = 0; i < data.length; i++) { - const start = Math.max(0, i - halfWindow); - const end = Math.min(data.length, i + halfWindow + 1); - let sum = 0; - for (let j = start; j < end; j++) { - sum += data[j]; - } - result.push(sum / (end - start)); - } - return result; - }; - - // Step 1: Calculate the trend using a centered moving average. - // If period is even, we use a 2x-MA to center it correctly. 
- let trend: number[]; - if (period % 2 === 0) { - const intermediate = movingAverage(series, period); - trend = movingAverage(intermediate, 2); - } else { - trend = movingAverage(series, period); - } - - // Step 2: Detrend the series - const detrended = series.map((val, i) => val - trend[i]); - - // Step 3: Calculate the seasonal component by averaging the detrended values for each period - const seasonalAverages = new Array(period).fill(0); - const seasonalCounts = new Array(period).fill(0); - for (let i = 0; i < series.length; i++) { - if (!isNaN(detrended[i])) { - const seasonIndex = i % period; - seasonalAverages[seasonIndex] += detrended[i]; - seasonalCounts[seasonIndex]++; - } - } - - for (let i = 0; i < period; i++) { - seasonalAverages[i] /= seasonalCounts[i]; - } - - // Center the seasonal component to have a mean of zero - const seasonalMean = seasonalAverages.reduce((a, b) => a + b, 0) / period; - const centeredSeasonalAverages = seasonalAverages.map(avg => avg - seasonalMean); - - const seasonal = new Array(series.length).fill(0); - for (let i = 0; i < series.length; i++) { - seasonal[i] = centeredSeasonalAverages[i % period]; - } - - // Step 4: Calculate the residual component - const residual = detrended.map((val, i) => val - seasonal[i]); - - return { - original: series, - seasonal, - trend, - residual, - }; - } - - - // ======================================== - // 2. FORECASTING METHODS - // ======================================== - - /** - * [UPGRADED] Generates a forecast using a simplified SARIMA model. - * This implementation now handles both non-seasonal (p,d,q) and seasonal (P,D,Q,s) components. - * @param series The input time series data. - * @param options The SARIMA parameters. - * @param forecastSteps The number of future steps to predict. - * @returns An object containing the forecast and model residuals. 
- */ - static arimaForecast(series: number[], options: ARIMAOptions, forecastSteps: number): ARIMAForecastResult { - const { p, d, q, P = 0, D = 0, Q = 0, s = 0 } = options; - - if (series.length < p + d + (P + D) * s + q + Q * s) { - throw new Error("Data series is too short for the specified SARIMA order."); - } - - const originalSeries = [...series]; - let differencedSeries = [...series]; - const diffLog: { lag: number, values: number[] }[] = []; - - // Step 1: Apply seasonal differencing 'D' times - for (let i = 0; i < D; i++) { - diffLog.push({ lag: s, values: differencedSeries.slice(-s) }); - differencedSeries = this.difference(differencedSeries, s); - } - - // Step 2: Apply non-seasonal differencing 'd' times - for (let i = 0; i < d; i++) { - diffLog.push({ lag: 1, values: differencedSeries.slice(-1) }); - differencedSeries = this.difference(differencedSeries, 1); - } - - const n = differencedSeries.length; - // Simplified coefficients - const arCoeffs = p > 0 ? new Array(p).fill(1 / p) : []; - const maCoeffs = q > 0 ? new Array(q).fill(1 / q) : []; - const sarCoeffs = P > 0 ? new Array(P).fill(1 / P) : []; - const smaCoeffs = Q > 0 ? 
new Array(Q).fill(1 / Q) : []; - - const residuals: number[] = new Array(n).fill(0); - const fitted: number[] = new Array(n).fill(0); - - // Step 3: Fit the model - const startIdx = Math.max(p, q, P * s, Q * s); - for (let t = startIdx; t < n; t++) { - // Non-seasonal AR - let arVal = 0; - for (let i = 0; i < p; i++) arVal += arCoeffs[i] * differencedSeries[t - 1 - i]; - - // Non-seasonal MA - let maVal = 0; - for (let i = 0; i < q; i++) maVal += maCoeffs[i] * residuals[t - 1 - i]; - - // Seasonal AR - let sarVal = 0; - for (let i = 0; i < P; i++) sarVal += sarCoeffs[i] * differencedSeries[t - s * (i + 1)]; - - // Seasonal MA - let smaVal = 0; - for (let i = 0; i < Q; i++) smaVal += smaCoeffs[i] * residuals[t - s * (i + 1)]; - - fitted[t] = arVal + maVal + sarVal + smaVal; - residuals[t] = differencedSeries[t] - fitted[t]; - } - - // Step 4: Generate the forecast - const forecastDifferenced: number[] = []; - const extendedSeries = [...differencedSeries]; - const extendedResiduals = [...residuals]; - - for (let f = 0; f < forecastSteps; f++) { - const t = n + f; - let nextForecast = 0; - - // AR - for (let i = 0; i < p; i++) nextForecast += arCoeffs[i] * extendedSeries[t - 1 - i]; - // MA (future residuals are 0) - for (let i = 0; i < q; i++) nextForecast += maCoeffs[i] * extendedResiduals[t - 1 - i]; - // SAR - for (let i = 0; i < P; i++) nextForecast += sarCoeffs[i] * extendedSeries[t - s * (i + 1)]; - // SMA - for (let i = 0; i < Q; i++) nextForecast += smaCoeffs[i] * extendedResiduals[t - s * (i + 1)]; - - forecastDifferenced.push(nextForecast); - extendedSeries.push(nextForecast); - extendedResiduals.push(0); - } - - // Step 5: Invert the differencing - let forecast = [...forecastDifferenced]; - for (let i = diffLog.length - 1; i >= 0; i--) { - const { lag, values } = diffLog[i]; - const inverted = []; - const fullHistory = [...originalSeries, ...forecast]; // Need a temporary full history for inversion - - // A simpler inversion method for forecasting - let 
history = [...series]; - for (const forecastVal of forecast) { - const lastSeasonalVal = history[history.length - lag]; - const invertedVal = forecastVal + lastSeasonalVal; - inverted.push(invertedVal); - history.push(invertedVal); - } - forecast = inverted; - } - - return { - forecast, - residuals, - model: options, - }; - } -} - +// timeseries.ts - A library for time series analysis, focusing on ARIMA. + +// ======================================== +// TYPE DEFINITIONS +// ======================================== + +/** + * Defines the parameters for an ARIMA model. + * (p, d, q) are the non-seasonal components. + * (P, D, Q, s) are the optional seasonal components for SARIMA. + */ +export interface ARIMAOptions { + p: number; // AutoRegressive (AR) order + d: number; // Differencing (I) order + q: number; // Moving Average (MA) order + P?: number; // Seasonal AR order + D?: number; // Seasonal Differencing order + Q?: number; // Seasonal MA order + s?: number; // Seasonal period length +} + +/** + * The result object from an ARIMA forecast. + */ +export interface ARIMAForecastResult { + forecast: number[]; // The predicted future values + residuals: number[]; // The errors of the model fit on the original data + model: ARIMAOptions; // The model parameters used +} + +/** + * The result object from an STL decomposition. + */ +export interface STLDecomposition { + seasonal: number[]; // The seasonal component of the series + trend: number[]; // The trend component of the series + residual: number[]; // The remainder/residual component + original: number[]; // The original series, for comparison +} + + +/** + * A class for performing time series analysis, including identification and forecasting. + */ +export class TimeSeriesAnalyzer { + + // ======================================== + // 1. IDENTIFICATION METHODS + // ======================================== + + /** + * Calculates the difference of a time series. 
+ * This is the 'I' (Integrated) part of ARIMA, used to make a series stationary. + * @param series The input data series. + * @param lag The lag to difference by (usually 1). + * @returns A new, differenced time series. + */ + static difference(series: number[], lag: number = 1): number[] { + if (lag < 1 || !Number.isInteger(lag)) { + throw new Error('Lag must be a positive integer.'); + } + if (series.length <= lag) { + return []; + } + + const differenced: number[] = []; + for (let i = lag; i < series.length; i++) { + differenced.push(series[i] - series[i - lag]); + } + return differenced; + } + + /** + * Helper function to calculate the autocovariance of a series at a given lag. + */ + private static autocovariance(series: number[], lag: number): number { + const n = series.length; + if (lag >= n) return 0; + const mean = series.reduce((a, b) => a + b) / n; + let sum = 0; + for (let i = lag; i < n; i++) { + sum += (series[i] - mean) * (series[i - lag] - mean); + } + return sum / n; + } + + /** + * Calculates the Autocorrelation Function (ACF) for a time series. + * ACF helps in determining the 'q' parameter for an ARIMA model. + * @param series The input data series. + * @param maxLag The maximum number of lags to calculate. + * @returns An array of correlation values from lag 1 to maxLag. + */ + static calculateACF(series: number[], maxLag: number): number[] { + if (series.length < 2) return []; + + const variance = this.autocovariance(series, 0); + if (variance === 0) { + return new Array(maxLag).fill(1); + } + + const acf: number[] = []; + for (let lag = 1; lag <= maxLag; lag++) { + acf.push(this.autocovariance(series, lag) / variance); + } + return acf; + } + + /** + * Calculates the Partial Autocorrelation Function (PACF) for a time series. + * This now uses the Durbin-Levinson algorithm for an accurate calculation. + * PACF helps in determining the 'p' parameter for an ARIMA model. + * @param series The input data series. 
+ * @param maxLag The maximum number of lags to calculate. + * @returns An array of partial correlation values from lag 1 to maxLag. + */ + static calculatePACF(series: number[], maxLag: number): number[] { + const acf = this.calculateACF(series, maxLag); + const pacf: number[] = []; + + if (acf.length === 0) return []; + + pacf.push(acf[0]); // PACF at lag 1 is the same as ACF at lag 1 + + for (let k = 2; k <= maxLag; k++) { + let numerator = acf[k - 1]; + let denominator = 1; + + const phi = new Array(k + 1).fill(0).map(() => new Array(k + 1).fill(0)); + + for(let i=1; i<=k; i++) { + phi[i][i] = acf[i-1]; + } + + for (let j = 1; j < k; j++) { + const factor = pacf[j - 1]; + numerator -= factor * acf[k - j - 1]; + denominator -= factor * acf[j - 1]; + } + + if (Math.abs(denominator) < 1e-9) { // Avoid division by zero + pacf.push(0); + continue; + } + + const pacf_k = numerator / denominator; + pacf.push(pacf_k); + } + + return pacf; + } + + /** + * Decomposes a time series using the robust Classical Additive method. + * This version correctly isolates trend, seasonal, and residual components. + * @param series The input data series. + * @param period The seasonal period (e.g., 7 for daily data with a weekly cycle). + * @returns An object containing the seasonal, trend, and residual series. 
+ */ + static stlDecomposition(series: number[], period: number): STLDecomposition { + if (series.length < 2 * period) { + throw new Error("Series must be at least twice the length of the seasonal period."); + } + + // Helper for a centered moving average + const movingAverage = (data: number[], window: number) => { + const result = []; + const halfWindow = Math.floor(window / 2); + for (let i = 0; i < data.length; i++) { + const start = Math.max(0, i - halfWindow); + const end = Math.min(data.length, i + halfWindow + 1); + let sum = 0; + for (let j = start; j < end; j++) { + sum += data[j]; + } + result.push(sum / (end - start)); + } + return result; + }; + + // Step 1: Calculate the trend using a centered moving average. + // If period is even, we use a 2x-MA to center it correctly. + let trend: number[]; + if (period % 2 === 0) { + const intermediate = movingAverage(series, period); + trend = movingAverage(intermediate, 2); + } else { + trend = movingAverage(series, period); + } + + // Step 2: Detrend the series + const detrended = series.map((val, i) => val - trend[i]); + + // Step 3: Calculate the seasonal component by averaging the detrended values for each period + const seasonalAverages = new Array(period).fill(0); + const seasonalCounts = new Array(period).fill(0); + for (let i = 0; i < series.length; i++) { + if (!isNaN(detrended[i])) { + const seasonIndex = i % period; + seasonalAverages[seasonIndex] += detrended[i]; + seasonalCounts[seasonIndex]++; + } + } + + for (let i = 0; i < period; i++) { + seasonalAverages[i] /= seasonalCounts[i]; + } + + // Center the seasonal component to have a mean of zero + const seasonalMean = seasonalAverages.reduce((a, b) => a + b, 0) / period; + const centeredSeasonalAverages = seasonalAverages.map(avg => avg - seasonalMean); + + const seasonal = new Array(series.length).fill(0); + for (let i = 0; i < series.length; i++) { + seasonal[i] = centeredSeasonalAverages[i % period]; + } + + // Step 4: Calculate the residual 
component + const residual = detrended.map((val, i) => val - seasonal[i]); + + return { + original: series, + seasonal, + trend, + residual, + }; + } + + + // ======================================== + // 2. FORECASTING METHODS + // ======================================== + + /** + * [UPGRADED] Generates a forecast using a simplified SARIMA model. + * This implementation now handles both non-seasonal (p,d,q) and seasonal (P,D,Q,s) components. + * @param series The input time series data. + * @param options The SARIMA parameters. + * @param forecastSteps The number of future steps to predict. + * @returns An object containing the forecast and model residuals. + */ + static arimaForecast(series: number[], options: ARIMAOptions, forecastSteps: number): ARIMAForecastResult { + const { p, d, q, P = 0, D = 0, Q = 0, s = 0 } = options; + + if (series.length < p + d + (P + D) * s + q + Q * s) { + throw new Error("Data series is too short for the specified SARIMA order."); + } + + const originalSeries = [...series]; + let differencedSeries = [...series]; + const diffLog: { lag: number, values: number[] }[] = []; + + // Step 1: Apply seasonal differencing 'D' times + for (let i = 0; i < D; i++) { + diffLog.push({ lag: s, values: differencedSeries.slice(-s) }); + differencedSeries = this.difference(differencedSeries, s); + } + + // Step 2: Apply non-seasonal differencing 'd' times + for (let i = 0; i < d; i++) { + diffLog.push({ lag: 1, values: differencedSeries.slice(-1) }); + differencedSeries = this.difference(differencedSeries, 1); + } + + const n = differencedSeries.length; + // Simplified coefficients + const arCoeffs = p > 0 ? new Array(p).fill(1 / p) : []; + const maCoeffs = q > 0 ? new Array(q).fill(1 / q) : []; + const sarCoeffs = P > 0 ? new Array(P).fill(1 / P) : []; + const smaCoeffs = Q > 0 ? 
new Array(Q).fill(1 / Q) : []; + + const residuals: number[] = new Array(n).fill(0); + const fitted: number[] = new Array(n).fill(0); + + // Step 3: Fit the model + const startIdx = Math.max(p, q, P * s, Q * s); + for (let t = startIdx; t < n; t++) { + // Non-seasonal AR + let arVal = 0; + for (let i = 0; i < p; i++) arVal += arCoeffs[i] * differencedSeries[t - 1 - i]; + + // Non-seasonal MA + let maVal = 0; + for (let i = 0; i < q; i++) maVal += maCoeffs[i] * residuals[t - 1 - i]; + + // Seasonal AR + let sarVal = 0; + for (let i = 0; i < P; i++) sarVal += sarCoeffs[i] * differencedSeries[t - s * (i + 1)]; + + // Seasonal MA + let smaVal = 0; + for (let i = 0; i < Q; i++) smaVal += smaCoeffs[i] * residuals[t - s * (i + 1)]; + + fitted[t] = arVal + maVal + sarVal + smaVal; + residuals[t] = differencedSeries[t] - fitted[t]; + } + + // Step 4: Generate the forecast + const forecastDifferenced: number[] = []; + const extendedSeries = [...differencedSeries]; + const extendedResiduals = [...residuals]; + + for (let f = 0; f < forecastSteps; f++) { + const t = n + f; + let nextForecast = 0; + + // AR + for (let i = 0; i < p; i++) nextForecast += arCoeffs[i] * extendedSeries[t - 1 - i]; + // MA (future residuals are 0) + for (let i = 0; i < q; i++) nextForecast += maCoeffs[i] * extendedResiduals[t - 1 - i]; + // SAR + for (let i = 0; i < P; i++) nextForecast += sarCoeffs[i] * extendedSeries[t - s * (i + 1)]; + // SMA + for (let i = 0; i < Q; i++) nextForecast += smaCoeffs[i] * extendedResiduals[t - s * (i + 1)]; + + forecastDifferenced.push(nextForecast); + extendedSeries.push(nextForecast); + extendedResiduals.push(0); + } + + // Step 5: Invert the differencing + let forecast = [...forecastDifferenced]; + for (let i = diffLog.length - 1; i >= 0; i--) { + const { lag, values } = diffLog[i]; + const inverted = []; + const fullHistory = [...originalSeries, ...forecast]; // Need a temporary full history for inversion + + // A simpler inversion method for forecasting + let 
history = [...series]; + for (const forecastVal of forecast) { + const lastSeasonalVal = history[history.length - lag]; + const invertedVal = forecastVal + lastSeasonalVal; + inverted.push(invertedVal); + history.push(invertedVal); + } + forecast = inverted; + } + + return { + forecast, + residuals, + model: options, + }; + } +} + diff --git a/tests/analyticsEngine.test.ts b/tests/analyticsEngine.test.ts new file mode 100644 index 0000000..b8391f9 --- /dev/null +++ b/tests/analyticsEngine.test.ts @@ -0,0 +1,21 @@ +import { analytics } from '../services/analytics_engine'; + +describe('AnalyticsEngine', () => { + test('mean returns correct average', () => { + const series = { values: [1, 2, 3, 4, 5] }; + const result = analytics.mean(series); + expect(result).toBe(3); + }); + + test('max returns correct maximum', () => { + const series = { values: [1, 2, 3, 4, 5] }; + const result = analytics.max(series); + expect(result).toBe(5); + }); + + test('min returns correct minimum', () => { + const series = { values: [1, 2, 3, 4, 5] }; + const result = analytics.min(series); + expect(result).toBe(1); + }); +}); \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..0d6c2f4 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "commonjs", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "outDir": "./dist", + "rootDir": "./" + }, + "include": ["**/*.ts"], + "exclude": ["node_modules", "dist"] +} \ No newline at end of file diff --git a/types/index.ts b/types/index.ts new file mode 100644 index 0000000..8cf56d2 --- /dev/null +++ b/types/index.ts @@ -0,0 +1,22 @@ +export interface DataSeries { + values: number[]; + labels?: string[]; +} + +export interface DataMatrix { + data: number[][]; + columns?: string[]; + rows?: string[]; +} + +export interface Condition { + field: 
string; + operator: '>' | '<' | '=' | '>=' | '<=' | '!='; + value: number | string; +} + +export interface ApiResponse { + success: boolean; + data?: T; + error?: string; +} \ No newline at end of file