// analytics-api/services/analysis_pipelines.ts
// 2025-09-25 16:28:20 +09:00
//
// 133 lines
// 4.4 KiB
// TypeScript

// analysis_pipelines.ts - High-level workflows for common analysis tasks.
import { SignalProcessor } from './signal_processing_convolution';
import { TimeSeriesAnalyzer, STLDecomposition } from './timeseries';
/**
* The comprehensive result of a denoise and detrend operation.
*/
export interface DenoiseAndDetrendResult {
  /** The input series exactly as it was passed in. */
  original: number[];

  /** The series after the smoothing (denoising) pass. */
  smoothed: number[];

  /** STL decomposition computed from the smoothed series, not the raw input. */
  decomposition: STLDecomposition;
}
/**
* The result of an automatic SARIMA parameter search.
*/
export interface AutoArimaResult {
  /** The candidate with the lowest AIC among all fitted combinations. */
  bestModel: {
    /** Non-seasonal orders of the model. */
    p: number;
    d: number;
    q: number;
    /** Seasonal orders of the model. */
    P: number;
    D: number;
    Q: number;
    /** Seasonal period the model was fitted with. */
    s: number;
    /** AIC score of the fit; lower is better. */
    aic: number;
  };

  /** One entry per fitted candidate, each with the same shape as `bestModel`. */
  searchLog: AutoArimaResult['bestModel'][];
}
/**
* A class containing high-level analysis pipelines that combine
* functions from various processing libraries.
*/
export class AnalysisPipelines {
  /**
   * Smooths a raw series to remove noise and then decomposes the smoothed
   * series with STL.
   *
   * Smoothing is delegated to `SignalProcessor.smooth` with the `'gaussian'`
   * method. An even `smoothWindow` greater than 1 is bumped to the next odd
   * number so the window is symmetric; the argument itself is not reassigned.
   *
   * @param series The original time series data.
   * @param period The seasonal period passed to the STL decomposition.
   * @param smoothWindow The window size for the smoothing pass (default 5).
   * @returns The input series (same array reference), the smoothed series,
   *   and the decomposition of the smoothed series.
   */
  static denoiseAndDetrend(series: number[], period: number, smoothWindow: number = 5): DenoiseAndDetrendResult {
    // Ensure the window is odd for symmetry without mutating the parameter.
    const windowSize = smoothWindow > 1 && smoothWindow % 2 === 0 ? smoothWindow + 1 : smoothWindow;

    const smoothed = SignalProcessor.smooth(series, {
      method: 'gaussian',
      windowSize,
    });
    const decomposition = TimeSeriesAnalyzer.stlDecomposition(smoothed, period);

    return { original: series, smoothed, decomposition };
  }

  /**
   * Performs an exhaustive grid search over SARIMA orders and returns the
   * candidate with the lowest AIC.
   *
   * Every combination of (p, d, q)(P, D, Q) from 0 up to the given bounds
   * (inclusive) is fitted through `TimeSeriesAnalyzer.arimaForecast`, except
   * the all-zero combination. With the default bounds that is 323 fits.
   * The fit is requested with `0` as its third argument (presumably the
   * forecast horizon) because only the residuals are needed.
   *
   * Candidates whose fit throws are skipped. Candidates whose residuals are
   * empty or non-finite are kept in the log with an AIC of `Infinity` and can
   * never be selected. On equal AIC the candidate met first in grid order
   * (d, p, q, D, P, Q — outermost to innermost) wins.
   *
   * @param series The original time series data.
   * @param seasonalPeriod The seasonal period of the data (e.g., 7 for weekly, 12 for monthly).
   * @param maxD Upper bound for the non-seasonal differencing order `d`.
   * @param maxP Upper bound for the non-seasonal order `p`.
   * @param maxQ Upper bound for the non-seasonal order `q`.
   * @param maxSeasonalD Upper bound for the seasonal differencing order `D`.
   * @param maxSeasonalP Upper bound for the seasonal order `P`.
   * @param maxSeasonalQ Upper bound for the seasonal order `Q`.
   * @returns The best model (including `s`) and the full search log sorted by
   *   ascending AIC.
   * @throws Error when no candidate produced an AIC below `Infinity`. If any
   *   fit threw, the last such error is attached as `cause`.
   */
  static findBestArimaParameters(
    series: number[],
    seasonalPeriod: number,
    maxD: number = 1,
    maxP: number = 2,
    maxQ: number = 2,
    maxSeasonalD: number = 1,
    maxSeasonalP: number = 2,
    maxSeasonalQ: number = 2
  ): AutoArimaResult {
    const searchLog: AutoArimaResult['searchLog'] = [];
    let bestModel: AutoArimaResult['bestModel'] | undefined;
    let bestAic = Infinity;
    // Remembered only so a total failure can report why the fits failed.
    let lastFitError: unknown;

    for (let d = 0; d <= maxD; d++) {
      for (let p = 0; p <= maxP; p++) {
        for (let q = 0; q <= maxQ; q++) {
          for (let D = 0; D <= maxSeasonalD; D++) {
            for (let P = 0; P <= maxSeasonalP; P++) {
              for (let Q = 0; Q <= maxSeasonalQ; Q++) {
                // Skip the trivial model where nothing is done.
                if (p === 0 && d === 0 && q === 0 && P === 0 && D === 0 && Q === 0) continue;

                try {
                  const { residuals } = TimeSeriesAnalyzer.arimaForecast(
                    series,
                    { p, d, q, P, D, Q, s: seasonalPeriod },
                    0
                  );
                  // Only the p/q/P/Q orders are counted as parameters;
                  // differencing orders are not.
                  const aic = AnalysisPipelines.computeAic(residuals, p + q + P + Q);
                  const candidate = { p, d, q, P, D, Q, s: seasonalPeriod, aic };
                  searchLog.push(candidate);

                  // Strict comparison: the first candidate with a given AIC wins.
                  if (aic < bestAic) {
                    bestAic = aic;
                    bestModel = candidate;
                  }
                } catch (error) {
                  // Best effort: an invalid parameter combination must not
                  // abort the whole search, so move on to the next candidate.
                  lastFitError = error;
                }
              }
            }
          }
        }
      }
    }

    if (bestModel === undefined) {
      const failure: Error & { cause?: unknown } = new Error(
        "Could not find a suitable SARIMA model. The data may be too short or complex."
      );
      if (lastFitError !== undefined) {
        failure.cause = lastFitError;
      }
      throw failure;
    }

    // Sort the log by AIC for easier reading. Comparing instead of subtracting
    // keeps the comparator well-defined for +/-Infinity scores.
    searchLog.sort((a, b) => (a.aic === b.aic ? 0 : a.aic < b.aic ? -1 : 1));

    return { bestModel, searchLog };
  }

  /**
   * Computes the AIC of a fit from its residuals, using a Gaussian
   * log-likelihood based on the residual sum of squares.
   *
   * @param residuals Residuals of the fitted model.
   * @param numParams Number of estimated parameters to penalise.
   * @returns `Infinity` for empty or non-finite residuals (unusable fit),
   *   `-Infinity` for a numerically perfect fit, otherwise the AIC.
   */
  private static computeAic(residuals: number[], numParams: number): number {
    const n = residuals.length;
    if (n === 0) return Infinity;

    const sse = residuals.reduce((sum, r) => sum + r * r, 0);
    // NaN residuals would otherwise yield a NaN score that poisons the sort.
    if (!Number.isFinite(sse)) return Infinity;
    if (sse < 1e-9) return -Infinity; // Perfect fit

    const logLikelihood = -n / 2 * (Math.log(2 * Math.PI) + Math.log(sse / n)) - n / 2;
    return 2 * numParams - 2 * logLikelihood;
  }
}