diff --git a/docs/api-reference/io-time-series-ssa-forecast.md b/docs/api-reference/io-time-series-ssa-forecast.md new file mode 100644 index 0000000000..ae510add41 --- /dev/null +++ b/docs/api-reference/io-time-series-ssa-forecast.md @@ -0,0 +1,5 @@ +### Input and Output Columns +There is only one input column. +The input column must be <xref:System.Single> where a value indicates a value at a timestamp in the time series. + +It produces either just one vector of forecasted values or three vectors: a vector of forecasted values, a vector of confidence lower bounds and a vector of confidence upper bounds. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs index 9b17e86244..9fd6b849a8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectAnomalyBySrCnn.cs @@ -40,7 +40,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectAnomalyBySrCnn(outputColumnName, inputColumnName, 16, 5, 5, 3, 8, 0.35).Fit(dataView); // Create a time series prediction engine from the model. 
- var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateTimeSeriesEngine(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tMag"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs index 8404ca2f63..0bc494790f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs @@ -57,7 +57,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, confidence, changeHistoryLength, TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateTimeSeriesEngine(ml); // Start streaming new data points with no change point to the prediction engine. Console.WriteLine($"Output from ChangePoint predictions on new data:"); @@ -99,7 +99,7 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateTimeSeriesPredictionFunction(ml); + engine = model.CreateTimeSeriesEngine(ml); // Run predictions on the loaded model. 
for (int i = 0; i < 5; i++) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs index 0cd9b0fa77..d9a97ee5ef 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaStream.cs @@ -57,7 +57,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, confidence, changeHistoryLength, TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateTimeSeriesEngine(ml); // Start streaming new data points with no change point to the prediction engine. Console.WriteLine($"Output from ChangePoint predictions on new data:"); @@ -103,7 +103,7 @@ public static void Example() model = ml.Model.Load(stream, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateTimeSeriesPredictionFunction(ml); + engine = model.CreateTimeSeriesEngine(ml); // Run predictions on the loaded model. 
for (int i = 0; i < 5; i++) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs index 2890c4ec57..466080c49d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs @@ -56,7 +56,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView); // Create a time series prediction engine from the model. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateTimeSeriesEngine(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); @@ -97,7 +97,7 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // Create a time series prediction engine from the checkpointed model. - engine = model.CreateTimeSeriesPredictionFunction(ml); + engine = model.CreateTimeSeriesEngine(ml); for (int index = 0; index < 8; index++) { // Anomaly change point detection. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs index 439006a7ec..1395399571 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs @@ -48,7 +48,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size).Fit(dataView); // Create a time series prediction engine from the model. 
- var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateTimeSeriesEngine(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs index 38094bb672..39198e5f8d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs @@ -54,7 +54,7 @@ public static void Example() ITransformer model = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView); // Create a prediction engine from the model for feeding new data. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateTimeSeriesEngine(ml); // Start streaming new data points with no change point to the prediction engine. Console.WriteLine($"Output from spike predictions on new data:"); @@ -94,7 +94,7 @@ public static void Example() model = ml.Model.Load(file, out DataViewSchema schema); // We must create a new prediction engine from the persisted model. - engine = model.CreateTimeSeriesPredictionFunction(ml); + engine = model.CreateTimeSeriesEngine(ml); // Run predictions on the loaded model. 
for (int i = 0; i < 5; i++) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs index 8ea98c8d99..0be475ff58 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs @@ -1,8 +1,8 @@ using System; using System.Collections.Generic; +using System.IO; using Microsoft.ML; using Microsoft.ML.Transforms.TimeSeries; -using Microsoft.ML.TimeSeries; namespace Samples.Dynamic { @@ -16,8 +16,7 @@ public static void Example() // as well as the source of randomness. var ml = new MLContext(); - // Generate sample series data with a recurring pattern - const int SeasonalitySize = 5; + // Generate sample series data with a recurring pattern. var data = new List() { new TimeSeriesData(0), @@ -44,43 +43,58 @@ public static void Example() // Setup arguments. var inputColumnName = nameof(TimeSeriesData.Value); + var outputColumnName = nameof(ForecastResult.Forecast); // Instantiate the forecasting model. - var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler(inputColumnName, data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, false, false); + var model = ml.Forecasting.ForecastBySsa(outputColumnName, inputColumnName, 5, 11, data.Count, 5); // Train. - model.Train(dataView); + var transformer = model.Fit(dataView); // Forecast next five values. 
- var forecast = model.Forecast(5); + var forecastEngine = transformer.CreateTimeSeriesEngine(ml); + var forecast = forecastEngine.Predict(); + Console.WriteLine($"Forecasted values:"); - Console.WriteLine("[{0}]", string.Join(", ", forecast)); + Console.WriteLine("[{0}]", string.Join(", ", forecast.Forecast)); // Forecasted values: - // [2.452744, 2.589339, 2.729183, 2.873005, 3.028931] + // [1.977226, 1.020494, 1.760543, 3.437509, 4.266461] // Update with new observations. - dataView = ml.Data.LoadFromEnumerable(new List() { new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0) }); - model.Update(dataView); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); // Checkpoint. - ml.Model.SaveForecastingModel(model, "model.zip"); + forecastEngine.CheckPoint(ml, "model.zip"); // Load the checkpointed model from disk. - var modelCopy = ml.Model.LoadForecastingModel("model.zip"); + // Load the model. + ITransformer modelCopy; + using (var file = File.OpenRead("model.zip")) + modelCopy = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine(ml); // Forecast with the checkpointed model loaded from disk. - forecast = modelCopy.Forecast(5); - Console.WriteLine("[{0}]", string.Join(", ", forecast)); - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + forecast = forecastEngineCopy.Predict(); + Console.WriteLine("[{0}]", string.Join(", ", forecast.Forecast)); + // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Forecast with the original model(that was checkpointed to disk). 
- forecast = model.Forecast(5); - Console.WriteLine("[{0}]", string.Join(", ", forecast)); - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + forecast = forecastEngine.Predict(); + Console.WriteLine("[{0}]", string.Join(", ", forecast.Forecast)); + // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] } + class ForecastResult + { + public float[] Forecast { get; set; } + } + class TimeSeriesData { public float Value; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs index b5c3c5c9c2..571cf9325a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs @@ -2,7 +2,7 @@ using System.Collections.Generic; using Microsoft.ML; using Microsoft.ML.Transforms.TimeSeries; -using Microsoft.ML.TimeSeries; +using System.IO; namespace Samples.Dynamic { @@ -16,8 +16,7 @@ public static void Example() // as well as the source of randomness. var ml = new MLContext(); - // Generate sample series data with a recurring pattern - const int SeasonalitySize = 5; + // Generate sample series data with a recurring pattern. var data = new List() { new TimeSeriesData(0), @@ -44,50 +43,58 @@ public static void Example() // Setup arguments. var inputColumnName = nameof(TimeSeriesData.Value); + var outputColumnName = nameof(ForecastResult.Forecast); - // Instantiate forecasting model. - var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler(inputColumnName, data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, shouldComputeForecastIntervals: true, false); + // Instantiate the forecasting model. 
+ var model = ml.Forecasting.ForecastBySsa(outputColumnName, inputColumnName, 5, 11, data.Count, 5, + confidenceLevel: 0.95f, + forcastingConfidentLowerBoundColumnName: "ConfidenceLowerBound", + forcastingConfidentUpperBoundColumnName: "ConfidenceUpperBound"); // Train. - model.Train(dataView); - - // Forecast next five values with confidence internal. - float[] forecast; - float[] confidenceIntervalLowerBounds; - float[] confidenceIntervalUpperBounds; - model.ForecastWithConfidenceIntervals(5, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds); - PrintForecastValuesAndIntervals(forecast, confidenceIntervalLowerBounds, confidenceIntervalUpperBounds); + var transformer = model.Fit(dataView); + + // Forecast next five values. + var forecastEngine = transformer.CreateTimeSeriesEngine(ml); + var forecast = forecastEngine.Predict(); + + PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); // Forecasted values: - // [2.452744, 2.589339, 2.729183, 2.873005, 3.028931] + // [1.977226, 1.020494, 1.760543, 3.437509, 4.266461] // Confidence intervals: - // [-0.2235315 - 5.12902] [-0.08777174 - 5.266451] [0.05076938 - 5.407597] [0.1925406 - 5.553469] [0.3469928 - 5.71087] + // [0.3451088 - 3.609343] [-0.7967533 - 2.83774] [-0.058467 - 3.579552] [1.61505 - 5.259968] [2.349299 - 6.183623] // Update with new observations. - dataView = ml.Data.LoadFromEnumerable(new List() { new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0) }); - model.Update(dataView); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); + forecastEngine.Predict(new TimeSeriesData(0)); // Checkpoint. - ml.Model.SaveForecastingModel(model, "model.zip"); + forecastEngine.CheckPoint(ml, "model.zip"); // Load the checkpointed model from disk. 
- var modelCopy = ml.Model.LoadForecastingModel("model.zip"); + // Load the model. + ITransformer modelCopy; + using (var file = File.OpenRead("model.zip")) + modelCopy = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine(ml); // Forecast with the checkpointed model loaded from disk. - modelCopy.ForecastWithConfidenceIntervals(5, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds); - PrintForecastValuesAndIntervals(forecast, confidenceIntervalLowerBounds, confidenceIntervalUpperBounds); - // Forecasted values: - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + forecast = forecastEngineCopy.Predict(); + PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); + // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Confidence intervals: - // [-1.808158 - 3.544394] [-1.8586 - 3.495622] [-1.871486 - 3.485341] [-1.836414 - 3.524514] [-1.736431 - 3.627447] + // [0.1592142 - 3.423448] [-0.5617217 - 3.072772] [-1.512994 - 2.125025] [-2.022905 - 1.622013] [-1.351382 - 2.482941] // Forecast with the original model(that was checkpointed to disk). 
- model.ForecastWithConfidenceIntervals(5, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds); - PrintForecastValuesAndIntervals(forecast, confidenceIntervalLowerBounds, confidenceIntervalUpperBounds); - // Forecasted values: - // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + forecast = forecastEngine.Predict(); + PrintForecastValuesAndIntervals(forecast.Forecast, forecast.ConfidenceLowerBound, forecast.ConfidenceUpperBound); + // [1.791331, 1.255525, 0.3060154, -0.200446, 0.5657795] // Confidence intervals: - // [-1.808158 - 3.544394] [-1.8586 - 3.495622] [-1.871486 - 3.485341] [-1.836414 - 3.524514] [-1.736431 - 3.627447] + // [0.1592142 - 3.423448] [-0.5617217 - 3.072772] [-1.512994 - 2.125025] [-2.022905 - 1.622013] [-1.351382 - 2.482941] } static void PrintForecastValuesAndIntervals(float[] forecast, float[] confidenceIntervalLowerBounds, float[] confidenceIntervalUpperBounds) @@ -100,6 +107,13 @@ static void PrintForecastValuesAndIntervals(float[] forecast, float[] confidence Console.WriteLine(); } + class ForecastResult + { + public float[] Forecast { get; set; } + public float[] ConfidenceLowerBound { get; set; } + public float[] ConfidenceUpperBound { get; set; } + } + class TimeSeriesData { public float Value; diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index eb49938927..5b318ae9e3 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -1,5 +1,6 @@ using System; using System.Reflection; +using Samples.Dynamic; namespace Microsoft.ML.Samples { diff --git a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs index da14de2bcb..1986eac4bb 100644 --- a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs +++ b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs @@ -6,1699 
+6,1566 @@ using System.Collections.Generic; using System.Numerics; using Microsoft.ML; +using Microsoft.ML.CommandLine; using Microsoft.ML.Data; using Microsoft.ML.Internal.CpuMath; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Runtime; -using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; -[assembly: LoadableClass(typeof(AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal), - typeof(AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal), null, typeof(SignatureLoadModel), +[assembly: LoadableClass(typeof(AdaptiveSingularSpectrumSequenceModelerInternal), + typeof(AdaptiveSingularSpectrumSequenceModelerInternal), null, typeof(SignatureLoadModel), "SSA Sequence Modeler", - AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal.LoaderSignature)] - -[assembly: LoadableClass(typeof(AdaptiveSingularSpectrumSequenceModeler), - typeof(AdaptiveSingularSpectrumSequenceModeler), null, typeof(SignatureLoadModel), - "SSA Sequence Modeler Wrapper", - AdaptiveSingularSpectrumSequenceModeler.LoaderSignature)] + AdaptiveSingularSpectrumSequenceModelerInternal.LoaderSignature)] namespace Microsoft.ML.Transforms.TimeSeries { + /// + /// Ranking selection method for the signal. + /// + public enum RankSelectionMethod + { + Fixed, + Exact, + Fast + } + + /// + /// Growth ratio. Defined as Growth^(1/TimeSpan). + /// + public struct GrowthRatio + { + [Argument(ArgumentType.AtMostOnce, HelpText = "Time span of growth ratio. Must be strictly positive.", SortOrder = 1)] + public int TimeSpan; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Growth. Must be non-negative.", SortOrder = 2)] + public Double Growth; + + public Double Ratio { get { return Math.Pow(Growth, 1d / TimeSpan); } } + } + /// /// This class implements basic Singular Spectrum Analysis (SSA) model for modeling univariate time-series. 
/// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. /// - public sealed class AdaptiveSingularSpectrumSequenceModeler : ICanForecast + internal sealed class AdaptiveSingularSpectrumSequenceModelerInternal : SequenceModelerBase { - /// - /// Ranking selection method for the signal. - /// - public enum RankSelectionMethod + internal const string LoaderSignature = "SSAModel"; + + internal sealed class SsaForecastResult : ForecastResultBase { - Fixed, - Exact, - Fast + public VBuffer ForecastStandardDeviation; + public VBuffer UpperBound; + public VBuffer LowerBound; + public Single ConfidenceLevel; + + internal bool CanComputeForecastIntervals; + internal Single BoundOffset; + + public bool IsVarianceValid { get { return CanComputeForecastIntervals; } } } - /// - /// Growth ratio. Defined as Growth^(1/TimeSpan). - /// - public struct GrowthRatio + public sealed class ModelInfo { - private int _timeSpan; - private Double _growth; + /// + /// The singular values of the SSA of the input time-series + /// + public Single[] Spectrum; /// - /// Time span of growth ratio. Must be strictly positive. + /// The roots of the characteristic polynomial after stabilization (meaningful only if the model is stabilized.) /// - public int TimeSpan - { - get - { - return _timeSpan; - } - set - { - Contracts.CheckParam(value > 0, nameof(TimeSpan), "The time span must be strictly positive."); - _timeSpan = value; - } - } + public Complex[] RootsAfterStabilization; /// - /// Growth. Must be non-negative. + /// The roots of the characteristic polynomial before stabilization (meaningful only if the model is stabilized.) 
/// - public Double Growth - { - get - { - return _growth; - } - set - { - Contracts.CheckParam(value >= 0, nameof(Growth), "The growth must be non-negative."); - _growth = value; - } - } + public Complex[] RootsBeforeStabilization; - public GrowthRatio(int timeSpan = 1, double growth = Double.PositiveInfinity) - { - Contracts.CheckParam(timeSpan > 0, nameof(TimeSpan), "The time span must be strictly positive."); - Contracts.CheckParam(growth >= 0, nameof(Growth), "The growth must be non-negative."); + /// + /// The rank of the model + /// + public int Rank; - _growth = growth; - _timeSpan = timeSpan; - } + /// + /// The window size used to compute the SSA model + /// + public int WindowSize; - public Double Ratio { get { return Math.Pow(_growth, 1d / _timeSpan); } } - } + /// + /// The auto-regressive coefficients learned by the model + /// + public Single[] AutoRegressiveCoefficients; - private AdaptiveSingularSpectrumSequenceModelerInternal _modeler; + /// + /// The flag indicating whether the model has been trained + /// + public bool IsTrained; - private readonly string _inputColumnName; + /// + /// The flag indicating a naive model is trained instead of SSA + /// + public bool IsNaiveModelTrained; - internal const string LoaderSignature = "ForecastModel"; + /// + /// The flag indicating whether the learned model has an exponential trend (meaningful only if the model is stabilized.) + /// + public bool IsExponentialTrendPresent; - private readonly IHost _host; + /// + /// The flag indicating whether the learned model has a polynomial trend (meaningful only if the model is stabilized.) 
+ /// + public bool IsPolynomialTrendPresent; - private static VersionInfo GetVersionInfo() - { - return new VersionInfo( - modelSignature: "SSAMODLW", - verWrittenCur: 0x00010001, // Initial - verReadableCur: 0x00010001, - verWeCanReadBack: 0x00010001, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(AdaptiveSingularSpectrumSequenceModeler).Assembly.FullName); - } + /// + /// The flag indicating whether the learned model has been stabilized + /// + public bool IsStabilized; - internal AdaptiveSingularSpectrumSequenceModeler(IHostEnvironment env, string inputColumnName, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, - RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, - bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) - { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoaderSignature); - _host.CheckParam(!string.IsNullOrEmpty(inputColumnName), nameof(inputColumnName)); + /// + /// The flag indicating whether any artificial seasonality (a seasonality with period greater than the window size) is removed + /// (meaningful only if the model is stabilized.) + /// + public bool IsArtificialSeasonalityRemoved; - _inputColumnName = inputColumnName; - _modeler = new AdaptiveSingularSpectrumSequenceModelerInternal(env, trainSize, seriesLength, windowSize, discountFactor, - rankSelectionMethod, rank, maxRank, shouldComputeForecastIntervals, shouldstablize, shouldMaintainInfo, maxGrowth); + /// + /// The exponential trend magnitude (meaningful only if the model is stabilized.) 
+ /// + public Double ExponentialTrendFactor; } - internal AdaptiveSingularSpectrumSequenceModeler(IHostEnvironment env, ModelLoadContext ctx) - { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoaderSignature); - _inputColumnName = ctx.Reader.ReadString(); - ctx.LoadModel(_host, out _modeler, "ForecastWrapper"); - } + private ModelInfo _info; /// - /// This class implements basic Singular Spectrum Analysis (SSA) model for modeling univariate time-series. - /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. + /// Returns the meta information about the learned model. /// - internal sealed class AdaptiveSingularSpectrumSequenceModelerInternal : SequenceModelerBase + public ModelInfo Info { - internal const string LoaderSignature = "SSAModel"; - - internal sealed class SsaForecastResult : ForecastResultBase - { - public VBuffer ForecastStandardDeviation; - public VBuffer UpperBound; - public VBuffer LowerBound; - public Single ConfidenceLevel; + get { return _info; } + } - internal bool CanComputeForecastIntervals; - internal Single BoundOffset; + private Single[] _alpha; + private Single[] _state; + private readonly FixedSizeQueue _buffer; + private CpuAlignedVector _x; + private CpuAlignedVector _xSmooth; + private int _windowSize; + private readonly int _seriesLength; + private readonly RankSelectionMethod _rankSelectionMethod; + private readonly Single _discountFactor; + private readonly int _trainSize; + private int _maxRank; + private readonly Double _maxTrendRatio; + private readonly bool _shouldStablize; + private readonly bool _shouldMaintainInfo; - public bool IsVarianceValid { get { return CanComputeForecastIntervals; } } - } + private readonly IHost _host; - public sealed class ModelInfo - { - /// - /// The singular values of the SSA of the input time-series - /// - public Single[] Spectrum; - - /// - /// The roots of the characteristic polynomial after stabilization (meaningful only if the model is 
stabilized.) - /// - public Complex[] RootsAfterStabilization; - - /// - /// The roots of the characteristic polynomial before stabilization (meaningful only if the model is stabilized.) - /// - public Complex[] RootsBeforeStabilization; - - /// - /// The rank of the model - /// - public int Rank; - - /// - /// The window size used to compute the SSA model - /// - public int WindowSize; - - /// - /// The auto-regressive coefficients learned by the model - /// - public Single[] AutoRegressiveCoefficients; - - /// - /// The flag indicating whether the model has been trained - /// - public bool IsTrained; - - /// - /// The flag indicating a naive model is trained instead of SSA - /// - public bool IsNaiveModelTrained; - - /// - /// The flag indicating whether the learned model has an exponential trend (meaningful only if the model is stabilized.) - /// - public bool IsExponentialTrendPresent; - - /// - /// The flag indicating whether the learned model has a polynomial trend (meaningful only if the model is stabilized.) - /// - public bool IsPolynomialTrendPresent; - - /// - /// The flag indicating whether the learned model has been stabilized - /// - public bool IsStabilized; - - /// - /// The flag indicating whether any artificial seasonality (a seasonality with period greater than the window size) is removed - /// (meaningful only if the model is stabilized.) - /// - public bool IsArtificialSeasonalityRemoved; - - /// - /// The exponential trend magnitude (meaningful only if the model is stabilized.) - /// - public Double ExponentialTrendFactor; - } + private CpuAlignedMatrixRow _wTrans; + private Single _observationNoiseVariance; + private Single _observationNoiseMean; + private Single _autoregressionNoiseVariance; + private Single _autoregressionNoiseMean; - private ModelInfo _info; + private int _rank; + private CpuAlignedVector _y; + private Single _nextPrediction; - /// - /// Returns the meta information about the learned model. 
- /// - public ModelInfo Info - { - get { return _info; } - } + /// + /// Determines whether the confidence interval required for forecasting. + /// + public bool ShouldComputeForecastIntervals { get; set; } - private Single[] _alpha; - private Single[] _state; - private readonly FixedSizeQueue _buffer; - private CpuAlignedVector _x; - private CpuAlignedVector _xSmooth; - private int _windowSize; - private readonly int _seriesLength; - private readonly RankSelectionMethod _rankSelectionMethod; - private readonly Single _discountFactor; - private readonly int _trainSize; - private int _maxRank; - private readonly Double _maxTrendRatio; - private readonly bool _shouldStablize; - private readonly bool _shouldMaintainInfo; - - private readonly IHost _host; - - private CpuAlignedMatrixRow _wTrans; - private Single _observationNoiseVariance; - private Single _observationNoiseMean; - private Single _autoregressionNoiseVariance; - private Single _autoregressionNoiseMean; - - private int _rank; - private CpuAlignedVector _y; - private Single _nextPrediction; + /// + /// Returns the rank of the subspace used for SSA projection (parameter r). + /// + public int Rank { get { return _rank; } } - /// - /// Determines whether the confidence interval required for forecasting. - /// - public bool ShouldComputeForecastIntervals { get; set; } + /// + /// Returns the smoothed (via SSA projection) version of the last series observation fed to the model. + /// + public Single LastSmoothedValue { get { return _state[_windowSize - 2]; } } - /// - /// Returns the rank of the subspace used for SSA projection (parameter r). - /// - public int Rank { get { return _rank; } } + /// + /// Returns the last series observation fed to the model. + /// + public Single LastValue { get { return _buffer.Count > 0 ? _buffer[_buffer.Count - 1] : Single.NaN; } } - /// - /// Returns the smoothed (via SSA projection) version of the last series observation fed to the model. 
- /// - public Single LastSmoothedValue { get { return _state[_windowSize - 2]; } } + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "SSAMODLR", + //verWrittenCur: 0x00010001, // Initial + verWrittenCur: 0x00010002, // Added saving _state and _nextPrediction + verReadableCur: 0x00010002, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(AdaptiveSingularSpectrumSequenceModelerInternal).Assembly.FullName); + } - /// - /// Returns the last series observation fed to the model. - /// - public Single LastValue { get { return _buffer.Count > 0 ? _buffer[_buffer.Count - 1] : Single.NaN; } } + private const int VersionSavingStateAndPrediction = 0x00010002; - private static VersionInfo GetVersionInfo() + /// + /// The constructor for Adaptive SSA model. + /// + /// The exception context. + /// The length of series from the begining used for training. + /// The length of series that is kept in buffer for modeling (parameter N). + /// The length of the window on the series for building the trajectory matrix (parameter L). + /// The discount factor in [0,1] used for online updates (default = 1). + /// The rank selection method (default = Exact). + /// The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. + /// If set to null, the rank is automatically determined based on prediction error minimization. (default = null) + /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1. + /// The flag determining whether the confidence bounds for the point forecasts should be computed. (default = true) + /// The flag determining whether the model should be stabilized. + /// The flag determining whether the meta information for the model needs to be maintained. + /// The maximum growth on the exponential trend. 
+ public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, + RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, + bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) + : base() + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(LoaderSignature); + _host.CheckParam(windowSize >= 2, nameof(windowSize), "The window size should be at least 2."); // ...because otherwise we have nothing to autoregress on + _host.CheckParam(seriesLength > windowSize, nameof(seriesLength), "The series length should be greater than the window size."); + _host.Check(trainSize > 2 * windowSize, "The input size for training should be greater than twice the window size."); + _host.CheckParam(0 <= discountFactor && discountFactor <= 1, nameof(discountFactor), "The discount factor should be in [0,1]."); + _host.CheckParam(maxGrowth == null || maxGrowth.Value.TimeSpan > 0, nameof(GrowthRatio.TimeSpan), "The time span must be strictly positive."); + _host.CheckParam(maxGrowth == null || maxGrowth.Value.Growth >= 0, nameof(GrowthRatio.Growth), "The growth must be non-negative."); + + if (maxRank != null) { - return new VersionInfo( - modelSignature: "SSAMODLR", - //verWrittenCur: 0x00010001, // Initial - verWrittenCur: 0x00010002, // Added saving _state and _nextPrediction - verReadableCur: 0x00010002, - verWeCanReadBack: 0x00010001, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(AdaptiveSingularSpectrumSequenceModelerInternal).Assembly.FullName); + _maxRank = (int)maxRank; + _host.CheckParam(1 <= _maxRank && _maxRank < windowSize, nameof(maxRank), + "The max rank should be in [1, windowSize)."); } + else + _maxRank = windowSize - 1; - private const int VersionSavingStateAndPrediction = 0x00010002; - - /// - /// The constructor for 
Adaptive SSA model. - /// - /// The exception context. - /// The length of series from the begining used for training. - /// The length of series that is kept in buffer for modeling (parameter N). - /// The length of the window on the series for building the trajectory matrix (parameter L). - /// The discount factor in [0,1] used for online updates (default = 1). - /// The rank selection method (default = Exact). - /// The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. - /// If set to null, the rank is automatically determined based on prediction error minimization. (default = null) - /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1. - /// The flag determining whether the confidence bounds for the point forecasts should be computed. (default = true) - /// The flag determining whether the model should be stabilized. - /// The flag determining whether the meta information for the model needs to be maintained. - /// The maximum growth on the exponential trend - public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, - RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, - bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? 
maxGrowth = null) - : base() + _rankSelectionMethod = rankSelectionMethod; + if (_rankSelectionMethod == RankSelectionMethod.Fixed) { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoaderSignature); - _host.CheckParam(windowSize >= 2, nameof(windowSize), "The window size should be at least 2."); // ...because otherwise we have nothing to autoregress on - _host.CheckParam(seriesLength > windowSize, nameof(seriesLength), "The series length should be greater than the window size."); - _host.Check(trainSize > 2 * windowSize, "The input series length for training should be greater than twice the window size."); - _host.CheckParam(0 <= discountFactor && discountFactor <= 1, nameof(discountFactor), "The discount factor should be in [0,1]."); - - if (maxRank != null) + if (rank != null) { - _maxRank = (int)maxRank; - _host.CheckParam(1 <= _maxRank && _maxRank < windowSize, nameof(maxRank), - "The max rank should be in [1, windowSize)."); + _rank = (int)rank; + _host.CheckParam(1 <= _rank && _rank < windowSize, nameof(rank), "The rank should be in [1, windowSize)."); } else - _maxRank = windowSize - 1; - - _rankSelectionMethod = rankSelectionMethod; - if (_rankSelectionMethod == RankSelectionMethod.Fixed) - { - if (rank != null) - { - _rank = (int)rank; - _host.CheckParam(1 <= _rank && _rank < windowSize, nameof(rank), "The rank should be in [1, windowSize)."); - } - else - _rank = _maxRank; - } + _rank = _maxRank; + } - _seriesLength = seriesLength; - _windowSize = windowSize; - _trainSize = trainSize; - _discountFactor = discountFactor; + _seriesLength = seriesLength; + _windowSize = windowSize; + _trainSize = trainSize; + _discountFactor = discountFactor; - _buffer = new FixedSizeQueue(seriesLength); + _buffer = new FixedSizeQueue(seriesLength); - _alpha = new Single[windowSize - 1]; - _state = new Single[windowSize - 1]; - _x = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(windowSize, 
CpuMathUtils.GetVectorAlignment()); - ShouldComputeForecastIntervals = shouldComputeForecastIntervals; + _alpha = new Single[windowSize - 1]; + _state = new Single[windowSize - 1]; + _x = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); + ShouldComputeForecastIntervals = shouldComputeForecastIntervals; - _observationNoiseVariance = 0; - _autoregressionNoiseVariance = 0; - _observationNoiseMean = 0; - _autoregressionNoiseMean = 0; - _shouldStablize = shouldstablize; - _maxTrendRatio = maxGrowth == null ? Double.PositiveInfinity : ((GrowthRatio)maxGrowth).Ratio; + _observationNoiseVariance = 0; + _autoregressionNoiseVariance = 0; + _observationNoiseMean = 0; + _autoregressionNoiseMean = 0; + _shouldStablize = shouldstablize; + _maxTrendRatio = maxGrowth == null ? Double.PositiveInfinity : ((GrowthRatio)maxGrowth).Ratio; - _shouldMaintainInfo = shouldMaintainInfo; - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.WindowSize = _windowSize; - } + _shouldMaintainInfo = shouldMaintainInfo; + if (_shouldMaintainInfo) + { + _info = new ModelInfo(); + _info.WindowSize = _windowSize; } + } - /// - /// The copy constructor. - /// - /// An object whose contents are copied to the current object. - private AdaptiveSingularSpectrumSequenceModelerInternal(AdaptiveSingularSpectrumSequenceModelerInternal model) + /// + /// The copy constructor. + /// + /// An object whose contents are copied to the current object. 
+ private AdaptiveSingularSpectrumSequenceModelerInternal(AdaptiveSingularSpectrumSequenceModelerInternal model) + { + _host = model._host.Register(LoaderSignature); + _host.Assert(model._windowSize >= 2); + _host.Assert(model._seriesLength > model._windowSize); + _host.Assert(model._trainSize > 2 * model._windowSize); + _host.Assert(0 <= model._discountFactor && model._discountFactor <= 1); + _host.Assert(1 <= model._rank && model._rank < model._windowSize); + + _rank = model._rank; + _maxRank = model._maxRank; + _rankSelectionMethod = model._rankSelectionMethod; + _seriesLength = model._seriesLength; + _windowSize = model._windowSize; + _trainSize = model._trainSize; + _discountFactor = model._discountFactor; + ShouldComputeForecastIntervals = model.ShouldComputeForecastIntervals; + _observationNoiseVariance = model._observationNoiseVariance; + _autoregressionNoiseVariance = model._autoregressionNoiseVariance; + _observationNoiseMean = model._observationNoiseMean; + _autoregressionNoiseMean = model._autoregressionNoiseMean; + _nextPrediction = model._nextPrediction; + _maxTrendRatio = model._maxTrendRatio; + _shouldStablize = model._shouldStablize; + _shouldMaintainInfo = model._shouldMaintainInfo; + _info = model._info; + _buffer = model._buffer.Clone(); + _alpha = new Single[_windowSize - 1]; + Array.Copy(model._alpha, _alpha, _windowSize - 1); + _state = new Single[_windowSize - 1]; + Array.Copy(model._state, _state, _windowSize - 1); + + _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + + if (model._wTrans != null) { - _host = model._host.Register(LoaderSignature); - _host.Assert(model._windowSize >= 2); - _host.Assert(model._seriesLength > model._windowSize); - _host.Assert(model._trainSize > 2 * model._windowSize); - _host.Assert(0 <= model._discountFactor && model._discountFactor <= 1); - _host.Assert(1 <= model._rank && model._rank < 
model._windowSize); - - _rank = model._rank; - _maxRank = model._maxRank; - _rankSelectionMethod = model._rankSelectionMethod; - _seriesLength = model._seriesLength; - _windowSize = model._windowSize; - _trainSize = model._trainSize; - _discountFactor = model._discountFactor; - ShouldComputeForecastIntervals = model.ShouldComputeForecastIntervals; - _observationNoiseVariance = model._observationNoiseVariance; - _autoregressionNoiseVariance = model._autoregressionNoiseVariance; - _observationNoiseMean = model._observationNoiseMean; - _autoregressionNoiseMean = model._autoregressionNoiseMean; - _nextPrediction = model._nextPrediction; - _maxTrendRatio = model._maxTrendRatio; - _shouldStablize = model._shouldStablize; - _shouldMaintainInfo = model._shouldMaintainInfo; - _info = model._info; - _buffer = model._buffer.Clone(); - _alpha = new Single[_windowSize - 1]; - Array.Copy(model._alpha, _alpha, _windowSize - 1); - _state = new Single[_windowSize - 1]; - Array.Copy(model._state, _state, _windowSize - 1); + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + _wTrans.CopyFrom(model._wTrans); + } + } - _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(LoaderSignature); - if (model._wTrans != null) - { - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - _wTrans.CopyFrom(model._wTrans); - } + // *** Binary format *** + // int: _seriesLength + // int: _windowSize + // int: _trainSize + // int: _rank + // float: _discountFactor + // RankSelectionMethod: _rankSelectionMethod + // 
bool: isWeightSet + // float[]: _alpha + // float[]: _state + // bool: ShouldComputeForecastIntervals + // float: _observationNoiseVariance + // float: _autoregressionNoiseVariance + // float: _observationNoiseMean + // float: _autoregressionNoiseMean + // float: _nextPrediction + // int: _maxRank + // bool: _shouldStablize + // bool: _shouldMaintainInfo + // double: _maxTrendRatio + // float[]: _wTrans (only if _isWeightSet == true) + + _seriesLength = ctx.Reader.ReadInt32(); + // Do an early check. We'll have the stricter check later. + _host.CheckDecode(_seriesLength > 2); + + _windowSize = ctx.Reader.ReadInt32(); + _host.CheckDecode(_windowSize >= 2); + _host.CheckDecode(_seriesLength > _windowSize); + + _trainSize = ctx.Reader.ReadInt32(); + _host.CheckDecode(_trainSize > 2 * _windowSize); + + _rank = ctx.Reader.ReadInt32(); + _host.CheckDecode(1 <= _rank && _rank < _windowSize); + + _discountFactor = ctx.Reader.ReadSingle(); + _host.CheckDecode(0 <= _discountFactor && _discountFactor <= 1); + + byte temp; + temp = ctx.Reader.ReadByte(); + _rankSelectionMethod = (RankSelectionMethod)temp; + bool isWeightSet = ctx.Reader.ReadBoolByte(); + + _alpha = ctx.Reader.ReadFloatArray(); + _host.CheckDecode(Utils.Size(_alpha) == _windowSize - 1); + + if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) + { + _state = ctx.Reader.ReadFloatArray(); + _host.CheckDecode(Utils.Size(_state) == _windowSize - 1); } + else + _state = new Single[_windowSize - 1]; - public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, ModelLoadContext ctx) - { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoaderSignature); - - // *** Binary format *** - // int: _seriesLength - // int: _windowSize - // int: _trainSize - // int: _rank - // float: _discountFactor - // RankSelectionMethod: _rankSelectionMethod - // bool: isWeightSet - // float[]: _alpha - // float[]: _state - // bool: ShouldComputeForecastIntervals - // float: 
_observationNoiseVariance - // float: _autoregressionNoiseVariance - // float: _observationNoiseMean - // float: _autoregressionNoiseMean - // float: _nextPrediction - // int: _maxRank - // bool: _shouldStablize - // bool: _shouldMaintainInfo - // double: _maxTrendRatio - // float[]: _wTrans (only if _isWeightSet == true) - - _seriesLength = ctx.Reader.ReadInt32(); - // Do an early check. We'll have the stricter check later. - _host.CheckDecode(_seriesLength > 2); - - _windowSize = ctx.Reader.ReadInt32(); - _host.CheckDecode(_windowSize >= 2); - _host.CheckDecode(_seriesLength > _windowSize); - - _trainSize = ctx.Reader.ReadInt32(); - _host.CheckDecode(_trainSize > 2 * _windowSize); - - _rank = ctx.Reader.ReadInt32(); - _host.CheckDecode(1 <= _rank && _rank < _windowSize); - - _discountFactor = ctx.Reader.ReadSingle(); - _host.CheckDecode(0 <= _discountFactor && _discountFactor <= 1); - - byte temp; - temp = ctx.Reader.ReadByte(); - _rankSelectionMethod = (RankSelectionMethod)temp; - bool isWeightSet = ctx.Reader.ReadBoolByte(); - - _alpha = ctx.Reader.ReadFloatArray(); - _host.CheckDecode(Utils.Size(_alpha) == _windowSize - 1); - - if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) - { - _state = ctx.Reader.ReadFloatArray(); - _host.CheckDecode(Utils.Size(_state) == _windowSize - 1); - } - else - _state = new Single[_windowSize - 1]; + ShouldComputeForecastIntervals = ctx.Reader.ReadBoolByte(); - ShouldComputeForecastIntervals = ctx.Reader.ReadBoolByte(); + _observationNoiseVariance = ctx.Reader.ReadSingle(); + _host.CheckDecode(_observationNoiseVariance >= 0); - _observationNoiseVariance = ctx.Reader.ReadSingle(); - _host.CheckDecode(_observationNoiseVariance >= 0); + _autoregressionNoiseVariance = ctx.Reader.ReadSingle(); + _host.CheckDecode(_autoregressionNoiseVariance >= 0); - _autoregressionNoiseVariance = ctx.Reader.ReadSingle(); - _host.CheckDecode(_autoregressionNoiseVariance >= 0); + _observationNoiseMean = ctx.Reader.ReadSingle(); + 
_autoregressionNoiseMean = ctx.Reader.ReadSingle(); + if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) + _nextPrediction = ctx.Reader.ReadSingle(); - _observationNoiseMean = ctx.Reader.ReadSingle(); - _autoregressionNoiseMean = ctx.Reader.ReadSingle(); - if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) - _nextPrediction = ctx.Reader.ReadSingle(); + _maxRank = ctx.Reader.ReadInt32(); + _host.CheckDecode(1 <= _maxRank && _maxRank <= _windowSize - 1); - _maxRank = ctx.Reader.ReadInt32(); - _host.CheckDecode(1 <= _maxRank && _maxRank <= _windowSize - 1); + _shouldStablize = ctx.Reader.ReadBoolByte(); + _shouldMaintainInfo = ctx.Reader.ReadBoolByte(); + if (_shouldMaintainInfo) + { + _info = new ModelInfo(); + _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; + Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); - _shouldStablize = ctx.Reader.ReadBoolByte(); - _shouldMaintainInfo = ctx.Reader.ReadBoolByte(); - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; - Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); + _info.IsStabilized = _shouldStablize; + _info.Rank = _rank; + _info.WindowSize = _windowSize; + } - _info.IsStabilized = _shouldStablize; - _info.Rank = _rank; - _info.WindowSize = _windowSize; - } + _maxTrendRatio = ctx.Reader.ReadDouble(); + _host.CheckDecode(_maxTrendRatio >= 0); - _maxTrendRatio = ctx.Reader.ReadDouble(); - _host.CheckDecode(_maxTrendRatio >= 0); + if (isWeightSet) + { + var tempArray = ctx.Reader.ReadFloatArray(); + _host.CheckDecode(Utils.Size(tempArray) == _rank * _windowSize); + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + int i = 0; + _wTrans.CopyFrom(tempArray, ref i); + tempArray = ctx.Reader.ReadFloatArray(); + i = 0; + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + _y.CopyFrom(tempArray, ref i); + } - 
if (isWeightSet) - { - var tempArray = ctx.Reader.ReadFloatArray(); - _host.CheckDecode(Utils.Size(tempArray) == _rank * _windowSize); - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - int i = 0; - _wTrans.CopyFrom(tempArray, ref i); - tempArray = ctx.Reader.ReadFloatArray(); - i = 0; - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - _y.CopyFrom(tempArray, ref i); - } + _buffer = TimeSeriesUtils.DeserializeFixedSizeQueueSingle(ctx.Reader, _host); + _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + } - _buffer = TimeSeriesUtils.DeserializeFixedSizeQueueSingle(ctx.Reader, _host); - _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - } + private protected override void SaveModel(ModelSaveContext ctx) + { + _host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + ctx.SetVersionInfo(GetVersionInfo()); - private protected override void SaveModel(ModelSaveContext ctx) + _host.Assert(_windowSize >= 2); + _host.Assert(_seriesLength > _windowSize); + _host.Assert(_trainSize > 2 * _windowSize); + _host.Assert(0 <= _discountFactor && _discountFactor <= 1); + _host.Assert(1 <= _rank && _rank < _windowSize); + _host.Assert(Utils.Size(_alpha) == _windowSize - 1); + _host.Assert(_observationNoiseVariance >= 0); + _host.Assert(_autoregressionNoiseVariance >= 0); + _host.Assert(1 <= _maxRank && _maxRank <= _windowSize - 1); + _host.Assert(_maxTrendRatio >= 0); + + // *** Binary format *** + // int: _seriesLength + // int: _windowSize + // int: _trainSize + // int: _rank + // float: _discountFactor + // RankSelectionMethod: _rankSelectionMethod + // bool: _isWeightSet + // float[]: _alpha + // float[]: _state + // bool: ShouldComputeForecastIntervals + // float: _observationNoiseVariance + // float: 
_autoregressionNoiseVariance + // float: _observationNoiseMean + // float: _autoregressionNoiseMean + // float: _nextPrediction + // int: _maxRank + // bool: _shouldStablize + // bool: _shouldMaintainInfo + // double: _maxTrendRatio + // float[]: _wTrans (only if _isWeightSet == true) + + ctx.Writer.Write(_seriesLength); + ctx.Writer.Write(_windowSize); + ctx.Writer.Write(_trainSize); + ctx.Writer.Write(_rank); + ctx.Writer.Write(_discountFactor); + ctx.Writer.Write((byte)_rankSelectionMethod); + ctx.Writer.WriteBoolByte(_wTrans != null); + ctx.Writer.WriteSingleArray(_alpha); + ctx.Writer.WriteSingleArray(_state); + ctx.Writer.WriteBoolByte(ShouldComputeForecastIntervals); + ctx.Writer.Write(_observationNoiseVariance); + ctx.Writer.Write(_autoregressionNoiseVariance); + ctx.Writer.Write(_observationNoiseMean); + ctx.Writer.Write(_autoregressionNoiseMean); + ctx.Writer.Write(_nextPrediction); + ctx.Writer.Write(_maxRank); + ctx.Writer.WriteBoolByte(_shouldStablize); + ctx.Writer.WriteBoolByte(_shouldMaintainInfo); + ctx.Writer.Write(_maxTrendRatio); + + if (_wTrans != null) { - _host.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(); - ctx.SetVersionInfo(GetVersionInfo()); - - _host.Assert(_windowSize >= 2); - _host.Assert(_seriesLength > _windowSize); - _host.Assert(_trainSize > 2 * _windowSize); - _host.Assert(0 <= _discountFactor && _discountFactor <= 1); - _host.Assert(1 <= _rank && _rank < _windowSize); - _host.Assert(Utils.Size(_alpha) == _windowSize - 1); - _host.Assert(_observationNoiseVariance >= 0); - _host.Assert(_autoregressionNoiseVariance >= 0); - _host.Assert(1 <= _maxRank && _maxRank <= _windowSize - 1); - _host.Assert(_maxTrendRatio >= 0); - - // *** Binary format *** - // int: _seriesLength - // int: _windowSize - // int: _trainSize - // int: _rank - // float: _discountFactor - // RankSelectionMethod: _rankSelectionMethod - // bool: _isWeightSet - // float[]: _alpha - // float[]: _state - // bool: ShouldComputeForecastIntervals - // float: 
_observationNoiseVariance - // float: _autoregressionNoiseVariance - // float: _observationNoiseMean - // float: _autoregressionNoiseMean - // float: _nextPrediction - // int: _maxRank - // bool: _shouldStablize - // bool: _shouldMaintainInfo - // double: _maxTrendRatio - // float[]: _wTrans (only if _isWeightSet == true) - - ctx.Writer.Write(_seriesLength); - ctx.Writer.Write(_windowSize); - ctx.Writer.Write(_trainSize); - ctx.Writer.Write(_rank); - ctx.Writer.Write(_discountFactor); - ctx.Writer.Write((byte)_rankSelectionMethod); - ctx.Writer.WriteBoolByte(_wTrans != null); - ctx.Writer.WriteSingleArray(_alpha); - ctx.Writer.WriteSingleArray(_state); - ctx.Writer.WriteBoolByte(ShouldComputeForecastIntervals); - ctx.Writer.Write(_observationNoiseVariance); - ctx.Writer.Write(_autoregressionNoiseVariance); - ctx.Writer.Write(_observationNoiseMean); - ctx.Writer.Write(_autoregressionNoiseMean); - ctx.Writer.Write(_nextPrediction); - ctx.Writer.Write(_maxRank); - ctx.Writer.WriteBoolByte(_shouldStablize); - ctx.Writer.WriteBoolByte(_shouldMaintainInfo); - ctx.Writer.Write(_maxTrendRatio); - - if (_wTrans != null) - { - // REVIEW: this may not be the most efficient way for serializing an aligned matrix. - var tempArray = new Single[_rank * _windowSize]; - int iv = 0; - _wTrans.CopyTo(tempArray, ref iv); - ctx.Writer.WriteSingleArray(tempArray); - tempArray = new float[_rank]; - iv = 0; - _y.CopyTo(tempArray, ref iv); - ctx.Writer.WriteSingleArray(tempArray); - } - - TimeSeriesUtils.SerializeFixedSizeQueue(_buffer, ctx.Writer); + // REVIEW: this may not be the most efficient way for serializing an aligned matrix. 
+ var tempArray = new Single[_rank * _windowSize]; + int iv = 0; + _wTrans.CopyTo(tempArray, ref iv); + ctx.Writer.WriteSingleArray(tempArray); + tempArray = new float[_rank]; + iv = 0; + _y.CopyTo(tempArray, ref iv); + ctx.Writer.WriteSingleArray(tempArray); } - private static void ReconstructSignal(TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) - { - Contracts.Assert(tMat != null); - Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); - Contracts.Assert(Utils.Size(output) >= tMat.SeriesLength); + TimeSeriesUtils.SerializeFixedSizeQueue(_buffer, ctx.Writer); + } - var k = tMat.SeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); + private static void ReconstructSignal(TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) + { + Contracts.Assert(tMat != null); + Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); + Contracts.Assert(Utils.Size(output) >= tMat.SeriesLength); - var v = new Single[k]; - int i; + var k = tMat.SeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); - for (i = 0; i < tMat.SeriesLength; ++i) - output[i] = 0; + var v = new Single[k]; + int i; - for (i = 0; i < rank; ++i) - { - // Reconstructing the i-th eigen triple component and adding it to output - tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); - tMat.RankOneHankelization(singularVectors, v, 1, output, true, tMat.WindowSize * i, 0, 0); - } - } + for (i = 0; i < tMat.SeriesLength; ++i) + output[i] = 0; - private static void ReconstructSignalTailFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) + for (i = 0; i < rank; ++i) { - Contracts.Assert(tMat != null); - Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); + // 
Reconstructing the i-th eigen triple component and adding it to output + tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); + tMat.RankOneHankelization(singularVectors, v, 1, output, true, tMat.WindowSize * i, 0, 0); + } + } - int len = 2 * tMat.WindowSize - 1; - Contracts.Assert(Utils.Size(output) >= len); + private static void ReconstructSignalTailFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) + { + Contracts.Assert(tMat != null); + Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); - Single v; - /*var k = tMat.SeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); - var v = new Single[k];*/ + int len = 2 * tMat.WindowSize - 1; + Contracts.Assert(Utils.Size(output) >= len); - Single temp; - int i; - int j; - int offset1 = tMat.SeriesLength - 2 * tMat.WindowSize + 1; - int offset2 = tMat.SeriesLength - tMat.WindowSize; + Single v; + /*var k = tMat.SeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); + var v = new Single[k];*/ - for (i = 0; i < len; ++i) - output[i] = 0; + Single temp; + int i; + int j; + int offset1 = tMat.SeriesLength - 2 * tMat.WindowSize + 1; + int offset2 = tMat.SeriesLength - tMat.WindowSize; - for (i = 0; i < rank; ++i) - { - // Reconstructing the i-th eigen triple component and adding it to output - v = 0; - for (j = offset1; j < offset1 + tMat.WindowSize; ++j) - v += series[j] * singularVectors[tMat.WindowSize * i - offset1 + j]; + for (i = 0; i < len; ++i) + output[i] = 0; - for (j = 0; j < tMat.WindowSize - 1; ++j) - output[j] += v * singularVectors[tMat.WindowSize * i + j]; + for (i = 0; i < rank; ++i) + { + // Reconstructing the i-th eigen triple component and adding it to output + v = 0; + for (j = offset1; j < offset1 + tMat.WindowSize; ++j) + v += series[j] * singularVectors[tMat.WindowSize * i - offset1 + j]; - temp = v * singularVectors[tMat.WindowSize * (i + 
1) - 1]; + for (j = 0; j < tMat.WindowSize - 1; ++j) + output[j] += v * singularVectors[tMat.WindowSize * i + j]; - v = 0; - for (j = offset2; j < offset2 + tMat.WindowSize; ++j) - v += series[j] * singularVectors[tMat.WindowSize * i - offset2 + j]; + temp = v * singularVectors[tMat.WindowSize * (i + 1) - 1]; - for (j = tMat.WindowSize; j < 2 * tMat.WindowSize - 1; ++j) - output[j] += v * singularVectors[tMat.WindowSize * (i - 1) + j + 1]; + v = 0; + for (j = offset2; j < offset2 + tMat.WindowSize; ++j) + v += series[j] * singularVectors[tMat.WindowSize * i - offset2 + j]; - temp += v * singularVectors[tMat.WindowSize * i]; - output[tMat.WindowSize - 1] += (temp / 2); - } + for (j = tMat.WindowSize; j < 2 * tMat.WindowSize - 1; ++j) + output[j] += v * singularVectors[tMat.WindowSize * (i - 1) + j + 1]; + + temp += v * singularVectors[tMat.WindowSize * i]; + output[tMat.WindowSize - 1] += (temp / 2); } + } - private static void ComputeNoiseMoments(Single[] series, Single[] signal, Single[] alpha, out Single observationNoiseVariance, out Single autoregressionNoiseVariance, - out Single observationNoiseMean, out Single autoregressionNoiseMean, int startIndex = 0) + private static void ComputeNoiseMoments(Single[] series, Single[] signal, Single[] alpha, out Single observationNoiseVariance, out Single autoregressionNoiseVariance, + out Single observationNoiseMean, out Single autoregressionNoiseMean, int startIndex = 0) + { + Contracts.Assert(Utils.Size(alpha) > 0); + Contracts.Assert(Utils.Size(signal) > 2 * Utils.Size(alpha)); // To assure that the autoregression noise variance is unbiased. 
+ Contracts.Assert(Utils.Size(series) >= Utils.Size(signal) + startIndex); + + var signalLength = Utils.Size(signal); + var windowSize = Utils.Size(alpha) + 1; + var k = signalLength - windowSize + 1; + Contracts.Assert(k > 0); + + var y = new Single[k]; + int i; + + observationNoiseMean = 0; + observationNoiseVariance = 0; + autoregressionNoiseMean = 0; + autoregressionNoiseVariance = 0; + + // Computing the observation noise moments + for (i = 0; i < signalLength; ++i) + observationNoiseMean += (series[i + startIndex] - signal[i]); + observationNoiseMean /= signalLength; + + for (i = 0; i < signalLength; ++i) + observationNoiseVariance += (series[i + startIndex] - signal[i]) * (series[i + startIndex] - signal[i]); + observationNoiseVariance /= signalLength; + observationNoiseVariance -= (observationNoiseMean * observationNoiseMean); + + // Computing the auto-regression noise moments + TrajectoryMatrix xTM = new TrajectoryMatrix(null, signal, windowSize - 1, signalLength - 1); + xTM.MultiplyTranspose(alpha, y); + + for (i = 0; i < k; ++i) + autoregressionNoiseMean += (signal[windowSize - 1 + i] - y[i]); + autoregressionNoiseMean /= k; + + for (i = 0; i < k; ++i) { - Contracts.Assert(Utils.Size(alpha) > 0); - Contracts.Assert(Utils.Size(signal) > 2 * Utils.Size(alpha)); // To assure that the autoregression noise variance is unbiased. 
- Contracts.Assert(Utils.Size(series) >= Utils.Size(signal) + startIndex); - - var signalLength = Utils.Size(signal); - var windowSize = Utils.Size(alpha) + 1; - var k = signalLength - windowSize + 1; - Contracts.Assert(k > 0); + autoregressionNoiseVariance += (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean) * + (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean); + } - var y = new Single[k]; - int i; + autoregressionNoiseVariance /= (k - windowSize + 1); + Contracts.Assert(autoregressionNoiseVariance >= 0); + } - observationNoiseMean = 0; - observationNoiseVariance = 0; - autoregressionNoiseMean = 0; - autoregressionNoiseVariance = 0; + private static int DetermineSignalRank(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, + Single[] outputSignal, int maxRank) + { + Contracts.Assert(tMat != null); + Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); + Contracts.Assert(Utils.Size(outputSignal) >= tMat.SeriesLength); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); + Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); + Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); + + var inputSeriesLength = tMat.SeriesLength; + var k = inputSeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); + + var x = new Single[inputSeriesLength]; + var y = new Single[k]; + var alpha = new Single[tMat.WindowSize - 1]; + var v = new Single[k]; + + Single nu = 0; + Double minErr = Double.MaxValue; + int minIndex = maxRank - 1; + int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); + + TrajectoryMatrix xTM = new TrajectoryMatrix(null, x, tMat.WindowSize - 1, inputSeriesLength - 1); + + int i; + int j; + int n; + Single temp; + Double error; + Double sumSingularVals = 0; + Single lambda; + Single observationNoiseMean; + + FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); + + for (i = 0; i < tMat.WindowSize; ++i) + 
sumSingularVals += singularValues[i]; + + for (i = 0; i < maxRank; ++i) + { + // Updating the auto-regressive coefficients + lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; + for (j = 0; j < tMat.WindowSize - 1; ++j) + alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; - // Computing the observation noise moments - for (i = 0; i < signalLength; ++i) - observationNoiseMean += (series[i + startIndex] - signal[i]); - observationNoiseMean /= signalLength; + // Updating nu + nu += lambda * lambda; - for (i = 0; i < signalLength; ++i) - observationNoiseVariance += (series[i + startIndex] - signal[i]) * (series[i + startIndex] - signal[i]); - observationNoiseVariance /= signalLength; - observationNoiseVariance -= (observationNoiseMean * observationNoiseMean); + // Reconstructing the i-th eigen triple component and adding it to x + tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); + tMat.RankOneHankelization(singularVectors, v, 1, x, true, tMat.WindowSize * i, 0, 0); - // Computing the auto-regression noise moments - TrajectoryMatrix xTM = new TrajectoryMatrix(null, signal, windowSize - 1, signalLength - 1); - xTM.MultiplyTranspose(alpha, y); + observationNoiseMean = 0; + for (j = 0; j < inputSeriesLength; ++j) + observationNoiseMean += (series[j] - x[j]); + observationNoiseMean /= inputSeriesLength; - for (i = 0; i < k; ++i) - autoregressionNoiseMean += (signal[windowSize - 1 + i] - y[i]); - autoregressionNoiseMean /= k; + for (j = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; j < inputSeriesLength - evaluationLength; ++j) + window.AddLast(x[j]); - for (i = 0; i < k; ++i) + error = 0; + for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) { - autoregressionNoiseVariance += (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean) * - (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean); + temp = 0; + for (n = 0; n < tMat.WindowSize - 1; ++n) + temp += alpha[n] * 
window[n]; + + temp /= (1 - nu); + temp += observationNoiseMean; + window.AddLast(temp); + error += Math.Abs(series[j] - temp); + if (error > minErr) + break; } - autoregressionNoiseVariance /= (k - windowSize + 1); - Contracts.Assert(autoregressionNoiseVariance >= 0); + if (error < minErr) + { + minErr = error; + minIndex = i; + Array.Copy(x, outputSignal, inputSeriesLength); + } } - private static int DetermineSignalRank(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, - Single[] outputSignal, int maxRank) - { - Contracts.Assert(tMat != null); - Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); - Contracts.Assert(Utils.Size(outputSignal) >= tMat.SeriesLength); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); - Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); - Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); - - var inputSeriesLength = tMat.SeriesLength; - var k = inputSeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); - - var x = new Single[inputSeriesLength]; - var y = new Single[k]; - var alpha = new Single[tMat.WindowSize - 1]; - var v = new Single[k]; - - Single nu = 0; - Double minErr = Double.MaxValue; - int minIndex = maxRank - 1; - int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); - - TrajectoryMatrix xTM = new TrajectoryMatrix(null, x, tMat.WindowSize - 1, inputSeriesLength - 1); - - int i; - int j; - int n; - Single temp; - Double error; - Double sumSingularVals = 0; - Single lambda; - Single observationNoiseMean; - - FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); - - for (i = 0; i < tMat.WindowSize; ++i) - sumSingularVals += singularValues[i]; - - for (i = 0; i < maxRank; ++i) - { - // Updating the auto-regressive coefficients - lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; - for (j = 0; j < tMat.WindowSize - 1; ++j) - alpha[j] += lambda * 
singularVectors[tMat.WindowSize * i + j]; + return minIndex + 1; + } - // Updating nu - nu += lambda * lambda; + internal override void InitState() + { + for (int i = 0; i < _windowSize - 2; ++i) + _state[i] = 0; - // Reconstructing the i-th eigen triple component and adding it to x - tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); - tMat.RankOneHankelization(singularVectors, v, 1, x, true, tMat.WindowSize * i, 0, 0); + _buffer.Clear(); + } - observationNoiseMean = 0; - for (j = 0; j < inputSeriesLength; ++j) - observationNoiseMean += (series[j] - x[j]); - observationNoiseMean /= inputSeriesLength; + private static int DetermineSignalRankFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, int maxRank) + { + Contracts.Assert(tMat != null); + Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); + Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); + Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); + + var inputSeriesLength = tMat.SeriesLength; + var k = inputSeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); + + var x = new Single[tMat.WindowSize - 1]; + var alpha = new Single[tMat.WindowSize - 1]; + Single v; + + Single nu = 0; + Double minErr = Double.MaxValue; + int minIndex = maxRank - 1; + int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); + + int i; + int j; + int n; + int offset; + Single temp; + Double error; + Single lambda; + Single observationNoiseMean; + + FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); + + for (i = 0; i < maxRank; ++i) + { + // Updating the auto-regressive coefficients + lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; + for (j = 0; j < tMat.WindowSize - 1; ++j) + alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; - for (j = inputSeriesLength - evaluationLength - 
tMat.WindowSize + 1; j < inputSeriesLength - evaluationLength; ++j) - window.AddLast(x[j]); + // Updating nu + nu += lambda * lambda; - error = 0; - for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) - { - temp = 0; - for (n = 0; n < tMat.WindowSize - 1; ++n) - temp += alpha[n] * window[n]; - - temp /= (1 - nu); - temp += observationNoiseMean; - window.AddLast(temp); - error += Math.Abs(series[j] - temp); - if (error > minErr) - break; - } + // Reconstructing the i-th eigen triple component and adding it to x + v = 0; + offset = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; - if (error < minErr) - { - minErr = error; - minIndex = i; - Array.Copy(x, outputSignal, inputSeriesLength); - } - } + for (j = offset; j <= inputSeriesLength - evaluationLength; ++j) + v += series[j] * singularVectors[tMat.WindowSize * i - offset + j]; - return minIndex + 1; - } + for (j = 0; j < tMat.WindowSize - 1; ++j) + x[j] += v * singularVectors[tMat.WindowSize * i + j]; - internal override void InitState() - { - for (int i = 0; i < _windowSize - 2; ++i) - _state[i] = 0; + // Computing the empirical observation noise mean + observationNoiseMean = 0; + for (j = offset; j < inputSeriesLength - evaluationLength; ++j) + observationNoiseMean += (series[j] - x[j - offset]); + observationNoiseMean /= (tMat.WindowSize - 1); - _buffer.Clear(); - } + for (j = 0; j < tMat.WindowSize - 1; ++j) + window.AddLast(x[j]); - private static int DetermineSignalRankFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, int maxRank) - { - Contracts.Assert(tMat != null); - Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); - Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); - Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); - - var inputSeriesLength = tMat.SeriesLength; - var k = inputSeriesLength - 
tMat.WindowSize + 1; - Contracts.Assert(k > 0); - - var x = new Single[tMat.WindowSize - 1]; - var alpha = new Single[tMat.WindowSize - 1]; - Single v; - - Single nu = 0; - Double minErr = Double.MaxValue; - int minIndex = maxRank - 1; - int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); - - int i; - int j; - int n; - int offset; - Single temp; - Double error; - Single lambda; - Single observationNoiseMean; - - FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); - - for (i = 0; i < maxRank; ++i) + error = 0; + for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) { - // Updating the auto-regressive coefficients - lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; - for (j = 0; j < tMat.WindowSize - 1; ++j) - alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; - - // Updating nu - nu += lambda * lambda; + temp = 0; + for (n = 0; n < tMat.WindowSize - 1; ++n) + temp += alpha[n] * window[n]; + + temp /= (1 - nu); + temp += observationNoiseMean; + window.AddLast(temp); + error += Math.Abs(series[j] - temp); + if (error > minErr) + break; + } - // Reconstructing the i-th eigen triple component and adding it to x - v = 0; - offset = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; + if (error < minErr) + { + minErr = error; + minIndex = i; + } + } - for (j = offset; j <= inputSeriesLength - evaluationLength; ++j) - v += series[j] * singularVectors[tMat.WindowSize * i - offset + j]; + return minIndex + 1; + } - for (j = 0; j < tMat.WindowSize - 1; ++j) - x[j] += v * singularVectors[tMat.WindowSize * i + j]; + private class SignalComponent + { + public Double Phase; + public int Index; + public int Cluster; - // Computing the empirical observation noise mean - observationNoiseMean = 0; - for (j = offset; j < inputSeriesLength - evaluationLength; ++j) - observationNoiseMean += (series[j] - x[j - offset]); - observationNoiseMean /= (tMat.WindowSize - 1); + public SignalComponent(Double 
phase, int index) + { + Phase = phase; + Index = index; + } + } - for (j = 0; j < tMat.WindowSize - 1; ++j) - window.AddLast(x[j]); + private bool Stabilize() + { + if (Utils.Size(_alpha) == 1) + { + if (_shouldMaintainInfo) + _info.RootsBeforeStabilization = new[] { new Complex(_alpha[0], 0) }; - error = 0; - for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) - { - temp = 0; - for (n = 0; n < tMat.WindowSize - 1; ++n) - temp += alpha[n] * window[n]; - - temp /= (1 - nu); - temp += observationNoiseMean; - window.AddLast(temp); - error += Math.Abs(series[j] - temp); - if (error > minErr) - break; - } + if (_alpha[0] > 1) + _alpha[0] = 1; + else if (_alpha[0] < -1) + _alpha[0] = -1; - if (error < minErr) - { - minErr = error; - minIndex = i; - } + if (_shouldMaintainInfo) + { + _info.IsStabilized = true; + _info.RootsAfterStabilization = new[] { new Complex(_alpha[0], 0) }; + _info.IsExponentialTrendPresent = false; + _info.IsPolynomialTrendPresent = false; + _info.ExponentialTrendFactor = Math.Abs(_alpha[0]); } + return true; + } - return minIndex + 1; + var coeff = new Double[_windowSize - 1]; + Complex[] roots = null; + bool trendFound = false; + bool polynomialTrendFound = false; + Double maxTrendMagnitude = Double.NegativeInfinity; + Double maxNonTrendMagnitude = Double.NegativeInfinity; + Double eps = 1e-9; + Double highFrequenceyBoundry = Math.PI / 2; + var sortedComponents = new List(); + var trendComponents = new List(); + int i; + + // Computing the roots of the characteristic polynomial + for (i = 0; i < _windowSize - 1; ++i) + coeff[i] = -_alpha[i]; + + if (!PolynomialUtils.FindPolynomialRoots(coeff, ref roots)) + return false; + + if (_shouldMaintainInfo) + { + _info.RootsBeforeStabilization = new Complex[_windowSize - 1]; + Array.Copy(roots, _info.RootsBeforeStabilization, _windowSize - 1); } - private class SignalComponent + // Detecting trend components + for (i = 0; i < _windowSize - 1; ++i) { - public Double Phase; - public 
int Index; - public int Cluster; + if (roots[i].Magnitude > 1 && (Math.Abs(roots[i].Phase) <= eps || Math.Abs(Math.Abs(roots[i].Phase) - Math.PI) <= eps)) + trendComponents.Add(i); + } - public SignalComponent(Double phase, int index) + // Removing unobserved seasonalities and consequently introducing polynomial trend + for (i = 0; i < _windowSize - 1; ++i) + { + if (roots[i].Phase != 0) { - Phase = phase; - Index = index; + if (roots[i].Magnitude >= 1 && 2 * Math.PI / Math.Abs(roots[i].Phase) > _windowSize) + { + /*if (roots[i].Real > 1) + { + polynomialTrendFound = true; + roots[i] = new Complex(Math.Max(1, roots[i].Magnitude), 0); + maxPolynomialTrendMagnitude = Math.Max(maxPolynomialTrendMagnitude, roots[i].Magnitude); + } + else + roots[i] = Complex.FromPolarCoordinates(1, 0); + //roots[i] = Complex.FromPolarCoordinates(0.99, roots[i].Phase);*/ + + /* if (_maxTrendRatio > 1) + { + roots[i] = new Complex(roots[i].Real, 0); + polynomialTrendFound = true; + } + else + roots[i] = roots[i].Imaginary > 0 ? 
new Complex(roots[i].Real, 0) : new Complex(1, 0);*/ + + roots[i] = new Complex(roots[i].Real, 0); + polynomialTrendFound = true; + + if (_shouldMaintainInfo) + _info.IsArtificialSeasonalityRemoved = true; + } + else if (roots[i].Magnitude > 1) + sortedComponents.Add(new SignalComponent(roots[i].Phase, i)); } } - private bool Stabilize() + if (_maxTrendRatio > 1) { - if (Utils.Size(_alpha) == 1) + // Combining the close exponential-seasonal components + if (sortedComponents.Count > 1 && polynomialTrendFound) { - if (_shouldMaintainInfo) - _info.RootsBeforeStabilization = new[] { new Complex(_alpha[0], 0) }; + sortedComponents.Sort((a, b) => a.Phase.CompareTo(b.Phase)); + var clusterNum = 0; - if (_alpha[0] > 1) - _alpha[0] = 1; - else if (_alpha[0] < -1) - _alpha[0] = -1; - - if (_shouldMaintainInfo) + for (i = 0; i < sortedComponents.Count - 1; ++i) { - _info.IsStabilized = true; - _info.RootsAfterStabilization = new[] { new Complex(_alpha[0], 0) }; - _info.IsExponentialTrendPresent = false; - _info.IsPolynomialTrendPresent = false; - _info.ExponentialTrendFactor = Math.Abs(_alpha[0]); + if ((sortedComponents[i].Phase < 0 && sortedComponents[i + 1].Phase < 0) || + (sortedComponents[i].Phase > 0 && sortedComponents[i + 1].Phase > 0)) + { + sortedComponents[i].Cluster = clusterNum; + if (Math.Abs(sortedComponents[i + 1].Phase - sortedComponents[i].Phase) > 0.05) + clusterNum++; + sortedComponents[i + 1].Cluster = clusterNum; + } + else + clusterNum++; } - return true; - } - var coeff = new Double[_windowSize - 1]; - Complex[] roots = null; - bool trendFound = false; - bool polynomialTrendFound = false; - Double maxTrendMagnitude = Double.NegativeInfinity; - Double maxNonTrendMagnitude = Double.NegativeInfinity; - Double eps = 1e-9; - Double highFrequenceyBoundry = Math.PI / 2; - var sortedComponents = new List(); - var trendComponents = new List(); - int i; - - // Computing the roots of the characteristic polynomial - for (i = 0; i < _windowSize - 1; ++i) - coeff[i] 
= -_alpha[i]; + int start = 0; + bool polynomialSeasonalityFound = false; + Double largestSeasonalityPhase = 0; + for (i = 1; i < sortedComponents.Count; ++i) + { + if (sortedComponents[i].Cluster != sortedComponents[i - 1].Cluster) + { + if (i - start > 1) // There are more than one point in the cluster + { + Double avgPhase = 0; + Double avgMagnitude = 0; - if (!PolynomialUtils.FindPolynomialRoots(coeff, ref roots)) - return false; - - if (_shouldMaintainInfo) - { - _info.RootsBeforeStabilization = new Complex[_windowSize - 1]; - Array.Copy(roots, _info.RootsBeforeStabilization, _windowSize - 1); - } + for (var j = start; j < i; ++j) + { + avgPhase += sortedComponents[j].Phase; + avgMagnitude += roots[sortedComponents[j].Index].Magnitude; + } + avgPhase /= (i - start); + avgMagnitude /= (i - start); - // Detecting trend components - for (i = 0; i < _windowSize - 1; ++i) - { - if (roots[i].Magnitude > 1 && (Math.Abs(roots[i].Phase) <= eps || Math.Abs(Math.Abs(roots[i].Phase) - Math.PI) <= eps)) - trendComponents.Add(i); - } + for (var j = start; j < i; ++j) + roots[sortedComponents[j].Index] = Complex.FromPolarCoordinates(avgMagnitude, + avgPhase); - // Removing unobserved seasonalities and consequently introducing polynomial trend - for (i = 0; i < _windowSize - 1; ++i) - { - if (roots[i].Phase != 0) - { - if (roots[i].Magnitude >= 1 && 2 * Math.PI / Math.Abs(roots[i].Phase) > _windowSize) - { - /*if (roots[i].Real > 1) - { - polynomialTrendFound = true; - roots[i] = new Complex(Math.Max(1, roots[i].Magnitude), 0); - maxPolynomialTrendMagnitude = Math.Max(maxPolynomialTrendMagnitude, roots[i].Magnitude); + if (!polynomialSeasonalityFound && avgPhase > 0) + { + largestSeasonalityPhase = avgPhase; + polynomialSeasonalityFound = true; + } } - else - roots[i] = Complex.FromPolarCoordinates(1, 0); - //roots[i] = Complex.FromPolarCoordinates(0.99, roots[i].Phase);*/ - - /* if (_maxTrendRatio > 1) - { - roots[i] = new Complex(roots[i].Real, 0); - polynomialTrendFound = 
true; - } - else - roots[i] = roots[i].Imaginary > 0 ? new Complex(roots[i].Real, 0) : new Complex(1, 0);*/ - - roots[i] = new Complex(roots[i].Real, 0); - polynomialTrendFound = true; - if (_shouldMaintainInfo) - _info.IsArtificialSeasonalityRemoved = true; + start = i; } - else if (roots[i].Magnitude > 1) - sortedComponents.Add(new SignalComponent(roots[i].Phase, i)); } } - if (_maxTrendRatio > 1) + // Combining multiple exponential trends into polynomial ones + if (!polynomialTrendFound) { - // Combining the close exponential-seasonal components - if (sortedComponents.Count > 1 && polynomialTrendFound) - { - sortedComponents.Sort((a, b) => a.Phase.CompareTo(b.Phase)); - var clusterNum = 0; + var ind1 = -1; + var ind2 = -1; - for (i = 0; i < sortedComponents.Count - 1; ++i) - { - if ((sortedComponents[i].Phase < 0 && sortedComponents[i + 1].Phase < 0) || - (sortedComponents[i].Phase > 0 && sortedComponents[i + 1].Phase > 0)) - { - sortedComponents[i].Cluster = clusterNum; - if (Math.Abs(sortedComponents[i + 1].Phase - sortedComponents[i].Phase) > 0.05) - clusterNum++; - sortedComponents[i + 1].Cluster = clusterNum; - } - else - clusterNum++; - } - - int start = 0; - bool polynomialSeasonalityFound = false; - Double largestSeasonalityPhase = 0; - for (i = 1; i < sortedComponents.Count; ++i) - { - if (sortedComponents[i].Cluster != sortedComponents[i - 1].Cluster) - { - if (i - start > 1) // There are more than one point in the cluster - { - Double avgPhase = 0; - Double avgMagnitude = 0; - - for (var j = start; j < i; ++j) - { - avgPhase += sortedComponents[j].Phase; - avgMagnitude += roots[sortedComponents[j].Index].Magnitude; - } - avgPhase /= (i - start); - avgMagnitude /= (i - start); - - for (var j = start; j < i; ++j) - roots[sortedComponents[j].Index] = Complex.FromPolarCoordinates(avgMagnitude, - avgPhase); - - if (!polynomialSeasonalityFound && avgPhase > 0) - { - largestSeasonalityPhase = avgPhase; - polynomialSeasonalityFound = true; - } - } - - start = 
i; - } - } - } - - // Combining multiple exponential trends into polynomial ones - if (!polynomialTrendFound) + foreach (var ind in trendComponents) { - var ind1 = -1; - var ind2 = -1; - - foreach (var ind in trendComponents) - { - if (Math.Abs(roots[ind].Phase) <= eps) - { - ind1 = ind; - break; - } - } - - for (i = 0; i < _windowSize - 1; ++i) - { - if (Math.Abs(roots[i].Phase) <= eps && 0.9 <= roots[i].Magnitude && i != ind1) - { - ind2 = i; - break; - } - } - - if (ind1 >= 0 && ind2 >= 0 && ind1 != ind2) + if (Math.Abs(roots[ind].Phase) <= eps) { - roots[ind1] = Complex.FromPolarCoordinates(1, 0); - roots[ind2] = Complex.FromPolarCoordinates(1, 0); - polynomialTrendFound = true; + ind1 = ind; + break; } } - } - if (polynomialTrendFound) // Suppress the exponential trend - { - maxTrendMagnitude = Math.Min(1, _maxTrendRatio); - foreach (var ind in trendComponents) - roots[ind] = Complex.FromPolarCoordinates(0.99, roots[ind].Phase); - } - else - { - // Spotting the exponential trend components and finding the max trend magnitude for (i = 0; i < _windowSize - 1; ++i) { - if (roots[i].Magnitude > 1 && Math.Abs(roots[i].Phase) <= eps) + if (Math.Abs(roots[i].Phase) <= eps && 0.9 <= roots[i].Magnitude && i != ind1) { - trendFound = true; - maxTrendMagnitude = Math.Max(maxTrendMagnitude, roots[i].Magnitude); + ind2 = i; + break; } - else - maxNonTrendMagnitude = Math.Max(maxNonTrendMagnitude, roots[i].Magnitude); } - if (!trendFound) - maxTrendMagnitude = 1; - - maxTrendMagnitude = Math.Min(maxTrendMagnitude, _maxTrendRatio); - } - - // Squeezing all components below the maximum trend magnitude - var smallTrendMagnitude = Math.Min(maxTrendMagnitude, (maxTrendMagnitude + 1) / 2); - for (i = 0; i < _windowSize - 1; ++i) - { - if (roots[i].Magnitude >= maxTrendMagnitude) + if (ind1 >= 0 && ind2 >= 0 && ind1 != ind2) { - if ((highFrequenceyBoundry < roots[i].Phase && roots[i].Phase < Math.PI - eps) || - (-Math.PI + eps < roots[i].Phase && roots[i].Phase < 
-highFrequenceyBoundry)) - roots[i] = Complex.FromPolarCoordinates(smallTrendMagnitude, roots[i].Phase); - else - roots[i] = Complex.FromPolarCoordinates(maxTrendMagnitude, roots[i].Phase); + roots[ind1] = Complex.FromPolarCoordinates(1, 0); + roots[ind2] = Complex.FromPolarCoordinates(1, 0); + polynomialTrendFound = true; } } + } - // Correcting all the other trend components + if (polynomialTrendFound) // Suppress the exponential trend + { + maxTrendMagnitude = Math.Min(1, _maxTrendRatio); + foreach (var ind in trendComponents) + roots[ind] = Complex.FromPolarCoordinates(0.99, roots[ind].Phase); + } + else + { + // Spotting the exponential trend components and finding the max trend magnitude for (i = 0; i < _windowSize - 1; ++i) { - var phase = roots[i].Phase; - if (Math.Abs(phase) <= eps) - roots[i] = new Complex(roots[i].Magnitude, 0); - else if (Math.Abs(phase - Math.PI) <= eps) - roots[i] = new Complex(-roots[i].Magnitude, 0); - else if (Math.Abs(phase + Math.PI) <= eps) - roots[i] = new Complex(-roots[i].Magnitude, 0); + if (roots[i].Magnitude > 1 && Math.Abs(roots[i].Phase) <= eps) + { + trendFound = true; + maxTrendMagnitude = Math.Max(maxTrendMagnitude, roots[i].Magnitude); + } + else + maxNonTrendMagnitude = Math.Max(maxNonTrendMagnitude, roots[i].Magnitude); } - // Computing the characteristic polynomial from the modified roots - try - { - if (!PolynomialUtils.FindPolynomialCoefficients(roots, ref coeff)) - return false; - } - catch (OverflowException) - { - return false; - } + if (!trendFound) + maxTrendMagnitude = 1; - // Updating alpha - for (i = 0; i < _windowSize - 1; ++i) - _alpha[i] = (Single)(-coeff[i]); + maxTrendMagnitude = Math.Min(maxTrendMagnitude, _maxTrendRatio); + } - if (_shouldMaintainInfo) + // Squeezing all components below the maximum trend magnitude + var smallTrendMagnitude = Math.Min(maxTrendMagnitude, (maxTrendMagnitude + 1) / 2); + for (i = 0; i < _windowSize - 1; ++i) + { + if (roots[i].Magnitude >= maxTrendMagnitude) { - 
_info.RootsAfterStabilization = roots; - _info.IsStabilized = true; - _info.IsPolynomialTrendPresent = polynomialTrendFound; - _info.IsExponentialTrendPresent = maxTrendMagnitude > 1; - _info.ExponentialTrendFactor = maxTrendMagnitude; + if ((highFrequenceyBoundry < roots[i].Phase && roots[i].Phase < Math.PI - eps) || + (-Math.PI + eps < roots[i].Phase && roots[i].Phase < -highFrequenceyBoundry)) + roots[i] = Complex.FromPolarCoordinates(smallTrendMagnitude, roots[i].Phase); + else + roots[i] = Complex.FromPolarCoordinates(maxTrendMagnitude, roots[i].Phase); } + } - return true; + // Correcting all the other trend components + for (i = 0; i < _windowSize - 1; ++i) + { + var phase = roots[i].Phase; + if (Math.Abs(phase) <= eps) + roots[i] = new Complex(roots[i].Magnitude, 0); + else if (Math.Abs(phase - Math.PI) <= eps) + roots[i] = new Complex(-roots[i].Magnitude, 0); + else if (Math.Abs(phase + Math.PI) <= eps) + roots[i] = new Complex(-roots[i].Magnitude, 0); } - /// - /// Feeds the next observation on the series to the model and as a result changes the state of the model. - /// - /// The next observation on the series. - /// Determines whether the model parameters also need to be updated upon consuming the new observation (default = false). 
- internal override void Consume(ref Single input, bool updateModel = false) + // Computing the characteristic polynomial from the modified roots + try + { + if (!PolynomialUtils.FindPolynomialCoefficients(roots, ref coeff)) + return false; + } + catch (OverflowException) { - if (Single.IsNaN(input)) - return; + return false; + } - int i; + // Updating alpha + for (i = 0; i < _windowSize - 1; ++i) + _alpha[i] = (Single)(-coeff[i]); - if (_wTrans == null) - { - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - Single[] vecs = new Single[_rank * _windowSize]; + if (_shouldMaintainInfo) + { + _info.RootsAfterStabilization = roots; + _info.IsStabilized = true; + _info.IsPolynomialTrendPresent = polynomialTrendFound; + _info.IsExponentialTrendPresent = maxTrendMagnitude > 1; + _info.ExponentialTrendFactor = maxTrendMagnitude; + } - for (i = 0; i < _rank; ++i) - vecs[(_windowSize + 1) * i] = 1; + return true; + } - i = 0; - _wTrans.CopyFrom(vecs, ref i); - } + /// + /// Feeds the next observation on the series to the model and as a result changes the state of the model. + /// + /// The next observation on the series. + /// Determines whether the model parameters also need to be updated upon consuming the new observation (default = false). 
+ internal override void Consume(ref Single input, bool updateModel = false) + { + if (Single.IsNaN(input)) + return; - // Forming vector x + int i; - if (_buffer.Count == 0) - { - for (i = 0; i < _windowSize - 1; ++i) - _buffer.AddLast(_state[i]); - } + if (_wTrans == null) + { + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + Single[] vecs = new Single[_rank * _windowSize]; - int len = _buffer.Count; - for (i = 0; i < _windowSize - len - 1; ++i) - _x[i] = 0; - for (i = Math.Max(0, len - _windowSize + 1); i < len; ++i) - _x[i - len + _windowSize - 1] = _buffer[i]; - _x[_windowSize - 1] = input; + for (i = 0; i < _rank; ++i) + vecs[(_windowSize + 1) * i] = 1; - // Computing y: Eq. (11) in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf - CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); + i = 0; + _wTrans.CopyFrom(vecs, ref i); + } - // Updating the state vector - CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); + // Forming vector x - _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; - for (i = 0; i < _windowSize - 2; ++i) - { - _state[i] = ((_windowSize - 2 - i) * _state[i + 1] + _xSmooth[i + 1]) / (_windowSize - 1 - i); - _nextPrediction += _state[i] * _alpha[i]; - } - _state[_windowSize - 2] = _xSmooth[_windowSize - 1]; - _nextPrediction += _state[_windowSize - 2] * _alpha[_windowSize - 2]; + if (_buffer.Count == 0) + { + for (i = 0; i < _windowSize - 1; ++i) + _buffer.AddLast(_state[i]); + } - if (updateModel) - { - // REVIEW: to be implemented in the next version based on the FAPI algorithm - // in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf. 
- } + int len = _buffer.Count; + for (i = 0; i < _windowSize - len - 1; ++i) + _x[i] = 0; + for (i = Math.Max(0, len - _windowSize + 1); i < len; ++i) + _x[i - len + _windowSize - 1] = _buffer[i]; + _x[_windowSize - 1] = input; + + // Computing y: Eq. (11) in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf + CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); + + // Updating the state vector + CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); - _buffer.AddLast(input); + _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; + for (i = 0; i < _windowSize - 2; ++i) + { + _state[i] = ((_windowSize - 2 - i) * _state[i + 1] + _xSmooth[i + 1]) / (_windowSize - 1 - i); + _nextPrediction += _state[i] * _alpha[i]; } + _state[_windowSize - 2] = _xSmooth[_windowSize - 1]; + _nextPrediction += _state[_windowSize - 2] * _alpha[_windowSize - 2]; - /// - /// Train the model parameters based on a training series. - /// - /// The training time-series. - internal override void Train(FixedSizeQueue data) + if (updateModel) { - _host.CheckParam(data != null, nameof(data), "The input series for training cannot be null."); - _host.CheckParam(data.Count >= _trainSize, nameof(data), "The input series for training does not have enough points for training."); + // REVIEW: to be implemented in the next version based on the FAPI algorithm + // in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf. + } - Single[] dataArray = new Single[_trainSize]; + _buffer.AddLast(input); + } - int i; - int count; - for (i = 0, count = 0; count < _trainSize && i < data.Count; ++i) - if (!Single.IsNaN(data[i])) - dataArray[count++] = data[i]; + /// + /// Train the model parameters based on a training series. + /// + /// The training time-series. 
+ internal override void Train(FixedSizeQueue data) + { + _host.CheckParam(data != null, nameof(data), "The input series for training cannot be null."); + _host.CheckParam(data.Count >= _trainSize, nameof(data), "The input series for training does not have enough points for training."); - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.WindowSize = _windowSize; - } + Single[] dataArray = new Single[_trainSize]; - if (count <= 2 * _windowSize) - { -#if !TLCSSA - using (var ch = _host.Start("Train")) - ch.Warning( - "Training cannot be completed because the input series for training does not have enough points."); -#endif - } - else - { - if (count != _trainSize) - Array.Resize(ref dataArray, count); + int i; + int count; + for (i = 0, count = 0; count < _trainSize && i < data.Count; ++i) + if (!Single.IsNaN(data[i])) + dataArray[count++] = data[i]; - TrainCore(dataArray, count); - } + if (_shouldMaintainInfo) + { + _info = new ModelInfo(); + _info.WindowSize = _windowSize; } + if (count <= 2 * _windowSize) + { #if !TLCSSA - /// - /// Train the model parameters based on a training series. - /// - /// The training time-series. 
- internal override void Train(RoleMappedData data) + using (var ch = _host.Start("Train")) + ch.Warning( + "Training cannot be completed because the input series for training does not have enough points."); +#endif + } + else { - _host.CheckValue(data, nameof(data)); - _host.CheckParam(data.Schema.Feature.HasValue, nameof(data), "Must have features column."); - var featureCol = data.Schema.Feature.Value; - if (featureCol.Type != NumberDataViewType.Single) - throw _host.ExceptSchemaMismatch(nameof(data), "feature", featureCol.Name, "Single", featureCol.Type.ToString()); + if (count != _trainSize) + Array.Resize(ref dataArray, count); - Single[] dataArray = new Single[_trainSize]; + TrainCore(dataArray, count); + } + } - int count = 0; - using (var cursor = data.Data.GetRowCursor(featureCol)) - { - var getVal = cursor.GetGetter(featureCol); - Single val = default; - while (cursor.MoveNext() && count < _trainSize) - { - getVal(ref val); - if (!Single.IsNaN(val)) - dataArray[count++] = val; - } - } +#if !TLCSSA + /// + /// Train the model parameters based on a training series. + /// + /// The training time-series. 
+ internal override void Train(RoleMappedData data) + { + _host.CheckValue(data, nameof(data)); + _host.CheckParam(data.Schema.Feature.HasValue, nameof(data), "Must have features column."); + var featureCol = data.Schema.Feature.Value; + if (featureCol.Type != NumberDataViewType.Single) + throw _host.ExceptSchemaMismatch(nameof(data), "feature", featureCol.Name, "Single", featureCol.Type.ToString()); - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.WindowSize = _windowSize; - } + Single[] dataArray = new Single[_trainSize]; - if (count <= 2 * _windowSize) - { - using (var ch = _host.Start("Train")) - ch.Warning("Training cannot be completed because the input series for training does not have enough points."); - } - else + int count = 0; + using (var cursor = data.Data.GetRowCursor(featureCol)) + { + var getVal = cursor.GetGetter(featureCol); + Single val = default; + while (cursor.MoveNext() && count < _trainSize) { - if (count != _trainSize) - Array.Resize(ref dataArray, count); - - TrainCore(dataArray, count); + getVal(ref val); + if (!Single.IsNaN(val)) + dataArray[count++] = val; } } -#endif - private void TrainCore(Single[] dataArray, int originalSeriesLength) + if (_shouldMaintainInfo) { - _host.Assert(Utils.Size(dataArray) > 0); - Single[] singularVals; - Single[] leftSingularVecs; - var learnNaiveModel = false; + _info = new ModelInfo(); + _info.WindowSize = _windowSize; + } - var signalLength = _rankSelectionMethod == RankSelectionMethod.Exact ? 
originalSeriesLength : 2 * _windowSize - 1;//originalSeriesLength; - var signal = new Single[signalLength]; + if (count <= 2 * _windowSize) + { + using (var ch = _host.Start("Train")) + ch.Warning("Training cannot be completed because the input series for training does not have enough points."); + } + else + { + if (count != _trainSize) + Array.Resize(ref dataArray, count); - int i; - // Creating the trajectory matrix for the series - TrajectoryMatrix tMat = new TrajectoryMatrix(_host, dataArray, _windowSize, originalSeriesLength); + TrainCore(dataArray, count); + } + } +#endif - // Computing the SVD of the trajectory matrix - if (!tMat.ComputeSvd(out singularVals, out leftSingularVecs)) - learnNaiveModel = true; - else + private void TrainCore(Single[] dataArray, int originalSeriesLength) + { + _host.Assert(Utils.Size(dataArray) > 0); + Single[] singularVals; + Single[] leftSingularVecs; + var learnNaiveModel = false; + + var signalLength = _rankSelectionMethod == RankSelectionMethod.Exact ? originalSeriesLength : 2 * _windowSize - 1;//originalSeriesLength; + var signal = new Single[signalLength]; + + int i; + // Creating the trajectory matrix for the series + TrajectoryMatrix tMat = new TrajectoryMatrix(_host, dataArray, _windowSize, originalSeriesLength); + + // Computing the SVD of the trajectory matrix + if (!tMat.ComputeSvd(out singularVals, out leftSingularVecs)) + learnNaiveModel = true; + else + { + for (i = 0; i < _windowSize * _maxRank; ++i) { - for (i = 0; i < _windowSize * _maxRank; ++i) + if (Single.IsNaN(leftSingularVecs[i])) { - if (Single.IsNaN(leftSingularVecs[i])) - { - learnNaiveModel = true; - break; - } + learnNaiveModel = true; + break; } } + } - // Checking for standard eigenvectors, if found reduce the window size and reset training. - if (!learnNaiveModel) + // Checking for standard eigenvectors, if found reduce the window size and reset training. 
+ if (!learnNaiveModel) + { + for (i = 0; i < _windowSize; ++i) { - for (i = 0; i < _windowSize; ++i) + var v = leftSingularVecs[(i + 1) * _windowSize - 1]; + if (v * v == 1) { - var v = leftSingularVecs[(i + 1) * _windowSize - 1]; - if (v * v == 1) + if (_windowSize > 2) { - if (_windowSize > 2) - { - _windowSize--; - _maxRank = _windowSize / 2; - _alpha = new Single[_windowSize - 1]; - _state = new Single[_windowSize - 1]; - _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - - TrainCore(dataArray, originalSeriesLength); - return; - } - else - { - learnNaiveModel = true; - break; - } + _windowSize--; + _maxRank = _windowSize / 2; + _alpha = new Single[_windowSize - 1]; + _state = new Single[_windowSize - 1]; + _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + + TrainCore(dataArray, originalSeriesLength); + return; + } + else + { + learnNaiveModel = true; + break; } } } + } - // Learn the naive (averaging) model in case the eigen decomposition is not possible - if (learnNaiveModel) - { + // Learn the naive (averaging) model in case the eigen decomposition is not possible + if (learnNaiveModel) + { #if !TLCSSA - using (var ch = _host.Start("Train")) - ch.Warning("The precise SSA model cannot be trained."); + using (var ch = _host.Start("Train")) + ch.Warning("The precise SSA model cannot be trained."); #endif - _rank = 1; - var temp = (Single)(1f / Math.Sqrt(_windowSize)); - for (i = 0; i < _windowSize; ++i) - leftSingularVecs[i] = temp; - } - else - { - // Computing the signal rank - if (_rankSelectionMethod == RankSelectionMethod.Exact) - _rank = DetermineSignalRank(dataArray, tMat, leftSingularVecs, singularVals, signal, _maxRank); - else if (_rankSelectionMethod == RankSelectionMethod.Fast) - _rank = DetermineSignalRankFast(dataArray, tMat, 
leftSingularVecs, singularVals, _maxRank); - } + _rank = 1; + var temp = (Single)(1f / Math.Sqrt(_windowSize)); + for (i = 0; i < _windowSize; ++i) + leftSingularVecs[i] = temp; + } + else + { + // Computing the signal rank + if (_rankSelectionMethod == RankSelectionMethod.Exact) + _rank = DetermineSignalRank(dataArray, tMat, leftSingularVecs, singularVals, signal, _maxRank); + else if (_rankSelectionMethod == RankSelectionMethod.Fast) + _rank = DetermineSignalRankFast(dataArray, tMat, leftSingularVecs, singularVals, _maxRank); + } - // Setting the the y vector - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + // Setting the y vector + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - // Setting the weight matrix - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - i = 0; - _wTrans.CopyFrom(leftSingularVecs, ref i); + // Setting the weight matrix + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + i = 0; + _wTrans.CopyFrom(leftSingularVecs, ref i); - // Setting alpha - Single nu = 0; - for (i = 0; i < _rank; ++i) - { - _y[i] = leftSingularVecs[_windowSize * (i + 1) - 1]; - nu += _y[i] * _y[i]; - } + // Setting alpha + Single nu = 0; + for (i = 0; i < _rank; ++i) + { + _y[i] = leftSingularVecs[_windowSize * (i + 1) - 1]; + nu += _y[i] * _y[i]; + } - CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); - for (i = 0; i < _windowSize - 1; ++i) - _alpha[i] = _xSmooth[i] / (1 - nu); + CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); + for (i = 0; i < _windowSize - 1; ++i) + _alpha[i] = _xSmooth[i] / (1 - nu); - // Stabilizing the model - if (_shouldStablize && !learnNaiveModel) + // Stabilizing the model + if (_shouldStablize && !learnNaiveModel) + { + if (!Stabilize()) { - if (!Stabilize()) - { #if !TLCSSA - using (var ch = _host.Start("Train")) - ch.Warning("The trained model cannot be stablized."); + using (var ch = 
_host.Start("Train")) + ch.Warning("The trained model cannot be stablized."); #endif - } } + } - // Computing the noise moments - if (ShouldComputeForecastIntervals) - { - if (_rankSelectionMethod != RankSelectionMethod.Exact) - ReconstructSignalTailFast(dataArray, tMat, leftSingularVecs, _rank, signal); - - ComputeNoiseMoments(dataArray, signal, _alpha, out _observationNoiseVariance, out _autoregressionNoiseVariance, - out _observationNoiseMean, out _autoregressionNoiseMean, originalSeriesLength - signalLength); - _observationNoiseMean = 0; - _autoregressionNoiseMean = 0; - } + // Computing the noise moments + if (ShouldComputeForecastIntervals) + { + if (_rankSelectionMethod != RankSelectionMethod.Exact) + ReconstructSignalTailFast(dataArray, tMat, leftSingularVecs, _rank, signal); - // Setting the state - _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; + ComputeNoiseMoments(dataArray, signal, _alpha, out _observationNoiseVariance, out _autoregressionNoiseVariance, + out _observationNoiseMean, out _autoregressionNoiseMean, originalSeriesLength - signalLength); + _observationNoiseMean = 0; + _autoregressionNoiseMean = 0; + } - if (_buffer.Count > 0) // Use the buffer to set the state when there are data points pushed into the buffer using the Consume() method - { - int len = _buffer.Count; - for (i = 0; i < _windowSize - len; ++i) - _x[i] = 0; - for (i = Math.Max(0, len - _windowSize); i < len; ++i) - _x[i - len + _windowSize] = _buffer[i]; - } - else // use the training data points otherwise - { - for (i = originalSeriesLength - _windowSize; i < originalSeriesLength; ++i) - _x[i - originalSeriesLength + _windowSize] = dataArray[i]; - } + // Setting the state + _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; - CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); - CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); + if (_buffer.Count > 0) // Use the buffer to set the state when there are data points pushed into the 
buffer using the Consume() method + { + int len = _buffer.Count; + for (i = 0; i < _windowSize - len; ++i) + _x[i] = 0; + for (i = Math.Max(0, len - _windowSize); i < len; ++i) + _x[i - len + _windowSize] = _buffer[i]; + } + else // use the training data points otherwise + { + for (i = originalSeriesLength - _windowSize; i < originalSeriesLength; ++i) + _x[i - originalSeriesLength + _windowSize] = dataArray[i]; + } - for (i = 1; i < _windowSize; ++i) - { - _state[i - 1] = _xSmooth[i]; - _nextPrediction += _state[i - 1] * _alpha[i - 1]; - } + CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); + CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); - if (_shouldMaintainInfo) - { - _info.IsTrained = true; - _info.WindowSize = _windowSize; - _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; - Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); - _info.Rank = _rank; - _info.IsNaiveModelTrained = learnNaiveModel; - _info.Spectrum = singularVals; - } + for (i = 1; i < _windowSize; ++i) + { + _state[i - 1] = _xSmooth[i]; + _nextPrediction += _state[i - 1] * _alpha[i - 1]; } - /// - /// Forecasts the future values of the series up to the given horizon. - /// - /// The forecast result. - /// The forecast horizon. 
- internal override void Forecast(ref ForecastResultBase result, int horizon = 1) + if (_shouldMaintainInfo) { - _host.CheckParam(horizon >= 1, nameof(horizon), "The horizon parameter should be greater than 0."); - if (result == null) - result = new SsaForecastResult(); - - var str = "The result argument must be of type " + typeof(SsaForecastResult).ToString(); - _host.CheckParam(result is SsaForecastResult, nameof(result), str); + _info.IsTrained = true; + _info.WindowSize = _windowSize; + _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; + Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); + _info.Rank = _rank; + _info.IsNaiveModelTrained = learnNaiveModel; + _info.Spectrum = singularVals; + } + } - var output = result as SsaForecastResult; + /// + /// Forecasts the future values of the series up to the given horizon. + /// + /// The forecast result. + /// The forecast horizon. + internal override void Forecast(ref ForecastResultBase result, int horizon = 1) + { + _host.CheckParam(horizon >= 1, nameof(horizon), "The horizon parameter should be greater than 0."); + if (result == null) + result = new SsaForecastResult(); - var resEditor = VBufferEditor.Create(ref result.PointForecast, horizon); + var str = "The result argument must be of type " + typeof(SsaForecastResult).ToString(); + _host.CheckParam(result is SsaForecastResult, nameof(result), str); - int i; - int j; - int k; + var output = result as SsaForecastResult; - // Computing the point forecasts - resEditor.Values[0] = _nextPrediction; - for (i = 1; i < horizon; ++i) - { - k = 0; - resEditor.Values[i] = _autoregressionNoiseMean + _observationNoiseMean; - for (j = i; j < _windowSize - 1; ++j, ++k) - resEditor.Values[i] += _state[j] * _alpha[k]; + var resEditor = VBufferEditor.Create(ref result.PointForecast, horizon); - for (j = Math.Max(0, i - _windowSize + 1); j < i; ++j, ++k) - resEditor.Values[i] += resEditor.Values[j] * _alpha[k]; - } + int i; + int j; + int k; - // 
Computing the forecast variances - if (ShouldComputeForecastIntervals) - { - var sdEditor = VBufferEditor.Create(ref output.ForecastStandardDeviation, horizon); - var lastCol = new FixedSizeQueue(_windowSize - 1); + // Computing the point forecasts + resEditor.Values[0] = _nextPrediction; + for (i = 1; i < horizon; ++i) + { + k = 0; + resEditor.Values[i] = _autoregressionNoiseMean + _observationNoiseMean; + for (j = i; j < _windowSize - 1; ++j, ++k) + resEditor.Values[i] += _state[j] * _alpha[k]; - for (i = 0; i < _windowSize - 3; ++i) - lastCol.AddLast(0); - lastCol.AddLast(1); - lastCol.AddLast(_alpha[_windowSize - 2]); - sdEditor.Values[0] = _autoregressionNoiseVariance + _observationNoiseVariance; + for (j = Math.Max(0, i - _windowSize + 1); j < i; ++j, ++k) + resEditor.Values[i] += resEditor.Values[j] * _alpha[k]; + } - for (i = 1; i < horizon; ++i) - { - Single temp = 0; - for (j = 0; j < _windowSize - 1; ++j) - temp += _alpha[j] * lastCol[j]; - lastCol.AddLast(temp); + // Computing the forecast variances + if (ShouldComputeForecastIntervals) + { + var sdEditor = VBufferEditor.Create(ref output.ForecastStandardDeviation, horizon); + var lastCol = new FixedSizeQueue(_windowSize - 1); - sdEditor.Values[i] = sdEditor.Values[i - 1] + _autoregressionNoiseVariance * temp * temp; - } + for (i = 0; i < _windowSize - 3; ++i) + lastCol.AddLast(0); + lastCol.AddLast(1); + lastCol.AddLast(_alpha[_windowSize - 2]); + sdEditor.Values[0] = _autoregressionNoiseVariance + _observationNoiseVariance; - for (i = 0; i < horizon; ++i) - sdEditor.Values[i] = (float)Math.Sqrt(sdEditor.Values[i]); + for (i = 1; i < horizon; ++i) + { + Single temp = 0; + for (j = 0; j < _windowSize - 1; ++j) + temp += _alpha[j] * lastCol[j]; + lastCol.AddLast(temp); - output.ForecastStandardDeviation = sdEditor.Commit(); + sdEditor.Values[i] = sdEditor.Values[i - 1] + _autoregressionNoiseVariance * temp * temp; } - result.PointForecast = resEditor.Commit(); - output.CanComputeForecastIntervals = 
ShouldComputeForecastIntervals; - output.BoundOffset = 0; - } - - /// - /// Predicts the next value on the series. - /// - /// The prediction result. - internal override void PredictNext(ref Single output) - { - output = _nextPrediction; - } + for (i = 0; i < horizon; ++i) + sdEditor.Values[i] = (float)Math.Sqrt(sdEditor.Values[i]); - internal override SequenceModelerBase Clone() - { - return new AdaptiveSingularSpectrumSequenceModelerInternal(this); + output.ForecastStandardDeviation = sdEditor.Commit(); } - /// - /// Computes the forecast intervals for the input forecast object at the given confidence level. The results are stored in the forecast object. - /// - /// The input forecast object - /// The confidence level in [0, 1) - internal static void ComputeForecastIntervals(ref SsaForecastResult forecast, Single confidenceLevel = 0.95f) - { - Contracts.CheckParam(0 <= confidenceLevel && confidenceLevel < 1, nameof(confidenceLevel), "The confidence level must be in [0, 1)."); - Contracts.CheckValue(forecast, nameof(forecast)); - Contracts.Check(forecast.CanComputeForecastIntervals, "The forecast intervals cannot be computed for this forecast object."); - - var meanForecast = forecast.PointForecast.GetValues(); - var horizon = meanForecast.Length; - var sdForecast = forecast.ForecastStandardDeviation.GetValues(); - Contracts.Check(sdForecast.Length >= horizon, "The forecast standard deviation values are not available."); + result.PointForecast = resEditor.Commit(); + output.CanComputeForecastIntervals = ShouldComputeForecastIntervals; + output.BoundOffset = 0; + } - forecast.ConfidenceLevel = confidenceLevel; - if (horizon == 0) - return; + /// + /// Predicts the next value on the series. + /// + /// The prediction result. 
+ internal override void PredictNext(ref Single output) + { + output = _nextPrediction; + } - var upper = VBufferEditor.Create(ref forecast.UpperBound, horizon); - var lower = VBufferEditor.Create(ref forecast.LowerBound, horizon); + internal override SequenceModelerBase Clone() + { + return new AdaptiveSingularSpectrumSequenceModelerInternal(this); + } - var z = ProbabilityFunctions.Probit(0.5 + confidenceLevel / 2.0); - double temp; + /// + /// Computes the forecast intervals for the input forecast object at the given confidence level. The results are stored in the forecast object. + /// + /// The input forecast object + /// The confidence level in [0, 1) + internal static void ComputeForecastIntervals(ref SsaForecastResult forecast, Single confidenceLevel = 0.95f) + { + Contracts.CheckParam(0 <= confidenceLevel && confidenceLevel < 1, nameof(confidenceLevel), "The confidence level must be in [0, 1)."); + Contracts.CheckValue(forecast, nameof(forecast)); + Contracts.Check(forecast.CanComputeForecastIntervals, "The forecast intervals cannot be computed for this forecast object."); - for (int i = 0; i < horizon; ++i) - { - temp = z * sdForecast[i]; - upper.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset + temp); - lower.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset - temp); - } + var meanForecast = forecast.PointForecast.GetValues(); + var horizon = meanForecast.Length; + var sdForecast = forecast.ForecastStandardDeviation.GetValues(); + Contracts.Check(sdForecast.Length >= horizon, "The forecast standard deviation values are not available."); - forecast.UpperBound = upper.Commit(); - forecast.LowerBound = lower.Commit(); - } + forecast.ConfidenceLevel = confidenceLevel; + if (horizon == 0) + return; - public void Train(IDataView dataView, string inputColumnName) => Train(new RoleMappedData(dataView, null, inputColumnName)); + var upper = VBufferEditor.Create(ref forecast.UpperBound, horizon); + var lower = VBufferEditor.Create(ref 
forecast.LowerBound, horizon); - public float[] Forecast(int horizon) - { - ForecastResultBase result = null; - Forecast(ref result, horizon); - return result.PointForecast.GetValues().ToArray(); - } + var z = ProbabilityFunctions.Probit(0.5 + confidenceLevel / 2.0); + double temp; - public void ForecastWithConfidenceIntervals(int horizon, out float[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f) + for (int i = 0; i < horizon; ++i) { - ForecastResultBase result = null; - Forecast(ref result, horizon); - SsaForecastResult ssaResult = (SsaForecastResult)result; - ComputeForecastIntervals(ref ssaResult, confidenceLevel); - forecast = result.PointForecast.GetValues().ToArray(); - confidenceIntervalLowerBounds = ssaResult.LowerBound.GetValues().ToArray(); - confidenceIntervalUpperBounds = ssaResult.UpperBound.GetValues().ToArray(); + temp = z * sdForecast[i]; + upper.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset + temp); + lower.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset - temp); } - public void Update(IDataView dataView, string inputColumnName) - { - _host.CheckParam(dataView != null, nameof(dataView), "The input series for updating cannot be null."); + forecast.UpperBound = upper.Commit(); + forecast.LowerBound = lower.Commit(); + } - var data = new RoleMappedData(dataView, null, inputColumnName); - if (data.Schema.Feature.Value.Type != NumberDataViewType.Single) - throw _host.ExceptUserArg(nameof(data.Schema.Feature.Value.Name), "The time series input column has " + - "type '{0}', but must be a float.", data.Schema.Feature.Value.Type); + public void Train(IDataView dataView, string inputColumnName) => Train(new RoleMappedData(dataView, null, inputColumnName)); - var col = data.Schema.Feature.Value; - using (var cursor = data.Data.GetRowCursor(data.Data.Schema)) - { - var getVal = cursor.GetGetter(col); - Single val = default(Single); - while 
(cursor.MoveNext()) - { - getVal(ref val); - if (!Single.IsNaN(val)) - Consume(ref val); - } - } - } + public float[] Forecast(int horizon) + { + ForecastResultBase result = null; + Forecast(ref result, horizon); + return result.PointForecast.GetValues().ToArray(); } - /// - /// Train a forecasting model from an . - /// - /// Reference to the - public void Train(IDataView dataView) => _modeler.Train(dataView, _inputColumnName); + public void ForecastWithConfidenceIntervals(int horizon, out float[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f) + { + ForecastResultBase result = null; + Forecast(ref result, horizon); + SsaForecastResult ssaResult = (SsaForecastResult)result; + ComputeForecastIntervals(ref ssaResult, confidenceLevel); + forecast = result.PointForecast.GetValues().ToArray(); + confidenceIntervalLowerBounds = ssaResult.LowerBound.GetValues().ToArray(); + confidenceIntervalUpperBounds = ssaResult.UpperBound.GetValues().ToArray(); + } - /// - /// Update a forecasting model with the new observations in the form of an . - /// - /// Reference to the observations as an - /// Name of the input column to update from. If null then input column name specified at model initiation is taken as default. - public void Update(IDataView dataView, string inputColumnName = null) => _modeler.Update(dataView, inputColumnName ?? _inputColumnName); + public void Update(IDataView dataView, string inputColumnName) + { + _host.CheckParam(dataView != null, nameof(dataView), "The input series for updating cannot be null."); - /// - /// Perform forecasting until a particular . - /// - /// Number of values to forecast. - /// Forecasted values. 
- public float[] Forecast(int horizon) => _modeler.Forecast(horizon); + var data = new RoleMappedData(dataView, null, inputColumnName); + if (data.Schema.Feature.Value.Type != NumberDataViewType.Single) + throw _host.ExceptUserArg(nameof(data.Schema.Feature.Value.Name), "The time series input column has " + + "type '{0}', but must be a float.", data.Schema.Feature.Value.Type); - /// - /// For saving a model into a repository. - /// - public void Save(ModelSaveContext ctx) - { - _host.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(); - ctx.SetVersionInfo(GetVersionInfo()); - ctx.Writer.Write(_inputColumnName); - ctx.SaveModel(_modeler, "ForecastWrapper"); + var col = data.Schema.Feature.Value; + using (var cursor = data.Data.GetRowCursor(data.Data.Schema)) + { + var getVal = cursor.GetGetter(col); + Single val = default(Single); + while (cursor.MoveNext()) + { + getVal(ref val); + if (!Single.IsNaN(val)) + Consume(ref val); + } + } } - - /// - /// Perform forecasting until a particular and also computes confidence intervals. - /// For confidence intervals to be computed the model must be trained with - /// set to true. - /// - /// Number of values to forecast. - /// Forecasted values - /// Lower bound confidence intervals of forecasted values. - /// Upper bound confidence intervals of forecasted values. - /// Forecast confidence level. 
- public void ForecastWithConfidenceIntervals(int horizon, out float[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f) => - _modeler.ForecastWithConfidenceIntervals(horizon, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds, confidenceLevel); } } diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index e36633d7b5..bfec448467 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -2,10 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; using Microsoft.ML.Data; using Microsoft.ML.Transforms.TimeSeries; -using static Microsoft.ML.Transforms.TimeSeries.AdaptiveSingularSpectrumSequenceModeler; namespace Microsoft.ML { @@ -149,23 +147,30 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); /// - /// Singular Spectrum Analysis (SSA) model for modeling univariate time-series. + /// Singular Spectrum Analysis (SSA) model for univariate time-series forecasting. /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. /// /// Catalog. - /// The name of the column on which forecasting needs to be performed. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. + /// The vector contains Alert, Raw Score, P-Value as first three values. + /// The length of the window on the series for building the trajectory matrix (parameter L). 
+ /// The length of series that is kept in buffer for modeling (parameter N). /// The length of series from the begining used for training. - /// The length of series that is kept in buffer for modeling (parameter N from reference papar). - /// The length of the window on the series for building the trajectory matrix (parameter L from reference papar). - /// The discount factor in [0,1] used for online updates (default = 1). - /// The rank selection method (default = Exact). - /// The desired rank of the subspace used for SSA projection (parameter r from reference papar). This parameter should be in the range in [1, ]. - /// If set to null, the rank is automatically determined based on prediction error minimization. (default = null) - /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to - 1. - /// The flag determining whether the confidence bounds for the point forecasts should be computed. (default = ) - /// The flag determining whether the model should be stabilized. + /// The number of values to forecast. + /// The flag determining whether the model is adaptive. + /// The discount factor in [0,1] used for online updates. + /// The rank selection method. + /// The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. + /// If set to null, the rank is automatically determined based on prediction error minimization. + /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1. + /// The flag determining whether the model should be stabilized. /// The flag determining whether the meta information for the model needs to be maintained. - /// The maximum growth on the exponential trend + /// The maximum growth on the exponential trend. + /// The name of the confidence interval lower bound column. If not specified then confidence intervals will not be calculated. 
+ /// The name of the confidence interval upper bound column. If not specified then confidence intervals will not be calculated. + /// The confidence level for forecasting. + /// Set this to true if horizon will change after training(at prediction time). /// /// /// /// /// - public static AdaptiveSingularSpectrumSequenceModeler AdaptiveSingularSpectrumSequenceModeler(this ForecastingCatalog catalog, - string inputColumnName, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, - int? rank = null, int? maxRank = null, bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) => - new AdaptiveSingularSpectrumSequenceModeler(CatalogUtils.GetEnvironment(catalog), inputColumnName, trainSize, seriesLength, windowSize, discountFactor, - rankSelectionMethod, rank, maxRank, shouldComputeForecastIntervals, shouldstablize, shouldMaintainInfo, maxGrowth); + public static SsaForecastingEstimator ForecastBySsa( + this ForecastingCatalog catalog, string outputColumnName, string inputColumnName, int windowSize, int seriesLength, int trainSize, int horizon, + bool isAdaptive = false, float discountFactor = 1, RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, + int? maxRank = null, bool shouldStablize = true, bool shouldMaintainInfo = false, GrowthRatio? 
maxGrowth = null, string forcastingConfidentLowerBoundColumnName = null, + string forcastingConfidentUpperBoundColumnName = null, float confidenceLevel = 0.95f, bool variableHorizon = false) => + new SsaForecastingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, windowSize, seriesLength, trainSize, + horizon, isAdaptive, discountFactor, rankSelectionMethod, rank, maxRank, shouldStablize, shouldMaintainInfo, maxGrowth, forcastingConfidentLowerBoundColumnName, + forcastingConfidentUpperBoundColumnName, confidenceLevel, variableHorizon); } } diff --git a/src/Microsoft.ML.TimeSeries/Forecast.cs b/src/Microsoft.ML.TimeSeries/Forecast.cs deleted file mode 100644 index 9321d115f0..0000000000 --- a/src/Microsoft.ML.TimeSeries/Forecast.cs +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.IO; -using Microsoft.ML.Data; -using static Microsoft.ML.Transforms.TimeSeries.AdaptiveSingularSpectrumSequenceModeler; - -namespace Microsoft.ML.TimeSeries -{ - /// - /// Interface for forecasting models. - /// - /// The type of values that are forecasted. - public interface ICanForecast : ICanSaveModel - { - /// - /// Train a forecasting model from an . - /// - /// Training data. - void Train(IDataView dataView); - - /// - /// Update a forecasting model with the new observations in the form of an . - /// - /// Reference to the observations as an - /// Name of the input column to update from. - void Update(IDataView dataView, string inputColumnName = null); - - /// - /// Perform forecasting until a particular . - /// - /// Number of values to forecast. - /// Forecasted values. - T[] Forecast(int horizon); - - /// - /// Perform forecasting until a particular and also computes confidence intervals. - /// - /// Number of values to forecast. 
- /// Forecasted values - /// Lower bound confidence intervals of forecasted values. - /// Upper bound confidence intervals of forecasted values. - /// Forecast confidence level. - void ForecastWithConfidenceIntervals(int horizon, out T[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f); - } - - public static class ForecastExtensions - { - /// - /// Load a model. - /// - /// The type of , usually float. - /// - /// File path to load the model from. - /// model. - public static ICanForecast LoadForecastingModel(this ModelOperationsCatalog catalog, string filePath) - { - var env = CatalogUtils.GetEnvironment(catalog); - using (var file = File.OpenRead(filePath)) - { - using (var rep = RepositoryReader.Open(file, env)) - { - ModelLoadContext.LoadModel, SignatureLoadModel>(env, out var model, rep, LoaderSignature); - return model; - } - } - } - - /// - /// Save a model to a file specified by - /// - /// - /// - /// model to save. - /// File path to save the model to. 
- public static void SaveForecastingModel(this ModelOperationsCatalog catalog, ICanForecast model, string filePath) - { - var env = CatalogUtils.GetEnvironment(catalog); - using (var file = File.Create(filePath)) - { - using (var ch = env.Start("Saving forecasting model.")) - { - using (var rep = RepositoryWriter.CreateNew(file, ch)) - { - ModelSaveContext.SaveModel(rep, model, LoaderSignature); - rep.Commit(); - } - } - } - } - } -} diff --git a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs b/src/Microsoft.ML.TimeSeries/PredictionEngine.cs similarity index 66% rename from src/Microsoft.ML.TimeSeries/PredictionFunction.cs rename to src/Microsoft.ML.TimeSeries/PredictionEngine.cs index 310be9bd36..75bcf88407 100644 --- a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs +++ b/src/Microsoft.ML.TimeSeries/PredictionEngine.cs @@ -26,6 +26,7 @@ internal interface IStatefulTransformer : ITransformer /// /// Creates a clone of the transfomer. Used for taking the snapshot of the state. + /// This is used to create multiple time series with their own state. /// /// IStatefulTransformer Clone(); @@ -33,14 +34,22 @@ internal interface IStatefulTransformer : ITransformer internal abstract class StatefulRow : DataViewRow { - public abstract Action GetPinger(); + public abstract Action GetPinger(); } internal interface IStatefulRowMapper : IRowMapper { void CloneState(); - Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer); + Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer); + } + + internal class PingerArgument + { + public long RowPosition; + public float? ConfidenceLevel; + public int? Horizon; + public bool DontConsumeSource; } /// @@ -51,16 +60,16 @@ internal interface IStatefulRowMapper : IRowMapper /// /// The user-defined type that holds the example. /// The user-defined type that holds the prediction. 
- public sealed class TimeSeriesPredictionFunction : PredictionEngineBase + public sealed class TimeSeriesPredictionEngine : PredictionEngineBase where TSrc : class where TDst : class, new() { - private Action _pinger; + private Action _pinger; private long _rowPosition; private ITransformer InputTransformer { get; set; } /// - /// Checkpoints to disk with the updated + /// Checkpoints to disk with the updated /// state. /// /// Usually . @@ -83,7 +92,7 @@ public void CheckPoint(IHostEnvironment env, string modelPath) } /// - /// Checkpoints to a with the updated + /// Checkpoints to a with the updated /// state. /// /// Usually . @@ -102,14 +111,14 @@ public void CheckPoint(IHostEnvironment env, Stream stream) env.CheckParam(stream != null, nameof(stream)); if (Transformer is ITransformerChainAccessor) - { + { - new TransformerChain - (((ITransformerChainAccessor)Transformer).Transformers, - ((ITransformerChainAccessor)Transformer).Scopes).SaveTo(env, stream); - } - else - Transformer.SaveTo(env, stream); + new TransformerChain + (((ITransformerChainAccessor)Transformer).Transformers, + ((ITransformerChainAccessor)Transformer).Scopes).SaveTo(env, stream); + } + else + Transformer.SaveTo(env, stream); } private static ITransformer CloneTransformers(ITransformer transformer) @@ -132,10 +141,10 @@ private static ITransformer CloneTransformers(ITransformer transformer) } /// - /// Contructor for creating time series specific prediction engine. It allows update the time series model to be updated with the observations + /// Constructor for creating time series specific prediction engine. 
It allows the time series model to be updated with the observations /// seen at prediction time via /// - public TimeSeriesPredictionFunction(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, + public TimeSeriesPredictionEngine(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) : base(env, CloneTransformers(transformer), ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition) { @@ -188,11 +197,11 @@ internal DataViewRow GetStatefulRows(DataViewRow input, IRowToRowMapper mapper, return result; } - private Action CreatePinger(List rows) + internal Action CreatePinger(List rows) { if (rows.Count == 0) return position => { }; - Action pinger = null; + Action pinger = null; foreach (var row in rows) pinger += row.GetPinger(); return pinger; @@ -255,32 +264,113 @@ private protected override Func TransformerChec } /// - /// Run prediction pipeline on one example. + /// Performs prediction. In the case of forecasting only task can be left as null. + /// If is not null then it could be used to update forecasting models with new observation. + /// For anomaly detection the model is always updated with . /// - /// The example to run on. - /// The object to store the prediction in. If it's null, a new one will be created, otherwise the old one - /// is reused. - public override void Predict(TSrc example, ref TDst prediction) + /// Input to the prediction engine. + /// Forecasting/Prediction from the engine. + /// Used to indicate the number of values to forecast. + /// Used in forecasting model for confidence. + public void Predict(TSrc example, ref TDst prediction, int? horizon = null, float? 
confidenceLevel = null) { - Contracts.CheckValue(example, nameof(example)); - ExtractValues(example); - if (prediction == null) - prediction = new TDst(); + if (example != null && prediction != null) + { + //Update models and make a prediction after updating. + Contracts.CheckValue(example, nameof(example)); + ExtractValues(example); + + // Update state. + _pinger(new PingerArgument() + { + RowPosition = _rowPosition, + ConfidenceLevel = confidenceLevel, + Horizon = horizon + }); - // Update state. - _pinger(_rowPosition); + // Predict. + FillValues(prediction); - // Predict. - FillValues(prediction); + _rowPosition++; + } + else if (prediction != null) + { + //Forecast. + + // Signal all time series models to not fetch src values in getters. + _pinger(new PingerArgument() + { + RowPosition = _rowPosition, + DontConsumeSource = true, + ConfidenceLevel = confidenceLevel, + Horizon = horizon + }); + + // Predict. The expectation is user has asked for columns that are + // forecasting columns and hence will not trigger a getter that needs an input. + FillValues(prediction); + } + else if (example != null) + { + //Update models. - _rowPosition++; + //Extract value that needs to be propagated to all the models. + Contracts.CheckValue(example, nameof(example)); + ExtractValues(example); + + // Update state. + _pinger(new PingerArgument() + { + RowPosition = _rowPosition, + ConfidenceLevel = confidenceLevel, + Horizon = horizon + }); + + _rowPosition++; + } + } + + /// + /// Performs prediction. In the case of forecasting only task can be left as null. + /// If is not null then it could be used to update forecasting models with new observation. + /// For anomaly detection the model is always updated with . + /// + /// Input to the prediction engine. + /// Forecasting/Prediction from the engine. + public override void Predict(TSrc example, ref TDst prediction) => Predict(example, ref prediction); + + /// + /// Performs prediction. 
In the case of forecasting only task can be left as null. + /// If is not null then it could be used to update forecasting models with new observation. + /// + /// Input to the prediction engine. + /// Number of values to forecast. + /// Confidence level for forecasting. + /// Prediction/Forecasting after the model has been updated with + public TDst Predict(TSrc example, int? horizon = null, float? confidenceLevel = null) + { + TDst dst = new TDst(); + Predict(example, ref dst, horizon, confidenceLevel); + return dst; + } + + /// + /// Forecasting only task. + /// + /// Number of values to forecast. + /// Confidence level for forecasting. + public TDst Predict(int? horizon = null, float? confidenceLevel = null) + { + TDst dst = new TDst(); + Predict(null, ref dst, horizon, confidenceLevel); + return dst; } } public static class PredictionFunctionExtensions { /// - /// creates a prediction function/engine for a time series pipeline + /// creates a prediction engine for a time series pipeline. /// It updates the state of time series model with observations seen at prediction phase and allows checkpointing the model. /// /// Class describing input schema to the model. @@ -290,7 +380,7 @@ public static class PredictionFunctionExtensions /// To ignore missing columns. Default is false. /// Input schema definition. Default is null. /// Output schema definition. Default is null. - ///

Example code can be found by searching for TimeSeriesPredictionFunction in ML.NET.

+ ///

Example code can be found by searching for TimeSeriesPredictionEngine in ML.NET.

/// /// /// /// /// - public static TimeSeriesPredictionFunction CreateTimeSeriesPredictionFunction(this ITransformer transformer, IHostEnvironment env, + public static TimeSeriesPredictionEngine CreateTimeSeriesEngine(this ITransformer transformer, IHostEnvironment env, bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) where TSrc : class where TDst : class, new() @@ -308,7 +398,7 @@ public static TimeSeriesPredictionFunction CreateTimeSeriesPredictio env.CheckValue(transformer, nameof(transformer)); env.CheckValueOrNull(inputSchemaDefinition); env.CheckValueOrNull(outputSchemaDefinition); - return new TimeSeriesPredictionFunction(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); + return new TimeSeriesPredictionEngine(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition); } } } diff --git a/src/Microsoft.ML.TimeSeries/SSaForecasting.cs b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs new file mode 100644 index 0000000000..ab8684c147 --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/SSaForecasting.cs @@ -0,0 +1,361 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.CommandLine; +using Microsoft.ML.Data; +using Microsoft.ML.Runtime; +using Microsoft.ML.Transforms.TimeSeries; + +[assembly: LoadableClass(SsaForecastingTransformer.Summary, typeof(IDataTransform), typeof(SsaForecastingTransformer), typeof(SsaForecastingTransformer.Options), typeof(SignatureDataTransform), + SsaForecastingTransformer.UserName, SsaForecastingTransformer.LoaderSignature, SsaForecastingTransformer.ShortName)] + +[assembly: LoadableClass(SsaForecastingTransformer.Summary, typeof(IDataTransform), typeof(SsaForecastingTransformer), null, typeof(SignatureLoadDataTransform), + SsaForecastingTransformer.UserName, SsaForecastingTransformer.LoaderSignature)] + +[assembly: LoadableClass(SsaForecastingTransformer.Summary, typeof(SsaForecastingTransformer), null, typeof(SignatureLoadModel), + SsaForecastingTransformer.UserName, SsaForecastingTransformer.LoaderSignature)] + +[assembly: LoadableClass(typeof(IRowMapper), typeof(SsaForecastingTransformer), null, typeof(SignatureLoadRowMapper), + SsaForecastingTransformer.UserName, SsaForecastingTransformer.LoaderSignature)] + +namespace Microsoft.ML.Transforms.TimeSeries +{ + /// + /// resulting from fitting a . 
+ /// + public sealed class SsaForecastingTransformer : SsaForecastingBaseWrapper, IStatefulTransformer + { + internal const string Summary = "This transform forecasts using Singular Spectrum Analysis (SSA)."; + internal const string LoaderSignature = "SsaForecasting"; + internal const string UserName = "SSA Forecasting"; + internal const string ShortName = "ssafcst"; + + internal sealed class Options : TransformInputBase + { + [Argument(ArgumentType.Required, HelpText = "The name of the source column.", ShortName = "src", SortOrder = 1, Purpose = SpecialPurpose.ColumnName)] + public string Source; + + [Argument(ArgumentType.Required, HelpText = "The name of the new column.", SortOrder = 2)] + public string Name; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the confidence interval lower bound column.", ShortName = "cnfminname", SortOrder = 3)] + public string ForcastingConfidentLowerBoundColumnName; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the confidence interval upper bound column.", ShortName = "cnfmaxnname", SortOrder = 3)] + public string ForcastingConfidentUpperBoundColumnName; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The discount factor in [0,1] used for online updates.", ShortName = "disc", SortOrder = 5)] + public float DiscountFactor = 1; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The flag determing whether the model is adaptive", ShortName = "adp", SortOrder = 6)] + public bool IsAdaptive = false; + + [Argument(ArgumentType.Required, HelpText = "The length of the window on the series for building the trajectory matrix (parameter L).", SortOrder = 2)] + public int WindowSize; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The rank selection method.", SortOrder = 3)] + public RankSelectionMethod RankSelectionMethod = RankSelectionMethod.Exact; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The desired rank of the subspace used for SSA projection (parameter r). 
This parameter should be in the range in [1, windowSize]. " + + "If set to null, the rank is automatically determined based on prediction error minimization.", SortOrder = 3)] + public int? Rank = null; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1.", SortOrder = 3)] + public int? MaxRank = null; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The flag determining whether the model should be stabilized.", SortOrder = 3)] + public bool ShouldStablize = true; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The flag determining whether the meta information for the model needs to be maintained.", SortOrder = 3)] + public bool ShouldMaintainInfo = false; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The maximum growth on the exponential trend.", SortOrder = 3)] + public GrowthRatio? MaxGrowth = null; + + [Argument(ArgumentType.Required, HelpText = "The length of series that is kept in buffer for modeling (parameter N).", SortOrder = 2)] + public int SeriesLength; + + [Argument(ArgumentType.Required, HelpText = "The length of series from the begining used for training.", SortOrder = 2)] + public int TrainSize; + + [Argument(ArgumentType.Required, HelpText = "The number of values to forecast.", SortOrder = 2)] + public int Horizon; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The confidence level in [0, 1) for forecasting.", SortOrder = 2)] + public float ConfidenceLevel = 0.95f; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Set this to true horizon will change at prediction time.", SortOrder = 2)] + public bool VariableHorizon; + } + + private sealed class BaseArguments : SsaForecastingOptions + { + public BaseArguments(Options options) + { + Source = options.Source; + Name = options.Name; + ForcastingConfidentLowerBoundColumnName = options.ForcastingConfidentLowerBoundColumnName; + 
ForcastingConfidentUpperBoundColumnName = options.ForcastingConfidentUpperBoundColumnName; + WindowSize = options.WindowSize; + DiscountFactor = options.DiscountFactor; + IsAdaptive = options.IsAdaptive; + RankSelectionMethod = options.RankSelectionMethod; + Rank = options.Rank; + ShouldStablize = options.ShouldStablize; + MaxGrowth = options.MaxGrowth; + SeriesLength = options.SeriesLength; + TrainSize = options.TrainSize; + Horizon = options.Horizon; + ConfidenceLevel = options.ConfidenceLevel; + VariableHorizon = options.VariableHorizon; + } + } + + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "FRCSTRNS", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(SsaForecastingTransformer).Assembly.FullName); + } + + internal SsaForecastingTransformer(IHostEnvironment env, Options options, IDataView input) + : base(new BaseArguments(options), LoaderSignature, env) + { + InternalTransform.Model.Train(new RoleMappedData(input, null, InternalTransform.InputColumnName)); + } + + // Factory method for SignatureDataTransform. + private static IDataTransform Create(IHostEnvironment env, Options options, IDataView input) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(options, nameof(options)); + env.CheckValue(input, nameof(input)); + + return new SsaForecastingTransformer(env, options, input).MakeDataTransform(input); + } + + internal SsaForecastingTransformer(IHostEnvironment env, Options options) + : base(new BaseArguments(options), LoaderSignature, env) + { + // This constructor is empty. + } + + // Factory method for SignatureLoadDataTransform. 
+ private static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(ctx, nameof(ctx)); + env.CheckValue(input, nameof(input)); + + return new SsaForecastingTransformer(env, ctx).MakeDataTransform(input); + } + + IStatefulTransformer IStatefulTransformer.Clone() + { + var clone = (SsaForecastingTransformer)MemberwiseClone(); + clone.InternalTransform.Model = clone.InternalTransform.Model.Clone(); + clone.InternalTransform.StateRef = (SsaForecastingBase.State)clone.InternalTransform.StateRef.Clone(); + clone.InternalTransform.StateRef.InitState(clone.InternalTransform, InternalTransform.Host); + return clone; + } + + // Factory method for SignatureLoadModel. + private static SsaForecastingTransformer Create(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(GetVersionInfo()); + + return new SsaForecastingTransformer(env, ctx); + } + + internal SsaForecastingTransformer(IHostEnvironment env, ModelLoadContext ctx) + : base(env, ctx, LoaderSignature) + { + // *** Binary format *** + // + InternalTransform.Host.CheckDecode(InternalTransform.IsAdaptive == false); + } + + private protected override void SaveModel(ModelSaveContext ctx) + { + InternalTransform.Host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + ctx.SetVersionInfo(GetVersionInfo()); + + InternalTransform.Host.Assert(InternalTransform.IsAdaptive == false); + + // *** Binary format *** + // + + base.SaveModel(ctx); + } + + // Factory method for SignatureLoadRowMapper. + private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, DataViewSchema inputSchema) + => Create(env, ctx).MakeRowMapper(inputSchema); + } + + /// + /// Forecasts using Singular Spectrum Analysis. 
+ /// + /// + /// | + /// | Output column data type | Vector of | + /// + /// | | | + /// | -- | -- | + /// | Does this estimator need to look at the data to train its parameters? | Yes | + /// | Input column data type | | + /// | Output column data type | Three vectors of | + /// + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-props.md)] + /// + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-ssa.md)] + /// + /// Check the See Also section for links to usage examples. + /// ]]> + /// + /// + public sealed class SsaForecastingEstimator : IEstimator + { + private readonly IHost _host; + private readonly SsaForecastingTransformer.Options _options; + + /// + /// Create a new instance of + /// + /// Host Environment. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. If set to , the value of the will be used as source. + /// The vector contains Alert, Raw Score, P-Value as first three values. + /// The length of the window on the series for building the trajectory matrix (parameter L). + /// The length of series that is kept in buffer for modeling (parameter N). + /// The length of series from the begining used for training. + /// The number of values to forecast. + /// The flag determing whether the model is adaptive. + /// The discount factor in [0,1] used for online updates. + /// The rank selection method. + /// The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. + /// If set to null, the rank is automatically determined based on prediction error minimization. + /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1. + /// The flag determining whether the model should be stabilized. + /// The flag determining whether the meta information for the model needs to be maintained. + /// The maximum growth on the exponential trend. 
+ /// The name of the confidence interval lower bound column. If not specified then confidence intervals will not be calculated. + /// The name of the confidence interval upper bound column. If not specified then confidence intervals will not be calculated. + /// The confidence level for forecasting. + /// Set this to true if horizon will change after training. + internal SsaForecastingEstimator(IHostEnvironment env, + string outputColumnName, + string inputColumnName, + int windowSize, + int seriesLength, + int trainSize, + int horizon, + bool isAdaptive = false, + float discountFactor = 1, + RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, + int? rank = null, + int? maxRank = null, + bool shouldStablize = true, + bool shouldMaintainInfo = false, + GrowthRatio? maxGrowth = null, + string forcastingConfidentLowerBoundColumnName = null, + string forcastingConfidentUpperBoundColumnName = null, + float confidenceLevel = 0.95f, + bool variableHorizon = false) + : this(env, new SsaForecastingTransformer.Options + { + Source = inputColumnName ?? 
outputColumnName, + Name = outputColumnName, + DiscountFactor = discountFactor, + IsAdaptive = isAdaptive, + WindowSize = windowSize, + RankSelectionMethod = rankSelectionMethod, + Rank = rank, + MaxRank = maxRank, + ShouldStablize = shouldStablize, + ShouldMaintainInfo = shouldMaintainInfo, + MaxGrowth = maxGrowth, + ConfidenceLevel = confidenceLevel, + ForcastingConfidentLowerBoundColumnName = forcastingConfidentLowerBoundColumnName, + ForcastingConfidentUpperBoundColumnName = forcastingConfidentUpperBoundColumnName, + SeriesLength = seriesLength, + TrainSize = trainSize, + VariableHorizon = variableHorizon, + Horizon = horizon + }) + { + } + + internal SsaForecastingEstimator(IHostEnvironment env, SsaForecastingTransformer.Options options) + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(nameof(SsaForecastingEstimator)); + + _host.CheckNonEmpty(options.Name, nameof(options.Name)); + _host.CheckNonEmpty(options.Source, nameof(options.Source)); + + _options = options; + } + + /// + /// Train and return a transformer. + /// + public SsaForecastingTransformer Fit(IDataView input) + { + _host.CheckValue(input, nameof(input)); + return new SsaForecastingTransformer(_host, _options, input); + } + + /// + /// Schema propagation for transformers. + /// Returns the output schema of the data, if the input schema is like the one provided. + /// Creates three output columns if confidence intervals are requested otherwise + /// just one. 
+ /// + public SchemaShape GetOutputSchema(SchemaShape inputSchema) + { + _host.CheckValue(inputSchema, nameof(inputSchema)); + + if (!inputSchema.TryFindColumn(_options.Source, out var col)) + throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", _options.Source); + if (col.ItemType != NumberDataViewType.Single) + throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", _options.Source, "Single", col.GetTypeString()); + + var resultDic = inputSchema.ToDictionary(x => x.Name); + resultDic[_options.Name] = new SchemaShape.Column( + _options.Name, SchemaShape.Column.VectorKind.Vector, NumberDataViewType.Single, false); + + if (!string.IsNullOrEmpty(_options.ForcastingConfidentUpperBoundColumnName)) + { + resultDic[_options.ForcastingConfidentLowerBoundColumnName] = new SchemaShape.Column( + _options.ForcastingConfidentLowerBoundColumnName, SchemaShape.Column.VectorKind.Vector, + NumberDataViewType.Single, false); + + resultDic[_options.ForcastingConfidentUpperBoundColumnName] = new SchemaShape.Column( + _options.ForcastingConfidentUpperBoundColumnName, SchemaShape.Column.VectorKind.Vector, + NumberDataViewType.Single, false); + } + + return new SchemaShape(resultDic.Values); + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs index 915e74ab39..5048a38dac 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialAnomalyDetectionTransformBase.cs @@ -377,28 +377,31 @@ private Delegate MakeGetter(DataViewRow input, AnomalyDetectionStateBase state) return valueGetter; } - public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) + public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) { disposer = null; - Action pinger = null; + Action pinger = null; if (activeOutput(0)) pinger = MakePinger(input, State); return 
pinger; } - private Action MakePinger(DataViewRow input, AnomalyDetectionStateBase state) + private Action MakePinger(DataViewRow input, AnomalyDetectionStateBase state) + { + _host.AssertValue(input); + var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); + Action pinger = (PingerArgument args) => { - _host.AssertValue(input); - var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); - Action pinger = (long rowPosition) => - { - TInput src = default; - srcGetter(ref src); - state.UpdateState(ref src, rowPosition, _parent.WindowSize > 0); - }; - return pinger; - } + if (args.DontConsumeSource) + return; + + TInput src = default; + srcGetter(ref src); + state.UpdateState(ref src, args.RowPosition, _parent.WindowSize > 0); + }; + return pinger; + } public void CloneState() { @@ -516,7 +519,7 @@ private protected override void SetNaOutput(ref VBuffer dst) dst = editor.Commit(); } - private protected sealed override void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) + public sealed override void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) { var outputLength = Parent.OutputLength; Host.Assert(outputLength >= 2); diff --git a/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs new file mode 100644 index 0000000000..27557df7de --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/SequentialForecastingTransformBase.cs @@ -0,0 +1,319 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.IO; +using System.Threading; +using Microsoft.ML.CommandLine; +using Microsoft.ML.Data; +using Microsoft.ML.Runtime; +using static Microsoft.ML.DataViewSchema; + +namespace Microsoft.ML.Transforms.TimeSeries +{ + + /// + /// The base class that can be inherited by the 'Argument' classes in the derived classes containing the shared input parameters. + /// + internal abstract class ForecastingArgumentsBase + { + [Argument(ArgumentType.Required, HelpText = "The name of the source column", ShortName = "src", + SortOrder = 1, Purpose = SpecialPurpose.ColumnName)] + public string Source; + + [Argument(ArgumentType.Required, HelpText = "The name of the new column", ShortName = "name", + SortOrder = 2)] + public string Name; + + [Argument(ArgumentType.Required, HelpText = "The name of the confidence interval lower bound column.", ShortName = "cnfminname", + SortOrder = 2)] + public string ForcastingConfidentLowerBoundColumnName; + + [Argument(ArgumentType.Required, HelpText = "The name of the confidence interval upper bound column.", ShortName = "cnfmaxnname", + SortOrder = 2)] + public string ForcastingConfidentUpperBoundColumnName; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The length of series from the begining used for training.", ShortName = "wnd", + SortOrder = 3)] + public int TrainSize = 1; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The size of the initial window. The default value " + + "is set to 0, which means there is no initial window considered.", ShortName = "initwnd", SortOrder = 5)] + public int SeriesLength = 0; + } + + /// + /// The base class for forecasting transforms that also supports confidence intervals for each forecasted value. 
+ /// For more details, please refer to http://arxiv.org/pdf/1204.3251.pdf + /// + /// The type of the input sequence + /// The type of the input sequence + internal abstract class SequentialForecastingTransformBase : SequentialTransformerBase, TState> + where TState : SequentialForecastingTransformBase.ForecastingStateBase, new() + { + + // The size of the VBuffer in the dst column. + private readonly int _outputLength; + + private protected SequentialForecastingTransformBase(int windowSize, int initialWindowSize, + string inputColumnName, string outputColumnName, string forecastingConfidenceIntervalMinOutputColumnName, + string forecastingConfidenceIntervalMaxOutputColumnName, string name, int outputLength, IHostEnvironment env) + : base(Contracts.CheckRef(env, nameof(env)).Register(name), windowSize, initialWindowSize, + outputColumnName, forecastingConfidenceIntervalMinOutputColumnName, + forecastingConfidenceIntervalMaxOutputColumnName, inputColumnName, new VectorDataViewType(NumberDataViewType.Single, outputLength)) + { + _outputLength = outputLength; + } + + private protected SequentialForecastingTransformBase(ForecastingArgumentsBase args, string name, int outputLength, IHostEnvironment env) + : this(args.TrainSize, args.SeriesLength, args.Source, args.ForcastingConfidentLowerBoundColumnName, + args.ForcastingConfidentUpperBoundColumnName, args.Name, name, outputLength, env) + { + } + + private protected SequentialForecastingTransformBase(IHostEnvironment env, ModelLoadContext ctx, string name) + : base(Contracts.CheckRef(env, nameof(env)).Register(name), ctx) + { + _outputLength = ctx.Reader.ReadInt32(); + // *** Binary format *** + // + } + + private protected override void SaveModel(ModelSaveContext ctx) + { + Host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + + // *** Binary format *** + // + + base.SaveModel(ctx); + ctx.Writer.Write(_outputLength); + } + + internal override IStatefulRowMapper MakeRowMapper(DataViewSchema schema) => new 
Mapper(Host, this, schema); + + internal sealed class Mapper : IStatefulRowMapper + { + private readonly IHost _host; + private readonly SequentialForecastingTransformBase _parent; + private readonly DataViewSchema _parentSchema; + private readonly int _inputColumnIndex; + private ForecastingStateBase State { get; set; } + private bool _dontFetchSrcValue; + + public Mapper(IHostEnvironment env, SequentialForecastingTransformBase parent, DataViewSchema inputSchema) + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(nameof(Mapper)); + _host.CheckValue(inputSchema, nameof(inputSchema)); + _host.CheckValue(parent, nameof(parent)); + + if (!inputSchema.TryGetColumnIndex(parent.InputColumnName, out _inputColumnIndex)) + throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", parent.InputColumnName); + + var colType = inputSchema[_inputColumnIndex].Type; + if (colType != NumberDataViewType.Single) + throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", parent.InputColumnName, "Single", colType.ToString()); + + _parent = parent; + _parentSchema = inputSchema; + State = (ForecastingStateBase)_parent.StateRef; + _dontFetchSrcValue = false; + } + + public DataViewSchema.DetachedColumn[] GetOutputColumns() + { + DetachedColumn[] info; + + if (!string.IsNullOrEmpty(_parent.ForecastingConfidenceIntervalMaxOutputColumnName)) + { + info = new DetachedColumn[3]; + info[0] = new DetachedColumn(_parent.OutputColumnName, new VectorDataViewType(NumberDataViewType.Single, _parent._outputLength)); + info[1] = new DetachedColumn(_parent.ForecastingConfidenceIntervalMinOutputColumnName, new VectorDataViewType(NumberDataViewType.Single, _parent._outputLength)); + info[2] = new DetachedColumn(_parent.ForecastingConfidenceIntervalMaxOutputColumnName, new VectorDataViewType(NumberDataViewType.Single, _parent._outputLength)); + } + else + { + info = new DetachedColumn[1]; + info[0] = new DetachedColumn(_parent.OutputColumnName, new 
VectorDataViewType(NumberDataViewType.Single, _parent._outputLength)); + } + + return info; + } + + public Func GetDependencies(Func activeOutput) + { + if (activeOutput(0)) + return col => col == _inputColumnIndex; + else + return col => false; + } + + void ICanSaveModel.Save(ModelSaveContext ctx) => _parent.SaveModel(ctx); + + public Delegate[] CreateGetters(DataViewRow input, Func activeOutput, out Action disposer) + { + disposer = null; + var getters = string.IsNullOrEmpty(_parent.ForecastingConfidenceIntervalMaxOutputColumnName) ? new Delegate[1] : new Delegate[3]; + + if (activeOutput(0)) + { + ValueGetter> valueGetter = (ref VBuffer dst) => + { + State.Forecast(ref dst); + }; + + getters[0] = valueGetter; + } + + if (!string.IsNullOrEmpty(_parent.ForecastingConfidenceIntervalMaxOutputColumnName)) + { + if (activeOutput(1)) + { + ValueGetter> valueGetter = (ref VBuffer dst) => + { + State.ConfidenceIntervalLowerBound(ref dst); + }; + + getters[1] = valueGetter; + } + + if (activeOutput(2)) + { + ValueGetter> valueGetter = (ref VBuffer dst) => + { + State.ConfidenceIntervalUpperBound(ref dst); + }; + + getters[2] = valueGetter; + } + } + return getters; + } + + private delegate void ProcessData(ref TInput src, ref VBuffer dst); + + private Delegate MakeGetter(DataViewRow input, ForecastingStateBase state) + { + _host.AssertValue(input); + var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); + ProcessData processData = _parent.WindowSize > 0 ? 
+ (ProcessData)state.Process : state.ProcessWithoutBuffer; + + ValueGetter> valueGetter = (ref VBuffer dst) => + { + TInput src = default; + if (_dontFetchSrcValue) + { + state.TransformCore(ref src, null, 0, ref dst); + return; + } + + srcGetter(ref src); + processData(ref src, ref dst); + + }; + return valueGetter; + } + + public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) + { + disposer = null; + Action pinger = null; + if (activeOutput(0)) + pinger = MakePinger(input, State); + + return pinger; + } + + private Action MakePinger(DataViewRow input, ForecastingStateBase state) + { + _host.AssertValue(input); + var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); + Action pinger = (PingerArgument args) => + { + state.LocalConfidenceLevel = args.ConfidenceLevel; + state.LocalHorizon = args.Horizon; + + // This means don't call srcGetter in getters. + if (args.DontConsumeSource) + { + _dontFetchSrcValue = true; + return; + } + + _dontFetchSrcValue = false; + TInput src = default; + srcGetter(ref src); + state.UpdateState(ref src, args.RowPosition, _parent.WindowSize > 0); + }; + return pinger; + } + + public void CloneState() + { + if (Interlocked.Increment(ref _parent.StateRefCount) > 1) + { + State = (ForecastingStateBase)_parent.StateRef.Clone(); + } + } + + public ITransformer GetTransformer() + { + return _parent; + } + } + /// + /// The base state class for sequential anomaly detection: this class implements the p-values and martinagle calculations for anomaly detection + /// given that the raw anomaly score calculation is specified by the derived classes. + /// + internal abstract class ForecastingStateBase : SequentialTransformerBase, TState>.StateBase + { + // A reference to the parent transform. + protected SequentialForecastingTransformBase Parent; + internal int? LocalHorizon; + internal float? 
LocalConfidenceLevel; + + private protected ForecastingStateBase() { } + + private protected override void CloneCore(TState state) + { + base.CloneCore(state); + } + + private protected ForecastingStateBase(BinaryReader reader) : base(reader) + { + } + + internal override void Save(BinaryWriter writer) + { + base.Save(writer); + } + + private protected override void SetNaOutput(ref VBuffer dst) + { + var outputLength = Parent._outputLength; + var editor = VBufferEditor.Create(ref dst, outputLength); + + for (int i = 0; i < outputLength; ++i) + editor.Values[i] = float.NaN; + + dst = editor.Commit(); + } + + private protected sealed override void InitializeStateCore(bool disk = false) + { + Parent = (SequentialForecastingTransformBase)ParentTransform; + Host.Assert(WindowSize >= 0); + InitializeForecaster(); + } + + /// + /// The abstract method that realizes the initialization functionality for the forecaster. + /// + private protected abstract void InitializeForecaster(); + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs index a5657e1b56..931cece4b0 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformBase.cs @@ -29,6 +29,29 @@ public DataBox(T value) } } + /// + /// The box class that is used to box the TInput and TOutput for the LambdaTransform. + /// This is for the case where there are three output columns. + /// + /// The type to be boxed, e.g. 
TInput or TOutput + internal sealed class DataBoxForecastingWithConfidenceIntervals + { + public T Forecast; + public T ConfidenceIntervalLowerBound; + public T ConfidenceIntervalUpperBound; + + public DataBoxForecastingWithConfidenceIntervals() + { + } + + public DataBoxForecastingWithConfidenceIntervals(T forecast, T confidenceIntervalLowerBound, T confidenceIntervalUpperBound) + { + Forecast = forecast; + ConfidenceIntervalLowerBound = confidenceIntervalLowerBound; + ConfidenceIntervalUpperBound = confidenceIntervalUpperBound; + } + } + /// /// The base class for sequential processing transforms. This class implements the basic sliding window buffering. The derived classes need to specify the transform logic, /// the initialization logic and the learning logic via implementing the abstract methods TransformCore(), InitializeStateCore() and LearnStateFromDataCore(), respectively diff --git a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs index 7f0c98574f..af6017a6ca 100644 --- a/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs +++ b/src/Microsoft.ML.TimeSeries/SequentialTransformerBase.cs @@ -26,6 +26,7 @@ namespace Microsoft.ML.Transforms.TimeSeries internal abstract class SequentialTransformerBase : IStatefulTransformer where TState : SequentialTransformerBase.StateBase, new() { + public SequentialTransformerBase() { } /// /// The base class for encapsulating the State object for sequential processing. This class implements a windowed buffer. @@ -165,8 +166,45 @@ public void UpdateStateCore(ref TInput input, bool buffer = true) } } + public void Process(ref TInput input, ref TOutput output1, ref TOutput output2, ref TOutput output3) + { + //Using prediction engine will not evaluate the below condition to true. 
+ if (PreviousPosition == -1) + UpdateStateCore(ref input); + + if (InitialWindowedBuffer.Count < InitialWindowSize) + { + SetNaOutput(ref output1); + + if (InitialWindowedBuffer.Count == InitialWindowSize) + LearnStateFromDataCore(InitialWindowedBuffer); + } + else + { + TransformCore(ref input, WindowedBuffer, RowCounter - InitialWindowSize, ref output1, ref output2, ref output3); + } + } + + public void ProcessWithoutBuffer(ref TInput input, ref TOutput output1, ref TOutput output2, ref TOutput output3) + { + //Using prediction engine will not evaluate the below condition to true. + if (PreviousPosition == -1) + UpdateStateCore(ref input); + + if (InitialWindowedBuffer.Count < InitialWindowSize) + { + SetNaOutput(ref output1); + + if (InitialWindowedBuffer.Count == InitialWindowSize) + LearnStateFromDataCore(InitialWindowedBuffer); + } + else + TransformCore(ref input, WindowedBuffer, RowCounter - InitialWindowSize, ref output1, ref output2, ref output3); + } + public void Process(ref TInput input, ref TOutput output) { + //Using prediction engine will not evaluate the below condition to true. if (PreviousPosition == -1) UpdateStateCore(ref input); @@ -185,8 +223,9 @@ public void Process(ref TInput input, ref TOutput output) public void ProcessWithoutBuffer(ref TInput input, ref TOutput output) { + //Using prediction engine will not evaluate the below condition to true. if (PreviousPosition == -1) - UpdateStateCore(ref input, false); + UpdateStateCore(ref input); if (InitialWindowedBuffer.Count < InitialWindowSize) { @@ -212,7 +251,37 @@ private protected virtual void SetNaOutput(ref TOutput dst) { } /// A reference to the dst object. /// A reference to the windowed buffer. /// A long number that indicates the number of times TransformCore has been called so far (starting value = 0). 
- private protected virtual void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref TOutput dst) + public virtual void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref TOutput dst) + { + + } + + public virtual void Forecast(ref TOutput dst) + { + + } + + public virtual void ConfidenceIntervalLowerBound(ref TOutput dst) + { + + } + + public virtual void ConfidenceIntervalUpperBound(ref TOutput dst) + { + + } + + /// + /// The abstract method that realizes the main logic for the transform. + /// + /// A reference to the input object. + /// A reference to the dst object. + /// + /// + /// A reference to the windowed buffer. + /// A long number that indicates the number of times TransformCore has been called so far (starting value = 0). + private protected virtual void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, + ref TOutput dst1, ref TOutput dst2, ref TOutput dst3) { } @@ -266,6 +335,8 @@ private protected virtual void CloneCore(TState state) internal readonly string InputColumnName; internal readonly string OutputColumnName; + internal readonly string ForecastingConfidenceIntervalMinOutputColumnName; + internal readonly string ForecastingConfidenceIntervalMaxOutputColumnName; private protected DataViewType OutputColumnType; bool ITransformer.IsRowToRowMapper => false; @@ -282,7 +353,8 @@ private protected virtual void CloneCore(TState state) /// The name of the dst column. /// The name of the input column. 
/// - private protected SequentialTransformerBase(IHost host, int windowSize, int initialWindowSize, string outputColumnName, string inputColumnName, DataViewType outputColType) + private protected SequentialTransformerBase(IHost host, int windowSize, int initialWindowSize, + string outputColumnName, string inputColumnName, DataViewType outputColType) { Host = host; Host.CheckParam(initialWindowSize >= 0, nameof(initialWindowSize), "Must be non-negative."); @@ -299,6 +371,15 @@ private protected SequentialTransformerBase(IHost host, int windowSize, int init WindowSize = windowSize; } + private protected SequentialTransformerBase(IHost host, int windowSize, int initialWindowSize, + string outputColumnName, string forecastingConfidenceIntervalMinOutputColumnName, + string forecastingConfidenceIntervalMaxOutputColumnName, string inputColumnName, DataViewType outputColType) : + this(host, windowSize, initialWindowSize, outputColumnName, inputColumnName, outputColType) + { + ForecastingConfidenceIntervalMinOutputColumnName = forecastingConfidenceIntervalMinOutputColumnName; + ForecastingConfidenceIntervalMaxOutputColumnName = forecastingConfidenceIntervalMaxOutputColumnName; + } + private protected SequentialTransformerBase(IHost host, ModelLoadContext ctx) { Host = host; @@ -322,6 +403,8 @@ private protected SequentialTransformerBase(IHost host, ModelLoadContext ctx) InputColumnName = inputColumnName; OutputColumnName = outputColumnName; + ForecastingConfidenceIntervalMinOutputColumnName = ctx.Reader.ReadString(); + ForecastingConfidenceIntervalMaxOutputColumnName = ctx.Reader.ReadString(); InitialWindowSize = initialWindowSize; WindowSize = windowSize; @@ -348,7 +431,8 @@ private protected virtual void SaveModel(ModelSaveContext ctx) ctx.Writer.Write(InitialWindowSize); ctx.SaveNonEmptyString(InputColumnName); ctx.SaveNonEmptyString(OutputColumnName); - + ctx.Writer.Write(ForecastingConfidenceIntervalMinOutputColumnName ?? 
string.Empty); + ctx.Writer.Write(ForecastingConfidenceIntervalMaxOutputColumnName ?? string.Empty); var bs = new BinarySaver(Host, new BinarySaver.Arguments()); bs.TryWriteTypeDescription(ctx.Writer.BaseStream, OutputColumnType, out int byteWritten); } @@ -393,10 +477,12 @@ public SequentialDataTransform(IHost host, SequentialTransformerBase 0, _parent.OutputColumnType); + _parent.OutputColumnName, _parent.ForecastingConfidenceIntervalMinOutputColumnName, + _parent.ForecastingConfidenceIntervalMaxOutputColumnName, InitFunction, _parent.WindowSize > 0, _parent.OutputColumnType); + _mapper = mapper; _bindings = new ColumnBindings(input.Schema, _mapper.GetOutputColumns()); } @@ -404,24 +490,49 @@ public SequentialDataTransform(IHost host, SequentialTransformerBase _mapper.CloneState(); private static IDataTransform CreateLambdaTransform(IHost host, IDataView input, string inputColumnName, - string outputColumnName, Action initFunction, bool hasBuffer, DataViewType outputColTypeOverride) + string outputColumnName, string forecastingConfidenceIntervalMinOutputColumnName, + string forecastingConfidenceIntervalMaxOutputColumnName, Action initFunction, bool hasBuffer, DataViewType outputColTypeOverride) { var inputSchema = SchemaDefinition.Create(typeof(DataBox)); inputSchema[0].ColumnName = inputColumnName; - var outputSchema = SchemaDefinition.Create(typeof(DataBox)); - outputSchema[0].ColumnName = outputColumnName; + SchemaDefinition outputSchema; + + if (!string.IsNullOrEmpty(forecastingConfidenceIntervalMinOutputColumnName)) + { + outputSchema = SchemaDefinition.Create(typeof(DataBoxForecastingWithConfidenceIntervals)); + outputSchema[0].ColumnName = outputColumnName; + + if (outputColTypeOverride != null) + outputSchema[0].ColumnType = outputSchema[1].ColumnType = outputSchema[2].ColumnType = outputColTypeOverride; - if (outputColTypeOverride != null) - outputSchema[0].ColumnType = outputColTypeOverride; + outputSchema[1].ColumnName = 
forecastingConfidenceIntervalMinOutputColumnName; + outputSchema[2].ColumnName = forecastingConfidenceIntervalMaxOutputColumnName; - Action, DataBox, TState> lambda; - if (hasBuffer) - lambda = MapFunction; + Action, DataBoxForecastingWithConfidenceIntervals, TState> lambda; + if (hasBuffer) + lambda = MapFunction; + else + lambda = MapFunctionWithoutBuffer; + + return LambdaTransform.CreateMap(host, input, lambda, initFunction, inputSchema, outputSchema); + } else - lambda = MapFunctionWithoutBuffer; + { + outputSchema = SchemaDefinition.Create(typeof(DataBox)); + outputSchema[0].ColumnName = outputColumnName; + + if (outputColTypeOverride != null) + outputSchema[0].ColumnType = outputColTypeOverride; - return LambdaTransform.CreateMap(host, input, lambda, initFunction, inputSchema, outputSchema); + Action, DataBox, TState> lambda; + if (hasBuffer) + lambda = MapFunction; + else + lambda = MapFunctionWithoutBuffer; + + return LambdaTransform.CreateMap(host, input, lambda, initFunction, inputSchema, outputSchema); + } } private static void MapFunction(DataBox input, DataBox output, TState state) @@ -429,11 +540,21 @@ private static void MapFunction(DataBox input, DataBox output, state.Process(ref input.Value, ref output.Value); } + private static void MapFunction(DataBox input, DataBoxForecastingWithConfidenceIntervals output, TState state) + { + state.Process(ref input.Value, ref output.Forecast, ref output.ConfidenceIntervalLowerBound, ref output.ConfidenceIntervalUpperBound); + } + private static void MapFunctionWithoutBuffer(DataBox input, DataBox output, TState state) { state.ProcessWithoutBuffer(ref input.Value, ref output.Value); } + private static void MapFunctionWithoutBuffer(DataBox input, DataBoxForecastingWithConfidenceIntervals output, TState state) + { + state.ProcessWithoutBuffer(ref input.Value, ref output.Forecast, ref output.ConfidenceIntervalLowerBound, ref output.ConfidenceIntervalUpperBound); + } + private void InitFunction(TState state) { 
state.InitState(_parent.WindowSize, _parent.InitialWindowSize, _parent, _parent.Host); @@ -500,7 +621,7 @@ private sealed class RowImpl : StatefulRow private readonly DataViewSchema _schema; private readonly DataViewRow _input; private readonly Delegate[] _getters; - private readonly Action _pinger; + private readonly Action _pinger; private readonly Action _disposer; private bool _disposed; private readonly ColumnBindings _bindings; @@ -511,7 +632,7 @@ private sealed class RowImpl : StatefulRow public override long Batch => _input.Batch; - public RowImpl(ColumnBindings bindings, DataViewRow input, Delegate[] getters, Action pinger, Action disposer) + public RowImpl(ColumnBindings bindings, DataViewRow input, Delegate[] getters, Action pinger, Action disposer) { Contracts.CheckValue(bindings, nameof(bindings)); Contracts.CheckValue(input, nameof(input)); @@ -550,8 +671,8 @@ public override ValueGetter GetGetter(DataViewSchema.Column column) return fn; } - public override Action GetPinger() => - _pinger as Action ?? throw Contracts.Except("Invalid TValue in GetPinger: '{0}'", typeof(long)); + public override Action GetPinger() => + _pinger as Action ?? throw Contracts.Except("Invalid TValue in GetPinger: '{0}'", typeof(PingerArgument)); /// /// Returns whether the given column is active in this row. 
@@ -824,7 +945,7 @@ private sealed class StatefulRowImpl : StatefulRow { private readonly DataViewRow _input; private readonly Delegate[] _getters; - private readonly Action _pinger; + private readonly Action _pinger; private readonly Action _disposer; private readonly TimeSeriesRowToRowMapperTransform _parent; @@ -836,7 +957,7 @@ private sealed class StatefulRowImpl : StatefulRow public override DataViewSchema Schema { get; } public StatefulRowImpl(DataViewRow input, TimeSeriesRowToRowMapperTransform parent, - DataViewSchema schema, Delegate[] getters, Action pinger, Action disposer) + DataViewSchema schema, Delegate[] getters, Action pinger, Action disposer) { _input = input; _parent = parent; @@ -873,8 +994,8 @@ public override ValueGetter GetGetter(DataViewSchema.Column colu return fn; } - public override Action GetPinger() => - _pinger as Action ?? throw Contracts.Except("Invalid TValue in GetPinger: '{0}'", typeof(long)); + public override Action GetPinger() => + _pinger as Action ?? 
throw Contracts.Except("Invalid TValue in GetPinger: '{0}'", typeof(PingerArgument)); public override ValueGetter GetIdGetter() => _input.GetIdGetter(); diff --git a/src/Microsoft.ML.TimeSeries/SrCnnTransformBase.cs b/src/Microsoft.ML.TimeSeries/SrCnnTransformBase.cs index 9f64fa2167..c76973bef2 100644 --- a/src/Microsoft.ML.TimeSeries/SrCnnTransformBase.cs +++ b/src/Microsoft.ML.TimeSeries/SrCnnTransformBase.cs @@ -220,25 +220,28 @@ private Delegate MakeGetter(DataViewRow input, SrCnnStateBase state) return valueGetter; } - public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) + public Action CreatePinger(DataViewRow input, Func activeOutput, out Action disposer) { disposer = null; - Action pinger = null; + Action pinger = null; if (activeOutput(0)) pinger = MakePinger(input, State); return pinger; } - private Action MakePinger(DataViewRow input, SrCnnStateBase state) + private Action MakePinger(DataViewRow input, SrCnnStateBase state) { _host.AssertValue(input); var srcGetter = input.GetGetter(input.Schema[_inputColumnIndex]); - Action pinger = (long rowPosition) => + Action pinger = (PingerArgument args) => { + if (args.DontConsumeSource) + return; + TInput src = default; srcGetter(ref src); - state.UpdateState(ref src, rowPosition, _parent.WindowSize > 0); + state.UpdateState(ref src, args.RowPosition, _parent.WindowSize > 0); }; return pinger; } @@ -289,7 +292,7 @@ private protected override void SetNaOutput(ref VBuffer dst) dst = editor.Commit(); } - private protected sealed override void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) + public sealed override void TransformCore(ref TInput input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) { var outputLength = Parent.OutputLength; diff --git a/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs b/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs index 0c823f2de1..f93efb6aa7 100644 --- 
a/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs +++ b/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs @@ -201,8 +201,8 @@ public SsaAnomalyDetectionBase(SsaOptions options, string name, IHostEnvironment ErrorFunc = ErrorFunctionUtils.GetErrorFunction(ErrorFunction); IsAdaptive = options.IsAdaptive; // Creating the master SSA model - Model = new AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal(Host, options.InitialWindowSize, SeasonalWindowSize + 1, SeasonalWindowSize, - DiscountFactor, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalWindowSize / 2, false, false); + Model = new AdaptiveSingularSpectrumSequenceModelerInternal(Host, options.InitialWindowSize, SeasonalWindowSize + 1, SeasonalWindowSize, + DiscountFactor, RankSelectionMethod.Exact, null, SeasonalWindowSize / 2, false, false); StateRef = new State(); StateRef.InitState(WindowSize, InitialWindowSize, this, Host); diff --git a/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs new file mode 100644 index 0000000000..86a48781c7 --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/SsaForecastingBase.cs @@ -0,0 +1,314 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.IO; +using Microsoft.ML.CommandLine; +using Microsoft.ML.Data; +using Microsoft.ML.Internal.Utilities; +using Microsoft.ML.Runtime; + +namespace Microsoft.ML.Transforms.TimeSeries +{ + /// + /// The wrapper to that implements the general anomaly detection transform based on Singular Spectrum modeling of the time-series. + /// For the details of the Singular Spectrum Analysis (SSA), refer to http://arxiv.org/pdf/1206.6910.pdf. 
+ /// + public class SsaForecastingBaseWrapper : IStatefulTransformer, ICanSaveModel + { + /// + /// Whether a call to should succeed, on an + /// appropriate schema. + /// + bool ITransformer.IsRowToRowMapper => ((ITransformer)InternalTransform).IsRowToRowMapper; + + /// + /// Creates a clone of the transformer. Used for taking the snapshot of the state. + /// + /// + IStatefulTransformer IStatefulTransformer.Clone() => InternalTransform.Clone(); + + /// + /// Schema propagation for transformers. + /// Returns the output schema of the data, if the input schema is like the one provided. + /// + public DataViewSchema GetOutputSchema(DataViewSchema inputSchema) => InternalTransform.GetOutputSchema(inputSchema); + + /// + /// Constructs a row-to-row mapper based on an input schema. If + /// is false, then an exception should be thrown. If the input schema is in any way + /// unsuitable for constructing the mapper, an exception should likewise be thrown. + /// + /// The input schema for which we should get the mapper. + /// The row to row mapper. + IRowToRowMapper ITransformer.GetRowToRowMapper(DataViewSchema inputSchema) + => ((ITransformer)InternalTransform).GetRowToRowMapper(inputSchema); + + /// + /// Same as but also supports mechanism to save the state. + /// + /// The input schema for which we should get the mapper. + /// The row to row mapper. + public IRowToRowMapper GetStatefulRowToRowMapper(DataViewSchema inputSchema) + => ((IStatefulTransformer)InternalTransform).GetStatefulRowToRowMapper(inputSchema); + + /// + /// Take the data in, make transformations, output the data. + /// Note that 's are lazy, so no actual transformations happen here, just schema validation. + /// + public IDataView Transform(IDataView input) => InternalTransform.Transform(input); + + /// + /// For saving a model into a repository. 
+ /// + void ICanSaveModel.Save(ModelSaveContext ctx) => SaveModel(ctx); + + private protected virtual void SaveModel(ModelSaveContext ctx) => InternalTransform.SaveThis(ctx); + + /// + /// Creates a row mapper from Schema. + /// + internal IStatefulRowMapper MakeRowMapper(DataViewSchema schema) => InternalTransform.MakeRowMapper(schema); + + /// + /// Creates an IDataTransform from an IDataView. + /// + internal IDataTransform MakeDataTransform(IDataView input) => InternalTransform.MakeDataTransform(input); + + /// + /// Options for SSA Anomaly Detection. + /// + internal abstract class SsaForecastingOptions : ForecastingArgumentsBase + { + [Argument(ArgumentType.AtMostOnce, HelpText = "The discount factor in [0, 1]", ShortName = "disc", SortOrder = 12)] + public Single DiscountFactor = 1; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The function used to compute the error between the expected and the observed value", ShortName = "err", SortOrder = 13)] + public ErrorFunction ErrorFunction = ErrorFunction.SignedDifference; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The flag determing whether the model is adaptive", ShortName = "adp", SortOrder = 14)] + public bool IsAdaptive = false; + public int WindowSize; + public RankSelectionMethod RankSelectionMethod; + public int? Rank; + public int? MaxRank; + public bool ShouldStablize; + public bool ShouldMaintainInfo; + public GrowthRatio? 
MaxGrowth; + public int Horizon; + public float ConfidenceLevel; + public bool VariableHorizon; + } + + internal SsaForecastingBase InternalTransform; + + internal SsaForecastingBaseWrapper(SsaForecastingOptions options, string name, IHostEnvironment env) + { + InternalTransform = new SsaForecastingBase(options, name, env, this); + } + //string forecastingConfidenceIntervalMinOutputColumnName, string forecastingConfidenceIntervalMaxOutputColumnName, int horizon, bool computeConfidenceIntervals + internal SsaForecastingBaseWrapper(IHostEnvironment env, ModelLoadContext ctx, string name) + { + InternalTransform = new SsaForecastingBase(env, ctx, name); + } + + /// + /// This base class that implements the general anomaly detection transform based on Singular Spectrum modeling of the time-series. + /// For the details of the Singular Spectrum Analysis (SSA), refer to http://arxiv.org/pdf/1206.6910.pdf. + /// + internal sealed class SsaForecastingBase : SequentialForecastingTransformBase + { + internal SsaForecastingBaseWrapper Parent; + internal readonly bool IsAdaptive; + internal readonly int Horizon; + internal readonly float ConfidenceLevel; + internal SequenceModelerBase Model; + + public SsaForecastingBase(SsaForecastingOptions options, string name, IHostEnvironment env, SsaForecastingBaseWrapper parent) + : base(options.TrainSize, 0, options.Source, options.Name, options.ForcastingConfidentLowerBoundColumnName, + options.ForcastingConfidentUpperBoundColumnName, name, options.VariableHorizon ? 
0: options.Horizon, env) + { + Host.CheckUserArg(0 <= options.DiscountFactor && options.DiscountFactor <= 1, nameof(options.DiscountFactor), "Must be in the range [0, 1]."); + IsAdaptive = options.IsAdaptive; + Horizon = options.Horizon; + ConfidenceLevel = options.ConfidenceLevel; + // Creating the master SSA model + Model = new AdaptiveSingularSpectrumSequenceModelerInternal(Host, options.TrainSize, options.SeriesLength, options.WindowSize, + options.DiscountFactor, options.RankSelectionMethod, options.Rank, options.MaxRank, !string.IsNullOrEmpty(options.ForcastingConfidentLowerBoundColumnName), + options.ShouldStablize, options.ShouldMaintainInfo, options.MaxGrowth); + + StateRef = new State(); + StateRef.InitState(WindowSize, InitialWindowSize, this, Host); + Parent = parent; + } + + public SsaForecastingBase(IHostEnvironment env, ModelLoadContext ctx, string name) : base(env, ctx, name) + { + // *** Binary format *** + // + // bool: _isAdaptive + // int32: Horizon + // bool: ComputeConfidenceIntervals + // State: StateRef + // AdaptiveSingularSpectrumSequenceModeler: _model + + Host.CheckDecode(InitialWindowSize == 0); + + IsAdaptive = ctx.Reader.ReadBoolean(); + Horizon = ctx.Reader.ReadInt32(); + ConfidenceLevel = ctx.Reader.ReadFloat(); + StateRef = new State(ctx.Reader); + + ctx.LoadModel, SignatureLoadModel>(env, out Model, "SSA"); + Host.CheckDecode(Model != null); + StateRef.InitState(this, Host); + } + + public override DataViewSchema GetOutputSchema(DataViewSchema inputSchema) + { + Host.CheckValue(inputSchema, nameof(inputSchema)); + + if (!inputSchema.TryGetColumnIndex(InputColumnName, out var col)) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", InputColumnName); + + var colType = inputSchema[col].Type; + if (colType != NumberDataViewType.Single) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", InputColumnName, "Single", colType.ToString()); + + return Transform(new EmptyDataView(Host, inputSchema)).Schema; + } + + 
private protected override void SaveModel(ModelSaveContext ctx) + { + ((ICanSaveModel)Parent).Save(ctx); + } + + internal void SaveThis(ModelSaveContext ctx) + { + Host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + + Host.Assert(InitialWindowSize == 0); + Host.Assert(Model != null); + + // *** Binary format *** + // + // bool: _isAdaptive + // int32: Horizon + // State: StateRef + // AdaptiveSingularSpectrumSequenceModeler: _model + + base.SaveModel(ctx); + ctx.Writer.Write(IsAdaptive); + ctx.Writer.Write(Horizon); + ctx.Writer.Write(ConfidenceLevel); + StateRef.Save(ctx.Writer); + + ctx.SaveModel(Model, "SSA"); + } + + internal sealed class State : ForecastingStateBase + { + private SequenceModelerBase _model; + private SsaForecastingBase _parentForecaster; + + public State() + { + } + + internal State(BinaryReader reader) : base(reader) + { + WindowedBuffer = TimeSeriesUtils.DeserializeFixedSizeQueueSingle(reader, Host); + InitialWindowedBuffer = TimeSeriesUtils.DeserializeFixedSizeQueueSingle(reader, Host); + } + + internal override void Save(BinaryWriter writer) + { + base.Save(writer); + TimeSeriesUtils.SerializeFixedSizeQueue(WindowedBuffer, writer); + TimeSeriesUtils.SerializeFixedSizeQueue(InitialWindowedBuffer, writer); + } + + private protected override void CloneCore(State state) + { + base.CloneCore(state); + Contracts.Assert(state is State); + var stateLocal = state as State; + stateLocal.WindowedBuffer = WindowedBuffer.Clone(); + stateLocal.InitialWindowedBuffer = InitialWindowedBuffer.Clone(); + if (_model != null) + { + _parentForecaster.Model = _parentForecaster.Model.Clone(); + _model = _parentForecaster.Model; + } + } + + private protected override void LearnStateFromDataCore(FixedSizeQueue data) + { + // This method is empty because there is no need to implement a training logic here. 
+ } + + private protected override void InitializeForecaster() + { + _parentForecaster = (SsaForecastingBase)Parent; + _model = _parentForecaster.Model; + } + + public override void TransformCore(ref float input, FixedSizeQueue windowedBuffer, long iteration, ref VBuffer dst) + { + // Forecasting is being done without prediction engine. Update the model + // with the observation. + if (PreviousPosition == -1) + Consume(input); + + dst = new VBuffer(LocalHorizon ?? _parentForecaster.Horizon, + ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).Forecast(_parentForecaster.Horizon)); + } + + private protected override void TransformCore(ref float input, FixedSizeQueue windowedBuffer, long iteration, + ref VBuffer dst1, ref VBuffer dst2, ref VBuffer dst3) + { + // Forecasting is being done without prediction engine. Update the model + // with the observation. + if (PreviousPosition == -1) + Consume(input); + + ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).ForecastWithConfidenceIntervals(LocalHorizon ?? _parentForecaster.Horizon, + out float[] forecast, out float[] min, out float[] max, LocalConfidenceLevel ?? _parentForecaster.ConfidenceLevel); + + dst1 = new VBuffer(_parentForecaster.Horizon, forecast); + dst2 = new VBuffer(_parentForecaster.Horizon, min); + dst3 = new VBuffer(_parentForecaster.Horizon, max); + } + + public override void Forecast( ref VBuffer dst) + { + int horizon = LocalHorizon ?? _parentForecaster.Horizon; + dst = new VBuffer(horizon, ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).Forecast(horizon)); + } + + public override void ConfidenceIntervalLowerBound(ref VBuffer dst) + { + int horizon = LocalHorizon ?? _parentForecaster.Horizon; + ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).ForecastWithConfidenceIntervals(horizon, + out float[] forecast, out float[] min, out float[] max, LocalConfidenceLevel ?? 
_parentForecaster.ConfidenceLevel); + + dst = new VBuffer(horizon, min); + } + + public override void ConfidenceIntervalUpperBound(ref VBuffer dst) + { + int horizon = LocalHorizon ?? _parentForecaster.Horizon; + ((AdaptiveSingularSpectrumSequenceModelerInternal)_model).ForecastWithConfidenceIntervals(horizon, + out float[] forecast, out float[] min, out float[] max, LocalConfidenceLevel ?? _parentForecaster.ConfidenceLevel); + + dst = new VBuffer(horizon, max); + } + + public override void Consume(Single input) => _model.Consume(ref input, _parentForecaster.IsAdaptive); + } + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs b/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs index a7dd59cfc9..9420aad1f3 100644 --- a/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs +++ b/src/Microsoft.ML.TimeSeries/TimeSeriesProcessing.cs @@ -124,5 +124,19 @@ public static CommonOutputs.TransformOutput SsaSpikeDetector(IHostEnvironment en OutputData = view }; } + + [TlcModule.EntryPoint(Desc = TimeSeries.SsaForecastingTransformer.Summary, + UserName = TimeSeries.SsaForecastingTransformer.UserName, + ShortName = TimeSeries.SsaForecastingTransformer.ShortName)] + internal static CommonOutputs.TransformOutput SsaForecasting(IHostEnvironment env, SsaForecastingTransformer.Options options) + { + var h = EntryPointUtils.CheckArgsAndCreateHost(env, "SsaForecasting", options); + var view = new SsaForecastingEstimator(h, options).Fit(options.Data).Transform(options.Data); + return new CommonOutputs.TransformOutput() + { + Model = new TransformModelImpl(h, view, options.Data), + OutputData = view + }; + } } } diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index f18a9e9e2b..6288a254ae 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -38,6 +38,7 @@ 
TimeSeriesProcessingEntryPoints.PercentileThresholdTransform Detects the values TimeSeriesProcessingEntryPoints.PValueTransform This P-Value transform calculates the p-value of the current input in the sequence with regard to the values in the sliding window. Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints PValueTransform Microsoft.ML.Transforms.TimeSeries.PValueTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput TimeSeriesProcessingEntryPoints.SlidingWindowTransform Returns the last values for a time series [y(t-d-l+1), y(t-d-l+2), ..., y(t-l-1), y(t-l)] where d is the size of the window, l the lag and y is a Float. Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SlidingWindowTransform Microsoft.ML.Transforms.TimeSeries.SlidingWindowTransformBase`1+Arguments[System.Single] Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput TimeSeriesProcessingEntryPoints.SsaChangePointDetector This transform detects the change-points in a seasonal time-series using Singular Spectrum Analysis (SSA). Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SsaChangePointDetector Microsoft.ML.Transforms.TimeSeries.SsaChangePointDetector+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput +TimeSeriesProcessingEntryPoints.SsaForecasting This transform forecasts using Singular Spectrum Analysis (SSA). Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SsaForecasting Microsoft.ML.Transforms.TimeSeries.SsaForecastingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput TimeSeriesProcessingEntryPoints.SsaSpikeDetector This transform detects the spikes in a seasonal time-series using Singular Spectrum Analysis (SSA). 
Microsoft.ML.Transforms.TimeSeries.TimeSeriesProcessingEntryPoints SsaSpikeDetector Microsoft.ML.Transforms.TimeSeries.SsaSpikeDetector+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Trainers.AveragedPerceptronBinaryClassifier Averaged Perceptron Binary Classifier. Microsoft.ML.Trainers.AveragedPerceptronTrainer TrainBinary Microsoft.ML.Trainers.AveragedPerceptronTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.EnsembleBinaryClassifier Train binary ensemble. Microsoft.ML.Trainers.Ensemble.Ensemble CreateBinaryEnsemble Microsoft.ML.Trainers.Ensemble.EnsembleTrainer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 99b0d3a8ab..57ced3cd61 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -4007,6 +4007,244 @@ "ITransformOutput" ] }, + { + "Name": "TimeSeriesProcessingEntryPoints.SsaForecasting", + "Desc": "This transform forecasts using Singular Spectrum Analysis (SSA).", + "FriendlyName": "SSA Forecasting", + "ShortName": "ssafcst", + "Inputs": [ + { + "Name": "Source", + "Type": "String", + "Desc": "The name of the source column.", + "Aliases": [ + "src" + ], + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "Data", + "Type": "DataView", + "Desc": "Input dataset", + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "Name", + "Type": "String", + "Desc": "The name of the new column.", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false + }, + { + "Name": "WindowSize", + "Type": "Int", + "Desc": "The length of the window on the series for building the trajectory matrix (parameter L).", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "SeriesLength", + 
"Type": "Int", + "Desc": "The length of series that is kept in buffer for modeling (parameter N).", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "TrainSize", + "Type": "Int", + "Desc": "The length of series from the begining used for training.", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "Horizon", + "Type": "Int", + "Desc": "The number of values to forecast.", + "Required": true, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "ConfidenceLevel", + "Type": "Float", + "Desc": "The confidence level in [0, 1) for forecasting.", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0.95 + }, + { + "Name": "VariableHorizon", + "Type": "Bool", + "Desc": "Set this to true horizon will change at prediction time.", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "ForcastingConfidentLowerBoundColumnName", + "Type": "String", + "Desc": "The name of the confidence interval lower bound column.", + "Aliases": [ + "cnfminname" + ], + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "ForcastingConfidentUpperBoundColumnName", + "Type": "String", + "Desc": "The name of the confidence interval upper bound column.", + "Aliases": [ + "cnfmaxnname" + ], + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "RankSelectionMethod", + "Type": { + "Kind": "Enum", + "Values": [ + "Fixed", + "Exact", + "Fast" + ] + }, + "Desc": "The rank selection method.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": "Exact" + }, + { + "Name": "Rank", + "Type": "Int", + "Desc": "The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. 
If set to null, the rank is automatically determined based on prediction error minimization.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "MaxRank", + "Type": "Int", + "Desc": "The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "ShouldStablize", + "Type": "Bool", + "Desc": "The flag determining whether the model should be stabilized.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "ShouldMaintainInfo", + "Type": "Bool", + "Desc": "The flag determining whether the meta information for the model needs to be maintained.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "MaxGrowth", + "Type": { + "Kind": "Struct", + "Fields": [ + { + "Name": "TimeSpan", + "Type": "Int", + "Desc": "Time span of growth ratio. Must be strictly positive.", + "Required": false, + "SortOrder": 1.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "Growth", + "Type": "Float", + "Desc": "Growth. 
Must be non-negative.", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 0.0 + } + ] + }, + "Desc": "The maximum growth on the exponential trend.", + "Required": false, + "SortOrder": 3.0, + "IsNullable": true, + "Default": null + }, + { + "Name": "DiscountFactor", + "Type": "Float", + "Desc": "The discount factor in [0,1] used for online updates.", + "Aliases": [ + "disc" + ], + "Required": false, + "SortOrder": 5.0, + "IsNullable": false, + "Default": 1.0 + }, + { + "Name": "IsAdaptive", + "Type": "Bool", + "Desc": "The flag determing whether the model is adaptive", + "Aliases": [ + "adp" + ], + "Required": false, + "SortOrder": 6.0, + "IsNullable": false, + "Default": false + } + ], + "Outputs": [ + { + "Name": "OutputData", + "Type": "DataView", + "Desc": "Transformed dataset" + }, + { + "Name": "Model", + "Type": "TransformModel", + "Desc": "Transform model" + } + ], + "InputKind": [ + "ITransformInput" + ], + "OutputKind": [ + "ITransformOutput" + ] + }, { "Name": "TimeSeriesProcessingEntryPoints.SsaSpikeDetector", "Desc": "This transform detects the spikes in a seasonal time-series using Singular Spectrum Analysis (SSA).", diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index dd25deb016..d8f4387d75 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -2,12 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
-using System; using System.Collections.Generic; using System.IO; using Microsoft.ML.Data; using Microsoft.ML.TestFramework.Attributes; -using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; using Xunit; @@ -24,6 +22,18 @@ private sealed class Prediction #pragma warning restore CS0649 } + private sealed class ForecastPrediction + { +#pragma warning disable CS0649 + [VectorType(4)] + public float[] Forecast; + [VectorType(4)] + public float[] MinCnf; + [VectorType(4)] + public float[] MaxCnf; +#pragma warning restore CS0649 + } + public class Prediction1 { public float Random; @@ -43,6 +53,22 @@ public Data(float value) } } + class ForecastResult + { +#pragma warning disable CS0649 + public float Forecast; +#pragma warning restore CS0649 + } + + class ForecastResultArray + { +#pragma warning disable CS0649 + public float[] Forecast; + public float[] ConfidenceLowerBound; + public float[] ConfidenceUpperBound; +#pragma warning restore CS0649 + } + private sealed class TimeSeriesData { public float Value; @@ -187,7 +213,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() var model = pipeline.Fit(dataView); //Create prediction function. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateTimeSeriesEngine(ml); //Checkpoint with no inputs passed at prediction. var modelPath = "temp.zip"; @@ -200,7 +226,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() model2 = ml.Model.Load(file, out var schema); //Raw score after state gets updated with two inputs. - var engine2 = model2.CreateTimeSeriesPredictionFunction(ml); + var engine2 = model2.CreateTimeSeriesEngine(ml); var prediction2 = engine2.Predict(new Data(1)); //Raw score after first input. 
Assert.Equal(1.1661833524703979, prediction2.Change[1], precision: 5); // Raw score @@ -221,7 +247,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngineNoColumn() //Load the model with state updated with just one input, then pass in the second input //and raw score should match the raw score obtained by passing the two input in the first model. - var engine3 = model3.CreateTimeSeriesPredictionFunction(ml); + var engine3 = model3.CreateTimeSeriesEngine(ml); var prediction3 = engine3.Predict(new Data(1)); Assert.Equal(0.12216401100158691, prediction2.Change[1], precision: 5); // Raw score } @@ -262,9 +288,9 @@ public void ChangePointDetectionWithSeasonalityPredictionEngine() // Train. var model = pipeline.Fit(dataView); - + //Model 1: Prediction #1. - var engine = model.CreateTimeSeriesPredictionFunction(ml); + var engine = model.CreateTimeSeriesEngine(ml); var prediction = engine.Predict(new Data(1)); Assert.Equal(0, prediction.Change[0], precision: 7); // Alert Assert.Equal(1.1661833524703979, prediction.Change[1], precision: 5); // Raw score @@ -288,7 +314,7 @@ public void ChangePointDetectionWithSeasonalityPredictionEngine() model2 = ml.Model.Load(file, out var schema); //Predict and expect the same result after checkpointing(Prediction #2). 
- engine = model2.CreateTimeSeriesPredictionFunction(ml); + engine = model2.CreateTimeSeriesEngine(ml); prediction = engine.Predict(new Data(1)); Assert.Equal(0, prediction.Change[0], precision: 7); // Alert Assert.Equal(0.12216401100158691, prediction.Change[1], precision: 5); // Raw score @@ -296,6 +322,150 @@ public void ChangePointDetectionWithSeasonalityPredictionEngine() Assert.Equal(1.5292508189989167E-07, prediction.Change[3], precision: 5); // Martingale score } + [LessThanNetCore30OrNotNetCoreFact("netcoreapp3.0 output differs from Baseline")] + public void SsaForecast() + { + var env = new MLContext(); + const int ChangeHistorySize = 10; + const int SeasonalitySize = 10; + const int NumberOfSeasonsInTraining = 5; + + List data = new List(); + var dataView = env.Data.LoadFromEnumerable(data); + + var args = new SsaForecastingTransformer.Options() + { + ConfidenceLevel = 0.95f, + Source = "Value", + Name = "Forecast", + ForcastingConfidentLowerBoundColumnName = "MinCnf", + ForcastingConfidentUpperBoundColumnName = "MaxCnf", + WindowSize = 10, + SeriesLength = 11, + TrainSize = 22, + Horizon = 4, + IsAdaptive = true + }; + + for (int j = 0; j < NumberOfSeasonsInTraining; j++) + for (int i = 0; i < SeasonalitySize; i++) + data.Add(new Data(i)); + + for (int i = 0; i < ChangeHistorySize; i++) + data.Add(new Data(i * 100)); + + // Train + var detector = new SsaForecastingEstimator(env, args).Fit(dataView); + // Transform + var output = detector.Transform(dataView); + // Get predictions + var enumerator = env.Data.CreateEnumerable(output, true).GetEnumerator(); + ForecastPrediction row = null; + List expectedForecast = new List() { 0.191491723f, 2.53994083f, 5.26454258f, 7.37313938f }; + List minCnf = new List() { -3.9741993f, -2.36872721f, 0.09407653f, 2.18899345f }; + List maxCnf = new List() { 4.3571825f, 7.448609f, 10.435009f, 12.5572853f }; + enumerator.MoveNext(); + row = enumerator.Current; + + for (int localIndex = 0; localIndex < 4; localIndex++) + { + 
Assert.Equal(expectedForecast[localIndex], row.Forecast[localIndex], precision: 7); + Assert.Equal(minCnf[localIndex], row.MinCnf[localIndex], precision: 7); + Assert.Equal(maxCnf[localIndex], row.MaxCnf[localIndex], precision: 7); + } + + } + + [LessThanNetCore30OrNotNetCoreFact("netcoreapp3.0 output differs from Baseline")] + public void SsaForecastPredictionEngine() + { + const int ChangeHistorySize = 10; + const int SeasonalitySize = 10; + const int NumberOfSeasonsInTraining = 5; + + List data = new List(); + + var ml = new MLContext(seed: 1); + var dataView = ml.Data.LoadFromEnumerable(data); + + var args = new SsaForecastingTransformer.Options() + { + ConfidenceLevel = 0.95f, + Source = "Value", + Name = "Forecast", + WindowSize = 10, + SeriesLength = 11, + TrainSize = 22, + Horizon = 4, + ForcastingConfidentLowerBoundColumnName = "ConfidenceLowerBound", + ForcastingConfidentUpperBoundColumnName = "ConfidenceUpperBound", + VariableHorizon = true + }; + + for (int j = 0; j < NumberOfSeasonsInTraining; j++) + for (int i = 0; i < SeasonalitySize; i++) + data.Add(new Data(i)); + + for (int i = 0; i < ChangeHistorySize; i++) + data.Add(new Data(i * 100)); + + // Train + var model = ml.Transforms.Text.FeaturizeText("Text_Featurized", "Text") + .Append(ml.Transforms.Conversion.ConvertType("Value", "Value", DataKind.Single)) + .Append(ml.Forecasting.ForecastBySsa("Forecast", "Value", 10, 11, 22, 4, + forcastingConfidentLowerBoundColumnName: "ConfidenceLowerBound", + forcastingConfidentUpperBoundColumnName: "ConfidenceUpperBound", variableHorizon: true)) + .Append(ml.Transforms.Concatenate("Forecast", "Forecast", "ConfidenceLowerBound", "ConfidenceUpperBound")) + .Fit(dataView); + + //Prediction engine. + var engine = model.CreateTimeSeriesEngine(ml); + ForecastResultArray result = new ForecastResultArray(); + + // Forecast and change the horizon to 5. 
+ engine.Predict(null, ref result, horizon: 5); + // [Forecast, ConfidenceLowerBound, ConfidenceUpperBound] + Assert.Equal(result.Forecast, new float[] { -1.02245092f, 0.08333081f, 2.60737085f, 5.397319f, 7.500832f, -5.188142f, -4.82533741f, + -2.563095f, 0.213172823f, 2.29317045f, 3.14324f, 4.991999f, 7.777837f, 10.5814648f, 12.7084932f }); + + // Update the forecasting model. + engine.Predict(new Data(2)); + + // Update the model and then forecast. + engine.Predict(new Data(2), ref result); + + engine.CheckPoint(ml, "model.zip"); + // [Forecast, ConfidenceLowerBound, ConfidenceUpperBound] + Assert.Equal(result.Forecast, new float[] { 4.310587f, 6.39716768f, 7.73934f, 8.029469f, 0.144895911f, + 1.48849952f, 2.568874f, 2.84532261f, 8.476278f, 11.3058357f, 12.9098063f, 13.2136145f }); + + // Checkpoint the model. + ITransformer modelCopy; + using (var file = File.OpenRead("model.zip")) + modelCopy = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + var forecastEngineCopy = modelCopy.CreateTimeSeriesEngine(ml); + ForecastResultArray resultCopy = new ForecastResultArray(); + + // Update both the models. + engine.Predict(new Data(3)); + forecastEngineCopy.Predict(new Data(3)); + + // Forecast values with the original and check-pointed model. + forecastEngineCopy.Predict(null, ref resultCopy, horizon: 5); + engine.Predict(null, ref result, horizon: 5); + // [Forecast, ConfidenceLowerBound, ConfidenceUpperBound] + Assert.Equal(result.Forecast, new float[] { 6.00658846f, 7.506871f, 7.96424866f, 7.17514229f, + 5.02655172f, 1.84089744f, 2.59820318f, 2.79378271f, 1.99099624f, + -0.181109816f, 10.1722794f, 12.41554f, 13.1347151f, 12.3592882f, 10.2342129f}); + + // The forecasted results should be the same because the state of the models + // is the same. 
+ Assert.Equal(result.Forecast, resultCopy.Forecast); + + } + [Fact] public void AnomalyDetectionWithSrCnn() { @@ -336,103 +506,5 @@ public void AnomalyDetectionWithSrCnn() k += 1; } } - - [Fact] - public void Forecasting() - { - const int SeasonalitySize = 10; - const int NumberOfSeasonsInTraining = 5; - - List data = new List(); - - var ml = new MLContext(seed: 1); - var dataView = ml.Data.LoadFromEnumerable(data); - - for (int j = 0; j < NumberOfSeasonsInTraining; j++) - for (int i = 0; i < SeasonalitySize; i++) - data.Add(new Data(i)); - - // Create forecasting model. - var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler("Value", data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, false, false); - - // Train. - model.Train(dataView); - - // Forecast. - var forecast = model.Forecast(5); - - // Update with new observations. - model.Update(dataView); - - // Checkpoint. - ml.Model.SaveForecastingModel(model, "model.zip"); - - // Load the checkpointed model from disk. - var modelCopy = ml.Model.LoadForecastingModel("model.zip"); - - // Forecast with the checkpointed model loaded from disk. - var forecastCopy = modelCopy.Forecast(5); - - // Forecast with the original model(that was checkpointed to disk). - forecast = model.Forecast(5); - - // Both the forecasted values from model loaded from disk and - // already in memory model should be the same. - Assert.Equal(forecast, forecastCopy); - } - - [Fact] - public void ForecastingWithConfidenceInterval() - { - const int SeasonalitySize = 10; - const int NumberOfSeasonsInTraining = 5; - - List data = new List(); - - var ml = new MLContext(seed: 1); - var dataView = ml.Data.LoadFromEnumerable(data); - - for (int j = 0; j < NumberOfSeasonsInTraining; j++) - for (int i = 0; i < SeasonalitySize; i++) - data.Add(new Data(i)); - - // Create forecasting model. 
- var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler("Value", data.Count, SeasonalitySize + 1, SeasonalitySize, - 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, shouldComputeForecastIntervals: true, false); - - // Train. - model.Train(dataView); - - // Forecast. - float[] forecast; - float[] lowConfInterval; - float[] upperConfInterval; - model.ForecastWithConfidenceIntervals(5, out forecast, out lowConfInterval, out upperConfInterval); - - // Update with new observations. - model.Update(dataView); - - // Checkpoint. - ml.Model.SaveForecastingModel(model, "model.zip"); - - // Load the checkpointed model from disk. - var modelCopy = ml.Model.LoadForecastingModel("model.zip"); - - // Forecast with the checkpointed model loaded from disk. - float[] forecastCopy; - float[] lowConfIntervalCopy; - float[] upperConfIntervalCopy; - modelCopy.ForecastWithConfidenceIntervals(5, out forecastCopy, out lowConfIntervalCopy, out upperConfIntervalCopy); - - // Forecast with the original model(that was checkpointed to disk). - model.ForecastWithConfidenceIntervals(5, out forecast, out lowConfInterval, out upperConfInterval); - - // Both the forecasted values from model loaded from disk and - // already in memory model should be the same. 
- Assert.Equal(forecast, forecastCopy); - Assert.Equal(lowConfInterval, lowConfIntervalCopy); - Assert.Equal(upperConfInterval, upperConfIntervalCopy); - } } } diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesEstimatorTests.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesEstimatorTests.cs index 399b276194..efb25b43e3 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesEstimatorTests.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesEstimatorTests.cs @@ -74,6 +74,42 @@ void TestSsaChangePointEstimator() Done(); } + [Fact] + void TestSsaForecastingEstimator() + { + const int ChangeHistorySize = 10; + const int SeasonalitySize = 10; + const int NumberOfSeasonsInTraining = 5; + + List data = new List(); + + var ml = new MLContext(seed: 1); + var dataView = ml.Data.LoadFromEnumerable(data); + + for (int j = 0; j < NumberOfSeasonsInTraining; j++) + for (int i = 0; i < SeasonalitySize; i++) + data.Add(new Data(i)); + + for (int i = 0; i < ChangeHistorySize; i++) + data.Add(new Data(i * 100)); + + // Train + var pipe = new SsaForecastingEstimator(Env, "Forecast", "Value", 10, 11, 22, 4, + forcastingConfidentLowerBoundColumnName: "ConfidenceLowerBound", + forcastingConfidentUpperBoundColumnName: "ConfidenceUpperBound"); + + var xyData = new List { new TestDataXY() { A = new float[inputSize] } }; + var stringData = new List { new TestDataDifferntType() { data_0 = new string[inputSize] } }; + + var invalidDataWrongNames = ML.Data.LoadFromEnumerable(xyData); + var invalidDataWrongTypes = ML.Data.LoadFromEnumerable(stringData); + + TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongTypes); + TestEstimatorCore(pipe, dataView, invalidInput: invalidDataWrongNames); + + Done(); + } + [Fact] void TestSsaSpikeEstimator() {