diff --git a/src/DotNetBridge/Bridge.cs b/src/DotNetBridge/Bridge.cs
index 26e5a84d..96100247 100644
--- a/src/DotNetBridge/Bridge.cs
+++ b/src/DotNetBridge/Bridge.cs
@@ -17,7 +17,7 @@
using Microsoft.ML.Trainers.FastTree;
using Microsoft.ML.Trainers.LightGbm;
using Microsoft.ML.Transforms;
-using Microsoft.ML.TimeSeries;
+using Microsoft.ML.Transforms.TimeSeries;
namespace Microsoft.MachineLearning.DotNetBridge
{
@@ -329,7 +329,7 @@ private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cd
//env.ComponentCatalog.RegisterAssembly(typeof(SaveOnnxCommand).Assembly);
//env.ComponentCatalog.RegisterAssembly(typeof(TimeSeriesProcessingEntryPoints).Assembly);
//env.ComponentCatalog.RegisterAssembly(typeof(ParquetLoader).Assembly);
- env.ComponentCatalog.RegisterAssembly(typeof(ForecastExtensions).Assembly);
+ env.ComponentCatalog.RegisterAssembly(typeof(SsaChangePointDetector).Assembly);
using (var ch = host.Start("Executing"))
{
diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj
index 49492026..1f344b47 100644
--- a/src/python/nimbusml.pyproj
+++ b/src/python/nimbusml.pyproj
@@ -89,6 +89,7 @@
+
@@ -139,6 +140,7 @@
+
@@ -235,6 +237,7 @@
+
@@ -277,6 +280,7 @@
+
@@ -586,6 +590,7 @@
+
@@ -593,6 +598,7 @@
+
diff --git a/src/python/nimbusml/examples/SsaForecaster.py b/src/python/nimbusml/examples/SsaForecaster.py
new file mode 100644
index 00000000..1662a8ff
--- /dev/null
+++ b/src/python/nimbusml/examples/SsaForecaster.py
@@ -0,0 +1,47 @@
+###############################################################################
+# SsaForecaster
+import pandas as pd
+from nimbusml import Pipeline, FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.timeseries import SsaForecaster
+
+# data input (as a FileDataStream)
+path = get_dataset('timeseries').as_filepath()
+
+data = FileDataStream.read_csv(path)
+print(data.head())
+# t1 t2 t3
+# 0 0.01 0.01 0.0100
+# 1 0.02 0.02 0.0200
+# 2 0.03 0.03 0.0200
+# 3 0.03 0.03 0.0250
+# 4 0.03 0.03 0.0005
+
+# define the training pipeline
+pipeline = Pipeline([
+ SsaForecaster(series_length=6,
+ train_size=8,
+ window_size=3,
+ horizon=2,
+ columns={'t2_fc': 't2'})
+])
+
+result = pipeline.fit_transform(data)
+
+pd.set_option('display.float_format', lambda x: '%.2f' % x)
+print(result)
+
+# Output
+#
+# t1 t2 t3 t2_fc.0 t2_fc.1
+# 0 0.01 0.01 0.01 0.10 0.12
+# 1 0.02 0.02 0.02 0.06 0.08
+# 2 0.03 0.03 0.02 0.04 0.05
+# 3 0.03 0.03 0.03 0.05 0.06
+# 4 0.03 0.03 0.00 0.05 0.07
+# 5 0.03 0.05 0.01 0.06 0.08
+# 6 0.05 0.07 0.05 0.09 0.12
+# 7 0.07 0.09 0.09 0.12 0.16
+# 8 0.09 99.00 99.00 57.92 82.88
+# 9 1.10 0.10 0.10 60.50 77.18
+
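
A follow-on sketch, not taken from the patch above: the same pipeline can also emit confidence intervals by naming the bound columns via the confidence_level, confidence_lower_bound_column and confidence_upper_bound_column parameters added here. The 't2_lb'/'t2_ub' names are illustrative, and (as the new tests note) this only runs once ML.NET binaries containing the forecasting transform ship with nimbusml.

# Sketch only: request confidence bounds alongside the point forecasts.
# The bound column names 't2_lb' and 't2_ub' are illustrative.
pipeline_ci = Pipeline([
    SsaForecaster(series_length=6,
                  train_size=8,
                  window_size=3,
                  horizon=2,
                  confidence_level=0.95,
                  confidence_lower_bound_column='t2_lb',
                  confidence_upper_bound_column='t2_ub',
                  columns={'t2_fc': 't2'})
])
print(pipeline_ci.fit_transform(data))
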
diff --git a/src/python/nimbusml/examples/examples_from_dataframe/SsaForecaster_df.py b/src/python/nimbusml/examples/examples_from_dataframe/SsaForecaster_df.py
new file mode 100644
index 00000000..c93636fc
--- /dev/null
+++ b/src/python/nimbusml/examples/examples_from_dataframe/SsaForecaster_df.py
@@ -0,0 +1,86 @@
+###############################################################################
+# SsaForecaster
+import numpy as np
+import pandas as pd
+from nimbusml.timeseries import SsaForecaster
+
+# This example creates a time series (list of data with the
+# i-th element corresponding to the i-th time slot).
+
+# Generate sample series data with a recurring pattern
+seasonality_size = 5
+seasonal_data = np.arange(seasonality_size)
+
+data = np.tile(seasonal_data, 3)
+X_train = pd.Series(data, name="ts")
+
+# X_train looks like this
+# 0 0
+# 1 1
+# 2 2
+# 3 3
+# 4 4
+# 5 0
+# 6 1
+# 7 2
+# 8 3
+# 9 4
+# 10 0
+# 11 1
+# 12 2
+# 13 3
+# 14 4
+
+x_test = X_train.copy()
+x_test[-3:] = [100, 110, 120]
+
+# x_test looks like this
+# 0 0
+# 1 1
+# 2 2
+# 3 3
+# 4 4
+# 5 0
+# 6 1
+# 7 2
+# 8 3
+# 9 4
+# 10 0
+# 11 1
+# 12 100
+# 13 110
+# 14 120
+
+training_seasons = 3
+training_size = seasonality_size * training_seasons
+
+forecaster = SsaForecaster(series_length=8,
+ train_size=training_size,
+ window_size=seasonality_size + 1,
+ horizon=4) << {'fc': 'ts'}
+
+forecaster.fit(X_train, verbose=1)
+data = forecaster.transform(x_test)
+
+pd.set_option('display.float_format', lambda x: '%.2f' % x)
+print(data)
+
+# The fc.x columns are the forecasts
+# given the input in the ts column.
+#
+# ts fc.0 fc.1 fc.2 fc.3
+# 0 0 1.00 2.00 3.00 4.00
+# 1 1 2.00 3.00 4.00 -0.00
+# 2 2 3.00 4.00 -0.00 1.00
+# 3 3 4.00 -0.00 1.00 2.00
+# 4 4 -0.00 1.00 2.00 3.00
+# 5 0 1.00 2.00 3.00 4.00
+# 6 1 2.00 3.00 4.00 -0.00
+# 7 2 3.00 4.00 -0.00 1.00
+# 8 3 4.00 -0.00 1.00 2.00
+# 9 4 -0.00 1.00 2.00 3.00
+# 10 0 1.00 2.00 3.00 4.00
+# 11 1 2.00 3.00 4.00 -0.00
+# 12 100 3.00 4.00 0.00 1.00
+# 13 110 4.00 -0.00 1.00 75.50
+# 14 120 -0.00 1.00 83.67 83.25
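
Reading the table above: row i of the fc.* columns holds the horizon-step forecast computed from the series as observed up to row i, so the forecast for values beyond the last observation is simply the last row. A small pandas sketch on the data frame produced above:

# Forecast of the next 4 values after the final observation (last row of fc.*).
future = data.loc[data.index[-1], ['fc.0', 'fc.1', 'fc.2', 'fc.3']]
print(future)
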
diff --git a/src/python/nimbusml/internal/core/timeseries/ssaforecaster.py b/src/python/nimbusml/internal/core/timeseries/ssaforecaster.py
new file mode 100644
index 00000000..bbef090c
--- /dev/null
+++ b/src/python/nimbusml/internal/core/timeseries/ssaforecaster.py
@@ -0,0 +1,141 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+SsaForecaster
+"""
+
+__all__ = ["SsaForecaster"]
+
+
+from ...entrypoints.timeseriesprocessingentrypoints_ssaforecasting import \
+ timeseriesprocessingentrypoints_ssaforecasting
+from ...utils.utils import trace
+from ..base_pipeline_item import BasePipelineItem, DefaultSignature
+
+
+class SsaForecaster(BasePipelineItem, DefaultSignature):
+ """
+ **Description**
+ This transform forecasts using Singular Spectrum Analysis (SSA).
+
+ :param window_size: The length of the window on the series for building the
+ trajectory matrix (parameter L).
+
+ :param series_length: The length of series that is kept in buffer for
+ modeling (parameter N).
+
+ :param train_size: The length of series from the beginning used for
+ training.
+
+ :param horizon: The number of values to forecast.
+
+ :param confidence_level: The confidence level in [0, 1) for forecasting.
+
+ :param variable_horizon: Set this to true if the horizon will change at
+ prediction time.
+
+ :param confidence_lower_bound_column: The name of the confidence interval
+ lower bound column.
+
+ :param confidence_upper_bound_column: The name of the confidence interval
+ upper bound column.
+
+ :param rank_selection_method: The rank selection method.
+
+ :param rank: The desired rank of the subspace used for SSA projection
+ (parameter r). This parameter should be in the range [1,
+ windowSize]. If set to null, the rank is automatically determined based
+ on prediction error minimization.
+
+ :param max_rank: The maximum rank considered during the rank selection
+ process. If not provided (i.e. set to null), it is set to windowSize -
+ 1.
+
+ :param should_stabilize: The flag determining whether the model should be
+ stabilized.
+
+ :param should_maintain_info: The flag determining whether the meta
+ information for the model needs to be maintained.
+
+ :param max_growth: The maximum growth on the exponential trend.
+
+ :param discount_factor: The discount factor in [0,1] used for online
+ updates.
+
+ :param is_adaptive: The flag determining whether the model is adaptive.
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ window_size=0,
+ series_length=0,
+ train_size=0,
+ horizon=0,
+ confidence_level=0.95,
+ variable_horizon=False,
+ confidence_lower_bound_column=None,
+ confidence_upper_bound_column=None,
+ rank_selection_method='Exact',
+ rank=None,
+ max_rank=None,
+ should_stabilize=True,
+ should_maintain_info=False,
+ max_growth=None,
+ discount_factor=1.0,
+ is_adaptive=False,
+ **params):
+ BasePipelineItem.__init__(
+ self, type='transform', **params)
+
+ self.window_size = window_size
+ self.series_length = series_length
+ self.train_size = train_size
+ self.horizon = horizon
+ self.confidence_level = confidence_level
+ self.variable_horizon = variable_horizon
+ self.confidence_lower_bound_column = confidence_lower_bound_column
+ self.confidence_upper_bound_column = confidence_upper_bound_column
+ self.rank_selection_method = rank_selection_method
+ self.rank = rank
+ self.max_rank = max_rank
+ self.should_stabilize = should_stabilize
+ self.should_maintain_info = should_maintain_info
+ self.max_growth = max_growth
+ self.discount_factor = discount_factor
+ self.is_adaptive = is_adaptive
+
+ @property
+ def _entrypoint(self):
+ return timeseriesprocessingentrypoints_ssaforecasting
+
+ @trace
+ def _get_node(self, **all_args):
+ algo_args = dict(
+ source=self.source,
+ name=self._name_or_source,
+ window_size=self.window_size,
+ series_length=self.series_length,
+ train_size=self.train_size,
+ horizon=self.horizon,
+ confidence_level=self.confidence_level,
+ variable_horizon=self.variable_horizon,
+ confidence_lower_bound_column=self.confidence_lower_bound_column,
+ confidence_upper_bound_column=self.confidence_upper_bound_column,
+ rank_selection_method=self.rank_selection_method,
+ rank=self.rank,
+ max_rank=self.max_rank,
+ should_stabilize=self.should_stabilize,
+ should_maintain_info=self.should_maintain_info,
+ max_growth=self.max_growth,
+ discount_factor=self.discount_factor,
+ is_adaptive=self.is_adaptive)
+
+ all_args.update(algo_args)
+ return self._entrypoint(**all_args)
diff --git a/src/python/nimbusml/internal/entrypoints/timeseriesprocessingentrypoints_ssaforecasting.py b/src/python/nimbusml/internal/entrypoints/timeseriesprocessingentrypoints_ssaforecasting.py
new file mode 100644
index 00000000..f02da3a7
--- /dev/null
+++ b/src/python/nimbusml/internal/entrypoints/timeseriesprocessingentrypoints_ssaforecasting.py
@@ -0,0 +1,214 @@
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+TimeSeriesProcessingEntryPoints.SsaForecasting
+"""
+
+import numbers
+
+from ..utils.entrypoints import EntryPoint
+from ..utils.utils import try_set, unlist
+
+
+def timeseriesprocessingentrypoints_ssaforecasting(
+ source,
+ data,
+ name,
+ output_data=None,
+ model=None,
+ window_size=0,
+ series_length=0,
+ train_size=0,
+ horizon=0,
+ confidence_level=0.95,
+ variable_horizon=False,
+ confidence_lower_bound_column=None,
+ confidence_upper_bound_column=None,
+ rank_selection_method='Exact',
+ rank=None,
+ max_rank=None,
+ should_stabilize=True,
+ should_maintain_info=False,
+ max_growth=None,
+ discount_factor=1.0,
+ is_adaptive=False,
+ **params):
+ """
+ **Description**
+ This transform forecasts using Singular Spectrum Analysis (SSA).
+
+ :param source: The name of the source column. (inputs).
+ :param data: Input dataset (inputs).
+ :param name: The name of the new column. (inputs).
+ :param window_size: The length of the window on the series for
+ building the trajectory matrix (parameter L). (inputs).
+ :param series_length: The length of series that is kept in buffer
+ for modeling (parameter N). (inputs).
+ :param train_size: The length of series from the beginning used
+ for training. (inputs).
+ :param horizon: The number of values to forecast. (inputs).
+ :param confidence_level: The confidence level in [0, 1) for
+ forecasting. (inputs).
+ :param variable_horizon: Set this to true if the horizon will change
+ at prediction time. (inputs).
+ :param confidence_lower_bound_column: The name of the confidence
+ interval lower bound column. (inputs).
+ :param confidence_upper_bound_column: The name of the confidence
+ interval upper bound column. (inputs).
+ :param rank_selection_method: The rank selection method.
+ (inputs).
+ :param rank: The desired rank of the subspace used for SSA
+ projection (parameter r). This parameter should be in the
+ range [1, windowSize]. If set to null, the rank is
+ automatically determined based on prediction error
+ minimization. (inputs).
+ :param max_rank: The maximum rank considered during the rank
+ selection process. If not provided (i.e. set to null), it is
+ set to windowSize - 1. (inputs).
+ :param should_stabilize: The flag determining whether the model
+ should be stabilized. (inputs).
+ :param should_maintain_info: The flag determining whether the
+ meta information for the model needs to be maintained.
+ (inputs).
+ :param max_growth: The maximum growth on the exponential trend.
+ (inputs).
+ :param discount_factor: The discount factor in [0,1] used for
+ online updates. (inputs).
+ :param is_adaptive: The flag determining whether the model is
+ adaptive (inputs).
+ :param output_data: Transformed dataset (outputs).
+ :param model: Transform model (outputs).
+ """
+
+ entrypoint_name = 'TimeSeriesProcessingEntryPoints.SsaForecasting'
+ inputs = {}
+ outputs = {}
+
+ if source is not None:
+ inputs['Source'] = try_set(
+ obj=source,
+ none_acceptable=False,
+ is_of_type=str,
+ is_column=True)
+ if data is not None:
+ inputs['Data'] = try_set(
+ obj=data,
+ none_acceptable=False,
+ is_of_type=str)
+ if name is not None:
+ inputs['Name'] = try_set(
+ obj=name,
+ none_acceptable=False,
+ is_of_type=str,
+ is_column=True)
+ if window_size is not None:
+ inputs['WindowSize'] = try_set(
+ obj=window_size,
+ none_acceptable=False,
+ is_of_type=numbers.Real)
+ if series_length is not None:
+ inputs['SeriesLength'] = try_set(
+ obj=series_length,
+ none_acceptable=False,
+ is_of_type=numbers.Real)
+ if train_size is not None:
+ inputs['TrainSize'] = try_set(
+ obj=train_size,
+ none_acceptable=False,
+ is_of_type=numbers.Real)
+ if horizon is not None:
+ inputs['Horizon'] = try_set(
+ obj=horizon,
+ none_acceptable=False,
+ is_of_type=numbers.Real)
+ if confidence_level is not None:
+ inputs['ConfidenceLevel'] = try_set(
+ obj=confidence_level,
+ none_acceptable=True,
+ is_of_type=numbers.Real)
+ if variable_horizon is not None:
+ inputs['VariableHorizon'] = try_set(
+ obj=variable_horizon,
+ none_acceptable=True,
+ is_of_type=bool)
+ if confidence_lower_bound_column is not None:
+ inputs['ConfidenceLowerBoundColumn'] = try_set(
+ obj=confidence_lower_bound_column,
+ none_acceptable=True,
+ is_of_type=str,
+ is_column=True)
+ if confidence_upper_bound_column is not None:
+ inputs['ConfidenceUpperBoundColumn'] = try_set(
+ obj=confidence_upper_bound_column,
+ none_acceptable=True,
+ is_of_type=str,
+ is_column=True)
+ if rank_selection_method is not None:
+ inputs['RankSelectionMethod'] = try_set(
+ obj=rank_selection_method,
+ none_acceptable=True,
+ is_of_type=str,
+ values=[
+ 'Fixed',
+ 'Exact',
+ 'Fast'])
+ if rank is not None:
+ inputs['Rank'] = try_set(
+ obj=rank,
+ none_acceptable=True,
+ is_of_type=numbers.Real)
+ if max_rank is not None:
+ inputs['MaxRank'] = try_set(
+ obj=max_rank,
+ none_acceptable=True,
+ is_of_type=numbers.Real)
+ if should_stabilize is not None:
+ inputs['ShouldStabilize'] = try_set(
+ obj=should_stabilize,
+ none_acceptable=True,
+ is_of_type=bool)
+ if should_maintain_info is not None:
+ inputs['ShouldMaintainInfo'] = try_set(
+ obj=should_maintain_info,
+ none_acceptable=True,
+ is_of_type=bool)
+ if max_growth is not None:
+ inputs['MaxGrowth'] = try_set(
+ obj=max_growth,
+ none_acceptable=True,
+ is_of_type=dict,
+ field_names=[
+ 'TimeSpan',
+ 'Growth'])
+ if discount_factor is not None:
+ inputs['DiscountFactor'] = try_set(
+ obj=discount_factor,
+ none_acceptable=True,
+ is_of_type=numbers.Real)
+ if is_adaptive is not None:
+ inputs['IsAdaptive'] = try_set(
+ obj=is_adaptive,
+ none_acceptable=True,
+ is_of_type=bool)
+ if output_data is not None:
+ outputs['OutputData'] = try_set(
+ obj=output_data,
+ none_acceptable=False,
+ is_of_type=str)
+ if model is not None:
+ outputs['Model'] = try_set(
+ obj=model,
+ none_acceptable=False,
+ is_of_type=str)
+
+ input_variables = {
+ x for x in unlist(inputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+ output_variables = {
+ x for x in unlist(outputs.values())
+ if isinstance(x, str) and x.startswith("$")}
+
+ entrypoint = EntryPoint(
+ name=entrypoint_name, inputs=inputs, outputs=outputs,
+ input_variables=input_variables,
+ output_variables=output_variables)
+ return entrypoint
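
For orientation, a minimal sketch of how this generated builder is consumed: it does not execute anything itself, it only assembles an EntryPoint graph node, and the '$'-prefixed strings are graph variables, which is exactly what the input_variables/output_variables sets above collect. The variable names below are illustrative; the actual wiring is performed by the nimbusml Pipeline.

# Sketch only: build a graph node directly. The '$...' names are placeholders
# for graph variables and are illustrative, not fixed by this module.
node = timeseriesprocessingentrypoints_ssaforecasting(
    source='ts',                  # input column
    name='fc',                    # output column
    data='$input_data',           # graph variable (illustrative)
    output_data='$output_data',   # graph variable (illustrative)
    model='$transform_model',     # graph variable (illustrative)
    window_size=6,
    series_length=8,
    train_size=15,
    horizon=2)
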
diff --git a/src/python/nimbusml/tests/timeseries/test_ssaforecaster.py b/src/python/nimbusml/tests/timeseries/test_ssaforecaster.py
new file mode 100644
index 00000000..d4cc3e84
--- /dev/null
+++ b/src/python/nimbusml/tests/timeseries/test_ssaforecaster.py
@@ -0,0 +1,65 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+
+import unittest
+
+import numpy as np
+import pandas as pd
+from nimbusml import Pipeline, FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.timeseries import SsaForecaster
+
+
+class TestSsaForecaster(unittest.TestCase):
+
+ @unittest.skip('ml.net libraries containing timeseries forecasting are not included with nimbusml yet.')
+ def test_simple_forecast(self):
+ seasonality_size = 5
+ seasonal_data = np.arange(seasonality_size)
+
+ data = np.tile(seasonal_data, 3)
+
+ X_train = pd.Series(data, name="ts")
+
+ training_seasons = 3
+ training_size = seasonality_size * training_seasons
+
+ forecaster = SsaForecaster(series_length=8,
+ train_size=training_size,
+ window_size=seasonality_size + 1,
+ horizon=2) << {'fc': 'ts'}
+
+ forecaster.fit(X_train, verbose=1)
+ data = forecaster.transform(X_train)
+
+ self.assertEqual(round(data.loc[0, 'fc.0']), 1.0)
+ self.assertEqual(round(data.loc[0, 'fc.1']), 2.0)
+
+ self.assertEqual(len(data['fc.0']), 15)
+
+ @unittest.skip('ml.net libraries containing timeseries forecasting are not included with nimbusml yet.')
+ def test_multiple_user_specified_columns_is_not_allowed(self):
+ path = get_dataset('timeseries').as_filepath()
+ data = FileDataStream.read_csv(path)
+
+ try:
+ pipeline = Pipeline([
+ SsaForecaster(series_length=8,
+ train_size=15,
+ window_size=5,
+ horizon=2,
+ columns=['t2', 't3'])
+ ])
+ pipeline.fit_transform(data)
+
+ except RuntimeError as e:
+ self.assertTrue('Only one column is allowed' in str(e))
+ return
+
+ self.fail()
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/python/nimbusml/timeseries/__init__.py b/src/python/nimbusml/timeseries/__init__.py
index 807e3a7b..64e66add 100644
--- a/src/python/nimbusml/timeseries/__init__.py
+++ b/src/python/nimbusml/timeseries/__init__.py
@@ -2,10 +2,12 @@
from .iidchangepointdetector import IidChangePointDetector
from .ssaspikedetector import SsaSpikeDetector
from .ssachangepointdetector import SsaChangePointDetector
+from .ssaforecaster import SsaForecaster
__all__ = [
'IidSpikeDetector',
'IidChangePointDetector',
'SsaSpikeDetector',
- 'SsaChangePointDetector'
+ 'SsaChangePointDetector',
+ 'SsaForecaster'
]
diff --git a/src/python/nimbusml/timeseries/ssaforecaster.py b/src/python/nimbusml/timeseries/ssaforecaster.py
new file mode 100644
index 00000000..7b63ae33
--- /dev/null
+++ b/src/python/nimbusml/timeseries/ssaforecaster.py
@@ -0,0 +1,137 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+SsaForecaster
+"""
+
+__all__ = ["SsaForecaster"]
+
+
+from sklearn.base import TransformerMixin
+
+from ..base_transform import BaseTransform
+from ..internal.core.timeseries.ssaforecaster import SsaForecaster as core
+from ..internal.utils.utils import trace
+
+
+class SsaForecaster(core, BaseTransform, TransformerMixin):
+ """
+ **Description**
+ This transform forecasts using Singular Spectrum Analysis (SSA).
+
+ :param columns: see `Columns `_.
+
+ :param window_size: The length of the window on the series for building the
+ trajectory matrix (parameter L).
+
+ :param series_length: The length of series that is kept in buffer for
+ modeling (parameter N).
+
+ :param train_size: The length of series from the beginning used for
+ training.
+
+ :param horizon: The number of values to forecast.
+
+ :param confidence_level: The confidence level in [0, 1) for forecasting.
+
+ :param variable_horizon: Set this to true if the horizon will change at
+ prediction time.
+
+ :param confidence_lower_bound_column: The name of the confidence interval
+ lower bound column.
+
+ :param confidence_upper_bound_column: The name of the confidence interval
+ upper bound column.
+
+ :param rank_selection_method: The rank selection method.
+
+ :param rank: The desired rank of the subspace used for SSA projection
+ (parameter r). This parameter should be in the range [1,
+ windowSize]. If set to null, the rank is automatically determined based
+ on prediction error minimization.
+
+ :param max_rank: The maximum rank considered during the rank selection
+ process. If not provided (i.e. set to null), it is set to windowSize -
+ 1.
+
+ :param should_stabilize: The flag determining whether the model should be
+ stabilized.
+
+ :param should_maintain_info: The flag determining whether the meta
+ information for the model needs to be maintained.
+
+ :param max_growth: The maximum growth on the exponential trend.
+
+ :param discount_factor: The discount factor in [0,1] used for online
+ updates.
+
+ :param is_adaptive: The flag determining whether the model is adaptive.
+
+ :param params: Additional arguments sent to compute engine.
+
+ """
+
+ @trace
+ def __init__(
+ self,
+ window_size=0,
+ series_length=0,
+ train_size=0,
+ horizon=0,
+ confidence_level=0.95,
+ variable_horizon=False,
+ confidence_lower_bound_column=None,
+ confidence_upper_bound_column=None,
+ rank_selection_method='Exact',
+ rank=None,
+ max_rank=None,
+ should_stabilize=True,
+ should_maintain_info=False,
+ max_growth=None,
+ discount_factor=1.0,
+ is_adaptive=False,
+ columns=None,
+ **params):
+
+ if columns:
+ params['columns'] = columns
+ BaseTransform.__init__(self, **params)
+ core.__init__(
+ self,
+ window_size=window_size,
+ series_length=series_length,
+ train_size=train_size,
+ horizon=horizon,
+ confidence_level=confidence_level,
+ variable_horizon=variable_horizon,
+ confidence_lower_bound_column=confidence_lower_bound_column,
+ confidence_upper_bound_column=confidence_upper_bound_column,
+ rank_selection_method=rank_selection_method,
+ rank=rank,
+ max_rank=max_rank,
+ should_stabilize=should_stabilize,
+ should_maintain_info=should_maintain_info,
+ max_growth=max_growth,
+ discount_factor=discount_factor,
+ is_adaptive=is_adaptive,
+ **params)
+ self._columns = columns
+
+ def get_params(self, deep=False):
+ """
+ Get the parameters for this operator.
+ """
+ return core.get_params(self)
+
+ def _nodes_with_presteps(self):
+ """
+ Inserts preprocessing before this one.
+ """
+ from ..preprocessing.schema import TypeConverter
+ return [
+ TypeConverter(
+ result_type='R4')._steal_io(self),
+ self]
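
Note that _nodes_with_presteps above quietly prepends a TypeConverter, so the forecasted column is cast to single precision (R4) before SSA runs; this is why the integer series in the examples are accepted. A rough explicit equivalent is sketched below, assuming the usual TypeConverter(columns=..., result_type='R4') constructor form rather than the internal _steal_io path used here.

# Sketch only: an explicit R4 conversion ahead of the forecaster, roughly what
# the implicit prestep does for the forecasted column.
from nimbusml import Pipeline
from nimbusml.preprocessing.schema import TypeConverter
from nimbusml.timeseries import SsaForecaster

pipeline = Pipeline([
    TypeConverter(columns=['ts'], result_type='R4'),
    SsaForecaster(series_length=8,
                  train_size=15,
                  window_size=5,
                  horizon=2) << {'fc': 'ts'}
])
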
diff --git a/src/python/tests/test_estimator_checks.py b/src/python/tests/test_estimator_checks.py
index 5dac16f5..de6d5900 100644
--- a/src/python/tests/test_estimator_checks.py
+++ b/src/python/tests/test_estimator_checks.py
@@ -17,7 +17,8 @@
from nimbusml.preprocessing import TensorFlowScorer
from nimbusml.preprocessing.filter import SkipFilter, TakeFilter
from nimbusml.timeseries import (IidSpikeDetector, IidChangePointDetector,
- SsaSpikeDetector, SsaChangePointDetector)
+ SsaSpikeDetector, SsaChangePointDetector,
+ SsaForecaster)
from sklearn.utils.estimator_checks import _yield_all_checks, MULTI_OUTPUT
this = os.path.abspath(os.path.dirname(__file__))
@@ -62,6 +63,8 @@
'check_fit2d_1sample', # SSA requires more than one sample
'SsaChangePointDetector': 'check_estimator_sparse_data'
'check_fit2d_1sample', # SSA requires more than one sample
+ 'SsaForecaster': 'check_estimator_sparse_data'
+ 'check_fit2d_1sample', # SSA requires more than one sample
# bug, low tolerance
'FastLinearRegressor': 'check_supervised_y_2d, '
'check_regressor_data_not_an_array, '
@@ -193,6 +196,11 @@
'IidChangePointDetector': IidChangePointDetector(columns=['F0']),
'SsaSpikeDetector': SsaSpikeDetector(columns=['F0'], seasonal_window_size=2),
'SsaChangePointDetector': SsaChangePointDetector(columns=['F0'], seasonal_window_size=2),
+ 'SsaForecaster': SsaForecaster(columns=['F0'],
+ window_size=2,
+ series_length=5,
+ train_size=5,
+ horizon=1),
'TensorFlowScorer': TensorFlowScorer(
model_location=os.path.join(
this,
@@ -270,6 +278,9 @@ def load_json(file_path):
# skip SymSgdBinaryClassifier for now, because of crashes.
if 'SymSgdBinaryClassifier' in class_name:
continue
+ # skip for now because the ml.net binaries do not contain the SsaForecasting code.
+ if 'SsaForecaster' in class_name:
+ continue
mod = __import__('nimbusml.' + e[0], fromlist=[str(class_name)])
the_class = getattr(mod, class_name)
diff --git a/src/python/tools/compiler_utils.py b/src/python/tools/compiler_utils.py
index 9a5e1e07..c64f5af3 100644
--- a/src/python/tools/compiler_utils.py
+++ b/src/python/tools/compiler_utils.py
@@ -135,6 +135,7 @@ def _nodes_with_presteps(self):
'IidChangePointDetector': timeseries_to_r4_converter,
'SsaSpikeDetector': timeseries_to_r4_converter,
'SsaChangePointDetector': timeseries_to_r4_converter,
+ 'SsaForecaster': timeseries_to_r4_converter,
'PcaTransformer':
'''from ..preprocessing.schema import TypeConverter
diff --git a/src/python/tools/manifest.json b/src/python/tools/manifest.json
index 67951c74..8aa6ca9a 100644
--- a/src/python/tools/manifest.json
+++ b/src/python/tools/manifest.json
@@ -4007,6 +4007,244 @@
"ITransformOutput"
]
},
+ {
+ "Name": "TimeSeriesProcessingEntryPoints.SsaForecasting",
+ "Desc": "This transform forecasts using Singular Spectrum Analysis (SSA).",
+ "FriendlyName": "SSA Forecasting",
+ "ShortName": "ssafcst",
+ "Inputs": [
+ {
+ "Name": "Source",
+ "Type": "String",
+ "Desc": "The name of the source column.",
+ "Aliases": [
+ "src"
+ ],
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Data",
+ "Type": "DataView",
+ "Desc": "Input dataset",
+ "Required": true,
+ "SortOrder": 1.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "Name",
+ "Type": "String",
+ "Desc": "The name of the new column.",
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false
+ },
+ {
+ "Name": "WindowSize",
+ "Type": "Int",
+ "Desc": "The length of the window on the series for building the trajectory matrix (parameter L).",
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": 0
+ },
+ {
+ "Name": "SeriesLength",
+ "Type": "Int",
+ "Desc": "The length of series that is kept in buffer for modeling (parameter N).",
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": 0
+ },
+ {
+ "Name": "TrainSize",
+ "Type": "Int",
+ "Desc": "The length of series from the begining used for training.",
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": 0
+ },
+ {
+ "Name": "Horizon",
+ "Type": "Int",
+ "Desc": "The number of values to forecast.",
+ "Required": true,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": 0
+ },
+ {
+ "Name": "ConfidenceLevel",
+ "Type": "Float",
+ "Desc": "The confidence level in [0, 1) for forecasting.",
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": 0.95
+ },
+ {
+ "Name": "VariableHorizon",
+ "Type": "Bool",
+ "Desc": "Set this to true horizon will change at prediction time.",
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "ConfidenceLowerBoundColumn",
+ "Type": "String",
+ "Desc": "The name of the confidence interval lower bound column.",
+ "Aliases": [
+ "cnfminname"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "ConfidenceUpperBoundColumn",
+ "Type": "String",
+ "Desc": "The name of the confidence interval upper bound column.",
+ "Aliases": [
+ "cnfmaxnname"
+ ],
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": null
+ },
+ {
+ "Name": "RankSelectionMethod",
+ "Type": {
+ "Kind": "Enum",
+ "Values": [
+ "Fixed",
+ "Exact",
+ "Fast"
+ ]
+ },
+ "Desc": "The rank selection method.",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": "Exact"
+ },
+ {
+ "Name": "Rank",
+ "Type": "Int",
+ "Desc": "The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. If set to null, the rank is automatically determined based on prediction error minimization.",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "MaxRank",
+ "Type": "Int",
+ "Desc": "The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1.",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "ShouldStabilize",
+ "Type": "Bool",
+ "Desc": "The flag determining whether the model should be stabilized.",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": true
+ },
+ {
+ "Name": "ShouldMaintainInfo",
+ "Type": "Bool",
+ "Desc": "The flag determining whether the meta information for the model needs to be maintained.",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": false,
+ "Default": false
+ },
+ {
+ "Name": "MaxGrowth",
+ "Type": {
+ "Kind": "Struct",
+ "Fields": [
+ {
+ "Name": "TimeSpan",
+ "Type": "Int",
+ "Desc": "Time span of growth ratio. Must be strictly positive.",
+ "Required": false,
+ "SortOrder": 1.0,
+ "IsNullable": false,
+ "Default": 0
+ },
+ {
+ "Name": "Growth",
+ "Type": "Float",
+ "Desc": "Growth. Must be non-negative.",
+ "Required": false,
+ "SortOrder": 2.0,
+ "IsNullable": false,
+ "Default": 0.0
+ }
+ ]
+ },
+ "Desc": "The maximum growth on the exponential trend.",
+ "Required": false,
+ "SortOrder": 3.0,
+ "IsNullable": true,
+ "Default": null
+ },
+ {
+ "Name": "DiscountFactor",
+ "Type": "Float",
+ "Desc": "The discount factor in [0,1] used for online updates.",
+ "Aliases": [
+ "disc"
+ ],
+ "Required": false,
+ "SortOrder": 5.0,
+ "IsNullable": false,
+ "Default": 1.0
+ },
+ {
+ "Name": "IsAdaptive",
+ "Type": "Bool",
+ "Desc": "The flag determing whether the model is adaptive",
+ "Aliases": [
+ "adp"
+ ],
+ "Required": false,
+ "SortOrder": 6.0,
+ "IsNullable": false,
+ "Default": false
+ }
+ ],
+ "Outputs": [
+ {
+ "Name": "OutputData",
+ "Type": "DataView",
+ "Desc": "Transformed dataset"
+ },
+ {
+ "Name": "Model",
+ "Type": "TransformModel",
+ "Desc": "Transform model"
+ }
+ ],
+ "InputKind": [
+ "ITransformInput"
+ ],
+ "OutputKind": [
+ "ITransformOutput"
+ ]
+ },
{
"Name": "TimeSeriesProcessingEntryPoints.SsaSpikeDetector",
"Desc": "This transform detects the spikes in a seasonal time-series using Singular Spectrum Analysis (SSA).",
diff --git a/src/python/tools/manifest_diff.json b/src/python/tools/manifest_diff.json
index 25708e21..58d6b3a5 100644
--- a/src/python/tools/manifest_diff.json
+++ b/src/python/tools/manifest_diff.json
@@ -563,6 +563,12 @@
"Module": "timeseries",
"Type": "Transform"
},
+ {
+ "Name": "TimeSeriesProcessingEntryPoints.SsaForecasting",
+ "NewName": "SsaForecaster",
+ "Module": "timeseries",
+ "Type": "Transform"
+ },
{
"Name": "Trainers.PoissonRegressor",
"NewName": "PoissonRegressionRegressor",