diff --git a/src/DotNetBridge/Bridge.cs b/src/DotNetBridge/Bridge.cs
index 1395c998..26e5a84d 100644
--- a/src/DotNetBridge/Bridge.cs
+++ b/src/DotNetBridge/Bridge.cs
@@ -17,6 +17,7 @@
using Microsoft.ML.Trainers.FastTree;
using Microsoft.ML.Trainers.LightGbm;
using Microsoft.ML.Transforms;
+using Microsoft.ML.TimeSeries;
namespace Microsoft.MachineLearning.DotNetBridge
{
@@ -328,6 +329,7 @@ private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cd
//env.ComponentCatalog.RegisterAssembly(typeof(SaveOnnxCommand).Assembly);
//env.ComponentCatalog.RegisterAssembly(typeof(TimeSeriesProcessingEntryPoints).Assembly);
//env.ComponentCatalog.RegisterAssembly(typeof(ParquetLoader).Assembly);
+ env.ComponentCatalog.RegisterAssembly(typeof(ForecastExtensions).Assembly);
using (var ch = host.Start("Executing"))
{
diff --git a/src/DotNetBridge/DotNetBridge.csproj b/src/DotNetBridge/DotNetBridge.csproj
index 92365878..fab49e2e 100644
--- a/src/DotNetBridge/DotNetBridge.csproj
+++ b/src/DotNetBridge/DotNetBridge.csproj
@@ -41,5 +41,6 @@
+
diff --git a/src/Platforms/build.csproj b/src/Platforms/build.csproj
index 7491fac8..e75aa8f3 100644
--- a/src/Platforms/build.csproj
+++ b/src/Platforms/build.csproj
@@ -20,6 +20,7 @@
+
diff --git a/src/python/docs/sphinx/make.bat b/src/python/docs/sphinx/make.bat
index e79eca32..50b8b4ee 100644
--- a/src/python/docs/sphinx/make.bat
+++ b/src/python/docs/sphinx/make.bat
@@ -9,11 +9,13 @@ REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=%PYTHONINTERPRETER% -m sphinx
)
+
+:: TODO: Fix the issue here; the installation guide is not showing correctly
set SOURCEDIR=.
set BUILDDIR=%~dp0_build
set SPHINXPROJ=microsoftml
-if "%1" == "" goto html:
+if "%1" == "" goto html:
set format=%1
goto next:
@@ -52,4 +54,4 @@ goto end:
@echo An issue happened. Check %BUILDDIR%\%format% is not here.
:end
-popd
+popd
\ No newline at end of file
diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj
index 48076cc7..d4b65307 100644
--- a/src/python/nimbusml.pyproj
+++ b/src/python/nimbusml.pyproj
@@ -88,6 +88,9 @@
+
+
+
@@ -109,6 +112,7 @@
+
@@ -134,6 +138,9 @@
+
+
+
@@ -159,6 +166,7 @@
+
@@ -181,7 +189,6 @@
-
@@ -219,6 +226,11 @@
+
+
+
+
+
@@ -569,6 +581,16 @@
+
+
+
+
+
+
+
+
+
+
@@ -741,6 +763,7 @@
+
@@ -762,6 +785,7 @@
+
@@ -778,6 +802,7 @@
+
diff --git a/src/python/nimbusml/examples/IidChangePointDetector.py b/src/python/nimbusml/examples/IidChangePointDetector.py
new file mode 100644
index 00000000..d9b4fdb1
--- /dev/null
+++ b/src/python/nimbusml/examples/IidChangePointDetector.py
@@ -0,0 +1,38 @@
+###############################################################################
+# IidChangePointDetector
+from nimbusml import Pipeline, FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.timeseries import IidChangePointDetector
+
+# data input (as a FileDataStream)
+path = get_dataset('timeseries').as_filepath()
+
+data = FileDataStream.read_csv(path)
+print(data.head())
+# t1 t2 t3
+# 0 0.01 0.01 0.0100
+# 1 0.02 0.02 0.0200
+# 2 0.03 0.03 0.0200
+# 3 0.03 0.03 0.0250
+# 4 0.03 0.03 0.0005
+
+# define the training pipeline
+pipeline = Pipeline([
+ IidChangePointDetector(columns={'t2_cp': 't2'}, change_history_length=4)
+])
+
+result = pipeline.fit_transform(data)
+print(result)
+
+# t1 t2 t3 t2_cp.Alert t2_cp.Raw Score t2_cp.P-Value Score t2_cp.Martingale Score
+# 0 0.01 0.01 0.0100 0.0 0.01 5.000000e-01 1.212573e-03
+# 1 0.02 0.02 0.0200 0.0 0.02 4.960106e-01 1.221347e-03
+# 2 0.03 0.03 0.0200 0.0 0.03 1.139087e-02 3.672914e-02
+# 3 0.03 0.03 0.0250 0.0 0.03 2.058296e-01 8.164447e-02
+# 4 0.03 0.03 0.0005 0.0 0.03 2.804577e-01 1.373786e-01
+# 5 0.03 0.05 0.0100 1.0 0.05 1.448886e-06 1.315014e+04
+# 6 0.05 0.07 0.0500 0.0 0.07 2.616611e-03 4.941587e+04
+# 7 0.07 0.09 0.0900 0.0 0.09 3.053187e-02 2.752614e+05
+# 8 0.09 99.00 99.0000 0.0 99.00 1.000000e-08 1.389396e+12
+# 9 1.10 0.10 0.1000 1.0 0.10 3.778296e-01 1.854344e+07
+
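
To act on the detections above, a plain pandas filter on the alert column is enough. A minimal sketch, assuming the result frame and the t2_cp.* output columns from the example just shown:

# keep only the rows where the detector raised an alert (rows 5 and 9 above)
change_points = result[result['t2_cp.Alert'] == 1.0]
print(change_points[['t2', 't2_cp.P-Value Score', 't2_cp.Martingale Score']])
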
diff --git a/src/python/nimbusml/examples/IidSpikeDetector.py b/src/python/nimbusml/examples/IidSpikeDetector.py
new file mode 100644
index 00000000..f21e138e
--- /dev/null
+++ b/src/python/nimbusml/examples/IidSpikeDetector.py
@@ -0,0 +1,37 @@
+###############################################################################
+# IidSpikeDetector
+from nimbusml import Pipeline, FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.timeseries import IidSpikeDetector
+
+# data input (as a FileDataStream)
+path = get_dataset('timeseries').as_filepath()
+
+data = FileDataStream.read_csv(path)
+print(data.head())
+# t1 t2 t3
+# 0 0.01 0.01 0.0100
+# 1 0.02 0.02 0.0200
+# 2 0.03 0.03 0.0200
+# 3 0.03 0.03 0.0250
+# 4 0.03 0.03 0.0005
+
+# define the training pipeline
+pipeline = Pipeline([
+ IidSpikeDetector(columns={'t2_spikes': 't2'}, pvalue_history_length=5)
+])
+
+result = pipeline.fit_transform(data)
+print(result)
+# t1 t2 t3 t2_spikes.Alert t2_spikes.Raw Score t2_spikes.P-Value Score
+# 0 0.01 0.01 0.0100 0.0 0.01 5.000000e-01
+# 1 0.02 0.02 0.0200 0.0 0.02 4.960106e-01
+# 2 0.03 0.03 0.0200 0.0 0.03 1.139087e-02
+# 3 0.03 0.03 0.0250 0.0 0.03 2.058296e-01
+# 4 0.03 0.03 0.0005 0.0 0.03 2.804577e-01
+# 5 0.03 0.05 0.0100 1.0 0.05 3.743552e-03
+# 6 0.05 0.07 0.0500 1.0 0.07 4.136079e-03
+# 7 0.07 0.09 0.0900 0.0 0.09 2.242496e-02
+# 8 0.09 99.00 99.0000 1.0 99.00 1.000000e-08
+# 9 1.10 0.10 0.1000 0.0 0.10 4.015681e-01
+
diff --git a/src/python/nimbusml/examples/SsaChangePointDetector.py b/src/python/nimbusml/examples/SsaChangePointDetector.py
new file mode 100644
index 00000000..2f002135
--- /dev/null
+++ b/src/python/nimbusml/examples/SsaChangePointDetector.py
@@ -0,0 +1,40 @@
+###############################################################################
+# SsaChangePointDetector
+from nimbusml import Pipeline, FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.timeseries import SsaChangePointDetector
+
+# data input (as a FileDataStream)
+path = get_dataset('timeseries').as_filepath()
+
+data = FileDataStream.read_csv(path)
+print(data.head())
+# t1 t2 t3
+# 0 0.01 0.01 0.0100
+# 1 0.02 0.02 0.0200
+# 2 0.03 0.03 0.0200
+# 3 0.03 0.03 0.0250
+# 4 0.03 0.03 0.0005
+
+# define the training pipeline
+pipeline = Pipeline([
+ SsaChangePointDetector(columns={'t2_cp': 't2'},
+ change_history_length=4,
+ training_window_size=8,
+ seasonal_window_size=3)
+])
+
+result = pipeline.fit_transform(data)
+print(result)
+
+# t1 t2 t3 t2_cp.Alert t2_cp.Raw Score t2_cp.P-Value Score t2_cp.Martingale Score
+# 0 0.01 0.01 0.0100 0.0 -0.111334 5.000000e-01 0.001213
+# 1 0.02 0.02 0.0200 0.0 -0.076755 4.862075e-01 0.001243
+# 2 0.03 0.03 0.0200 0.0 -0.034871 3.856320e-03 0.099119
+# 3 0.03 0.03 0.0250 0.0 -0.012559 8.617091e-02 0.482400
+# 4 0.03 0.03 0.0005 0.0 -0.015723 2.252377e-01 0.988788
+# 5 0.03 0.05 0.0100 0.0 -0.001133 1.767711e-01 2.457946
+# 6 0.05 0.07 0.0500 0.0 0.006265 9.170460e-02 0.141898
+# 7 0.07 0.09 0.0900 0.0 0.002383 2.701134e-01 0.050747
+# 8 0.09 99.00 99.0000 1.0 98.879520 1.000000e-08 210274.372059
+# 9 1.10 0.10 0.1000 0.0 -57.817568 6.635692e-02 507877.454862
diff --git a/src/python/nimbusml/examples/SsaSpikeDetector.py b/src/python/nimbusml/examples/SsaSpikeDetector.py
new file mode 100644
index 00000000..299c4475
--- /dev/null
+++ b/src/python/nimbusml/examples/SsaSpikeDetector.py
@@ -0,0 +1,40 @@
+###############################################################################
+# SsaSpikeDetector
+from nimbusml import Pipeline, FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.timeseries import SsaSpikeDetector
+
+# data input (as a FileDataStream)
+path = get_dataset('timeseries').as_filepath()
+
+data = FileDataStream.read_csv(path)
+print(data.head())
+# t1 t2 t3
+# 0 0.01 0.01 0.0100
+# 1 0.02 0.02 0.0200
+# 2 0.03 0.03 0.0200
+# 3 0.03 0.03 0.0250
+# 4 0.03 0.03 0.0005
+
+# define the training pipeline
+pipeline = Pipeline([
+ SsaSpikeDetector(columns={'t2_spikes': 't2'},
+ pvalue_history_length=4,
+ training_window_size=8,
+ seasonal_window_size=3)
+])
+
+result = pipeline.fit_transform(data)
+print(result)
+
+# t1 t2 t3 t2_spikes.Alert t2_spikes.Raw Score t2_spikes.P-Value Score
+# 0 0.01 0.01 0.0100 0.0 -0.111334 5.000000e-01
+# 1 0.02 0.02 0.0200 0.0 -0.076755 4.862075e-01
+# 2 0.03 0.03 0.0200 0.0 -0.034871 3.856320e-03
+# 3 0.03 0.03 0.0250 0.0 -0.012559 8.617091e-02
+# 4 0.03 0.03 0.0005 0.0 -0.015723 2.252377e-01
+# 5 0.03 0.05 0.0100 0.0 -0.001133 1.767711e-01
+# 6 0.05 0.07 0.0500 0.0 0.006265 9.170460e-02
+# 7 0.07 0.09 0.0900 0.0 0.002383 2.701134e-01
+# 8 0.09 99.00 99.0000 1.0 98.879520 1.000000e-08
+# 9 1.10 0.10 0.1000 0.0 -57.817568 6.635692e-02
diff --git a/src/python/nimbusml/examples/examples_from_dataframe/IidChangePointDetector_df.py b/src/python/nimbusml/examples/examples_from_dataframe/IidChangePointDetector_df.py
new file mode 100644
index 00000000..00d19531
--- /dev/null
+++ b/src/python/nimbusml/examples/examples_from_dataframe/IidChangePointDetector_df.py
@@ -0,0 +1,34 @@
+###############################################################################
+# IidChangePointDetector
+import pandas as pd
+from nimbusml.timeseries import IidChangePointDetector
+
+# Create a sample series with a change
+input_data = [5, 5, 5, 5, 5, 5, 5, 5]
+input_data.extend([7, 7, 7, 7, 7, 7, 7, 7])
+
+X_train = pd.Series(input_data, name="ts")
+
+cpd = IidChangePointDetector(confidence=95, change_history_length=4) << {'result': 'ts'}
+data = cpd.fit_transform(X_train)
+
+print(data)
+
+# ts result.Alert result.Raw Score result.P-Value Score result.Martingale Score
+# 0 5 0.0 5.0 5.000000e-01 0.001213
+# 1 5 0.0 5.0 5.000000e-01 0.001213
+# 2 5 0.0 5.0 5.000000e-01 0.001213
+# 3 5 0.0 5.0 5.000000e-01 0.001213
+# 4 5 0.0 5.0 5.000000e-01 0.001213
+# 5 5 0.0 5.0 5.000000e-01 0.001213
+# 6 5 0.0 5.0 5.000000e-01 0.001213
+# 7 5 0.0 5.0 5.000000e-01 0.001213
+# 8 7 1.0 7.0 1.000000e-08 10298.666376 <-- alert is on, predicted changepoint
+# 9 7 0.0 7.0 1.328455e-01 33950.164799
+# 10 7 0.0 7.0 2.613750e-01 60866.342063
+# 11 7 0.0 7.0 3.776152e-01 78362.038772
+# 12 7 0.0 7.0 5.000000e-01 0.009226
+# 13 7 0.0 7.0 5.000000e-01 0.002799
+# 14 7 0.0 7.0 5.000000e-01 0.001561
+# 15 7 0.0 7.0 5.000000e-01 0.001213
+
diff --git a/src/python/nimbusml/examples/examples_from_dataframe/IidSpikeDetector_df.py b/src/python/nimbusml/examples/examples_from_dataframe/IidSpikeDetector_df.py
new file mode 100644
index 00000000..93ab346d
--- /dev/null
+++ b/src/python/nimbusml/examples/examples_from_dataframe/IidSpikeDetector_df.py
@@ -0,0 +1,26 @@
+###############################################################################
+# IidSpikeDetector
+import pandas as pd
+from nimbusml.timeseries import IidSpikeDetector
+
+X_train = pd.Series([5, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5], name="ts")
+
+isd = IidSpikeDetector(confidence=95, pvalue_history_length=2.5) << {'result': 'ts'}
+
+isd.fit(X_train, verbose=1)
+data = isd.transform(X_train)
+
+print(data)
+
+# ts result.Alert result.Raw Score result.P-Value Score
+# 0 5.0 0.0 5.0 5.000000e-01
+# 1 5.0 0.0 5.0 5.000000e-01
+# 2 5.0 0.0 5.0 5.000000e-01
+# 3 5.0 0.0 5.0 5.000000e-01
+# 4 5.0 0.0 5.0 5.000000e-01
+# 5 10.0 1.0 10.0 1.000000e-08 <-- alert is on, predicted spike
+# 6 5.0 0.0 5.0 2.613750e-01
+# 7 5.0 0.0 5.0 2.613750e-01
+# 8 5.0 0.0 5.0 5.000000e-01
+# 9 5.0 0.0 5.0 5.000000e-01
+# 10 5.0 0.0 5.0 5.000000e-01
diff --git a/src/python/nimbusml/examples/examples_from_dataframe/SsaChangePointDetector_df.py b/src/python/nimbusml/examples/examples_from_dataframe/SsaChangePointDetector_df.py
new file mode 100644
index 00000000..9bea570e
--- /dev/null
+++ b/src/python/nimbusml/examples/examples_from_dataframe/SsaChangePointDetector_df.py
@@ -0,0 +1,77 @@
+###############################################################################
+# SsaChangePointDetector
+import numpy as np
+import pandas as pd
+from nimbusml.timeseries import SsaChangePointDetector
+
+# This example creates a time series (list of data with the
+# i-th element corresponding to the i-th time slot).
+# The estimator is applied to identify points where data distribution changed.
+# This estimator can account for temporal seasonality in the data.
+
+# Generate sample series data with a recurring
+# pattern and a spike within the pattern
+seasonality_size = 5
+seasonal_data = np.arange(seasonality_size)
+
+data = np.tile(seasonal_data, 3)
+data = np.append(data, [0, 100, 200, 300, 400]) # change distribution
+
+X_train = pd.Series(data, name="ts")
+
+# X_train looks like this
+# 0 0
+# 1 1
+# 2 2
+# 3 3
+# 4 4
+# 5 0
+# 6 1
+# 7 2
+# 8 3
+# 9 4
+# 10 0
+# 11 1
+# 12 2
+# 13 3
+# 14 4
+# 15 0
+# 16 100
+# 17 200
+# 18 300
+# 19 400
+
+training_seasons = 3
+training_size = seasonality_size * training_seasons
+
+cpd = SsaChangePointDetector(confidence=95,
+ change_history_length=8,
+ training_window_size=training_size,
+ seasonal_window_size=seasonality_size + 1) << {'result': 'ts'}
+
+cpd.fit(X_train, verbose=1)
+data = cpd.transform(X_train)
+
+print(data)
+
+# ts result.Alert result.Raw Score result.P-Value Score result.Martingale Score
+# 0 0 0.0 -2.531824 5.000000e-01 1.470334e-06
+# 1 1 0.0 -0.008832 5.818072e-03 8.094459e-05
+# 2 2 0.0 0.763040 1.374071e-01 2.588526e-04
+# 3 3 0.0 0.693811 2.797713e-01 4.365186e-04
+# 4 4 0.0 1.442079 1.838294e-01 1.074242e-03
+# 5 0 0.0 -1.844414 1.707238e-01 2.825599e-03
+# 6 1 0.0 0.219578 4.364025e-01 3.193633e-03
+# 7 2 0.0 0.201708 4.505472e-01 3.507451e-03
+# 8 3 0.0 0.157089 4.684456e-01 3.719387e-03
+# 9 4 0.0 1.329494 1.773046e-01 1.717610e-04
+# 10 0 0.0 -1.792391 7.353794e-02 3.014897e-04
+# 11 1 0.0 0.161634 4.999295e-01 1.788041e-04
+# 12 2 0.0 0.092626 4.953789e-01 7.326680e-05
+# 13 3 0.0 0.084648 4.514174e-01 3.053876e-05
+# 14 4 0.0 1.305554 1.202619e-01 9.741702e-05
+# 15 0 0.0 -1.792391 7.264402e-02 5.034093e-04
+# 16 100 1.0 99.161634 1.000000e-08 4.031944e+03 <-- alert is on, predicted change point
+# 17 200 0.0 185.229474 5.485437e-04 7.312609e+05
+# 18 300 0.0 270.403543 1.259683e-02 3.578470e+06
+# 19 400 0.0 357.113747 2.978766e-02 4.529837e+07
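
The parameters used above respect the documented SSA constraint 2 < L < N/2, where N is training_window_size and L is seasonal_window_size. A quick check with the values from this example:

seasonality_size = 5
training_size = seasonality_size * 3    # training_seasons = 3, as above
L = seasonality_size + 1                # 6, the seasonal_window_size passed above
assert 2 < L < training_size / 2        # 2 < 6 < 7.5, so the configuration is valid
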
diff --git a/src/python/nimbusml/examples/examples_from_dataframe/SsaSpikeDetector_df.py b/src/python/nimbusml/examples/examples_from_dataframe/SsaSpikeDetector_df.py
new file mode 100644
index 00000000..d1297d09
--- /dev/null
+++ b/src/python/nimbusml/examples/examples_from_dataframe/SsaSpikeDetector_df.py
@@ -0,0 +1,80 @@
+###############################################################################
+# SsaSpikeDetector
+import numpy as np
+import pandas as pd
+from nimbusml.timeseries import SsaSpikeDetector
+
+# This example creates a time series (list of data with the
+# i-th element corresponding to the i-th time slot).
+# The estimator is applied to identify spiking points in the series.
+# This estimator can account for temporal seasonality in the data.
+
+# Generate sample series data with a recurring
+# pattern and a spike within the pattern
+seasonality_size = 5
+seasonal_data = np.arange(seasonality_size)
+
+data = np.tile(seasonal_data, 3)
+data = np.append(data, [100]) # add a spike
+data = np.append(data, seasonal_data)
+
+X_train = pd.Series(data, name="ts")
+
+# X_train looks like this
+# 0 0
+# 1 1
+# 2 2
+# 3 3
+# 4 4
+# 5 0
+# 6 1
+# 7 2
+# 8 3
+# 9 4
+# 10 0
+# 11 1
+# 12 2
+# 13 3
+# 14 4
+# 15 100
+# 16 0
+# 17 1
+# 18 2
+# 19 3
+# 20 4
+
+training_seasons = 3
+training_size = seasonality_size * training_seasons
+
+ssd = SsaSpikeDetector(confidence=95,
+ pvalue_history_length=8,
+ training_window_size=training_size,
+ seasonal_window_size=seasonality_size + 1) << {'result': 'ts'}
+
+ssd.fit(X_train, verbose=1)
+data = ssd.transform(X_train)
+
+print(data)
+
+# ts result.Alert result.Raw Score result.P-Value Score
+# 0 0 0.0 -2.531824 5.000000e-01
+# 1 1 0.0 -0.008832 5.818072e-03
+# 2 2 0.0 0.763040 1.374071e-01
+# 3 3 0.0 0.693811 2.797713e-01
+# 4 4 0.0 1.442079 1.838294e-01
+# 5 0 0.0 -1.844414 1.707238e-01
+# 6 1 0.0 0.219578 4.364025e-01
+# 7 2 0.0 0.201708 4.505472e-01
+# 8 3 0.0 0.157089 4.684456e-01
+# 9 4 0.0 1.329494 1.773046e-01
+# 10 0 0.0 -1.792391 7.353794e-02
+# 11 1 0.0 0.161634 4.999295e-01
+# 12 2 0.0 0.092626 4.953789e-01
+# 13 3 0.0 0.084648 4.514174e-01
+# 14 4 0.0 1.305554 1.202619e-01
+# 15 100 1.0 98.207609 1.000000e-08 <-- alert is on, predicted spike
+# 16 0 0.0 -13.831450 2.912225e-01
+# 17 1 0.0 -1.741884 4.379857e-01
+# 18 2 0.0 -0.465426 4.557261e-01
+# 19 3 0.0 -16.497133 2.926521e-01
+# 20 4 0.0 -29.817375 2.060473e-01
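
With the default error_function (SignedDifference), the Raw Score column is consistent with the observed value minus the SSA forecast; reading the flagged row that way is an assumption, but the numbers line up. A small arithmetic check against row 15 of the output above:

observed = 100.0
raw_score = 98.207609               # row 15 of the printed output
implied_forecast = observed - raw_score
print(implied_forecast)             # ~1.79, a plausible in-season forecast for this series
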
diff --git a/src/python/nimbusml/internal/core/timeseries/__init__.py b/src/python/nimbusml/internal/core/timeseries/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/nimbusml/internal/core/timeseries/_iidchangepointdetector.py b/src/python/nimbusml/internal/core/timeseries/_iidchangepointdetector.py
new file mode 100644
index 00000000..ae874a1c
--- /dev/null
+++ b/src/python/nimbusml/internal/core/timeseries/_iidchangepointdetector.py
@@ -0,0 +1,107 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+IidChangePointDetector
+"""
+
+__all__ = ["IidChangePointDetector"]
+
+
+from ...entrypoints.timeseriesprocessingentrypoints_iidchangepointdetector import \
+ timeseriesprocessingentrypoints_iidchangepointdetector
+from ...utils.utils import trace
+from ..base_pipeline_item import BasePipelineItem, DefaultSignature
+
+
+class IidChangePointDetector(BasePipelineItem, DefaultSignature):
+ """
+
+ This transform detects the change-points in an i.i.d. sequence using
+ adaptive kernel density estimation and martingales.
+
+ .. remarks::
+ ``IidChangePointDetector`` assumes a sequence of data points that are
+ independently sampled from one
+ stationary distribution. `Adaptive kernel density estimation
+ `_
+ is used to model the distribution.
+
+ This transform detects
+ change points by calculating the martingale score for the sliding
+ window based on the estimated distribution.
+ The idea is based on the `Exchangeability
+ Martingales `_ that
+ detect a change of distribution over a stream of i.i.d. values. In
+ short, the value of the
+ martingale score starts increasing significantly when a sequence of
+ small p-values are detected in a row; this
+ indicates the change of the distribution of the underlying data
+ generation process.
+
+ :param confidence: The confidence for change point detection in the range
+ [0, 100]. Used to set the threshold of the martingale score for
+ triggering alert.
+
+ :param change_history_length: The length of the sliding window on p-value
+ for computing the martingale score.
+
+ :param martingale: The type of martingale betting function used for
+ computing the martingale score. Available options are {``Power``,
+ ``Mixture``}.
+
+ :param power_martingale_epsilon: The epsilon parameter for the Power
+ martingale if martingale is set to ``Power``.
+
+ :param params: Additional arguments sent to compute engine.
+
+ .. seealso::
+ :py:func:`IIDSpikeDetector
+ `,
+ :py:func:`SsaSpikeDetector
+ `,
+ :py:func:`SsaChangePointDetector
+ `.
+
+ .. index:: models, timeseries, transform
+
+ Example:
+ .. literalinclude::
+ /../nimbusml/examples/IidChangePointDetector.py
+ :language: python
+ """
+
+ @trace
+ def __init__(
+ self,
+ confidence=95.0,
+ change_history_length=20,
+ martingale='Power',
+ power_martingale_epsilon=0.1,
+ **params):
+ BasePipelineItem.__init__(
+ self, type='transform', **params)
+
+ self.confidence = confidence
+ self.change_history_length = change_history_length
+ self.martingale = martingale
+ self.power_martingale_epsilon = power_martingale_epsilon
+
+ @property
+ def _entrypoint(self):
+ return timeseriesprocessingentrypoints_iidchangepointdetector
+
+ @trace
+ def _get_node(self, **all_args):
+ algo_args = dict(
+ source=self.source,
+ name=self._name_or_source,
+ confidence=self.confidence,
+ change_history_length=self.change_history_length,
+ martingale=self.martingale,
+ power_martingale_epsilon=self.power_martingale_epsilon)
+
+ all_args.update(algo_args)
+ return self._entrypoint(**all_args)
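
The martingale score described in the remarks is, in the exchangeability-martingale literature, a running product of betting functions over the p-value stream. A rough sketch of the standard power martingale (the exact scoring inside ML.NET may differ; epsilon plays the role of power_martingale_epsilon):

def power_martingale(p_values, epsilon=0.1):
    # power betting function b(p) = epsilon * p**(epsilon - 1);
    # the score is the running product of bets over the observed p-values
    score = 1.0
    scores = []
    for p in p_values:
        score *= epsilon * max(p, 1e-8) ** (epsilon - 1)
        scores.append(score)
    return scores

# a run of small p-values makes the score grow rapidly, which is what raises an alert
print(power_martingale([0.5, 0.5, 0.5, 0.01, 0.01, 0.01]))
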
diff --git a/src/python/nimbusml/internal/core/timeseries/_iidspikedetector.py b/src/python/nimbusml/internal/core/timeseries/_iidspikedetector.py
new file mode 100644
index 00000000..00712d77
--- /dev/null
+++ b/src/python/nimbusml/internal/core/timeseries/_iidspikedetector.py
@@ -0,0 +1,91 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+IidSpikeDetector
+"""
+
+__all__ = ["IidSpikeDetector"]
+
+
+from ...entrypoints.timeseriesprocessingentrypoints_iidspikedetector import \
+ timeseriesprocessingentrypoints_iidspikedetector
+from ...utils.utils import trace
+from ..base_pipeline_item import BasePipelineItem, DefaultSignature
+
+
+class IidSpikeDetector(BasePipelineItem, DefaultSignature):
+ """
+
+ This transform detects the spikes in an i.i.d. sequence using adaptive
+ kernel density estimation.
+
+ .. remarks::
+ ``IidSpikeDetector`` assumes a sequence of data points that are
+ independently sampled from one stationary
+ distribution. `Adaptive kernel density estimation
+ `_
+ is used to model the distribution.
+ The `p-value score
+ indicates the likelihood of the current observation according to
+ the estimated distribution. The lower its value, the more likely the
+ current point is an outlier.
+
+ :param confidence: The confidence for spike detection in the range [0,
+ 100].
+
+ :param side: The argument that determines whether to detect positive or
+ negative anomalies, or both. Available options are {``Positive``,
+ ``Negative``, ``TwoSided``}.
+
+ :param pvalue_history_length: The size of the sliding window for computing
+ the p-value.
+
+ :param params: Additional arguments sent to compute engine.
+
+ .. seealso::
+ :py:func:`IIDChangePointDetector
+ `,
+ :py:func:`SsaSpikeDetector
+ `,
+ :py:func:`SsaChangePointDetector
+ `.
+
+ .. index:: models, timeseries, transform
+
+ Example:
+ .. literalinclude:: /../nimbusml/examples/IidSpikeDetector.py
+ :language: python
+ """
+
+ @trace
+ def __init__(
+ self,
+ confidence=99.0,
+ side='TwoSided',
+ pvalue_history_length=100,
+ **params):
+ BasePipelineItem.__init__(
+ self, type='transform', **params)
+
+ self.confidence = confidence
+ self.side = side
+ self.pvalue_history_length = pvalue_history_length
+
+ @property
+ def _entrypoint(self):
+ return timeseriesprocessingentrypoints_iidspikedetector
+
+ @trace
+ def _get_node(self, **all_args):
+ algo_args = dict(
+ source=self.source,
+ name=self._name_or_source,
+ confidence=self.confidence,
+ side=self.side,
+ pvalue_history_length=self.pvalue_history_length)
+
+ all_args.update(algo_args)
+ return self._entrypoint(**all_args)
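
As the remarks state, the alert is driven by the p-value of the current observation under the estimated density. A minimal sketch of that relationship; treating 1 - confidence/100 as the cut-off is an assumption for illustration, since the exact rule is internal to ML.NET:

def is_spike(p_value, confidence=99.0):
    # assumed rule: flag the point when its p-value drops below 1 - confidence/100
    return p_value < 1.0 - confidence / 100.0

print(is_spike(1e-8))    # True, matches the flagged rows in the examples
print(is_spike(0.26))    # False
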
diff --git a/src/python/nimbusml/internal/core/timeseries/_ssachangepointdetector.py b/src/python/nimbusml/internal/core/timeseries/_ssachangepointdetector.py
new file mode 100644
index 00000000..297fae42
--- /dev/null
+++ b/src/python/nimbusml/internal/core/timeseries/_ssachangepointdetector.py
@@ -0,0 +1,138 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+SsaChangePointDetector
+"""
+
+__all__ = ["SsaChangePointDetector"]
+
+
+from ...entrypoints.timeseriesprocessingentrypoints_ssachangepointdetector import \
+ timeseriesprocessingentrypoints_ssachangepointdetector
+from ...utils.utils import trace
+from ..base_pipeline_item import BasePipelineItem, DefaultSignature
+
+
+class SsaChangePointDetector(BasePipelineItem, DefaultSignature):
+ """
+
+ This transform detects the change-points in a seasonal time-series
+ using Singular Spectrum Analysis (SSA).
+
+ .. remarks::
+ `Singular Spectrum Analysis (SSA)
+ `_ is a
+ powerful framework for decomposing the time-series into trend,
+ seasonality and noise components as well as forecasting the future
+ values of the time-series. In order to remove the
+ effect of such components on anomaly detection, this transform adds
+ SSA as a time-series modeler component in the detection pipeline.
+
+ The SSA component will be trained and it predicts the next expected
+ value on the time-series under normal condition; this expected value
+ is
+ further used to calculate the amount of deviation from the normal
+ behavior at that timestamp.
+ The distribution of this deviation is then modeled using `Adaptive
+ kernel density estimation
+ `_.
+
+ This transform detects
+ change points by calculating the martingale score for the sliding
+ window based on the estimated distribution of deviations.
+ The idea is based on the `Exchangeability
+ Martingales `_ that
+ detect a change of distribution over a stream of i.i.d. values. In
+ short, the value of the
+ martingale score starts increasing significantly when a sequence of
+ small p-values are detected in a row; this
+ indicates the change of the distribution of the underlying data
+ generation process.
+
+ :param training_window_size: The number of points, N, from the beginning
+ of the sequence used to train the SSA model.
+
+ :param confidence: The confidence for change point detection in the range
+ [0, 100].
+
+ :param seasonal_window_size: An upper bound, L, on the largest relevant
+ seasonality in the input time-series, which also
+ determines the order of the autoregression of SSA. It must satisfy 2
+ < L < N/2.
+
+ :param change_history_length: The length of the sliding window on p-value
+ for computing the martingale score.
+
+ :param error_function: The function used to compute the error between the
+ expected and the observed value. Possible values are:
+ {``SignedDifference``, ``AbsoluteDifference``, ``SignedProportion``,
+ ``AbsoluteProportion``, ``SquaredDifference``}.
+
+ :param martingale: The type of martingale betting function used for
+ computing the martingale score. Available options are {``Power``,
+ ``Mixture``}.
+
+ :param power_martingale_epsilon: The epsilon parameter for the Power
+ martingale if martingale is set to ``Power``.
+
+ :param params: Additional arguments sent to compute engine.
+
+ .. seealso::
+ :py:func:`IIDChangePointDetector
+ `,
+ :py:func:`IIDSpikeDetector
+ `,
+ :py:func:`SsaSpikeDetector
+ `.
+
+ .. index:: models, timeseries, transform
+
+ Example:
+ .. literalinclude:: /../nimbusml/examples/SsaChangePointDetector.py
+ :language: python
+ """
+
+ @trace
+ def __init__(
+ self,
+ training_window_size=100,
+ confidence=95.0,
+ seasonal_window_size=10,
+ change_history_length=20,
+ error_function='SignedDifference',
+ martingale='Power',
+ power_martingale_epsilon=0.1,
+ **params):
+ BasePipelineItem.__init__(
+ self, type='transform', **params)
+
+ self.training_window_size = training_window_size
+ self.confidence = confidence
+ self.seasonal_window_size = seasonal_window_size
+ self.change_history_length = change_history_length
+ self.error_function = error_function
+ self.martingale = martingale
+ self.power_martingale_epsilon = power_martingale_epsilon
+
+ @property
+ def _entrypoint(self):
+ return timeseriesprocessingentrypoints_ssachangepointdetector
+
+ @trace
+ def _get_node(self, **all_args):
+ algo_args = dict(
+ source=self.source,
+ name=self._name_or_source,
+ training_window_size=self.training_window_size,
+ confidence=self.confidence,
+ seasonal_window_size=self.seasonal_window_size,
+ change_history_length=self.change_history_length,
+ error_function=self.error_function,
+ martingale=self.martingale,
+ power_martingale_epsilon=self.power_martingale_epsilon)
+
+ all_args.update(algo_args)
+ return self._entrypoint(**all_args)
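
The error_function options name how the deviation between the SSA forecast and the observed value is measured. The mapping below is one plausible reading of those names, for illustration only; the exact formulas live inside ML.NET:

# assumed interpretations of the error_function options
error_functions = {
    'SignedDifference':   lambda actual, expected: actual - expected,
    'AbsoluteDifference': lambda actual, expected: abs(actual - expected),
    'SignedProportion':   lambda actual, expected: (actual - expected) / expected,
    'AbsoluteProportion': lambda actual, expected: abs(actual - expected) / abs(expected),
    'SquaredDifference':  lambda actual, expected: (actual - expected) ** 2,
}

# the chosen deviation feeds the kernel-density and martingale scoring described above
print(error_functions['SignedDifference'](99.0, 0.12))
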
diff --git a/src/python/nimbusml/internal/core/timeseries/_ssaspikedetector.py b/src/python/nimbusml/internal/core/timeseries/_ssaspikedetector.py
new file mode 100644
index 00000000..6a1097f8
--- /dev/null
+++ b/src/python/nimbusml/internal/core/timeseries/_ssaspikedetector.py
@@ -0,0 +1,129 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+SsaSpikeDetector
+"""
+
+__all__ = ["SsaSpikeDetector"]
+
+
+from ...entrypoints.timeseriesprocessingentrypoints_ssaspikedetector import \
+ timeseriesprocessingentrypoints_ssaspikedetector
+from ...utils.utils import trace
+from ..base_pipeline_item import BasePipelineItem, DefaultSignature
+
+
+class SsaSpikeDetector(BasePipelineItem, DefaultSignature):
+ """
+
+ This transform detects the spikes in a seasonal time-series using
+ Singular Spectrum Analysis (SSA).
+
+ .. remarks::
+ `Singular Spectrum Analysis (SSA)
+ `_ is a
+ powerful
+ framework for decomposing the time-series into trend, seasonality and
+ noise components as well as forecasting
+ the future values of the time-series. In order to remove the effect
+ of such components on anomaly detection,
+ this transform adds SSA as a time-series modeler component in the
+ detection pipeline.
+
+ The SSA component will be trained and it predicts the next expected
+ value on the time-series under normal condition; this expected value
+ is
+ further used to calculate the amount of deviation from the normal
+ (predicted) behavior at that timestamp.
+ The distribution of this deviation is then modeled using `Adaptive
+ kernel density estimation
+ `_.
+
+ The `p-value score for the
+ current deviation is calculated based on the
+ estimated distribution. The lower its value, the more likely the
+ current point is an outlier.
+
+ :param training_window_size: The number of points, N, from the beginning
+ of the sequence used to train the SSA
+ model.
+
+ :param confidence: The confidence for spike detection in the range [0,
+ 100].
+
+ :param seasonal_window_size: An upper bound, L, on the largest relevant
+ seasonality in the input time-series, which
+ also determines the order of the autoregression of SSA. It must
+ satisfy 2 < L < N/2.
+
+ :param side: The argument that determines whether to detect positive or
+ negative anomalies, or both. Available
+ options are {``Positive``, ``Negative``, ``TwoSided``}.
+
+ :param pvalue_history_length: The size of the sliding window for computing
+ the p-value.
+
+ :param error_function: The function used to compute the error between the
+ expected and the observed value. Possible
+ values are {``SignedDifference``, ``AbsoluteDifference``,
+ ``SignedProportion``, ``AbsoluteProportion``,
+ ``SquaredDifference``}.
+
+ :param params: Additional arguments sent to compute engine.
+
+ .. seealso::
+ :py:func:`IIDChangePointDetector
+ `,
+ :py:func:`IIDSpikeDetector
+ `,
+ :py:func:`SsaChangePointDetector
+ `.
+
+ .. index:: models, timeseries, transform
+
+ Example:
+ .. literalinclude:: /../nimbusml/examples/SsaSpikeDetector.py
+ :language: python
+ """
+
+ @trace
+ def __init__(
+ self,
+ training_window_size=100,
+ confidence=99.0,
+ seasonal_window_size=10,
+ side='TwoSided',
+ pvalue_history_length=100,
+ error_function='SignedDifference',
+ **params):
+ BasePipelineItem.__init__(
+ self, type='transform', **params)
+
+ self.training_window_size = training_window_size
+ self.confidence = confidence
+ self.seasonal_window_size = seasonal_window_size
+ self.side = side
+ self.pvalue_history_length = pvalue_history_length
+ self.error_function = error_function
+
+ @property
+ def _entrypoint(self):
+ return timeseriesprocessingentrypoints_ssaspikedetector
+
+ @trace
+ def _get_node(self, **all_args):
+ algo_args = dict(
+ source=self.source,
+ name=self._name_or_source,
+ training_window_size=self.training_window_size,
+ confidence=self.confidence,
+ seasonal_window_size=self.seasonal_window_size,
+ side=self.side,
+ pvalue_history_length=self.pvalue_history_length,
+ error_function=self.error_function)
+
+ all_args.update(algo_args)
+ return self._entrypoint(**all_args)
diff --git a/src/python/nimbusml/tests/timeseries/__init__.py b/src/python/nimbusml/tests/timeseries/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/nimbusml/tests/timeseries/test_iidchangepointdetector.py b/src/python/nimbusml/tests/timeseries/test_iidchangepointdetector.py
new file mode 100644
index 00000000..e15863d1
--- /dev/null
+++ b/src/python/nimbusml/tests/timeseries/test_iidchangepointdetector.py
@@ -0,0 +1,47 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+
+import unittest
+
+import pandas as pd
+from nimbusml import Pipeline, FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.timeseries import IidChangePointDetector
+
+
+class TestIidChangePointDetector(unittest.TestCase):
+
+ def test_correct_data_is_marked_as_change_point(self):
+ input_data = [5, 5, 5, 5, 5, 5, 5, 5]
+ input_data.extend([7, 7, 7, 7, 7, 7, 7, 7])
+ X_train = pd.Series(input_data, name="ts")
+
+ cpd = IidChangePointDetector(confidence=95, change_history_length=4) << {'result': 'ts'}
+ data = cpd.fit_transform(X_train)
+
+ self.assertEqual(data.loc[8, 'result.Alert'], 1.0)
+
+ data = data.loc[data['result.Alert'] == 1.0]
+ self.assertEqual(len(data), 1)
+
+ def test_multiple_user_specified_columns_is_not_allowed(self):
+ path = get_dataset('timeseries').as_filepath()
+ data = FileDataStream.read_csv(path)
+
+ try:
+ pipeline = Pipeline([
+ IidChangePointDetector(columns=['t2', 't3'], change_history_length=5)
+ ])
+ pipeline.fit_transform(data)
+
+ except RuntimeError as e:
+ self.assertTrue('Only one column is allowed' in str(e))
+ return
+
+ self.fail()
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/python/nimbusml/tests/timeseries/test_iidspikedetector.py b/src/python/nimbusml/tests/timeseries/test_iidspikedetector.py
new file mode 100644
index 00000000..61a105f8
--- /dev/null
+++ b/src/python/nimbusml/tests/timeseries/test_iidspikedetector.py
@@ -0,0 +1,62 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+
+import unittest
+
+import pandas as pd
+from nimbusml import Pipeline, FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.timeseries import IidSpikeDetector
+from nimbusml.preprocessing.schema import TypeConverter
+
+
+class TestIidSpikeDetector(unittest.TestCase):
+
+ def test_correct_data_is_marked_as_anomaly(self):
+ X_train = pd.Series([5, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5], name="ts")
+ isd = IidSpikeDetector(confidence=95, pvalue_history_length=3) << {'result': 'ts'}
+ data = isd.fit_transform(X_train)
+
+ data = data.loc[data['result.Alert'] == 1.0]
+ self.assertEqual(len(data), 1)
+ self.assertEqual(data.iloc[0]['ts'], 10.0)
+
+ def test_multiple_user_specified_columns_is_not_allowed(self):
+ path = get_dataset('timeseries').as_filepath()
+ data = FileDataStream.read_csv(path)
+
+ try:
+ pipeline = Pipeline([
+ IidSpikeDetector(columns=['t2', 't3'], pvalue_history_length=5)
+ ])
+ pipeline.fit_transform(data)
+
+ except RuntimeError as e:
+ self.assertTrue('Only one column is allowed' in str(e))
+ return
+
+ self.fail()
+
+ def test_pre_transform_does_not_convert_non_time_series_columns(self):
+ X_train = pd.DataFrame({
+ 'Date': ['2017-01', '2017-02', '2017-03'],
+ 'Values': [5.0, 5.0, 5.0]})
+
+ self.assertEqual(len(X_train.dtypes), 2)
+ self.assertEqual(str(X_train.dtypes[0]), 'object')
+ self.assertTrue(str(X_train.dtypes[1]).startswith('float'))
+
+ isd = IidSpikeDetector(confidence=95, pvalue_history_length=3) << 'Values'
+ data = isd.fit_transform(X_train)
+
+ self.assertEqual(len(data.dtypes), 4)
+ self.assertEqual(str(data.dtypes[0]), 'object')
+ self.assertTrue(str(data.dtypes[1]).startswith('float'))
+ self.assertTrue(str(data.dtypes[2]).startswith('float'))
+ self.assertTrue(str(data.dtypes[3]).startswith('float'))
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/python/nimbusml/tests/timeseries/test_ssachangepointdetector.py b/src/python/nimbusml/tests/timeseries/test_ssachangepointdetector.py
new file mode 100644
index 00000000..d3ad27ef
--- /dev/null
+++ b/src/python/nimbusml/tests/timeseries/test_ssachangepointdetector.py
@@ -0,0 +1,61 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+
+import unittest
+
+import numpy as np
+import pandas as pd
+from nimbusml import Pipeline, FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.timeseries import SsaChangePointDetector
+
+
+class TestSsaChangePointDetector(unittest.TestCase):
+
+ def test_correct_data_is_marked_as_change_point(self):
+ seasonality_size = 5
+ seasonal_data = np.arange(seasonality_size)
+
+ data = np.tile(seasonal_data, 3)
+ data = np.append(data, [0, 100, 200, 300, 400]) # change distribution
+
+ X_train = pd.Series(data, name="ts")
+
+ training_seasons = 3
+ training_size = seasonality_size * training_seasons
+
+ cpd = SsaChangePointDetector(confidence=95,
+ change_history_length=8,
+ training_window_size=training_size,
+ seasonal_window_size=seasonality_size + 1) << {'result': 'ts'}
+
+ cpd.fit(X_train, verbose=1)
+ data = cpd.transform(X_train)
+
+ self.assertEqual(data.loc[16, 'result.Alert'], 1.0)
+
+ data = data.loc[data['result.Alert'] == 1.0]
+ self.assertEqual(len(data), 1)
+
+ def test_multiple_user_specified_columns_is_not_allowed(self):
+ path = get_dataset('timeseries').as_filepath()
+ data = FileDataStream.read_csv(path)
+
+ try:
+ pipeline = Pipeline([
+ SsaChangePointDetector(columns=['t2', 't3'], change_history_length=5)
+ ])
+ pipeline.fit_transform(data)
+
+ except RuntimeError as e:
+ self.assertTrue('Only one column is allowed' in str(e))
+ return
+
+ self.fail()
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/python/nimbusml/tests/timeseries/test_ssaspikedetector.py b/src/python/nimbusml/tests/timeseries/test_ssaspikedetector.py
new file mode 100644
index 00000000..74610661
--- /dev/null
+++ b/src/python/nimbusml/tests/timeseries/test_ssaspikedetector.py
@@ -0,0 +1,60 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+
+import unittest
+
+import numpy as np
+import pandas as pd
+from nimbusml import Pipeline, FileDataStream
+from nimbusml.datasets import get_dataset
+from nimbusml.timeseries import SsaSpikeDetector
+
+
+class TestSsaSpikeDetector(unittest.TestCase):
+
+ def test_correct_data_is_marked_as_spike(self):
+ seasonality_size = 5
+ seasonal_data = np.arange(seasonality_size)
+
+ data = np.tile(seasonal_data, 3)
+ data = np.append(data, [100]) # add a spike
+ data = np.append(data, seasonal_data)
+
+ X_train = pd.Series(data, name="ts")
+ training_seasons = 3
+ training_size = seasonality_size * training_seasons
+
+ ssd = SsaSpikeDetector(confidence=95,
+ pvalue_history_length=8,
+ training_window_size=training_size,
+ seasonal_window_size=seasonality_size + 1) << {'result': 'ts'}
+
+ ssd.fit(X_train)
+ data = ssd.transform(X_train)
+
+ self.assertEqual(data.loc[15, 'result.Alert'], 1.0)
+
+ data = data.loc[data['result.Alert'] == 1.0]
+ self.assertEqual(len(data), 1)
+
+ def test_multiple_user_specified_columns_is_not_allowed(self):
+ path = get_dataset('timeseries').as_filepath()
+ data = FileDataStream.read_csv(path)
+
+ try:
+ pipeline = Pipeline([
+ SsaSpikeDetector(columns=['t2', 't3'], pvalue_history_length=5)
+ ])
+ pipeline.fit_transform(data)
+
+ except RuntimeError as e:
+ self.assertTrue('Only one column is allowed' in str(e))
+ return
+
+ self.fail()
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/src/python/nimbusml/timeseries/__init__.py b/src/python/nimbusml/timeseries/__init__.py
new file mode 100644
index 00000000..13db4520
--- /dev/null
+++ b/src/python/nimbusml/timeseries/__init__.py
@@ -0,0 +1,11 @@
+from ._iidspikedetector import IidSpikeDetector
+from ._iidchangepointdetector import IidChangePointDetector
+from ._ssaspikedetector import SsaSpikeDetector
+from ._ssachangepointdetector import SsaChangePointDetector
+
+__all__ = [
+ 'IidSpikeDetector',
+ 'IidChangePointDetector',
+ 'SsaSpikeDetector',
+ 'SsaChangePointDetector'
+]
diff --git a/src/python/nimbusml/timeseries/_iidchangepointdetector.py b/src/python/nimbusml/timeseries/_iidchangepointdetector.py
new file mode 100644
index 00000000..0df53ba7
--- /dev/null
+++ b/src/python/nimbusml/timeseries/_iidchangepointdetector.py
@@ -0,0 +1,119 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+IidChangePointDetector
+"""
+
+__all__ = ["IidChangePointDetector"]
+
+
+from sklearn.base import TransformerMixin
+
+from ..base_transform import BaseTransform
+from ..internal.core.timeseries._iidchangepointdetector import \
+ IidChangePointDetector as core
+from ..internal.utils.utils import trace
+
+
+class IidChangePointDetector(
+ core,
+ BaseTransform,
+ TransformerMixin):
+ """
+
+ This transform detects the change-points in an i.i.d. sequence using
+ adaptive kernel density estimation and martingales.
+
+ .. remarks::
+ ``IidChangePointDetector`` assumes a sequence of data points that are
+ independently sampled from one
+ stationary distribution. `Adaptive kernel density estimation
+ `_
+ is used to model the distribution.
+
+ This transform detects
+ change points by calculating the martingale score for the sliding
+ window based on the estimated distribution.
+ The idea is based on the `Exchangeability
+ Martingales `_ that
+ detect a change of distribution over a stream of i.i.d. values. In
+ short, the value of the
+ martingale score starts increasing significantly when a sequence of
+ small p-values are detected in a row; this
+ indicates the change of the distribution of the underlying data
+ generation process.
+
+ :param columns: see `Columns `_.
+
+ :param confidence: The confidence for change point detection in the range
+ [0, 100]. Used to set the threshold of the martingale score for
+ triggering alert.
+
+ :param change_history_length: The length of the sliding window on p-value
+ for computing the martingale score.
+
+ :param martingale: The type of martingale betting function used for
+ computing the martingale score. Available options are {``Power``,
+ ``Mixture``}.
+
+ :param power_martingale_epsilon: The epsilon parameter for the Power
+ martingale if martingale is set to ``Power``.
+
+ :param params: Additional arguments sent to compute engine.
+
+ .. seealso::
+ :py:func:`IIDSpikeDetector
+ `,
+ :py:func:`SsaSpikeDetector
+ `,
+ :py:func:`SsaChangePointDetector
+ `.
+
+ .. index:: models, timeseries, transform
+
+ Example:
+ .. literalinclude::
+ /../nimbusml/examples/IidChangePointDetector.py
+ :language: python
+ """
+
+ @trace
+ def __init__(
+ self,
+ confidence=95.0,
+ change_history_length=20,
+ martingale='Power',
+ power_martingale_epsilon=0.1,
+ columns=None,
+ **params):
+
+ if columns:
+ params['columns'] = columns
+ BaseTransform.__init__(self, **params)
+ core.__init__(
+ self,
+ confidence=confidence,
+ change_history_length=change_history_length,
+ martingale=martingale,
+ power_martingale_epsilon=power_martingale_epsilon,
+ **params)
+ self._columns = columns
+
+ def get_params(self, deep=False):
+ """
+ Get the parameters for this operator.
+ """
+ return core.get_params(self)
+
+ def _nodes_with_presteps(self):
+ """
+ Inserts preprocessing before this one.
+ """
+ from ..preprocessing.schema import TypeConverter
+ return [
+ TypeConverter(
+ result_type='R4')._steal_io(self),
+ self]
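
Both column-selection styles used in the new examples map an input column to a named output prefix, so the two constructions below should be interchangeable (a sketch reusing this class; the column names are taken from the examples):

from nimbusml.timeseries import IidChangePointDetector

# keyword form, as in examples/IidChangePointDetector.py
cpd_a = IidChangePointDetector(change_history_length=4, columns={'t2_cp': 't2'})

# operator form, as in the examples_from_dataframe scripts
cpd_b = IidChangePointDetector(change_history_length=4) << {'t2_cp': 't2'}
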
diff --git a/src/python/nimbusml/timeseries/_iidspikedetector.py b/src/python/nimbusml/timeseries/_iidspikedetector.py
new file mode 100644
index 00000000..51582ae8
--- /dev/null
+++ b/src/python/nimbusml/timeseries/_iidspikedetector.py
@@ -0,0 +1,101 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+IidSpikeDetector
+"""
+
+__all__ = ["IidSpikeDetector"]
+
+
+from sklearn.base import TransformerMixin
+
+from ..base_transform import BaseTransform
+from ..internal.core.timeseries._iidspikedetector import \
+ IidSpikeDetector as core
+from ..internal.utils.utils import trace
+
+
+class IidSpikeDetector(core, BaseTransform, TransformerMixin):
+ """
+
+ This transform detects the spikes in an i.i.d. sequence using adaptive
+ kernel density estimation.
+
+ .. remarks::
+ ``IidSpikeDetector`` assumes a sequence of data points that are
+ independently sampled from one stationary
+ distribution. `Adaptive kernel density estimation
+ `_
+ is used to model the distribution.
+ The `p-value score
+ indicates the likelihood of the current observation according to
+ the estimated distribution. The lower its value, the more likely the
+ current point is an outlier.
+
+ :param columns: see `Columns `_.
+
+ :param confidence: The confidence for spike detection in the range [0,
+ 100].
+
+ :param side: The argument that determines whether to detect positive or
+ negative anomalies, or both. Available options are {``Positive``,
+ ``Negative``, ``TwoSided``}.
+
+ :param pvalue_history_length: The size of the sliding window for computing
+ the p-value.
+
+ :param params: Additional arguments sent to compute engine.
+
+ .. seealso::
+ :py:func:`IIDChangePointDetector
+ `,
+ :py:func:`SsaSpikeDetector
+ `,
+ :py:func:`SsaChangePointDetector
+ `.
+
+ .. index:: models, timeseries, transform
+
+ Example:
+ .. literalinclude:: /../nimbusml/examples/IidSpikeDetector.py
+ :language: python
+ """
+
+ @trace
+ def __init__(
+ self,
+ confidence=99.0,
+ side='TwoSided',
+ pvalue_history_length=100,
+ columns=None,
+ **params):
+
+ if columns:
+ params['columns'] = columns
+ BaseTransform.__init__(self, **params)
+ core.__init__(
+ self,
+ confidence=confidence,
+ side=side,
+ pvalue_history_length=pvalue_history_length,
+ **params)
+ self._columns = columns
+
+ def get_params(self, deep=False):
+ """
+ Get the parameters for this operator.
+ """
+ return core.get_params(self)
+
+ def _nodes_with_presteps(self):
+ """
+ Inserts preprocessing before this one.
+ """
+ from ..preprocessing.schema import TypeConverter
+ return [
+ TypeConverter(
+ result_type='R4')._steal_io(self),
+ self]
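
_nodes_with_presteps quietly prepends a TypeConverter so the detector always receives single-precision input. Conceptually the generated chain behaves like the explicit pipeline below; this is a sketch of the equivalence, not a step users need to write:

from nimbusml import Pipeline
from nimbusml.preprocessing.schema import TypeConverter
from nimbusml.timeseries import IidSpikeDetector

# roughly what the prestep expands to: convert to R4 (float32), then detect
pipeline = Pipeline([
    TypeConverter(columns=['ts'], result_type='R4'),
    IidSpikeDetector(columns={'result': 'ts'}, pvalue_history_length=3),
])
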
diff --git a/src/python/nimbusml/timeseries/_ssachangepointdetector.py b/src/python/nimbusml/timeseries/_ssachangepointdetector.py
new file mode 100644
index 00000000..3b02d49e
--- /dev/null
+++ b/src/python/nimbusml/timeseries/_ssachangepointdetector.py
@@ -0,0 +1,147 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+SsaChangePointDetector
+"""
+
+__all__ = ["SsaChangePointDetector"]
+
+
+from sklearn.base import TransformerMixin
+
+from ..base_transform import BaseTransform
+from ..internal.core.timeseries._ssachangepointdetector import \
+ SsaChangePointDetector as core
+from ..internal.utils.utils import trace
+
+
+class SsaChangePointDetector(
+ core,
+ BaseTransform,
+ TransformerMixin):
+ """
+
+ This transform detects the change-points in a seasonal time-series
+ using Singular Spectrum Analysis (SSA).
+
+ .. remarks::
+ `Singular Spectrum Analysis (SSA)
+ `_ is a
+ powerful framework for decomposing the time-series into trend,
+ seasonality and noise components as well as forecasting the future
+ values of the time-series. In order to remove the
+ effect of such components on anomaly detection, this transform adds
+ SSA as a time-series modeler component in the detection pipeline.
+
+ The SSA component will be trained and it predicts the next expected
+ value on the time-series under normal condition; this expected value
+ is
+ further used to calculate the amount of deviation from the normal
+ behavior at that timestamp.
+ The distribution of this deviation is then modeled using `Adaptive
+ kernel density estimation
+ `_.
+
+ This transform detects
+ change points by calculating the martingale score for the sliding
+ window based on the estimated distribution of deviations.
+ The idea is based on the `Exchangeability
+ Martingales `_ that
+ detect a change of distribution over a stream of i.i.d. values. In
+ short, the value of the
+ martingale score starts increasing significantly when a sequence of
+ small p-values are detected in a row; this
+ indicates the change of the distribution of the underlying data
+ generation process.
+
+ :param columns: see `Columns `_.
+
+ :param training_window_size: The number of points, N, from the beginning
+ of the sequence used to train the SSA model.
+
+ :param confidence: The confidence for change point detection in the range
+ [0, 100].
+
+ :param seasonal_window_size: An upper bound, L, on the largest relevant
+ seasonality in the input time-series, which also
+ determines the order of the autoregression of SSA. It must satisfy 2
+ < L < N/2.
+
+ :param change_history_length: The length of the sliding window on p-value
+ for computing the martingale score.
+
+ :param error_function: The function used to compute the error between the
+ expected and the observed value. Possible values are:
+ {``SignedDifference``, ``AbsoluteDifference``, ``SignedProportion``,
+ ``AbsoluteProportion``, ``SquaredDifference``}.
+
+ :param martingale: The type of martingale betting function used for
+ computing the martingale score. Available options are {``Power``,
+ ``Mixture``}.
+
+ :param power_martingale_epsilon: The epsilon parameter for the Power
+ martingale if martingale is set to ``Power``.
+
+ :param params: Additional arguments sent to compute engine.
+
+ .. seealso::
+ :py:func:`IIDChangePointDetector
+ `,
+ :py:func:`IIDSpikeDetector
+ `,
+ :py:func:`SsaSpikeDetector
+ `.
+
+ .. index:: models, timeseries, transform
+
+ Example:
+ .. literalinclude:: /../nimbusml/examples/SsaChangePointDetector.py
+ :language: python
+ """
+
+ @trace
+ def __init__(
+ self,
+ training_window_size=100,
+ confidence=95.0,
+ seasonal_window_size=10,
+ change_history_length=20,
+ error_function='SignedDifference',
+ martingale='Power',
+ power_martingale_epsilon=0.1,
+ columns=None,
+ **params):
+
+ if columns:
+ params['columns'] = columns
+ BaseTransform.__init__(self, **params)
+ core.__init__(
+ self,
+ training_window_size=training_window_size,
+ confidence=confidence,
+ seasonal_window_size=seasonal_window_size,
+ change_history_length=change_history_length,
+ error_function=error_function,
+ martingale=martingale,
+ power_martingale_epsilon=power_martingale_epsilon,
+ **params)
+ self._columns = columns
+
+ def get_params(self, deep=False):
+ """
+ Get the parameters for this operator.
+ """
+ return core.get_params(self)
+
+ def _nodes_with_presteps(self):
+ """
+ Inserts preprocessing before this one.
+ """
+ from ..preprocessing.schema import TypeConverter
+ return [
+ TypeConverter(
+ result_type='R4')._steal_io(self),
+ self]
diff --git a/src/python/nimbusml/timeseries/_ssaspikedetector.py b/src/python/nimbusml/timeseries/_ssaspikedetector.py
new file mode 100644
index 00000000..ad831a15
--- /dev/null
+++ b/src/python/nimbusml/timeseries/_ssaspikedetector.py
@@ -0,0 +1,136 @@
+# --------------------------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------------------------
+# - Generated by tools/entrypoint_compiler.py: do not edit by hand
+"""
+SsaSpikeDetector
+"""
+
+__all__ = ["SsaSpikeDetector"]
+
+
+from sklearn.base import TransformerMixin
+
+from ..base_transform import BaseTransform
+from ..internal.core.timeseries._ssaspikedetector import \
+ SsaSpikeDetector as core
+from ..internal.utils.utils import trace
+
+
+class SsaSpikeDetector(core, BaseTransform, TransformerMixin):
+ """
+
+ This transform detects the spikes in a seasonal time-series using
+ Singular Spectrum Analysis (SSA).
+
+ .. remarks::
+ `Singular Spectrum Analysis (SSA)
+ `_ is a
+ powerful
+ framework for decomposing the time-series into trend, seasonality and
+ noise components as well as forecasting
+ the future values of the time-series. In order to remove the effect
+ of such components on anomaly detection,
+ this transform adds SSA as a time-series modeler component in the
+ detection pipeline.
+
+ The SSA component will be trained and it predicts the next expected
+ value on the time-series under normal condition; this expected value
+ is
+ further used to calculate the amount of deviation from the normal
+ (predicted) behavior at that timestamp.
+ The distribution of this deviation is then modeled using `Adaptive
+ kernel density estimation
+ `_.
+
+ The `p-value score for the
+ current deviation is calculated based on the
+ estimated distribution. The lower its value, the more likely the
+ current point is an outlier.
+
+ :param columns: see `Columns `_.
+
+ :param training_window_size: The number of points, N, from the beginning
+ of the sequence used to train the SSA
+ model.
+
+ :param confidence: The confidence for spike detection in the range [0,
+ 100].
+
+ :param seasonal_window_size: An upper bound, L, on the largest relevant
+ seasonality in the input time-series, which
+ also determines the order of the autoregression of SSA. It must
+ satisfy 2 < L < N/2.
+
+ :param side: The argument that determines whether to detect positive or
+ negative anomalies, or both. Available
+ options are {``Positive``, ``Negative``, ``TwoSided``}.
+
+ :param pvalue_history_length: The size of the sliding window for computing
+ the p-value.
+
+ :param error_function: The function used to compute the error between the
+ expected and the observed value. Possible
+ values are {``SignedDifference``, ``AbsoluteDifference``,
+ ``SignedProportion``, ``AbsoluteProportion``,
+ ``SquaredDifference``}.
+
+ :param params: Additional arguments sent to compute engine.
+
+ .. seealso::
+ :py:func:`IIDChangePointDetector
+ `,
+ :py:func:`IIDSpikeDetector
+ `,
+ :py:func:`SsaChangePointDetector
+ `.
+
+ .. index:: models, timeseries, transform
+
+ Example:
+ .. literalinclude:: /../nimbusml/examples/SsaSpikeDetector.py
+ :language: python
+ """
+
+ @trace
+ def __init__(
+ self,
+ training_window_size=100,
+ confidence=99.0,
+ seasonal_window_size=10,
+ side='TwoSided',
+ pvalue_history_length=100,
+ error_function='SignedDifference',
+ columns=None,
+ **params):
+
+ if columns:
+ params['columns'] = columns
+ BaseTransform.__init__(self, **params)
+ core.__init__(
+ self,
+ training_window_size=training_window_size,
+ confidence=confidence,
+ seasonal_window_size=seasonal_window_size,
+ side=side,
+ pvalue_history_length=pvalue_history_length,
+ error_function=error_function,
+ **params)
+ self._columns = columns
+
+ def get_params(self, deep=False):
+ """
+ Get the parameters for this operator.
+ """
+ return core.get_params(self)
+
+ def _nodes_with_presteps(self):
+ """
+ Inserts preprocessing before this one.
+ """
+ from ..preprocessing.schema import TypeConverter
+ return [
+ TypeConverter(
+ result_type='R4')._steal_io(self),
+ self]
diff --git a/src/python/tests/test_estimator_checks.py b/src/python/tests/test_estimator_checks.py
index 07b1453c..5dac16f5 100644
--- a/src/python/tests/test_estimator_checks.py
+++ b/src/python/tests/test_estimator_checks.py
@@ -16,6 +16,8 @@
from nimbusml.internal.entrypoints._ngramextractor_ngram import n_gram
from nimbusml.preprocessing import TensorFlowScorer
from nimbusml.preprocessing.filter import SkipFilter, TakeFilter
+from nimbusml.timeseries import (IidSpikeDetector, IidChangePointDetector,
+ SsaSpikeDetector, SsaChangePointDetector)
from sklearn.utils.estimator_checks import _yield_all_checks, MULTI_OUTPUT
this = os.path.abspath(os.path.dirname(__file__))
@@ -53,6 +55,13 @@
# fix pending in PR, bug cant handle csr matrix
'RangeFilter': 'check_estimators_dtypes, '
'check_estimator_sparse_data',
+ # time series do not currently support sparse matrices
+ 'IidSpikeDetector': 'check_estimator_sparse_data',
+ 'IidChangePointDetector': 'check_estimator_sparse_data',
+ 'SsaSpikeDetector': 'check_estimator_sparse_data, '
+ 'check_fit2d_1sample', # SSA requires more than one sample
+ 'SsaChangePointDetector': 'check_estimator_sparse_data, '
+ 'check_fit2d_1sample', # SSA requires more than one sample
# bug, low tolerance
'FastLinearRegressor': 'check_supervised_y_2d, '
'check_regressor_data_not_an_array, '
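
The exclusion values in this dict rely on Python's implicit concatenation of adjacent string literals, so every fragment except the last needs its trailing ', ' separator. A quick illustration of why it matters:

joined = ('check_estimator_sparse_data, '
          'check_fit2d_1sample')
print(joined)   # 'check_estimator_sparse_data, check_fit2d_1sample'

broken = ('check_estimator_sparse_data'
          'check_fit2d_1sample')
print(broken)   # one unusable name: 'check_estimator_sparse_datacheck_fit2d_1sample'
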
@@ -180,6 +189,10 @@
'NGramFeaturizer': NGramFeaturizer(word_feature_extractor=n_gram()),
'SkipFilter': SkipFilter(count=5),
'TakeFilter': TakeFilter(count=100000),
+ 'IidSpikeDetector': IidSpikeDetector(columns=['F0']),
+ 'IidChangePointDetector': IidChangePointDetector(columns=['F0']),
+ 'SsaSpikeDetector': SsaSpikeDetector(columns=['F0'], seasonal_window_size=2),
+ 'SsaChangePointDetector': SsaChangePointDetector(columns=['F0'], seasonal_window_size=2),
'TensorFlowScorer': TensorFlowScorer(
model_location=os.path.join(
this,
diff --git a/src/python/tests_extended/test_docs_example.py b/src/python/tests_extended/test_docs_example.py
index 50333cd9..23fb2f82 100644
--- a/src/python/tests_extended/test_docs_example.py
+++ b/src/python/tests_extended/test_docs_example.py
@@ -70,6 +70,14 @@ def test_examples(self):
'NaiveBayesClassifier_df.py'
]:
continue
+ # skip for ubuntu 14 tests
+ if platform.linux_distribution()[0] == 'Ubuntu' and platform.linux_distribution()[1][:2] == '14':
+ if name in [
+ # libdl needs to be set up
+ 'Image.py',
+ 'Image_df.py'
+ ]:
+ continue
# skip for centos7 tests
if platform.linux_distribution()[0] == 'CentOS Linux':
if name in [
diff --git a/src/python/tools/compiler_utils.py b/src/python/tools/compiler_utils.py
index d7462c78..9a5e1e07 100644
--- a/src/python/tools/compiler_utils.py
+++ b/src/python/tools/compiler_utils.py
@@ -120,6 +120,10 @@ def _nodes_with_presteps(self):
'''from ..schema import TypeConverter
return [TypeConverter(result_type='R4')._steal_io(self), self]'''
+timeseries_to_r4_converter = \
+ '''from ..preprocessing.schema import TypeConverter
+return [TypeConverter(result_type='R4')._steal_io(self), self]'''
+
_presteps = {
'MinMaxScaler': int_to_r4_converter,
'MeanVarianceScaler': int_to_r4_converter,
@@ -127,6 +131,11 @@ def _nodes_with_presteps(self):
'Binner': int_to_r4_converter,
# 'SupervisedBinner': int_to_r4_converter, # not exist in nimbusml
+ 'IidSpikeDetector': timeseries_to_r4_converter,
+ 'IidChangePointDetector': timeseries_to_r4_converter,
+ 'SsaSpikeDetector': timeseries_to_r4_converter,
+ 'SsaChangePointDetector': timeseries_to_r4_converter,
+
'PcaTransformer':
'''from ..preprocessing.schema import TypeConverter
if type(self._columns) == dict:
diff --git a/src/python/tools/manifest_diff.json b/src/python/tools/manifest_diff.json
index c19aad98..25708e21 100644
--- a/src/python/tools/manifest_diff.json
+++ b/src/python/tools/manifest_diff.json
@@ -539,6 +539,30 @@
"Module": "decomposition",
"Type": "Anomaly"
},
+ {
+ "Name": "TimeSeriesProcessingEntryPoints.IidSpikeDetector",
+ "NewName": "IidSpikeDetector",
+ "Module": "timeseries",
+ "Type": "Transform"
+ },
+ {
+ "Name": "TimeSeriesProcessingEntryPoints.IidChangePointDetector",
+ "NewName": "IidChangePointDetector",
+ "Module": "timeseries",
+ "Type": "Transform"
+ },
+ {
+ "Name": "TimeSeriesProcessingEntryPoints.SsaSpikeDetector",
+ "NewName": "SsaSpikeDetector",
+ "Module": "timeseries",
+ "Type": "Transform"
+ },
+ {
+ "Name": "TimeSeriesProcessingEntryPoints.SsaChangePointDetector",
+ "NewName": "SsaChangePointDetector",
+ "Module": "timeseries",
+ "Type": "Transform"
+ },
{
"Name": "Trainers.PoissonRegressor",
"NewName": "PoissonRegressionRegressor",