From a438f7dfd75d97fba4fda3dcda47b12c990c1d0f Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 1 Jul 2021 13:57:33 +0300 Subject: [PATCH 01/16] extract kernel-ridge-regression example from st/examples (#234) --- examples/kernel-ridge-regression/script.jl | 195 +++++++++++++++------ 1 file changed, 144 insertions(+), 51 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 5064a8a45..a76905462 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -1,66 +1,159 @@ # # Kernel Ridge Regression # -# !!! warning -# This example is under construction - -# Setup +# Building on linear regression, we can fit non-linear data sets by introducing a feature space. In a higher-dimensional feature space, we can overfit the data; ridge regression introduces regularization to avoid this. In this notebook we show how we can use KernelFunctions.jl for *kernel* ridge regression. +## Loading and setup of required packages using KernelFunctions -using MLDataUtils -using Zygote -using Flux -using Distributions, LinearAlgebra -using Plots - -Flux.@functor SqExponentialKernel -Flux.@functor ScaleTransform -Flux.@functor KernelSum -Flux.@functor Matern32Kernel - -# Generate date - -xmin = -3; -xmax = 3; -x = range(xmin, xmax; length=100) -x_test = range(xmin, xmax; length=300) -x, y = noisy_function(sinc, x; noise=0.1) -X = RowVecs(reshape(x, :, 1)) -X_test = RowVecs(reshape(x_test, :, 1)) -#md nothing #hide - -# Set up kernel and regularisation parameter - -k = SqExponentialKernel() + Matern32Kernel() ∘ ScaleTransform(2.0) -λ = [-1.0] -#md nothing #hide +using LinearAlgebra +using Distributions -# +## Plotting +using Plots; +default(; lw=2.0, legendfontsize=15.0); -f(x, k, λ) = kernelmatrix(k, x, X) / (kernelmatrix(k, X) + exp(λ[1]) * I) * y -f(X, k, 1.0) +using Random: seed! +seed!(42); -# +# ## From linear regression to ridge regression +# Here we use a one-dimensional toy problem. We generate data using the fourth-order polynomial $f(x) = (x+4)(x+1)(x-1)(x-3)$: + +f_truth(x) = (x + 4) * (x + 1) * (x - 1) * (x - 3) + +x_train = collect(-5:0.5:5) +x_test = collect(-5:0.1:5) + +noise = rand(Uniform(-10, 10), size(x_train)) +y_train = f_truth.(x_train) + noise +y_test = f_truth.(x_test) + +plot(x_test, y_test; label=raw"$f(x)$") +scatter!(x_train, y_train; label="observations") + +# For training inputs $\mathrm{X}=(\mathbf{x}_n)_{n=1}^N$ and observations $\mathbf{y}=(y_n)_{n=1}^N$, the linear regression weights $\mathbf{w}$ using the least-squares estimator are given by +# ```math +# \mathbf{w} = (\mathrm{X}^\top \mathrm{X})^{-1} \mathrm{X}^\top \mathbf{y} +# ``` +# We predict at test inputs $\mathbf{x}_*$ using +# ```math +# \hat{y}_* = \mathbf{x}_*^\top \mathbf{w} +# ``` +# This is implemented by `linear_regression`: + +function linear_regression(X, y, Xstar) + weights = (X' * X) \ (X' * y) + return Xstar * weights +end + +# A linear regression fit to the above data set: + +y_pred = linear_regression(x_train, y_train, x_test) +scatter(x_train, y_train; label="observations") +plot!(x_test, y_pred; label="linear fit") + +# We can improve the fit by including additional features, i.e. generalizing to $\mathrm{X} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. 
Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: -loss(k, λ) = (ŷ -> sum(y - ŷ) / length(y) + exp(λ[1]) * norm(ŷ))(f(X, k, λ)) -loss(k, λ) +function featurize_poly(x; degree=1) + xcols = [x .^ d for d in 0:degree] + return hcat(xcols...) +end + +function featurized_fit_and_plot(degree) + X = featurize_poly(x_train; degree=degree) + Xstar = featurize_poly(x_test; degree=degree) + y_pred = linear_regression(X, y_train, Xstar) + scatter(x_train, y_train; legend=false, title="fit of order $degree") + return plot!(x_test, y_pred) +end + +plot([featurized_fit_and_plot(degree) for degree in 1:4]...) +# Note that the fit becomes perfect when we include exactly as many orders in the features as we have in the underlying polynomial (4). # +# However, when increasing the number of features, we can quickly overfit to noise in the data set: + +featurized_fit_and_plot(18) + +# To counteract this unwanted behaviour, we can introduce regularization. This leads to *ridge regression* with $L_2$ regularization of the weights ([Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)). +# Instead of the weights in linear regression, +# $$ +# \mathbf{w} = (\mathrm{X}^\top \mathrm{X})^{-1} \mathrm{X}^\top \mathbf{y} +# $$ +# we introduce the ridge parameter $\lambda$: +# $$ +# \mathbf{w} = (\mathrm{X}^\top \mathrm{X} + \lambda \mathbb{1})^{-1} \mathrm{X}^\top \mathbf{y} +# $$ +# As before, we predict at test inputs $\mathbf{x}_*$ using +# ```math +# \hat{y}_* = \mathbf{x}_*^\top \mathbf{w} +# ``` +# This is implemented by `ridge_regression`: + +function ridge_regression(X, y, Xstar, lambda) + weights = (X' * X + lambda * I) \ (X' * y) + return Xstar * weights +end -ps = Flux.params(k) -push!(ps, λ) -opt = Flux.Momentum(0.1) -#md nothing #hide - -plots = [] -for i in 1:10 - grads = Zygote.gradient(() -> loss(k, λ), ps) - Flux.Optimise.update!(opt, ps, grads) - p = Plots.scatter(x, y; lab="data", title="Loss = $(loss(k,λ))") - Plots.plot!(x_test, f(X_test, k, λ); lab="Prediction", lw=3.0) - push!(plots, p) +function regularized_fit_and_plot(degree, lambda) + X = featurize_poly(x_train; degree=degree) + Xstar = featurize_poly(x_test; degree=degree) + y_pred = ridge_regression(X, y_train, Xstar, lambda) + scatter(x_train, y_train; legend=false, title="\$\\lambda=$lambda\$") + return plot!(x_test, y_pred) end +plot([regularized_fit_and_plot(18, lambda) for lambda in [1e-4, 1e-2, 0.1, 10]]...) + +# Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. 
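
(An editorial aside, not part of the patch: the feature-map identity in the comment above can be checked numerically. For the polynomial kernel $k(x, x') = (x x' + 1)^d$ — the `PolynomialKernel` used later in this example — one explicit feature map is $\phi(x)_p = \sqrt{\binom{d}{p}}\, x^p$. The helper `phi` below is hypothetical, introduced only for this sketch.)

```julia
using KernelFunctions

# Sketch: verify <phi(x), phi(x')> == k(x, x') for the polynomial kernel.
d = 3
phi(x) = [sqrt(binomial(d, p)) * x^p for p in 0:d]  # hypothetical feature map
k = PolynomialKernel(; degree=d, c=1.0)
x1, x2 = 0.7, -1.3
k(x1, x2) ≈ phi(x1)' * phi(x2)  # true: (x1 * x2 + 1)^3 on both sides
```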
+# +# To apply this "kernel trick" to ridge regression, we can rewrite the ridge estimate for the weights +# $$ +# \mathbf{w} = (\mathrm{X}^\top \mathrm{X} + \lambda \mathbb{1})^{-1} \mathrm{X}^\top \mathbf{y} +# $$ +# using the [matrix inversion lemma](https://tlienart.github.io/pub/csml/mtheory/matinvlem.html#basic_lemmas) +# as +# $$ +# \mathbf{w} = \mathrm{X}^\top (\mathrm{X} \mathrm{X}^\top + \lambda \mathbb{1})^{-1} \mathbf{y} +# $$ +# where we can now replace the inner product with the kernel matrix, +# $$ +# \mathbf{w} = \mathrm{X}^\top (\mathrm{K} + \lambda \mathbb{1})^{-1} \mathbf{y} +# $$ +# And the prediction yields another inner product, +# ```math +# \hat{y}_* = \mathbf{x}_*^\top \mathbf{w} = \langle \mathbf{x}_*, \mathbf{w} \rangle = \mathbf{k}_* (\mathrm{K} + \lambda \mathbb{1})^{-1} \mathbf{y} +# ``` +# where $(\mathbf{k}_*)_n = k(x_*, x_n)$. # +# This is implemented by `kernel_ridge_regression`: + +function kernel_ridge_regression(k, X, y, Xstar, lambda) + K = kernelmatrix(k, X) + kstar = kernelmatrix(k, Xstar, X) + return kstar * ((K + lambda * I) \ y) +end + +# Now, instead of explicitly constructing features, we can simply pass in a `PolynomialKernel` object: + +function kernelized_fit_and_plot(kernel, lambda=1e-4) + y_pred = kernel_ridge_regression(kernel, x_train, y_train, x_test, lambda) + if kernel isa PolynomialKernel + title = string("order ", kernel.degree) + else + title = string(kernel) + end + scatter(x_train, y_train; label=nothing) + p = plot!( + x_test, + y_pred; + label=nothing, + title=title, + #title=string(raw"$\lambda=", lambda, raw"$") + ) + return p +end + +plot([kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4]...) + +# However, we can now also use kernels that would have an infinite-dimensional feature expansion, such as the squared exponential kernel: -l = @layout grid(10, 1) -plot(plots...; layout=l, size=(300, 1500)) +kernelized_fit_and_plot(SqExponentialKernel()) From 6df69cdf8696dae62e56681292cdfde6bbde70fc Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 1 Jul 2021 16:04:19 +0300 Subject: [PATCH 02/16] fix display math --- examples/kernel-ridge-regression/script.jl | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index a76905462..4dac8b28f 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -42,7 +42,7 @@ scatter!(x_train, y_train; label="observations") function linear_regression(X, y, Xstar) weights = (X' * X) \ (X' * y) return Xstar * weights -end +end; # A linear regression fit to the above data set: @@ -75,13 +75,13 @@ featurized_fit_and_plot(18) # To counteract this unwanted behaviour, we can introduce regularization. This leads to *ridge regression* with $L_2$ regularization of the weights ([Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)). # Instead of the weights in linear regression, -# $$ +# ```math # \mathbf{w} = (\mathrm{X}^\top \mathrm{X})^{-1} \mathrm{X}^\top \mathbf{y} -# $$ +# ``` # we introduce the ridge parameter $\lambda$: -# $$ +# ```math # \mathbf{w} = (\mathrm{X}^\top \mathrm{X} + \lambda \mathbb{1})^{-1} \mathrm{X}^\top \mathbf{y} -# $$ +# ``` # As before, we predict at test inputs $\mathbf{x}_*$ using # ```math # \hat{y}_* = \mathbf{x}_*^\top \mathbf{w} @@ -106,18 +106,18 @@ plot([regularized_fit_and_plot(18, lambda) for lambda in [1e-4, 1e-2, 0.1, 10]]. 
# Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. # # To apply this "kernel trick" to ridge regression, we can rewrite the ridge estimate for the weights -# $$ +# ```math # \mathbf{w} = (\mathrm{X}^\top \mathrm{X} + \lambda \mathbb{1})^{-1} \mathrm{X}^\top \mathbf{y} -# $$ +# ``` # using the [matrix inversion lemma](https://tlienart.github.io/pub/csml/mtheory/matinvlem.html#basic_lemmas) # as -# $$ +# ```math # \mathbf{w} = \mathrm{X}^\top (\mathrm{X} \mathrm{X}^\top + \lambda \mathbb{1})^{-1} \mathbf{y} -# $$ +# ``` # where we can now replace the inner product with the kernel matrix, -# $$ +# ```math # \mathbf{w} = \mathrm{X}^\top (\mathrm{K} + \lambda \mathbb{1})^{-1} \mathbf{y} -# $$ +# ``` # And the prediction yields another inner product, # ```math # \hat{y}_* = \mathbf{x}_*^\top \mathbf{w} = \langle \mathbf{x}_*, \mathbf{w} \rangle = \mathbf{k}_* (\mathrm{K} + \lambda \mathbb{1})^{-1} \mathbf{y} @@ -130,7 +130,7 @@ function kernel_ridge_regression(k, X, y, Xstar, lambda) K = kernelmatrix(k, X) kstar = kernelmatrix(k, Xstar, X) return kstar * ((K + lambda * I) \ y) -end +end; # Now, instead of explicitly constructing features, we can simply pass in a `PolynomialKernel` object: From 674c008f65a089aa9ca885e837abad117d915006 Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 1 Jul 2021 16:12:10 +0300 Subject: [PATCH 03/16] fix plot title --- examples/kernel-ridge-regression/script.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 4dac8b28f..3be9b2d05 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -139,7 +139,7 @@ function kernelized_fit_and_plot(kernel, lambda=1e-4) if kernel isa PolynomialKernel title = string("order ", kernel.degree) else - title = string(kernel) + title = string(nameof(typeof(kernel))) end scatter(x_train, y_train; label=nothing) p = plot!( From cce41c4e0c9a087b8caca4915b54eb7b3becde21 Mon Sep 17 00:00:00 2001 From: ST John Date: Fri, 2 Jul 2021 11:26:45 +0300 Subject: [PATCH 04/16] headings --- examples/kernel-ridge-regression/script.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 3be9b2d05..321f68f9d 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -9,12 +9,12 @@ using Distributions ## Plotting using Plots; -default(; lw=2.0, legendfontsize=15.0); +default(; lw=2.0, legendfontsize=11.0); using Random: seed! seed!(42); -# ## From linear regression to ridge regression +# ## Toy data # Here we use a one-dimensional toy problem. 
We generate data using the fourth-order polynomial $f(x) = (x+4)(x+1)(x-1)(x-3)$: f_truth(x) = (x + 4) * (x + 1) * (x - 1) * (x - 3) @@ -29,6 +29,7 @@ y_test = f_truth.(x_test) plot(x_test, y_test; label=raw"$f(x)$") scatter!(x_train, y_train; label="observations") +# ## Linear regression # For training inputs $\mathrm{X}=(\mathbf{x}_n)_{n=1}^N$ and observations $\mathbf{y}=(y_n)_{n=1}^N$, the linear regression weights $\mathbf{w}$ using the least-squares estimator are given by # ```math # \mathbf{w} = (\mathrm{X}^\top \mathrm{X})^{-1} \mathrm{X}^\top \mathbf{y} @@ -50,6 +51,7 @@ y_pred = linear_regression(x_train, y_train, x_test) scatter(x_train, y_train; label="observations") plot!(x_test, y_pred; label="linear fit") +# ## Featurization # We can improve the fit by including additional features, i.e. generalizing to $\mathrm{X} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: function featurize_poly(x; degree=1) @@ -73,6 +75,7 @@ plot([featurized_fit_and_plot(degree) for degree in 1:4]...) featurized_fit_and_plot(18) +# ## Ridge regression # To counteract this unwanted behaviour, we can introduce regularization. This leads to *ridge regression* with $L_2$ regularization of the weights ([Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)). # Instead of the weights in linear regression, # ```math @@ -103,6 +106,7 @@ end plot([regularized_fit_and_plot(18, lambda) for lambda in [1e-4, 1e-2, 0.1, 10]]...) +# ## Kernel ridge regression # Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. # # To apply this "kernel trick" to ridge regression, we can rewrite the ridge estimate for the weights From 45b645ccf430a6006131e5ae5497e1bccb7694e6 Mon Sep 17 00:00:00 2001 From: st-- Date: Tue, 6 Jul 2021 23:17:37 +0300 Subject: [PATCH 05/16] Apply suggestions from code review Co-authored-by: David Widmann --- examples/kernel-ridge-regression/script.jl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 321f68f9d..feb2fa74c 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -22,7 +22,7 @@ f_truth(x) = (x + 4) * (x + 1) * (x - 1) * (x - 3) x_train = collect(-5:0.5:5) x_test = collect(-5:0.1:5) -noise = rand(Uniform(-10, 10), size(x_train)) +noise = rand(Uniform(-10, 10), length(x_train)) y_train = f_truth.(x_train) + noise y_test = f_truth.(x_test) @@ -55,8 +55,7 @@ plot!(x_test, y_pred; label="linear fit") # We can improve the fit by including additional features, i.e. generalizing to $\mathrm{X} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: function featurize_poly(x; degree=1) - xcols = [x .^ d for d in 0:degree] - return hcat(xcols...) + return repeat(x, 1, degree + 1) .^ (0:degree)' end function featurized_fit_and_plot(degree) @@ -67,7 +66,7 @@ function featurized_fit_and_plot(degree) return plot!(x_test, y_pred) end -plot([featurized_fit_and_plot(degree) for degree in 1:4]...) +plot((featurized_fit_and_plot(degree) for degree in 1:4)...) 
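
(Aside, not part of the patch: a tiny worked example of the `repeat`-based `featurize_poly` introduced just above — each row of the result is the feature vector $(1, x, x^2, \dots, x^d)$ of one input.)

```julia
# With featurize_poly as defined above:
featurize_poly([2.0, 3.0]; degree=2)
# 2×3 Matrix{Float64}:
#  1.0  2.0  4.0
#  1.0  3.0  9.0
```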
# Note that the fit becomes perfect when we include exactly as many orders in the features as we have in the underlying polynomial (4). # @@ -104,7 +103,7 @@ function regularized_fit_and_plot(degree, lambda) return plot!(x_test, y_pred) end -plot([regularized_fit_and_plot(18, lambda) for lambda in [1e-4, 1e-2, 0.1, 10]]...) +plot((regularized_fit_and_plot(18, lambda) for lambda in (1e-4, 1e-2, 0.1, 10))...) # ## Kernel ridge regression # Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. @@ -151,12 +150,11 @@ function kernelized_fit_and_plot(kernel, lambda=1e-4) y_pred; label=nothing, title=title, - #title=string(raw"$\lambda=", lambda, raw"$") ) return p end -plot([kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4]...) +plot((kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4)...) # However, we can now also use kernels that would have an infinite-dimensional feature expansion, such as the squared exponential kernel: From 8d7787df426147d3f0fd2101ed600007b209974b Mon Sep 17 00:00:00 2001 From: ST John Date: Tue, 6 Jul 2021 23:19:36 +0300 Subject: [PATCH 06/16] update Project/Manifest.toml --- .../kernel-ridge-regression/Manifest.toml | 235 ------------------ examples/kernel-ridge-regression/Project.toml | 6 - 2 files changed, 241 deletions(-) diff --git a/examples/kernel-ridge-regression/Manifest.toml b/examples/kernel-ridge-regression/Manifest.toml index c3a4f40f2..8b8a2d648 100644 --- a/examples/kernel-ridge-regression/Manifest.toml +++ b/examples/kernel-ridge-regression/Manifest.toml @@ -1,16 +1,5 @@ # This file is machine-generated - editing it directly is not advised -[[AbstractFFTs]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0" -uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "1.0.1" - -[[AbstractTrees]] -git-tree-sha1 = "03e0550477d86222521d254b741d470ba17ea0b5" -uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" -version = "0.3.4" - [[Adapt]] deps = ["LinearAlgebra"] git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7" @@ -23,12 +12,6 @@ uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" [[Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" -[[BFloat16s]] -deps = ["LinearAlgebra", "Test"] -git-tree-sha1 = "4af69e205efc343068dc8722b8dfec1ade89254a" -uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" -version = "0.1.0" - [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" @@ -38,41 +21,18 @@ git-tree-sha1 = "c3598e525718abcc440f69cc6d5f60dda0a1b61e" uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" version = "1.0.6+5" -[[CEnum]] -git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.4.1" - -[[CUDA]] -deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "Memoization", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "TimerOutputs"] -git-tree-sha1 = "f6f6d2fc7a80b7710b2db4ecb1f59a1b2c2a715a" -uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" -version = "3.3.0" - [[Cairo_jll]] deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", 
"Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] git-tree-sha1 = "e2f47f6d8337369411569fd45ae5753ca10394c6" uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" version = "1.16.0+6" -[[ChainRules]] -deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "a41f9e72cffd789d5e19e75f1626b2786d640151" -uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" -version = "0.8.11" - [[ChainRulesCore]] deps = ["Compat", "LinearAlgebra", "SparseArrays"] git-tree-sha1 = "dbc9aae1227cfddaa9d2552f3ecba5b641f6cce9" uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" version = "0.10.5" -[[CodecZlib]] -deps = ["TranscodingStreams", "Zlib_jll"] -git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da" -uuid = "944b1d66-785c-5afd-91f1-9de20f533193" -version = "0.7.0" - [[ColorSchemes]] deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random", "StaticArrays"] git-tree-sha1 = "c8fd01e4b736013bc61b704871d20503b33ea402" @@ -91,12 +51,6 @@ git-tree-sha1 = "417b0ed7b8b838aa6ca0a87aadf1bb9eb111ce40" uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" version = "0.12.8" -[[CommonSubexpressions]] -deps = ["MacroTools", "Test"] -git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" -uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" -version = "0.3.0" - [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] git-tree-sha1 = "e4e2b39db08f967cc1360951f01e8a75ec441cab" @@ -118,22 +72,11 @@ git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7" uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" version = "0.5.7" -[[Crayons]] -git-tree-sha1 = "3f71217b538d7aaee0b69ab47d9b7724ca8afa0d" -uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" -version = "4.0.4" - [[DataAPI]] git-tree-sha1 = "dfb3b7e89e395be1e25c2ad6d7690dc29cc53b1d" uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" version = "1.6.0" -[[DataFrames]] -deps = ["Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] -git-tree-sha1 = "66ee4fe515a9294a8836ef18eea7239c6ac3db5e" -uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "1.1.1" - [[DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" @@ -153,18 +96,6 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" deps = ["Mmap"] uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" -[[DiffResults]] -deps = ["StaticArrays"] -git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" -uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" -version = "1.0.3" - -[[DiffRules]] -deps = ["NaNMath", "Random", "SpecialFunctions"] -git-tree-sha1 = "214c3fcac57755cfda163d91c58893a8723f93e9" -uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" -version = "1.0.2" - [[Distances]] deps = ["LinearAlgebra", "Statistics", "StatsAPI"] git-tree-sha1 = "abe4ad222b26af3337262b8afb28fab8d215e9f8" @@ -203,11 +134,6 @@ git-tree-sha1 = "b3bfd02e98aedfa5cf885665493c5598c350cd2f" uuid = "2e619515-83b5-522b-bb60-26c02a35a201" version = "2.2.10+0" -[[ExprTools]] -git-tree-sha1 = "10407a39b87f29d47ebaca8edbc75d7c302ff93e" -uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" -version = "0.1.3" - [[FFMPEG]] deps = 
["FFMPEG_jll"] git-tree-sha1 = "b57e3acbe22f8484b4b5ff66a7499717fe1a9cc8" @@ -232,12 +158,6 @@ git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" version = "0.8.4" -[[Flux]] -deps = ["AbstractTrees", "Adapt", "CUDA", "CodecZlib", "Colors", "DelimitedFiles", "Functors", "Juno", "LinearAlgebra", "MacroTools", "NNlib", "NNlibCUDA", "Pkg", "Printf", "Random", "Reexport", "SHA", "Statistics", "StatsBase", "Test", "ZipFile", "Zygote"] -git-tree-sha1 = "0b3c6d0ce57d3b793eabd346ccc8f605035ef079" -uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c" -version = "0.12.4" - [[Fontconfig_jll]] deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"] git-tree-sha1 = "35895cf184ceaab11fd778b4590144034a167a2f" @@ -250,12 +170,6 @@ git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" version = "0.4.2" -[[ForwardDiff]] -deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "NaNMath", "Printf", "Random", "SpecialFunctions", "StaticArrays"] -git-tree-sha1 = "e2af66012e08966366a43251e1fd421522908be6" -uuid = "f6369f11-7733-5829-9624-2563aa707210" -version = "0.10.18" - [[FreeType2_jll]] deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] git-tree-sha1 = "cbd58c9deb1d304f5a245a0b7eb841a2560cfec6" @@ -274,28 +188,12 @@ git-tree-sha1 = "a7bb2af991c43dcf5c3455d276dd83976799634f" uuid = "d9f16b24-f501-4c13-a1f2-28368ffc5196" version = "0.2.1" -[[Future]] -deps = ["Random"] -uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" - [[GLFW_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll"] git-tree-sha1 = "dba1e8614e98949abfa60480b13653813d8f0157" uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89" version = "3.3.5+0" -[[GPUArrays]] -deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"] -git-tree-sha1 = "3683030b5479249abaa18aa930fc02307fed05d3" -uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" -version = "7.0.0" - -[[GPUCompiler]] -deps = ["DataStructures", "ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "765d5b600d3177f1d422c9489525938dd8bd95d1" -uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "0.12.2" - [[GR]] deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"] git-tree-sha1 = "b83e3125048a9c3158cbb7ca423790c7b1b57bea" @@ -343,12 +241,6 @@ git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a" uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" version = "0.2.2" -[[IRTools]] -deps = ["InteractiveUtils", "MacroTools", "Test"] -git-tree-sha1 = "c67e7515a11f726f44083e74f218d134396d6510" -uuid = "7869d1d1-7146-5819-86e3-90919afe41df" -version = "0.4.2" - [[IniFile]] deps = ["Test"] git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" @@ -359,12 +251,6 @@ version = "0.5.0" deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -[[InvertedIndices]] -deps = ["Test"] -git-tree-sha1 = "15732c475062348b0165684ffe28e85ea8396afc" -uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" -version = "1.0.0" - [[IterTools]] git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18" uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" @@ -393,12 +279,6 @@ git-tree-sha1 = "d735490ac75c5cb9f1b00d8b5509c11984dc6943" uuid = 
"aacddb02-875f-59d6-b918-886e6ef4fbf8" version = "2.1.0+0" -[[Juno]] -deps = ["Base64", "Logging", "Media", "Profile"] -git-tree-sha1 = "07cb43290a840908a771552911a6274bc6c072c7" -uuid = "e5e0dc1b-0480-54bc-9374-aad01c23163d" -version = "0.8.4" - [[KernelFunctions]] deps = ["ChainRulesCore", "Compat", "CompositionsBase", "Distances", "FillArrays", "Functors", "LinearAlgebra", "Random", "Requires", "SpecialFunctions", "StatsBase", "StatsFuns", "TensorCore", "Test", "ZygoteRules"] path = "../.." @@ -411,12 +291,6 @@ git-tree-sha1 = "f6250b16881adf048549549fba48b1161acdac8c" uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" version = "3.100.1+0" -[[LLVM]] -deps = ["CEnum", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "b3cd5971a37d3ac3c13ca805916b90878c699eaf" -uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "3.8.0" - [[LZO_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "e5b909bcf985c5e2605737d2ce278ed791b89be6" @@ -434,16 +308,6 @@ git-tree-sha1 = "a4b12a1bd2ebade87891ab7e36fdbce582301a92" uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" version = "0.15.6" -[[LazyArtifacts]] -deps = ["Artifacts", "Pkg"] -uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" - -[[LearnBase]] -deps = ["LinearAlgebra", "StatsBase"] -git-tree-sha1 = "47e6f4623c1db88570c7a7fa66c6528b92ba4725" -uuid = "7f8f8fb0-2700-5f03-b4bd-41f8cfc144b6" -version = "0.3.0" - [[LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" @@ -536,35 +400,12 @@ version = "0.2.4" [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" -[[MLDataPattern]] -deps = ["LearnBase", "MLLabelUtils", "Random", "SparseArrays", "StatsBase"] -git-tree-sha1 = "e99514e96e8b8129bb333c69e063a56ab6402b5b" -uuid = "9920b226-0b2a-5f5f-9153-9aa70a013f8b" -version = "0.5.4" - -[[MLDataUtils]] -deps = ["DataFrames", "DelimitedFiles", "LearnBase", "MLDataPattern", "MLLabelUtils", "Statistics", "StatsBase"] -git-tree-sha1 = "ee54803aea12b9c8ee972e78ece11ac6023715e6" -uuid = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d" -version = "0.5.4" - -[[MLLabelUtils]] -deps = ["LearnBase", "MappedArrays", "StatsBase"] -git-tree-sha1 = "3211c1fdd1efaefa692c8cf60e021fb007b76a08" -uuid = "66a33bbf-0c2b-5fc8-a008-9da813334f0a" -version = "0.5.6" - [[MacroTools]] deps = ["Markdown", "Random"] git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" version = "0.5.6" -[[MappedArrays]] -git-tree-sha1 = "18d3584eebc861e311a552cbb67723af8edff5de" -uuid = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" -version = "0.4.0" - [[Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" @@ -584,18 +425,6 @@ git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" version = "0.3.1" -[[Media]] -deps = ["MacroTools", "Test"] -git-tree-sha1 = "75a54abd10709c01f1b86b84ec225d26e840ed58" -uuid = "e89f7d12-3494-54d1-8411-f7d8b9ae1f27" -version = "0.5.0" - -[[Memoization]] -deps = ["MacroTools"] -git-tree-sha1 = "a9175def295e0dc1f6da80e8e733a01dd0f36a56" -uuid = "6fafb56a-5788-4b4e-91ca-c0cea6611c73" -version = "0.1.11" - [[Missings]] deps = ["DataAPI"] git-tree-sha1 = "4ea90bd5d3985ae1f9a908bd4500ae88921c5ce7" @@ -608,18 +437,6 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -[[NNlib]] -deps = ["Adapt", "ChainRulesCore", "Compat", "LinearAlgebra", "Pkg", "Requires", "Statistics"] -git-tree-sha1 = "7461639cef384a2ad058005b49e32b318d844343" -uuid = 
"872c559c-99b0-510c-b3b7-b6c96a88d5cd" -version = "0.7.22" - -[[NNlibCUDA]] -deps = ["CUDA", "LinearAlgebra", "NNlib", "Random", "Statistics"] -git-tree-sha1 = "bd8b29bf75be7a6c2b288b4b9a4e8903d0376ac1" -uuid = "a00861dc-f156-4864-bf3c-e6376f28a68d" -version = "0.1.3" - [[NaNMath]] git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb" uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" @@ -703,32 +520,16 @@ git-tree-sha1 = "e995fa1821b6daff8b107a8eafbec234ae2263d0" uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" version = "1.16.5" -[[PooledArrays]] -deps = ["DataAPI", "Future"] -git-tree-sha1 = "cde4ce9d6f33219465b55162811d8de8139c0414" -uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" -version = "1.2.1" - [[Preferences]] deps = ["TOML"] git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a" uuid = "21216c6a-2e73-6563-6e65-726566657250" version = "1.2.2" -[[PrettyTables]] -deps = ["Crayons", "Formatting", "Markdown", "Reexport", "Tables"] -git-tree-sha1 = "0d1245a357cc61c8cd61934c07447aa569ff22e6" -uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" -version = "1.1.0" - [[Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" -[[Profile]] -deps = ["Printf"] -uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" - [[Qt5Base_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "xkbcommon_jll"] git-tree-sha1 = "ad368663a5e20dbb8d6dc2fddeefe4dae0781ae8" @@ -749,18 +550,6 @@ uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" deps = ["Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -[[Random123]] -deps = ["Libdl", "Random", "RandomNumbers"] -git-tree-sha1 = "0e8b146557ad1c6deb1367655e052276690e71a3" -uuid = "74087812-796a-5b5d-8853-05524746bad3" -version = "1.4.2" - -[[RandomNumbers]] -deps = ["Random", "Requires"] -git-tree-sha1 = "441e6fc35597524ada7f85e13df1f4e10137d16f" -uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" -version = "1.4.0" - [[RecipesBase]] git-tree-sha1 = "b3fb709f3c97bfc6e948be68beeecb55a0b340ae" uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" @@ -903,18 +692,6 @@ version = "0.1.1" deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -[[TimerOutputs]] -deps = ["ExprTools", "Printf"] -git-tree-sha1 = "bf8aacc899a1bd16522d0350e1e2310510d77236" -uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.9" - -[[TranscodingStreams]] -deps = ["Random", "Test"] -git-tree-sha1 = "7c53c35547de1c5b9d46a4797cf6d8253807108c" -uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.9.5" - [[URIs]] git-tree-sha1 = "97bbe755a53fe859669cd907f2d96aee8d2c1355" uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" @@ -1077,12 +854,6 @@ git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845" uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" version = "1.4.0+3" -[[ZipFile]] -deps = ["Libdl", "Printf", "Zlib_jll"] -git-tree-sha1 = "c3a5637e27e914a7a445b8d0ad063d701931e9f7" -uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" -version = "0.9.3" - [[Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" @@ -1093,12 +864,6 @@ git-tree-sha1 = "cc4bf3fdde8b7e3e9fa0351bdeedba1cf3b7f6e6" uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" version = "1.5.0+0" -[[Zygote]] -deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", 
"ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"] -git-tree-sha1 = "b1d95edd4e693066c38c13a10aab0a8f6a6e2f65" -uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" -version = "0.6.12" - [[ZygoteRules]] deps = ["MacroTools"] git-tree-sha1 = "9e7a1e8ca60b742e508a315c17eef5211e7fbfd7" diff --git a/examples/kernel-ridge-regression/Project.toml b/examples/kernel-ridge-regression/Project.toml index f3b3a5b77..13219d95c 100644 --- a/examples/kernel-ridge-regression/Project.toml +++ b/examples/kernel-ridge-regression/Project.toml @@ -1,19 +1,13 @@ [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" KernelFunctions = "ec8451be-7e33-11e9-00cf-bbf324bd1392" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" -MLDataUtils = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] Distributions = "0.25" -Flux = "0.12" KernelFunctions = "0.10" Literate = "2" -MLDataUtils = "0.5" Plots = "1" -Zygote = "0.6" julia = "1.3" From 5fb788096ceab7cd4750e5c76503715a5ffc9146 Mon Sep 17 00:00:00 2001 From: st-- Date: Tue, 6 Jul 2021 23:21:55 +0300 Subject: [PATCH 07/16] Update examples/kernel-ridge-regression/script.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- examples/kernel-ridge-regression/script.jl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index feb2fa74c..2c83aa15f 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -145,12 +145,7 @@ function kernelized_fit_and_plot(kernel, lambda=1e-4) title = string(nameof(typeof(kernel))) end scatter(x_train, y_train; label=nothing) - p = plot!( - x_test, - y_pred; - label=nothing, - title=title, - ) + p = plot!(x_test, y_pred; label=nothing, title=title) return p end From ade425ce4920eba2cc977c57f4eeb9e7bfe1a720 Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 11:28:34 +0300 Subject: [PATCH 08/16] VSCode cell markers --- examples/kernel-ridge-regression/script.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 2c83aa15f..ca4f7fe4e 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -14,6 +14,7 @@ default(; lw=2.0, legendfontsize=11.0); using Random: seed! seed!(42); +## # ## Toy data # Here we use a one-dimensional toy problem. We generate data using the fourth-order polynomial $f(x) = (x+4)(x+1)(x-1)(x-3)$: @@ -29,6 +30,7 @@ y_test = f_truth.(x_test) plot(x_test, y_test; label=raw"$f(x)$") scatter!(x_train, y_train; label="observations") +## # ## Linear regression # For training inputs $\mathrm{X}=(\mathbf{x}_n)_{n=1}^N$ and observations $\mathbf{y}=(y_n)_{n=1}^N$, the linear regression weights $\mathbf{w}$ using the least-squares estimator are given by # ```math @@ -51,6 +53,7 @@ y_pred = linear_regression(x_train, y_train, x_test) scatter(x_train, y_train; label="observations") plot!(x_test, y_pred; label="linear fit") +## # ## Featurization # We can improve the fit by including additional features, i.e. 
generalizing to $\mathrm{X} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: @@ -68,12 +71,14 @@ end plot((featurized_fit_and_plot(degree) for degree in 1:4)...) +## # Note that the fit becomes perfect when we include exactly as many orders in the features as we have in the underlying polynomial (4). # # However, when increasing the number of features, we can quickly overfit to noise in the data set: featurized_fit_and_plot(18) +## # ## Ridge regression # To counteract this unwanted behaviour, we can introduce regularization. This leads to *ridge regression* with $L_2$ regularization of the weights ([Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)). # Instead of the weights in linear regression, @@ -105,6 +110,7 @@ end plot((regularized_fit_and_plot(18, lambda) for lambda in (1e-4, 1e-2, 0.1, 10))...) +## # ## Kernel ridge regression # Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. # @@ -151,6 +157,7 @@ end plot((kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4)...) +## # However, we can now also use kernels that would have an infinite-dimensional feature expansion, such as the squared exponential kernel: kernelized_fit_and_plot(SqExponentialKernel()) From d5041c4bb392f55103cb712ba8ee6ed9018b634e Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 11:28:56 +0300 Subject: [PATCH 09/16] use range directly and extent for test set --- examples/kernel-ridge-regression/script.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index ca4f7fe4e..8acc6f74a 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -20,8 +20,8 @@ seed!(42); f_truth(x) = (x + 4) * (x + 1) * (x - 1) * (x - 3) -x_train = collect(-5:0.5:5) -x_test = collect(-5:0.1:5) +x_train = -5:0.5:5 +x_test = -7:0.1:7 noise = rand(Uniform(-10, 10), length(x_train)) y_train = f_truth.(x_train) + noise From 753775b70f107c34a323e1d07fd0fddeb176faab Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 11:29:12 +0300 Subject: [PATCH 10/16] minor plot clean up --- examples/kernel-ridge-regression/script.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 8acc6f74a..6c702dcf4 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -28,7 +28,7 @@ y_train = f_truth.(x_train) + noise y_test = f_truth.(x_test) plot(x_test, y_test; label=raw"$f(x)$") -scatter!(x_train, y_train; label="observations") +scatter!(x_train, y_train; seriescolor=1, label="observations") ## # ## Linear regression @@ -151,8 +151,7 @@ function kernelized_fit_and_plot(kernel, lambda=1e-4) title = string(nameof(typeof(kernel))) end scatter(x_train, y_train; label=nothing) - p = plot!(x_test, y_pred; label=nothing, title=title) - return p + plot!(x_test, y_pred; label=nothing, title=title) end plot((kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4)...) 
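
(Aside between patches: with the definitions from `script.jl` at this point in the series — `kernel_ridge_regression`, `x_train`, `y_train`, `x_test`, and the noise-free `y_test` — one could sanity-check candidate ridge parameters against the true test curve. The `mse` helper is hypothetical, not part of the example.)

```julia
# Hypothetical model-selection sketch: score each lambda by the mean squared
# error of the kernel ridge fit against the noise-free y_test.
mse(y, ŷ) = sum(abs2, y - ŷ) / length(y)
for lambda in (1e-4, 1e-2, 1.0)
    ŷ = kernel_ridge_regression(SqExponentialKernel(), x_train, y_train, x_test, lambda)
    println("λ = ", lambda, ":  MSE = ", mse(y_test, ŷ))
end
```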
From fb8675f5affc3471a07a43401167838dc9d21143 Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 11:29:28 +0300 Subject: [PATCH 11/16] change noise level and orders/lambdas for ridge regression --- examples/kernel-ridge-regression/script.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 6c702dcf4..e060d576d 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -23,7 +23,7 @@ f_truth(x) = (x + 4) * (x + 1) * (x - 1) * (x - 3) x_train = -5:0.5:5 x_test = -7:0.1:7 -noise = rand(Uniform(-10, 10), length(x_train)) +noise = rand(Uniform(-20, 20), length(x_train)) y_train = f_truth.(x_train) + noise y_test = f_truth.(x_test) @@ -76,7 +76,7 @@ plot((featurized_fit_and_plot(degree) for degree in 1:4)...) # # However, when increasing the number of features, we can quickly overfit to noise in the data set: -featurized_fit_and_plot(18) +featurized_fit_and_plot(20) ## # ## Ridge regression @@ -108,7 +108,7 @@ function regularized_fit_and_plot(degree, lambda) return plot!(x_test, y_pred) end -plot((regularized_fit_and_plot(18, lambda) for lambda in (1e-4, 1e-2, 0.1, 10))...) +plot((regularized_fit_and_plot(20, lambda) for lambda in (1e-3, 1e-2, 1e-1, 1))...) ## # ## Kernel ridge regression From f69a4c8bd37662ea977ec21a59f84ee464963ad4 Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 11:29:34 +0300 Subject: [PATCH 12/16] fix ylims for all plots --- examples/kernel-ridge-regression/script.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index e060d576d..3ce823fb9 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -9,7 +9,7 @@ using Distributions ## Plotting using Plots; -default(; lw=2.0, legendfontsize=11.0); +default(; lw=2.0, legendfontsize=11.0, ylims=(-150, 500)); using Random: seed! seed!(42); From 39161cf35ae422a2f023233684a3803dc8e3b61e Mon Sep 17 00:00:00 2001 From: st-- Date: Thu, 8 Jul 2021 11:55:31 +0300 Subject: [PATCH 13/16] Update examples/kernel-ridge-regression/script.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- examples/kernel-ridge-regression/script.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 3ce823fb9..86a351998 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -151,7 +151,7 @@ function kernelized_fit_and_plot(kernel, lambda=1e-4) title = string(nameof(typeof(kernel))) end scatter(x_train, y_train; label=nothing) - plot!(x_test, y_pred; label=nothing, title=title) + return plot!(x_test, y_pred; label=nothing, title=title) end plot((kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4)...) 
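
(Aside: since `kernelized_fit_and_plot` accepts any kernel, the squared-exponential fit at the end of the script could also be tuned by composing with a `ScaleTransform` — the same `∘` composition that appeared in the old code removed by the first patch. This is a sketch, not a change proposed by the series; larger scale parameters correspond to shorter effective lengthscales and hence wigglier fits.)

```julia
# Compare a short-lengthscale and a long-lengthscale squared-exponential fit:
plot(
    kernelized_fit_and_plot(SqExponentialKernel() ∘ ScaleTransform(2.0)),
    kernelized_fit_and_plot(SqExponentialKernel() ∘ ScaleTransform(0.5)),
)
```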
From 0ff6ed8e4db154800ba25ecc1e19392bdc382f77 Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 12:31:31 +0300 Subject: [PATCH 14/16] remove VSCode block delimiters --- docs/literate.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/literate.jl b/docs/literate.jl index 0c8a0e2fe..fc8f19799 100644 --- a/docs/literate.jl +++ b/docs/literate.jl @@ -13,8 +13,8 @@ Pkg.activate(EXAMPLEPATH) Pkg.instantiate() using Literate: Literate -# Add link to nbviewer below the first heading of level 1 function preprocess(content) + # Add link to nbviewer below the first heading of level 1 sub = SubstitutionString( """ #md # ```@meta @@ -35,7 +35,12 @@ function preprocess(content) # """, ) - return replace(content, r"^# # [^\n]*"m => sub; count=1) + content = replace(content, r"^# # [^\n]*"m => sub; count=1) + + # remove VSCode `##` block delimiters + content = replace(content, r"^##$"m => "") + + return content end # Convert to markdown and notebook From 19c32b599d3ff7e7f754bdb4b33aa8207165290d Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 12:34:34 +0300 Subject: [PATCH 15/16] remove whole line --- docs/literate.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/literate.jl b/docs/literate.jl index fc8f19799..452d1cdf2 100644 --- a/docs/literate.jl +++ b/docs/literate.jl @@ -37,8 +37,8 @@ function preprocess(content) ) content = replace(content, r"^# # [^\n]*"m => sub; count=1) - # remove VSCode `##` block delimiters - content = replace(content, r"^##$"m => "") + # remove VSCode `##` block delimiter lines + content = replace(content, r"^##$."ms => "") return content end From c563577ba899cc7f0139fd563f80475a661e1eaf Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 13:31:13 +0300 Subject: [PATCH 16/16] add \tilde{} to disambiguate --- examples/kernel-ridge-regression/script.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 86a351998..62afc5944 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -55,7 +55,7 @@ plot!(x_test, y_pred; label="linear fit") ## # ## Featurization -# We can improve the fit by including additional features, i.e. generalizing to $\mathrm{X} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: +# We can improve the fit by including additional features, i.e. generalizing to $\tilde{\mathrm{X}} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: function featurize_poly(x; degree=1) return repeat(x, 1, degree + 1) .^ (0:degree)' @@ -112,7 +112,7 @@ plot((regularized_fit_and_plot(20, lambda) for lambda in (1e-3, 1e-2, 1e-1, 1)). ## # ## Kernel ridge regression -# Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. 
+# Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\tilde{\mathrm{X}} \tilde{\mathrm{X}}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. # # To apply this "kernel trick" to ridge regression, we can rewrite the ridge estimate for the weights # ```math