From a438f7dfd75d97fba4fda3dcda47b12c990c1d0f Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 1 Jul 2021 13:57:33 +0300 Subject: [PATCH 01/16] extract kernel-ridge-regression example from st/examples (#234) --- examples/kernel-ridge-regression/script.jl | 195 +++++++++++++++------ 1 file changed, 144 insertions(+), 51 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 5064a8a45..a76905462 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -1,66 +1,159 @@ # # Kernel Ridge Regression # -# !!! warning -# This example is under construction - -# Setup +# Building on linear regression, we can fit non-linear data sets by introducing a feature space. In a higher-dimensional feature space, we can overfit the data; ridge regression introduces regularization to avoid this. In this notebook we show how we can use KernelFunctions.jl for *kernel* ridge regression. +## Loading and setup of required packages using KernelFunctions -using MLDataUtils -using Zygote -using Flux -using Distributions, LinearAlgebra -using Plots - -Flux.@functor SqExponentialKernel -Flux.@functor ScaleTransform -Flux.@functor KernelSum -Flux.@functor Matern32Kernel - -# Generate date - -xmin = -3; -xmax = 3; -x = range(xmin, xmax; length=100) -x_test = range(xmin, xmax; length=300) -x, y = noisy_function(sinc, x; noise=0.1) -X = RowVecs(reshape(x, :, 1)) -X_test = RowVecs(reshape(x_test, :, 1)) -#md nothing #hide - -# Set up kernel and regularisation parameter - -k = SqExponentialKernel() + Matern32Kernel() ∘ ScaleTransform(2.0) -λ = [-1.0] -#md nothing #hide +using LinearAlgebra +using Distributions -# +## Plotting +using Plots; +default(; lw=2.0, legendfontsize=15.0); -f(x, k, λ) = kernelmatrix(k, x, X) / (kernelmatrix(k, X) + exp(λ[1]) * I) * y -f(X, k, 1.0) +using Random: seed! +seed!(42); -# +# ## From linear regression to ridge regression +# Here we use a one-dimensional toy problem. We generate data using the fourth-order polynomial $f(x) = (x+4)(x+1)(x-1)(x-3)$: + +f_truth(x) = (x + 4) * (x + 1) * (x - 1) * (x - 3) + +x_train = collect(-5:0.5:5) +x_test = collect(-5:0.1:5) + +noise = rand(Uniform(-10, 10), size(x_train)) +y_train = f_truth.(x_train) + noise +y_test = f_truth.(x_test) + +plot(x_test, y_test; label=raw"$f(x)$") +scatter!(x_train, y_train; label="observations") + +# For training inputs $\mathrm{X}=(\mathbf{x}_n)_{n=1}^N$ and observations $\mathbf{y}=(y_n)_{n=1}^N$, the linear regression weights $\mathbf{w}$ using the least-squares estimator are given by +# ```math +# \mathbf{w} = (\mathrm{X}^\top \mathrm{X})^{-1} \mathrm{X}^\top \mathbf{y} +# ``` +# We predict at test inputs $\mathbf{x}_*$ using +# ```math +# \hat{y}_* = \mathbf{x}_*^\top \mathbf{w} +# ``` +# This is implemented by `linear_regression`: + +function linear_regression(X, y, Xstar) + weights = (X' * X) \ (X' * y) + return Xstar * weights +end + +# A linear regression fit to the above data set: + +y_pred = linear_regression(x_train, y_train, x_test) +scatter(x_train, y_train; label="observations") +plot!(x_test, y_pred; label="linear fit") + +# We can improve the fit by including additional features, i.e. generalizing to $\mathrm{X} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. 
Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: -loss(k, λ) = (ŷ -> sum(y - ŷ) / length(y) + exp(λ[1]) * norm(ŷ))(f(X, k, λ)) -loss(k, λ) +function featurize_poly(x; degree=1) + xcols = [x .^ d for d in 0:degree] + return hcat(xcols...) +end + +function featurized_fit_and_plot(degree) + X = featurize_poly(x_train; degree=degree) + Xstar = featurize_poly(x_test; degree=degree) + y_pred = linear_regression(X, y_train, Xstar) + scatter(x_train, y_train; legend=false, title="fit of order $degree") + return plot!(x_test, y_pred) +end + +plot([featurized_fit_and_plot(degree) for degree in 1:4]...) +# Note that the fit becomes perfect when we include exactly as many orders in the features as we have in the underlying polynomial (4). # +# However, when increasing the number of features, we can quickly overfit to noise in the data set: + +featurized_fit_and_plot(18) + +# To counteract this unwanted behaviour, we can introduce regularization. This leads to *ridge regression* with $L_2$ regularization of the weights ([Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)). +# Instead of the weights in linear regression, +# $$ +# \mathbf{w} = (\mathrm{X}^\top \mathrm{X})^{-1} \mathrm{X}^\top \mathbf{y} +# $$ +# we introduce the ridge parameter $\lambda$: +# $$ +# \mathbf{w} = (\mathrm{X}^\top \mathrm{X} + \lambda \mathbb{1})^{-1} \mathrm{X}^\top \mathbf{y} +# $$ +# As before, we predict at test inputs $\mathbf{x}_*$ using +# ```math +# \hat{y}_* = \mathbf{x}_*^\top \mathbf{w} +# ``` +# This is implemented by `ridge_regression`: + +function ridge_regression(X, y, Xstar, lambda) + weights = (X' * X + lambda * I) \ (X' * y) + return Xstar * weights +end -ps = Flux.params(k) -push!(ps, λ) -opt = Flux.Momentum(0.1) -#md nothing #hide - -plots = [] -for i in 1:10 - grads = Zygote.gradient(() -> loss(k, λ), ps) - Flux.Optimise.update!(opt, ps, grads) - p = Plots.scatter(x, y; lab="data", title="Loss = $(loss(k,λ))") - Plots.plot!(x_test, f(X_test, k, λ); lab="Prediction", lw=3.0) - push!(plots, p) +function regularized_fit_and_plot(degree, lambda) + X = featurize_poly(x_train; degree=degree) + Xstar = featurize_poly(x_test; degree=degree) + y_pred = ridge_regression(X, y_train, Xstar, lambda) + scatter(x_train, y_train; legend=false, title="\$\\lambda=$lambda\$") + return plot!(x_test, y_pred) end +plot([regularized_fit_and_plot(18, lambda) for lambda in [1e-4, 1e-2, 0.1, 10]]...) + +# Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. 
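
(An editorial aside, not part of the patch: the feature-map identity in the comment above can be checked numerically. For the polynomial kernel $k(x, x') = (x x' + 1)^d$ — the `PolynomialKernel` used later in this example — one explicit feature map is $\phi(x)_p = \sqrt{\binom{d}{p}}\, x^p$. The helper `phi` below is hypothetical, introduced only for this sketch.)

```julia
using KernelFunctions

# Sketch: verify <phi(x), phi(x')> == k(x, x') for the polynomial kernel.
d = 3
phi(x) = [sqrt(binomial(d, p)) * x^p for p in 0:d]  # hypothetical feature map
k = PolynomialKernel(; degree=d, c=1.0)
x1, x2 = 0.7, -1.3
k(x1, x2) ≈ phi(x1)' * phi(x2)  # true: (x1 * x2 + 1)^3 on both sides
```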
+# +# To apply this "kernel trick" to ridge regression, we can rewrite the ridge estimate for the weights +# $$ +# \mathbf{w} = (\mathrm{X}^\top \mathrm{X} + \lambda \mathbb{1})^{-1} \mathrm{X}^\top \mathbf{y} +# $$ +# using the [matrix inversion lemma](https://tlienart.github.io/pub/csml/mtheory/matinvlem.html#basic_lemmas) +# as +# $$ +# \mathbf{w} = \mathrm{X}^\top (\mathrm{X} \mathrm{X}^\top + \lambda \mathbb{1})^{-1} \mathbf{y} +# $$ +# where we can now replace the inner product with the kernel matrix, +# $$ +# \mathbf{w} = \mathrm{X}^\top (\mathrm{K} + \lambda \mathbb{1})^{-1} \mathbf{y} +# $$ +# And the prediction yields another inner product, +# ```math +# \hat{y}_* = \mathbf{x}_*^\top \mathbf{w} = \langle \mathbf{x}_*, \mathbf{w} \rangle = \mathbf{k}_* (\mathrm{K} + \lambda \mathbb{1})^{-1} \mathbf{y} +# ``` +# where $(\mathbf{k}_*)_n = k(x_*, x_n)$. # +# This is implemented by `kernel_ridge_regression`: + +function kernel_ridge_regression(k, X, y, Xstar, lambda) + K = kernelmatrix(k, X) + kstar = kernelmatrix(k, Xstar, X) + return kstar * ((K + lambda * I) \ y) +end + +# Now, instead of explicitly constructing features, we can simply pass in a `PolynomialKernel` object: + +function kernelized_fit_and_plot(kernel, lambda=1e-4) + y_pred = kernel_ridge_regression(kernel, x_train, y_train, x_test, lambda) + if kernel isa PolynomialKernel + title = string("order ", kernel.degree) + else + title = string(kernel) + end + scatter(x_train, y_train; label=nothing) + p = plot!( + x_test, + y_pred; + label=nothing, + title=title, + #title=string(raw"$\lambda=", lambda, raw"$") + ) + return p +end + +plot([kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4]...) + +# However, we can now also use kernels that would have an infinite-dimensional feature expansion, such as the squared exponential kernel: -l = @layout grid(10, 1) -plot(plots...; layout=l, size=(300, 1500)) +kernelized_fit_and_plot(SqExponentialKernel()) From 6df69cdf8696dae62e56681292cdfde6bbde70fc Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 1 Jul 2021 16:04:19 +0300 Subject: [PATCH 02/16] fix display math --- examples/kernel-ridge-regression/script.jl | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index a76905462..4dac8b28f 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -42,7 +42,7 @@ scatter!(x_train, y_train; label="observations") function linear_regression(X, y, Xstar) weights = (X' * X) \ (X' * y) return Xstar * weights -end +end; # A linear regression fit to the above data set: @@ -75,13 +75,13 @@ featurized_fit_and_plot(18) # To counteract this unwanted behaviour, we can introduce regularization. This leads to *ridge regression* with $L_2$ regularization of the weights ([Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)). # Instead of the weights in linear regression, -# $$ +# ```math # \mathbf{w} = (\mathrm{X}^\top \mathrm{X})^{-1} \mathrm{X}^\top \mathbf{y} -# $$ +# ``` # we introduce the ridge parameter $\lambda$: -# $$ +# ```math # \mathbf{w} = (\mathrm{X}^\top \mathrm{X} + \lambda \mathbb{1})^{-1} \mathrm{X}^\top \mathbf{y} -# $$ +# ``` # As before, we predict at test inputs $\mathbf{x}_*$ using # ```math # \hat{y}_* = \mathbf{x}_*^\top \mathbf{w} @@ -106,18 +106,18 @@ plot([regularized_fit_and_plot(18, lambda) for lambda in [1e-4, 1e-2, 0.1, 10]]. 
# Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. # # To apply this "kernel trick" to ridge regression, we can rewrite the ridge estimate for the weights -# $$ +# ```math # \mathbf{w} = (\mathrm{X}^\top \mathrm{X} + \lambda \mathbb{1})^{-1} \mathrm{X}^\top \mathbf{y} -# $$ +# ``` # using the [matrix inversion lemma](https://tlienart.github.io/pub/csml/mtheory/matinvlem.html#basic_lemmas) # as -# $$ +# ```math # \mathbf{w} = \mathrm{X}^\top (\mathrm{X} \mathrm{X}^\top + \lambda \mathbb{1})^{-1} \mathbf{y} -# $$ +# ``` # where we can now replace the inner product with the kernel matrix, -# $$ +# ```math # \mathbf{w} = \mathrm{X}^\top (\mathrm{K} + \lambda \mathbb{1})^{-1} \mathbf{y} -# $$ +# ``` # And the prediction yields another inner product, # ```math # \hat{y}_* = \mathbf{x}_*^\top \mathbf{w} = \langle \mathbf{x}_*, \mathbf{w} \rangle = \mathbf{k}_* (\mathrm{K} + \lambda \mathbb{1})^{-1} \mathbf{y} @@ -130,7 +130,7 @@ function kernel_ridge_regression(k, X, y, Xstar, lambda) K = kernelmatrix(k, X) kstar = kernelmatrix(k, Xstar, X) return kstar * ((K + lambda * I) \ y) -end +end; # Now, instead of explicitly constructing features, we can simply pass in a `PolynomialKernel` object: From 674c008f65a089aa9ca885e837abad117d915006 Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 1 Jul 2021 16:12:10 +0300 Subject: [PATCH 03/16] fix plot title --- examples/kernel-ridge-regression/script.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 4dac8b28f..3be9b2d05 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -139,7 +139,7 @@ function kernelized_fit_and_plot(kernel, lambda=1e-4) if kernel isa PolynomialKernel title = string("order ", kernel.degree) else - title = string(kernel) + title = string(nameof(typeof(kernel))) end scatter(x_train, y_train; label=nothing) p = plot!( From cce41c4e0c9a087b8caca4915b54eb7b3becde21 Mon Sep 17 00:00:00 2001 From: ST John Date: Fri, 2 Jul 2021 11:26:45 +0300 Subject: [PATCH 04/16] headings --- examples/kernel-ridge-regression/script.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 3be9b2d05..321f68f9d 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -9,12 +9,12 @@ using Distributions ## Plotting using Plots; -default(; lw=2.0, legendfontsize=15.0); +default(; lw=2.0, legendfontsize=11.0); using Random: seed! seed!(42); -# ## From linear regression to ridge regression +# ## Toy data # Here we use a one-dimensional toy problem. 
We generate data using the fourth-order polynomial $f(x) = (x+4)(x+1)(x-1)(x-3)$: f_truth(x) = (x + 4) * (x + 1) * (x - 1) * (x - 3) @@ -29,6 +29,7 @@ y_test = f_truth.(x_test) plot(x_test, y_test; label=raw"$f(x)$") scatter!(x_train, y_train; label="observations") +# ## Linear regression # For training inputs $\mathrm{X}=(\mathbf{x}_n)_{n=1}^N$ and observations $\mathbf{y}=(y_n)_{n=1}^N$, the linear regression weights $\mathbf{w}$ using the least-squares estimator are given by # ```math # \mathbf{w} = (\mathrm{X}^\top \mathrm{X})^{-1} \mathrm{X}^\top \mathbf{y} @@ -50,6 +51,7 @@ y_pred = linear_regression(x_train, y_train, x_test) scatter(x_train, y_train; label="observations") plot!(x_test, y_pred; label="linear fit") +# ## Featurization # We can improve the fit by including additional features, i.e. generalizing to $\mathrm{X} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: function featurize_poly(x; degree=1) @@ -73,6 +75,7 @@ plot([featurized_fit_and_plot(degree) for degree in 1:4]...) featurized_fit_and_plot(18) +# ## Ridge regression # To counteract this unwanted behaviour, we can introduce regularization. This leads to *ridge regression* with $L_2$ regularization of the weights ([Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)). # Instead of the weights in linear regression, # ```math @@ -103,6 +106,7 @@ end plot([regularized_fit_and_plot(18, lambda) for lambda in [1e-4, 1e-2, 0.1, 10]]...) +# ## Kernel ridge regression # Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. # # To apply this "kernel trick" to ridge regression, we can rewrite the ridge estimate for the weights From 45b645ccf430a6006131e5ae5497e1bccb7694e6 Mon Sep 17 00:00:00 2001 From: st-- Date: Tue, 6 Jul 2021 23:17:37 +0300 Subject: [PATCH 05/16] Apply suggestions from code review Co-authored-by: David Widmann --- examples/kernel-ridge-regression/script.jl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 321f68f9d..feb2fa74c 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -22,7 +22,7 @@ f_truth(x) = (x + 4) * (x + 1) * (x - 1) * (x - 3) x_train = collect(-5:0.5:5) x_test = collect(-5:0.1:5) -noise = rand(Uniform(-10, 10), size(x_train)) +noise = rand(Uniform(-10, 10), length(x_train)) y_train = f_truth.(x_train) + noise y_test = f_truth.(x_test) @@ -55,8 +55,7 @@ plot!(x_test, y_pred; label="linear fit") # We can improve the fit by including additional features, i.e. generalizing to $\mathrm{X} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: function featurize_poly(x; degree=1) - xcols = [x .^ d for d in 0:degree] - return hcat(xcols...) + return repeat(x, 1, degree + 1) .^ (0:degree)' end function featurized_fit_and_plot(degree) @@ -67,7 +66,7 @@ function featurized_fit_and_plot(degree) return plot!(x_test, y_pred) end -plot([featurized_fit_and_plot(degree) for degree in 1:4]...) +plot((featurized_fit_and_plot(degree) for degree in 1:4)...) 
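
(Aside, not part of the patch: a tiny worked example of the `repeat`-based `featurize_poly` introduced just above — each row of the result is the feature vector $(1, x, x^2, \dots, x^d)$ of one input.)

```julia
# With featurize_poly as defined above:
featurize_poly([2.0, 3.0]; degree=2)
# 2×3 Matrix{Float64}:
#  1.0  2.0  4.0
#  1.0  3.0  9.0
```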
# Note that the fit becomes perfect when we include exactly as many orders in the features as we have in the underlying polynomial (4). # @@ -104,7 +103,7 @@ function regularized_fit_and_plot(degree, lambda) return plot!(x_test, y_pred) end -plot([regularized_fit_and_plot(18, lambda) for lambda in [1e-4, 1e-2, 0.1, 10]]...) +plot((regularized_fit_and_plot(18, lambda) for lambda in (1e-4, 1e-2, 0.1, 10))...) # ## Kernel ridge regression # Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. @@ -151,12 +150,11 @@ function kernelized_fit_and_plot(kernel, lambda=1e-4) y_pred; label=nothing, title=title, - #title=string(raw"$\lambda=", lambda, raw"$") ) return p end -plot([kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4]...) +plot((kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4)...) # However, we can now also use kernels that would have an infinite-dimensional feature expansion, such as the squared exponential kernel: From 8d7787df426147d3f0fd2101ed600007b209974b Mon Sep 17 00:00:00 2001 From: ST John Date: Tue, 6 Jul 2021 23:19:36 +0300 Subject: [PATCH 06/16] update Project/Manifest.toml --- .../kernel-ridge-regression/Manifest.toml | 235 ------------------ examples/kernel-ridge-regression/Project.toml | 6 - 2 files changed, 241 deletions(-) diff --git a/examples/kernel-ridge-regression/Manifest.toml b/examples/kernel-ridge-regression/Manifest.toml index c3a4f40f2..8b8a2d648 100644 --- a/examples/kernel-ridge-regression/Manifest.toml +++ b/examples/kernel-ridge-regression/Manifest.toml @@ -1,16 +1,5 @@ # This file is machine-generated - editing it directly is not advised -[[AbstractFFTs]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0" -uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "1.0.1" - -[[AbstractTrees]] -git-tree-sha1 = "03e0550477d86222521d254b741d470ba17ea0b5" -uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" -version = "0.3.4" - [[Adapt]] deps = ["LinearAlgebra"] git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7" @@ -23,12 +12,6 @@ uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" [[Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" -[[BFloat16s]] -deps = ["LinearAlgebra", "Test"] -git-tree-sha1 = "4af69e205efc343068dc8722b8dfec1ade89254a" -uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" -version = "0.1.0" - [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" @@ -38,41 +21,18 @@ git-tree-sha1 = "c3598e525718abcc440f69cc6d5f60dda0a1b61e" uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" version = "1.0.6+5" -[[CEnum]] -git-tree-sha1 = "215a9aa4a1f23fbd05b92769fdd62559488d70e9" -uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" -version = "0.4.1" - -[[CUDA]] -deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "Memoization", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "TimerOutputs"] -git-tree-sha1 = "f6f6d2fc7a80b7710b2db4ecb1f59a1b2c2a715a" -uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" -version = "3.3.0" - [[Cairo_jll]] deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", 
"Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] git-tree-sha1 = "e2f47f6d8337369411569fd45ae5753ca10394c6" uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" version = "1.16.0+6" -[[ChainRules]] -deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "a41f9e72cffd789d5e19e75f1626b2786d640151" -uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" -version = "0.8.11" - [[ChainRulesCore]] deps = ["Compat", "LinearAlgebra", "SparseArrays"] git-tree-sha1 = "dbc9aae1227cfddaa9d2552f3ecba5b641f6cce9" uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" version = "0.10.5" -[[CodecZlib]] -deps = ["TranscodingStreams", "Zlib_jll"] -git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da" -uuid = "944b1d66-785c-5afd-91f1-9de20f533193" -version = "0.7.0" - [[ColorSchemes]] deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random", "StaticArrays"] git-tree-sha1 = "c8fd01e4b736013bc61b704871d20503b33ea402" @@ -91,12 +51,6 @@ git-tree-sha1 = "417b0ed7b8b838aa6ca0a87aadf1bb9eb111ce40" uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" version = "0.12.8" -[[CommonSubexpressions]] -deps = ["MacroTools", "Test"] -git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" -uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" -version = "0.3.0" - [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] git-tree-sha1 = "e4e2b39db08f967cc1360951f01e8a75ec441cab" @@ -118,22 +72,11 @@ git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7" uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" version = "0.5.7" -[[Crayons]] -git-tree-sha1 = "3f71217b538d7aaee0b69ab47d9b7724ca8afa0d" -uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" -version = "4.0.4" - [[DataAPI]] git-tree-sha1 = "dfb3b7e89e395be1e25c2ad6d7690dc29cc53b1d" uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" version = "1.6.0" -[[DataFrames]] -deps = ["Compat", "DataAPI", "Future", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrettyTables", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] -git-tree-sha1 = "66ee4fe515a9294a8836ef18eea7239c6ac3db5e" -uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "1.1.1" - [[DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" @@ -153,18 +96,6 @@ uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" deps = ["Mmap"] uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" -[[DiffResults]] -deps = ["StaticArrays"] -git-tree-sha1 = "c18e98cba888c6c25d1c3b048e4b3380ca956805" -uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" -version = "1.0.3" - -[[DiffRules]] -deps = ["NaNMath", "Random", "SpecialFunctions"] -git-tree-sha1 = "214c3fcac57755cfda163d91c58893a8723f93e9" -uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" -version = "1.0.2" - [[Distances]] deps = ["LinearAlgebra", "Statistics", "StatsAPI"] git-tree-sha1 = "abe4ad222b26af3337262b8afb28fab8d215e9f8" @@ -203,11 +134,6 @@ git-tree-sha1 = "b3bfd02e98aedfa5cf885665493c5598c350cd2f" uuid = "2e619515-83b5-522b-bb60-26c02a35a201" version = "2.2.10+0" -[[ExprTools]] -git-tree-sha1 = "10407a39b87f29d47ebaca8edbc75d7c302ff93e" -uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" -version = "0.1.3" - [[FFMPEG]] deps = 
["FFMPEG_jll"] git-tree-sha1 = "b57e3acbe22f8484b4b5ff66a7499717fe1a9cc8" @@ -232,12 +158,6 @@ git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" version = "0.8.4" -[[Flux]] -deps = ["AbstractTrees", "Adapt", "CUDA", "CodecZlib", "Colors", "DelimitedFiles", "Functors", "Juno", "LinearAlgebra", "MacroTools", "NNlib", "NNlibCUDA", "Pkg", "Printf", "Random", "Reexport", "SHA", "Statistics", "StatsBase", "Test", "ZipFile", "Zygote"] -git-tree-sha1 = "0b3c6d0ce57d3b793eabd346ccc8f605035ef079" -uuid = "587475ba-b771-5e3f-ad9e-33799f191a9c" -version = "0.12.4" - [[Fontconfig_jll]] deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"] git-tree-sha1 = "35895cf184ceaab11fd778b4590144034a167a2f" @@ -250,12 +170,6 @@ git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" version = "0.4.2" -[[ForwardDiff]] -deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "NaNMath", "Printf", "Random", "SpecialFunctions", "StaticArrays"] -git-tree-sha1 = "e2af66012e08966366a43251e1fd421522908be6" -uuid = "f6369f11-7733-5829-9624-2563aa707210" -version = "0.10.18" - [[FreeType2_jll]] deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] git-tree-sha1 = "cbd58c9deb1d304f5a245a0b7eb841a2560cfec6" @@ -274,28 +188,12 @@ git-tree-sha1 = "a7bb2af991c43dcf5c3455d276dd83976799634f" uuid = "d9f16b24-f501-4c13-a1f2-28368ffc5196" version = "0.2.1" -[[Future]] -deps = ["Random"] -uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" - [[GLFW_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll"] git-tree-sha1 = "dba1e8614e98949abfa60480b13653813d8f0157" uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89" version = "3.3.5+0" -[[GPUArrays]] -deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization", "Statistics"] -git-tree-sha1 = "3683030b5479249abaa18aa930fc02307fed05d3" -uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" -version = "7.0.0" - -[[GPUCompiler]] -deps = ["DataStructures", "ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "765d5b600d3177f1d422c9489525938dd8bd95d1" -uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "0.12.2" - [[GR]] deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"] git-tree-sha1 = "b83e3125048a9c3158cbb7ca423790c7b1b57bea" @@ -343,12 +241,6 @@ git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a" uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" version = "0.2.2" -[[IRTools]] -deps = ["InteractiveUtils", "MacroTools", "Test"] -git-tree-sha1 = "c67e7515a11f726f44083e74f218d134396d6510" -uuid = "7869d1d1-7146-5819-86e3-90919afe41df" -version = "0.4.2" - [[IniFile]] deps = ["Test"] git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" @@ -359,12 +251,6 @@ version = "0.5.0" deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" -[[InvertedIndices]] -deps = ["Test"] -git-tree-sha1 = "15732c475062348b0165684ffe28e85ea8396afc" -uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" -version = "1.0.0" - [[IterTools]] git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18" uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" @@ -393,12 +279,6 @@ git-tree-sha1 = "d735490ac75c5cb9f1b00d8b5509c11984dc6943" uuid = 
"aacddb02-875f-59d6-b918-886e6ef4fbf8" version = "2.1.0+0" -[[Juno]] -deps = ["Base64", "Logging", "Media", "Profile"] -git-tree-sha1 = "07cb43290a840908a771552911a6274bc6c072c7" -uuid = "e5e0dc1b-0480-54bc-9374-aad01c23163d" -version = "0.8.4" - [[KernelFunctions]] deps = ["ChainRulesCore", "Compat", "CompositionsBase", "Distances", "FillArrays", "Functors", "LinearAlgebra", "Random", "Requires", "SpecialFunctions", "StatsBase", "StatsFuns", "TensorCore", "Test", "ZygoteRules"] path = "../.." @@ -411,12 +291,6 @@ git-tree-sha1 = "f6250b16881adf048549549fba48b1161acdac8c" uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" version = "3.100.1+0" -[[LLVM]] -deps = ["CEnum", "Libdl", "Printf", "Unicode"] -git-tree-sha1 = "b3cd5971a37d3ac3c13ca805916b90878c699eaf" -uuid = "929cbde3-209d-540e-8aea-75f648917ca0" -version = "3.8.0" - [[LZO_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "e5b909bcf985c5e2605737d2ce278ed791b89be6" @@ -434,16 +308,6 @@ git-tree-sha1 = "a4b12a1bd2ebade87891ab7e36fdbce582301a92" uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" version = "0.15.6" -[[LazyArtifacts]] -deps = ["Artifacts", "Pkg"] -uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" - -[[LearnBase]] -deps = ["LinearAlgebra", "StatsBase"] -git-tree-sha1 = "47e6f4623c1db88570c7a7fa66c6528b92ba4725" -uuid = "7f8f8fb0-2700-5f03-b4bd-41f8cfc144b6" -version = "0.3.0" - [[LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" @@ -536,35 +400,12 @@ version = "0.2.4" [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" -[[MLDataPattern]] -deps = ["LearnBase", "MLLabelUtils", "Random", "SparseArrays", "StatsBase"] -git-tree-sha1 = "e99514e96e8b8129bb333c69e063a56ab6402b5b" -uuid = "9920b226-0b2a-5f5f-9153-9aa70a013f8b" -version = "0.5.4" - -[[MLDataUtils]] -deps = ["DataFrames", "DelimitedFiles", "LearnBase", "MLDataPattern", "MLLabelUtils", "Statistics", "StatsBase"] -git-tree-sha1 = "ee54803aea12b9c8ee972e78ece11ac6023715e6" -uuid = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d" -version = "0.5.4" - -[[MLLabelUtils]] -deps = ["LearnBase", "MappedArrays", "StatsBase"] -git-tree-sha1 = "3211c1fdd1efaefa692c8cf60e021fb007b76a08" -uuid = "66a33bbf-0c2b-5fc8-a008-9da813334f0a" -version = "0.5.6" - [[MacroTools]] deps = ["Markdown", "Random"] git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" version = "0.5.6" -[[MappedArrays]] -git-tree-sha1 = "18d3584eebc861e311a552cbb67723af8edff5de" -uuid = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" -version = "0.4.0" - [[Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" @@ -584,18 +425,6 @@ git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" version = "0.3.1" -[[Media]] -deps = ["MacroTools", "Test"] -git-tree-sha1 = "75a54abd10709c01f1b86b84ec225d26e840ed58" -uuid = "e89f7d12-3494-54d1-8411-f7d8b9ae1f27" -version = "0.5.0" - -[[Memoization]] -deps = ["MacroTools"] -git-tree-sha1 = "a9175def295e0dc1f6da80e8e733a01dd0f36a56" -uuid = "6fafb56a-5788-4b4e-91ca-c0cea6611c73" -version = "0.1.11" - [[Missings]] deps = ["DataAPI"] git-tree-sha1 = "4ea90bd5d3985ae1f9a908bd4500ae88921c5ce7" @@ -608,18 +437,6 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -[[NNlib]] -deps = ["Adapt", "ChainRulesCore", "Compat", "LinearAlgebra", "Pkg", "Requires", "Statistics"] -git-tree-sha1 = "7461639cef384a2ad058005b49e32b318d844343" -uuid = 
"872c559c-99b0-510c-b3b7-b6c96a88d5cd" -version = "0.7.22" - -[[NNlibCUDA]] -deps = ["CUDA", "LinearAlgebra", "NNlib", "Random", "Statistics"] -git-tree-sha1 = "bd8b29bf75be7a6c2b288b4b9a4e8903d0376ac1" -uuid = "a00861dc-f156-4864-bf3c-e6376f28a68d" -version = "0.1.3" - [[NaNMath]] git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb" uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" @@ -703,32 +520,16 @@ git-tree-sha1 = "e995fa1821b6daff8b107a8eafbec234ae2263d0" uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" version = "1.16.5" -[[PooledArrays]] -deps = ["DataAPI", "Future"] -git-tree-sha1 = "cde4ce9d6f33219465b55162811d8de8139c0414" -uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" -version = "1.2.1" - [[Preferences]] deps = ["TOML"] git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a" uuid = "21216c6a-2e73-6563-6e65-726566657250" version = "1.2.2" -[[PrettyTables]] -deps = ["Crayons", "Formatting", "Markdown", "Reexport", "Tables"] -git-tree-sha1 = "0d1245a357cc61c8cd61934c07447aa569ff22e6" -uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" -version = "1.1.0" - [[Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" -[[Profile]] -deps = ["Printf"] -uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" - [[Qt5Base_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "xkbcommon_jll"] git-tree-sha1 = "ad368663a5e20dbb8d6dc2fddeefe4dae0781ae8" @@ -749,18 +550,6 @@ uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" deps = ["Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -[[Random123]] -deps = ["Libdl", "Random", "RandomNumbers"] -git-tree-sha1 = "0e8b146557ad1c6deb1367655e052276690e71a3" -uuid = "74087812-796a-5b5d-8853-05524746bad3" -version = "1.4.2" - -[[RandomNumbers]] -deps = ["Random", "Requires"] -git-tree-sha1 = "441e6fc35597524ada7f85e13df1f4e10137d16f" -uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" -version = "1.4.0" - [[RecipesBase]] git-tree-sha1 = "b3fb709f3c97bfc6e948be68beeecb55a0b340ae" uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" @@ -903,18 +692,6 @@ version = "0.1.1" deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -[[TimerOutputs]] -deps = ["ExprTools", "Printf"] -git-tree-sha1 = "bf8aacc899a1bd16522d0350e1e2310510d77236" -uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.9" - -[[TranscodingStreams]] -deps = ["Random", "Test"] -git-tree-sha1 = "7c53c35547de1c5b9d46a4797cf6d8253807108c" -uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.9.5" - [[URIs]] git-tree-sha1 = "97bbe755a53fe859669cd907f2d96aee8d2c1355" uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" @@ -1077,12 +854,6 @@ git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845" uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" version = "1.4.0+3" -[[ZipFile]] -deps = ["Libdl", "Printf", "Zlib_jll"] -git-tree-sha1 = "c3a5637e27e914a7a445b8d0ad063d701931e9f7" -uuid = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" -version = "0.9.3" - [[Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" @@ -1093,12 +864,6 @@ git-tree-sha1 = "cc4bf3fdde8b7e3e9fa0351bdeedba1cf3b7f6e6" uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" version = "1.5.0+0" -[[Zygote]] -deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", 
"ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"] -git-tree-sha1 = "b1d95edd4e693066c38c13a10aab0a8f6a6e2f65" -uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" -version = "0.6.12" - [[ZygoteRules]] deps = ["MacroTools"] git-tree-sha1 = "9e7a1e8ca60b742e508a315c17eef5211e7fbfd7" diff --git a/examples/kernel-ridge-regression/Project.toml b/examples/kernel-ridge-regression/Project.toml index f3b3a5b77..13219d95c 100644 --- a/examples/kernel-ridge-regression/Project.toml +++ b/examples/kernel-ridge-regression/Project.toml @@ -1,19 +1,13 @@ [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" KernelFunctions = "ec8451be-7e33-11e9-00cf-bbf324bd1392" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" -MLDataUtils = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] Distributions = "0.25" -Flux = "0.12" KernelFunctions = "0.10" Literate = "2" -MLDataUtils = "0.5" Plots = "1" -Zygote = "0.6" julia = "1.3" From 5fb788096ceab7cd4750e5c76503715a5ffc9146 Mon Sep 17 00:00:00 2001 From: st-- Date: Tue, 6 Jul 2021 23:21:55 +0300 Subject: [PATCH 07/16] Update examples/kernel-ridge-regression/script.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- examples/kernel-ridge-regression/script.jl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index feb2fa74c..2c83aa15f 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -145,12 +145,7 @@ function kernelized_fit_and_plot(kernel, lambda=1e-4) title = string(nameof(typeof(kernel))) end scatter(x_train, y_train; label=nothing) - p = plot!( - x_test, - y_pred; - label=nothing, - title=title, - ) + p = plot!(x_test, y_pred; label=nothing, title=title) return p end From ade425ce4920eba2cc977c57f4eeb9e7bfe1a720 Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 11:28:34 +0300 Subject: [PATCH 08/16] VSCode cell markers --- examples/kernel-ridge-regression/script.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 2c83aa15f..ca4f7fe4e 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -14,6 +14,7 @@ default(; lw=2.0, legendfontsize=11.0); using Random: seed! seed!(42); +## # ## Toy data # Here we use a one-dimensional toy problem. We generate data using the fourth-order polynomial $f(x) = (x+4)(x+1)(x-1)(x-3)$: @@ -29,6 +30,7 @@ y_test = f_truth.(x_test) plot(x_test, y_test; label=raw"$f(x)$") scatter!(x_train, y_train; label="observations") +## # ## Linear regression # For training inputs $\mathrm{X}=(\mathbf{x}_n)_{n=1}^N$ and observations $\mathbf{y}=(y_n)_{n=1}^N$, the linear regression weights $\mathbf{w}$ using the least-squares estimator are given by # ```math @@ -51,6 +53,7 @@ y_pred = linear_regression(x_train, y_train, x_test) scatter(x_train, y_train; label="observations") plot!(x_test, y_pred; label="linear fit") +## # ## Featurization # We can improve the fit by including additional features, i.e. 
generalizing to $\mathrm{X} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: @@ -68,12 +71,14 @@ end plot((featurized_fit_and_plot(degree) for degree in 1:4)...) +## # Note that the fit becomes perfect when we include exactly as many orders in the features as we have in the underlying polynomial (4). # # However, when increasing the number of features, we can quickly overfit to noise in the data set: featurized_fit_and_plot(18) +## # ## Ridge regression # To counteract this unwanted behaviour, we can introduce regularization. This leads to *ridge regression* with $L_2$ regularization of the weights ([Tikhonov regularization](https://en.wikipedia.org/wiki/Tikhonov_regularization)). # Instead of the weights in linear regression, @@ -105,6 +110,7 @@ end plot((regularized_fit_and_plot(18, lambda) for lambda in (1e-4, 1e-2, 0.1, 10))...) +## # ## Kernel ridge regression # Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. # @@ -151,6 +157,7 @@ end plot((kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4)...) +## # However, we can now also use kernels that would have an infinite-dimensional feature expansion, such as the squared exponential kernel: kernelized_fit_and_plot(SqExponentialKernel()) From d5041c4bb392f55103cb712ba8ee6ed9018b634e Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 11:28:56 +0300 Subject: [PATCH 09/16] use range directly and extent for test set --- examples/kernel-ridge-regression/script.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index ca4f7fe4e..8acc6f74a 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -20,8 +20,8 @@ seed!(42); f_truth(x) = (x + 4) * (x + 1) * (x - 1) * (x - 3) -x_train = collect(-5:0.5:5) -x_test = collect(-5:0.1:5) +x_train = -5:0.5:5 +x_test = -7:0.1:7 noise = rand(Uniform(-10, 10), length(x_train)) y_train = f_truth.(x_train) + noise From 753775b70f107c34a323e1d07fd0fddeb176faab Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 11:29:12 +0300 Subject: [PATCH 10/16] minor plot clean up --- examples/kernel-ridge-regression/script.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 8acc6f74a..6c702dcf4 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -28,7 +28,7 @@ y_train = f_truth.(x_train) + noise y_test = f_truth.(x_test) plot(x_test, y_test; label=raw"$f(x)$") -scatter!(x_train, y_train; label="observations") +scatter!(x_train, y_train; seriescolor=1, label="observations") ## # ## Linear regression @@ -151,8 +151,7 @@ function kernelized_fit_and_plot(kernel, lambda=1e-4) title = string(nameof(typeof(kernel))) end scatter(x_train, y_train; label=nothing) - p = plot!(x_test, y_pred; label=nothing, title=title) - return p + plot!(x_test, y_pred; label=nothing, title=title) end plot((kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4)...) 
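
(Aside between patches: with the definitions from `script.jl` at this point in the series — `kernel_ridge_regression`, `x_train`, `y_train`, `x_test`, and the noise-free `y_test` — one could sanity-check candidate ridge parameters against the true test curve. The `mse` helper is hypothetical, not part of the example.)

```julia
# Hypothetical model-selection sketch: score each lambda by the mean squared
# error of the kernel ridge fit against the noise-free y_test.
mse(y, ŷ) = sum(abs2, y - ŷ) / length(y)
for lambda in (1e-4, 1e-2, 1.0)
    ŷ = kernel_ridge_regression(SqExponentialKernel(), x_train, y_train, x_test, lambda)
    println("λ = ", lambda, ":  MSE = ", mse(y_test, ŷ))
end
```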
From fb8675f5affc3471a07a43401167838dc9d21143 Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 11:29:28 +0300 Subject: [PATCH 11/16] change noise level and orders/lambdas for ridge regression --- examples/kernel-ridge-regression/script.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 6c702dcf4..e060d576d 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -23,7 +23,7 @@ f_truth(x) = (x + 4) * (x + 1) * (x - 1) * (x - 3) x_train = -5:0.5:5 x_test = -7:0.1:7 -noise = rand(Uniform(-10, 10), length(x_train)) +noise = rand(Uniform(-20, 20), length(x_train)) y_train = f_truth.(x_train) + noise y_test = f_truth.(x_test) @@ -76,7 +76,7 @@ plot((featurized_fit_and_plot(degree) for degree in 1:4)...) # # However, when increasing the number of features, we can quickly overfit to noise in the data set: -featurized_fit_and_plot(18) +featurized_fit_and_plot(20) ## # ## Ridge regression @@ -108,7 +108,7 @@ function regularized_fit_and_plot(degree, lambda) return plot!(x_test, y_pred) end -plot((regularized_fit_and_plot(18, lambda) for lambda in (1e-4, 1e-2, 0.1, 10))...) +plot((regularized_fit_and_plot(20, lambda) for lambda in (1e-3, 1e-2, 1e-1, 1))...) ## # ## Kernel ridge regression From f69a4c8bd37662ea977ec21a59f84ee464963ad4 Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 11:29:34 +0300 Subject: [PATCH 12/16] fix ylims for all plots --- examples/kernel-ridge-regression/script.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index e060d576d..3ce823fb9 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -9,7 +9,7 @@ using Distributions ## Plotting using Plots; -default(; lw=2.0, legendfontsize=11.0); +default(; lw=2.0, legendfontsize=11.0, ylims=(-150, 500)); using Random: seed! seed!(42); From 39161cf35ae422a2f023233684a3803dc8e3b61e Mon Sep 17 00:00:00 2001 From: st-- Date: Thu, 8 Jul 2021 11:55:31 +0300 Subject: [PATCH 13/16] Update examples/kernel-ridge-regression/script.jl Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- examples/kernel-ridge-regression/script.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 3ce823fb9..86a351998 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -151,7 +151,7 @@ function kernelized_fit_and_plot(kernel, lambda=1e-4) title = string(nameof(typeof(kernel))) end scatter(x_train, y_train; label=nothing) - plot!(x_test, y_pred; label=nothing, title=title) + return plot!(x_test, y_pred; label=nothing, title=title) end plot((kernelized_fit_and_plot(PolynomialKernel(; degree=degree, c=1)) for degree in 1:4)...) 
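
(Aside: since `kernelized_fit_and_plot` accepts any kernel, the squared-exponential fit at the end of the script could also be tuned by composing with a `ScaleTransform` — the same `∘` composition that appeared in the old code removed by the first patch. This is a sketch, not a change proposed by the series; larger scale parameters correspond to shorter effective lengthscales and hence wigglier fits.)

```julia
# Compare a short-lengthscale and a long-lengthscale squared-exponential fit:
plot(
    kernelized_fit_and_plot(SqExponentialKernel() ∘ ScaleTransform(2.0)),
    kernelized_fit_and_plot(SqExponentialKernel() ∘ ScaleTransform(0.5)),
)
```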
From 0ff6ed8e4db154800ba25ecc1e19392bdc382f77 Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 12:31:31 +0300 Subject: [PATCH 14/16] remove VSCode block delimiters --- docs/literate.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/literate.jl b/docs/literate.jl index 0c8a0e2fe..fc8f19799 100644 --- a/docs/literate.jl +++ b/docs/literate.jl @@ -13,8 +13,8 @@ Pkg.activate(EXAMPLEPATH) Pkg.instantiate() using Literate: Literate -# Add link to nbviewer below the first heading of level 1 function preprocess(content) + # Add link to nbviewer below the first heading of level 1 sub = SubstitutionString( """ #md # ```@meta @@ -35,7 +35,12 @@ function preprocess(content) # """, ) - return replace(content, r"^# # [^\n]*"m => sub; count=1) + content = replace(content, r"^# # [^\n]*"m => sub; count=1) + + # remove VSCode `##` block delimiters + content = replace(content, r"^##$"m => "") + + return content end # Convert to markdown and notebook From 19c32b599d3ff7e7f754bdb4b33aa8207165290d Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 12:34:34 +0300 Subject: [PATCH 15/16] remove whole line --- docs/literate.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/literate.jl b/docs/literate.jl index fc8f19799..452d1cdf2 100644 --- a/docs/literate.jl +++ b/docs/literate.jl @@ -37,8 +37,8 @@ function preprocess(content) ) content = replace(content, r"^# # [^\n]*"m => sub; count=1) - # remove VSCode `##` block delimiters - content = replace(content, r"^##$"m => "") + # remove VSCode `##` block delimiter lines + content = replace(content, r"^##$."ms => "") return content end From c563577ba899cc7f0139fd563f80475a661e1eaf Mon Sep 17 00:00:00 2001 From: ST John Date: Thu, 8 Jul 2021 13:31:13 +0300 Subject: [PATCH 16/16] add \tilde{} to disambiguate --- examples/kernel-ridge-regression/script.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/kernel-ridge-regression/script.jl b/examples/kernel-ridge-regression/script.jl index 86a351998..62afc5944 100644 --- a/examples/kernel-ridge-regression/script.jl +++ b/examples/kernel-ridge-regression/script.jl @@ -55,7 +55,7 @@ plot!(x_test, y_pred; label="linear fit") ## # ## Featurization -# We can improve the fit by including additional features, i.e. generalizing to $\mathrm{X} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: +# We can improve the fit by including additional features, i.e. generalizing to $\tilde{\mathrm{X}} = (\phi(x_n))_{n=1}^N$, where $\phi(x)$ constructs a feature vector for each input $x$. Here we include powers of the input, $\phi(x) = (1, x, x^2, \dots, x^d)$: function featurize_poly(x; degree=1) return repeat(x, 1, degree + 1) .^ (0:degree)' @@ -112,7 +112,7 @@ plot((regularized_fit_and_plot(20, lambda) for lambda in (1e-3, 1e-2, 1e-1, 1)). ## # ## Kernel ridge regression -# Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\mathrm{X} \mathrm{X}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. 
+# Instead of constructing the feature matrix explicitly, we can use *kernels* to replace inner products of feature vectors with a kernel evaluation: $\langle \phi(x), \phi(x') \rangle = k(x, x')$ or $\tilde{\mathrm{X}} \tilde{\mathrm{X}}^\top = \mathrm{K}$, where $\mathrm{K}_{ij} = k(x_i, x_j)$. # # To apply this "kernel trick" to ridge regression, we can rewrite the ridge estimate for the weights # ```math