diff --git a/.gitignore b/.gitignore index e3b2d76a492..c36591f03d8 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,8 @@ benchmarks/*.csv *.o-* *.exe *.a +# mac debug folders +*.dSYM/ # Intel template building blocks (TBB) lib/tbb diff --git a/Jenkinsfile b/Jenkinsfile index 991215acca3..78c9e6be934 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -322,6 +322,32 @@ pipeline { } post { always { retry(3) { deleteDir() } } } } + stage('Laplace Unit Tests') { + agent { + docker { + image 'stanorg/ci:gpu-cpp17' + label 'linux' + args '--cap-add SYS_PTRACE' + } + } + when { + expression { + !skipRemainingStages + } + } + steps { + unstash 'MathSetup' + sh "echo CXXFLAGS += -march=native -mtune=native >> make/local" + sh "echo O=3 >> make/local" + script { + if (params.optimizeUnitTests || isBranch('develop') || isBranch('master')) { + sh "echo CXXFLAGS += -fsanitize=address >> make/local" + } + runTests("test/unit/math/laplace/*_test.cpp", false) + } + } + post { always { retry(3) { deleteDir() } } } + } stage('OpenCL GPU tests') { agent { docker { diff --git a/doxygen/doxygen.cfg b/doxygen/doxygen.cfg index 3d2ca2ee1c8..e2b8ced498a 100644 --- a/doxygen/doxygen.cfg +++ b/doxygen/doxygen.cfg @@ -2741,3 +2741,38 @@ MSCGEN_TOOL = # command). MSCFILE_DIRS = + +ALIASES += laplace_options="\ +\param[in] tolerance controls the convergence criterion when finding the mode in the Laplace approximation. \ +\param[in] max_num_steps maximum number of steps before the Newton solver breaks and returns an error. \ +\param[in] hessian_block_size Block size of Hessian of log likelihood w.r.t latent Gaussian variable theta. \ +\param[in] solver Type of Newton solver. Each corresponds to a distinct choice of B matrix (i.e. application SWM formula): \ +1. computes square-root of negative Hessian. \ +2. computes square-root of covariance matrix. \ +3. computes no square-root and uses LU decomposition. \ +\param[in] max_steps_line_search Number of steps after which the algorithm gives up on doing a line search. If 0, no linesearch. \ +" + +ALIASES += laplace_common_template_args="\ + \tparam ThetaVec A type inheriting from `Eigen::EigenBase` with dynamic sized rows and 1 column. \ + \tparam CovarFun A functor with an `operator()(CovarArgsElements..., {TrainTupleElements...| PredTupleElements...})` \ + method. The `operator()` method should accept as arguments the \ + inner elements of `CovarArgs`. The return type of the `operator()` method \ + should be a type inheriting from `Eigen::EigenBase` with dynamic sized \ + rows and columns. \ + \tparam CovarArgs A tuple of types to passed as the first arguments of `CovarFun::operator()`\ +" + +ALIASES += laplace_common_args="\ +\param[in] theta_0 the initial guess for the Laplace approximation. \ +\param[in] covariance_function a function which returns the prior covariance. \ +\param[in] covar_args arguments for the covariance function. \ +" + +ALIASES += msg_arg="\ +\param[in, out] msgs stream for messages from likelihood and covariance \ +" + +ALIASES += rng_arg="\ +\param[in, out] rng Random number generator \ +" diff --git a/make/tests b/make/tests index 5f608b65381..0fbe233945b 100644 --- a/make/tests +++ b/make/tests @@ -23,7 +23,6 @@ benchmarks/%$(EXE) : benchmarks/%.cpp $(GTEST)/src/gtest-all.o $(MPI_TARGETS) $( test/% : CXXFLAGS += $(CXXFLAGS_GTEST) test/% : CPPFLAGS += $(CPPFLAGS_GTEST) test/% : INC += $(INC_GTEST) - test/%$(EXE) : test/%.o $(GTEST)/src/gtest_main.cc $(GTEST)/src/gtest-all.o $(MPI_TARGETS) $(TBB_TARGETS) $(LINK.cpp) $^ $(LDLIBS) $(OUTPUT_OPTION) diff --git a/stan/math/fwd/meta/is_fvar.hpp b/stan/math/fwd/meta/is_fvar.hpp index e208d08bc71..c5d67de8894 100644 --- a/stan/math/fwd/meta/is_fvar.hpp +++ b/stan/math/fwd/meta/is_fvar.hpp @@ -21,5 +21,8 @@ struct is_fvar>::value>> : std::true_type {}; +template +inline constexpr bool is_fvar_v = is_fvar::value; + } // namespace stan #endif diff --git a/stan/math/mix.hpp b/stan/math/mix.hpp index 876916443ce..62f9cf96465 100644 --- a/stan/math/mix.hpp +++ b/stan/math/mix.hpp @@ -1,6 +1,10 @@ #ifndef STAN_MATH_MIX_HPP #define STAN_MATH_MIX_HPP +#include +#include +#include + #include #include #include @@ -26,4 +30,6 @@ #include +#include + #endif diff --git a/stan/math/mix/functor.hpp b/stan/math/mix/functor.hpp index 8e4367ee187..d8e76990be8 100644 --- a/stan/math/mix/functor.hpp +++ b/stan/math/mix/functor.hpp @@ -2,13 +2,16 @@ #define STAN_MATH_MIX_FUNCTOR_HPP #include -#include #include +#include #include #include #include #include +#include +#include +#include +#include #include #include - #endif diff --git a/stan/math/mix/functor/derivative.hpp b/stan/math/mix/functor/derivative.hpp index 478063fe82b..7acc00934da 100644 --- a/stan/math/mix/functor/derivative.hpp +++ b/stan/math/mix/functor/derivative.hpp @@ -1,9 +1,9 @@ #ifndef STAN_MATH_MIX_FUNCTOR_DERIVATIVE_HPP #define STAN_MATH_MIX_FUNCTOR_DERIVATIVE_HPP +#include #include #include -#include #include namespace stan { @@ -21,7 +21,7 @@ namespace math { * @param[out] dfx_dx Value of derivative */ template -void derivative(const F& f, const T& x, T& fx, T& dfx_dx) { +inline void derivative(const F& f, const T& x, T& fx, T& dfx_dx) { fvar x_fvar = fvar(x, 1.0); fvar fx_fvar = f(x_fvar); fx = fx_fvar.val_; diff --git a/stan/math/mix/functor/finite_diff_grad_hessian.hpp b/stan/math/mix/functor/finite_diff_grad_hessian.hpp index 95bab8427e3..42ac3f5652b 100644 --- a/stan/math/mix/functor/finite_diff_grad_hessian.hpp +++ b/stan/math/mix/functor/finite_diff_grad_hessian.hpp @@ -38,10 +38,10 @@ namespace math { * @param[in] epsilon perturbation size */ template -void finite_diff_grad_hessian(const F& f, const Eigen::VectorXd& x, double& fx, - Eigen::MatrixXd& hess, - std::vector& grad_hess_fx, - double epsilon = 1e-04) { +inline void finite_diff_grad_hessian(const F& f, const Eigen::VectorXd& x, + double& fx, Eigen::MatrixXd& hess, + std::vector& grad_hess_fx, + double epsilon = 1e-04) { int d = x.size(); grad_hess_fx.clear(); diff --git a/stan/math/mix/functor/finite_diff_grad_hessian_auto.hpp b/stan/math/mix/functor/finite_diff_grad_hessian_auto.hpp index 8c38ed5477f..b41a54e0320 100644 --- a/stan/math/mix/functor/finite_diff_grad_hessian_auto.hpp +++ b/stan/math/mix/functor/finite_diff_grad_hessian_auto.hpp @@ -41,9 +41,9 @@ namespace math { * @param[out] grad_hess_fx gradient of Hessian of function at argument */ template -void finite_diff_grad_hessian_auto(const F& f, const Eigen::VectorXd& x, - double& fx, Eigen::MatrixXd& hess, - std::vector& grad_hess_fx) { +inline void finite_diff_grad_hessian_auto( + const F& f, const Eigen::VectorXd& x, double& fx, Eigen::MatrixXd& hess, + std::vector& grad_hess_fx) { int d = x.size(); grad_hess_fx.clear(); diff --git a/stan/math/mix/functor/grad_hessian.hpp b/stan/math/mix/functor/grad_hessian.hpp index d8abb272feb..c3478bcc113 100644 --- a/stan/math/mix/functor/grad_hessian.hpp +++ b/stan/math/mix/functor/grad_hessian.hpp @@ -1,9 +1,9 @@ #ifndef STAN_MATH_MIX_FUNCTOR_GRAD_HESSIAN_HPP #define STAN_MATH_MIX_FUNCTOR_GRAD_HESSIAN_HPP +#include #include #include -#include #include #include @@ -39,7 +39,7 @@ namespace math { * @param[out] grad_H Gradient of the Hessian of function at argument */ template -void grad_hessian( +inline void grad_hessian( const F& f, const Eigen::Matrix& x, double& fx, Eigen::Matrix& H, std::vector >& diff --git a/stan/math/mix/functor/grad_tr_mat_times_hessian.hpp b/stan/math/mix/functor/grad_tr_mat_times_hessian.hpp index c8f72b98a00..7b7bec13b32 100644 --- a/stan/math/mix/functor/grad_tr_mat_times_hessian.hpp +++ b/stan/math/mix/functor/grad_tr_mat_times_hessian.hpp @@ -1,10 +1,10 @@ #ifndef STAN_MATH_MIX_FUNCTOR_GRAD_TR_MAT_TIMES_HESSIAN_HPP #define STAN_MATH_MIX_FUNCTOR_GRAD_TR_MAT_TIMES_HESSIAN_HPP +#include #include #include #include -#include #include #include @@ -12,7 +12,7 @@ namespace stan { namespace math { template -void grad_tr_mat_times_hessian( +inline void grad_tr_mat_times_hessian( const F& f, const Eigen::Matrix& x, const Eigen::Matrix& M, Eigen::Matrix& grad_tr_MH) { @@ -26,7 +26,7 @@ void grad_tr_mat_times_hessian( Matrix x_var(x.size()); for (int i = 0; i < x.size(); ++i) { - x_var(i) = x(i); + x_var.coeffRef(i) = x(i); } Matrix, Dynamic, 1> x_fvar(x.size()); diff --git a/stan/math/mix/functor/gradient_dot_vector.hpp b/stan/math/mix/functor/gradient_dot_vector.hpp index b664effd19b..5b5326ddb0a 100644 --- a/stan/math/mix/functor/gradient_dot_vector.hpp +++ b/stan/math/mix/functor/gradient_dot_vector.hpp @@ -1,19 +1,19 @@ #ifndef STAN_MATH_MIX_FUNCTOR_GRADIENT_DOT_VECTOR_HPP #define STAN_MATH_MIX_FUNCTOR_GRADIENT_DOT_VECTOR_HPP +#include #include #include -#include #include namespace stan { namespace math { template -void gradient_dot_vector(const F& f, - const Eigen::Matrix& x, - const Eigen::Matrix& v, T1& fx, - T1& grad_fx_dot_v) { +inline void gradient_dot_vector(const F& f, + const Eigen::Matrix& x, + const Eigen::Matrix& v, + T1& fx, T1& grad_fx_dot_v) { using Eigen::Matrix; Matrix, Eigen::Dynamic, 1> x_fvar(x.size()); for (int i = 0; i < x.size(); ++i) { diff --git a/stan/math/mix/functor/hessian.hpp b/stan/math/mix/functor/hessian.hpp index 601444384ea..ae0e93132d4 100644 --- a/stan/math/mix/functor/hessian.hpp +++ b/stan/math/mix/functor/hessian.hpp @@ -1,9 +1,9 @@ #ifndef STAN_MATH_MIX_FUNCTOR_HESSIAN_HPP #define STAN_MATH_MIX_FUNCTOR_HESSIAN_HPP +#include #include #include -#include #include namespace stan { @@ -39,9 +39,10 @@ namespace math { * @param[out] H Hessian of function at argument */ template -void hessian(const F& f, const Eigen::Matrix& x, - double& fx, Eigen::Matrix& grad, - Eigen::Matrix& H) { +inline void hessian(const F& f, + const Eigen::Matrix& x, + double& fx, Eigen::Matrix& grad, + Eigen::Matrix& H) { H.resize(x.size(), x.size()); grad.resize(x.size()); diff --git a/stan/math/mix/functor/hessian_block_diag.hpp b/stan/math/mix/functor/hessian_block_diag.hpp new file mode 100644 index 00000000000..80e84f68454 --- /dev/null +++ b/stan/math/mix/functor/hessian_block_diag.hpp @@ -0,0 +1,56 @@ +#ifndef STAN_MATH_MIX_FUNCTOR_HESSIAN_BLOCK_DIAG_HPP +#define STAN_MATH_MIX_FUNCTOR_HESSIAN_BLOCK_DIAG_HPP + +#include +#include + +namespace stan { +namespace math { + +/** + * Returns a block diagonal Hessian by computing the relevant directional + * derivatives and storing them in a matrix. + * For m the size of each block, the operations const m calls to + * hessian_times_vector, that is m forward sweeps and m reverse sweeps. + * @tparam F Type of function to differentiate. + * @tparam Eta Type of additional arguments passed to F. + * @tparam Args Type of variadic arguments passed to F. + * @param f Function to differentiate. + * @param x Arguments with respect to which we differentiate. + * @param eta Additional arguments for f. + * @param hessian_block_size + * @param args Additional variadic arguments for f. + */ +template +inline Eigen::SparseMatrix hessian_block_diag( + F&& f, const Eigen::VectorXd& x, const Eigen::Index hessian_block_size, + Args&&... args) { + using Eigen::MatrixXd; + using Eigen::VectorXd; + + const Eigen::Index x_size = x.size(); + Eigen::SparseMatrix H(x_size, x_size); + H.reserve(Eigen::VectorXi::Constant(x_size, hessian_block_size)); + VectorXd v(x_size); + Eigen::Index n_blocks = x_size / hessian_block_size; + Eigen::VectorXd Hv = Eigen::VectorXd::Zero(x_size); + for (Eigen::Index i = 0; i < hessian_block_size; ++i) { + v.setZero(); + for (Eigen::Index j = i; j < x_size; j += hessian_block_size) { + v.coeffRef(j) = 1; + } + hessian_times_vector(f, Hv, x, v, args...); + for (int j = 0; j < n_blocks; ++j) { + for (int k = 0; k < hessian_block_size; ++k) { + H.insert(k + j * hessian_block_size, i + j * hessian_block_size) + = Hv(k + j * hessian_block_size); + } + } + } + return H; +} + +} // namespace math +} // namespace stan + +#endif diff --git a/stan/math/mix/functor/hessian_times_vector.hpp b/stan/math/mix/functor/hessian_times_vector.hpp index 5d3bf9fb567..dbd1c62c347 100644 --- a/stan/math/mix/functor/hessian_times_vector.hpp +++ b/stan/math/mix/functor/hessian_times_vector.hpp @@ -1,9 +1,9 @@ #ifndef STAN_MATH_MIX_FUNCTOR_HESSIAN_TIMES_VECTOR_HPP #define STAN_MATH_MIX_FUNCTOR_HESSIAN_TIMES_VECTOR_HPP +#include #include #include -#include #include #include @@ -11,11 +11,10 @@ namespace stan { namespace math { template -void hessian_times_vector(const F& f, - const Eigen::Matrix& x, - const Eigen::Matrix& v, - double& fx, - Eigen::Matrix& Hv) { +inline void hessian_times_vector( + const F& f, const Eigen::Matrix& x, + const Eigen::Matrix& v, double& fx, + Eigen::Matrix& Hv) { using Eigen::Matrix; // Run nested autodiff in this scope @@ -35,10 +34,13 @@ void hessian_times_vector(const F& f, Hv(i) = x_var(i).adj(); } } -template + +template * = nullptr, + require_stan_scalar_t* = nullptr> void hessian_times_vector(const F& f, const Eigen::Matrix& x, - const Eigen::Matrix& v, T& fx, + const EigVec& v, T& fx, Eigen::Matrix& Hv) { using Eigen::Matrix; Matrix grad; @@ -47,6 +49,29 @@ void hessian_times_vector(const F& f, Hv = H * v; } +/** + * Overload Hessian_times_vector function, under stan/math/mix/functor + * to handle functions which take in arguments + * and pstream. + */ +template * = nullptr> +inline void hessian_times_vector(const F& f, XAdj& x_adj, XVec&& x, VVec&& v, + Args&&... args) { + nested_rev_autodiff nested; + const Eigen::Index x_size = x.size(); + Eigen::Matrix x_var = std::forward(x); + Eigen::Matrix, Eigen::Dynamic, 1> x_fvar(x_size); + for (Eigen::Index i = 0; i < x_size; i++) { + x_fvar(i) = fvar(x_var(i), v(i)); + } + fvar fx_fvar = f(x_fvar, args...); + grad(fx_fvar.d_.vi_); + x_adj = x_var.adj(); +} + } // namespace math } // namespace stan + #endif diff --git a/stan/math/mix/functor/laplace_base_rng.hpp b/stan/math/mix/functor/laplace_base_rng.hpp new file mode 100644 index 00000000000..822388ed166 --- /dev/null +++ b/stan/math/mix/functor/laplace_base_rng.hpp @@ -0,0 +1,72 @@ +#ifndef STAN_MATH_MIX_FUNCTOR_LAPLACE_BASE_RNG_HPP +#define STAN_MATH_MIX_FUNCTOR_LAPLACE_BASE_RNG_HPP + +#include +#include +#include +#include + +namespace stan { +namespace math { + +/** + * In a latent gaussian model, + * + * theta ~ Normal(theta | 0, Sigma(phi, x)) + * y ~ pi(y | theta, eta) + * + * returns a multivariate normal random variate sampled + * from the Laplace approximation of p(theta_pred | y, phi, x_pred). + * Note that while the data is observed at x (train_tuple), the new samples + * are drawn for covariates x_pred (pred_tuple). + * To sample the "original" theta's, set pred_tuple = train_tuple. + * @tparam LLFunc Type of likelihood function. + * @tparam LLArgs Tuple of arguments types of likelihood function. + * \laplace_common_template_args + * @tparam RNG A valid boost rng type + * @param ll_fun Likelihood function. + * @param ll_args Arguments for likelihood function. + * \laplace_common_args + * @param options Control parameter for optimizer underlying Laplace approx. + * \rng_arg + * \msg_arg + */ +template < + typename LLFunc, typename LLArgs, typename ThetaVec, typename CovarFun, + typename CovarArgs, typename RNG, require_all_eigen_t* = nullptr, + require_t>* = nullptr> +inline Eigen::VectorXd laplace_base_rng(LLFunc&& ll_fun, LLArgs&& ll_args, + ThetaVec&& theta_0, + CovarFun&& covariance_function, + CovarArgs&& covar_args, + const laplace_options& options, + RNG& rng, std::ostream* msgs) { + auto md_est = internal::laplace_marginal_density_est( + ll_fun, std::forward(ll_args), std::forward(theta_0), + std::forward(covariance_function), + to_ref(std::forward(covar_args)), options, msgs); + // Modified R&W method + auto&& covariance_train = md_est.covariance; + Eigen::VectorXd mean_train = covariance_train * md_est.theta_grad; + if (options.solver == 1 || options.solver == 2) { + Eigen::MatrixXd V_dec + = md_est.L.template triangularView().solve( + md_est.W_r * covariance_train); + Eigen::MatrixXd Sigma = covariance_train - V_dec.transpose() * V_dec; + return multi_normal_rng(std::move(mean_train), std::move(Sigma), rng); + } else { + Eigen::MatrixXd Sigma + = covariance_train + - covariance_train + * (md_est.W_r + - md_est.W_r + * md_est.LU.solve(covariance_train * md_est.W_r)) + * covariance_train; + return multi_normal_rng(std::move(mean_train), std::move(Sigma), rng); + } +} + +} // namespace math +} // namespace stan + +#endif diff --git a/stan/math/mix/functor/laplace_likelihood.hpp b/stan/math/mix/functor/laplace_likelihood.hpp new file mode 100644 index 00000000000..1b54f33fcd9 --- /dev/null +++ b/stan/math/mix/functor/laplace_likelihood.hpp @@ -0,0 +1,454 @@ +#ifndef STAN_MATH_MIX_FUNCTOR_LAPLACE_LIKELIHOOD_HPP +#define STAN_MATH_MIX_FUNCTOR_LAPLACE_LIKELIHOOD_HPP + +#include +#include +#include + +namespace stan { +namespace math { + +/** + * functions to compute the log density, first, second, + * and third-order derivatives for a likelihoood specified by the user. + */ +namespace laplace_likelihood { +namespace internal { +/** + * @tparam F A functor with `opertor()(Args&&...)` returning a scalar + * @tparam Theta A class assignable to an Eigen vector type + * @tparam Stream Type of stream for messages. + * @tparam Args Type of variadic arguments. + * @param f Log likelihood function. + * @param theta Latent Gaussian variable. + * @param msgs Stream for messages. + * @param args Additional variational arguments for likelihood function. + */ +template * = nullptr> +inline auto log_likelihood(F&& f, Theta&& theta, Stream* msgs, Args&&... args) { + return std::forward(f)(std::forward(theta), + std::forward(args)..., msgs); +} + +/** + * Decide if object should be deep or shallow copied when + * using @ref conditional_copy_and_promote . + */ +enum class COPY_TYPE { SHALLOW = 0, DEEP = 1 }; + +/** + * Conditional copy and promote a type's scalar type to a `PromotedType`. + * @tparam Filter type trait with a static constexpr bool member `value` + * that is true if the type should be promoted. Otherwise, the type is + * left unchanged. + * @tparam PromotedType type to promote the scalar to. + * @tparam CopyType type of copy to perform. + * @tparam Args variadic arguments. + * @param args variadic arguments to conditionally copy and promote. + * @return a tuple where each element is either a reference to the original + * argument or a promoted copy of the argument. + */ +template