diff --git a/modules/rgbd/include/opencv2/rgbd.hpp b/modules/rgbd/include/opencv2/rgbd.hpp
index 550d0cc8398..4b40539b61e 100755
--- a/modules/rgbd/include/opencv2/rgbd.hpp
+++ b/modules/rgbd/include/opencv2/rgbd.hpp
@@ -15,6 +15,7 @@
 #include "opencv2/rgbd/dynafu.hpp"
 #include "opencv2/rgbd/large_kinfu.hpp"
 #include "opencv2/rgbd/detail/pose_graph.hpp"
+#include "opencv2/rgbd/colored_kinfu.hpp"
 
 
 /** @defgroup rgbd RGB-Depth Processing
diff --git a/modules/rgbd/include/opencv2/rgbd/colored_kinfu.hpp b/modules/rgbd/include/opencv2/rgbd/colored_kinfu.hpp
new file mode 100644
index 00000000000..427f830b972
--- /dev/null
+++ b/modules/rgbd/include/opencv2/rgbd/colored_kinfu.hpp
@@ -0,0 +1,261 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
+
+#ifndef __OPENCV_RGBD_COLORED_KINFU_HPP__
+#define __OPENCV_RGBD_COLORED_KINFU_HPP__
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/affine.hpp"
+#include <opencv2/rgbd/volume.hpp>
+
+namespace cv {
+namespace colored_kinfu {
+//! @addtogroup kinect_fusion
+//! @{
+
+struct CV_EXPORTS_W Params
+{
+
+    CV_WRAP Params(){}
+
+    /**
+     * @brief Constructor for Params
+     * Sets the initial pose of the TSDF volume.
+     * @param volumeInitialPoseRot rotation matrix
+     * @param volumeInitialPoseTransl translation vector
+     */
+    CV_WRAP Params(Matx33f volumeInitialPoseRot, Vec3f volumeInitialPoseTransl)
+    {
+      setInitialVolumePose(volumeInitialPoseRot,volumeInitialPoseTransl);
+    }
+
+    /**
+     * @brief Constructor for Params
+     * Sets the initial pose of the TSDF volume.
+     * @param volumeInitialPose 4 by 4 Homogeneous Transform matrix to set the intial pose of TSDF volume
+     */
+    CV_WRAP Params(Matx44f volumeInitialPose)
+    {
+      setInitialVolumePose(volumeInitialPose);
+    }
+
+    /**
+     * @brief Set Initial Volume Pose
+     * Sets the initial pose of the TSDF volume.
+     * @param R rotation matrix
+     * @param t translation vector
+     */
+    CV_WRAP void setInitialVolumePose(Matx33f R, Vec3f t);
+
+    /**
+     * @brief Set Initial Volume Pose
+     * Sets the initial pose of the TSDF volume.
+     * @param homogen_tf 4 by 4 Homogeneous Transform matrix to set the intial pose of TSDF volume
+     */
+    CV_WRAP void setInitialVolumePose(Matx44f homogen_tf);
+
+    /**
+     * @brief Default parameters
+     * A set of parameters which provides better model quality, can be very slow.
+     */
+    CV_WRAP static Ptr<Params> defaultParams();
+
+    /** @brief Coarse parameters
+    A set of parameters which provides better speed, can fail to match frames
+    in case of rapid sensor motion.
+    */
+    CV_WRAP static Ptr<Params> coarseParams();
+
+    /** @brief HashTSDF parameters
+      A set of parameters suitable for use with HashTSDFVolume
+    */
+    CV_WRAP static Ptr<Params> hashTSDFParams(bool isCoarse);
+
+    /** @brief ColoredTSDF parameters
+      A set of parameters suitable for use with HashTSDFVolume
+    */
+    CV_WRAP static Ptr<Params> coloredTSDFParams(bool isCoarse);
+
+    /** @brief frame size in pixels */
+    CV_PROP_RW Size frameSize;
+
+    /** @brief rgb frame size in pixels */
+    CV_PROP_RW Size rgb_frameSize;
+
+    CV_PROP_RW kinfu::VolumeType volumeType;
+
+    /** @brief camera intrinsics */
+    CV_PROP_RW Matx33f intr;
+
+    /** @brief rgb camera intrinsics */
+    CV_PROP_RW Matx33f rgb_intr;
+
+    /** @brief pre-scale per 1 meter for input values
+
+    Typical values are:
+         * 5000 per 1 meter for the 16-bit PNG files of TUM database
+         * 1000 per 1 meter for Kinect 2 device
+         * 1 per 1 meter for the 32-bit float images in the ROS bag files
+    */
+    CV_PROP_RW float depthFactor;
+
+    /** @brief Depth sigma in meters for bilateral smooth */
+    CV_PROP_RW float bilateral_sigma_depth;
+    /** @brief Spatial sigma in pixels for bilateral smooth */
+    CV_PROP_RW float bilateral_sigma_spatial;
+    /** @brief Kernel size in pixels for bilateral smooth */
+    CV_PROP_RW int   bilateral_kernel_size;
+
+    /** @brief Number of pyramid levels for ICP */
+    CV_PROP_RW int pyramidLevels;
+
+    /** @brief Resolution of voxel space
+
+    Number of voxels in each dimension.
+    */
+    CV_PROP_RW Vec3i volumeDims;
+    /** @brief Size of voxel in meters */
+    CV_PROP_RW float voxelSize;
+
+    /** @brief Minimal camera movement in meters
+
+    Integrate new depth frame only if camera movement exceeds this value.
+    */
+    CV_PROP_RW float tsdf_min_camera_movement;
+
+    /** @brief initial volume pose in meters */
+    Affine3f volumePose;
+
+    /** @brief distance to truncate in meters
+
+    Distances to surface that exceed this value will be truncated to 1.0.
+    */
+    CV_PROP_RW float tsdf_trunc_dist;
+
+    /** @brief max number of frames per voxel
+
+    Each voxel keeps running average of distances no longer than this value.
+    */
+    CV_PROP_RW int tsdf_max_weight;
+
+    /** @brief A length of one raycast step
+
+    How much voxel sizes we skip each raycast step
+    */
+    CV_PROP_RW float raycast_step_factor;
+
+    // gradient delta in voxel sizes
+    // fixed at 1.0f
+    // float gradient_delta_factor;
+
+    /** @brief light pose for rendering in meters */
+    CV_PROP_RW Vec3f lightPose;
+
+    /** @brief distance theshold for ICP in meters */
+    CV_PROP_RW float icpDistThresh;
+    /** angle threshold for ICP in radians */
+    CV_PROP_RW float icpAngleThresh;
+    /** number of ICP iterations for each pyramid level */
+    CV_PROP_RW std::vector<int> icpIterations;
+
+    /** @brief Threshold for depth truncation in meters
+
+    All depth values beyond this threshold will be set to zero
+    */
+    CV_PROP_RW float truncateThreshold;
+};
+
+/** @brief KinectFusion implementation
+
+  This class implements a 3d reconstruction algorithm described in
+  @cite kinectfusion paper.
+
+  It takes a sequence of depth images taken from depth sensor
+  (or any depth images source such as stereo camera matching algorithm or even raymarching renderer).
+  The output can be obtained as a vector of points and their normals
+  or can be Phong-rendered from given camera pose.
+
+  An internal representation of a model is a voxel cuboid that keeps TSDF values
+  which are a sort of distances to the surface (for details read the @cite kinectfusion article about TSDF).
+  There is no interface to that representation yet.
+
+  KinFu uses OpenCL acceleration automatically if available.
+  To enable or disable it explicitly use cv::setUseOptimized() or cv::ocl::setUseOpenCL().
+
+  This implementation is based on [kinfu-remake](https://github.com/Nerei/kinfu_remake).
+
+  Note that the KinectFusion algorithm was patented and its use may be restricted by
+  the list of patents mentioned in README.md file in this module directory.
+
+  That's why you need to set the OPENCV_ENABLE_NONFREE option in CMake to use KinectFusion.
+*/
+class CV_EXPORTS_W ColoredKinFu
+{
+public:
+    CV_WRAP static Ptr<ColoredKinFu> create(const Ptr<Params>& _params);
+    virtual ~ColoredKinFu();
+
+    /** @brief Get current parameters */
+    virtual const Params& getParams() const = 0;
+
+    /** @brief Renders a volume into an image
+
+      Renders a 0-surface of TSDF using Phong shading into a CV_8UC4 Mat.
+      Light pose is fixed in KinFu params.
+
+        @param image resulting image
+        @param cameraPose pose of camera to render from. If empty then render from current pose
+        which is a last frame camera pose.
+    */
+
+    CV_WRAP virtual void render(OutputArray image, const Matx44f& cameraPose = Matx44f::eye()) const = 0;
+
+    /** @brief Gets points and normals of current 3d mesh
+
+      The order of normals corresponds to order of points.
+      The order of points is undefined.
+
+        @param points vector of points which are 4-float vectors
+        @param normals vector of normals which are 4-float vectors
+     */
+    CV_WRAP virtual void getCloud(OutputArray points, OutputArray normals) const = 0;
+
+    /** @brief Gets points of current 3d mesh
+
+     The order of points is undefined.
+
+        @param points vector of points which are 4-float vectors
+     */
+    CV_WRAP virtual void getPoints(OutputArray points) const = 0;
+
+    /** @brief Calculates normals for given points
+        @param points input vector of points which are 4-float vectors
+        @param normals output vector of corresponding normals which are 4-float vectors
+     */
+    CV_WRAP virtual  void getNormals(InputArray points, OutputArray normals) const = 0;
+
+    /** @brief Resets the algorithm
+
+    Clears current model and resets a pose.
+    */
+    CV_WRAP virtual void reset() = 0;
+
+    /** @brief Get current pose in voxel space */
+    virtual const Affine3f getPose() const = 0;
+
+    /** @brief Process next depth frame
+        @param depth input Mat of depth frame
+        @param rgb   input Mat of rgb (colored) frame
+
+        @return true if succeeded to align new frame with current scene, false if opposite
+    */
+    CV_WRAP virtual bool update(InputArray depth, InputArray rgb) = 0;
+};
+
+//! @}
+}
+}
+#endif
diff --git a/modules/rgbd/include/opencv2/rgbd/kinfu.hpp b/modules/rgbd/include/opencv2/rgbd/kinfu.hpp
index 95f732b6769..924314eddbe 100644
--- a/modules/rgbd/include/opencv2/rgbd/kinfu.hpp
+++ b/modules/rgbd/include/opencv2/rgbd/kinfu.hpp
@@ -74,14 +74,22 @@ struct CV_EXPORTS_W Params
     */
     CV_WRAP static Ptr<Params> hashTSDFParams(bool isCoarse);
 
+    /** @brief ColoredTSDF parameters
+      A set of parameters suitable for use with ColoredTSDFVolume
+    */
+    CV_WRAP static Ptr<Params> coloredTSDFParams(bool isCoarse);
+
     /** @brief frame size in pixels */
     CV_PROP_RW Size frameSize;
 
+    /** @brief rgb frame size in pixels */
     CV_PROP_RW kinfu::VolumeType volumeType;
 
     /** @brief camera intrinsics */
     CV_PROP_RW Matx33f intr;
 
+    /** @brief rgb camera intrinsics */
+    CV_PROP_RW Matx33f rgb_intr;
     /** @brief pre-scale per 1 meter for input values
 
     Typical values are:
diff --git a/modules/rgbd/include/opencv2/rgbd/large_kinfu.hpp b/modules/rgbd/include/opencv2/rgbd/large_kinfu.hpp
index 7f1428c6d51..2f2d510e8e5 100644
--- a/modules/rgbd/include/opencv2/rgbd/large_kinfu.hpp
+++ b/modules/rgbd/include/opencv2/rgbd/large_kinfu.hpp
@@ -41,6 +41,9 @@ struct CV_EXPORTS_W Params
     /** @brief camera intrinsics */
     CV_PROP_RW Matx33f intr;
 
+    /** @brief rgb camera intrinsics */
+    CV_PROP_RW Matx33f rgb_intr;
+
     /** @brief pre-scale per 1 meter for input values
         Typical values are:
              * 5000 per 1 meter for the 16-bit PNG files of TUM database
diff --git a/modules/rgbd/include/opencv2/rgbd/volume.hpp b/modules/rgbd/include/opencv2/rgbd/volume.hpp
index 33f63b1fbfb..3fb1d98ef4c 100644
--- a/modules/rgbd/include/opencv2/rgbd/volume.hpp
+++ b/modules/rgbd/include/opencv2/rgbd/volume.hpp
@@ -30,8 +30,13 @@ class CV_EXPORTS_W Volume
 
     virtual void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose,
                            const kinfu::Intr& intrinsics, const int frameId = 0)               = 0;
+    virtual void integrate(InputArray _depth, InputArray _rgb, float depthFactor,
+                           const Matx44f& cameraPose, const kinfu::Intr& intrinsics,
+                           const Intr& rgb_intrinsics, const int frameId = 0)                  = 0;
     virtual void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics,
                          const Size& frameSize, OutputArray points, OutputArray normals) const = 0;
+    virtual void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics, const Size& frameSize,
+                         OutputArray points, OutputArray normals, OutputArray colors) const    = 0;
     virtual void fetchNormals(InputArray points, OutputArray _normals) const                   = 0;
     virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const             = 0;
     virtual void reset()                                                                       = 0;
@@ -45,8 +50,9 @@ class CV_EXPORTS_W Volume
 
 enum class VolumeType
 {
-    TSDF     = 0,
-    HASHTSDF = 1
+    TSDF        = 0,
+    HASHTSDF    = 1,
+    COLOREDTSDF = 2
 };
 
 struct CV_EXPORTS_W VolumeParams
diff --git a/modules/rgbd/perf/perf_tsdf.cpp b/modules/rgbd/perf/perf_tsdf.cpp
index 149093665ce..da928f98f22 100644
--- a/modules/rgbd/perf/perf_tsdf.cpp
+++ b/modules/rgbd/perf/perf_tsdf.cpp
@@ -323,6 +323,7 @@ PERF_TEST(Perf_TSDF, integrate)
         startTimer();
         settings.volume->integrate(depth, settings._params->depthFactor, pose, settings._params->intr);
         stopTimer();
+        depth.release();
     }
     SANITY_CHECK_NOTHING();
 }
@@ -358,6 +359,7 @@ PERF_TEST(Perf_HashTSDF, integrate)
         startTimer();
         settings.volume->integrate(depth, settings._params->depthFactor, pose, settings._params->intr);
         stopTimer();
+        depth.release();
     }
     SANITY_CHECK_NOTHING();
 }
diff --git a/modules/rgbd/samples/colored_kinfu_demo.cpp b/modules/rgbd/samples/colored_kinfu_demo.cpp
new file mode 100644
index 00000000000..5500c9dc1aa
--- /dev/null
+++ b/modules/rgbd/samples/colored_kinfu_demo.cpp
@@ -0,0 +1,276 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
+
+#include <iostream>
+#include <fstream>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/calib3d.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/rgbd/colored_kinfu.hpp>
+
+#include "io_utils.hpp"
+
+using namespace cv;
+using namespace cv::kinfu;
+using namespace cv::colored_kinfu;
+using namespace cv::io_utils;
+
+#ifdef HAVE_OPENCV_VIZ
+#include <opencv2/viz.hpp>
+#endif
+
+#ifdef HAVE_OPENCV_VIZ
+const std::string vizWindowName = "cloud";
+
+struct PauseCallbackArgs
+{
+    PauseCallbackArgs(ColoredKinFu& _kf) : kf(_kf)
+    { }
+
+    ColoredKinFu& kf;
+};
+
+void pauseCallback(const viz::MouseEvent& me, void* args);
+void pauseCallback(const viz::MouseEvent& me, void* args)
+{
+    if(me.type == viz::MouseEvent::Type::MouseMove       ||
+       me.type == viz::MouseEvent::Type::MouseScrollDown ||
+       me.type == viz::MouseEvent::Type::MouseScrollUp)
+    {
+        PauseCallbackArgs pca = *((PauseCallbackArgs*)(args));
+        viz::Viz3d window(vizWindowName);
+        UMat rendered;
+        pca.kf.render(rendered, window.getViewerPose().matrix);
+        imshow("render", rendered);
+        waitKey(1);
+    }
+}
+#endif
+
+static const char* keys =
+{
+    "{help h usage ? | | print this message   }"
+    "{depth  | | Path to folder with depth.txt and rgb.txt files listing a set of depth and rgb images }"
+    "{camera |0| Index of depth camera to be used as a depth source }"
+    "{coarse | | Run on coarse settings (fast but ugly) or on default (slow but looks better),"
+        " in coarse mode points and normals are displayed }"
+    "{idle   | | Do not run KinFu, just display depth frames }"
+    "{record | | Write depth frames to specified file list"
+        " (the same format as for the 'depth' key) }"
+};
+
+static const std::string message =
+ "\nThis demo uses live depth input or RGB-D dataset taken from"
+ "\nhttps://vision.in.tum.de/data/datasets/rgbd-dataset"
+ "\nto demonstrate KinectFusion implementation \n";
+
+
+int main(int argc, char **argv)
+{
+    bool coarse = false;
+    bool idle = false;
+    std::string recordPath;
+
+    CommandLineParser parser(argc, argv, keys);
+    parser.about(message);
+
+    if(!parser.check())
+    {
+        parser.printMessage();
+        parser.printErrors();
+        return -1;
+    }
+
+    if(parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+    if(parser.has("coarse"))
+    {
+        coarse = true;
+    }
+    if(parser.has("record"))
+    {
+        recordPath = parser.get<String>("record");
+    }
+    if(parser.has("idle"))
+    {
+        idle = true;
+    }
+
+    Ptr<DepthSource> ds;
+    Ptr<RGBSource> rgbs;
+
+    if (parser.has("depth"))
+        ds = makePtr<DepthSource>(parser.get<String>("depth") + "/depth.txt");
+    else
+        ds = makePtr<DepthSource>(parser.get<int>("camera"));
+
+    //TODO: intrinsics for camera
+    rgbs = makePtr<RGBSource>(parser.get<String>("depth") + "/rgb.txt");
+
+    if (ds->empty())
+    {
+        std::cerr << "Failed to open depth source" << std::endl;
+        parser.printMessage();
+        return -1;
+    }
+
+    Ptr<DepthWriter> depthWriter;
+    Ptr<RGBWriter> rgbWriter;
+
+    if (!recordPath.empty())
+    {
+        depthWriter = makePtr<DepthWriter>(recordPath);
+        rgbWriter = makePtr<RGBWriter>(recordPath);
+    }
+    Ptr<colored_kinfu::Params> params;
+    Ptr<ColoredKinFu> kf;
+
+    params = colored_kinfu::Params::coloredTSDFParams(coarse);
+
+    // These params can be different for each depth sensor
+    ds->updateParams(*params);
+
+    rgbs->updateParams(*params);
+
+    // Enables OpenCL explicitly (by default can be switched-off)
+    cv::setUseOptimized(false);
+
+    // Scene-specific params should be tuned for each scene individually
+    //float cubeSize = 1.f;
+    //params->voxelSize = cubeSize/params->volumeDims[0]; //meters
+    //params->tsdf_trunc_dist = 0.01f; //meters
+    //params->icpDistThresh = 0.01f; //meters
+    //params->volumePose = Affine3f().translate(Vec3f(-cubeSize/2.f, -cubeSize/2.f, 0.25f)); //meters
+    //params->tsdf_max_weight = 16;
+
+    if(!idle)
+        kf = ColoredKinFu::create(params);
+
+#ifdef HAVE_OPENCV_VIZ
+    cv::viz::Viz3d window(vizWindowName);
+    window.setViewerPose(Affine3f::Identity());
+    bool pause = false;
+#endif
+
+    UMat rendered;
+    UMat points;
+    UMat normals;
+
+    int64 prevTime = getTickCount();
+
+    for(UMat frame = ds->getDepth(); !frame.empty(); frame = ds->getDepth())
+    {
+        if(depthWriter)
+            depthWriter->append(frame);
+        UMat rgb_frame = rgbs->getRGB();
+#ifdef HAVE_OPENCV_VIZ
+        if(pause)
+        {
+            // doesn't happen in idle mode
+            kf->getCloud(points, normals);
+            if(!points.empty() && !normals.empty())
+            {
+                viz::WCloud cloudWidget(points, viz::Color::white());
+                viz::WCloudNormals cloudNormals(points, normals, /*level*/1, /*scale*/0.05, viz::Color::gray());
+                window.showWidget("cloud", cloudWidget);
+                window.showWidget("normals", cloudNormals);
+
+                Vec3d volSize = kf->getParams().voxelSize*Vec3d(kf->getParams().volumeDims);
+                window.showWidget("cube", viz::WCube(Vec3d::all(0),
+                                                     volSize),
+                                  kf->getParams().volumePose);
+                PauseCallbackArgs pca(*kf);
+                window.registerMouseCallback(pauseCallback, (void*)&pca);
+                window.showWidget("text", viz::WText(cv::String("Move camera in this window. "
+                                                                "Close the window or press Q to resume"), Point()));
+                window.spin();
+                window.removeWidget("text");
+                window.removeWidget("cloud");
+                window.removeWidget("normals");
+                window.registerMouseCallback(0);
+            }
+
+            pause = false;
+        }
+        else
+#endif
+        {
+            UMat cvt8;
+            float depthFactor = params->depthFactor;
+            convertScaleAbs(frame, cvt8, 0.25*256. / depthFactor);
+            if(!idle)
+            {
+                imshow("depth", cvt8);
+                imshow("rgb", rgb_frame);
+                if(!kf->update(frame, rgb_frame))
+                {
+                    kf->reset();
+                }
+#ifdef HAVE_OPENCV_VIZ
+                else
+                {
+                    if(coarse)
+                    {
+                        kf->getCloud(points, normals);
+                        if(!points.empty() && !normals.empty())
+                        {
+                            viz::WCloud cloudWidget(points, viz::Color::white());
+                            viz::WCloudNormals cloudNormals(points, normals, /*level*/1, /*scale*/0.05, viz::Color::gray());
+                            window.showWidget("cloud", cloudWidget);
+                            window.showWidget("normals", cloudNormals);
+                        }
+                    }
+
+                    //window.showWidget("worldAxes", viz::WCoordinateSystem());
+                    Vec3d volSize = kf->getParams().voxelSize*kf->getParams().volumeDims;
+                    window.showWidget("cube", viz::WCube(Vec3d::all(0),
+                                                         volSize),
+                                      kf->getParams().volumePose);
+                    window.setViewerPose(kf->getPose());
+                    window.spinOnce(1, true);
+                }
+#endif
+
+                kf->render(rendered);
+            }
+            else
+            {
+                rendered = cvt8;
+            }
+        }
+
+        int64 newTime = getTickCount();
+        putText(rendered, cv::format("FPS: %2d press R to reset, P to pause, Q to quit",
+                                     (int)(getTickFrequency()/(newTime - prevTime))),
+                Point(0, rendered.rows-1), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 255));
+        prevTime = newTime;
+
+        imshow("render", rendered);
+
+        int c = waitKey(1);
+        switch (c)
+        {
+        case 'r':
+            if(!idle)
+                kf->reset();
+            break;
+        case 'q':
+            return 0;
+#ifdef HAVE_OPENCV_VIZ
+        case 'p':
+            if(!idle)
+                pause = true;
+#endif
+        default:
+            break;
+        }
+    }
+
+    return 0;
+}
diff --git a/modules/rgbd/samples/io_utils.hpp b/modules/rgbd/samples/io_utils.hpp
index f0a3a1e7372..56170b4d42b 100644
--- a/modules/rgbd/samples/io_utils.hpp
+++ b/modules/rgbd/samples/io_utils.hpp
@@ -12,6 +12,7 @@
 #include <opencv2/highgui.hpp>
 #include <opencv2/rgbd/kinfu.hpp>
 #include <opencv2/rgbd/large_kinfu.hpp>
+#include <opencv2/rgbd/colored_kinfu.hpp>
 
 namespace cv
 {
@@ -79,27 +80,45 @@ struct DepthWriter
 
 namespace Kinect2Params
 {
-static const Size frameSize = Size(512, 424);
+static const Size depth_frameSize = Size(512, 424);
 // approximate values, no guarantee to be correct
-static const float focal = 366.1f;
-static const float cx    = 258.2f;
-static const float cy    = 204.f;
-static const float k1    = 0.12f;
-static const float k2    = -0.34f;
-static const float k3    = 0.12f;
+static const float depth_focal = 366.1f;
+static const float depth_cx    = 258.2f;
+static const float depth_cy    = 204.f;
+static const float depth_k1    = 0.12f;
+static const float depth_k2    = -0.34f;
+static const float depth_k3    = 0.12f;
+
+static const Size rgb_frameSize = Size(640, 480);
+static const float rgb_focal = 525.0f;
+static const float rgb_cx    = 319.5f;
+static const float rgb_cy    = 239.5f;
+static const float rgb_k1    = 0.0f;
+static const float rgb_k2    = 0.0f;
+static const float rgb_k3    = 0.0f;
+
 };  // namespace Kinect2Params
 
 namespace AstraParams
 {
-static const Size frameSize = Size(640, 480);
+static const Size depth_frameSize = Size(640, 480);
 // approximate values, no guarantee to be correct
-static const float fx    = 535.4f;
-static const float fy    = 539.2f;
-static const float cx    = 320.1f;
-static const float cy    = 247.6f;
-static const float k1    = 0.0f;
-static const float k2    = 0.0f;
-static const float k3    = 0.0f;
+static const float depth_fx = 535.4f;
+static const float depth_fy = 539.2f;
+static const float depth_cx = 320.1f;
+static const float depth_cy = 247.6f;
+static const float depth_k1 = 0.0f;
+static const float depth_k2 = 0.0f;
+static const float depth_k3 = 0.0f;
+
+static const Size rgb_frameSize = Size(640, 480);
+static const float rgb_focal = 525.0f;
+static const float rgb_cx    = 319.5f;
+static const float rgb_cy    = 239.5f;
+static const float rgb_k1    = 0.0f;
+static const float rgb_k2    = 0.0f;
+static const float rgb_k3    = 0.0f;
+
 };  // namespace Kinect2Params
 
 struct DepthSource
@@ -181,7 +200,7 @@ struct DepthSource
             // workaround for Kinect 2
             if (sourceType == Type::DEPTH_KINECT2)
             {
-                out = out(Rect(Point(), Kinect2Params::frameSize));
+                out = out(Rect(Point(), Kinect2Params::depth_frameSize));
 
                 UMat outCopy;
                 // linear remap adds gradient between valid and invalid pixels
@@ -212,20 +231,20 @@ struct DepthSource
             Size frameSize;
             if (sourceType == Type::DEPTH_KINECT2)
             {
-                fx = fy = Kinect2Params::focal;
-                cx      = Kinect2Params::cx;
-                cy      = Kinect2Params::cy;
+                fx = fy = Kinect2Params::depth_focal;
+                cx      = Kinect2Params::depth_cx;
+                cy      = Kinect2Params::depth_cy;
 
-                frameSize = Kinect2Params::frameSize;
+                frameSize = Kinect2Params::depth_frameSize;
             }
             else if (sourceType == Type::DEPTH_ASTRA)
             {
-                fx      = AstraParams::fx;
-                fy      = AstraParams::fy;
-                cx      = AstraParams::cx;
-                cy      = AstraParams::cy;
+                fx      = AstraParams::depth_fx;
+                fy      = AstraParams::depth_fy;
+                cx      = AstraParams::depth_cx;
+                cy      = AstraParams::depth_cy;
 
-                frameSize = AstraParams::frameSize;
+                frameSize = AstraParams::depth_frameSize;
             }
             else
             {
@@ -295,9 +314,9 @@ struct DepthSource
             if (sourceType == Type::DEPTH_KINECT2)
             {
                 Matx<float, 1, 5> distCoeffs;
-                distCoeffs(0) = Kinect2Params::k1;
-                distCoeffs(1) = Kinect2Params::k2;
-                distCoeffs(4) = Kinect2Params::k3;
+                distCoeffs(0) = Kinect2Params::depth_k1;
+                distCoeffs(1) = Kinect2Params::depth_k2;
+                distCoeffs(4) = Kinect2Params::depth_k3;
 
                 initUndistortRectifyMap(params.intr, distCoeffs, cv::noArray(), params.intr,
                                         params.frameSize, CV_16SC2, undistortMap1, undistortMap2);
@@ -317,9 +336,31 @@ struct DepthSource
             if (sourceType == Type::DEPTH_KINECT2)
             {
                 Matx<float, 1, 5> distCoeffs;
-                distCoeffs(0) = Kinect2Params::k1;
-                distCoeffs(1) = Kinect2Params::k2;
-                distCoeffs(4) = Kinect2Params::k3;
+                distCoeffs(0) = Kinect2Params::depth_k1;
+                distCoeffs(1) = Kinect2Params::depth_k2;
+                distCoeffs(4) = Kinect2Params::depth_k3;
+
+                initUndistortRectifyMap(params.intr, distCoeffs, cv::noArray(), params.intr,
+                                        params.frameSize, CV_16SC2, undistortMap1, undistortMap2);
+            }
+        }
+    }
+
+    void updateParams(colored_kinfu::Params& params)
+    {
+        if (vc.isOpened())
+        {
+            updateIntrinsics(params.intr, params.frameSize, params.depthFactor);
+            updateVolumeParams(params.volumeDims, params.voxelSize,
+                               params.tsdf_trunc_dist, params.volumePose, params.truncateThreshold);
+            updateICPParams(params.icpDistThresh, params.bilateral_sigma_depth);
+
+            if (sourceType == Type::DEPTH_KINECT2)
+            {
+                Matx<float, 1, 5> distCoeffs;
+                distCoeffs(0) = Kinect2Params::depth_k1;
+                distCoeffs(1) = Kinect2Params::depth_k2;
+                distCoeffs(4) = Kinect2Params::depth_k3;
 
                 initUndistortRectifyMap(params.intr, distCoeffs, cv::noArray(), params.intr,
                                         params.frameSize, CV_16SC2, undistortMap1, undistortMap2);
@@ -333,6 +374,243 @@ struct DepthSource
     UMat undistortMap1, undistortMap2;
     Type sourceType;
 };
+
+
+static std::vector<std::string> readRGB(const std::string& fileList)
+{
+    std::vector<std::string> v;
+
+    std::fstream file(fileList);
+    if (!file.is_open())
+        throw std::runtime_error("Failed to read rgb list");
+
+    std::string dir;
+    size_t slashIdx = fileList.rfind('/');
+    slashIdx = slashIdx != std::string::npos ? slashIdx : fileList.rfind('\\');
+    dir = fileList.substr(0, slashIdx);
+
+    while (!file.eof())
+    {
+        std::string s, imgPath;
+        std::getline(file, s);
+        if (s.empty() || s[0] == '#')
+            continue;
+        std::stringstream ss;
+        ss << s;
+        double thumb;
+        ss >> thumb >> imgPath;
+        v.push_back(dir + '/' + imgPath);
+    }
+
+    return v;
+}
+
+struct RGBWriter
+{
+    RGBWriter(std::string fileList) : file(fileList, std::ios::out), count(0), dir()
+    {
+        size_t slashIdx = fileList.rfind('/');
+        slashIdx = slashIdx != std::string::npos ? slashIdx : fileList.rfind('\\');
+        dir = fileList.substr(0, slashIdx);
+
+        if (!file.is_open())
+            throw std::runtime_error("Failed to write rgb list");
+
+        file << "# rgb maps saved from device" << std::endl;
+        file << "# useless_number filename" << std::endl;
+    }
+
+    void append(InputArray _rgb)
+    {
+        Mat rgb = _rgb.getMat();
+        std::string rgbFname = cv::format("%04d.png", count);
+        std::string fullRGBFname = dir + '/' + rgbFname;
+        if (!imwrite(fullRGBFname, rgb))
+            throw std::runtime_error("Failed to write rgb to file " + fullRGBFname);
+        file << count++ << " " << rgbFname << std::endl;
+    }
+
+    std::fstream file;
+    int count;
+    std::string dir;
+};
+
+struct RGBSource
+{
+   public:
+    enum Type
+    {
+        RGB_LIST,
+        RGB_KINECT2_LIST,
+        RGB_KINECT2,
+        RGB_REALSENSE,
+        RGB_ASTRA
+    };
+
+    RGBSource(int cam) : RGBSource("", cam) {}
+
+    RGBSource(String fileListName) : RGBSource(fileListName, -1) {}
+
+    RGBSource(String fileListName, int cam)
+        : rgbFileList(fileListName.empty() ? std::vector<std::string>()
+                                             : readRGB(fileListName)),
+          frameIdx(0),
+          undistortMap1(),
+          undistortMap2()
+    {
+        if (cam >= 0)
+        {
+            vc = VideoCapture(VideoCaptureAPIs::CAP_OPENNI2 + cam);
+            if (vc.isOpened())
+            {
+                if(cam == 20)
+                    sourceType = Type::RGB_ASTRA;
+                else
+                    sourceType = Type::RGB_KINECT2;
+            }
+            else
+            {
+                vc = VideoCapture(VideoCaptureAPIs::CAP_REALSENSE + cam);
+                if (vc.isOpened())
+                {
+                    sourceType = Type::RGB_REALSENSE;
+                }
+            }
+        }
+        else
+        {
+            vc         = VideoCapture();
+            sourceType = Type::RGB_KINECT2_LIST;
+        }
+    }
+
+    UMat getRGB()
+    {
+        UMat out;
+        if (!vc.isOpened())
+        {
+            if (frameIdx < rgbFileList.size())
+            {
+                Mat f = cv::imread(rgbFileList[frameIdx++], IMREAD_COLOR);
+                f.copyTo(out);
+            }
+            else
+            {
+                return UMat();
+            }
+        }
+        else
+        {
+            vc.grab();
+            switch (sourceType)
+            {
+                case Type::RGB_KINECT2: vc.retrieve(out, CAP_OPENNI_BGR_IMAGE); break;
+                case Type::RGB_REALSENSE: vc.retrieve(out, CAP_INTELPERC_IMAGE); break;
+                default:
+                    // unknown rgb source
+                    vc.retrieve(out);
+            }
+
+            // workaround for Kinect 2
+            if (sourceType == Type::RGB_KINECT2)
+            {
+                out = out(Rect(Point(), Kinect2Params::rgb_frameSize));
+
+                UMat outCopy;
+                // linear remap adds gradient between valid and invalid pixels
+                // which causes garbage, use nearest instead
+                remap(out, outCopy, undistortMap1, undistortMap2, cv::INTER_NEAREST);
+
+                cv::flip(outCopy, out, 1);
+            }
+        }
+        if (out.empty())
+            throw std::runtime_error("Matrix is empty");
+        return out;
+    }
+
+    bool empty() { return rgbFileList.empty() && !(vc.isOpened()); }
+
+    void updateIntrinsics(Matx33f& _rgb_intrinsics, Size& _rgb_frameSize)
+    {
+        if (vc.isOpened())
+        {
+            // this should be set in according to user's rgb sensor
+            int w = (int)vc.get(VideoCaptureProperties::CAP_PROP_FRAME_WIDTH);
+            int h = (int)vc.get(VideoCaptureProperties::CAP_PROP_FRAME_HEIGHT);
+
+            // it's recommended to calibrate sensor to obtain its intrinsics
+            float rgb_fx, rgb_fy, rgb_cx, rgb_cy;
+            Size rgb_frameSize;
+            if (sourceType == Type::RGB_KINECT2)
+            {
+                rgb_fx = rgb_fy = Kinect2Params::rgb_focal;
+                rgb_cx      = Kinect2Params::rgb_cx;
+                rgb_cy      = Kinect2Params::rgb_cy;
+
+                rgb_frameSize = Kinect2Params::rgb_frameSize;
+            }
+            else if (sourceType == Type::RGB_ASTRA)
+            {
+                rgb_fx = rgb_fy = AstraParams::rgb_focal;
+                rgb_cx = AstraParams::rgb_cx;
+                rgb_cy = AstraParams::rgb_cy;
+
+                rgb_frameSize = AstraParams::rgb_frameSize;
+            }
+            else
+            {
+                // TODO: replace to rgb types
+                rgb_fx = rgb_fy = Kinect2Params::rgb_focal;
+                rgb_cx = Kinect2Params::rgb_cx;
+                rgb_cy = Kinect2Params::rgb_cy;
+                rgb_frameSize = Size(w, h);
+            }
+
+            Matx33f rgb_camMatrix = Matx33f(rgb_fx, 0, rgb_cx, 0, rgb_fy, rgb_cy, 0, 0, 1);
+            _rgb_intrinsics = rgb_camMatrix;
+            _rgb_frameSize  = rgb_frameSize;
+        }
+    }
+
+    void updateVolumeParams(const Vec3i&, float&, float&, Affine3f&)
+    {
+        // TODO: do this settings for rgb image
+    }
+
+    void updateICPParams(float&)
+    {
+        // TODO: do this settings for rgb image icp
+    }
+
+    void updateParams(colored_kinfu::Params& params)
+    {
+        if (vc.isOpened())
+        {
+            updateIntrinsics(params.rgb_intr, params.rgb_frameSize);
+            updateVolumeParams(params.volumeDims, params.voxelSize,
+                               params.tsdf_trunc_dist, params.volumePose);
+            updateICPParams(params.icpDistThresh);
+
+            if (sourceType == Type::RGB_KINECT2)
+            {
+                Matx<float, 1, 5> distCoeffs;
+                distCoeffs(0) = Kinect2Params::rgb_k1;
+                distCoeffs(1) = Kinect2Params::rgb_k2;
+                distCoeffs(4) = Kinect2Params::rgb_k3;
+
+                initUndistortRectifyMap(params.intr, distCoeffs, cv::noArray(), params.intr,
+                                        params.frameSize, CV_16SC2, undistortMap1, undistortMap2);
+            }
+        }
+    }
+
+    std::vector<std::string> rgbFileList;
+    size_t frameIdx;
+    VideoCapture vc;
+    UMat undistortMap1, undistortMap2;
+    Type sourceType;
+};
 }  // namespace io_utils
 
 }  // namespace cv
diff --git a/modules/rgbd/src/colored_kinfu.cpp b/modules/rgbd/src/colored_kinfu.cpp
new file mode 100644
index 00000000000..dc87be6b39a
--- /dev/null
+++ b/modules/rgbd/src/colored_kinfu.cpp
@@ -0,0 +1,391 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
+
+#include "precomp.hpp"
+#include "fast_icp.hpp"
+#include "tsdf.hpp"
+#include "hash_tsdf.hpp"
+#include "colored_tsdf.hpp"
+#include "kinfu_frame.hpp"
+
+namespace cv {
+namespace colored_kinfu {
+using namespace kinfu;
+
+void Params::setInitialVolumePose(Matx33f R, Vec3f t)
+{
+    setInitialVolumePose(Affine3f(R,t).matrix);
+}
+
+void Params::setInitialVolumePose(Matx44f homogen_tf)
+{
+    Params::volumePose.matrix = homogen_tf;
+}
+
+Ptr<Params> Params::defaultParams()
+{
+    Params p;
+
+    p.frameSize = Size(640, 480);
+
+    p.volumeType = VolumeType::TSDF;
+
+    float fx, fy, cx, cy;
+    fx = fy = 525.f;
+    cx = p.frameSize.width/2 - 0.5f;
+    cy = p.frameSize.height/2 - 0.5f;
+    p.intr = Matx33f(fx,  0, cx,
+                      0, fy, cy,
+                      0,  0,  1);
+
+    float rgb_fx, rgb_fy, rgb_cx, rgb_cy;
+    rgb_fx = 515.0f;
+    rgb_fy = 550.0f;
+    rgb_cx = 319.5f;
+    rgb_cy = 239.5f;
+    p.rgb_intr = Matx33f(rgb_fx,      0, rgb_cx,
+                              0, rgb_fy, rgb_cy,
+                              0,      0,      1);
+
+    // 5000 for the 16-bit PNG files
+    // 1 for the 32-bit float images in the ROS bag files
+    p.depthFactor = 5000;
+
+    // sigma_depth is scaled by depthFactor when calling bilateral filter
+    p.bilateral_sigma_depth = 0.04f;  //meter
+    p.bilateral_sigma_spatial = 4.5; //pixels
+    p.bilateral_kernel_size = 7;     //pixels
+
+    p.icpAngleThresh = (float)(30. * CV_PI / 180.); // radians
+    p.icpDistThresh = 0.1f; // meters
+
+    p.icpIterations = {10, 5, 4};
+    p.pyramidLevels = (int)p.icpIterations.size();
+
+    p.tsdf_min_camera_movement = 0.f; //meters, disabled
+
+    p.volumeDims = Vec3i::all(512); //number of voxels
+
+    float volSize = 3.f;
+    p.voxelSize = volSize/512.f; //meters
+
+    // default pose of volume cube
+    p.volumePose = Affine3f().translate(Vec3f(-volSize/2.f, -volSize/2.f, 0.5f));
+    p.tsdf_trunc_dist = 7 * p.voxelSize; // about 0.04f in meters
+    p.tsdf_max_weight = 64;   //frames
+
+    p.raycast_step_factor = 0.25f;  //in voxel sizes
+    // gradient delta factor is fixed at 1.0f and is not used
+    //p.gradient_delta_factor = 0.5f; //in voxel sizes
+
+    //p.lightPose = p.volume_pose.translation()/4; //meters
+    p.lightPose = Vec3f::all(0.f); //meters
+
+    // depth truncation is not used by default but can be useful in some scenes
+    p.truncateThreshold = 0.f; //meters
+
+    return makePtr<Params>(p);
+}
+
+Ptr<Params> Params::coarseParams()
+{
+    Ptr<Params> p = defaultParams();
+
+    p->icpIterations = {5, 3, 2};
+    p->pyramidLevels = (int)p->icpIterations.size();
+
+    float volSize = 3.f;
+    p->volumeDims = Vec3i::all(128); //number of voxels
+    p->voxelSize  = volSize/128.f;
+    p->tsdf_trunc_dist = 2 * p->voxelSize; // 0.04f in meters
+
+    p->raycast_step_factor = 0.75f;  //in voxel sizes
+
+    return p;
+}
+
+Ptr<Params> Params::hashTSDFParams(bool isCoarse)
+{
+    Ptr<Params> p;
+    if (isCoarse)
+        p = coarseParams();
+    else
+        p = defaultParams();
+    p->volumeType = VolumeType::HASHTSDF;
+    p->truncateThreshold = rgbd::Odometry::DEFAULT_MAX_DEPTH();
+    return p;
+}
+
+Ptr<Params> Params::coloredTSDFParams(bool isCoarse)
+{
+    Ptr<Params> p;
+    if(isCoarse)
+        p = coarseParams();
+    else
+        p = defaultParams();
+    p->volumeType = VolumeType::COLOREDTSDF;
+
+    return p;
+}
+
+// MatType should be Mat or UMat
+template< typename MatType>
+class ColoredKinFuImpl : public ColoredKinFu
+{
+public:
+    ColoredKinFuImpl(const Params& _params);
+    virtual ~ColoredKinFuImpl();
+
+    const Params& getParams() const CV_OVERRIDE;
+
+    void render(OutputArray image, const Matx44f& cameraPose) const CV_OVERRIDE;
+
+    virtual void getCloud(OutputArray points, OutputArray normals) const CV_OVERRIDE;
+    void getPoints(OutputArray points) const CV_OVERRIDE;
+    void getNormals(InputArray points, OutputArray normals) const CV_OVERRIDE;
+
+    void reset() CV_OVERRIDE;
+
+    const Affine3f getPose() const CV_OVERRIDE;
+
+    bool update(InputArray depth, InputArray rgb) CV_OVERRIDE;
+
+    bool updateT(const MatType& depth, const MatType& rgb);
+
+private:
+    Params params;
+
+    cv::Ptr<ICP> icp;
+    cv::Ptr<Volume> volume;
+
+    int frameCounter;
+    Matx44f pose;
+    std::vector<MatType> pyrPoints;
+    std::vector<MatType> pyrNormals;
+    std::vector<MatType> pyrColors;
+};
+
+
+template< typename MatType >
+ColoredKinFuImpl<MatType>::ColoredKinFuImpl(const Params &_params) :
+    params(_params),
+    icp(makeICP(params.intr, params.icpIterations, params.icpAngleThresh, params.icpDistThresh)),
+    pyrPoints(), pyrNormals(), pyrColors()
+{
+    volume = makeVolume(params.volumeType, params.voxelSize, params.volumePose.matrix, params.raycast_step_factor,
+                        params.tsdf_trunc_dist, params.tsdf_max_weight, params.truncateThreshold, params.volumeDims);
+    reset();
+}
+
+template< typename MatType >
+void ColoredKinFuImpl<MatType >::reset()
+{
+    frameCounter = 0;
+    pose = Affine3f::Identity().matrix;
+    volume->reset();
+}
+
+template< typename MatType >
+ColoredKinFuImpl<MatType>::~ColoredKinFuImpl()
+{ }
+
+template< typename MatType >
+const Params& ColoredKinFuImpl<MatType>::getParams() const
+{
+    return params;
+}
+
+template< typename MatType >
+const Affine3f ColoredKinFuImpl<MatType>::getPose() const
+{
+    return pose;
+}
+
+
+template<>
+bool ColoredKinFuImpl<Mat>::update(InputArray _depth, InputArray _rgb)
+{
+    CV_Assert(!_depth.empty() && _depth.size() == params.frameSize);
+
+    Mat depth;
+    Mat rgb;
+    if(_depth.isUMat())
+    {
+        _depth.copyTo(depth);
+        _rgb.copyTo(rgb);
+        return updateT(depth, rgb);
+    }
+    else
+    {
+        return updateT(_depth.getMat(), _rgb.getMat());
+    }
+}
+
+
+template<>
+bool ColoredKinFuImpl<UMat>::update(InputArray _depth, InputArray _rgb)
+{
+    CV_Assert(!_depth.empty() && _depth.size() == params.frameSize);
+
+    UMat depth;
+    UMat rgb;
+    if(!_depth.isUMat())
+    {
+        _depth.copyTo(depth);
+        _rgb.copyTo(rgb);
+        return updateT(depth, rgb);
+    }
+    else
+    {
+        return updateT(_depth.getUMat(), _rgb.getUMat());
+    }
+}
+
+
+template< typename MatType >
+bool ColoredKinFuImpl<MatType>::updateT(const MatType& _depth, const MatType& _rgb)
+{
+    CV_TRACE_FUNCTION();
+
+    MatType depth;
+    MatType rgb;
+
+    if(_depth.type() != DEPTH_TYPE)
+        _depth.convertTo(depth, DEPTH_TYPE);
+    else
+        depth = _depth;
+
+    if (_rgb.type() != COLOR_TYPE)
+    {
+        cv::Mat rgb_tmp, rgbchannel[3], z;
+        std::vector<Mat> channels;
+        _rgb.convertTo(rgb_tmp, COLOR_TYPE);
+        cv::split(rgb_tmp, rgbchannel);
+        z = cv::Mat::zeros(rgbchannel[0].size(), CV_32F);
+        channels.push_back(rgbchannel[0]); channels.push_back(rgbchannel[1]);
+        channels.push_back(rgbchannel[2]); channels.push_back(z);
+        merge(channels, rgb);
+    }
+    else
+        rgb = _rgb;
+
+
+    std::vector<MatType> newPoints, newNormals, newColors;
+    makeColoredFrameFromDepth(depth, rgb,
+                       newPoints, newNormals, newColors,
+                       params.intr, params.rgb_intr,
+                       params.pyramidLevels,
+                       params.depthFactor,
+                       params.bilateral_sigma_depth,
+                       params.bilateral_sigma_spatial,
+                       params.bilateral_kernel_size,
+                       params.truncateThreshold);
+
+    if(frameCounter == 0)
+    {
+        // use depth instead of distance
+        volume->integrate(depth, rgb, params.depthFactor, pose, params.intr, params.rgb_intr);
+        pyrPoints  = newPoints;
+        pyrNormals = newNormals;
+        pyrColors  = newColors;
+    }
+    else
+    {
+        Affine3f affine;
+        bool success = icp->estimateTransform(affine, pyrPoints, pyrNormals, newPoints, newNormals);
+        if(!success)
+            return false;
+
+        pose = (Affine3f(pose) * affine).matrix;
+
+        float rnorm = (float)cv::norm(affine.rvec());
+        float tnorm = (float)cv::norm(affine.translation());
+        // We do not integrate volume if camera does not move
+        if((rnorm + tnorm)/2 >= params.tsdf_min_camera_movement)
+        {
+            // use depth instead of distance
+            volume->integrate(depth, rgb, params.depthFactor, pose, params.intr, params.rgb_intr);
+        }
+        MatType& points  = pyrPoints [0];
+        MatType& normals = pyrNormals[0];
+        MatType& colors  = pyrColors [0];
+        volume->raycast(pose, params.intr, params.frameSize, points, normals, colors);
+        buildPyramidPointsNormals(points, normals, pyrPoints, pyrNormals,
+                                  params.pyramidLevels);
+    }
+
+    frameCounter++;
+    return true;
+}
+
+
+template< typename MatType >
+void ColoredKinFuImpl<MatType>::render(OutputArray image, const Matx44f& _cameraPose) const
+{
+    CV_TRACE_FUNCTION();
+
+    Affine3f cameraPose(_cameraPose);
+    Affine3f _pose(pose);
+
+    const Affine3f id = Affine3f::Identity();
+    if((cameraPose.rotation() == _pose.rotation() && cameraPose.translation() == _pose.translation()) ||
+       (cameraPose.rotation() == id.rotation()   && cameraPose.translation() == id.translation()))
+    {
+        renderPointsNormalsColors(pyrPoints[0], pyrNormals[0], pyrColors[0],image, params.lightPose);
+    }
+    else
+    {
+        MatType points, normals, colors;
+        volume->raycast(_cameraPose, params.intr, params.frameSize, points, normals, colors);
+        renderPointsNormalsColors(points, normals, colors, image, params.lightPose);
+    }
+}
+
+
+template< typename MatType >
+void ColoredKinFuImpl<MatType>::getCloud(OutputArray p, OutputArray n) const
+{
+    volume->fetchPointsNormals(p, n);
+}
+
+
+template< typename MatType >
+void ColoredKinFuImpl<MatType>::getPoints(OutputArray points) const
+{
+    volume->fetchPointsNormals(points, noArray());
+}
+
+
+template< typename MatType >
+void ColoredKinFuImpl<MatType>::getNormals(InputArray points, OutputArray normals) const
+{
+    volume->fetchNormals(points, normals);
+}
+
+// importing class
+
+#ifdef OPENCV_ENABLE_NONFREE
+
+Ptr<ColoredKinFu> ColoredKinFu::create(const Ptr<Params>& params)
+{
+    CV_Assert((int)params->icpIterations.size() == params->pyramidLevels);
+    CV_Assert(params->intr(0,1) == 0 && params->intr(1,0) == 0 && params->intr(2,0) == 0 && params->intr(2,1) == 0 && params->intr(2,2) == 1);
+    return makePtr< ColoredKinFuImpl<Mat> >(*params);
+}
+
+#else
+Ptr<ColoredKinFu> ColoredKinFu::create(const Ptr<Params>& /* params */)
+{
+    CV_Error(Error::StsNotImplemented,
+             "This algorithm is patented and is excluded in this configuration; "
+             "Set OPENCV_ENABLE_NONFREE CMake option and rebuild the library");
+}
+#endif
+
+ColoredKinFu::~ColoredKinFu() {}
+
+} // namespace kinfu
+} // namespace cv
diff --git a/modules/rgbd/src/colored_tsdf.cpp b/modules/rgbd/src/colored_tsdf.cpp
new file mode 100644
index 00000000000..116136a7f21
--- /dev/null
+++ b/modules/rgbd/src/colored_tsdf.cpp
@@ -0,0 +1,1020 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
+
+#include "precomp.hpp"
+#include "colored_tsdf.hpp"
+#include "tsdf_functions.hpp"
+#include "opencl_kernels_rgbd.hpp"
+
+#define USE_INTERPOLATION_IN_GETNORMAL 1
+
+namespace cv {
+
+namespace kinfu {
+
+ColoredTSDFVolume::ColoredTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist,
+                       int _maxWeight, Point3i _resolution, bool zFirstMemOrder)
+    : Volume(_voxelSize, _pose, _raycastStepFactor),
+      volResolution(_resolution),
+      maxWeight( WeightType(_maxWeight) )
+{
+    CV_Assert(_maxWeight < 255);
+    // Unlike original code, this should work with any volume size
+    // Not only when (x,y,z % 32) == 0
+    volSize   = Point3f(volResolution) * voxelSize;
+    truncDist = std::max(_truncDist, 2.1f * voxelSize);
+
+    // (xRes*yRes*zRes) array
+    // Depending on zFirstMemOrder arg:
+    // &elem(x, y, z) = data + x*zRes*yRes + y*zRes + z;
+    // &elem(x, y, z) = data + x + y*xRes + z*xRes*yRes;
+    int xdim, ydim, zdim;
+    if(zFirstMemOrder)
+    {
+        xdim = volResolution.z * volResolution.y;
+        ydim = volResolution.z;
+        zdim = 1;
+    }
+    else
+    {
+        xdim = 1;
+        ydim = volResolution.x;
+        zdim = volResolution.x * volResolution.y;
+    }
+
+    volDims = Vec4i(xdim, ydim, zdim);
+    neighbourCoords = Vec8i(
+        volDims.dot(Vec4i(0, 0, 0)),
+        volDims.dot(Vec4i(0, 0, 1)),
+        volDims.dot(Vec4i(0, 1, 0)),
+        volDims.dot(Vec4i(0, 1, 1)),
+        volDims.dot(Vec4i(1, 0, 0)),
+        volDims.dot(Vec4i(1, 0, 1)),
+        volDims.dot(Vec4i(1, 1, 0)),
+        volDims.dot(Vec4i(1, 1, 1))
+    );
+}
+
+class ColoredTSDFVolumeCPU : public ColoredTSDFVolume
+{
+public:
+    // dimension in voxels, size in meters
+    ColoredTSDFVolumeCPU(float _voxelSize, cv::Matx44f _pose, float _raycastStepFactor, float _truncDist,
+        int _maxWeight, Vec3i _resolution, bool zFirstMemOrder = true);
+    virtual void integrate(InputArray, float, const Matx44f&, const kinfu::Intr&, const int) override
+        { CV_Error(Error::StsNotImplemented, "Not implemented"); };
+    virtual void integrate(InputArray _depth, InputArray _rgb, float depthFactor, const Matx44f& cameraPose,
+        const kinfu::Intr& depth_intrinsics, const Intr& rgb_intrinsics, const int frameId = 0) override;
+    virtual void raycast(const Matx44f& cameraPose, const kinfu::Intr& depth_intrinsics, const Size& frameSize,
+        OutputArray points, OutputArray normals, OutputArray colors) const override;
+    virtual void raycast(const Matx44f&, const kinfu::Intr&, const Size&, OutputArray, OutputArray) const override
+        { CV_Error(Error::StsNotImplemented, "Not implemented"); };
+
+    virtual void fetchNormals(InputArray points, OutputArray _normals) const override;
+    virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
+
+    virtual void reset() override;
+    virtual RGBTsdfVoxel at(const Vec3i& volumeIdx) const;
+
+    float interpolateVoxel(const cv::Point3f& p) const;
+    Point3f getNormalVoxel(const cv::Point3f& p) const;
+    float interpolateColor(float tx, float ty, float tz, float vx[8]) const;
+    Point3f getColorVoxel(const cv::Point3f& p) const;
+
+#if USE_INTRINSICS
+    float interpolateVoxel(const v_float32x4& p) const;
+    v_float32x4 getNormalVoxel(const v_float32x4& p) const;
+    v_float32x4 getColorVoxel(const v_float32x4& p) const;
+#endif
+
+    Vec4i volStrides;
+    Vec6f frameParams;
+    Mat pixNorms;
+    // See zFirstMemOrder arg of parent class constructor
+    // for the array layout info
+    // Consist of Voxel elements
+    Mat volume;
+};
+
+// dimension in voxels, size in meters
+ColoredTSDFVolumeCPU::ColoredTSDFVolumeCPU(float _voxelSize, cv::Matx44f _pose, float _raycastStepFactor,
+                             float _truncDist, int _maxWeight, Vec3i _resolution,
+                             bool zFirstMemOrder)
+    : ColoredTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _resolution,
+                 zFirstMemOrder)
+{
+    int xdim, ydim, zdim;
+    if (zFirstMemOrder)
+    {
+        xdim = volResolution.z * volResolution.y;
+        ydim = volResolution.z;
+        zdim = 1;
+    }
+    else
+    {
+        xdim = 1;
+        ydim = volResolution.x;
+        zdim = volResolution.x * volResolution.y;
+    }
+    volStrides = Vec4i(xdim, ydim, zdim);
+
+    volume = Mat(1, volResolution.x * volResolution.y * volResolution.z, rawType<RGBTsdfVoxel>());
+
+    reset();
+}
+
+// zero volume, leave rest params the same
+void ColoredTSDFVolumeCPU::reset()
+{
+    CV_TRACE_FUNCTION();
+
+    volume.forEach<VecRGBTsdfVoxel>([](VecRGBTsdfVoxel& vv, const int* /* position */)
+    {
+        RGBTsdfVoxel& v = reinterpret_cast<RGBTsdfVoxel&>(vv);
+        v.tsdf = floatToTsdf(0.0f); v.weight = 0;
+    });
+}
+
+RGBTsdfVoxel ColoredTSDFVolumeCPU::at(const Vec3i& volumeIdx) const
+{
+    //! Out of bounds
+    if ((volumeIdx[0] >= volResolution.x || volumeIdx[0] < 0) ||
+        (volumeIdx[1] >= volResolution.y || volumeIdx[1] < 0) ||
+        (volumeIdx[2] >= volResolution.z || volumeIdx[2] < 0))
+    {
+        return RGBTsdfVoxel(floatToTsdf(1.f), 0, 160, 160, 160);
+    }
+
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+    int coordBase =
+        volumeIdx[0] * volDims[0] + volumeIdx[1] * volDims[1] + volumeIdx[2] * volDims[2];
+    return volData[coordBase];
+}
+
+// use depth instead of distance (optimization)
+void ColoredTSDFVolumeCPU::integrate(InputArray _depth, InputArray _rgb, float depthFactor, const Matx44f& cameraPose,
+                              const Intr& depth_intrinsics, const Intr& rgb_intrinsics, const int frameId)
+{
+    CV_TRACE_FUNCTION();
+    CV_UNUSED(frameId);
+    CV_Assert(_depth.type() == DEPTH_TYPE);
+    CV_Assert(!_depth.empty());
+    Depth depth = _depth.getMat();
+    Colors rgb = _rgb.getMat();
+    Vec6f newParams((float)depth.rows, (float)depth.cols,
+        depth_intrinsics.fx, depth_intrinsics.fy,
+        depth_intrinsics.cx, depth_intrinsics.cy);
+    if (!(frameParams == newParams))
+    {
+        frameParams = newParams;
+        pixNorms = preCalculationPixNorm(depth, depth_intrinsics);
+    }
+
+    integrateRGBVolumeUnit(truncDist, voxelSize, maxWeight, (this->pose).matrix, volResolution, volStrides, depth, rgb,
+        depthFactor, cameraPose, depth_intrinsics, rgb_intrinsics, pixNorms, volume);
+}
+
+#if USE_INTRINSICS
+// all coordinate checks should be done in inclosing cycle
+inline float ColoredTSDFVolumeCPU::interpolateVoxel(const Point3f& _p) const
+{
+    v_float32x4 p(_p.x, _p.y, _p.z, 0);
+    return interpolateVoxel(p);
+}
+
+inline float ColoredTSDFVolumeCPU::interpolateVoxel(const v_float32x4& p) const
+{
+    // tx, ty, tz = floor(p)
+    v_int32x4 ip = v_floor(p);
+    v_float32x4 t = p - v_cvt_f32(ip);
+    float tx = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float ty = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float tz = t.get0();
+
+    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    int ix = ip.get0();
+    ip = v_rotate_right<1>(ip);
+    int iy = ip.get0();
+    ip = v_rotate_right<1>(ip);
+    int iz = ip.get0();
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+
+    TsdfType vx[8];
+    for (int i = 0; i < 8; i++)
+        vx[i] = volData[neighbourCoords[i] + coordBase].tsdf;
+
+    v_float32x4 v0246 = tsdfToFloat_INTR(v_int32x4(vx[0], vx[2], vx[4], vx[6]));
+    v_float32x4 v1357 = tsdfToFloat_INTR(v_int32x4(vx[1], vx[3], vx[5], vx[7]));
+    v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
+
+    v_float32x4 v00_10 = vxx;
+    v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
+
+    v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
+    float v0 = v0_1.get0();
+    v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
+    float v1 = v0_1.get0();
+
+    return v0 + tx * (v1 - v0);
+}
+#else
+inline float ColoredTSDFVolumeCPU::interpolateVoxel(const Point3f& p) const
+{
+    int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+
+    int ix = cvFloor(p.x);
+    int iy = cvFloor(p.y);
+    int iz = cvFloor(p.z);
+
+    float tx = p.x - ix;
+    float ty = p.y - iy;
+    float tz = p.z - iz;
+
+    int coordBase = ix*xdim + iy*ydim + iz*zdim;
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    float vx[8];
+    for (int i = 0; i < 8; i++)
+        vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase].tsdf);
+
+    float v00 = vx[0] + tz*(vx[1] - vx[0]);
+    float v01 = vx[2] + tz*(vx[3] - vx[2]);
+    float v10 = vx[4] + tz*(vx[5] - vx[4]);
+    float v11 = vx[6] + tz*(vx[7] - vx[6]);
+
+    float v0 = v00 + ty*(v01 - v00);
+    float v1 = v10 + ty*(v11 - v10);
+
+    return v0 + tx*(v1 - v0);
+}
+#endif
+
+
+#if USE_INTRINSICS
+//gradientDeltaFactor is fixed at 1.0 of voxel size
+inline Point3f ColoredTSDFVolumeCPU::getNormalVoxel(const Point3f& _p) const
+{
+    v_float32x4 p(_p.x, _p.y, _p.z, 0.f);
+    v_float32x4 result = getNormalVoxel(p);
+    float CV_DECL_ALIGNED(16) ares[4];
+    v_store_aligned(ares, result);
+    return Point3f(ares[0], ares[1], ares[2]);
+}
+
+inline v_float32x4 ColoredTSDFVolumeCPU::getNormalVoxel(const v_float32x4& p) const
+{
+    if (v_check_any(p < v_float32x4(1.f, 1.f, 1.f, 0.f)) ||
+        v_check_any(p >= v_float32x4((float)(volResolution.x - 2),
+            (float)(volResolution.y - 2),
+            (float)(volResolution.z - 2), 1.f))
+        )
+        return nanv;
+
+    v_int32x4 ip = v_floor(p);
+    v_float32x4 t = p - v_cvt_f32(ip);
+    float tx = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float ty = t.get0();
+    t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+    float tz = t.get0();
+
+    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    int ix = ip.get0(); ip = v_rotate_right<1>(ip);
+    int iy = ip.get0(); ip = v_rotate_right<1>(ip);
+    int iz = ip.get0();
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+
+    float CV_DECL_ALIGNED(16) an[4];
+    an[0] = an[1] = an[2] = an[3] = 0.f;
+    for (int c = 0; c < 3; c++)
+    {
+        const int dim = volDims[c];
+        float& nv = an[c];
+
+        float vx[8];
+        for (int i = 0; i < 8; i++)
+            vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase + 1 * dim].tsdf) -
+                    tsdfToFloat(volData[neighbourCoords[i] + coordBase - 1 * dim].tsdf);
+
+        v_float32x4 v0246(vx[0], vx[2], vx[4], vx[6]);
+        v_float32x4 v1357(vx[1], vx[3], vx[5], vx[7]);
+        v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
+
+        v_float32x4 v00_10 = vxx;
+        v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
+
+        v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
+        float v0 = v0_1.get0();
+        v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
+        float v1 = v0_1.get0();
+
+        nv = v0 + tx * (v1 - v0);
+    }
+
+    v_float32x4 n = v_load_aligned(an);
+    v_float32x4 Norm = v_sqrt(v_setall_f32(v_reduce_sum(n * n)));
+
+    return Norm.get0() < 0.0001f ? nanv : n / Norm;
+}
+#else
+inline Point3f ColoredTSDFVolumeCPU::getNormalVoxel(const Point3f& p) const
+{
+    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    if(p.x < 1 || p.x >= volResolution.x - 2 ||
+       p.y < 1 || p.y >= volResolution.y - 2 ||
+       p.z < 1 || p.z >= volResolution.z - 2)
+        return nan3;
+
+    int ix = cvFloor(p.x);
+    int iy = cvFloor(p.y);
+    int iz = cvFloor(p.z);
+
+    float tx = p.x - ix;
+    float ty = p.y - iy;
+    float tz = p.z - iz;
+
+    int coordBase = ix*xdim + iy*ydim + iz*zdim;
+
+    Vec3f an;
+    for(int c = 0; c < 3; c++)
+    {
+        const int dim = volDims[c];
+        float& nv = an[c];
+
+        float vx[8];
+        for(int i = 0; i < 8; i++)
+            vx[i] = tsdfToFloat(volData[neighbourCoords[i] + coordBase + 1 * dim].tsdf) -
+                    tsdfToFloat(volData[neighbourCoords[i] + coordBase - 1 * dim].tsdf);
+
+        float v00 = vx[0] + tz*(vx[1] - vx[0]);
+        float v01 = vx[2] + tz*(vx[3] - vx[2]);
+        float v10 = vx[4] + tz*(vx[5] - vx[4]);
+        float v11 = vx[6] + tz*(vx[7] - vx[6]);
+
+        float v0 = v00 + ty*(v01 - v00);
+        float v1 = v10 + ty*(v11 - v10);
+
+        nv = v0 + tx*(v1 - v0);
+    }
+
+    float nv = sqrt(an[0] * an[0] +
+                    an[1] * an[1] +
+                    an[2] * an[2]);
+    return nv < 0.0001f ? nan3 : an / nv;
+}
+#endif
+
+#if USE_INTRINSICS
+inline float ColoredTSDFVolumeCPU::interpolateColor(float tx, float ty, float tz, float vx[8]) const
+{
+    v_float32x4 v0246, v1357;
+    v_load_deinterleave(vx, v0246, v1357);
+
+    v_float32x4 vxx = v0246 + v_setall_f32(tz) * (v1357 - v0246);
+
+    v_float32x4 v00_10 = vxx;
+    v_float32x4 v01_11 = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vxx)));
+
+    v_float32x4 v0_1 = v00_10 + v_setall_f32(ty) * (v01_11 - v00_10);
+    float v0 = v0_1.get0();
+    v0_1 = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(v0_1)));
+    float v1 = v0_1.get0();
+
+    return v0 + tx * (v1 - v0);
+}
+#else
+inline float ColoredTSDFVolumeCPU::interpolateColor(float tx, float ty, float tz, float vx[8]) const
+{
+    float v00 = vx[0] + tz * (vx[1] - vx[0]);
+    float v01 = vx[2] + tz * (vx[3] - vx[2]);
+    float v10 = vx[4] + tz * (vx[5] - vx[4]);
+    float v11 = vx[6] + tz * (vx[7] - vx[6]);
+
+    float v0 = v00 + ty * (v01 - v00);
+    float v1 = v10 + ty * (v11 - v10);
+
+    return v0 + tx * (v1 - v0);
+}
+#endif
+
+#if USE_INTRINSICS
+//gradientDeltaFactor is fixed at 1.0 of voxel size
+inline Point3f ColoredTSDFVolumeCPU::getColorVoxel(const Point3f& _p) const
+{
+    v_float32x4 p(_p.x, _p.y, _p.z, 0.f);
+    v_float32x4 result = getColorVoxel(p);
+    float CV_DECL_ALIGNED(16) ares[4];
+    v_store_aligned(ares, result);
+    return Point3f(ares[0], ares[1], ares[2]);
+}
+inline v_float32x4 ColoredTSDFVolumeCPU::getColorVoxel(const v_float32x4& p) const
+{
+    if (v_check_any(p < v_float32x4(1.f, 1.f, 1.f, 0.f)) ||
+        v_check_any(p >= v_float32x4((float)(volResolution.x - 2),
+            (float)(volResolution.y - 2),
+            (float)(volResolution.z - 2), 1.f))
+        )
+        return nanv;
+
+    v_int32x4 ip = v_floor(p);
+
+    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+    int ix = ip.get0(); ip = v_rotate_right<1>(ip);
+    int iy = ip.get0(); ip = v_rotate_right<1>(ip);
+    int iz = ip.get0();
+
+    int coordBase = ix * xdim + iy * ydim + iz * zdim;
+    float CV_DECL_ALIGNED(16) rgb[4];
+
+#if USE_INTERPOLATION_IN_GETNORMAL
+    float r[8], g[8], b[8];
+    for (int i = 0; i < 8; i++)
+    {
+        r[i] = (float)volData[neighbourCoords[i] + coordBase].r;
+        g[i] = (float)volData[neighbourCoords[i] + coordBase].g;
+        b[i] = (float)volData[neighbourCoords[i] + coordBase].b;
+    }
+
+    v_float32x4 vsi(voxelSizeInv, voxelSizeInv, voxelSizeInv, voxelSizeInv);
+    v_float32x4 ptVox = p * vsi;
+    v_int32x4 iptVox = v_floor(ptVox);
+    v_float32x4 t = ptVox - v_cvt_f32(iptVox);
+    float tx = t.get0(); t = v_rotate_right<1>(t);
+    float ty = t.get0(); t = v_rotate_right<1>(t);
+    float tz = t.get0();
+    rgb[0] = interpolateColor(tx, ty, tz, r);
+    rgb[1] = interpolateColor(tx, ty, tz, g);
+    rgb[2] = interpolateColor(tx, ty, tz, b);
+    rgb[3] = 0.f;
+#else
+    rgb[0] = volData[coordBase].r;
+    rgb[1] = volData[coordBase].g;
+    rgb[2] = volData[coordBase].b;
+    rgb[3] = 0.f;
+#endif
+    v_float32x4 res = v_load_aligned(rgb);
+    return res;
+}
+#else
+inline Point3f ColoredTSDFVolumeCPU::getColorVoxel(const Point3f& p) const
+{
+    const int xdim = volDims[0], ydim = volDims[1], zdim = volDims[2];
+    const RGBTsdfVoxel* volData = volume.ptr<RGBTsdfVoxel>();
+
+
+
+    if(p.x < 1 || p.x >= volResolution.x - 2 ||
+       p.y < 1 || p.y >= volResolution.y - 2 ||
+       p.z < 1 || p.z >= volResolution.z - 2)
+        return nan3;
+
+    int ix = cvFloor(p.x);
+    int iy = cvFloor(p.y);
+    int iz = cvFloor(p.z);
+
+    int coordBase = ix*xdim + iy*ydim + iz*zdim;
+    Point3f res;
+
+#if USE_INTERPOLATION_IN_GETNORMAL
+    // TODO: create better interpolation or remove this simple version
+    float r[8], g[8], b[8];
+    for (int i = 0; i < 8; i++)
+    {
+        r[i] = (float) volData[neighbourCoords[i] + coordBase].r;
+        g[i] = (float) volData[neighbourCoords[i] + coordBase].g;
+        b[i] = (float) volData[neighbourCoords[i] + coordBase].b;
+    }
+
+    Point3f ptVox = p * voxelSizeInv;
+    Vec3i iptVox(cvFloor(ptVox.x), cvFloor(ptVox.y), cvFloor(ptVox.z));
+    float tx = ptVox.x - iptVox[0];
+    float ty = ptVox.y - iptVox[1];
+    float tz = ptVox.z - iptVox[2];
+
+    res=Point3f(interpolateColor(tx, ty, tz, r),
+                interpolateColor(tx, ty, tz, g),
+                interpolateColor(tx, ty, tz, b));
+#else
+    res=Point3f(volData[coordBase].r, volData[coordBase].g, volData[coordBase].b);
+#endif
+    colorFix(res);
+    return res;
+}
+#endif
+
+struct ColorRaycastInvoker : ParallelLoopBody
+{
+    ColorRaycastInvoker(Points& _points, Normals& _normals, Colors& _colors, const Matx44f& cameraPose,
+                  const Intr& depth_intrinsics, const ColoredTSDFVolumeCPU& _volume) :
+        ParallelLoopBody(),
+        points(_points),
+        normals(_normals),
+        colors(_colors),
+        volume(_volume),
+        tstep(volume.truncDist * volume.raycastStepFactor),
+        // We do subtract voxel size to minimize checks after
+        // Note: origin of volume coordinate is placed
+        // in the center of voxel (0,0,0), not in the corner of the voxel!
+        boxMax(volume.volSize - Point3f(volume.voxelSize,
+                                        volume.voxelSize,
+                                        volume.voxelSize)),
+        boxMin(),
+        cam2vol(volume.pose.inv() * Affine3f(cameraPose)),
+        vol2cam(Affine3f(cameraPose.inv()) * volume.pose),
+        reprojDepth(depth_intrinsics.makeReprojector())
+    {  }
+#if USE_INTRINSICS
+    virtual void operator() (const Range& range) const override
+    {
+        const v_float32x4 vfxy(reprojDepth.fxinv, reprojDepth.fyinv, 0, 0);
+        const v_float32x4 vcxy(reprojDepth.cx, reprojDepth.cy, 0, 0);
+
+        const float(&cm)[16] = cam2vol.matrix.val;
+        const v_float32x4 camRot0(cm[0], cm[4], cm[8], 0);
+        const v_float32x4 camRot1(cm[1], cm[5], cm[9], 0);
+        const v_float32x4 camRot2(cm[2], cm[6], cm[10], 0);
+        const v_float32x4 camTrans(cm[3], cm[7], cm[11], 0);
+
+        const v_float32x4 boxDown(boxMin.x, boxMin.y, boxMin.z, 0.f);
+        const v_float32x4 boxUp(boxMax.x, boxMax.y, boxMax.z, 0.f);
+
+        const v_float32x4 invVoxelSize = v_float32x4(volume.voxelSizeInv,
+            volume.voxelSizeInv,
+            volume.voxelSizeInv, 1.f);
+
+        const float(&vm)[16] = vol2cam.matrix.val;
+        const v_float32x4 volRot0(vm[0], vm[4], vm[8], 0);
+        const v_float32x4 volRot1(vm[1], vm[5], vm[9], 0);
+        const v_float32x4 volRot2(vm[2], vm[6], vm[10], 0);
+        const v_float32x4 volTrans(vm[3], vm[7], vm[11], 0);
+
+        for (int y = range.start; y < range.end; y++)
+        {
+            ptype* ptsRow = points[y];
+            ptype* nrmRow = normals[y];
+            ptype* clrRow = colors[y];
+
+            for (int x = 0; x < points.cols; x++)
+            {
+                v_float32x4 point = nanv, normal = nanv, color = nanv;
+
+                v_float32x4 orig = camTrans;
+
+                // get direction through pixel in volume space:
+
+                // 1. reproject (x, y) on projecting plane where z = 1.f
+                v_float32x4 planed = (v_float32x4((float)x, (float)y, 0.f, 0.f) - vcxy) * vfxy;
+                planed = v_combine_low(planed, v_float32x4(1.f, 0.f, 0.f, 0.f));
+
+                // 2. rotate to volume space
+                planed = v_matmuladd(planed, camRot0, camRot1, camRot2, v_setzero_f32());
+
+                // 3. normalize
+                v_float32x4 invNorm = v_invsqrt(v_setall_f32(v_reduce_sum(planed * planed)));
+                v_float32x4 dir = planed * invNorm;
+
+                // compute intersection of ray with all six bbox planes
+                v_float32x4 rayinv = v_setall_f32(1.f) / dir;
+                // div by zero should be eliminated by these products
+                v_float32x4 tbottom = rayinv * (boxDown - orig);
+                v_float32x4 ttop = rayinv * (boxUp - orig);
+
+                // re-order intersections to find smallest and largest on each axis
+                v_float32x4 minAx = v_min(ttop, tbottom);
+                v_float32x4 maxAx = v_max(ttop, tbottom);
+
+                // near clipping plane
+                const float clip = 0.f;
+                float _minAx[4], _maxAx[4];
+                v_store(_minAx, minAx);
+                v_store(_maxAx, maxAx);
+                float tmin = max({ _minAx[0], _minAx[1], _minAx[2], clip });
+                float tmax = min({ _maxAx[0], _maxAx[1], _maxAx[2] });
+
+                // precautions against getting coordinates out of bounds
+                tmin = tmin + tstep;
+                tmax = tmax - tstep;
+
+                if (tmin < tmax)
+                {
+                    // interpolation optimized a little
+                    orig *= invVoxelSize;
+                    dir *= invVoxelSize;
+
+                    int xdim = volume.volDims[0];
+                    int ydim = volume.volDims[1];
+                    int zdim = volume.volDims[2];
+                    v_float32x4 rayStep = dir * v_setall_f32(tstep);
+                    v_float32x4 next = (orig + dir * v_setall_f32(tmin));
+                    float f = volume.interpolateVoxel(next), fnext = f;
+
+                    //raymarch
+                    int steps = 0;
+                    int nSteps = cvFloor((tmax - tmin) / tstep);
+                    for (; steps < nSteps; steps++)
+                    {
+                        next += rayStep;
+                        v_int32x4 ip = v_round(next);
+                        int ix = ip.get0(); ip = v_rotate_right<1>(ip);
+                        int iy = ip.get0(); ip = v_rotate_right<1>(ip);
+                        int iz = ip.get0();
+                        int coord = ix * xdim + iy * ydim + iz * zdim;
+
+                        fnext = tsdfToFloat(volume.volume.at<RGBTsdfVoxel>(coord).tsdf);
+                        if (fnext != f)
+                        {
+                            fnext = volume.interpolateVoxel(next);
+
+                            // when ray crosses a surface
+                            if (std::signbit(f) != std::signbit(fnext))
+                                break;
+
+                            f = fnext;
+                        }
+                    }
+
+                    // if ray penetrates a surface from outside
+                    // linearly interpolate t between two f values
+                    if (f > 0.f && fnext < 0.f)
+                    {
+                        v_float32x4 tp = next - rayStep;
+                        float ft = volume.interpolateVoxel(tp);
+                        float ftdt = volume.interpolateVoxel(next);
+                        float ts = tmin + tstep * (steps - ft / (ftdt - ft));
+
+                        // avoid division by zero
+                        if (!cvIsNaN(ts) && !cvIsInf(ts))
+                        {
+                            v_float32x4 pv = (orig + dir * v_setall_f32(ts));
+                            v_float32x4 nv = volume.getNormalVoxel(pv);
+                            v_float32x4 cv = volume.getColorVoxel(pv);
+
+                            if (!isNaN(nv))
+                            {
+                                color = cv;
+                                //convert pv and nv to camera space
+                                normal = v_matmuladd(nv, volRot0, volRot1, volRot2, v_setzero_f32());
+                                // interpolation optimized a little
+                                point = v_matmuladd(pv * v_float32x4(volume.voxelSize,
+                                    volume.voxelSize,
+                                    volume.voxelSize, 1.f),
+                                    volRot0, volRot1, volRot2, volTrans);
+                            }
+                        }
+                    }
+                }
+
+                v_store((float*)(&ptsRow[x]), point);
+                v_store((float*)(&nrmRow[x]), normal);
+                v_store((float*)(&clrRow[x]), color);
+            }
+        }
+    }
+#else
+    virtual void operator() (const Range& range) const override
+    {
+        const Point3f camTrans = cam2vol.translation();
+        const Matx33f  camRot  = cam2vol.rotation();
+        const Matx33f  volRot  = vol2cam.rotation();
+
+        for(int y = range.start; y < range.end; y++)
+        {
+            ptype* ptsRow = points[y];
+            ptype* nrmRow = normals[y];
+            ptype* clrRow = colors[y];
+
+            for(int x = 0; x < points.cols; x++)
+            {
+                Point3f point = nan3, normal = nan3, color = nan3;
+
+                Point3f orig = camTrans;
+                // direction through pixel in volume space
+                Point3f dir = normalize(Vec3f(camRot * reprojDepth(Point3f(float(x), float(y), 1.f))));
+
+                // compute intersection of ray with all six bbox planes
+                Vec3f rayinv(1.f/dir.x, 1.f/dir.y, 1.f/dir.z);
+                Point3f tbottom = rayinv.mul(boxMin - orig);
+                Point3f ttop    = rayinv.mul(boxMax - orig);
+
+                // re-order intersections to find smallest and largest on each axis
+                Point3f minAx(min(ttop.x, tbottom.x), min(ttop.y, tbottom.y), min(ttop.z, tbottom.z));
+                Point3f maxAx(max(ttop.x, tbottom.x), max(ttop.y, tbottom.y), max(ttop.z, tbottom.z));
+
+                // near clipping plane
+                const float clip = 0.f;
+                //float tmin = max(max(max(minAx.x, minAx.y), max(minAx.x, minAx.z)), clip);
+                //float tmax =     min(min(maxAx.x, maxAx.y), min(maxAx.x, maxAx.z));
+                float tmin = max({ minAx.x, minAx.y, minAx.z, clip });
+                float tmax = min({ maxAx.x, maxAx.y, maxAx.z });
+
+                // precautions against getting coordinates out of bounds
+                tmin = tmin + tstep;
+                tmax = tmax - tstep;
+
+                if(tmin < tmax)
+                {
+                    // interpolation optimized a little
+                    orig = orig*volume.voxelSizeInv;
+                    dir  =  dir*volume.voxelSizeInv;
+
+                    Point3f rayStep = dir * tstep;
+                    Point3f next = (orig + dir * tmin);
+                    float f = volume.interpolateVoxel(next), fnext = f;
+
+                    //raymarch
+                    int steps = 0;
+                    int nSteps = int(floor((tmax - tmin)/tstep));
+                    for(; steps < nSteps; steps++)
+                    {
+                        next += rayStep;
+                        int xdim = volume.volDims[0];
+                        int ydim = volume.volDims[1];
+                        int zdim = volume.volDims[2];
+                        int ix = cvRound(next.x);
+                        int iy = cvRound(next.y);
+                        int iz = cvRound(next.z);
+                        fnext = tsdfToFloat(volume.volume.at<RGBTsdfVoxel>(ix*xdim + iy*ydim + iz*zdim).tsdf);
+                        if(fnext != f)
+                        {
+                            fnext = volume.interpolateVoxel(next);
+                            // when ray crosses a surface
+                            if(std::signbit(f) != std::signbit(fnext))
+                                break;
+
+                            f = fnext;
+                        }
+                    }
+                    // if ray penetrates a surface from outside
+                    // linearly interpolate t between two f values
+                    if(f > 0.f && fnext < 0.f)
+                    {
+                        Point3f tp    = next - rayStep;
+                        float ft   = volume.interpolateVoxel(tp);
+                        float ftdt = volume.interpolateVoxel(next);
+                        // float t = tmin + steps*tstep;
+                        // float ts = t - tstep*ft/(ftdt - ft);
+                        float ts = tmin + tstep*(steps - ft/(ftdt - ft));
+
+                        // avoid division by zero
+                        if(!cvIsNaN(ts) && !cvIsInf(ts))
+                        {
+                            Point3f pv = (orig + dir*ts);
+                            Point3f nv = volume.getNormalVoxel(pv);
+                            Point3f cv = volume.getColorVoxel(pv);
+                            if(!isNaN(nv))
+                            {
+                                //convert pv and nv to camera space
+                                normal = volRot * nv;
+                                color = cv;
+                                // interpolation optimized a little
+                                point = vol2cam * (pv*volume.voxelSize);
+                            }
+                        }
+                    }
+                }
+                ptsRow[x] = toPtype(point);
+                nrmRow[x] = toPtype(normal);
+                clrRow[x] = toPtype(color);
+            }
+        }
+    }
+#endif
+
+    Points& points;
+    Normals& normals;
+    Colors& colors;
+    const ColoredTSDFVolumeCPU& volume;
+
+    const float tstep;
+
+    const Point3f boxMax;
+    const Point3f boxMin;
+
+    const Affine3f cam2vol;
+    const Affine3f vol2cam;
+    const Intr::Reprojector reprojDepth;
+};
+
+
+void ColoredTSDFVolumeCPU::raycast(const Matx44f& cameraPose, const Intr& depth_intrinsics, const Size& frameSize,
+                            OutputArray _points, OutputArray _normals, OutputArray _colors) const
+{
+    CV_TRACE_FUNCTION();
+
+    CV_Assert(frameSize.area() > 0);
+
+    _points.create (frameSize, POINT_TYPE);
+    _normals.create(frameSize, POINT_TYPE);
+    _colors.create(frameSize, POINT_TYPE);
+
+    Points points   =  _points.getMat();
+    Normals normals = _normals.getMat();
+    Colors colors = _colors.getMat();
+    ColorRaycastInvoker ri(points, normals, colors, cameraPose, depth_intrinsics, *this);
+
+    const int nstripes = -1;
+    parallel_for_(Range(0, points.rows), ri, nstripes);
+}
+
+
+struct ColorFetchPointsNormalsInvoker : ParallelLoopBody
+{
+    ColorFetchPointsNormalsInvoker(const ColoredTSDFVolumeCPU& _volume,
+                              std::vector<std::vector<ptype>>& _pVecs,
+                              std::vector<std::vector<ptype>>& _nVecs,
+                              bool _needNormals) :
+        ParallelLoopBody(),
+        vol(_volume),
+        pVecs(_pVecs),
+        nVecs(_nVecs),
+        needNormals(_needNormals)
+    {
+        volDataStart = vol.volume.ptr<RGBTsdfVoxel>();
+    }
+
+    inline void coord(std::vector<ptype>& points, std::vector<ptype>& normals,
+                      int x, int y, int z, Point3f V, float v0, int axis) const
+    {
+        // 0 for x, 1 for y, 2 for z
+        bool limits = false;
+        Point3i shift;
+        float Vc = 0.f;
+        if(axis == 0)
+        {
+            shift  = Point3i(1, 0, 0);
+            limits = (x + 1 < vol.volResolution.x);
+            Vc     = V.x;
+        }
+        if(axis == 1)
+        {
+            shift  = Point3i(0, 1, 0);
+            limits = (y + 1 < vol.volResolution.y);
+            Vc     = V.y;
+        }
+        if(axis == 2)
+        {
+            shift  = Point3i(0, 0, 1);
+            limits = (z + 1 < vol.volResolution.z);
+            Vc     = V.z;
+        }
+
+        if(limits)
+        {
+            const RGBTsdfVoxel& voxeld = volDataStart[(x+shift.x)*vol.volDims[0] +
+                                                   (y+shift.y)*vol.volDims[1] +
+                                                   (z+shift.z)*vol.volDims[2]];
+            float vd = tsdfToFloat(voxeld.tsdf);
+
+            if(voxeld.weight != 0 && vd != 1.f)
+            {
+                if((v0 > 0 && vd < 0) || (v0 < 0 && vd > 0))
+                {
+                    //linearly interpolate coordinate
+                    float Vn    = Vc + vol.voxelSize;
+                    float dinv  = 1.f/(abs(v0)+abs(vd));
+                    float inter = (Vc*abs(vd) + Vn*abs(v0))*dinv;
+
+                    Point3f p(shift.x ? inter : V.x,
+                              shift.y ? inter : V.y,
+                              shift.z ? inter : V.z);
+                    {
+                        points.push_back(toPtype(vol.pose * p));
+                        if(needNormals)
+                            normals.push_back(toPtype(vol.pose.rotation() *
+                                                      vol.getNormalVoxel(p*vol.voxelSizeInv)));
+                    }
+                }
+            }
+        }
+    }
+
+    virtual void operator() (const Range& range) const override
+    {
+        std::vector<ptype> points, normals;
+        for(int x = range.start; x < range.end; x++)
+        {
+            const RGBTsdfVoxel* volDataX = volDataStart + x*vol.volDims[0];
+            for(int y = 0; y < vol.volResolution.y; y++)
+            {
+                const RGBTsdfVoxel* volDataY = volDataX + y*vol.volDims[1];
+                for(int z = 0; z < vol.volResolution.z; z++)
+                {
+                    const RGBTsdfVoxel& voxel0 = volDataY[z*vol.volDims[2]];
+                    float v0             = tsdfToFloat(voxel0.tsdf);
+                    if(voxel0.weight != 0 && v0 != 1.f)
+                    {
+                        Point3f V(Point3f((float)x + 0.5f, (float)y + 0.5f, (float)z + 0.5f)*vol.voxelSize);
+
+                        coord(points, normals, x, y, z, V, v0, 0);
+                        coord(points, normals, x, y, z, V, v0, 1);
+                        coord(points, normals, x, y, z, V, v0, 2);
+
+                    } // if voxel is not empty
+                }
+            }
+        }
+
+        AutoLock al(mutex);
+        pVecs.push_back(points);
+        nVecs.push_back(normals);
+    }
+
+    const ColoredTSDFVolumeCPU& vol;
+    std::vector<std::vector<ptype>>& pVecs;
+    std::vector<std::vector<ptype>>& nVecs;
+    const RGBTsdfVoxel* volDataStart;
+    bool needNormals;
+    mutable Mutex mutex;
+};
+
+void ColoredTSDFVolumeCPU::fetchPointsNormals(OutputArray _points, OutputArray _normals) const
+{
+    CV_TRACE_FUNCTION();
+
+    if(_points.needed())
+    {
+        std::vector<std::vector<ptype>> pVecs, nVecs;
+        ColorFetchPointsNormalsInvoker fi(*this, pVecs, nVecs, _normals.needed());
+        Range range(0, volResolution.x);
+        const int nstripes = -1;
+        parallel_for_(range, fi, nstripes);
+        std::vector<ptype> points, normals;
+        for(size_t i = 0; i < pVecs.size(); i++)
+        {
+            points.insert(points.end(), pVecs[i].begin(), pVecs[i].end());
+            normals.insert(normals.end(), nVecs[i].begin(), nVecs[i].end());
+        }
+
+        _points.create((int)points.size(), 1, POINT_TYPE);
+        if(!points.empty())
+            Mat((int)points.size(), 1, POINT_TYPE, &points[0]).copyTo(_points.getMat());
+
+        if(_normals.needed())
+        {
+            _normals.create((int)normals.size(), 1, POINT_TYPE);
+            if(!normals.empty())
+                Mat((int)normals.size(), 1, POINT_TYPE, &normals[0]).copyTo(_normals.getMat());
+        }
+    }
+}
+
+void ColoredTSDFVolumeCPU::fetchNormals(InputArray _points, OutputArray _normals) const
+{
+    CV_TRACE_FUNCTION();
+    CV_Assert(!_points.empty());
+    if(_normals.needed())
+    {
+        Points points = _points.getMat();
+        CV_Assert(points.type() == POINT_TYPE);
+
+        _normals.createSameSize(_points, _points.type());
+        Normals normals = _normals.getMat();
+
+        const ColoredTSDFVolumeCPU& _vol = *this;
+        auto PushNormals = [&](const ptype& pp, const int* position)
+        {
+            const ColoredTSDFVolumeCPU& vol(_vol);
+            Affine3f invPose(vol.pose.inv());
+            Point3f p = fromPtype(pp);
+            Point3f n = nan3;
+            if (!isNaN(p))
+            {
+                Point3f voxPt = (invPose * p);
+                voxPt = voxPt * vol.voxelSizeInv;
+                n = vol.pose.rotation() * vol.getNormalVoxel(voxPt);
+            }
+            normals(position[0], position[1]) = toPtype(n);
+        };
+        points.forEach(PushNormals);
+    }
+}
+
+Ptr<ColoredTSDFVolume> makeColoredTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor,
+                                   float _truncDist, int _maxWeight, Point3i _resolution)
+{
+    return makePtr<ColoredTSDFVolumeCPU>(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _resolution);
+}
+
+Ptr<ColoredTSDFVolume> makeColoredTSDFVolume(const VolumeParams& _params)
+{
+    return makePtr<ColoredTSDFVolumeCPU>(_params.voxelSize, _params.pose.matrix, _params.raycastStepFactor,
+                                  _params.tsdfTruncDist, _params.maxWeight, _params.resolution);
+}
+
+} // namespace kinfu
+} // namespace cv
diff --git a/modules/rgbd/src/colored_tsdf.hpp b/modules/rgbd/src/colored_tsdf.hpp
new file mode 100644
index 00000000000..d6b09b0156c
--- /dev/null
+++ b/modules/rgbd/src/colored_tsdf.hpp
@@ -0,0 +1,61 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this
+// module's directory
+
+#ifndef __OPENCV_KINFU_COLORED_TSDF_H__
+#define __OPENCV_KINFU_COLORED_TSDF_H__
+
+#include <opencv2/rgbd/volume.hpp>
+
+#include "kinfu_frame.hpp"
+#include "utils.hpp"
+
+namespace cv
+{
+namespace kinfu
+{
+
+typedef int8_t TsdfType;
+typedef uchar WeightType;
+typedef short int ColorType;
+struct RGBTsdfVoxel
+{
+    RGBTsdfVoxel(TsdfType _tsdf, WeightType _weight, ColorType _r, ColorType _g, ColorType _b) :
+        tsdf(_tsdf), weight(_weight), r(_r), g(_g), b(_b)
+    { }
+    TsdfType tsdf;
+    WeightType weight;
+    ColorType r, g, b;
+};
+
+typedef Vec<uchar, sizeof(RGBTsdfVoxel)> VecRGBTsdfVoxel;
+
+class ColoredTSDFVolume : public Volume
+{
+   public:
+    // dimension in voxels, size in meters
+    ColoredTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor, float _truncDist,
+               int _maxWeight, Point3i _resolution, bool zFirstMemOrder = true);
+    virtual ~ColoredTSDFVolume() = default;
+
+   public:
+
+    Point3i volResolution;
+    WeightType maxWeight;
+
+    Point3f volSize;
+    float truncDist;
+    Vec4i volDims;
+    Vec8i neighbourCoords;
+};
+
+Ptr<ColoredTSDFVolume> makeColoredTSDFVolume(float _voxelSize, Matx44f _pose, float _raycastStepFactor,
+                               float _truncDist, int _maxWeight, Point3i _resolution);
+Ptr<ColoredTSDFVolume> makeColoredTSDFVolume(const VolumeParams& _params);
+
+}  // namespace kinfu
+}  // namespace cv
+#endif
diff --git a/modules/rgbd/src/hash_tsdf.cpp b/modules/rgbd/src/hash_tsdf.cpp
index 08338fe83ef..4e492064a44 100644
--- a/modules/rgbd/src/hash_tsdf.cpp
+++ b/modules/rgbd/src/hash_tsdf.cpp
@@ -99,11 +99,14 @@ class HashTSDFVolumeCPU : public HashTSDFVolume
 
     HashTSDFVolumeCPU(const VolumeParams& _volumeParams, bool zFirstMemOrder = true);
 
+    virtual void integrate(InputArray, InputArray, float, const Matx44f&, const kinfu::Intr&, const Intr&, const int) override
+        { CV_Error(Error::StsNotImplemented, "Not implemented"); };
     void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const kinfu::Intr& intrinsics,
         const int frameId = 0) override;
     void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics, const Size& frameSize, OutputArray points,
         OutputArray normals) const override;
-
+    void raycast(const Matx44f&, const kinfu::Intr&, const Size&, OutputArray, OutputArray, OutputArray) const override
+        { CV_Error(Error::StsNotImplemented, "Not implemented"); };
     void fetchNormals(InputArray points, OutputArray _normals) const override;
     void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
 
@@ -894,11 +897,14 @@ class HashTSDFVolumeGPU : public HashTSDFVolume
 
     void markActive(const Matx44f& cameraPose, const Intr& intrinsics, const Size frameSz, const int frameId);
 
+    virtual void integrate(InputArray, InputArray, float, const Matx44f&, const kinfu::Intr&, const Intr&, const int) override
+        { CV_Error(Error::StsNotImplemented, "Not implemented"); };
     void integrate(InputArray _depth, float depthFactor, const Matx44f& cameraPose, const kinfu::Intr& intrinsics,
         const int frameId = 0) override;
     void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics, const Size& frameSize, OutputArray points,
         OutputArray normals) const override;
-
+    void raycast(const Matx44f&, const kinfu::Intr&, const Size&, OutputArray, OutputArray, OutputArray) const override
+        { CV_Error(Error::StsNotImplemented, "Not implemented"); };
 
     void fetchNormals(InputArray points, OutputArray _normals) const override;
     void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
diff --git a/modules/rgbd/src/kinfu.cpp b/modules/rgbd/src/kinfu.cpp
index 1d8314aae6f..7f9f1b32361 100644
--- a/modules/rgbd/src/kinfu.cpp
+++ b/modules/rgbd/src/kinfu.cpp
@@ -107,6 +107,18 @@ Ptr<Params> Params::hashTSDFParams(bool isCoarse)
     return p;
 }
 
+Ptr<Params> Params::coloredTSDFParams(bool isCoarse)
+{
+    Ptr<Params> p;
+    if (isCoarse)
+        p = coarseParams();
+    else
+        p = defaultParams();
+    p->volumeType = VolumeType::COLOREDTSDF;
+
+    return p;
+}
+
 // MatType should be Mat or UMat
 template< typename MatType>
 class KinFuImpl : public KinFu
diff --git a/modules/rgbd/src/kinfu_frame.cpp b/modules/rgbd/src/kinfu_frame.cpp
index 76d80902b7b..b8df7c1f5a7 100644
--- a/modules/rgbd/src/kinfu_frame.cpp
+++ b/modules/rgbd/src/kinfu_frame.cpp
@@ -12,6 +12,7 @@ namespace cv {
 namespace kinfu {
 
 static void computePointsNormals(const cv::kinfu::Intr, float depthFactor, const Depth, Points, Normals );
+void computePointsNormalsColors(const Intr, const Intr, float, const Depth, const Colors, Points, Normals, Colors);
 static Depth pyrDownBilateral(const Depth depth, float sigma);
 static void pyrDownPointsNormals(const Points p, const Normals n, Points& pdown, Normals& ndown);
 
@@ -42,6 +43,13 @@ inline float specPow<1>(float x)
     return x;
 }
 
+inline void colorFix(Point3f& c)
+{
+    if (c.x > 255) c.x = 255;
+    if (c.y > 255) c.y = 255;
+    if (c.z > 255) c.z = 255;
+}
+
 struct RenderInvoker : ParallelLoopBody
 {
     RenderInvoker(const Points& _points, const Normals& _normals, Mat_<Vec4b>& _img, Affine3f _lightPose, Size _sz) :
@@ -105,6 +113,52 @@ struct RenderInvoker : ParallelLoopBody
     Size sz;
 };
 
+struct RenderColorInvoker : ParallelLoopBody
+{
+    RenderColorInvoker(const Points& _points, const Colors& _colors, Mat_<Vec4b>& _img, Affine3f _lightPose, Size _sz) :
+        ParallelLoopBody(),
+        points(_points),
+        colors(_colors),
+        img(_img),
+        lightPose(_lightPose),
+        sz(_sz)
+    { }
+
+    virtual void operator ()(const Range& range) const override
+    {
+        for(int y = range.start; y < range.end; y++)
+        {
+            Vec4b* imgRow = img[y];
+            const ptype* ptsRow = points[y];
+            const ptype* clrRow = colors[y];
+
+            for(int x = 0; x < sz.width; x++)
+            {
+                Point3f p = fromPtype(ptsRow[x]);
+                Point3f c = fromPtype(clrRow[x]);
+                Vec4b color;
+
+                if(isNaN(p) || isNaN(c))
+                {
+                    color = Vec4b(0, 32, 0, 0);
+                }
+                else
+                {
+                    color = Vec4b((uchar)c.x, (uchar)c.y, (uchar)c.z, (uchar)0);
+                }
+
+                imgRow[x] = color;
+            }
+        }
+    }
+
+    const Points& points;
+    const Colors& colors;
+    Mat_<Vec4b>& img;
+    Affine3f lightPose;
+    Size sz;
+};
+
 
 void pyrDownPointsNormals(const Points p, const Normals n, Points &pdown, Normals &ndown)
 {
@@ -275,6 +329,81 @@ struct ComputePointsNormalsInvoker : ParallelLoopBody
     float dfac;
 };
 
+struct ComputePointsNormalsColorsInvoker : ParallelLoopBody
+{
+    ComputePointsNormalsColorsInvoker(const Depth& _depth, const Colors& _rgb, Points& _points, Normals& _normals, Colors& _colors,
+                                const Intr::Reprojector& _reproj, const Intr::Projector& _rgb_reproj, float _dfac) :
+        ParallelLoopBody(),
+        depth(_depth),
+        rgb(_rgb),
+        points(_points),
+        normals(_normals),
+        colors(_colors),
+        reproj(_reproj),
+        rgb_proj(_rgb_reproj),
+        dfac(_dfac)
+    { }
+
+    virtual void operator ()(const Range& range) const override
+    {
+        for(int y = range.start; y < range.end; y++)
+        {
+            const depthType* depthRow0 = depth[y];
+            const depthType* depthRow1 = (y < depth.rows - 1) ? depth[y + 1] : 0;
+            ptype    *ptsRow = points[y];
+            ptype   *normRow = normals[y];
+            ptype    *clrRow = colors[y];
+
+            for(int x = 0; x < depth.cols; x++)
+            {
+                depthType d00 = depthRow0[x];
+                depthType z00 = d00*dfac;
+                Point3f v00 = reproj(Point3f((float)x, (float)y, z00));
+                Point2f proj = rgb_proj(v00);
+                int rgb_u = (int) proj.x, rgb_v = (int) proj.y;
+
+                Point3f p = nan3, n = nan3, c = nan3;
+                if(x < depth.cols - 1 && y < depth.rows - 1 &&
+                   rgb_v >= 0 && rgb_v < rgb.rows && rgb_u >= 0 && rgb_u < rgb.cols)
+                {
+                    depthType d01 = depthRow0[x+1];
+                    depthType d10 = depthRow1[x];
+
+                    depthType z01 = d01*dfac;
+                    depthType z10 = d10*dfac;
+
+                    // before it was
+                    //if(z00*z01*z10 != 0)
+                    if(z00 != 0 && z01 != 0 && z10 != 0)
+                    {
+                        Point3f v01 = reproj(Point3f((float)(x+1), (float)(y+0), z01));
+                        Point3f v10 = reproj(Point3f((float)(x+0), (float)(y+1), z10));
+
+                        cv::Vec3f vec = (v01-v00).cross(v10-v00);
+                        n = -normalize(vec);
+                        p = v00;
+                        c = fromPtype(rgb.at<Vec4f>(rgb_v, rgb_u));
+                        colorFix(c);
+                    }
+                }
+
+                ptsRow[x] = toPtype(p);
+                normRow[x] = toPtype(n);
+                clrRow[x] = toPtype(c);
+            }
+        }
+    }
+
+    const Depth& depth;
+    const Colors& rgb;
+    Points& points;
+    Normals& normals;
+    Colors& colors;
+    const Intr::Reprojector& reproj;
+    const Intr::Projector& rgb_proj;
+    float dfac;
+};
+
 void computePointsNormals(const Intr intr, float depthFactor, const Depth depth,
                           Points points, Normals normals)
 {
@@ -297,6 +426,30 @@ void computePointsNormals(const Intr intr, float depthFactor, const Depth depth,
     parallel_for_(range, ci, nstripes);
 }
 
+void computePointsNormalsColors(const Intr intr, const Intr rgb_intr, float depthFactor,
+    const Depth depth, const Colors rgb, Points points, Normals normals, Colors colors)
+{
+    CV_TRACE_FUNCTION();
+
+    CV_Assert(!points.empty() && !normals.empty());
+    CV_Assert(depth.size() == points.size());
+    CV_Assert(depth.size() == normals.size());
+    CV_Assert(depth.size() == colors.size());
+
+    // conversion to meters
+    // before it was:
+    //float dfac = 0.001f/depthFactor;
+    float dfac = 1.f/depthFactor;
+
+    Intr::Reprojector reproj = intr.makeReprojector();
+    Intr::Projector projRGB = rgb_intr.makeProjector();
+
+    ComputePointsNormalsColorsInvoker ci(depth, rgb, points, normals, colors, reproj, projRGB, dfac);
+    Range range(0, depth.rows);
+    const int nstripes = -1;
+    parallel_for_(range, ci, nstripes);
+}
+
 ///////// GPU implementation /////////
 
 #ifdef HAVE_OPENCL
@@ -598,6 +751,33 @@ void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray im
     parallel_for_(range, ri, nstripes);
 }
 
+void renderPointsNormalsColors(InputArray _points, InputArray _normals, InputArray _colors, OutputArray image, Affine3f lightPose)
+{
+    CV_TRACE_FUNCTION();
+
+    CV_Assert(_points.size().area() > 0);
+    CV_Assert(_points.size() == _normals.size());
+
+    Size sz = _points.size();
+    image.create(sz, CV_8UC4);
+
+    CV_OCL_RUN(_points.isUMat() && _normals.isUMat() && image.isUMat(),
+               ocl_renderPointsNormals(_points.getUMat(),
+                                       _normals.getUMat(),
+                                       image.getUMat(), lightPose))
+
+    Points  points  = _points.getMat();
+    Normals normals = _normals.getMat();
+    Colors colors = _colors.getMat();
+
+    Mat_<Vec4b> img = image.getMat();
+
+    RenderColorInvoker ri(points, colors, img, lightPose, sz);
+    Range range(0, sz.height);
+    const int nstripes = -1;
+    parallel_for_(range, ri, nstripes);
+}
+
 
 void makeFrameFromDepth(InputArray _depth,
                         OutputArray pyrPoints, OutputArray pyrNormals,
@@ -659,6 +839,65 @@ void makeFrameFromDepth(InputArray _depth,
     }
 }
 
+void makeColoredFrameFromDepth(InputArray _depth, InputArray _rgb,
+                        OutputArray pyrPoints, OutputArray pyrNormals, OutputArray pyrColors,
+                        const Intr intr, const Intr rgb_intr, int levels, float depthFactor,
+                        float sigmaDepth, float sigmaSpatial, int kernelSize,
+                        float truncateThreshold)
+{
+    CV_TRACE_FUNCTION();
+
+    CV_Assert(_depth.type() == DEPTH_TYPE);
+
+
+    int kp = pyrPoints.kind(), kn = pyrNormals.kind(), kc = pyrColors.kind();
+    CV_Assert(kp == _InputArray::STD_ARRAY_MAT || kp == _InputArray::STD_VECTOR_MAT);
+    CV_Assert(kn == _InputArray::STD_ARRAY_MAT || kn == _InputArray::STD_VECTOR_MAT);
+    CV_Assert(kc == _InputArray::STD_ARRAY_MAT || kc == _InputArray::STD_VECTOR_MAT);
+
+    Depth depth = _depth.getMat();
+    Colors rgb  = _rgb.getMat();
+    // looks like OpenCV's bilateral filter works the same as KinFu's
+    Depth smooth;
+    Depth depthNoNans = depth.clone();
+    patchNaNs(depthNoNans);
+    bilateralFilter(depthNoNans, smooth, kernelSize, sigmaDepth*depthFactor, sigmaSpatial);
+
+    // depth truncation can be used in some scenes
+    Depth depthThreshold;
+    if(truncateThreshold > 0.f)
+        threshold(smooth, depthThreshold, truncateThreshold * depthFactor, 0.0, THRESH_TOZERO_INV);
+    else
+        depthThreshold = smooth;
+
+    // we don't need depth pyramid outside this method
+    // if we do, the code is to be refactored
+
+    Depth scaled = depthThreshold;
+    Size sz = smooth.size();
+    pyrPoints.create(levels, 1, POINT_TYPE);
+    pyrNormals.create(levels, 1, POINT_TYPE);
+    pyrColors.create(levels, 1, POINT_TYPE);
+    for(int i = 0; i < levels; i++)
+    {
+        pyrPoints .create(sz, POINT_TYPE, i);
+        pyrNormals.create(sz, POINT_TYPE, i);
+        pyrColors.create(sz, POINT_TYPE, i);
+
+        Points  p = pyrPoints. getMatRef(i);
+        Normals n = pyrNormals.getMatRef(i);
+        Colors  c = pyrColors. getMatRef(i);
+
+        computePointsNormalsColors(intr.scale(i), rgb_intr.scale(i), depthFactor, scaled, rgb, p, n, c);
+
+        if(i < levels - 1)
+        {
+            sz.width /= 2; sz.height /= 2;
+            scaled = pyrDownBilateral(scaled, sigmaDepth*depthFactor);
+        }
+    }
+}
+
 
 void buildPyramidPointsNormals(InputArray _points, InputArray _normals,
                                OutputArrayOfArrays pyrPoints, OutputArrayOfArrays pyrNormals,
diff --git a/modules/rgbd/src/kinfu_frame.hpp b/modules/rgbd/src/kinfu_frame.hpp
index 16327d36aa5..199cfcf2c03 100644
--- a/modules/rgbd/src/kinfu_frame.hpp
+++ b/modules/rgbd/src/kinfu_frame.hpp
@@ -73,19 +73,27 @@ inline ptype toPtype(const cv::Vec3f& x)
 enum
 {
     DEPTH_TYPE = DataType<depthType>::type,
-    POINT_TYPE = DataType<ptype    >::type
+    POINT_TYPE = DataType<ptype    >::type,
+    COLOR_TYPE = DataType<ptype    >::type
 };
 
 typedef cv::Mat_< ptype > Points;
 typedef Points Normals;
+typedef Points Colors;
 
 typedef cv::Mat_< depthType > Depth;
 
 void renderPointsNormals(InputArray _points, InputArray _normals, OutputArray image, cv::Affine3f lightPose);
+void renderPointsNormalsColors(InputArray _points, InputArray _normals, InputArray _colors, OutputArray image, Affine3f lightPose);
 void makeFrameFromDepth(InputArray depth, OutputArray pyrPoints, OutputArray pyrNormals,
                         const Intr intr, int levels, float depthFactor,
                         float sigmaDepth, float sigmaSpatial, int kernelSize,
                         float truncateThreshold);
+void makeColoredFrameFromDepth(InputArray _depth, InputArray _rgb,
+                        OutputArray pyrPoints, OutputArray pyrNormals, OutputArray pyrColors,
+                        const Intr intr, const Intr rgb_intr, int levels, float depthFactor,
+                        float sigmaDepth, float sigmaSpatial, int kernelSize,
+                        float truncateThreshold);
 void buildPyramidPointsNormals(InputArray _points, InputArray _normals,
                                OutputArrayOfArrays pyrPoints, OutputArrayOfArrays pyrNormals,
                                int levels);
diff --git a/modules/rgbd/src/precomp.hpp b/modules/rgbd/src/precomp.hpp
index 4852274f3b0..945bc1537e9 100644
--- a/modules/rgbd/src/precomp.hpp
+++ b/modules/rgbd/src/precomp.hpp
@@ -21,6 +21,7 @@
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 #include "opencv2/core/ocl.hpp"
+#include "opencv2/core.hpp"
 #include "opencv2/imgproc.hpp"
 #include "opencv2/calib3d.hpp"
 #include "opencv2/rgbd.hpp"
diff --git a/modules/rgbd/src/tsdf.cpp b/modules/rgbd/src/tsdf.cpp
index 991d525b9d6..7b76985eb43 100644
--- a/modules/rgbd/src/tsdf.cpp
+++ b/modules/rgbd/src/tsdf.cpp
@@ -687,7 +687,6 @@ struct FetchPointsNormalsInvoker : ParallelLoopBody
                                                    (y+shift.y)*vol.volDims[1] +
                                                    (z+shift.z)*vol.volDims[2]];
             float vd = tsdfToFloat(voxeld.tsdf);
-
             if(voxeld.weight != 0 && vd != 1.f)
             {
                 if((v0 > 0 && vd < 0) || (v0 < 0 && vd > 0))
@@ -761,6 +760,7 @@ void TSDFVolumeCPU::fetchPointsNormals(OutputArray _points, OutputArray _normals
         Range range(0, volResolution.x);
         const int nstripes = -1;
         parallel_for_(range, fi, nstripes);
+
         std::vector<ptype> points, normals;
         for(size_t i = 0; i < pVecs.size(); i++)
         {
diff --git a/modules/rgbd/src/tsdf.hpp b/modules/rgbd/src/tsdf.hpp
index 67361aa59d9..a3f561fbe4a 100644
--- a/modules/rgbd/src/tsdf.hpp
+++ b/modules/rgbd/src/tsdf.hpp
@@ -62,6 +62,10 @@ class TSDFVolumeCPU : public TSDFVolume
                            const kinfu::Intr& intrinsics, const int frameId = 0) override;
     virtual void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics, const Size& frameSize,
                          OutputArray points, OutputArray normals) const override;
+    virtual void integrate(InputArray, InputArray, float, const Matx44f&, const kinfu::Intr&, const Intr&, const int) override
+        { CV_Error(Error::StsNotImplemented, "Not implemented"); };
+    virtual void raycast(const Matx44f&, const kinfu::Intr&, const Size&, OutputArray, OutputArray, OutputArray) const override
+        { CV_Error(Error::StsNotImplemented, "Not implemented"); };
 
     virtual void fetchNormals(InputArray points, OutputArray _normals) const override;
     virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
@@ -97,6 +101,10 @@ class TSDFVolumeGPU : public TSDFVolume
                            const kinfu::Intr& intrinsics, const int frameId = 0) override;
     virtual void raycast(const Matx44f& cameraPose, const kinfu::Intr& intrinsics, const Size& frameSize,
                          OutputArray _points, OutputArray _normals) const override;
+    virtual void integrate(InputArray, InputArray, float, const Matx44f&, const kinfu::Intr&, const Intr&, const int) override
+        { CV_Error(Error::StsNotImplemented, "Not implemented"); };
+    virtual void raycast(const Matx44f&, const kinfu::Intr&, const Size&, OutputArray, OutputArray, OutputArray) const override
+        { CV_Error(Error::StsNotImplemented, "Not implemented"); };
 
     virtual void fetchPointsNormals(OutputArray points, OutputArray normals) const override;
     virtual void fetchNormals(InputArray points, OutputArray normals) const override;
diff --git a/modules/rgbd/src/tsdf_functions.cpp b/modules/rgbd/src/tsdf_functions.cpp
index b0e5276cba7..3f8bc26f011 100644
--- a/modules/rgbd/src/tsdf_functions.cpp
+++ b/modules/rgbd/src/tsdf_functions.cpp
@@ -418,6 +418,306 @@ void integrateVolumeUnit(
     parallel_for_(integrateRange, IntegrateInvoker);
 
 }
+void integrateRGBVolumeUnit(
+    float truncDist, float voxelSize, int maxWeight,
+    cv::Matx44f _pose, Point3i volResolution, Vec4i volStrides,
+    InputArray _depth, InputArray _rgb, float depthFactor, const cv::Matx44f& cameraPose,
+    const cv::kinfu::Intr& depth_intrinsics, const cv::kinfu::Intr& rgb_intrinsics, InputArray _pixNorms, InputArray _volume)
+{
+    CV_TRACE_FUNCTION();
+
+    CV_Assert(_depth.type() == DEPTH_TYPE);
+    CV_Assert(!_depth.empty());
+    cv::Affine3f vpose(_pose);
+    Depth depth = _depth.getMat();
+    Colors color = _rgb.getMat();
+    Range integrateRange(0, volResolution.x);
+
+    Mat volume = _volume.getMat();
+    Mat pixNorms = _pixNorms.getMat();
+    const Intr::Projector projDepth(depth_intrinsics.makeProjector());
+    const Intr::Projector projRGB(rgb_intrinsics);
+    const cv::Affine3f vol2cam(Affine3f(cameraPose.inv()) * vpose);
+    const float truncDistInv(1.f / truncDist);
+    const float dfac(1.f / depthFactor);
+    RGBTsdfVoxel* volDataStart = volume.ptr<RGBTsdfVoxel>();
+
+#if USE_INTRINSICS
+    auto IntegrateInvoker = [&](const Range& range)
+    {
+        // zStep == vol2cam*(Point3f(x, y, 1)*voxelSize) - basePt;
+        Point3f zStepPt = Point3f(vol2cam.matrix(0, 2),
+            vol2cam.matrix(1, 2),
+            vol2cam.matrix(2, 2)) * voxelSize;
+
+        v_float32x4 zStep(zStepPt.x, zStepPt.y, zStepPt.z, 0);
+        v_float32x4 vfxy(projDepth.fx, projDepth.fy, 0.f, 0.f), vcxy(projDepth.cx, projDepth.cy, 0.f, 0.f);
+        v_float32x4 rgb_vfxy(projRGB.fx, projRGB.fy, 0.f, 0.f), rgb_vcxy(projRGB.cx, projRGB.cy, 0.f, 0.f);
+        const v_float32x4 upLimits = v_cvt_f32(v_int32x4(depth.cols - 1, depth.rows - 1, 0, 0));
+
+        for (int x = range.start; x < range.end; x++)
+        {
+            RGBTsdfVoxel* volDataX = volDataStart + x * volStrides[0];
+            for (int y = 0; y < volResolution.y; y++)
+            {
+                RGBTsdfVoxel* volDataY = volDataX + y * volStrides[1];
+                // optimization of camSpace transformation (vector addition instead of matmul at each z)
+                Point3f basePt = vol2cam * (Point3f((float)x, (float)y, 0) * voxelSize);
+                v_float32x4 camSpacePt(basePt.x, basePt.y, basePt.z, 0);
+
+                int startZ, endZ;
+                if (abs(zStepPt.z) > 1e-5)
+                {
+                    int baseZ = (int)(-basePt.z / zStepPt.z);
+                    if (zStepPt.z > 0)
+                    {
+                        startZ = baseZ;
+                        endZ = volResolution.z;
+                    }
+                    else
+                    {
+                        startZ = 0;
+                        endZ = baseZ;
+                    }
+                }
+                else
+                {
+                    if (basePt.z > 0)
+                    {
+                        startZ = 0;
+                        endZ = volResolution.z;
+                    }
+                    else
+                    {
+                        // z loop shouldn't be performed
+                        startZ = endZ = 0;
+                    }
+                }
+                startZ = max(0, startZ);
+                endZ = min(int(volResolution.z), endZ);
+                for (int z = startZ; z < endZ; z++)
+                {
+                    // optimization of the following:
+                    //Point3f volPt = Point3f(x, y, z)*voxelSize;
+                    //Point3f camSpacePt = vol2cam * volPt;
+                    camSpacePt += zStep;
+
+                    float zCamSpace = v_reinterpret_as_f32(v_rotate_right<2>(v_reinterpret_as_u32(camSpacePt))).get0();
+                    if (zCamSpace <= 0.f)
+                        continue;
+
+                    v_float32x4 camPixVec = camSpacePt / v_setall_f32(zCamSpace);
+                    v_float32x4 projected = v_muladd(camPixVec, vfxy, vcxy);
+                    // leave only first 2 lanes
+                    projected = v_reinterpret_as_f32(v_reinterpret_as_u32(projected) &
+                        v_uint32x4(0xFFFFFFFF, 0xFFFFFFFF, 0, 0));
+
+                    depthType v;
+                    // bilinearly interpolate depth at projected
+                    {
+                        const v_float32x4& pt = projected;
+                        // check coords >= 0 and < imgSize
+                        v_uint32x4 limits = v_reinterpret_as_u32(pt < v_setzero_f32()) |
+                            v_reinterpret_as_u32(pt >= upLimits);
+                        limits = limits | v_rotate_right<1>(limits);
+                        if (limits.get0())
+                            continue;
+
+                        // xi, yi = floor(pt)
+                        v_int32x4 ip = v_floor(pt);
+                        v_int32x4 ipshift = ip;
+                        int xi = ipshift.get0();
+                        ipshift = v_rotate_right<1>(ipshift);
+                        int yi = ipshift.get0();
+
+                        const depthType* row0 = depth[yi + 0];
+                        const depthType* row1 = depth[yi + 1];
+
+                        // v001 = [v(xi + 0, yi + 0), v(xi + 1, yi + 0)]
+                        v_float32x4 v001 = v_load_low(row0 + xi);
+                        // v101 = [v(xi + 0, yi + 1), v(xi + 1, yi + 1)]
+                        v_float32x4 v101 = v_load_low(row1 + xi);
+
+                        v_float32x4 vall = v_combine_low(v001, v101);
+
+                        // assume correct depth is positive
+                        // don't fix missing data
+                        if (v_check_all(vall > v_setzero_f32()))
+                        {
+                            v_float32x4 t = pt - v_cvt_f32(ip);
+                            float tx = t.get0();
+                            t = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(t)));
+                            v_float32x4 ty = v_setall_f32(t.get0());
+                            // vx is y-interpolated between rows 0 and 1
+                            v_float32x4 vx = v001 + ty * (v101 - v001);
+                            float v0 = vx.get0();
+                            vx = v_reinterpret_as_f32(v_rotate_right<1>(v_reinterpret_as_u32(vx)));
+                            float v1 = vx.get0();
+                            v = v0 + tx * (v1 - v0);
+                        }
+                        else
+                            continue;
+                    }
+
+                    v_float32x4 projectedRGB = v_muladd(camPixVec, rgb_vfxy, rgb_vcxy);
+                    // leave only first 2 lanes
+                    projectedRGB = v_reinterpret_as_f32(v_reinterpret_as_u32(projected) &
+                        v_uint32x4(0xFFFFFFFF, 0xFFFFFFFF, 0, 0));
+
+                    // norm(camPixVec) produces double which is too slow
+                    int _u = (int)projected.get0();
+                    int _v = (int)v_rotate_right<1>(projected).get0();
+                    int rgb_u = (int)projectedRGB.get0();
+                    int rgb_v = (int)v_rotate_right<1>(projectedRGB).get0();
+
+                    if (!(_u >= 0 && _u < depth.cols && _v >= 0 && _v < depth.rows &&
+                        rgb_v >= 0 && rgb_v < color.rows && rgb_u >= 0 && rgb_u < color.cols))
+                        continue;
+                    float pixNorm = pixNorms.at<float>(_v, _u);
+                    Vec4f colorRGB = color.at<Vec4f>(rgb_v, rgb_u);
+                    //float pixNorm = sqrt(v_reduce_sum(camPixVec*camPixVec));
+                    // difference between distances of point and of surface to camera
+                    float sdf = pixNorm * (v * dfac - zCamSpace);
+                    // possible alternative is:
+                    // kftype sdf = norm(camSpacePt)*(v*dfac/camSpacePt.z - 1);
+                    if (sdf >= -truncDist)
+                    {
+                        TsdfType tsdf = floatToTsdf(fmin(1.f, sdf * truncDistInv));
+
+                        RGBTsdfVoxel& voxel = volDataY[z * volStrides[2]];
+                        WeightType& weight = voxel.weight;
+                        TsdfType& value = voxel.tsdf;
+                        ColorType& r = voxel.r;
+                        ColorType& g = voxel.g;
+                        ColorType& b = voxel.b;
+
+                        // update RGB
+                        r = (ColorType)((float)(r * weight) + (colorRGB[0])) / (weight + 1);
+                        g = (ColorType)((float)(g * weight) + (colorRGB[1])) / (weight + 1);
+                        b = (ColorType)((float)(b * weight) + (colorRGB[2])) / (weight + 1);
+                        colorFix(r, g, b);
+                        // update TSDF
+                        value = floatToTsdf((tsdfToFloat(value) * weight + tsdfToFloat(tsdf)) / (weight + 1));
+                        weight = WeightType(min(int(weight + 1), int(maxWeight)));
+                    }
+                }
+            }
+        }
+    };
+#else
+    auto IntegrateInvoker = [&](const Range& range)
+    {
+        for (int x = range.start; x < range.end; x++)
+        {
+            RGBTsdfVoxel* volDataX = volDataStart + x * volStrides[0];
+            for (int y = 0; y < volResolution.y; y++)
+            {
+                RGBTsdfVoxel* volDataY = volDataX + y * volStrides[1];
+                // optimization of camSpace transformation (vector addition instead of matmul at each z)
+                Point3f basePt = vol2cam * (Point3f(float(x), float(y), 0.0f) * voxelSize);
+                Point3f camSpacePt = basePt;
+                // zStep == vol2cam*(Point3f(x, y, 1)*voxelSize) - basePt;
+                // zStep == vol2cam*[Point3f(x, y, 1) - Point3f(x, y, 0)]*voxelSize
+                Point3f zStep = Point3f(vol2cam.matrix(0, 2),
+                    vol2cam.matrix(1, 2),
+                    vol2cam.matrix(2, 2)) * voxelSize;
+                int startZ, endZ;
+                if (abs(zStep.z) > 1e-5)
+                {
+                    int baseZ = int(-basePt.z / zStep.z);
+                    if (zStep.z > 0)
+                    {
+                        startZ = baseZ;
+                        endZ = volResolution.z;
+                    }
+                    else
+                    {
+                        startZ = 0;
+                        endZ = baseZ;
+                    }
+                }
+                else
+                {
+                    if (basePt.z > 0)
+                    {
+                        startZ = 0;
+                        endZ = volResolution.z;
+                    }
+                    else
+                    {
+                        // z loop shouldn't be performed
+                        startZ = endZ = 0;
+                    }
+                }
+                startZ = max(0, startZ);
+                endZ = min(int(volResolution.z), endZ);
+
+                for (int z = startZ; z < endZ; z++)
+                {
+                    // optimization of the following:
+                    //Point3f volPt = Point3f(x, y, z)*volume.voxelSize;
+                    //Point3f camSpacePt = vol2cam * volPt;
+
+                    camSpacePt += zStep;
+                    if (camSpacePt.z <= 0)
+                        continue;
+
+                    Point3f camPixVec;
+                    Point2f projected = projDepth(camSpacePt, camPixVec);
+                    Point2f projectedRGB = projRGB(camSpacePt, camPixVec);
+
+
+                    depthType v = bilinearDepth(depth, projected);
+                    if (v == 0) {
+                        continue;
+                    }
+
+                    int _u = (int) projected.x;
+                    int _v = (int) projected.y;
+
+                    int rgb_u = (int) projectedRGB.x;
+                    int rgb_v = (int) projectedRGB.y;
+
+                    if (!(_v >= 0 && _v < depth.rows && _u >= 0 && _u < depth.cols  &&
+                        rgb_v >= 0 && rgb_v < color.rows && rgb_u >= 0 && rgb_u < color.cols))
+                        continue;
+
+                    float pixNorm = pixNorms.at<float>(_v, _u);
+                    Vec4f colorRGB = color.at<Vec4f>(rgb_v, rgb_u);
+                    // difference between distances of point and of surface to camera
+                    float sdf = pixNorm * (v * dfac - camSpacePt.z);
+                    // possible alternative is:
+                    // kftype sdf = norm(camSpacePt)*(v*dfac/camSpacePt.z - 1);
+                    if (sdf >= -truncDist)
+                    {
+                        TsdfType tsdf = floatToTsdf(fmin(1.f, sdf * truncDistInv));
+
+                        RGBTsdfVoxel& voxel = volDataY[z * volStrides[2]];
+                        WeightType& weight = voxel.weight;
+                        TsdfType& value = voxel.tsdf;
+                        ColorType& r = voxel.r;
+                        ColorType& g = voxel.g;
+                        ColorType& b = voxel.b;
+
+                        // update RGB
+                        if (weight < 1)
+                        {
+                            r = (ColorType)((float)(r * weight) + (colorRGB[0])) / (weight + 1);
+                            g = (ColorType)((float)(g * weight) + (colorRGB[1])) / (weight + 1);
+                            b = (ColorType)((float)(b * weight) + (colorRGB[2])) / (weight + 1);
+                        }
+
+                        // update TSDF
+                        value = floatToTsdf((tsdfToFloat(value) * weight + tsdfToFloat(tsdf)) / (weight + 1));
+                        weight = WeightType( min(int(weight + 1), int(maxWeight)) );
+                    }
+                }
+            }
+        }
+    };
+#endif
+    parallel_for_(integrateRange, IntegrateInvoker);
+}
 
 } // namespace kinfu
 } // namespace cv
diff --git a/modules/rgbd/src/tsdf_functions.hpp b/modules/rgbd/src/tsdf_functions.hpp
index 09efecf1252..1031474d0b7 100644
--- a/modules/rgbd/src/tsdf_functions.hpp
+++ b/modules/rgbd/src/tsdf_functions.hpp
@@ -9,6 +9,7 @@
 
 #include <opencv2/rgbd/volume.hpp>
 #include "tsdf.hpp"
+#include "colored_tsdf.hpp"
 
 namespace cv
 {
@@ -34,6 +35,20 @@ inline float tsdfToFloat(TsdfType num)
     return float(num) * (-1.f / 128.f);
 }
 
+inline void colorFix(ColorType& r, ColorType& g, ColorType&b)
+{
+    if (r > 255) r = 255;
+    if (g > 255) g = 255;
+    if (b > 255) b = 255;
+}
+
+inline void colorFix(Point3f& c)
+{
+    if (c.x > 255) c.x = 255;
+    if (c.y > 255) c.y = 255;
+    if (c.z > 255) c.z = 255;
+}
+
 cv::Mat preCalculationPixNorm(Depth depth, const Intr& intrinsics);
 cv::UMat preCalculationPixNormGPU(const UMat& depth, const Intr& intrinsics);
 
@@ -45,6 +60,12 @@ void integrateVolumeUnit(
     InputArray _depth, float depthFactor, const cv::Matx44f& cameraPose,
     const cv::kinfu::Intr& intrinsics, InputArray _pixNorms, InputArray _volume);
 
+void integrateRGBVolumeUnit(
+    float truncDist, float voxelSize, int maxWeight,
+    cv::Matx44f _pose, Point3i volResolution, Vec4i volStrides,
+    InputArray _depth, InputArray _rgb, float depthFactor, const cv::Matx44f& cameraPose,
+    const cv::kinfu::Intr& depth_intrinsics, const cv::kinfu::Intr& rgb_intrinsics, InputArray _pixNorms, InputArray _volume);
+
 
 class CustomHashSet
 {
diff --git a/modules/rgbd/src/volume.cpp b/modules/rgbd/src/volume.cpp
index 88c46fe4b67..c05238f2602 100644
--- a/modules/rgbd/src/volume.cpp
+++ b/modules/rgbd/src/volume.cpp
@@ -4,10 +4,11 @@
 
 #include <opencv2/rgbd/volume.hpp>
 
-#include "hash_tsdf.hpp"
 #include "opencv2/core/base.hpp"
 #include "precomp.hpp"
 #include "tsdf.hpp"
+#include "hash_tsdf.hpp"
+#include "colored_tsdf.hpp"
 
 namespace cv
 {
@@ -38,6 +39,14 @@ Ptr<VolumeParams> VolumeParams::defaultParams(VolumeType _volumeType)
         params.tsdfTruncDist = 7 * params.voxelSize;  //! About 0.04f in meters
         return makePtr<VolumeParams>(params);
     }
+    else if (params.type == VolumeType::COLOREDTSDF)
+    {
+        params.resolution = Vec3i::all(512);
+        params.voxelSize = volumeSize / 512.f;
+        params.depthTruncThreshold = 0.f;  // depthTruncThreshold not required for TSDF
+        params.tsdfTruncDist = 7 * params.voxelSize;  //! About 0.04f in meters
+        return makePtr<VolumeParams>(params);
+    }
     CV_Error(Error::StsBadArg, "Invalid VolumeType does not have parameters");
 }
 
@@ -60,6 +69,13 @@ Ptr<VolumeParams> VolumeParams::coarseParams(VolumeType _volumeType)
         params->tsdfTruncDist = 2 * params->voxelSize;  //! About 0.04f in meters
         return params;
     }
+    else if (params->type == VolumeType::COLOREDTSDF)
+    {
+        params->resolution = Vec3i::all(128);
+        params->voxelSize = volumeSize / 128.f;
+        params->tsdfTruncDist = 2 * params->voxelSize;  //! About 0.04f in meters
+        return params;
+    }
     CV_Error(Error::StsBadArg, "Invalid VolumeType does not have parameters");
 }
 
@@ -69,6 +85,8 @@ Ptr<Volume> makeVolume(const VolumeParams& _volumeParams)
         return kinfu::makeTSDFVolume(_volumeParams);
     else if(_volumeParams.type == VolumeType::HASHTSDF)
         return kinfu::makeHashTSDFVolume(_volumeParams);
+    else if(_volumeParams.type == VolumeType::COLOREDTSDF)
+        return kinfu::makeColoredTSDFVolume(_volumeParams);
     CV_Error(Error::StsBadArg, "Invalid VolumeType does not have parameters");
 }
 
@@ -85,6 +103,10 @@ Ptr<Volume> makeVolume(VolumeType _volumeType, float _voxelSize, Matx44f _pose,
     {
         return makeHashTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _truncateThreshold);
     }
+    else if (_volumeType == VolumeType::COLOREDTSDF)
+    {
+        return makeColoredTSDFVolume(_voxelSize, _pose, _raycastStepFactor, _truncDist, _maxWeight, _presolution);
+    }
     CV_Error(Error::StsBadArg, "Invalid VolumeType does not have parameters");
 }
 
diff --git a/modules/rgbd/test/test_colored_kinfu.cpp b/modules/rgbd/test/test_colored_kinfu.cpp
new file mode 100644
index 00000000000..4303b260e3e
--- /dev/null
+++ b/modules/rgbd/test/test_colored_kinfu.cpp
@@ -0,0 +1,479 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+// This code is also subject to the license terms in the LICENSE_KinectFusion.md file found in this module's directory
+
+#include "test_precomp.hpp"
+
+// Inspired by Inigo Quilez' raymarching guide:
+// http://iquilezles.org/www/articles/distfunctions/distfunctions.htm
+
+namespace opencv_test {
+namespace {
+
+using namespace cv;
+
+/** Reprojects screen point to camera space given z coord. */
+struct Reprojector
+{
+    Reprojector() {}
+    inline Reprojector(Matx33f intr)
+    {
+        fxinv = 1.f / intr(0, 0), fyinv = 1.f / intr(1, 1);
+        cx = intr(0, 2), cy = intr(1, 2);
+    }
+    template<typename T>
+    inline cv::Point3_<T> operator()(cv::Point3_<T> p) const
+    {
+        T x = p.z * (p.x - cx) * fxinv;
+        T y = p.z * (p.y - cy) * fyinv;
+        return cv::Point3_<T>(x, y, p.z);
+    }
+
+    float fxinv, fyinv, cx, cy;
+};
+
+template<class Scene>
+struct RenderInvoker : ParallelLoopBody
+{
+    RenderInvoker(Mat_<float>& _frame, Affine3f _pose,
+        Reprojector _reproj,
+        float _depthFactor) : ParallelLoopBody(),
+        frame(_frame),
+        pose(_pose),
+        reproj(_reproj),
+        depthFactor(_depthFactor)
+    { }
+
+    virtual void operator ()(const cv::Range& r) const
+    {
+        for (int y = r.start; y < r.end; y++)
+        {
+            float* frameRow = frame[y];
+            for (int x = 0; x < frame.cols; x++)
+            {
+                float pix = 0;
+
+                Point3f orig = pose.translation();
+                // direction through pixel
+                Point3f screenVec = reproj(Point3f((float)x, (float)y, 1.f));
+                float xyt = 1.f / (screenVec.x * screenVec.x +
+                    screenVec.y * screenVec.y + 1.f);
+                Point3f dir = normalize(Vec3f(pose.rotation() * screenVec));
+                // screen space axis
+                dir.y = -dir.y;
+
+                const float maxDepth = 20.f;
+                const float maxSteps = 256;
+                float t = 0.f;
+                for (int step = 0; step < maxSteps && t < maxDepth; step++)
+                {
+                    Point3f p = orig + dir * t;
+                    float d = Scene::map(p);
+                    if (d < 0.000001f)
+                    {
+                        float depth = std::sqrt(t * t * xyt);
+                        pix = depth * depthFactor;
+                        break;
+                    }
+                    t += d;
+                }
+
+                frameRow[x] = pix;
+            }
+        }
+    }
+
+    Mat_<float>& frame;
+    Affine3f pose;
+    Reprojector reproj;
+    float depthFactor;
+};
+
+template<class Scene>
+struct RenderColorInvoker : ParallelLoopBody
+{
+    RenderColorInvoker(Mat_<Vec3f>& _frame, Affine3f _pose,
+        Reprojector _reproj,
+        float _depthFactor) : ParallelLoopBody(),
+        frame(_frame),
+        pose(_pose),
+        reproj(_reproj),
+        depthFactor(_depthFactor)
+    { }
+
+    virtual void operator ()(const cv::Range& r) const
+    {
+        for (int y = r.start; y < r.end; y++)
+        {
+            Vec3f* frameRow = frame[y];
+            for (int x = 0; x < frame.cols; x++)
+            {
+                Vec3f pix = 0;
+
+                Point3f orig = pose.translation();
+                // direction through pixel
+                Point3f screenVec = reproj(Point3f((float)x, (float)y, 1.f));
+                Point3f dir = normalize(Vec3f(pose.rotation() * screenVec));
+                // screen space axis
+                dir.y = -dir.y;
+
+                const float maxDepth = 20.f;
+                const float maxSteps = 256;
+                float t = 0.f;
+                for (int step = 0; step < maxSteps && t < maxDepth; step++)
+                {
+                    Point3f p = orig + dir * t;
+                    float d = Scene::map(p);
+                    if (d < 0.000001f)
+                    {
+                        float m = 0.25f;
+                        float p0 = float(abs(fmod(p.x, m)) > m / 2.f);
+                        float p1 = float(abs(fmod(p.y, m)) > m / 2.f);
+                        float p2 = float(abs(fmod(p.z, m)) > m / 2.f);
+
+                        pix[0] = p0 + p1;
+                        pix[1] = p1 + p2;
+                        pix[2] = p0 + p2;
+
+                        pix *= 128.f;
+                        break;
+                    }
+                    t += d;
+                }
+
+                frameRow[x] = pix;
+            }
+        }
+    }
+
+    Mat_<Vec3f>& frame;
+    Affine3f pose;
+    Reprojector reproj;
+    float depthFactor;
+};
+
+struct Scene
+{
+    virtual ~Scene() {}
+    static Ptr<Scene> create(int nScene, Size sz, Matx33f _intr, float _depthFactor);
+    virtual Mat depth(Affine3f pose) = 0;
+    virtual Mat rgb(Affine3f pose) = 0;
+    virtual std::vector<Affine3f> getPoses() = 0;
+};
+
+struct CubeSpheresScene : Scene
+{
+    const int framesPerCycle = 32;
+    const float nCycles = 0.25f;
+    const Affine3f startPose = Affine3f(Vec3f(-0.5f, 0.f, 0.f), Vec3f(2.1f, 1.4f, -2.1f));
+
+    CubeSpheresScene(Size sz, Matx33f _intr, float _depthFactor) :
+        frameSize(sz), intr(_intr), depthFactor(_depthFactor)
+    { }
+
+    static float map(Point3f p)
+    {
+        float plane = p.y + 0.5f;
+
+        Point3f boxPose = p - Point3f(-0.0f, 0.3f, 0.0f);
+        float boxSize = 0.5f;
+        float roundness = 0.08f;
+        Point3f boxTmp;
+        boxTmp.x = max(abs(boxPose.x) - boxSize, 0.0f);
+        boxTmp.y = max(abs(boxPose.y) - boxSize, 0.0f);
+        boxTmp.z = max(abs(boxPose.z) - boxSize, 0.0f);
+        float roundBox = (float)cv::norm(boxTmp) - roundness;
+
+        float sphereRadius = 0.7f;
+        float sphere = (float)cv::norm(boxPose) - sphereRadius;
+
+        float boxMinusSphere = max(roundBox, -sphere);
+
+        float sphere2 = (float)cv::norm(p - Point3f(0.3f, 1.f, 0.f)) - 0.1f;
+        float sphere3 = (float)cv::norm(p - Point3f(0.0f, 1.f, 0.f)) - 0.2f;
+        float res = min(min(plane, boxMinusSphere), min(sphere2, sphere3));
+
+        return res;
+    }
+
+    Mat depth(Affine3f pose) override
+    {
+        Mat_<float> frame(frameSize);
+        Reprojector reproj(intr);
+
+        Range range(0, frame.rows);
+        parallel_for_(range, RenderInvoker<CubeSpheresScene>(frame, pose, reproj, depthFactor));
+
+        return std::move(frame);
+    }
+
+    Mat rgb(Affine3f pose) override
+    {
+        Mat_<Vec3f> frame(frameSize);
+        Reprojector reproj(intr);
+
+        Range range(0, frame.rows);
+        parallel_for_(range, RenderColorInvoker<CubeSpheresScene>(frame, pose, reproj, depthFactor));
+
+        return std::move(frame);
+    }
+
+    std::vector<Affine3f> getPoses() override
+    {
+        std::vector<Affine3f> poses;
+        for (int i = 0; i < (int)(framesPerCycle * nCycles); i++)
+        {
+            float angle = (float)(CV_2PI * i / framesPerCycle);
+            Affine3f pose;
+            pose = pose.rotate(startPose.rotation());
+            pose = pose.rotate(Vec3f(0.f, -1.f, 0.f) * angle);
+            pose = pose.translate(Vec3f(startPose.translation()[0] * sin(angle),
+                startPose.translation()[1],
+                startPose.translation()[2] * cos(angle)));
+            poses.push_back(pose);
+        }
+
+        return poses;
+    }
+
+    Size frameSize;
+    Matx33f intr;
+    float depthFactor;
+};
+
+
+struct RotatingScene : Scene
+{
+    const int framesPerCycle = 32;
+    const float nCycles = 0.5f;
+    const Affine3f startPose = Affine3f(Vec3f(-1.f, 0.f, 0.f), Vec3f(1.5f, 2.f, -1.5f));
+
+    RotatingScene(Size sz, Matx33f _intr, float _depthFactor) :
+        frameSize(sz), intr(_intr), depthFactor(_depthFactor)
+    {
+        cv::RNG rng(0);
+        rng.fill(randTexture, cv::RNG::UNIFORM, 0.f, 1.f);
+    }
+
+    static float noise(Point2f pt)
+    {
+        pt.x = abs(pt.x - (int)pt.x);
+        pt.y = abs(pt.y - (int)pt.y);
+        pt *= 256.f;
+
+        int xi = cvFloor(pt.x), yi = cvFloor(pt.y);
+
+        const float* row0 = randTexture[(yi + 0) % 256];
+        const float* row1 = randTexture[(yi + 1) % 256];
+
+        float v00 = row0[(xi + 0) % 256];
+        float v01 = row0[(xi + 1) % 256];
+        float v10 = row1[(xi + 0) % 256];
+        float v11 = row1[(xi + 1) % 256];
+
+        float tx = pt.x - xi, ty = pt.y - yi;
+        float v0 = v00 + tx * (v01 - v00);
+        float v1 = v10 + tx * (v11 - v10);
+        return v0 + ty * (v1 - v0);
+    }
+
+    static float map(Point3f p)
+    {
+        const Point3f torPlace(0.f, 0.f, 0.f);
+        Point3f torPos(p - torPlace);
+        const Point2f torusParams(1.f, 0.2f);
+        Point2f torq(std::sqrt(torPos.x * torPos.x + torPos.z * torPos.z) - torusParams.x, torPos.y);
+        float torus = (float)cv::norm(torq) - torusParams.y;
+
+        const Point3f cylShift(0.25f, 0.25f, 0.25f);
+
+        Point3f cylPos = Point3f(abs(std::fmod(p.x - 0.1f, cylShift.x)),
+            p.y,
+            abs(std::fmod(p.z - 0.2f, cylShift.z))) - cylShift * 0.5f;
+
+        const Point2f cylParams(0.1f,
+            0.1f + 0.1f * sin(p.x * p.y * 5.f /* +std::log(1.f+abs(p.x*0.1f)) */));
+        Point2f cyld = Point2f(abs(std::sqrt(cylPos.x * cylPos.x + cylPos.z * cylPos.z)), abs(cylPos.y)) - cylParams;
+        float pins = min(max(cyld.x, cyld.y), 0.0f) + (float)cv::norm(Point2f(max(cyld.x, 0.f), max(cyld.y, 0.f)));
+
+        float terrain = p.y + 0.25f * noise(Point2f(p.x, p.z) * 0.01f);
+
+        float res = min(terrain, max(-pins, torus));
+
+        return res;
+    }
+
+    Mat depth(Affine3f pose) override
+    {
+        Mat_<float> frame(frameSize);
+        Reprojector reproj(intr);
+
+        Range range(0, frame.rows);
+        parallel_for_(range, RenderInvoker<RotatingScene>(frame, pose, reproj, depthFactor));
+
+        return std::move(frame);
+    }
+
+    Mat rgb(Affine3f pose) override
+    {
+        Mat_<Vec3f> frame(frameSize);
+        Reprojector reproj(intr);
+
+        Range range(0, frame.rows);
+        parallel_for_(range, RenderColorInvoker<RotatingScene>(frame, pose, reproj, depthFactor));
+
+        return std::move(frame);
+    }
+
+    std::vector<Affine3f> getPoses() override
+    {
+        std::vector<Affine3f> poses;
+        for (int i = 0; i < framesPerCycle * nCycles; i++)
+        {
+            float angle = (float)(CV_2PI * i / framesPerCycle);
+            Affine3f pose;
+            pose = pose.rotate(startPose.rotation());
+            pose = pose.rotate(Vec3f(0.f, -1.f, 0.f) * angle);
+            pose = pose.translate(Vec3f(startPose.translation()[0] * sin(angle),
+                startPose.translation()[1],
+                startPose.translation()[2] * cos(angle)));
+            poses.push_back(pose);
+        }
+
+        return poses;
+    }
+
+    Size frameSize;
+    Matx33f intr;
+    float depthFactor;
+    static cv::Mat_<float> randTexture;
+};
+
+Mat_<float> RotatingScene::randTexture(256, 256);
+
+Ptr<Scene> Scene::create(int nScene, Size sz, Matx33f _intr, float _depthFactor)
+{
+    if (nScene == 0)
+        return makePtr<RotatingScene>(sz, _intr, _depthFactor);
+    else
+        return makePtr<CubeSpheresScene>(sz, _intr, _depthFactor);
+}
+
+static inline void CheckFrequency(Mat image)
+{
+    float all = (float)image.size().height * image.size().width;
+    int cc1 = 0, cc2 = 0, cc3 = 0, cc4 = 0;
+    Vec3b c1 = Vec3b(200, 0, 0), c2 = Vec3b(0, 200, 0);
+    Vec3b c3 = Vec3b(0, 0, 200), c4 = Vec3b(100, 100, 100);
+    for (int i = 0; i < image.size().height; i++)
+    {
+        for (int j = 0; j < image.size().width; j++)
+        {
+            Vec3b color = image.at<Vec3b>(i, j);
+            if (color == c1) cc1++;
+            if (color == c2) cc2++;
+            if (color == c3) cc3++;
+            if (color == c4) cc4++;
+        }
+    }
+    ASSERT_LT(float(cc1) / all, 0.2);
+    ASSERT_LT(float(cc2) / all, 0.2);
+    ASSERT_LT(float(cc3) / all, 0.2);
+    ASSERT_LT(float(cc4) / all, 0.2);
+}
+
+static const bool display = false;
+
+void flyTest(bool hiDense, bool test_colors)
+{
+    Ptr<colored_kinfu::Params> params;
+    params = colored_kinfu::Params::coloredTSDFParams(!hiDense);
+    Ptr<Scene> scene = Scene::create(false, params->frameSize, params->intr, params->depthFactor);
+
+    Ptr<colored_kinfu::ColoredKinFu> kf = colored_kinfu::ColoredKinFu::create(params);
+
+    std::vector<Affine3f> poses = scene->getPoses();
+    Affine3f startPoseGT = poses[0], startPoseKF;
+    Affine3f pose, kfPose;
+    for (size_t i = 0; i < poses.size(); i++)
+    {
+        pose = poses[i];
+
+        Mat depth = scene->depth(pose);
+        //DEBUG
+        Mat rgb = scene->rgb(pose);// creareRGBframe(depth.size());
+
+        ASSERT_TRUE(kf->update(depth, rgb));
+
+        kfPose = kf->getPose();
+        if (i == 0)
+            startPoseKF = kfPose;
+
+        pose = (startPoseGT.inv() * pose) * startPoseKF;
+
+        if (display)
+        {
+            imshow("depth", depth * (1.f / params->depthFactor / 4.f));
+            imshow("rgb", rgb * (1.f / 255.f));
+            Mat rendered;
+            kf->render(rendered);
+            imshow("render", rendered);
+            waitKey(10);
+        }
+
+        if (test_colors)
+        {
+            Mat rendered;
+            kf->render(rendered);
+            CheckFrequency(rendered);
+            return;
+        }
+    }
+
+    double rvecThreshold = hiDense ? 0.01 : 0.02;
+    ASSERT_LT(cv::norm(kfPose.rvec() - pose.rvec()), rvecThreshold);
+    double poseThreshold = hiDense ? 0.03 : 0.1;
+    ASSERT_LT(cv::norm(kfPose.translation() - pose.translation()), poseThreshold);
+}
+
+
+#ifdef OPENCV_ENABLE_NONFREE
+TEST(ColoredKinectFusion, lowDense)
+#else
+TEST(ColoredKinectFusion, DISABLED_lowDense)
+#endif
+{
+    flyTest(false, false);
+}
+
+#ifdef OPENCV_ENABLE_NONFREE
+TEST(ColoredKinectFusion, highDense)
+#else
+TEST(KinectFusion, DISABLED_highDense)
+#endif
+{
+    flyTest(true, false);
+}
+
+#ifdef OPENCV_ENABLE_NONFREE
+TEST(ColoredKinectFusion, color_lowDense)
+#else
+TEST(ColoredKinectFusion, DISABLED_color_lowDense)
+#endif
+{
+    flyTest(false, true);
+}
+
+#ifdef OPENCV_ENABLE_NONFREE
+TEST(ColoredKinectFusion, color_highDense)
+#else
+TEST(KinectFusion, DISABLED_color_highDense)
+#endif
+{
+    flyTest(true, true);
+}
+
+}
+} // namespace