3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -27,7 +27,8 @@ OPTION(BUILD_EXAMPLES "Build examples." OFF)
OPTION(HAVE_CUDA "Have cuda_runtime.h." OFF)

IF(HAVE_CUDA)
- # noop
+ INCLUDE_DIRECTORIES(${CUDA_HOME}/include)
+ LINK_DIRECTORIES(${CUDA_HOME}/lib64)
ELSE()
ADD_DEFINITIONS(-DUSE_FAKE_CUDA_RUNTIME)
ENDIF()
10 changes: 10 additions & 0 deletions INSTALL
@@ -0,0 +1,10 @@
#!/bin/sh
set -e

if [ -z $PREFIX ]; then
PREFIX=$HOME/local
fi

./configure --prefix=$PREFIX

make install
19 changes: 14 additions & 5 deletions configure
@@ -2,6 +2,7 @@
set -e

PREFIX=$(pwd)/local
+ CUDA_HOME=/usr/local/cuda
USE_OPENCV=0
BUILD_TESTS=0
BUILD_BENCHMARKS=0
@@ -11,10 +12,6 @@ BUILD_GBENCH=0
HAVE_CUDA=0
VERBOSE=0

- if [ $(find /usr/include/cuda_runtime.h | wc -l) -gt 0 ]; then
- HAVE_CUDA=1
- fi
-
parse_args() {
for i in "$@"; do
case $i in
@@ -48,6 +45,10 @@ parse_args() {
--build-gbench)
BUILD_GBENCH=1
;;
+ --with-cuda=*)
+ CUDA_HOME="${i#*=}"
+ echo "configure --with-cuda=$CUDA_HOME"
+ ;;
--verbose)
VERBOSE=1
;;
@@ -57,6 +58,10 @@
;;
esac
done
+
+ if [ -f $CUDA_HOME/include/cuda_runtime.h ]; then
+ HAVE_CUDA=1
+ fi
}

CMAKE_FLAGS=
@@ -96,7 +101,11 @@ add_cmake_flags() {
add_cmake_flag BUILD_TESTS ${BUILD_TESTS}
add_cmake_flag BUILD_BENCHMARKS ${BUILD_BENCHMARKS}
add_cmake_flag BUILD_EXAMPLES ${BUILD_EXAMPLES}
- add_cmake_flag HAVE_CUDA ${HAVE_CUDA}
+
+ if [ ${HAVE_CUDA} -eq 1 ]; then
+ add_cmake_flag HAVE_CUDA ${HAVE_CUDA}
+ add_cmake_flag CUDA_HOME $CUDA_HOME
+ fi

if [ ${BUILD_EXAMPLES} -eq 1 ]; then
add_cmake_flag USE_OPENCV ${USE_OPENCV}
28 changes: 28 additions & 0 deletions include/ttl/bits/std_copy.hpp
@@ -0,0 +1,28 @@
#pragma once
#include <ttl/bits/std_cuda_allocator.hpp>
#include <ttl/bits/std_tensor.hpp>

namespace ttl
{
namespace internal
{
namespace experimental
{
template <typename R, typename S>
void copy(const basic_tensor<R, S, host_memory, readwrite> &dst,
const basic_tensor<R, S, cuda_memory, readonly> &src)
{
using copier = internal::cuda_copier;
copier::copy<copier::d2h>(dst.data(), src.data(), src.data_size());
}

template <typename R, typename S>
void copy(const basic_tensor<R, S, cuda_memory, readwrite> &dst,
const basic_tensor<R, S, host_memory, readonly> &src)
{
using copier = internal::cuda_copier;
copier::copy<copier::h2d>(dst.data(), src.data(), src.data_size());
}
} // namespace experimental
} // namespace internal
} // namespace ttl
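
Taken together, the two overloads above let the memory tags of the destination and source pick the copy direction (h2d vs. d2h) at compile time. A minimal sketch of how a caller exercises them, assuming the public aliases ttl::tensor, ttl::cuda_tensor, ttl::ref, and ttl::view map onto these internal types as the tests later in this PR suggest:

    // Sketch: round-trip a host buffer through the device.
    #include <ttl/cuda_tensor>
    #include <ttl/experimental/copy>
    #include <ttl/tensor>

    int main()
    {
        ttl::tensor<float, 1> x(16);       // host_memory tensor
        ttl::cuda_tensor<float, 1> y(16);  // cuda_memory tensor

        ttl::copy(ttl::ref(y), ttl::view(x));  // host -> device: h2d overload
        ttl::copy(ttl::ref(x), ttl::view(y));  // device -> host: d2h overload
        return 0;
    }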
1 change: 0 additions & 1 deletion include/ttl/bits/std_range.hpp
@@ -45,6 +45,5 @@ basic_integer_range<N> range(N m, N n)
{
return basic_integer_range<N>(m, n);
}
-
} // namespace internal
} // namespace ttl
30 changes: 10 additions & 20 deletions include/ttl/bits/std_tensor_mixin.hpp
@@ -15,6 +15,8 @@ class basic_scalar_mixin
using data_ref = typename trait::ref_type;
using data_t = typename trait::Data;

+ using Dim = typename S::dimension_type;
+
data_t data_;

protected:
@@ -33,23 +35,17 @@

basic_scalar_mixin(data_ptr data, const S &) : data_(data) {}

+ constexpr Dim size() const { return 1; }
+
+ constexpr auto dims() const { return S().dims(); }
+
constexpr size_t data_size() const { return sizeof(R); }

data_ptr data() const { return data_.get(); }

data_ptr data_end() const { return data_.get() + 1; }

S shape() const { return S(); }
-
- void from_host(const void *data) const
- {
- basic_copier<D, host_memory>()(data_.get(), data, data_size());
- }
-
- void to_host(void *data) const
- {
- basic_copier<host_memory, D>()(data, data_.get(), data_size());
- }
};

template <typename R, typename S, typename D, typename A>
@@ -121,6 +117,10 @@ class basic_tensor_mixin

static constexpr auto rank = S::rank;

+ Dim size() const { return shape_.size(); }
+
+ const auto &dims() const { return shape_.dims(); }
+
size_t data_size() const { return shape_.size() * sizeof(R); }

const S &shape() const { return shape_; }
@@ -158,16 +158,6 @@
return slice_type(data_.get() + i * sub_shape.size(),
batch(j - i, sub_shape));
}
-
- void from_host(const void *data) const
- {
- basic_copier<D, host_memory>()(data_.get(), data, data_size());
- }
-
- void to_host(void *data) const
- {
- basic_copier<host_memory, D>()(data, data_.get(), data_size());
- }
};
} // namespace internal
} // namespace ttl
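
With the member from_host/to_host copies removed from both mixins in favor of the free copy function, the new size()/dims() accessors round out the shape API. A small sketch of what they return, assuming dims() yields a std::array-like object as the static_assert in tests/test_public_types.cpp below implies:

    #include <cassert>
    #include <ttl/tensor>

    int main()
    {
        ttl::tensor<int, 2> t(3, 4);
        assert(t.size() == 12);                  // product of all dimensions
        assert(t.dims()[0] == 3);                // per-dimension extents
        assert(t.data_size() == 12 * sizeof(int));
        return 0;
    }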
8 changes: 8 additions & 0 deletions include/ttl/experimental/copy
@@ -0,0 +1,8 @@
// # -*- mode: c++ -*-
#pragma once
#include <ttl/bits/std_copy.hpp>

namespace ttl
{
using internal::experimental::copy;
} // namespace ttl
6 changes: 3 additions & 3 deletions include/ttl/range
@@ -4,17 +4,17 @@
#include <cstdint>

#include <ttl/bits/std_range.hpp>
+ #include <ttl/bits/std_tensor_fwd.hpp>

namespace ttl
{
using internal::range;

using rank_t = uint8_t;

- // FIXME: make T less generic
- template <rank_t r, typename T> auto range(const T &t)
+ template <rank_t r, typename R, typename S, typename D, typename A>
+ auto range(const internal::basic_tensor<R, S, D, A> &t)
{
return range(std::get<r>(t.shape().dims()));
}
-
} // namespace ttl
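
The rewritten overload accepts only basic_tensor (and its ref/view variants), so the rank index is checked against a real tensor type while the loop bound comes from its shape. A short sketch of iterating one dimension, mirroring the usage in tests/test_copy.cpp below:

    #include <ttl/range>
    #include <ttl/tensor>

    int main()
    {
        ttl::tensor<float, 2> t(4, 8);
        int rows = 0;
        for (auto i : ttl::range<0>(t)) { (void)i; ++rows; }  // i = 0..3
        return rows == 4 ? 0 : 1;
    }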
8 changes: 5 additions & 3 deletions tests/bench_cuda_tensor.cpp
@@ -1,16 +1,18 @@
#include "benchmark.hpp"

#include <ttl/cuda_tensor>
+ #include <ttl/experimental/copy>

- template <typename R, int n> struct bench_cuda_tensor {
+ template <typename R, int n>
+ struct bench_cuda_tensor {
static void run(benchmark::State &state)
{
ttl::cuda_tensor<R, 1> m1(n);
ttl::tensor<R, 1> m2(n);

for (auto _ : state) {
- m1.from_host(m2.data());
- m1.to_host(m2.data());
+ ttl::copy(ttl::ref(m1), ttl::view(m2));
+ ttl::copy(ttl::ref(m2), ttl::view(m1));
}
}
};
35 changes: 35 additions & 0 deletions tests/test_copy.cpp
@@ -0,0 +1,35 @@
#include "testing.hpp"

#include <ttl/algorithm>
#include <ttl/cuda_tensor>
#include <ttl/device>
#include <ttl/experimental/copy>
#include <ttl/range>
#include <ttl/tensor>

void test_copy(int n)
{
ttl::tensor<int, 1> x_host(n);
ttl::cuda_tensor<int, 1> x_cuda(n);

ttl::fill(ttl::ref(x_host), 1);
ttl::copy(ttl::ref(x_cuda), ttl::view(x_host));

ttl::fill(ttl::ref(x_host), 2);
for (auto i : ttl::range<0>(x_host)) { ASSERT_EQ(x_host.data()[i], 2); }

ttl::copy(ttl::ref(x_host), ttl::view(x_cuda));
for (auto i : ttl::range<0>(x_host)) { ASSERT_EQ(x_host.data()[i], 1); }
}

TEST(copy_test, test_copy)
{
test_copy(1);
test_copy(2);
test_copy(10);
test_copy(100);
test_copy(1000);
test_copy(1 << 20);
test_copy(1 << 20);
test_copy(1 << 20);
}
30 changes: 17 additions & 13 deletions tests/test_cuda_tensor.cpp
@@ -1,6 +1,7 @@
#include "testing.hpp"

#include <ttl/cuda_tensor>
+ #include <ttl/experimental/copy>
#include <ttl/range>
#include <ttl/tensor>

@@ -23,11 +24,10 @@ TEST(cuda_tensor_test, test0)
{
using R = float;
cuda_tensor<R, 0> m0;
-
tensor<R, 0> x;

- m0.from_host(x.data());
- m0.to_host(x.data());
+ ttl::copy(ttl::ref(m0), ttl::view(x));
+ ttl::copy(ttl::ref(x), ttl::view(m0));
}

TEST(cuda_tensor_test, test1)
@@ -42,8 +42,8 @@ TEST(cuda_tensor_test, test2)
cuda_tensor<R, 2> m1(10, 100);
tensor<R, 2> m2(10, 100);

- m1.from_host(m2.data());
- m1.to_host(m2.data());
+ ttl::copy(ttl::ref(m1), ttl::view(m2));
+ ttl::copy(ttl::ref(m2), ttl::view(m1));

m1.slice(1, 2);
auto r = ref(m1);
@@ -58,14 +58,16 @@ TEST(cuda_tensor_test, test_3)
cuda_tensor<R, 2> m1(ttl::make_shape(10, 100));
}

- template <typename R, uint8_t r> void test_auto_ref()
+ template <typename R, uint8_t r>
+ void test_auto_ref()
{
static_assert(
std::is_convertible<cuda_tensor<R, r>, cuda_tensor_ref<R, r>>::value,
"can't convert to ref");
}

- template <typename R, uint8_t r> void test_auto_view()
+ template <typename R, uint8_t r>
+ void test_auto_view()
{
static_assert(
std::is_convertible<cuda_tensor<R, r>, cuda_tensor_view<R, r>>::value,
@@ -87,28 +89,30 @@ TEST(cuda_tensor_test, test_convert)
test_auto_view<int, 2>();
}

- template <typename R, uint8_t r> void test_copy(const ttl::shape<r> &shape)
+ template <typename R, uint8_t r>
+ void test_copy(const ttl::shape<r> &shape)
{
tensor<R, r> x(shape);
cuda_tensor<R, r> y(shape);
tensor<R, r> z(shape);

std::iota(x.data(), x.data_end(), 1);
- y.from_host(x.data());
- y.to_host(z.data());
+
+ ttl::copy(ttl::ref(y), ttl::view(x));
+ ttl::copy(ttl::ref(z), ttl::view(y));

for (auto i : ttl::range(shape.size())) {
ASSERT_EQ(x.data()[i], z.data()[i]);
}

{
cuda_tensor_ref<R, r> ry = ref(y);
- ry.from_host(x.data());
- ry.to_host(x.data());
+ ttl::copy(ry, ttl::view(x));
+ ttl::copy(ttl::ref(z), ttl::view(ry));
}
{
cuda_tensor_view<R, r> vy = view(y);
- vy.to_host(x.data());
+ ttl::copy(ttl::ref(x), vy);
}
}

14 changes: 14 additions & 0 deletions tests/test_public_types.cpp
@@ -45,6 +45,16 @@ ttl::shape<r> unit_shape()
return ttl::shape<r>(dims);
}

+ template <typename T>
+ void test_public_apis(const T &t)
+ {
+ const auto size = t.size();
+ ASSERT_EQ(size, static_cast<decltype(size)>(1));
+
+ const auto dims = t.dims();
+ static_assert(dims.size() == T::rank, "");
+ }
+
template <ttl::rank_t r>
struct test_ranked_type {
template <typename R>
@@ -65,6 +75,10 @@ struct test_ranked_type {
Tensor t(unit_shape<r>());
TensorRef tr(t);
TensorView tv(t);
+
+ test_public_apis(t);
+ test_public_apis(tr);
+ test_public_apis(tv);
}
};
