Skip to content

Commit f3d26d6

Browse files
authored
dev-0.6.0 (#78)
* INSTALL * make ttl::range less generic * ttl::copy * --with-cuda * add .size() and .dims() to tensor types (#79) * add size method to tensor * dims() * deprecate from_host, to_host (#80) * support customize install prefix
1 parent c0c6ffb commit f3d26d6

File tree

13 files changed

+150
-52
lines changed

13 files changed

+150
-52
lines changed

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ OPTION(BUILD_EXAMPLES "Build examples." OFF)
2727
OPTION(HAVE_CUDA "Have cuda_runtime.h." OFF)
2828

2929
IF(HAVE_CUDA)
    # CUDA_HOME is forwarded by ./configure (via --with-cuda=...) when
    # cuda_runtime.h was found under it.
    INCLUDE_DIRECTORIES(${CUDA_HOME}/include)
    LINK_DIRECTORIES(${CUDA_HOME}/lib64)
ELSE()
    # No CUDA toolkit: compile against a fake runtime instead —
    # presumably a stub cuda_runtime shipped with the project; verify.
    ADD_DEFINITIONS(-DUSE_FAKE_CUDA_RUNTIME)
ENDIF()

INSTALL

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
#!/bin/sh
# Configure and install into $PREFIX (default: $HOME/local).
set -e

# Quote the expansion: the original `[ -z $PREFIX ]` happens to work when
# PREFIX is unset/empty, but a value containing whitespace would make the
# test (and the --prefix argument below) split into multiple words.
if [ -z "$PREFIX" ]; then
    PREFIX="$HOME/local"
fi

./configure --prefix="$PREFIX"

make install

configure

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
set -e
33

44
PREFIX=$(pwd)/local
5+
CUDA_HOME=/usr/local/cuda
56
USE_OPENCV=0
67
BUILD_TESTS=0
78
BUILD_BENCHMARKS=0
@@ -11,10 +12,6 @@ BUILD_GBENCH=0
1112
HAVE_CUDA=0
1213
VERBOSE=0
1314

14-
if [ $(find /usr/include/cuda_runtime.h | wc -l) -gt 0 ]; then
15-
HAVE_CUDA=1
16-
fi
17-
1815
parse_args() {
1916
for i in "$@"; do
2017
case $i in
@@ -48,6 +45,10 @@ parse_args() {
4845
--build-gbench)
4946
BUILD_GBENCH=1
5047
;;
48+
--with-cuda=*)
49+
CUDA_HOME="${i#*=}"
50+
echo "configure --with-cuda=$CUDA_HOME"
51+
;;
5152
--verbose)
5253
VERBOSE=1
5354
;;
@@ -57,6 +58,10 @@ parse_args() {
5758
;;
5859
esac
5960
done
61+
62+
if [ -f $CUDA_HOME/include/cuda_runtime.h ]; then
63+
HAVE_CUDA=1
64+
fi
6065
}
6166

6267
CMAKE_FLAGS=
@@ -96,7 +101,11 @@ add_cmake_flags() {
96101
add_cmake_flag BUILD_TESTS ${BUILD_TESTS}
97102
add_cmake_flag BUILD_BENCHMARKS ${BUILD_BENCHMARKS}
98103
add_cmake_flag BUILD_EXAMPLES ${BUILD_EXAMPLES}
99-
add_cmake_flag HAVE_CUDA ${HAVE_CUDA}
104+
105+
if [ ${HAVE_CUDA} -eq 1 ]; then
106+
add_cmake_flag HAVE_CUDA ${HAVE_CUDA}
107+
add_cmake_flag CUDA_HOME $CUDA_HOME
108+
fi
100109

101110
if [ ${BUILD_EXAMPLES} -eq 1 ]; then
102111
add_cmake_flag USE_OPENCV ${USE_OPENCV}

include/ttl/bits/std_copy.hpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
#pragma once
#include <ttl/bits/std_cuda_allocator.hpp>
#include <ttl/bits/std_tensor.hpp>

namespace ttl
{
namespace internal
{
namespace experimental
{
// Device -> host copy: download a readonly CUDA tensor into a writable host
// tensor. dst and src share element type R and shape type S; the byte count
// is taken from src.data_size() only — NOTE(review): nothing here checks
// that dst and src have equal shapes; confirm callers guarantee it.
template <typename R, typename S>
void copy(const basic_tensor<R, S, host_memory, readwrite> &dst,
          const basic_tensor<R, S, cuda_memory, readonly> &src)
{
    using copier = internal::cuda_copier;
    // d2h direction tag selects the device-to-host transfer.
    copier::copy<copier::d2h>(dst.data(), src.data(), src.data_size());
}

// Host -> device copy: upload a readonly host tensor into a writable CUDA
// tensor. Mirror image of the overload above (h2d direction); the same
// unchecked-shape caveat applies.
template <typename R, typename S>
void copy(const basic_tensor<R, S, cuda_memory, readwrite> &dst,
          const basic_tensor<R, S, host_memory, readonly> &src)
{
    using copier = internal::cuda_copier;
    copier::copy<copier::h2d>(dst.data(), src.data(), src.data_size());
}
} // namespace experimental
} // namespace internal
} // namespace ttl

include/ttl/bits/std_range.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,5 @@ basic_integer_range<N> range(N m, N n)
4545
{
4646
return basic_integer_range<N>(m, n);
4747
}
48-
4948
} // namespace internal
5049
} // namespace ttl

include/ttl/bits/std_tensor_mixin.hpp

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ class basic_scalar_mixin
1515
using data_ref = typename trait::ref_type;
1616
using data_t = typename trait::Data;
1717

18+
using Dim = typename S::dimension_type;
19+
1820
data_t data_;
1921

2022
protected:
@@ -33,23 +35,17 @@ class basic_scalar_mixin
3335

3436
basic_scalar_mixin(data_ptr data, const S &) : data_(data) {}
3537

38+
constexpr Dim size() const { return 1; }
39+
40+
constexpr auto dims() const { return S().dims(); }
41+
3642
constexpr size_t data_size() const { return sizeof(R); }
3743

3844
data_ptr data() const { return data_.get(); }
3945

4046
data_ptr data_end() const { return data_.get() + 1; }
4147

4248
S shape() const { return S(); }
43-
44-
void from_host(const void *data) const
45-
{
46-
basic_copier<D, host_memory>()(data_.get(), data, data_size());
47-
}
48-
49-
void to_host(void *data) const
50-
{
51-
basic_copier<host_memory, D>()(data, data_.get(), data_size());
52-
}
5349
};
5450

5551
template <typename R, typename S, typename D, typename A>
@@ -121,6 +117,10 @@ class basic_tensor_mixin
121117

122118
static constexpr auto rank = S::rank;
123119

120+
Dim size() const { return shape_.size(); }
121+
122+
const auto &dims() const { return shape_.dims(); }
123+
124124
size_t data_size() const { return shape_.size() * sizeof(R); }
125125

126126
const S &shape() const { return shape_; }
@@ -158,16 +158,6 @@ class basic_tensor_mixin
158158
return slice_type(data_.get() + i * sub_shape.size(),
159159
batch(j - i, sub_shape));
160160
}
161-
162-
void from_host(const void *data) const
163-
{
164-
basic_copier<D, host_memory>()(data_.get(), data, data_size());
165-
}
166-
167-
void to_host(void *data) const
168-
{
169-
basic_copier<host_memory, D>()(data, data_.get(), data_size());
170-
}
171161
};
172162
} // namespace internal
173163
} // namespace ttl

include/ttl/experimental/copy

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
// # -*- mode: c++ -*-
#pragma once
#include <ttl/bits/std_copy.hpp>

namespace ttl
{
// Promote the experimental host<->device copy overloads to ttl::copy.
using internal::experimental::copy;
} // namespace ttl

include/ttl/range

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,17 @@
44
#include <cstdint>
55

66
#include <ttl/bits/std_range.hpp>
7+
#include <ttl/bits/std_tensor_fwd.hpp>
78

89
namespace ttl
910
{
1011
using internal::range;
1112

1213
using rank_t = uint8_t;
1314

14-
// FIXME: make T less generic
15-
template <rank_t r, typename T> auto range(const T &t)
// Iterate over the r-th dimension of a tensor:
//     for (auto i : ttl::range<0>(t)) { ... }
// Constrained to basic_tensor (the old `typename T` version accepted any
// type with a .shape(); this overload no longer swallows arbitrary types).
template <rank_t r, typename R, typename S, typename D, typename A>
auto range(const internal::basic_tensor<R, S, D, A> &t)
{
    // Delegates to range(n) with the extent of dimension r.
    return range(std::get<r>(t.shape().dims()));
}
19-
2020
} // namespace ttl

tests/bench_cuda_tensor.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
#include "benchmark.hpp"
22

33
#include <ttl/cuda_tensor>
4+
#include <ttl/experimental/copy>
45

5-
template <typename R, int n> struct bench_cuda_tensor {
// Benchmark a host<->device round trip of n elements of type R using the
// new ttl::copy API (replaces the deprecated from_host/to_host methods).
template <typename R, int n>
struct bench_cuda_tensor {
    static void run(benchmark::State &state)
    {
        ttl::cuda_tensor<R, 1> m1(n);
        ttl::tensor<R, 1> m2(n);

        // Each iteration uploads then downloads the same n-element buffer.
        for (auto _ : state) {
            ttl::copy(ttl::ref(m1), ttl::view(m2));
            ttl::copy(ttl::ref(m2), ttl::view(m1));
        }
    }
};

tests/test_copy.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
#include "testing.hpp"

#include <ttl/algorithm>
#include <ttl/cuda_tensor>
#include <ttl/device>
#include <ttl/experimental/copy>
#include <ttl/range>
#include <ttl/tensor>

// Round-trip n ints host -> device -> host and verify the device copy
// preserved the original values.
void test_copy(int n)
{
    ttl::tensor<int, 1> x_host(n);
    ttl::cuda_tensor<int, 1> x_cuda(n);

    // Upload a buffer of 1s to the device.
    ttl::fill(ttl::ref(x_host), 1);
    ttl::copy(ttl::ref(x_cuda), ttl::view(x_host));

    // Clobber the host buffer with 2s so the final check can only pass if
    // the download really came from the device-side copy.
    ttl::fill(ttl::ref(x_host), 2);
    for (auto i : ttl::range<0>(x_host)) { ASSERT_EQ(x_host.data()[i], 2); }

    // Download and verify the original 1s survived the round trip.
    ttl::copy(ttl::ref(x_host), ttl::view(x_cuda));
    for (auto i : ttl::range<0>(x_host)) { ASSERT_EQ(x_host.data()[i], 1); }
}

// Sizes from 1 element to 1M; the 1M case is run three times — presumably
// to shake out nondeterministic transfer failures (verify intent).
TEST(copy_test, test_copy)
{
    test_copy(1);
    test_copy(2);
    test_copy(10);
    test_copy(100);
    test_copy(1000);
    test_copy(1 << 20);
    test_copy(1 << 20);
    test_copy(1 << 20);
}

0 commit comments

Comments
 (0)