Skip to content

Commit cb4f492

Browse files
authored
Merge pull request #1 from PaddlePaddle/develop
update
2 parents 37fd49d + 97dcf6e commit cb4f492

File tree

177 files changed

+20504
-757
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

177 files changed

+20504
-757
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.vscode/

.gitmodules

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
1-
[submodule "external/Paddle"]
2-
path = external/Paddle
3-
url = https://github.com/PaddlePaddle/Paddle
41
[submodule "external/book"]
52
path = external/book
63
url = https://github.com/PaddlePaddle/book
4+
[submodule "external/Anakin"]
5+
path = external/Anakin
6+
url = https://github.com/PaddlePaddle/Anakin
7+
[submodule "external/paddle-mobile"]
8+
path = external/paddle-mobile
9+
url = https://github.com/PaddlePaddle/paddle-mobile
10+
[submodule "external/Paddle"]
11+
path = external/Paddle
12+
url = https://github.com/PaddlePaddle/Paddle
713
[submodule "external/models"]
814
path = external/models
915
url = https://github.com/PaddlePaddle/models
File renamed without changes.
File renamed without changes.

doc/fluid/advanced_usage/deploy/anakin_gpu_benchmark.md renamed to doc/fluid/advanced_usage/deploy/anakin/anakin_gpu_benchmark.md

Lines changed: 49 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -30,21 +30,19 @@
3030

3131
| BatchSize | TensorRT | Anakin |
3232
| --- | --- | --- |
33-
| 1 | 8.8690 | 8.2815 |
34-
| 2 | 15.5344 | 13.9116 |
35-
| 4 | 26.6000 | 21.8747 |
36-
| 8 | 49.8279 | 40.4076 |
37-
| 32 | 188.6270 | 163.7660 |
33+
| 1 | 8.53945 | 8.18737 |
34+
| 2 | 14.2269 | 13.8976 |
35+
| 4 | 24.2803 | 21.7976 |
36+
| 8 | 45.6003 | 40.319 |
3837

3938
- GPU Memory Used (`MB`)
4039

4140
| BatchSize | TensorRT | Anakin |
4241
| --- | --- | --- |
43-
| 1 | 963 | 997 |
44-
| 2 | 965 | 1039 |
45-
| 4 | 991 | 1115 |
46-
| 8 | 1067 | 1269 |
47-
| 32 | 1715 | 2193 |
42+
| 1 | 1053.88 | 762.73 |
43+
| 2 | 1055.71 | 762.41 |
44+
| 4 | 1003.22 | 832.75 |
45+
| 8 | 1108.77 | 926.9 |
4846

4947

5048
### <span id = '2'>Yolo </span>
@@ -53,109 +51,100 @@
5351

5452
| BatchSize | TensorRT | Anakin |
5553
| --- | --- | --- |
56-
| 1 | 16.4596| 15.2124 |
57-
| 2 | 26.6347| 25.0442 |
58-
| 4 | 43.3695| 43.5017 |
59-
| 8 | 80.9139 | 80.9880 |
60-
| 32 | 293.8080| 310.8810 |
54+
| 1 | 8.41606| 7.07977 |
55+
| 2 | 16.6588| 15.2216 |
56+
| 4 | 31.9955| 30.5102 |
57+
| 8 | 66.1107 | 64.3658 |
6158

6259
- GPU Memory Used (`MB`)
6360

6461
| BatchSize | TensorRT | Anakin |
6562
| --- | --- | --- |
66-
| 1 | 1569 | 1775 |
67-
| 2 | 1649 | 1815 |
68-
| 4 | 1709 | 1887 |
69-
| 8 | 1731 | 2031 |
70-
| 32 | 2253 | 2907 |
63+
| 1 | 1054.71 | 299.8 |
64+
| 2 | 951.51 | 347.47 |
65+
| 4 | 846.9 | 438.47 |
66+
| 8 | 1042.31 | 515.15 |
7167

7268
### <span id = '3'> Resnet50 </span>
7369

7470
- Latency (`ms`) of different batch
7571

7672
| BatchSize | TensorRT | Anakin |
7773
| --- | --- | --- |
78-
| 1 | 4.2459 | 4.1061 |
79-
| 2 | 6.2627 | 6.5159 |
80-
| 4 | 10.1277 | 11.3327 |
81-
| 8 | 17.8209 | 20.6680 |
82-
| 32 | 65.8582 | 77.8858 |
74+
| 1 | 4.10063 | 3.33845 |
75+
| 2 | 6.10941 | 5.54814 |
76+
| 4 | 9.90233 | 10.2763 |
77+
| 8 | 17.3287 | 20.0783 |
8378

8479
- GPU Memory Used (`MB`)
8580

8681
| BatchSize | TensorRT | Anakin |
8782
| --- | --- | --- |
88-
| 1 | 531 | 503 |
89-
| 2 | 543 | 517 |
90-
| 4 | 583 | 541 |
91-
| 8 | 611 | 589 |
92-
| 32 | 809 | 879 |
83+
| 1 | 1059.15 | 299.86 |
84+
| 2 | 1077.8 | 340.78 |
85+
| 4 | 903.04 | 395 |
86+
| 8 | 832.53 | 508.86 |
9387

9488
### <span id = '4'> Resnet101 </span>
9589

9690
- Latency (`ms`) of different batch
9791

9892
| BatchSize | TensorRT | Anakin |
9993
| --- | --- | --- |
100-
| 1 | 7.5562 | 7.0837 |
101-
| 2 | 11.6023 | 11.4079 |
102-
| 4 | 18.3650 | 20.0493 |
103-
| 8 | 32.7632 | 36.0648 |
104-
| 32 | 123.2550 | 135.4880 |
94+
| 1 | 7.29828 | 5.672 |
95+
| 2 | 11.2037 | 9.42352 |
96+
| 4 | 17.9306 | 18.0936 |
97+
| 8 | 31.4804 | 35.7439 |
10598

10699
- GPU Memory Used (`MB`)
107100

108101
| BatchSize | TensorRT | Anakin |
109102
| --- | --- | --- |
110-
| 1 | 701 | 683 |
111-
| 2 | 713 | 697 |
112-
| 4 | 793 | 721 |
113-
| 8 | 819 | 769 |
114-
| 32 | 1043 | 1059 |
103+
| 1 | 1161.94 | 429.22 |
104+
| 2 | 1190.92 | 531.92 |
105+
| 4 | 994.11 | 549.7 |
106+
| 8 | 945.47 | 653.06 |
115107

116108
### <span id = '5'> MobileNet V1 </span>
117109

118110
- Latency (`ms`) of different batch
119111

120112
| BatchSize | TensorRT | Anakin |
121113
| --- | --- | --- |
122-
| 1 | 45.5156 | 1.3947 |
123-
| 2 | 46.5585 | 2.5483 |
124-
| 4 | 48.4242 | 4.3404 |
125-
| 8 | 52.7957 | 8.1513 |
126-
| 32 | 83.2519 | 31.3178 |
114+
| 1 | 1.52692 | 1.39282 |
115+
| 2 | 1.98091 | 2.05788 |
116+
| 4 | 3.2705 | 4.03476 |
117+
| 8 | 5.15652 | 7.06651 |
127118

128119
- GPU Memory Used (`MB`)
129120

130121
| BatchSize | TensorRT | Anakin |
131122
| --- | --- | --- |
132-
| 1 | 329 | 283 |
133-
| 2 | 345 | 289 |
134-
| 4 | 371 | 299 |
135-
| 8 | 393 | 319 |
136-
| 32 | 531 | 433 |
123+
| 1 | 1144.35 | 99.6 |
124+
| 2 | 1160.03 | 199.75 |
125+
| 4 | 1098 | 184.33 |
126+
| 8 | 990.71 | 232.11 |
137127

138128
### <span id = '6'> MobileNet V2</span>
139129

140130
- Latency (`ms`) of different batch
141131

142132
| BatchSize | TensorRT | Anakin |
143133
| --- | --- | --- |
144-
| 1 | 65.6861 | 2.9842 |
145-
| 2 | 66.6814 | 4.7472 |
146-
| 4 | 69.7114 | 7.4163 |
147-
| 8 | 76.1092 | 12.8779 |
148-
| 32 | 124.9810 | 47.2142 |
134+
| 1 | 1.95961 | 1.78249 |
135+
| 2 | 2.8709 | 3.01144 |
136+
| 4 | 4.46131 | 5.43946 |
137+
| 8 | 7.161 | 10.2081 |
149138

150139
- GPU Memory Used (`MB`)
151140

152141
| BatchSize | TensorRT | Anakin |
153142
| --- | --- | --- |
154-
| 1 | 341 | 293 |
155-
| 2 | 353 | 301 |
156-
| 4 | 385 | 319 |
157-
| 8 | 421 | 351 |
158-
| 32 | 637 | 551 |
143+
| 1 | 1154.69 | 195.25 |
144+
| 2 | 1187.25 | 227.6 |
145+
| 4 | 1053 | 241.75 |
146+
| 8 | 1062.48 | 352.18 |
147+
159148

160149
## How to run those Benchmark models
161150

File renamed without changes.
File renamed without changes.

doc/fluid/advanced_usage/deploy/anakin_tutorial.md renamed to doc/fluid/advanced_usage/deploy/anakin/anakin_tutorial.md

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -114,64 +114,67 @@ Anakin中数据类型与基本数据类型的对应如下:
114114
115115
理论上,Anakin支持声明1维以上的tensor,但是对于Anakin中的Op来说,只支持NW、NHW、NCHW、NCHW_C4这四种LayOut,其中NCHW是默认的LayoutType,NCHW_C4是专门针对于int8这种数据类型的。
116116
117-
**例子:**
117+
例子
118118
119-
下面的代码将展示如何使用tensor, 我们建议先看看这些示例。
119+
下面的代码将展示如何使用tensor, 我们建议先看看这些示例。
120120
121-
要想获得更多关于tensor的信息, 请参考 *soure_path/core/tensor.h*
121+
要想获得更多关于tensor的信息, 请参考 *source_path/core/tensor.h*
122122
123-
1. 使用shape对象初始化tensor
123+
> 1. 使用shape对象初始化tensor
124124
125-
```cpp
126-
//create a null tensor. A null tensor holds for nothing.
127-
//tensor's buffer is resident at CPU and its datatype is AK_FLOAT.
128-
//tensor's Layout is NCHW(default)
129-
Tensor<X86, AK_FLOAT> mytensor;
125+
```c++
126+
    //create a null tensor. A null tensor holds nothing.
127+
//tensor's buffer is resident at CPU and its datatype is AK_FLOAT.
128+
//tensor's Layout is NCHW(default)
129+
Tensor<X86, AK_FLOAT> mytensor;
130130
131-
//1. using shape object to create a tensor.
132-
Shape shape1(NUM); //1-D shape. NUM is the number of dimention.
133-
Tensor<X86, AK_FLOAT, W> mytensor1(shape1); //1-D tensor.
131+
//1. using shape object to create a tensor.
132+
    Shape shape1(NUM); //1-D shape. NUM is the number of dimensions.
133+
Tensor<X86, AK_FLOAT, W> mytensor1(shape1); //1-D tensor.
134134
135-
// A 4-D shape
136-
Shape shape2(N, C, H, W); // batch x channel x height x width
135+
// A 4-D shape
136+
Shape shape2(N, C, H, W); // batch x channel x height x width
137137
```
138138
139-
`注意:Shape的维度必须和tensor的`[LayoutType](#layout)`相同,比如Shape(N,C,H,W), 那么Tensor的 LayoutType必须是NCHW,否则会出错。如下列代码所示`
139+
>`注意:Shape的维度必须和tensor的`[LayoutType](#layout)`相同,比如Shape(N,C,H,W), 那么Tensor的 LayoutType必须是NCHW,否则会出错。如下列代码所示`
140140
141141
```c++
142-
// A 4-D tensor.
143-
Tensor<X86, AK_FLOAT> mytensor2(shape2); //right
142+
// A 4-D tensor.
143+
Tensor<X86, AK_FLOAT> mytensor2(shape2); //right
144+
145+
//A 4-D tensor which is resident at GPU and its datatype is AK_INT8
146+
Tensor<NV, AK_INT8> mytensor3(shape2); //right
144147
145-
//A 4-D tensor which is resident at GPU and its datatype is AK_INT8
146-
Tensor<NV, AK_INT8> mytensor3(shape2); //right
148+
    Tensor<X86, AK_FLOAT, NHW> mytensor4(shape2); //wrong!! shape's dimension must be equal to tensor's layout.
149+
Tensor<NV, AK_FLOAT, NCHW_C4> mytensor5(shape2); //wrong!!!!
147150
148-
Tensor<X86, AK_FLOAT, NHW> mytensor4(shape2); //wrong!! shape's dimetion must be equal to tensor's Layout.
149-
Tensor<NV, AK_FLOAT, NCHW_C4> mytensor5(shape2); //wrong!!!!
150151
```
151152
152-
2. 使用现有的数据和shape初始化tensor
153+
> 2. 使用现有的数据和shape初始化tensor
153154
154155
```c++
155-
/**
156-
* A construtor of Tensor.
157-
* data_ptr is a pointer to any data type of data
158-
* TargetType is type of a platform [Anakin TargetType]
159-
* id : device id
160-
* shape: a Anakin shape
161-
*/
162-
Tensor(Dtype* data_ptr, TargetType_t target, int id, Shape shape);
163156
164-
//using existing data feed to a tensor
165-
Tensor<X86, AK_FLOAT> mytensor(data_ptr, TargetType, device_id, shape); //shape must has dimention (N, C, H, W).
157+
/**
158+
     * A constructor of Tensor.
159+
* data_ptr is a pointer to any data type of data
160+
* TargetType is type of a platform [Anakin TargetType]
161+
* id : device id
162+
     * shape: an Anakin shape
163+
*/
164+
Tensor(Dtype* data_ptr, TargetType_t target, int id, Shape shape);
165+
166+
//using existing data feed to a tensor
167+
    Tensor<X86, AK_FLOAT> mytensor(data_ptr, TargetType, device_id, shape); //shape must have dimensions (N, C, H, W).
168+
166169
```
167170
168-
3. 使用tensor初始化tensor
171+
> 3. 使用tensor初始化tensor
169172
170173
```c++
171-
Tensor<NV, AK_FLOAT> tensor(exist_tensor);
174+
Tensor<NV, AK_FLOAT> tensor(exist_tensor);
172175
```
173176
174-
>提示: 你可以用` typedef Tensor<X86, AK_FLOAT> Tensor4d_X86 `方便定义tensor
177+
> 提示: 你可以用` typedef Tensor<X86, AK_FLOAT> Tensor4d_X86 `方便定义tensor
175178
176179
#### 填充tensor数据区
177180
File renamed without changes.
File renamed without changes.

0 commit comments

Comments
 (0)