PaddlePaddle
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.gitmodules‎
Lines changed: 9 additions & 3 deletions b/‎.gitmodules‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎doc/fluid/advanced_usage/deploy/anakin_arm_benchmark.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_arm_benchmark.md‎ b/‎doc/fluid/advanced_usage/deploy/anakin_arm_benchmark.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_arm_benchmark.md‎
diff --git a/‎doc/fluid/advanced_usage/deploy/anakin_example.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_example.md‎ b/‎doc/fluid/advanced_usage/deploy/anakin_example.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_example.md‎
diff --git a/‎doc/fluid/advanced_usage/deploy/anakin_gpu_benchmark.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_gpu_benchmark.md‎
Lines changed: 49 additions & 60 deletions b/‎doc/fluid/advanced_usage/deploy/anakin_gpu_benchmark.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_gpu_benchmark.md‎
Lines changed: 49 additions & 60 deletions
diff --git a/‎doc/fluid/advanced_usage/deploy/anakin_parser_design.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_parser_design.md‎ b/‎doc/fluid/advanced_usage/deploy/anakin_parser_design.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_parser_design.md‎
diff --git a/‎doc/fluid/advanced_usage/deploy/anakin_run_on_arm.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_run_on_arm.md‎ b/‎doc/fluid/advanced_usage/deploy/anakin_run_on_arm.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_run_on_arm.md‎
diff --git a/‎doc/fluid/advanced_usage/deploy/anakin_tutorial.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_tutorial.md‎
Lines changed: 38 additions & 35 deletions b/‎doc/fluid/advanced_usage/deploy/anakin_tutorial.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/anakin_tutorial.md‎
Lines changed: 38 additions & 35 deletions
diff --git a/‎doc/fluid/advanced_usage/deploy/convert_paddle_to_anakin.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/convert_paddle_to_anakin.md‎ b/‎doc/fluid/advanced_usage/deploy/convert_paddle_to_anakin.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/convert_paddle_to_anakin.md‎
diff --git a/‎doc/fluid/advanced_usage/deploy/how_to_add_anakin_op.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/how_to_add_anakin_op.md‎ b/‎doc/fluid/advanced_usage/deploy/how_to_add_anakin_op.md‎ renamed to ‎doc/fluid/advanced_usage/deploy/anakin/how_to_add_anakin_op.md‎
@@ -0,0 +1 @@
+.vscode/
@@ -1,9 +1,15 @@
-[submodule "external/Paddle"]
-	path = external/Paddle
-	url = https://github.com/PaddlePaddle/Paddle
 [submodule "external/book"]
 	path = external/book
 	url = https://github.com/PaddlePaddle/book
+[submodule "external/Anakin"]
+	path = external/Anakin
+	url = https://github.com/PaddlePaddle/Anakin
+[submodule "external/paddle-mobile"]
+	path = external/paddle-mobile
+	url = https://github.com/PaddlePaddle/paddle-mobile
+[submodule "external/Paddle"]
+	path = external/Paddle
+	url = https://github.com/PaddlePaddle/Paddle
 [submodule "external/models"]
 	path = external/models
 	url = https://github.com/PaddlePaddle/models
@@ -30,21 +30,19 @@
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 8.8690 | 8.2815 |
-| 2 | 15.5344 | 13.9116 |
-| 4 | 26.6000 | 21.8747 |
-| 8 | 49.8279 | 40.4076 |
-| 32 | 188.6270 | 163.7660 |
+| 1 | 8.53945 | 8.18737 |
+| 2 | 14.2269 | 13.8976 |
+| 4 | 24.2803 | 21.7976 |
+| 8 | 45.6003 | 40.319 |
 
 - GPU Memory Used (`MB`)
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 963 | 997 |
-| 2 | 965 | 1039 |
-| 4 | 991 | 1115 |
-| 8 | 1067 | 1269 |
-| 32 | 1715 | 2193 |
+| 1 | 1053.88 | 762.73 |
+| 2 | 1055.71 | 762.41 |
+| 4 | 1003.22 | 832.75 |
+| 8 | 1108.77 | 926.9 |
 
 
 ### <span id = '2'>Yolo </span>
@@ -53,109 +51,100 @@
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 16.4596| 15.2124 |
-| 2 | 26.6347| 25.0442 |
-| 4 | 43.3695| 43.5017 |
-| 8 | 80.9139 | 80.9880 |
-| 32 | 293.8080| 310.8810 |
+| 1 | 8.41606| 7.07977 |
+| 2 | 16.6588| 15.2216 |
+| 4 | 31.9955| 30.5102 |
+| 8 | 66.1107 | 64.3658 |
 
 - GPU Memory Used (`MB`)
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 1569 | 1775 |
-| 2 | 1649 | 1815 |
-| 4 | 1709 | 1887 |
-| 8 | 1731 | 2031 |
-| 32 | 2253 | 2907 |
+| 1 | 1054.71  | 299.8 |
+| 2 | 951.51  | 347.47 |
+| 4 | 846.9  | 438.47 |
+| 8 | 1042.31  | 515.15 |
 
 ### <span id = '3'> Resnet50 </span>
 
 - Latency (`ms`) of different batch
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 4.2459   |  4.1061 |
-| 2 |  6.2627  |  6.5159 |
-| 4 | 10.1277  | 11.3327 |
-| 8 | 17.8209  | 20.6680 |
-| 32 | 65.8582 | 77.8858 |
+| 1 | 4.10063  |  3.33845 |
+| 2 |  6.10941 |  5.54814 |
+| 4 | 9.90233  | 10.2763 |
+| 8 | 17.3287  |   20.0783 |
 
 - GPU Memory Used (`MB`)
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 531  | 503 |
-| 2 | 543  | 517 |
-| 4 | 583 | 541 |
-| 8 | 611 | 589 |
-| 32 |  809 | 879 |
+| 1 | 1059.15 | 299.86 |
+| 2 | 1077.8  | 340.78 |
+| 4 | 903.04  | 395 |
+| 8 | 832.53  | 508.86 |
 
 ### <span id = '4'> Resnet101 </span>
 
 - Latency (`ms`) of different batch
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 7.5562 | 7.0837 |
-| 2 | 11.6023 | 11.4079 |
-| 4 | 18.3650 | 20.0493 |
-| 8 | 32.7632 | 36.0648 |
-| 32 | 123.2550 | 135.4880 |
+| 1 | 7.29828 | 5.672 |
+| 2 | 11.2037 | 9.42352 |
+| 4 | 17.9306 | 18.0936 |
+| 8 | 31.4804 | 35.7439 |
 
 - GPU Memory Used (`MB`)
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 701  | 683 |
-| 2 | 713  | 697 |
-| 4 | 793 | 721 |
-| 8 | 819 | 769 |
-| 32 | 1043 | 1059 |
+| 1 | 1161.94 | 429.22 |
+| 2 | 1190.92 | 531.92 |
+| 4 | 994.11  | 549.7 |
+| 8 | 945.47  | 653.06 |
 
 ###  <span id = '5'> MobileNet V1 </span>
 
 - Latency (`ms`) of different batch
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 45.5156  |  1.3947 |
-| 2 |  46.5585  |  2.5483 |
-| 4 | 48.4242  | 4.3404 |
-| 8 |  52.7957 |  8.1513 |
-| 32 | 83.2519 | 31.3178 |
+| 1 | 1.52692  |  1.39282 |
+| 2 |  1.98091  |  2.05788 |
+| 4 | 3.2705  | 4.03476 |
+| 8 |  5.15652 |  7.06651 |
 
 - GPU Memory Used (`MB`)
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 329  | 283 |
-| 2 | 345  | 289 |
-| 4 | 371 | 299 |
-| 8 | 393 | 319 |
-| 32 |  531 | 433 |
+| 1 | 1144.35   | 99.6 |
+| 2 | 1160.03    | 199.75 |
+| 4 | 1098  | 184.33 |
+| 8 | 990.71  | 232.11 |
 
 ###  <span id = '6'> MobileNet V2</span>
 
 - Latency (`ms`) of different batch
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 65.6861 | 2.9842 |
-| 2 | 66.6814 | 4.7472 |
-| 4 | 69.7114 | 7.4163 |
-| 8 | 76.1092 | 12.8779 |
-| 32 | 124.9810 | 47.2142 |
+| 1 | 1.95961 | 1.78249 |
+| 2 | 2.8709 | 3.01144 |
+| 4 | 4.46131 | 5.43946 |
+| 8 | 7.161 | 10.2081 |
 
 - GPU Memory Used (`MB`)
 
 | BatchSize | TensorRT | Anakin |
 | --- | --- | --- |
-| 1 | 341 | 293 |
-| 2 | 353 | 301 |
-| 4 | 385 | 319 |
-| 8 | 421 | 351 |
-| 32 | 637 | 551 |
+| 1 | 1154.69 | 195.25 |
+| 2 | 1187.25 | 227.6 |
+| 4 | 1053 | 241.75 |
+| 8 | 1062.48 | 352.18 |
+
 
 ## How to run those Benchmark models
 
 
@@ -114,64 +114,67 @@ Anakin中数据类型与基本数据类型的对应如下:
 
   理论上，Anakin支持声明1维以上的tensor，但是对于Anakin中的Op来说，只支持NW、NHW、NCHW、NCHW_C4这四种LayOut，其中NCHW是默认的LayoutType，NCHW_C4是专门针对于int8这种数据类型的。
 
-  **例子：**
+  例子
 
-下面的代码将展示如何使用tensor， 我们建议先看看这些示例。
+    下面的代码将展示如何使用tensor， 我们建议先看看这些示例。
 
-要想获得更多关于tensor的信息， 请参考 *soure_path/core/tensor.h*
+    要想获得更多关于tensor的信息， 请参考 *source_path/core/tensor.h*
 
-1. 使用shape对象初始化tensor
+    > 1. 使用shape对象初始化tensor
 
-    ```cpp
-    //create a null tensor. A null tensor holds for nothing.
-    //tensor's buffer  is resident at CPU and its datatype is AK_FLOAT.
-    //tensor's Layout is NCHW(default)
-    Tensor<X86, AK_FLOAT> mytensor;
+    ```c++
+      //create a null tensor. A null tensor holds nothing.
+      //tensor's buffer  is resident at CPU and its datatype is AK_FLOAT.
+      //tensor's Layout is NCHW(default)
+      Tensor<X86, AK_FLOAT> mytensor;
 
-    //1. using shape object to create a tensor.
-    Shape shape1(NUM); //1-D shape. NUM is the number of dimention.
-    Tensor<X86, AK_FLOAT, W> mytensor1(shape1); //1-D tensor.
+      //1. using shape object to create a tensor.
+      Shape shape1(NUM); //1-D shape. NUM is the number of dimension.
+      Tensor<X86, AK_FLOAT, W> mytensor1(shape1); //1-D tensor.
 
-    // A 4-D shape
-    Shape shape2(N, C, H, W); // batch x channel x height x width
+      // A 4-D shape
+      Shape shape2(N, C, H, W); // batch x channel x height x width
     ```
 
-    `注意：Shape的维度必须和tensor的`[LayoutType](#layout)`相同，比如Shape(N,C,H,W), 那么Tensor的 LayoutType必须是NCHW，否则会出错。如下列代码所示`
+    >`注意：Shape的维度必须和tensor的`[LayoutType](#layout)`相同，比如Shape(N,C,H,W), 那么Tensor的 LayoutType必须是NCHW，否则会出错。如下列代码所示`
 
     ```c++
-    // A 4-D tensor.
-    Tensor<X86, AK_FLOAT> mytensor2(shape2);  //right
+       // A 4-D tensor.
+       Tensor<X86, AK_FLOAT> mytensor2(shape2);  //right
+
+       //A 4-D tensor which is resident at GPU and its datatype is AK_INT8
+       Tensor<NV, AK_INT8> mytensor3(shape2);   //right
 
-    //A 4-D tensor which is resident at GPU and its datatype is AK_INT8
-    Tensor<NV, AK_INT8> mytensor3(shape2);   //right
+       Tensor<X86, AK_FLOAT, NHW> mytensor4(shape2); //wrong!! shape's dimension must be equal to tensor's Layout.
+       Tensor<NV, AK_FLOAT, NCHW_C4> mytensor5(shape2); //wrong!!!!
 
-    Tensor<X86, AK_FLOAT, NHW> mytensor4(shape2); //wrong!! shape's dimetion must be equal to tensor's Layout.
-    Tensor<NV, AK_FLOAT, NCHW_C4> mytensor5(shape2); //wrong!!!!
     ```
 
-2. 使用现有的数据和shape初始化tensor
+    > 2. 使用现有的数据和shape初始化tensor
 
     ```c++
-    /**
-    *  A construtor of Tensor.
-    *  data_ptr is a pointer to any data type of data
-    *  TargetType is type of a platform [Anakin TargetType]
-    *  id : device id
-    *  shape: a Anakin shape
-    */
-    Tensor(Dtype* data_ptr, TargetType_t target, int id, Shape shape);
 
-    //using existing data feed to a tensor
-    Tensor<X86, AK_FLOAT> mytensor(data_ptr, TargetType, device_id, shape); //shape must has dimention (N, C, H, W).
+       /**
+       *  A constructor of Tensor.
+       *  data_ptr is a pointer to any data type of data
+       *  TargetType is type of a platform [Anakin TargetType]
+       *  id : device id
+       *  shape: an Anakin shape
+       */
+       Tensor(Dtype* data_ptr, TargetType_t target, int id, Shape shape);
+
+       //using existing data feed to a tensor
+       Tensor<X86, AK_FLOAT> mytensor(data_ptr, TargetType, device_id, shape); //shape must have dimension (N, C, H, W).
+
     ```
 
-3. 使用tensor初始化tensor
+    > 3. 使用tensor初始化tensor
 
     ```c++
-    Tensor<NV, AK_FLOAT> tensor(exist_tensor);
+       Tensor<NV, AK_FLOAT> tensor(exist_tensor);
     ```
 
->提示： 你可以用` typedef Tensor<X86, AK_FLOAT> Tensor4d_X86 `方便定义tensor
+    > 提示： 你可以用` typedef Tensor<X86, AK_FLOAT> Tensor4d_X86 `方便定义tensor
 
 #### 填充tensor数据区