Skip to content

Commit 41a0c06

Browse files
committed
完成kudu sink和side的本地开发和测试 添加说明文档
1 parent 6bb3435 commit 41a0c06

File tree

19 files changed

+2228
-0
lines changed

19 files changed

+2228
-0
lines changed

docs/kuduSide.md

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
2+
## 1.格式:
3+
All:
4+
```
5+
create table sideTable(
6+
id int,
7+
tablename1 VARCHAR,
8+
PRIMARY KEY(id),
9+
PERIOD FOR SYSTEM_TIME
10+
)WITH(
11+
type='kudu',
12+
kuduMasters ='ip1,ip2,ip3',
13+
tableName ='impala::default.testSide',
14+
cache ='ALL',
15+
primaryKey='id,xx',
16+
lowerBoundPrimaryKey='10,xx',
17+
upperBoundPrimaryKey='15,xx',
18+
workerCount='1',
19+
defaultOperationTimeoutMs='600000',
20+
defaultSocketReadTimeoutMs='6000000',
21+
batchSizeBytes='100000000',
22+
limitNum='1000',
23+
isFaultTolerant='false',
24+
partitionedJoin='false'
25+
);
26+
```
27+
LRU:
28+
```
29+
create table sideTable(
30+
id int,
31+
tablename1 VARCHAR,
32+
PRIMARY KEY(id),
33+
PERIOD FOR SYSTEM_TIME
34+
)WITH(
35+
type='kudu',
36+
kuduMasters ='ip1,ip2,ip3',
37+
tableName ='impala::default.testSide',
38+
cache ='LRU',
39+
workerCount='1',
40+
defaultOperationTimeoutMs='600000',
41+
defaultSocketReadTimeoutMs='6000000',
42+
batchSizeBytes='100000000',
43+
limitNum='1000',
44+
isFaultTolerant='false',
45+
partitionedJoin='false'
46+
);
47+
```
48+
49+
## 2.支持版本
50+
kudu 1.9.0+cdh6.2.0
51+
52+
## 3.表结构定义
53+
54+
|参数名称|含义|
55+
|----|---|
56+
| tableName | 注册到flink的表名称(可选填;不填默认和hbase对应的表名称相同)|
57+
| colName | 列名称|
58+
| colType | 列类型 [colType支持的类型](colType.md)|
59+
| PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息|
60+
| PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开|
61+
62+
## 3.参数
63+
64+
65+
|参数名称|含义|是否必填|默认值|
66+
|----|---|---|-----|
67+
|type | 表明维表的类型[hbase\|mysql|\kudu]|||
68+
| kuduMasters | kudu master节点的地址;格式ip[ip,ip2]|||
69+
| tableName | kudu 的表名称|||
70+
| workerCount | 工作线程数 ||
71+
| defaultOperationTimeoutMs | 写入操作超时时间 ||
72+
| defaultSocketReadTimeoutMs | socket读取超时时间 ||
73+
| primaryKey | 需要过滤的主键 ALL模式独有 ||
74+
| lowerBoundPrimaryKey | 需要过滤的主键的最小值 ALL模式独有 ||
75+
| upperBoundPrimaryKey | 需要过滤的主键的最大值(不包含) ALL模式独有 ||
76+
| workerCount | 工作线程数 ||
77+
| defaultOperationTimeoutMs | 写入操作超时时间 ||
78+
| defaultSocketReadTimeoutMs | socket读取超时时间 ||
79+
| batchSizeBytes |返回数据的大小 ||
80+
| limitNum |返回数据的条数 ||
81+
| isFaultTolerant |查询是否容错 查询失败是否扫描第二个副本 默认false 容错 ||
82+
| cache | 维表缓存策略(NONE/LRU/ALL)||NONE|
83+
| partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量)||false|
84+
85+
86+
--------------
87+
> 缓存策略
88+
* NONE: 不做内存缓存
89+
* LRU:
90+
* cacheSize: 缓存的条目数量
91+
* cacheTTLMs:缓存的过期时间(ms)
92+
93+
## 4.样例
94+
All:
95+
```
96+
create table sideTable(
97+
id int,
98+
tablename1 VARCHAR,
99+
PRIMARY KEY(id),
100+
PERIOD FOR SYSTEM_TIME
101+
)WITH(
102+
type='kudu',
103+
kuduMasters ='ip1,ip2,ip3',
104+
tableName ='impala::default.testSide',
105+
cache ='ALL',
106+
primaryKey='id,xx',
107+
lowerBoundPrimaryKey='10,xx',
108+
upperBoundPrimaryKey='15,xx',
109+
partitionedJoin='false'
110+
);
111+
```
112+
LRU:
113+
```
114+
create table sideTable(
115+
id int,
116+
tablename1 VARCHAR,
117+
PRIMARY KEY(id),
118+
PERIOD FOR SYSTEM_TIME
119+
)WITH(
120+
type='kudu',
121+
kuduMasters ='ip1,ip2,ip3',
122+
tableName ='impala::default.testSide',
123+
cache ='LRU',
124+
partitionedJoin='false'
125+
);
126+
```
127+

docs/kuduSink.md

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
## 1.格式:
2+
```
3+
CREATE TABLE tableName(
4+
colName colType,
5+
...
6+
colNameX colType
7+
)WITH(
8+
type ='kudu',
9+
kuduMasters ='ip1,ip2,ip3',
10+
tableName ='impala::default.test',
11+
writeMode='upsert',
12+
workerCount='1',
13+
defaultOperationTimeoutMs='600000',
14+
defaultSocketReadTimeoutMs='6000000',
15+
parallelism ='parllNum'
16+
);
17+
18+
19+
```
20+
21+
## 2.支持版本
22+
kudu 1.9.0+cdh6.2.0
23+
24+
## 3.表结构定义
25+
26+
|参数名称|含义|
27+
|----|---|
28+
| tableName | 在 sql 中使用的名称;即注册到flink-table-env上的名称
29+
| colName | 列名称,redis中存储为 表名:主键名:主键值:列名]|
30+
| colType | 列类型 [colType支持的类型](colType.md)|
31+
32+
33+
## 4.参数:
34+
35+
|参数名称|含义|是否必填|默认值|
36+
|----|---|---|-----|
37+
|type | 表明 输出表类型[mysql\|hbase\|elasticsearch\redis\|kudu\]|||
38+
| kuduMasters | kudu master节点的地址;格式ip[ip,ip2]|||
39+
| tableName | kudu 的表名称|||
40+
| writeMode | 写入kudu的模式 insert|update|upsert |否 |upsert
41+
| workerCount | 工作线程数 ||
42+
| defaultOperationTimeoutMs | 写入操作超时时间 ||
43+
| defaultSocketReadTimeoutMs | socket读取超时时间 ||
44+
|parallelism | 并行度设置||1|
45+
46+
47+
## 5.样例:
48+
```
49+
CREATE TABLE MyResult(
50+
id int,
51+
title VARCHAR,
52+
amount decimal,
53+
tablename1 VARCHAR
54+
)WITH(
55+
type ='kudu',
56+
kuduMasters ='localhost1,localhost2,localhost3',
57+
tableName ='impala::default.test',
58+
writeMode='upsert',
59+
parallelism ='1'
60+
);
61+
62+
```

kudu/kudu-side/kudu-all-side/pom.xml

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<parent>
6+
<artifactId>sql.side.kudu</artifactId>
7+
<groupId>com.dtstack.flink</groupId>
8+
<version>1.0-SNAPSHOT</version>
9+
<relativePath>../pom.xml</relativePath>
10+
</parent>
11+
<modelVersion>4.0.0</modelVersion>
12+
13+
<artifactId>sql.side.all.kudu</artifactId>
14+
<name>kudu-all-side</name>
15+
16+
<packaging>jar</packaging>
17+
18+
<dependencies>
19+
<dependency>
20+
<groupId>com.dtstack.flink</groupId>
21+
<artifactId>sql.side.kudu.core</artifactId>
22+
<version>1.0-SNAPSHOT</version>
23+
</dependency>
24+
</dependencies>
25+
26+
27+
<build>
28+
<plugins>
29+
<plugin>
30+
<groupId>org.apache.maven.plugins</groupId>
31+
<artifactId>maven-shade-plugin</artifactId>
32+
<version>1.4</version>
33+
<executions>
34+
<execution>
35+
<phase>package</phase>
36+
<goals>
37+
<goal>shade</goal>
38+
</goals>
39+
<configuration>
40+
<artifactSet>
41+
<excludes>
42+
43+
</excludes>
44+
</artifactSet>
45+
<filters>
46+
<filter>
47+
<artifact>*:*</artifact>
48+
<excludes>
49+
<exclude>META-INF/*.SF</exclude>
50+
<exclude>META-INF/*.DSA</exclude>
51+
<exclude>META-INF/*.RSA</exclude>
52+
</excludes>
53+
</filter>
54+
</filters>
55+
</configuration>
56+
</execution>
57+
</executions>
58+
</plugin>
59+
60+
<plugin>
61+
<artifactId>maven-antrun-plugin</artifactId>
62+
<version>1.2</version>
63+
<executions>
64+
<execution>
65+
<id>copy-resources</id>
66+
<!-- here the phase you need -->
67+
<phase>package</phase>
68+
<goals>
69+
<goal>run</goal>
70+
</goals>
71+
<configuration>
72+
<tasks>
73+
<copy todir="${basedir}/../../../plugins/kuduallside">
74+
<fileset dir="target/">
75+
<include name="${project.artifactId}-${project.version}.jar"/>
76+
</fileset>
77+
</copy>
78+
79+
<move file="${basedir}/../../../plugins/kuduallside/${project.artifactId}-${project.version}.jar"
80+
tofile="${basedir}/../../../plugins/kuduallside/${project.name}.jar"/>
81+
</tasks>
82+
</configuration>
83+
</execution>
84+
</executions>
85+
</plugin>
86+
</plugins>
87+
</build>
88+
89+
</project>

0 commit comments

Comments
 (0)