sql-machine-learning
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎cmd/sqlflowserver/main_test.go‎
Lines changed: 14 additions & 14 deletions b/‎cmd/sqlflowserver/main_test.go‎
Lines changed: 14 additions & 14 deletions
diff --git a/‎doc/design/design_alps_submitter.md‎
Lines changed: 6 additions & 6 deletions b/‎doc/design/design_alps_submitter.md‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎doc/design/design_analyzer.md‎
Lines changed: 2 additions & 2 deletions b/‎doc/design/design_analyzer.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/design/design_ant_xgboost.md‎
Lines changed: 2 additions & 2 deletions b/‎doc/design/design_ant_xgboost.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/design/design_clustermodel.md‎
Lines changed: 3 additions & 3 deletions b/‎doc/design/design_clustermodel.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎doc/design/design_database_abstraction_layer.md‎
Lines changed: 2 additions & 2 deletions b/‎doc/design/design_database_abstraction_layer.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/design/design_elasticdl_on_sqlflow.md‎
Lines changed: 3 additions & 3 deletions b/‎doc/design/design_elasticdl_on_sqlflow.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎doc/design/design_feature_derivation.md‎
Lines changed: 4 additions & 4 deletions b/‎doc/design/design_feature_derivation.md‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎doc/design/design_high-available_sqlflow_server.md‎
Lines changed: 3 additions & 3 deletions b/‎doc/design/design_high-available_sqlflow_server.md‎
Lines changed: 3 additions & 3 deletions
@@ -30,7 +30,7 @@ Here are examples for training a Tensorflow [DNNClassifer](https://www.tensorflo
 ```sql
 sqlflow> SELECT *
 FROM iris.train
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20]
 COLUMN sepal_length, sepal_width, petal_length, petal_width
 LABEL class
 
@@ -529,7 +529,7 @@ func CaseTrainSQL(t *testing.T) {
 	a := assert.New(t)
 	trainSQL := fmt.Sprintf(`SELECT *
 FROM %s.%s
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20]
 COLUMN sepal_length, sepal_width, petal_length, petal_width
 LABEL class
@@ -587,7 +587,7 @@ func CaseTrainCustomModel(t *testing.T) {
 	a := assert.New(t)
 	trainSQL := `SELECT *
 FROM iris.train
-TRAIN sqlflow_models.DNNClassifier
+TO TRAIN sqlflow_models.DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20]
 COLUMN sepal_length, sepal_width, petal_length, petal_width
 LABEL class
@@ -643,7 +643,7 @@ func CaseTrainTextClassification(t *testing.T) {
 	a := assert.New(t)
 	trainSQL := `SELECT *
 FROM text_cn.train_processed
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 17, model.hidden_units = [10, 20]
 COLUMN EMBEDDING(CATEGORY_ID(news_title,16000,COMMA),128,mean)
 LABEL class_id
@@ -671,7 +671,7 @@ func CaseTrainTextClassificationCustomLSTM(t *testing.T) {
 	a := assert.New(t)
 	trainSQL := `SELECT *
 FROM text_cn.train_processed
-TRAIN sqlflow_models.StackedBiLSTMClassifier
+TO TRAIN sqlflow_models.StackedBiLSTMClassifier
 WITH model.n_classes = 17, model.stack_units = [16], train.epoch = 1, train.batch_size = 32
 COLUMN EMBEDDING(SEQ_CATEGORY_ID(news_title,1600,COMMA),128,mean)
 LABEL class_id
@@ -697,7 +697,7 @@ func CaseTrainSQLWithHyperParams(t *testing.T) {
 	a := assert.New(t)
 	trainSQL := `SELECT *
 FROM iris.train
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20], train.batch_size = 10, train.epoch = 2
 COLUMN sepal_length, sepal_width, petal_length, petal_width
 LABEL class
@@ -723,7 +723,7 @@ func CaseTrainDeepWideModel(t *testing.T) {
 	a := assert.New(t)
 	trainSQL := `SELECT *
 FROM iris.train
-TRAIN DNNLinearCombinedClassifier
+TO TRAIN DNNLinearCombinedClassifier
 WITH model.n_classes = 3, model.dnn_hidden_units = [10, 20], train.batch_size = 10, train.epoch = 2
 COLUMN sepal_length, sepal_width FOR linear_feature_columns
 COLUMN petal_length, petal_width FOR dnn_feature_columns
@@ -751,7 +751,7 @@ func CaseTrainCustomModelWithHyperParams(t *testing.T) {
 	a := assert.New(t)
 	trainSQL := `SELECT *
 FROM iris.train
-TRAIN sqlflow_models.DNNClassifier
+TO TRAIN sqlflow_models.DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20], train.batch_size = 10, train.epoch=2
 COLUMN sepal_length, sepal_width, petal_length, petal_width
 LABEL class
@@ -777,7 +777,7 @@ func CaseSparseFeature(t *testing.T) {
 	a := assert.New(t)
 	trainSQL := `SELECT *
 FROM text_cn.train
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20]
 COLUMN EMBEDDING(CATEGORY_ID(news_title,16000,COMMA),128,mean)
 LABEL class_id
@@ -804,7 +804,7 @@ func CaseTrainElasticDL(t *testing.T) {
 	a := assert.New(t)
 	trainSQL := fmt.Sprintf(`SELECT sepal_length, sepal_width, petal_length, petal_width, class
 FROM %s.%s
-TRAIN ElasticDLDNNClassifier
+TO TRAIN ElasticDLDNNClassifier
 WITH
 			model.optimizer = "optimizer",
 			model.loss = "loss",
@@ -866,7 +866,7 @@ func CaseTrainALPS(t *testing.T) {
 	trainSQL := fmt.Sprintf(`SELECT deep_id, user_space_stat, user_behavior_stat, space_stat, l
 FROM %s.sparse_column_test
 LIMIT 100
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 2, model.hidden_units = [10, 20], train.batch_size = 10, engine.ps_num=0, engine.worker_num=0, engine.type=local
 COLUMN SPARSE(deep_id,15033,COMMA,int),
        SPARSE(user_space_stat,310,COMMA,int),
@@ -901,7 +901,7 @@ func CaseTrainALPSRemoteModel(t *testing.T) {
 	trainSQL := fmt.Sprintf(`SELECT deep_id, user_space_stat, user_behavior_stat, space_stat, l
 FROM %s.sparse_column_test
 LIMIT 100
-TRAIN models.estimator.dnn_classifier.DNNClassifier
+TO TRAIN models.estimator.dnn_classifier.DNNClassifier
 WITH 
 	model.n_classes = 2, model.hidden_units = [10, 20], train.batch_size = 10, engine.ps_num=0, engine.worker_num=0, engine.type=local,
 	gitlab.project = "Alps/sqlflow-models",
@@ -940,7 +940,7 @@ func CaseTrainALPSFeatureMap(t *testing.T) {
 	trainSQL := fmt.Sprintf(`SELECT dense, deep, item, test_sparse_with_fm.label
 FROM %s.test_sparse_with_fm
 LIMIT 32
-TRAIN alipay.SoftmaxClassifier
+TO TRAIN alipay.SoftmaxClassifier
 WITH train.max_steps = 32, eval.steps=32, train.batch_size=8, engine.ps_num=0, engine.worker_num=0, engine.type = local
 COLUMN DENSE(dense, none, comma),
        DENSE(item, 1, comma, int)
@@ -991,7 +991,7 @@ func CaseTrainRegression(t *testing.T) {
 	a := assert.New(t)
 	trainSQL := fmt.Sprintf(`SELECT *
 FROM housing.train
-TRAIN LinearRegressor
+TO TRAIN LinearRegressor
 WITH model.label_dimension=1
 COLUMN f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13
 LABEL target
@@ -1053,7 +1053,7 @@ func CaseTrainXGBoostRegression(t *testing.T) {
 	trainSQL := fmt.Sprintf(`
 SELECT *
 FROM housing.train
-TRAIN xgboost.gbtree
+TO TRAIN xgboost.gbtree
 WITH
 		objective="reg:squarederror",
 		train.num_boost_round = 30
 
@@ -89,7 +89,7 @@ The column `c1` is dense encoded and `c2` is sparse encoded, `c3` is label colum
 select 
   c1, c2, c3 as class
 from kaggle_credit_fraud_training_data
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH
   ...
 COLUMN
@@ -148,7 +148,7 @@ Here is an example which do `BUCKETIZED` on `c2` then `CROSS` with `c1`.
 select 
     c1, c2, c3 as class
 from kaggle_credit_fraud_training_data
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH
   ...
 COLUMN
@@ -162,7 +162,7 @@ Feature Expressions except for Tensorflow Feature Column API should raise an err
 ```sql
 /* Not supported */
 select * from kaggle_credit_fraud_training_data
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH
     ...
 COLUMN
@@ -206,7 +206,7 @@ Let's create a DNNClassifier example, the minimum parameters of the constructor
 select 
     c1, c2, c3 as class
 from kaggle_credit_fraud_training_data
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH
     estimator.hidden_units = [10, 20],
     train_spec.max_steps = 2000,
@@ -223,7 +223,7 @@ For now, we will pass the result of snippet code as `feature_columns` parameters
 select 
     c1, c2, c3, c4, c5 as class
 from kaggle_credit_fraud_training_data
-TRAIN DNNLinearCombinedClassifier
+TO TRAIN DNNLinearCombinedClassifier
 WITH
   linear_feature_columns = [fc1, fc2]
   dnn_feature_columns = [fc3]
@@ -234,4 +234,4 @@ COLUMN
   CROSS([fc1, fc2, f3]) as fc3
 LABEL class
 ...
-```
+```
@@ -8,13 +8,13 @@ This design doc introduces how to support the `Analyze SQL` in SQLFlow with SHAP
 
 ## User Interface
 
-Users usually use a **TRAIN SQL** to train a model and then analyze the model using an **ANALYZE SQL**, the simple pipeline like:
+Users usually use a **TO TRAIN SQL** statement to train a model and then analyze the model using an **ANALYZE SQL** statement. A simple pipeline looks like:
 
 Train SQL:
 
 ``` sql
 SELECT * FROM train_table
-TRAIN xgboost.Estimator
+TO TRAIN xgboost.Estimator
 WITH
     train.objective = "reg:linear"
 COLUMN x
 
@@ -33,7 +33,7 @@ Comparing to python API provided by `xgboost`, it is easier to build a python co
 ### User Experience
 
 In terms of sqlflow users, xgboost is an alternative `Estimator` like `TensorFlow Estimators`. 
-Working with xgboost is quite similar to working with TensorFlow Estimators; just change `TRAIN DNNClassifier` into `TRAIN XGBoostEstimator`. 
+Working with xgboost is quite similar to working with TensorFlow Estimators; just change `TO TRAIN DNNClassifier` into `TO TRAIN XGBoostEstimator`. 
 
 In addition, xgboost specific parameters can be configured in the same way as TensorFlow parameters. 
 
@@ -44,7 +44,7 @@ Below is a demo about training/predicting via xgboost :
 select 
     c1, c2, c3, c4, c5 as class
 from kaggle_credit_fraud_training_data
-TRAIN XGBoostEstimator
+TO TRAIN XGBoostEstimator
 WITH
   booster = "gbtree"
   objective = "logistic:binary"
 
@@ -22,13 +22,13 @@ The figure below demonstrates the overall workflow for cluster model training, w
 
 In this scenario, we focus on the extraction of data patterns in unsupervised learning. 
 
-So, the user can use `TRAIN` keyword to training a model. The user can also specify the training hyper-parameters with the keyword `WITH` and determine whether to use pre-trained model by `USING`. The training and predicting syntax looks like:
+So, the user can use the `TO TRAIN` keyword to train a model. The user can also specify the training hyper-parameters with the keyword `WITH` and determine whether to use a pre-trained model by `USING`. The training and predicting syntax looks like:
 
-TRAIN SQL:
+TO TRAIN SQL:
 
 ``` sql
 SELECT * FROM input_table
-TRAIN clusterModel
+TO TRAIN clusterModel
 WITH
     model.encode_units = [100, 7]
     model.n_clusters = 5
 
@@ -8,7 +8,7 @@ SQLFlow calls Go's [standard database API](https://golang.org/pkg/database/sql/)
 
 ### Data Retrieval
 
-The basic idea of SQLFlow is to extend the SELECT statement of SQL to have the TRAIN and PREDICT clauses.  For more discussion, please refer to the [syntax design](/doc/design/design_syntax.md).  SQLFlow translates such "extended SQL statements" into submitter programs, which forward the part from SELECT to TRAIN or PREDICT, which we call the "standard part", to the SQL engine.  SQLFlow also accepts the SELECT statement without TRAIN or PREDICT clauses and would forward such "standard statements" to the engine.  It is noticeable that the "standard part" or "standard statements" are not standardized.  For example, various engines use different syntax for `FULL OUTER JOIN`.
+The basic idea of SQLFlow is to extend the SELECT statement of SQL to have the TO TRAIN and PREDICT clauses.  For more discussion, please refer to the [syntax design](/doc/design/design_syntax.md).  SQLFlow translates such "extended SQL statements" into submitter programs, which forward the part from SELECT to TO TRAIN or PREDICT, which we call the "standard part", to the SQL engine.  SQLFlow also accepts the SELECT statement without TO TRAIN or PREDICT clauses and would forward such "standard statements" to the engine.  It is noticeable that the "standard part" or "standard statements" are not standardized.  For example, various engines use different syntax for `FULL OUTER JOIN`.
 
 - Hive supports `FULL OUTER JOIN` directly.
 - MySQL doesn't have `FULL OUTER JOIN`. However, a user can emulate `FULL OUTER JOIN` using `LEFT JOIN`, `UNION` and `RIGHT JOIN`.
@@ -24,7 +24,7 @@ SELECT
       name,
       age,
       income 
-FROM  employee TRAIN DNNRegressor 
+FROM  employee TO TRAIN DNNRegressor 
 WITH  hidden_layers=[10,50,10] 
 COLUMN name, agee LABEL income;
 ```
 
@@ -12,7 +12,7 @@ This is a design doc on integration with [ElasticDL](https://github.com/wangkuiy
 SELECT
     c1, c2, c3, c4, c5 as class
 FROM training_data
-TRAIN ElasticDLKerasClassifier
+TO TRAIN ElasticDLKerasClassifier
 WITH
   optimizer = "optimizer",
   loss = "loss",
@@ -47,7 +47,7 @@ Users can provide run-time configurations to ElasticDL job via additional parame
 SELECT
     c1, c2, c3, c4, c5 as class
 FROM training_data
-TRAIN ElasticDLKerasClassifier
+TO TRAIN ElasticDLKerasClassifier
 WITH
   optimizer = "optimizer",
   loss = "loss",
@@ -73,7 +73,7 @@ INTO trained_elasticdl_keras_classifier;
 Steps:
 
 1. Based on `SELECT ... FROM ...`, read ODPS table and write it to [RecordIO](https://github.com/wangkuiyi/recordio) files, including both features and labels. These files will be stored in [Kubernetes Persistent Volumes](https://kubernetes.io/docs/concepts/storage/persistent-volumes/). In the future, we will support reading ODPS table directly without having to convert it to RecordIO files.
-2. Generate model definition file (e.g. [cifar10_functional_api.py](https://github.com/wangkuiyi/elasticdl/blob/develop/model_zoo/cifar10_functional_api/cifar10_functional_api.py)) that will be used in `TRAIN` clause, which includes:
+2. Generate model definition file (e.g. [cifar10_functional_api.py](https://github.com/wangkuiyi/elasticdl/blob/develop/model_zoo/cifar10_functional_api/cifar10_functional_api.py)) that will be used in `TO TRAIN` clause, which includes:
 
    - In model definition function e.g. `custom_model()`, we need to configure model input and output shapes correctly in `inputs = tf.keras.layers.Input(shape=<input_shape>)` (only when the model is defined using `tf.keras` functional APIs) and `outputs = tf.keras.layers.Dense(<num_classes>)`(based on `COLUMN ... LABEL ...`). For this MVP, users can provide `<input_shape>` and `<num_classes>` using `WITH` clause which will then get passed to the model constructor `custom_model(input_shape, num_classes)` via `--params` argument in ElasticDL high-level API. In the future, this will be inferred from the ODPS table.
    - Pass additional parameters from `WITH` clause to `custom_model()`'s instantiation, such as `optimizer` and `loss`.
 
@@ -24,7 +24,7 @@ assume that we are using all columns to train and no longer need to write
 
 ```sql
 SELECT * FROM creditcardfraud
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 LABEL class
 INTO my_model_name;
 ```
@@ -38,7 +38,7 @@ the SQL statement should look like:
 
 ```sql
 SELECT * FROM creditcardfraud
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 COLUMN YOUR_NORMALIZE_FUNC(time)
 LABEL class
 INTO my_model_name;
@@ -64,7 +64,7 @@ statement, the SQL statement for the above case should be like:
 
 ```sql
 SELECT * FROM training_table
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 COLUMN EMBEDDING(c, 128, "sum"),
        EMBEDDING(SPARSE(d, [1000000]), 512, "sum")
 LABEL label
@@ -75,7 +75,7 @@ You can also write the full description of every column like below:
 
 ```sql
 SELECT a, b, c, d, label FROM training_table
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 COLUMN a, b,
        EMBEDDING(DENSE(c, [64]), 128, "sum"),
        EMBEDDING(SPARSE(d, [1000000]), 512, "sum")
 
@@ -196,11 +196,11 @@ func (r *KubernetesJobRunner) fetch(jobID string) (*pb.Result, error) (
 
 ### Store the Trained Model
 
-For example, a tinny `TRAIN` statement:
+For example, a tiny `TO TRAIN` statement:
 
 ``` sql
 SELECT ...
-TRAIN DNNClassifer
+TO TRAIN DNNClassifer
 WITH
   ...
 COLUMN ...
@@ -209,7 +209,7 @@ INTO sqlflow_model
 
 This SQL statement would save the model named `sqlflow_model`, which contains two parts:
 
-1. The `TRAIN` statement, which would be saved as a `.mod` file.
+1. The `TO TRAIN` statement, which would be saved as a `.mod` file.
 1. The Model weights, which would be saved as a `.tar.gz` file.
 
 An example of a trained model folder is as follows: