From 0c7388e24b99b77aaab9dd74444eedc017a05874 Mon Sep 17 00:00:00 2001 From: w7u Date: Mon, 14 Oct 2019 17:14:26 +0800 Subject: [PATCH 1/9] Remove complaints: "GOLANG: composite literal uses unkeyed fields" --- pkg/sql/codegen_alps.go | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/pkg/sql/codegen_alps.go b/pkg/sql/codegen_alps.go index 237c4b067a..4979dae8a9 100644 --- a/pkg/sql/codegen_alps.go +++ b/pkg/sql/codegen_alps.go @@ -188,7 +188,9 @@ func newALPSTrainFiller(pr *extendedSelect, db *DB, session *pb.Session, ds *tra // TODO(joyyoj) read feature mapping table's name from table attributes. // TODO(joyyoj) pr may contains partition. - fmap := columns.FeatureMap{pr.tables[0] + "_feature_map", ""} + fmap := columns.FeatureMap{ + Table: pr.tables[0] + "_feature_map", + Partition: ""} var meta metadata fields := make([]string, 0) if db != nil { @@ -670,22 +672,22 @@ func (meta *metadata) getDenseColumnInfo(keys []string, refColumns map[string]*c shape[0] = len(fields) if userSpec, ok := refColumns[ct.Name()]; ok { output[ct.Name()] = &columns.ColumnSpec{ - ct.Name(), - false, - shape, - userSpec.DType, - userSpec.Delimiter, - nil, - *meta.featureMap} + ColumnName: ct.Name(), + IsSparse: false, + Shape: shape, + DType: userSpec.DType, + Delimiter: userSpec.Delimiter, + Vocabulary: nil, + FeatureMap: *meta.featureMap} } else { output[ct.Name()] = &columns.ColumnSpec{ - ct.Name(), - false, - shape, - "float", - ",", - nil, - *meta.featureMap} + ColumnName: ct.Name(), + IsSparse: false, + Shape: shape, + DType: "float", + Delimiter: ",", + Vocabulary: nil, + FeatureMap: *meta.featureMap} } } } @@ -732,7 +734,14 @@ func (meta *metadata) getSparseColumnInfo() (map[string]*columns.ColumnSpec, err column, present := output[*name] if !present { shape := make([]int, 0, 1000) - column := &columns.ColumnSpec{*name, true, shape, "int64", "", nil, *meta.featureMap} + column := &columns.ColumnSpec{ + ColumnName: *name, + IsSparse: true, + Shape: shape, + DType: "int64", + Delimiter: "", + Vocabulary: nil, + FeatureMap: *meta.featureMap} column.DType = "int64" output[*name] = column } From 27d23c3396dcd025a6ea3eafcbec2f993b9be557 Mon Sep 17 00:00:00 2001 From: w7u Date: Mon, 14 Oct 2019 17:15:08 +0800 Subject: [PATCH 2/9] add explainer for AnalyzeIR --- pkg/sql/codegen/intermediate_representation.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/sql/codegen/intermediate_representation.go b/pkg/sql/codegen/intermediate_representation.go index 9a481d07bb..305959caaa 100644 --- a/pkg/sql/codegen/intermediate_representation.go +++ b/pkg/sql/codegen/intermediate_representation.go @@ -117,6 +117,8 @@ type AnalyzeIR struct { // "select ... analyze ... with analyze.plot_type = "bar"", // the Attributes will be {"analyze.plot_type": "bar"} Attributes map[string]interface{} + // SQLFlow supports TreeExplainer so far. + Explainer string // TrainIR is the TrainIR used for generating the training job of the corresponding model TrainIR TrainIR } From 1f8dad72bd78ff7b6f841f4b7cb8162014df7033 Mon Sep 17 00:00:00 2001 From: w7u Date: Mon, 14 Oct 2019 21:41:00 +0800 Subject: [PATCH 3/9] add test for analyzeIR --- pkg/sql/ir_generator.go | 15 +++++++++++++++ pkg/sql/ir_generator_test.go | 22 ++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/pkg/sql/ir_generator.go b/pkg/sql/ir_generator.go index 580283aaed..9f7f765452 100644 --- a/pkg/sql/ir_generator.go +++ b/pkg/sql/ir_generator.go @@ -98,6 +98,21 @@ func generatePredictIR(slct *extendedSelect, connStr string, cwd string, modelDi }, nil } +func generateAnalyzeIR(slct *extendedSelect, connStr string) (*codegen.AnalyzeIR, error) { + attrs, err := generateAttributeIR(&slct.analyzeAttrs) + if err != nil { + return nil, err + } + return &codegen.AnalyzeIR{ + DataSource: connStr, + Select: slct.standardSelect.String(), + Attributes: attrs, + Explainer: slct.explainer, + // TrainIR is the TrainIR used for generating the training job of the corresponding model + // TrainIR TrainIR + }, nil +} + func generateAttributeIR(attrs *attrs) (map[string]interface{}, error) { ret := make(map[string]interface{}) for k, v := range *attrs { diff --git a/pkg/sql/ir_generator_test.go b/pkg/sql/ir_generator_test.go index cd39b7e7f8..f14c69d5c2 100644 --- a/pkg/sql/ir_generator_test.go +++ b/pkg/sql/ir_generator_test.go @@ -193,3 +193,25 @@ INTO sqlflow_models.mymodel;`, testDB, modelDir, nil) a.True(ok) a.Equal("sepal_length", nc.FieldMeta.Name) } + +func TestGenerateAnalyzeIR(t *testing.T) { + a := assert.New(t) + stmt := ` + SELECT * + FROM iris.train + ANALYZE sqlflow_models.my_xgboost_model + WITH + shap_summary.plot_type="bar", + shap_summary.alpha=1, + shap_summary.sort=True + USING TreeExplainer; + ` + pr, e := newParser().Parse(stmt) + a.NoError(e) + + connStr := "mysql://root:root@tcp(localhost)" + ir, e := generateAnalyzeIR(pr, connStr) + a.NoError(e) + a.Equal(ir.Explainer, "TreeExplainer") + a.Equal(ir.DataSource, connStr) +} From 3e8324c0524fd42c724b0836ca15af38b14d5be8 Mon Sep 17 00:00:00 2001 From: w7u Date: Thu, 17 Oct 2019 21:02:41 +0800 Subject: [PATCH 4/9] add ut for generateAnalyzeIR --- .../codegen/intermediate_representation.go | 2 +- pkg/sql/ir_generator.go | 29 ++++++++---- pkg/sql/ir_generator_test.go | 46 ++++++++++++++++--- 3 files changed, 61 insertions(+), 16 deletions(-) diff --git a/pkg/sql/codegen/intermediate_representation.go b/pkg/sql/codegen/intermediate_representation.go index 305959caaa..372eab4972 100644 --- a/pkg/sql/codegen/intermediate_representation.go +++ b/pkg/sql/codegen/intermediate_representation.go @@ -120,5 +120,5 @@ type AnalyzeIR struct { // SQLFlow supports TreeExplainer so far. Explainer string // TrainIR is the TrainIR used for generating the training job of the corresponding model - TrainIR TrainIR + TrainIR *TrainIR } diff --git a/pkg/sql/ir_generator.go b/pkg/sql/ir_generator.go index 9f7f765452..f7b80bbaea 100644 --- a/pkg/sql/ir_generator.go +++ b/pkg/sql/ir_generator.go @@ -72,20 +72,27 @@ func generateTrainIR(slct *extendedSelect, connStr string) (*codegen.TrainIR, er }, nil } -func generatePredictIR(slct *extendedSelect, connStr string, cwd string, modelDir string) (*codegen.PredictIR, error) { - attrMap, err := generateAttributeIR(&slct.predAttrs) +func generateTrainIRByModel(slct *extendedSelect, connStr, cwd, modelDir string) (*codegen.TrainIR, error) { + db, err := open(connStr) if err != nil { return nil, err } - db, err := open(connStr) + defer db.Close() + + slctWithTrain, _, err := loadModelMeta(slct, db, cwd, modelDir, slct.trainedModel) if err != nil { return nil, err } - slctWithTrain, _, err := loadModelMeta(slct, db, cwd, modelDir, slct.model) + return generateTrainIR(slctWithTrain, connStr) +} + +func generatePredictIR(slct *extendedSelect, connStr string, cwd string, modelDir string) (*codegen.PredictIR, error) { + attrMap, err := generateAttributeIR(&slct.predAttrs) if err != nil { return nil, err } - trainir, err := generateTrainIR(slctWithTrain, connStr) + + trainIR, err := generateTrainIRByModel(slct, connStr, cwd, modelDir) if err != nil { return nil, err } @@ -94,22 +101,26 @@ func generatePredictIR(slct *extendedSelect, connStr string, cwd string, modelDi Select: slct.standardSelect.String(), ResultTable: slct.into, Attributes: attrMap, - TrainIR: trainir, + TrainIR: trainIR, }, nil } -func generateAnalyzeIR(slct *extendedSelect, connStr string) (*codegen.AnalyzeIR, error) { +func generateAnalyzeIR(slct *extendedSelect, connStr, cwd, modelDir string) (*codegen.AnalyzeIR, error) { attrs, err := generateAttributeIR(&slct.analyzeAttrs) if err != nil { return nil, err } + + trainIR, err := generateTrainIRByModel(slct, connStr, cwd, modelDir) + if err != nil { + return nil, err + } return &codegen.AnalyzeIR{ DataSource: connStr, Select: slct.standardSelect.String(), Attributes: attrs, Explainer: slct.explainer, - // TrainIR is the TrainIR used for generating the training job of the corresponding model - // TrainIR TrainIR + TrainIR: trainIR, }, nil } diff --git a/pkg/sql/ir_generator_test.go b/pkg/sql/ir_generator_test.go index f14c69d5c2..375582dc7f 100644 --- a/pkg/sql/ir_generator_test.go +++ b/pkg/sql/ir_generator_test.go @@ -195,8 +195,35 @@ INTO sqlflow_models.mymodel;`, testDB, modelDir, nil) } func TestGenerateAnalyzeIR(t *testing.T) { + if getEnv("SQLFLOW_TEST_DB", "mysql") != "mysql" { + t.Skip(fmt.Sprintf("%s: skip test", getEnv("SQLFLOW_TEST_DB", "mysql"))) + } a := assert.New(t) - stmt := ` + + modelDir, e := ioutil.TempDir("/tmp", "sqlflow_models") + a.Nil(e) + defer os.RemoveAll(modelDir) + stream := runExtendedSQL(` + SELECT * + FROM iris.train + TO TRAIN xgboost.gbtree + WITH + objective="multi:softprob", + train.num_boost_round = 30, + eta = 3.1, + num_class = 3 + COLUMN sepal_length, sepal_width, petal_length, petal_width + LABEL class + INTO sqlflow_models.my_xgboost_model; + `, testDB, modelDir, nil) + a.True(goodStream(stream.ReadAll())) + + // Test generate PredicrIR + cwd, e := ioutil.TempDir("/tmp", "sqlflow") + a.Nil(e) + defer os.RemoveAll(cwd) + + pr, e := newParser().Parse(` SELECT * FROM iris.train ANALYZE sqlflow_models.my_xgboost_model @@ -205,13 +232,20 @@ func TestGenerateAnalyzeIR(t *testing.T) { shap_summary.alpha=1, shap_summary.sort=True USING TreeExplainer; - ` - pr, e := newParser().Parse(stmt) + `) a.NoError(e) - connStr := "mysql://root:root@tcp(localhost)" - ir, e := generateAnalyzeIR(pr, connStr) + connStr := "mysql://root:root@tcp(127.0.0.1:3306)/?maxAllowedPacket=0" + ir, e := generateAnalyzeIR(pr, connStr, cwd, modelDir) a.NoError(e) - a.Equal(ir.Explainer, "TreeExplainer") a.Equal(ir.DataSource, connStr) + a.Equal(ir.Explainer, "TreeExplainer") + a.Equal(len(ir.Attributes), 3) + a.Equal(ir.Attributes["shap_summary.sort"], "True") + a.Equal(ir.Attributes["shap_summary.plot_type"], "bar") + a.Equal(ir.Attributes["shap_summary.alpha"], 1) + + nc, ok := ir.TrainIR.Features["feature_columns"][0].(*codegen.NumericColumn) + a.True(ok) + a.Equal("sepal_length", nc.FieldMeta.Name) } From d635bf281ca6cbba17f712bc422005a86d49c5b8 Mon Sep 17 00:00:00 2001 From: w7u Date: Thu, 17 Oct 2019 21:48:21 +0800 Subject: [PATCH 5/9] remove TO before TRAIN/ANALYZE in ut --- pkg/sql/ir_generator_test.go | 24 ++++++++++++++---------- pkg/sql/parser.go | 13 ------------- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/pkg/sql/ir_generator_test.go b/pkg/sql/ir_generator_test.go index 375582dc7f..43a898438a 100644 --- a/pkg/sql/ir_generator_test.go +++ b/pkg/sql/ir_generator_test.go @@ -158,9 +158,11 @@ func TestGeneratePredictIR(t *testing.T) { } a := assert.New(t) parser := newParser() - predSQL := `SELECT * FROM iris.test -TO PREDICT iris.predict.class -USING sqlflow_models.mymodel;` + predSQL := ` + SELECT * FROM iris.test + PREDICT iris.predict.class + USING sqlflow_models.mymodel; + ` r, e := parser.Parse(predSQL) a.NoError(e) @@ -170,12 +172,14 @@ USING sqlflow_models.mymodel;` modelDir, e := ioutil.TempDir("/tmp", "sqlflow_models") a.Nil(e) defer os.RemoveAll(modelDir) - stream := runExtendedSQL(`SELECT * FROM iris.train -TO TRAIN DNNClassifier -WITH model.n_classes=3, model.hidden_units=[10,20] -COLUMN sepal_length, sepal_width, petal_length, petal_width -LABEL class -INTO sqlflow_models.mymodel;`, testDB, modelDir, nil) + stream := runExtendedSQL(` + SELECT * FROM iris.train + TRAIN DNNClassifier + WITH model.n_classes=3, model.hidden_units=[10,20] + COLUMN sepal_length, sepal_width, petal_length, petal_width + LABEL class + INTO sqlflow_models.mymodel; + `, testDB, modelDir, nil) a.True(goodStream(stream.ReadAll())) // Test generate PredicrIR @@ -206,7 +210,7 @@ func TestGenerateAnalyzeIR(t *testing.T) { stream := runExtendedSQL(` SELECT * FROM iris.train - TO TRAIN xgboost.gbtree + TRAIN xgboost.gbtree WITH objective="multi:softprob", train.num_boost_round = 30, diff --git a/pkg/sql/parser.go b/pkg/sql/parser.go index 9e7e42adba..e8a1853b99 100644 --- a/pkg/sql/parser.go +++ b/pkg/sql/parser.go @@ -1,16 +1,3 @@ -// Copyright 2019 The SQLFlow Authors. All rights reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - // Code generated by goyacc -p sql -o parser.go sql.y. DO NOT EDIT. //line sql.y:2 From b6c98973a9f7ceb12f4e7b0401e52576c10b33da Mon Sep 17 00:00:00 2001 From: w7u Date: Thu, 17 Oct 2019 22:47:07 +0800 Subject: [PATCH 6/9] fix TestGeneratePredictIR --- pkg/sql/ir_generator.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/sql/ir_generator.go b/pkg/sql/ir_generator.go index f7b80bbaea..f3363d6899 100644 --- a/pkg/sql/ir_generator.go +++ b/pkg/sql/ir_generator.go @@ -72,14 +72,14 @@ func generateTrainIR(slct *extendedSelect, connStr string) (*codegen.TrainIR, er }, nil } -func generateTrainIRByModel(slct *extendedSelect, connStr, cwd, modelDir string) (*codegen.TrainIR, error) { +func generateTrainIRByModel(slct *extendedSelect, connStr, cwd, modelDir, model string) (*codegen.TrainIR, error) { db, err := open(connStr) if err != nil { return nil, err } defer db.Close() - slctWithTrain, _, err := loadModelMeta(slct, db, cwd, modelDir, slct.trainedModel) + slctWithTrain, _, err := loadModelMeta(slct, db, cwd, modelDir, model) if err != nil { return nil, err } @@ -92,7 +92,7 @@ func generatePredictIR(slct *extendedSelect, connStr string, cwd string, modelDi return nil, err } - trainIR, err := generateTrainIRByModel(slct, connStr, cwd, modelDir) + trainIR, err := generateTrainIRByModel(slct, connStr, cwd, modelDir, slct.model) if err != nil { return nil, err } @@ -111,7 +111,7 @@ func generateAnalyzeIR(slct *extendedSelect, connStr, cwd, modelDir string) (*co return nil, err } - trainIR, err := generateTrainIRByModel(slct, connStr, cwd, modelDir) + trainIR, err := generateTrainIRByModel(slct, connStr, cwd, modelDir, slct.trainedModel) if err != nil { return nil, err } From 00733b8e113943853dffec780717ecec2d68ed7e Mon Sep 17 00:00:00 2001 From: w7u Date: Thu, 17 Oct 2019 22:49:51 +0800 Subject: [PATCH 7/9] head --- pkg/sql/parser.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pkg/sql/parser.go b/pkg/sql/parser.go index e8a1853b99..9e7e42adba 100644 --- a/pkg/sql/parser.go +++ b/pkg/sql/parser.go @@ -1,3 +1,16 @@ +// Copyright 2019 The SQLFlow Authors. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + // Code generated by goyacc -p sql -o parser.go sql.y. DO NOT EDIT. //line sql.y:2 From 39760b4eba20169bdda4cc00573d59067abbe391 Mon Sep 17 00:00:00 2001 From: w7u Date: Fri, 18 Oct 2019 10:19:36 +0800 Subject: [PATCH 8/9] fix comments --- pkg/sql/codegen/intermediate_representation.go | 2 +- pkg/sql/ir_generator_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/sql/codegen/intermediate_representation.go b/pkg/sql/codegen/intermediate_representation.go index 372eab4972..a33c493637 100644 --- a/pkg/sql/codegen/intermediate_representation.go +++ b/pkg/sql/codegen/intermediate_representation.go @@ -117,7 +117,7 @@ type AnalyzeIR struct { // "select ... analyze ... with analyze.plot_type = "bar"", // the Attributes will be {"analyze.plot_type": "bar"} Attributes map[string]interface{} - // SQLFlow supports TreeExplainer so far. + // Explainer types. For example TreeExplainer. Explainer string // TrainIR is the TrainIR used for generating the training job of the corresponding model TrainIR *TrainIR diff --git a/pkg/sql/ir_generator_test.go b/pkg/sql/ir_generator_test.go index 43a898438a..512ac8eaab 100644 --- a/pkg/sql/ir_generator_test.go +++ b/pkg/sql/ir_generator_test.go @@ -222,7 +222,7 @@ func TestGenerateAnalyzeIR(t *testing.T) { `, testDB, modelDir, nil) a.True(goodStream(stream.ReadAll())) - // Test generate PredicrIR + // Test generate AnalyzeIR cwd, e := ioutil.TempDir("/tmp", "sqlflow") a.Nil(e) defer os.RemoveAll(cwd) From 7225c3b5312037ddd4a4b976afd4cbd2b5b76592 Mon Sep 17 00:00:00 2001 From: w7u Date: Fri, 18 Oct 2019 14:42:30 +0800 Subject: [PATCH 9/9] fix endline --- pkg/sql/ir_generator_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/sql/ir_generator_test.go b/pkg/sql/ir_generator_test.go index 906a6cfc82..9e2517ba39 100644 --- a/pkg/sql/ir_generator_test.go +++ b/pkg/sql/ir_generator_test.go @@ -270,4 +270,4 @@ func TestInferStringValue(t *testing.T) { a.Equal(inferStringValue("2.3"), float32(2.3)) a.Equal(inferStringValue("\"2.3\""), "2.3") a.Equal(inferStringValue("'2.3'"), "2.3") -} \ No newline at end of file +}