From ac1a54122cc56bc72d1251c4d8d48849b1ba117d Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Sat, 31 Aug 2019 23:55:44 +0800 Subject: [PATCH 1/7] rename xgboost to ant_xgboost --- ...gn.md => ant-xgboost_on_sqlflow_design.md} | 4 +- scripts/test_e2e.sh | 4 +- ...egen_xgboost.go => codegen_ant_xgboost.go} | 102 +++++++++--------- ...st_test.go => codegen_ant_xgboost_test.go} | 44 ++++---- sql/executor.go | 4 +- sql/executor_test.go | 6 +- .../{xgboost => ant_xgboost}/__init__.py | 2 +- .../{xgboost => ant_xgboost}/common.py | 0 .../sqlflow_ant_xgboost_main.py} | 6 +- .../sqlflow_data_source.py | 0 .../sqlflow_data_source_test.py | 0 ...c_xgboost.py => test_magic_ant_xgboost.py} | 4 +- 12 files changed, 88 insertions(+), 88 deletions(-) rename doc/{xgboost_on_sqlflow_design.md => ant-xgboost_on_sqlflow_design.md} (97%) rename sql/{codegen_xgboost.go => codegen_ant_xgboost.go} (87%) rename sql/{codegen_xgboost_test.go => codegen_ant_xgboost_test.go} (92%) rename sql/python/sqlflow_submitter/{xgboost => ant_xgboost}/__init__.py (93%) rename sql/python/sqlflow_submitter/{xgboost => ant_xgboost}/common.py (100%) rename sql/python/sqlflow_submitter/{xgboost/sqlflow_xgboost_main.py => ant_xgboost/sqlflow_ant_xgboost_main.py} (92%) rename sql/python/sqlflow_submitter/{xgboost => ant_xgboost}/sqlflow_data_source.py (100%) rename sql/python/sqlflow_submitter/{xgboost => ant_xgboost}/sqlflow_data_source_test.py (100%) rename sql/python/{test_magic_xgboost.py => test_magic_ant_xgboost.py} (96%) diff --git a/doc/xgboost_on_sqlflow_design.md b/doc/ant-xgboost_on_sqlflow_design.md similarity index 97% rename from doc/xgboost_on_sqlflow_design.md rename to doc/ant-xgboost_on_sqlflow_design.md index b8bdd9d0b1..8582f36c69 100644 --- a/doc/xgboost_on_sqlflow_design.md +++ b/doc/ant-xgboost_on_sqlflow_design.md @@ -1,8 +1,8 @@ -# _Design:_ xgboost on sqlflow +# _Design:_ AntXGBoost on sqlflow ## Overview -This is a design doc about why and how to support running xgboost via sqlflow as a machine learning estimator. +This is a design doc about why and how to support running AntXGBboost via sqlflow as a machine learning estimator. We propose to build a lightweight python template for xgboost on basis of `xgblauncher`, an incubating xgboost wrapper in [ant-xgboost](https://github.com/alipay/ant-xgboost). diff --git a/scripts/test_e2e.sh b/scripts/test_e2e.sh index 194fa4a11a..47646d1202 100644 --- a/scripts/test_e2e.sh +++ b/scripts/test_e2e.sh @@ -48,8 +48,8 @@ export PYTHONPATH=$GOPATH/src/github.com/sql-machine-learning/sqlflow/sql/python sqlflowserver --datasource=${DATASOURCE} & # e2e test for standard SQL SQLFLOW_SERVER=localhost:50051 ipython sql/python/test_magic.py -# e2e test for xgboost train and prediciton SQL. -SQLFLOW_SERVER=localhost:50051 ipython sql/python/test_magic_xgboost.py +# e2e test for ant-xgboost train and prediciton SQL. +SQLFLOW_SERVER=localhost:50051 ipython sql/python/test_magic_ant_xgboost.py # TODO(terrytangyuan): Enable this when ElasticDL is open sourced # e2e test for ElasticDL SQL # export SQLFLOW_submitter=elasticdl diff --git a/sql/codegen_xgboost.go b/sql/codegen_ant_xgboost.go similarity index 87% rename from sql/codegen_xgboost.go rename to sql/codegen_ant_xgboost.go index 72a6e7a794..5083d98d9c 100644 --- a/sql/codegen_xgboost.go +++ b/sql/codegen_ant_xgboost.go @@ -28,7 +28,7 @@ import ( "sqlflow.org/gomaxcompute" ) -type xgboostFiller struct { +type antXGBoostFiller struct { ModelPath string xgLearningFields xgColumnFields @@ -151,15 +151,15 @@ func xgMultiSparseError(colNames []string) error { } func xgUnknownFCError(kw string) error { - return fmt.Errorf("xgUnknownFCError: feature column keyword(`%s`) is not supported by xgboost engine", kw) + return fmt.Errorf("xgUnknownFCError: feature column keyword(`%s`) is not supported by ant-xgboost engine", kw) } func xgUnsupportedColTagError() error { - return fmt.Errorf("xgUnsupportedColTagError: valid column tags of xgboost engine([feature_columns, group, weight])") + return fmt.Errorf("xgUnsupportedColTagError: valid column tags of ant-xgboost engine([feature_columns, group, weight])") } -func uIntPartial(key string, ptrFn func(*xgboostFiller) *uint) func(*map[string][]string, *xgboostFiller) error { - return func(a *map[string][]string, r *xgboostFiller) error { +func uIntPartial(key string, ptrFn func(*antXGBoostFiller) *uint) func(*map[string][]string, *antXGBoostFiller) error { + return func(a *map[string][]string, r *antXGBoostFiller) error { // xgParseAttr will ensure the key is existing in map val, _ := (*a)[key] if len(val) != 1 { @@ -177,8 +177,8 @@ func uIntPartial(key string, ptrFn func(*xgboostFiller) *uint) func(*map[string] } } -func fp32Partial(key string, ptrFn func(*xgboostFiller) *float32) func(*map[string][]string, *xgboostFiller) error { - return func(a *map[string][]string, r *xgboostFiller) error { +func fp32Partial(key string, ptrFn func(*antXGBoostFiller) *float32) func(*map[string][]string, *antXGBoostFiller) error { + return func(a *map[string][]string, r *antXGBoostFiller) error { // xgParseAttr will ensure the key is existing in map val, _ := (*a)[key] if len(val) != 1 { @@ -196,8 +196,8 @@ func fp32Partial(key string, ptrFn func(*xgboostFiller) *float32) func(*map[stri } } -func boolPartial(key string, ptrFn func(*xgboostFiller) *bool) func(*map[string][]string, *xgboostFiller) error { - return func(a *map[string][]string, r *xgboostFiller) error { +func boolPartial(key string, ptrFn func(*antXGBoostFiller) *bool) func(*map[string][]string, *antXGBoostFiller) error { + return func(a *map[string][]string, r *antXGBoostFiller) error { // xgParseAttr will ensure the key is existing in map val, _ := (*a)[key] if len(val) != 1 { @@ -214,8 +214,8 @@ func boolPartial(key string, ptrFn func(*xgboostFiller) *bool) func(*map[string] } } -func strPartial(key string, ptrFn func(*xgboostFiller) *string) func(*map[string][]string, *xgboostFiller) error { - return func(a *map[string][]string, r *xgboostFiller) error { +func strPartial(key string, ptrFn func(*antXGBoostFiller) *string) func(*map[string][]string, *antXGBoostFiller) error { + return func(a *map[string][]string, r *antXGBoostFiller) error { // xgParseAttr will ensure the key is existing in map val, _ := (*a)[key] if len(val) != 1 { @@ -231,8 +231,8 @@ func strPartial(key string, ptrFn func(*xgboostFiller) *string) func(*map[string } } -func sListPartial(key string, ptrFn func(*xgboostFiller) *[]string) func(*map[string][]string, *xgboostFiller) error { - return func(a *map[string][]string, r *xgboostFiller) error { +func sListPartial(key string, ptrFn func(*antXGBoostFiller) *[]string) func(*map[string][]string, *antXGBoostFiller) error { + return func(a *map[string][]string, r *antXGBoostFiller) error { // xgParseAttr will ensure the key is existing in map val, _ := (*a)[key] strListPtr := ptrFn(r) @@ -245,37 +245,37 @@ func sListPartial(key string, ptrFn func(*xgboostFiller) *[]string) func(*map[st } } -var xgbTrainAttrSetterMap = map[string]func(*map[string][]string, *xgboostFiller) error{ +var xgbTrainAttrSetterMap = map[string]func(*map[string][]string, *antXGBoostFiller) error{ // booster params - "train.objective": strPartial("train.objective", func(r *xgboostFiller) *string { return &(r.Objective) }), - "train.booster": strPartial("train.booster", func(r *xgboostFiller) *string { return &(r.Booster) }), - "train.max_depth": uIntPartial("train.max_depth", func(r *xgboostFiller) *uint { return &(r.MaxDepth) }), - "train.num_class": uIntPartial("train.num_class", func(r *xgboostFiller) *uint { return &(r.NumClass) }), - "train.eta": fp32Partial("train.eta", func(r *xgboostFiller) *float32 { return &(r.Eta) }), - "train.tree_method": strPartial("train.tree_method", func(r *xgboostFiller) *string { return &(r.TreeMethod) }), - "train.eval_metric": strPartial("train.eval_metric", func(r *xgboostFiller) *string { return &(r.EvalMetric) }), - "train.subsample": fp32Partial("train.subsample", func(r *xgboostFiller) *float32 { return &(r.Subsample) }), - "train.colsample_bytree": fp32Partial("train.colsample_bytree", func(r *xgboostFiller) *float32 { return &(r.ColSampleByTree) }), - "train.colsample_bylevel": fp32Partial("train.colsample_bylevel", func(r *xgboostFiller) *float32 { return &(r.ColSampleByLevel) }), - "train.max_bin": uIntPartial("train.max_bin", func(r *xgboostFiller) *uint { return &(r.MaxBin) }), - "train.convergence_criteria": strPartial("train.convergence_criteria", func(r *xgboostFiller) *string { return &(r.ConvergenceCriteria) }), - "train.verbosity": uIntPartial("train.verbosity", func(r *xgboostFiller) *uint { return &(r.Verbosity) }), + "train.objective": strPartial("train.objective", func(r *antXGBoostFiller) *string { return &(r.Objective) }), + "train.booster": strPartial("train.booster", func(r *antXGBoostFiller) *string { return &(r.Booster) }), + "train.max_depth": uIntPartial("train.max_depth", func(r *antXGBoostFiller) *uint { return &(r.MaxDepth) }), + "train.num_class": uIntPartial("train.num_class", func(r *antXGBoostFiller) *uint { return &(r.NumClass) }), + "train.eta": fp32Partial("train.eta", func(r *antXGBoostFiller) *float32 { return &(r.Eta) }), + "train.tree_method": strPartial("train.tree_method", func(r *antXGBoostFiller) *string { return &(r.TreeMethod) }), + "train.eval_metric": strPartial("train.eval_metric", func(r *antXGBoostFiller) *string { return &(r.EvalMetric) }), + "train.subsample": fp32Partial("train.subsample", func(r *antXGBoostFiller) *float32 { return &(r.Subsample) }), + "train.colsample_bytree": fp32Partial("train.colsample_bytree", func(r *antXGBoostFiller) *float32 { return &(r.ColSampleByTree) }), + "train.colsample_bylevel": fp32Partial("train.colsample_bylevel", func(r *antXGBoostFiller) *float32 { return &(r.ColSampleByLevel) }), + "train.max_bin": uIntPartial("train.max_bin", func(r *antXGBoostFiller) *uint { return &(r.MaxBin) }), + "train.convergence_criteria": strPartial("train.convergence_criteria", func(r *antXGBoostFiller) *string { return &(r.ConvergenceCriteria) }), + "train.verbosity": uIntPartial("train.verbosity", func(r *antXGBoostFiller) *uint { return &(r.Verbosity) }), // xgboost train controllers - "train.num_round": uIntPartial("train.num_round", func(r *xgboostFiller) *uint { return &(r.NumRound) }), - "train.auto_train": boolPartial("train.auto_train", func(r *xgboostFiller) *bool { return &(r.AutoTrain) }), + "train.num_round": uIntPartial("train.num_round", func(r *antXGBoostFiller) *uint { return &(r.NumRound) }), + "train.auto_train": boolPartial("train.auto_train", func(r *antXGBoostFiller) *bool { return &(r.AutoTrain) }), // Label, Group, Weight and xgFeatureFields are parsed from columnClause } -var xgbPredAttrSetterMap = map[string]func(*map[string][]string, *xgboostFiller) error{ +var xgbPredAttrSetterMap = map[string]func(*map[string][]string, *antXGBoostFiller) error{ // xgboost output columns (for prediction) - "pred.append_columns": sListPartial("pred.append_columns", func(r *xgboostFiller) *[]string { return &(r.AppendColumns) }), - "pred.prob_column": strPartial("pred.prob_column", func(r *xgboostFiller) *string { return &(r.ProbColumn) }), - "pred.detail_column": strPartial("pred.detail_column", func(r *xgboostFiller) *string { return &(r.DetailColumn) }), - "pred.encoding_column": strPartial("pred.encoding_column", func(r *xgboostFiller) *string { return &(r.EncodingColumn) }), + "pred.append_columns": sListPartial("pred.append_columns", func(r *antXGBoostFiller) *[]string { return &(r.AppendColumns) }), + "pred.prob_column": strPartial("pred.prob_column", func(r *antXGBoostFiller) *string { return &(r.ProbColumn) }), + "pred.detail_column": strPartial("pred.detail_column", func(r *antXGBoostFiller) *string { return &(r.DetailColumn) }), + "pred.encoding_column": strPartial("pred.encoding_column", func(r *antXGBoostFiller) *string { return &(r.EncodingColumn) }), // Label, Group, Weight and xgFeatureFields are parsed from columnClause } -func xgParseAttr(pr *extendedSelect, r *xgboostFiller) error { +func xgParseAttr(pr *extendedSelect, r *antXGBoostFiller) error { var rawAttrs map[string]*expr if pr.train { rawAttrs = pr.trainAttrs @@ -300,8 +300,8 @@ func xgParseAttr(pr *extendedSelect, r *xgboostFiller) error { } } - // fill xgboostFiller with attrs - var setterMap map[string]func(*map[string][]string, *xgboostFiller) error + // fill antXGBoostFiller with attrs + var setterMap map[string]func(*map[string][]string, *antXGBoostFiller) error if pr.train { setterMap = xgbTrainAttrSetterMap } else { @@ -334,7 +334,7 @@ func xgParseAttr(pr *extendedSelect, r *xgboostFiller) error { // data example: COLUMN SPARSE("0:1.5 1:100.1f 11:-1.2", [20], " ") // 2. tf feature columns // Roughly same as TFEstimator, except output shape of feaColumns are required to be 1-dim. -func parseFeatureColumns(columns *exprlist, r *xgboostFiller) error { +func parseFeatureColumns(columns *exprlist, r *antXGBoostFiller) error { feaCols, colSpecs, err := resolveTrainColumns(columns) if err != nil { return err @@ -355,7 +355,7 @@ func parseFeatureColumns(columns *exprlist, r *xgboostFiller) error { // parseSparseKeyValueFeatures, parse features which is identified by `SPARSE`. // ex: SPARSE(col1, [100], comma) -func parseSparseKeyValueFeatures(colSpecs []*columnSpec, r *xgboostFiller) error { +func parseSparseKeyValueFeatures(colSpecs []*columnSpec, r *antXGBoostFiller) error { var colNames []string for _, spec := range colSpecs { colNames = append(colNames, spec.ColumnName) @@ -401,7 +401,7 @@ func isSimpleColumn(col featureColumn) bool { return false } -func parseDenseFeatures(feaCols []featureColumn, r *xgboostFiller) error { +func parseDenseFeatures(feaCols []featureColumn, r *antXGBoostFiller) error { allSimpleCol := true for _, col := range feaCols { if allSimpleCol && !isSimpleColumn(col) { @@ -487,7 +487,7 @@ func parseSimpleColumn(field string, columns *exprlist) (*xgFeatureMeta, error) return fm, nil } -func xgParseColumns(pr *extendedSelect, filler *xgboostFiller) error { +func xgParseColumns(pr *extendedSelect, filler *antXGBoostFiller) error { for target, columns := range pr.columns { switch target { case "feature_columns": @@ -529,31 +529,31 @@ func xgParseColumns(pr *extendedSelect, filler *xgboostFiller) error { return nil } -func xgParseEstimator(pr *extendedSelect, filler *xgboostFiller) error { +func xgParseEstimator(pr *extendedSelect, filler *antXGBoostFiller) error { switch strings.ToUpper(pr.estimator) { - case "XGBOOST.ESTIMATOR": + case "ANTXGBOOST.ESTIMATOR": if len(filler.Objective) == 0 { return xgParseEstimatorError(pr.estimator, fmt.Errorf("objective must be defined")) } - case "XGBOOST.CLASSIFIER": + case "ANTXGBOOST.CLASSIFIER": if obj := filler.Objective; len(obj) == 0 { filler.Objective = "binary:logistic" } else if !strings.HasPrefix(obj, "binary") && !strings.HasPrefix(obj, "multi") { return xgParseEstimatorError(pr.estimator, fmt.Errorf("found non classification objective(%s)", obj)) } - case "XGBOOST.BINARYCLASSIFIER": + case "ANTXGBOOST.BINARYCLASSIFIER": if obj := filler.Objective; len(obj) == 0 { filler.Objective = "binary:logistic" } else if !strings.HasPrefix(obj, "binary") { return xgParseEstimatorError(pr.estimator, fmt.Errorf("found non binary objective(%s)", obj)) } - case "XGBOOST.MULTICLASSIFIER": + case "ANTXGBOOST.MULTICLASSIFIER": if obj := filler.Objective; len(obj) == 0 { filler.Objective = "multi:softprob" } else if !strings.HasPrefix(obj, "multi") { return xgParseEstimatorError(pr.estimator, fmt.Errorf("found non multi-class objective(%s)", obj)) } - case "XGBOOST.REGRESSOR": + case "ANTXGBOOST.REGRESSOR": if obj := filler.Objective; len(obj) == 0 { filler.Objective = "reg:squarederror" } else if !strings.HasPrefix(obj, "reg") && !strings.HasPrefix(obj, "rank") { @@ -566,8 +566,8 @@ func xgParseEstimator(pr *extendedSelect, filler *xgboostFiller) error { return nil } -func newXGBoostFiller(pr *extendedSelect, ds *trainAndValDataset, fts fieldTypes, db *DB) (*xgboostFiller, error) { - filler := &xgboostFiller{ +func newXGBoostFiller(pr *extendedSelect, ds *trainAndValDataset, fts fieldTypes, db *DB) (*antXGBoostFiller, error) { + filler := &antXGBoostFiller{ ModelPath: pr.save, } filler.IsTrain = pr.train @@ -696,7 +696,7 @@ func xgFillDatabaseInfo(r *xgDataSourceFields, db *DB) error { return nil } -func xgCreatePredictionTable(pr *extendedSelect, r *xgboostFiller, db *DB) error { +func xgCreatePredictionTable(pr *extendedSelect, r *antXGBoostFiller, db *DB) error { dropStmt := fmt.Sprintf("drop table if exists %s;", r.OutputTable) if _, e := db.Exec(dropStmt); e != nil { return fmt.Errorf("failed executing %s: %q", dropStmt, e) @@ -784,7 +784,7 @@ var xgTemplate = template.Must(template.New("codegenXG").Parse(xgTemplateText)) const xgTemplateText = ` from launcher.config_fields import JobType -from sqlflow_submitter.xgboost import run_with_sqlflow +from sqlflow_submitter.ant_xgboost import run_with_sqlflow {{if .IsTrain}} mode = JobType.TRAIN diff --git a/sql/codegen_xgboost_test.go b/sql/codegen_ant_xgboost_test.go similarity index 92% rename from sql/codegen_xgboost_test.go rename to sql/codegen_ant_xgboost_test.go index bacec49ee9..cc742377dc 100644 --- a/sql/codegen_xgboost_test.go +++ b/sql/codegen_ant_xgboost_test.go @@ -22,10 +22,10 @@ import ( ) const ( - testXGTrainSelectIris = ` + testAntXGTrainSelectIris = ` SELECT * FROM iris.train -TRAIN xgboost.Estimator +TRAIN antxgboost.Estimator WITH train.objective = "multi:softprob", train.num_class = 3, @@ -37,7 +37,7 @@ COLUMN sepal_length, sepal_width, petal_length, petal_width LABEL class INTO sqlflow_models.my_xgboost_model; ` - testXGPredSelectIris = ` + testAntXGPredSelectIris = ` SELECT * FROM iris.test PREDICT iris.predict.result @@ -52,10 +52,10 @@ USING sqlflow_models.my_xgboost_model; func TestPartials(t *testing.T) { a := assert.New(t) tmpMap := make(map[string][]string) - filler := &xgboostFiller{} + filler := &antXGBoostFiller{} // test strPartial - part := strPartial("obj", func(r *xgboostFiller) *string { return &(r.Objective) }) + part := strPartial("obj", func(r *antXGBoostFiller) *string { return &(r.Objective) }) tmpMap["obj"] = []string{"binary:logistic"} e := part(&tmpMap, filler) a.NoError(e) @@ -78,7 +78,7 @@ func TestPartials(t *testing.T) { a.Equal(filler.Objective, "reg:linear") // test uIntPartial - part = uIntPartial("num_class", func(r *xgboostFiller) *uint { return &(r.NumClass) }) + part = uIntPartial("num_class", func(r *antXGBoostFiller) *uint { return &(r.NumClass) }) tmpMap["num_class"] = []string{"3"} e = part(&tmpMap, filler) a.NoError(e) @@ -87,7 +87,7 @@ func TestPartials(t *testing.T) { a.Equal(ok, false) // test fp32Partial - part = fp32Partial("eta", func(r *xgboostFiller) *float32 { return &(r.Eta) }) + part = fp32Partial("eta", func(r *antXGBoostFiller) *float32 { return &(r.Eta) }) tmpMap["eta"] = []string{"-0.33"} e = part(&tmpMap, filler) a.NoError(e) @@ -96,7 +96,7 @@ func TestPartials(t *testing.T) { a.Equal(ok, false) // test boolPartial - part = boolPartial("auto_train", func(r *xgboostFiller) *bool { return &(r.AutoTrain) }) + part = boolPartial("auto_train", func(r *antXGBoostFiller) *bool { return &(r.AutoTrain) }) tmpMap["auto_train"] = []string{"false"} e = part(&tmpMap, filler) a.NoError(e) @@ -109,7 +109,7 @@ func TestPartials(t *testing.T) { a.Equal(filler.AutoTrain, true) // test sListPartial - part = sListPartial("append_columns", func(r *xgboostFiller) *[]string { return &(r.AppendColumns) }) + part = sListPartial("append_columns", func(r *antXGBoostFiller) *[]string { return &(r.AppendColumns) }) tmpMap["append_columns"] = []string{"AA", "BB", "CC"} e = part(&tmpMap, filler) a.NoError(e) @@ -126,8 +126,8 @@ func TestXGBoostAttr(t *testing.T) { } parser := newParser() - parseAndFill := func(clause string) *xgboostFiller { - filler := &xgboostFiller{} + parseAndFill := func(clause string) *antXGBoostFiller { + filler := &antXGBoostFiller{} r, e := parser.Parse(clause) a.NoError(e) e = xgParseAttr(r, filler) @@ -137,7 +137,7 @@ func TestXGBoostAttr(t *testing.T) { trainClause := ` SELECT a, b, c, d, e FROM table_xx -TRAIN xgboost.Estimator +TRAIN antxgboost.Estimator WITH train.objective = "binary:logistic", train.booster = gblinear, @@ -199,14 +199,14 @@ func TestColumnClause(t *testing.T) { parser := newParser() sqlHead := ` SELECT a, b, c, d, e FROM table_xx -TRAIN xgboost.Estimator +TRAIN antxgboost.Estimator WITH attr_x = XXX ` sqlTail := ` LABEL e INTO model_table; ` // test sparseKV schema - filler := &xgboostFiller{} + filler := &antXGBoostFiller{} sparseKVSpec := ` COLUMN SPARSE(a, 100, comma) ` r, e := parser.Parse(sqlHead + sparseKVSpec + sqlTail) a.NoError(e) @@ -227,7 +227,7 @@ LABEL e INTO model_table; a.EqualValues("e", filler.Label) // test raw columns - filler = &xgboostFiller{} + filler = &antXGBoostFiller{} rawColumnsSpec := " COLUMN a, b, b, c, d, c " r, _ = parser.Parse(sqlHead + rawColumnsSpec + sqlTail) e = xgParseColumns(r, filler) @@ -248,7 +248,7 @@ LABEL e INTO model_table; } // test tf.feature_columns - filler = &xgboostFiller{} + filler = &antXGBoostFiller{} fcSpec := " COLUMN a, b, c, EMBEDDING(CATEGORY_ID(d, 2000), 8, mean) FOR feature_columns " r, _ = parser.Parse(sqlHead + fcSpec + sqlTail) e = xgParseColumns(r, filler) @@ -259,7 +259,7 @@ LABEL e INTO model_table; a.True(filler.IsTensorFlowIntegrated) // test group & weight - filler = &xgboostFiller{} + filler = &antXGBoostFiller{} groupWeightSpec := " COLUMN gg FOR group COLUMN ww FOR weight " r, _ = parser.Parse(sqlHead + fcSpec + groupWeightSpec + sqlTail) e = xgParseColumns(r, filler) @@ -270,7 +270,7 @@ LABEL e INTO model_table; a.EqualValues("ww", filler.Weight) // test xgMixSchemaError - filler = &xgboostFiller{} + filler = &antXGBoostFiller{} wrongColSpec := " COLUMN SPARSE(a, 2000, comma), b, c, d " r, _ = parser.Parse(sqlHead + wrongColSpec + sqlTail) e = xgParseColumns(r, filler) @@ -278,7 +278,7 @@ LABEL e INTO model_table; a.EqualValues(e, xgParseColumnError("feature_columns", xgMixSchemaError())) // test `DENSE` keyword - filler = &xgboostFiller{} + filler = &antXGBoostFiller{} wrongColSpec = " COLUMN DENSE(b, 5, comma) " r, _ = parser.Parse(sqlHead + wrongColSpec + sqlTail) e = xgParseColumns(r, filler) @@ -286,7 +286,7 @@ LABEL e INTO model_table; a.EqualValues(e, xgParseColumnError("feature_columns", xgUnknownFCError("DENSE"))) // test xgMultiSparseError - filler = &xgboostFiller{} + filler = &antXGBoostFiller{} wrongColSpec = " COLUMN SPARSE(a, 2000, comma), SPARSE(b, 100, comma) " r, _ = parser.Parse(sqlHead + wrongColSpec + sqlTail) e = xgParseColumns(r, filler) @@ -294,7 +294,7 @@ LABEL e INTO model_table; a.EqualValues(e, xgParseColumnError("feature_columns", xgMultiSparseError([]string{"a", "b"}))) // test xgUnsupportedColTagError - filler = &xgboostFiller{} + filler = &antXGBoostFiller{} unsupportedSpec := " COLUMN gg FOR group COLUMN ww FOR xxxxx " r, _ = parser.Parse(sqlHead + fcSpec + unsupportedSpec + sqlTail) e = xgParseColumns(r, filler) @@ -308,7 +308,7 @@ func TestXGBoostFiller(t *testing.T) { parser := newParser() trainClause := ` SELECT * FROM iris.train -TRAIN xgboost.Regressor +TRAIN antxgboost.Regressor WITH train.max_depth = 5, train.eta = 0.03, diff --git a/sql/executor.go b/sql/executor.go index fdaab0f2e3..20fd4ab340 100644 --- a/sql/executor.go +++ b/sql/executor.go @@ -393,7 +393,7 @@ func train(wr *PipeWriter, tr *extendedSelect, db *DB, cwd string, modelDir stri } var program bytes.Buffer - if strings.HasPrefix(strings.ToUpper(tr.estimator), `XGBOOST.`) { + if strings.HasPrefix(strings.ToUpper(tr.estimator), `ANTXGBOOST.`) { // TODO(sperlingxx): write a separate train pipeline for xgboost to support remote mode if e := genXG(&program, tr, ds, fts, db); e != nil { return fmt.Errorf("genXG %v", e) @@ -459,7 +459,7 @@ func pred(wr *PipeWriter, pr *extendedSelect, db *DB, cwd string, modelDir strin } var buf bytes.Buffer - if strings.HasPrefix(strings.ToUpper(pr.estimator), `XGBOOST.`) { + if strings.HasPrefix(strings.ToUpper(pr.estimator), `ANTXGBOOST.`) { // TODO(sperlingxx): write a separate pred pipeline for xgboost to support remote mode if e := genXG(&buf, pr, nil, fts, db); e != nil { return fmt.Errorf("genXG %v", e) diff --git a/sql/executor_test.go b/sql/executor_test.go index 1dc22a2db9..54cdac76db 100644 --- a/sql/executor_test.go +++ b/sql/executor_test.go @@ -71,16 +71,16 @@ func TestSplitExtendedSQL(t *testing.T) { a.Equal(`train a with b;`, s[0]) } -func TestExecutorTrainAndPredictXGBoost(t *testing.T) { +func TestExecutorTrainAndPredictAntXGBoost(t *testing.T) { a := assert.New(t) modelDir, e := ioutil.TempDir("/tmp", "sqlflow_models") a.Nil(e) defer os.RemoveAll(modelDir) a.NotPanics(func() { - stream := runExtendedSQL(testXGTrainSelectIris, testDB, modelDir, nil) + stream := runExtendedSQL(testAntXGTrainSelectIris, testDB, modelDir, nil) a.True(goodStream(stream.ReadAll())) - stream = runExtendedSQL(testXGPredSelectIris, testDB, modelDir, nil) + stream = runExtendedSQL(testAntXGPredSelectIris, testDB, modelDir, nil) a.True(goodStream(stream.ReadAll())) }) } diff --git a/sql/python/sqlflow_submitter/xgboost/__init__.py b/sql/python/sqlflow_submitter/ant_xgboost/__init__.py similarity index 93% rename from sql/python/sqlflow_submitter/xgboost/__init__.py rename to sql/python/sqlflow_submitter/ant_xgboost/__init__.py index 47dd54a5e7..c6ac079572 100644 --- a/sql/python/sqlflow_submitter/xgboost/__init__.py +++ b/sql/python/sqlflow_submitter/ant_xgboost/__init__.py @@ -12,7 +12,7 @@ # limitations under the License. from .sqlflow_data_source import SQLFlowDataSource, SQLFlowDSConfig -from .sqlflow_xgboost_main import run_with_sqlflow +from .sqlflow_ant_xgboost_main import run_with_sqlflow from .common import XGBoostError __all__ = ['run_with_sqlflow', 'SQLFlowDataSource', 'SQLFlowDSConfig', 'XGBoostError'] diff --git a/sql/python/sqlflow_submitter/xgboost/common.py b/sql/python/sqlflow_submitter/ant_xgboost/common.py similarity index 100% rename from sql/python/sqlflow_submitter/xgboost/common.py rename to sql/python/sqlflow_submitter/ant_xgboost/common.py diff --git a/sql/python/sqlflow_submitter/xgboost/sqlflow_xgboost_main.py b/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_ant_xgboost_main.py similarity index 92% rename from sql/python/sqlflow_submitter/xgboost/sqlflow_xgboost_main.py rename to sql/python/sqlflow_submitter/ant_xgboost/sqlflow_ant_xgboost_main.py index e6a0c8bf38..e3f12a09a1 100644 --- a/sql/python/sqlflow_submitter/xgboost/sqlflow_xgboost_main.py +++ b/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_ant_xgboost_main.py @@ -16,8 +16,8 @@ from launcher import register_data_source, config_helper, config_fields as cf, train, predict -from sqlflow_submitter.xgboost.common import XGBoostError -from sqlflow_submitter.xgboost.sqlflow_data_source import SQLFlowDSConfig, SQLFlowDataSource +from sqlflow_submitter.ant_xgboost.common import XGBoostError +from sqlflow_submitter.ant_xgboost.sqlflow_data_source import SQLFlowDSConfig, SQLFlowDataSource register_data_source('sqlflow', SQLFlowDSConfig, SQLFlowDataSource) @@ -29,7 +29,7 @@ def run_with_sqlflow(mode: str, column_config: str, valid_data_source_config: str = None): if mode not in (cf.JobType.TRAIN, cf.JobType.PREDICT): - raise XGBoostError('Unknown run mode(%s) of xgboost launcher.' % mode) + raise XGBoostError('Unknown run mode(%s) of ant-xgboost launcher.' % mode) is_train = mode == cf.JobType.TRAIN def parse_json_str(string: str): diff --git a/sql/python/sqlflow_submitter/xgboost/sqlflow_data_source.py b/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source.py similarity index 100% rename from sql/python/sqlflow_submitter/xgboost/sqlflow_data_source.py rename to sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source.py diff --git a/sql/python/sqlflow_submitter/xgboost/sqlflow_data_source_test.py b/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source_test.py similarity index 100% rename from sql/python/sqlflow_submitter/xgboost/sqlflow_data_source_test.py rename to sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source_test.py diff --git a/sql/python/test_magic_xgboost.py b/sql/python/test_magic_ant_xgboost.py similarity index 96% rename from sql/python/test_magic_xgboost.py rename to sql/python/test_magic_ant_xgboost.py index b03a0def98..a838beffb3 100644 --- a/sql/python/test_magic_xgboost.py +++ b/sql/python/test_magic_ant_xgboost.py @@ -24,7 +24,7 @@ class TestSQLFlowMagic(unittest.TestCase): train_statement = """ SELECT * FROM iris.train -TRAIN xgboost.Estimator +TRAIN antxgboost.Estimator WITH train.objective = "multi:softprob", train.num_class = 3, @@ -46,7 +46,7 @@ class TestSQLFlowMagic(unittest.TestCase): USING sqlflow_models.my_xgboost_model; """ - def test_xgboost(self): + def test_antxgboost(self): ipython.run_cell_magic("sqlflow", "", self.train_statement) ipython.run_cell_magic("sqlflow", "", self.pred_statement) From fdeb9922a5fcb7821740be18e03215a0333afda9 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Sat, 31 Aug 2019 23:56:53 +0800 Subject: [PATCH 2/7] update --- doc/ant-xgboost_on_sqlflow_design.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/ant-xgboost_on_sqlflow_design.md b/doc/ant-xgboost_on_sqlflow_design.md index 8582f36c69..c7b8c6eb94 100644 --- a/doc/ant-xgboost_on_sqlflow_design.md +++ b/doc/ant-xgboost_on_sqlflow_design.md @@ -1,8 +1,8 @@ -# _Design:_ AntXGBoost on sqlflow +# _Design:_ ant-xgboost on sqlflow ## Overview -This is a design doc about why and how to support running AntXGBboost via sqlflow as a machine learning estimator. +This is a design doc about why and how to support running ant-xgboost via sqlflow as a machine learning estimator. We propose to build a lightweight python template for xgboost on basis of `xgblauncher`, an incubating xgboost wrapper in [ant-xgboost](https://github.com/alipay/ant-xgboost). From 7290c630fdf942ef43392642182d6b8fce4621e7 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Sun, 1 Sep 2019 07:42:24 +0800 Subject: [PATCH 3/7] renmae xgboosterror to antxgboosterror --- sql/python/sqlflow_submitter/ant_xgboost/__init__.py | 4 ++-- sql/python/sqlflow_submitter/ant_xgboost/common.py | 2 +- .../ant_xgboost/sqlflow_ant_xgboost_main.py | 8 ++++---- .../ant_xgboost/sqlflow_data_source.py | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sql/python/sqlflow_submitter/ant_xgboost/__init__.py b/sql/python/sqlflow_submitter/ant_xgboost/__init__.py index c6ac079572..0123cb5d67 100644 --- a/sql/python/sqlflow_submitter/ant_xgboost/__init__.py +++ b/sql/python/sqlflow_submitter/ant_xgboost/__init__.py @@ -13,6 +13,6 @@ from .sqlflow_data_source import SQLFlowDataSource, SQLFlowDSConfig from .sqlflow_ant_xgboost_main import run_with_sqlflow -from .common import XGBoostError +from .common import AntXGBoostError -__all__ = ['run_with_sqlflow', 'SQLFlowDataSource', 'SQLFlowDSConfig', 'XGBoostError'] +__all__ = ['run_with_sqlflow', 'SQLFlowDataSource', 'SQLFlowDSConfig', 'AntXGBoostError'] diff --git a/sql/python/sqlflow_submitter/ant_xgboost/common.py b/sql/python/sqlflow_submitter/ant_xgboost/common.py index 098f183836..1d189a4520 100644 --- a/sql/python/sqlflow_submitter/ant_xgboost/common.py +++ b/sql/python/sqlflow_submitter/ant_xgboost/common.py @@ -12,5 +12,5 @@ # limitations under the License. -class XGBoostError(Exception): +class AntXGBoostError(Exception): pass diff --git a/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_ant_xgboost_main.py b/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_ant_xgboost_main.py index e3f12a09a1..e50f9f8f15 100644 --- a/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_ant_xgboost_main.py +++ b/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_ant_xgboost_main.py @@ -16,7 +16,7 @@ from launcher import register_data_source, config_helper, config_fields as cf, train, predict -from sqlflow_submitter.ant_xgboost.common import XGBoostError +from sqlflow_submitter.ant_xgboost.common import AntXGBoostError from sqlflow_submitter.ant_xgboost.sqlflow_data_source import SQLFlowDSConfig, SQLFlowDataSource register_data_source('sqlflow', SQLFlowDSConfig, SQLFlowDataSource) @@ -29,7 +29,7 @@ def run_with_sqlflow(mode: str, column_config: str, valid_data_source_config: str = None): if mode not in (cf.JobType.TRAIN, cf.JobType.PREDICT): - raise XGBoostError('Unknown run mode(%s) of ant-xgboost launcher.' % mode) + raise AntXGBoostError('Unknown run mode(%s) of ant-xgboost launcher.' % mode) is_train = mode == cf.JobType.TRAIN def parse_json_str(string: str): @@ -71,10 +71,10 @@ def parse_json_str(string: str): train_fields = cf.TrainFields(learning_fields, data_fields, model_fields) train(train_fields) except Exception as e: - raise XGBoostError('XGBoost training task failed: %s' % e) + raise AntXGBoostError('XGBoost training task failed: %s' % e) else: try: pred_fields = cf.PredictFields(data_fields, model_fields) predict(pred_fields) except Exception as e: - raise XGBoostError('XGBoost prediction task failed: %s' % e) + raise AntXGBoostError('XGBoost prediction task failed: %s' % e) diff --git a/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source.py b/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source.py index 145681caaf..ef37885f10 100644 --- a/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source.py +++ b/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source.py @@ -19,7 +19,7 @@ from launcher import DataSource, config_fields, XGBoostResult, XGBoostRecord from launcher.data_units import RecordBuilder -from .common import XGBoostError +from .common import AntXGBoostError from ..db import connect, db_generator, buffered_db_writer @@ -40,7 +40,7 @@ def convert_shape(cls, value) -> typing.List: elif isinstance(value, typing.List): return value else: - raise XGBoostError('invalid shape %s of FeatureMeta' % value) + raise AntXGBoostError('invalid shape %s of FeatureMeta' % value) class SQLFlowDSConfig(typing.NamedTuple): @@ -63,11 +63,11 @@ def __init__(self, rank: int, num_worker: int, source_conf): super().__init__(rank, num_worker, column_conf, source_conf) if not isinstance(source_conf, SQLFlowDSConfig): - raise XGBoostError("SQLFlowDataSource: invalid source conf") + raise AntXGBoostError("SQLFlowDataSource: invalid source conf") # TODO: support tf.feature_column transformation if source_conf.is_tf_integrated: - raise XGBoostError('So far, tf transformation is not supported in xgboost job.') + raise AntXGBoostError('So far, tf transformation is not supported in xgboost job.') self._train = source_conf.is_train self._rcd_builder = RecordBuilder(column_conf.features) @@ -144,7 +144,7 @@ def writer_maker(table_schema): if not self._train: if not source_conf.output_table: - raise XGBoostError('Output_table must be defined in xgboost prediction job.') + raise AntXGBoostError('Output_table must be defined in xgboost prediction job.') def _read_impl(self): label = None From 4e4bce8cce6fdc1649b0364ec5301b50c115a277 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 2 Sep 2019 08:55:55 +0800 Subject: [PATCH 4/7] fix ci --- sql/codegen_analyze.go | 4 ++-- sql/executor.go | 6 +++--- .../sqlflow_submitter/ant_xgboost/sqlflow_data_source.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/codegen_analyze.go b/sql/codegen_analyze.go index a73c44015a..469e5b01be 100644 --- a/sql/codegen_analyze.go +++ b/sql/codegen_analyze.go @@ -38,10 +38,10 @@ func newAnalyzeFiller(db *DB, columns []string, label string) (*analyzeFiller, e } func readFeatureNames(pr *extendedSelect, db *DB) ([]string, string, error) { - if strings.HasPrefix(strings.ToUpper(pr.estimator), `XGBOOST.`) { + if strings.HasPrefix(strings.ToUpper(pr.estimator), `ANTXGBOOST.`) { // TODO(weiguo): It's a quick way to read column and label names from // xgboost.*, but too heavy. - xgbFiller, err := newXGBoostFiller(pr, nil, db) + xgbFiller, err := newAntXGBoostFiller(pr, nil, db) if err != nil { return nil, "", err } diff --git a/sql/executor.go b/sql/executor.go index c313ae9410..f589e04dcc 100644 --- a/sql/executor.go +++ b/sql/executor.go @@ -373,7 +373,7 @@ func buildFiller(es *extendedSelect, ds *trainAndValDataset, fts fieldTypes, db dataset = ds } if strings.HasPrefix(strings.ToUpper(es.estimator), `XGBOOST.`) { - return newXGBoostFiller(es, dataset, db) + return newAntXGBoostFiller(es, dataset, db) } return newFiller(es, dataset, fts, db) } @@ -386,7 +386,7 @@ func train(wr *PipeWriter, tr *extendedSelect, db *DB, cwd string, modelDir stri var program bytes.Buffer if strings.HasPrefix(strings.ToUpper(tr.estimator), `ANTXGBOOST.`) { - // TODO(sperlingxx): write a separate train pipeline for xgboost to support remote mode + // TODO(sperlingxx): write a separate train pipeline for ant-xgboost to support remote mode if e := genXG(&program, tr, ds, fts, db); e != nil { return fmt.Errorf("genXG %v", e) } @@ -452,7 +452,7 @@ func pred(wr *PipeWriter, pr *extendedSelect, db *DB, cwd string, modelDir strin var buf bytes.Buffer if strings.HasPrefix(strings.ToUpper(pr.estimator), `ANTXGBOOST.`) { - // TODO(sperlingxx): write a separate pred pipeline for xgboost to support remote mode + // TODO(sperlingxx): write a separate pred pipeline for ant-xgboost to support remote mode if e := genXG(&buf, pr, nil, fts, db); e != nil { return fmt.Errorf("genXG %v", e) } diff --git a/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source.py b/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source.py index ef37885f10..c997572704 100644 --- a/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source.py +++ b/sql/python/sqlflow_submitter/ant_xgboost/sqlflow_data_source.py @@ -67,7 +67,7 @@ def __init__(self, rank: int, num_worker: int, # TODO: support tf.feature_column transformation if source_conf.is_tf_integrated: - raise AntXGBoostError('So far, tf transformation is not supported in xgboost job.') + raise AntXGBoostError('So far, tf transformation is not supported in ant-xgboost job.') self._train = source_conf.is_train self._rcd_builder = RecordBuilder(column_conf.features) From fc158ffa1d28da42e01d19c7efd785f532e6508a Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 2 Sep 2019 09:21:02 +0800 Subject: [PATCH 5/7] fix ci --- sql/codegen_ant_xgboost_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/codegen_ant_xgboost_test.go b/sql/codegen_ant_xgboost_test.go index f43e64b238..013d90b589 100644 --- a/sql/codegen_ant_xgboost_test.go +++ b/sql/codegen_ant_xgboost_test.go @@ -326,7 +326,7 @@ LABEL e INTO model_table; ` pr, e := parser.Parse(trainClause) a.NoError(e) - filler, e := newXGBoostFiller(pr, nil, testDB) + filler, e := newAntXGBoostFiller(pr, nil, testDB) a.NoError(e) a.True(filler.IsTrain) @@ -372,7 +372,7 @@ LABEL e INTO model_table; // test with trainAndValDataset ds := &trainAndValDataset{training: "TrainTable", validation: "EvalTable"} - filler, e = newXGBoostFiller(pr, ds, testDB) + filler, e = newAntXGBoostFiller(pr, ds, testDB) a.NoError(e) trainSlct := strings.TrimSuffix(strings.Replace(filler.StandardSelect, "\n", " ", -1), ";") a.EqualValues("SELECT * FROM TrainTable", trainSlct) @@ -389,7 +389,7 @@ LABEL e INTO model_table; pr, e = parser.Parse(testPredictSelectIris) a.NoError(e) - filler, e = newXGBoostFiller(pr, nil, testDB) + filler, e = newAntXGBoostFiller(pr, nil, testDB) a.NoError(e) a.Equal("class", filler.ResultColumn) a.Equal("iris.predict", filler.OutputTable) From 88d39853be96c873b6448b5b7b2cf990980ebadb Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 2 Sep 2019 11:19:57 +0800 Subject: [PATCH 6/7] fix ci --- sql/executor_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/executor_test.go b/sql/executor_test.go index 96d7a3947f..bf65b9d548 100644 --- a/sql/executor_test.go +++ b/sql/executor_test.go @@ -83,7 +83,7 @@ func TestExecutorTrainAnalyzePredictAntXGBoost(t *testing.T) { stream = runExtendedSQL(testAntXGAnalyzeSelectIris, testDB, modelDir, nil) a.True(goodStream(stream.ReadAll())) - stream = runExtendedSQL(testXGPredSelectIris, testDB, modelDir, nil) + stream = runExtendedSQL(testAntXGPredSelectIris, testDB, modelDir, nil) a.True(goodStream(stream.ReadAll())) }) } From 568b47c2e2e14d68c0284a6c45116efb89053d6a Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 2 Sep 2019 14:52:15 +0800 Subject: [PATCH 7/7] train xgboost which can be consistant with xgboost and ant-xgboost --- sql/codegen_analyze.go | 2 +- sql/codegen_ant_xgboost.go | 10 +++++----- sql/codegen_ant_xgboost_test.go | 8 ++++---- sql/executor.go | 4 ++-- sql/python/test_magic_ant_xgboost.py | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/sql/codegen_analyze.go b/sql/codegen_analyze.go index 469e5b01be..bd609c023a 100644 --- a/sql/codegen_analyze.go +++ b/sql/codegen_analyze.go @@ -38,7 +38,7 @@ func newAnalyzeFiller(db *DB, columns []string, label string) (*analyzeFiller, e } func readFeatureNames(pr *extendedSelect, db *DB) ([]string, string, error) { - if strings.HasPrefix(strings.ToUpper(pr.estimator), `ANTXGBOOST.`) { + if strings.HasPrefix(strings.ToUpper(pr.estimator), `XGBOOST.`) { // TODO(weiguo): It's a quick way to read column and label names from // xgboost.*, but too heavy. xgbFiller, err := newAntXGBoostFiller(pr, nil, db) diff --git a/sql/codegen_ant_xgboost.go b/sql/codegen_ant_xgboost.go index ee65d89325..8e92fa6a58 100644 --- a/sql/codegen_ant_xgboost.go +++ b/sql/codegen_ant_xgboost.go @@ -531,29 +531,29 @@ func xgParseColumns(pr *extendedSelect, filler *antXGBoostFiller) error { func xgParseEstimator(pr *extendedSelect, filler *antXGBoostFiller) error { switch strings.ToUpper(pr.estimator) { - case "ANTXGBOOST.ESTIMATOR": + case "XGBOOST.ESTIMATOR": if len(filler.Objective) == 0 { return xgParseEstimatorError(pr.estimator, fmt.Errorf("objective must be defined")) } - case "ANTXGBOOST.CLASSIFIER": + case "XGBOOST.CLASSIFIER": if obj := filler.Objective; len(obj) == 0 { filler.Objective = "binary:logistic" } else if !strings.HasPrefix(obj, "binary") && !strings.HasPrefix(obj, "multi") { return xgParseEstimatorError(pr.estimator, fmt.Errorf("found non classification objective(%s)", obj)) } - case "ANTXGBOOST.BINARYCLASSIFIER": + case "XGBOOST.BINARYCLASSIFIER": if obj := filler.Objective; len(obj) == 0 { filler.Objective = "binary:logistic" } else if !strings.HasPrefix(obj, "binary") { return xgParseEstimatorError(pr.estimator, fmt.Errorf("found non binary objective(%s)", obj)) } - case "ANTXGBOOST.MULTICLASSIFIER": + case "XGBOOST.MULTICLASSIFIER": if obj := filler.Objective; len(obj) == 0 { filler.Objective = "multi:softprob" } else if !strings.HasPrefix(obj, "multi") { return xgParseEstimatorError(pr.estimator, fmt.Errorf("found non multi-class objective(%s)", obj)) } - case "ANTXGBOOST.REGRESSOR": + case "XGBOOST.REGRESSOR": if obj := filler.Objective; len(obj) == 0 { filler.Objective = "reg:squarederror" } else if !strings.HasPrefix(obj, "reg") && !strings.HasPrefix(obj, "rank") { diff --git a/sql/codegen_ant_xgboost_test.go b/sql/codegen_ant_xgboost_test.go index 013d90b589..7aed012505 100644 --- a/sql/codegen_ant_xgboost_test.go +++ b/sql/codegen_ant_xgboost_test.go @@ -25,7 +25,7 @@ const ( testAntXGTrainSelectIris = ` SELECT * FROM iris.train -TRAIN antxgboost.Estimator +TRAIN xgboost.Estimator WITH train.objective = "multi:softprob", train.num_class = 3, @@ -142,7 +142,7 @@ func TestXGBoostAttr(t *testing.T) { trainClause := ` SELECT a, b, c, d, e FROM table_xx -TRAIN antxgboost.Estimator +TRAIN xgboost.Estimator WITH train.objective = "binary:logistic", train.booster = gblinear, @@ -204,7 +204,7 @@ func TestColumnClause(t *testing.T) { parser := newParser() sqlHead := ` SELECT a, b, c, d, e FROM table_xx -TRAIN antxgboost.Estimator +TRAIN xgboost.Estimator WITH attr_x = XXX ` sqlTail := ` @@ -313,7 +313,7 @@ func TestXGBoostFiller(t *testing.T) { parser := newParser() trainClause := ` SELECT * FROM iris.train -TRAIN antxgboost.Regressor +TRAIN xgboost.Regressor WITH train.max_depth = 5, train.eta = 0.03, diff --git a/sql/executor.go b/sql/executor.go index f589e04dcc..17187c56dd 100644 --- a/sql/executor.go +++ b/sql/executor.go @@ -385,7 +385,7 @@ func train(wr *PipeWriter, tr *extendedSelect, db *DB, cwd string, modelDir stri } var program bytes.Buffer - if strings.HasPrefix(strings.ToUpper(tr.estimator), `ANTXGBOOST.`) { + if strings.HasPrefix(strings.ToUpper(tr.estimator), `XGBOOST.`) { // TODO(sperlingxx): write a separate train pipeline for ant-xgboost to support remote mode if e := genXG(&program, tr, ds, fts, db); e != nil { return fmt.Errorf("genXG %v", e) @@ -451,7 +451,7 @@ func pred(wr *PipeWriter, pr *extendedSelect, db *DB, cwd string, modelDir strin } var buf bytes.Buffer - if strings.HasPrefix(strings.ToUpper(pr.estimator), `ANTXGBOOST.`) { + if strings.HasPrefix(strings.ToUpper(pr.estimator), `XGBOOST.`) { // TODO(sperlingxx): write a separate pred pipeline for ant-xgboost to support remote mode if e := genXG(&buf, pr, nil, fts, db); e != nil { return fmt.Errorf("genXG %v", e) diff --git a/sql/python/test_magic_ant_xgboost.py b/sql/python/test_magic_ant_xgboost.py index a838beffb3..220a9726e0 100644 --- a/sql/python/test_magic_ant_xgboost.py +++ b/sql/python/test_magic_ant_xgboost.py @@ -24,7 +24,7 @@ class TestSQLFlowMagic(unittest.TestCase): train_statement = """ SELECT * FROM iris.train -TRAIN antxgboost.Estimator +TRAIN xgboost.Estimator WITH train.objective = "multi:softprob", train.num_class = 3,