[ https://issues.apache.org/jira/browse/HIVEMALL-119?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ITO Ryuichi updated HIVEMALL-119: --------------------------------- Description: Although this error arises on [this branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), it probably occurs on head of master too. It seems that labels aren't set properly. ```sh make xgboost-native-local mvn package -Dmaven.test.skip=true docker-compose -f resources/docker/docker-compose.yml build docker-compose -f resources/docker/docker-compose.yml up -d && docker attach hivemall ``` On docker ```sh bin/prepare_iris.sh hive ``` On hive ```sql -- add jar /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar; -- source /opt/hivemall/resources/ddl/define-all.hive; add jar /opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar; source /opt/hivemall/resources/ddl/define-additional.hive; set hivevar:f0_min=4.3; set hivevar:f0_max=7.9; set hivevar:f1_min=2.0; set hivevar:f1_max=4.4; set hivevar:f2_min=1.0; set hivevar:f2_max=6.9; set hivevar:f3_min=0.1; set hivevar:f3_max=2.5; use iris; create or replace view iris_scaled as select rowid, label, add_bias(array( concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max})) )) as features from iris_raw; -- select * from iris_scaled limit 3; -- 1 Iris-setosa ["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"] -- 2 Iris-setosa ["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"] -- 3 Iris-setosa ["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"] select train_xgboost_classifier(features, case when label = 'Iris-setosa' then 1.0 else 0.0 end) from iris_scaled; -- got exception ``` ``` Failed with exception java.io.IOException:java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to java.lang.String [20:51:33] 
dmlc-core/include/dmlc/logging.h:235: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty Check failed: (info.labels.size()) != (0) label set cannot be empty at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] 
src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) Time taken: 3.375 seconds ``` was: Although this error raises on [this branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), probably on head of master too. It seems that labels aren't set properly. 
```sh make xgboost-native-local mvn package -Dmaven.test.skip=true docker-compose -f resources/docker/docker-compose.yml build docker-compose -f resources/docker/docker-compose.yml up -d && docker attach hivemall ``` On docker ```sh bin/prepare_iris.sh hive ``` On hive ```sql -- add jar /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar; -- source /opt/hivemall/resources/ddl/define-all.hive; add jar /opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar; source /opt/hivemall/resources/ddl/define-additional.hive; set hivevar:f0_min=4.3; set hivevar:f0_max=7.9; set hivevar:f1_min=2.0; set hivevar:f1_max=4.4; set hivevar:f2_min=1.0; set hivevar:f2_max=6.9; set hivevar:f3_min=0.1; set hivevar:f3_max=2.5; use iris; create or replace view iris_scaled as select rowid, label, add_bias(array( concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max})) )) as features from iris_raw; -- select * from iris_scaled limit 3; -- 1 Iris-setosa ["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"] -- 2 Iris-setosa ["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"] -- 3 Iris-setosa ["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"] select train_xgboost_classifier(features, case when label = 'Iris-setosa' then 1.0 else 0.0 end) from iris_scaled; -- got exception ``` ``` Failed with exception java.io.IOException:java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to java.lang.String [20:51:33] dmlc-core/include/dmlc/logging.h:235: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: Check failed: 
(info.labels.size()) != (0) label set cannot be empty Check failed: (info.labels.size()) != (0) label set cannot be empty at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) at 
org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) Time taken: 3.375 seconds ``` > Fail to use xgboost on Hive > --------------------------- > > Key: HIVEMALL-119 > URL: https://issues.apache.org/jira/browse/HIVEMALL-119 > Project: Hivemall > Issue Type: Bug > Environment: Head of > https://github.com/amaya382/incubator-hivemall/tree/cross-compiling > On docker, xgboost native built on local (Linux) > Reporter: ITO Ryuichi > Labels: xgboost > > Although this error arises on [this > branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), > it probably occurs on head of master too. > It seems that labels aren't set properly. 
> ```sh > make xgboost-native-local > mvn package -Dmaven.test.skip=true > docker-compose -f resources/docker/docker-compose.yml build > docker-compose -f resources/docker/docker-compose.yml up -d && docker attach > hivemall > ``` > On docker > ```sh > bin/prepare_iris.sh > hive > ``` > On hive > ```sql > -- add jar > /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar; > -- source /opt/hivemall/resources/ddl/define-all.hive; > add jar > /opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar; > source /opt/hivemall/resources/ddl/define-additional.hive; > set hivevar:f0_min=4.3; > set hivevar:f0_max=7.9; > set hivevar:f1_min=2.0; > set hivevar:f1_max=4.4; > set hivevar:f2_min=1.0; > set hivevar:f2_max=6.9; > set hivevar:f3_min=0.1; > set hivevar:f3_max=2.5; > use iris; > create or replace view iris_scaled > as > select > rowid, > label, > add_bias(array( > concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), > concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), > concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), > concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max})) > )) as features > from > iris_raw; > -- select * from iris_scaled limit 3; > -- 1 Iris-setosa > ["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"] > -- 2 Iris-setosa > ["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"] > -- 3 Iris-setosa > ["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"] > select train_xgboost_classifier(features, case when label = 'Iris-setosa' > then 1.0 else 0.0 end) from iris_scaled; -- got exception > ``` > ``` > Failed with exception java.io.IOException:java.lang.ClassCastException: > org.apache.hadoop.io.Text cannot be cast to java.lang.String > [20:51:33] dmlc-core/include/dmlc/logging.h:235: [20:51:33] > src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) > label set cannot be empty > 
org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] > src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) > label set cannot be empty > Check failed: (info.labels.size()) != (0) label set cannot be empty > at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) > at > org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) > at > org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) > at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) > at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at org.apache.hadoop.util.RunJar.run(RunJar.java:221) > at org.apache.hadoop.util.RunJar.main(RunJar.java:136) > org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] > src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) > label set 
cannot be empty > at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313) > at > org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) > at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552) > at > org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535) > at > org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191) > at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233) > at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at > org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at org.apache.hadoop.util.RunJar.run(RunJar.java:221) > at org.apache.hadoop.util.RunJar.main(RunJar.java:136) > Time taken: 3.375 seconds > ``` -- This message was sent by Atlassian JIRA (v6.4.14#64029)