GitHub user myui commented on the issue:
https://github.com/apache/incubator-hivemall/pull/98
Confirmed that the following system tests pass on EMR.
https://github.com/apache/incubator-hivemall/pull/79
```sql
-- Hive variables used by this script:
--   n_samples   : referenced as ${n_samples} by the accuracy query further down
--   total_steps : handed to the trainer through its -total_steps option
set hivevar:n_samples=16281;
set hivevar:total_steps=32562;

-- Rebuild the model table from scratch on every run.
drop table if exists logress_model;

-- The trainer emits (feature, weight) rows. Weights emitted for the same
-- feature are averaged into a single weight per feature.
create table logress_model as
select
  feature,
  avg(weight) as weight
from (
  select
    -- The option string is written on a single line so that no line break
    -- ends up inside the literal.
    -- Alternative call kept for reference:
    --   logress(add_bias(features), label, '-total_steps ${total_steps}') as (feature, weight)
    train_classifier(add_bias(features), label, '-loss logloss -opt SGD -reg no -eta simple -total_steps ${total_steps}') as (feature, weight)
  from
    train_x3
) t
group by
  feature;
-- Scores the test table with logress_model and reports the number of rows
-- whose predicted label equals the actual label, divided by ${n_samples}.
with test_exploded as (
  -- One row per element of add_bias(features), split into feature and value.
  select
    rowid,
    label,
    extract_feature(feature) as feature,
    extract_weight(feature) as value
  from
    test
    lateral view explode(add_bias(features)) ex as feature
),
predict as (
  -- Per rowid: sigmoid of the summed weight * value products, plus the
  -- label obtained by thresholding that probability at 0.5.
  select
    te.rowid,
    sigmoid(sum(lm.weight * te.value)) as prob,
    cast(
      (case
         when sigmoid(sum(lm.weight * te.value)) >= 0.5 then 1.0
         else 0.0
       end) as float
    ) as label
  from
    test_exploded te
    left outer join logress_model lm
      on (te.feature = lm.feature)
  group by
    te.rowid
),
submit as (
  -- Pair each actual label with its prediction.
  select
    ts.label as actual,
    pr.label as predicted,
    pr.prob as probability
  from
    test ts
    join predict pr
      on (ts.rowid = pr.rowid)
)
select
  count(1) / ${n_samples}
from
  submit
where
  actual = predicted;
```
```sql
-- Rebuild the model table from scratch on every run.
drop table if exists adagrad_model;

-- The trainer emits (feature, weight) rows. Weights emitted for the same
-- feature are averaged into a single weight per feature.
-- NOTE(review): training uses features without add_bias, whereas the
-- evaluation query below explodes add_bias(features) - confirm this is intended.
create table adagrad_model as
select
  feature,
  avg(weight) as weight
from (
  select
    -- The option string is written on a single line so that no line break
    -- ends up inside the literal.
    -- Alternative call kept for reference:
    --   train_adagrad_regr(features, label) as (feature, weight)
    train_regression(features, label, '-loss squaredloss -opt AdaGrad -reg no') as (feature, weight)
  from
    train_x3
) t
group by
  feature;
-- Scores the test table with adagrad_model and reports rmse between the
-- predicted probability and the actual label.
with test_exploded as (
  -- One row per element of add_bias(features), split into feature and value.
  select
    rowid,
    label,
    extract_feature(feature) as feature,
    extract_weight(feature) as value
  from
    test
    lateral view explode(add_bias(features)) ex as feature
),
predict as (
  -- Per rowid: sigmoid of the summed weight * value products.
  select
    te.rowid,
    sigmoid(sum(am.weight * te.value)) as prob
  from
    test_exploded te
    left outer join adagrad_model am
      on (te.feature = am.feature)
  group by
    te.rowid
),
submit as (
  -- Pair each actual label with its predicted probability.
  select
    ts.label as actual,
    pr.prob as probability
  from
    test ts
    join predict pr
      on (ts.rowid = pr.rowid)
)
select
  rmse(probability, actual)
from
  submit;
```
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---