Github user fmcquillan99 commented on the issue:
https://github.com/apache/madlib/pull/291
I am wondering why the order of the input columns determines whether the output is cast to varchar or to text.
For this data set:
```
-- Rebuild the "golf" demo table from scratch so the example can be re-run.
-- CASCADE also removes any objects that depend on a previous copy of the table.
DROP TABLE IF EXISTS golf CASCADE;

CREATE TABLE golf (
    id                  int,
    "OUTLOOK"           varchar,             -- quoted, so the upper-case name is preserved
    temperature         smallint,
    humidity            real,
    "Temp_Humidity"     double precision[],  -- quoted, so the mixed-case name is preserved
    clouds_airquality   text[],
    windy               boolean,
    class               text,
    observation_weight  double precision
);

-- The target columns are listed explicitly so the load does not depend on the
-- physical column order of the table. Note that "OUTLOOK" is varchar while
-- class is text; the two cols2vec calls that follow differ only in the order
-- in which these two columns are passed.
INSERT INTO golf (
    id,
    "OUTLOOK",
    temperature,
    humidity,
    "Temp_Humidity",
    clouds_airquality,
    windy,
    class,
    observation_weight
)
VALUES
    (1,  'sunny',    85, 85, ARRAY[85, 85], ARRAY['none', 'unhealthy'],  FALSE, 'Don''t Play', 5.0),
    (2,  'sunny',    80, 90, ARRAY[80, 90], ARRAY['none', 'moderate'],   TRUE,  'Don''t Play', 5.0),
    (3,  'overcast', 83, 78, ARRAY[83, 78], ARRAY['low', 'moderate'],    FALSE, 'Play',        1.5),
    (4,  'rain',     70, 96, ARRAY[70, 96], ARRAY['low', 'moderate'],    FALSE, 'Play',        1.0),
    (5,  'rain',     68, 80, ARRAY[68, 80], ARRAY['medium', 'good'],     FALSE, 'Play',        1.0),
    (6,  'rain',     65, 70, ARRAY[65, 70], ARRAY['low', 'unhealthy'],   TRUE,  'Don''t Play', 1.0),
    (7,  'overcast', 64, 65, ARRAY[64, 65], ARRAY['medium', 'moderate'], TRUE,  'Play',        1.5),
    (8,  'sunny',    72, 95, ARRAY[72, 95], ARRAY['high', 'unhealthy'],  FALSE, 'Don''t Play', 5.0),
    (9,  'sunny',    69, 70, ARRAY[69, 70], ARRAY['high', 'good'],       FALSE, 'Play',        5.0),
    (10, 'rain',     75, 80, ARRAY[75, 80], ARRAY['medium', 'good'],     FALSE, 'Play',        1.0),
    (11, 'sunny',    75, 70, ARRAY[75, 70], ARRAY['none', 'good'],       TRUE,  'Play',        5.0),
    (12, 'overcast', 72, 90, ARRAY[72, 90], ARRAY['medium', 'moderate'], TRUE,  'Play',        1.5),
    (13, 'overcast', 81, 75, ARRAY[81, 75], ARRAY['medium', 'moderate'], FALSE, 'Play',        1.5),
    (14, 'rain',     71, 80, ARRAY[71, 80], ARRAY['low', 'unhealthy'],   TRUE,  'Don''t Play', 1.0);
```
(1)
```
-- Case (1): "OUTLOOK" (varchar) is listed before class (text).
-- Remove any earlier output first so the call can be repeated.
DROP TABLE IF EXISTS
    cols2vec_result,
    cols2vec_result_summary;

SELECT
    madlib.cols2vec(
        'golf',              -- source table
        'cols2vec_result',   -- output table
        '"OUTLOOK", class'   -- columns to combine, varchar column first
    );
```
produces a varchar array:
```
-- Show the catalog metadata for every column of table 'out99'.
-- NOTE(review): the cols2vec call above writes to 'cols2vec_result';
-- 'out99' is presumably a table derived from that output -- confirm.
SELECT *
FROM information_schema.columns
WHERE table_name = 'out99';
-[ RECORD 1 ]------------+------------------
table_catalog | madlib
table_schema | public
table_name | out99
column_name | f2
ordinal_position | 2
column_default |
is_nullable | YES
data_type | character varying
character_maximum_length |
character_octet_length | 1073741824
numeric_precision |
numeric_precision_radix |
numeric_scale |
datetime_precision |
interval_type |
interval_precision |
character_set_catalog |
character_set_schema |
character_set_name |
collation_catalog |
collation_schema |
collation_name |
domain_catalog |
domain_schema |
domain_name |
udt_catalog | madlib
udt_schema | pg_catalog
udt_name | varchar
scope_catalog |
scope_schema |
scope_name |
maximum_cardinality |
dtd_identifier | 2
is_self_referencing | NO
is_identity | NO
identity_generation |
identity_start |
identity_increment |
identity_maximum |
identity_minimum |
identity_cycle |
is_generated | NEVER
generation_expression |
is_updatable | YES
-[ RECORD 2 ]------------+------------------
table_catalog | madlib
table_schema | public
table_name | out99
column_name | f1
ordinal_position | 1
column_default |
is_nullable | YES
data_type | character varying
character_maximum_length |
character_octet_length | 1073741824
numeric_precision |
numeric_precision_radix |
numeric_scale |
datetime_precision |
interval_type |
interval_precision |
character_set_catalog |
character_set_schema |
character_set_name |
collation_catalog |
collation_schema |
collation_name |
domain_catalog |
domain_schema |
domain_name |
udt_catalog | madlib
udt_schema | pg_catalog
udt_name | varchar
scope_catalog |
scope_schema |
scope_name |
maximum_cardinality |
dtd_identifier | 1
is_self_referencing | NO
is_identity | NO
identity_generation |
identity_start |
identity_increment |
identity_maximum |
identity_minimum |
identity_cycle |
is_generated | NEVER
generation_expression |
is_updatable | YES
```
(2)
```
-- Case (2): class (text) is listed before "OUTLOOK" (varchar).
-- Remove any earlier output first so the call can be repeated.
DROP TABLE IF EXISTS
    cols2vec_result,
    cols2vec_result_summary;

SELECT
    madlib.cols2vec(
        'golf',              -- source table
        'cols2vec_result',   -- output table
        'class, "OUTLOOK"'   -- columns to combine, text column first
    );
```
produces a text array:
```
-- Show the catalog metadata for every column of table 'out99'.
-- NOTE(review): the cols2vec call above writes to 'cols2vec_result';
-- 'out99' is presumably a table derived from that output -- confirm.
SELECT *
FROM information_schema.columns
WHERE table_name = 'out99';
-[ RECORD 1 ]------------+-----------
table_catalog | madlib
table_schema | public
table_name | out99
column_name | f2
ordinal_position | 2
column_default |
is_nullable | YES
data_type | text
character_maximum_length |
character_octet_length | 1073741824
numeric_precision |
numeric_precision_radix |
numeric_scale |
datetime_precision |
interval_type |
interval_precision |
character_set_catalog |
character_set_schema |
character_set_name |
collation_catalog |
collation_schema |
collation_name |
domain_catalog |
domain_schema |
domain_name |
udt_catalog | madlib
udt_schema | pg_catalog
udt_name | text
scope_catalog |
scope_schema |
scope_name |
maximum_cardinality |
dtd_identifier | 2
is_self_referencing | NO
is_identity | NO
identity_generation |
identity_start |
identity_increment |
identity_maximum |
identity_minimum |
identity_cycle |
is_generated | NEVER
generation_expression |
is_updatable | YES
-[ RECORD 2 ]------------+-----------
table_catalog | madlib
table_schema | public
table_name | out99
column_name | f1
ordinal_position | 1
column_default |
is_nullable | YES
data_type | text
character_maximum_length |
character_octet_length | 1073741824
numeric_precision |
numeric_precision_radix |
numeric_scale |
datetime_precision |
interval_type |
interval_precision |
character_set_catalog |
character_set_schema |
character_set_name |
collation_catalog |
collation_schema |
collation_name |
domain_catalog |
domain_schema |
domain_name |
udt_catalog | madlib
udt_schema | pg_catalog
udt_name | text
scope_catalog |
scope_schema |
scope_name |
maximum_cardinality |
dtd_identifier | 1
is_self_referencing | NO
is_identity | NO
identity_generation |
identity_start |
identity_increment |
identity_maximum |
identity_minimum |
identity_cycle |
is_generated | NEVER
generation_expression |
is_updatable | YES
```
Why does the order of the input columns change the type of the output array?
---