[ 
https://issues.apache.org/jira/browse/SPARK-13605?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Michael Armbrust updated SPARK-13605:
-------------------------------------
    Description: 
In the current environment, the only way to turn a List or JavaRDD into a Dataset with columns is to use Encoders.bean(MyBean.class). The current implementation fails if a bean property is not a basic type or itself a bean.
I would like to see one of the following:
1) Default to Java serialization for any object implementing Serializable when using the bean encoder.
2) Allow an encoder backed by a Map<Class,Encoder> and look up entries by class - an ideal implementation would check the class itself, then any interfaces, and then the base classes (see the hypothetical sketch below).
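
To make suggestion 2 concrete, here is a purely hypothetical sketch of what such a lookup could look like. The two-argument Encoders.bean overload is invented for illustration and does not exist in Spark; MyBean and MyUnBean are the classes from the example further down.
{code}
// Hypothetical only - no such API exists in Spark today.
// A registry mapping a class to the encoder to use when the bean encoder
// meets a property that is neither a basic type nor a bean.
Map<Class<?>, Encoder<?>> registry = new HashMap<>();
registry.put(MyUnBean.class, Encoders.javaSerialization(MyUnBean.class));

// Imagined overload: consult the registry (checking the class, then its
// interfaces, then its superclasses) instead of throwing
// UnsupportedOperationException.
Encoder<MyBean> beanEncoder = Encoders.bean(MyBean.class, registry);
{code}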

The following code illustrates the failure with the current bean encoder:
{code}
// Imports needed to compile the example (the three classes would normally
// live in separate files, each with this header):
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SQLContext;

import scala.Option;

/**
 * This class is a good Java bean but one field holds an object
 * which is not a bean
 */
public class MyBean  implements Serializable {
    private int m_count;
    private String m_Name;
    private MyUnBean m_UnBean;

    public MyBean() {}   // no-argument constructor, required for a proper bean

    public MyBean(int count, String name, MyUnBean unBean) {
        m_count = count;
        m_Name = name;
        m_UnBean = unBean;
    }

    public int getCount() {return m_count; }
    public void setCount(int count) {m_count = count;}
    public String getName() {return m_Name;}
    public void setName(String name) {m_Name = name;}
    public MyUnBean getUnBean() {return m_UnBean;}
    public void setUnBean(MyUnBean unBean) {m_UnBean = unBean;}
}
/**
 * This is a Java object which is not a bean:
 * no getters or setters, but it is serializable.
 */
public class MyUnBean implements Serializable {
    public final int count;
    public final String name;

    public MyUnBean(int count, String name) {
        this.count = count;
        this.name = name;
    }
}

/**
 * This code creates a list of MyBean objects - a Java bean
 * containing one field which is not a bean. It then attempts,
 * and fails, to use a bean encoder to make a Dataset.
 */
public class DatasetTest {
    public static final Random RND = new Random();
    public static final int LIST_SIZE = 100;

    public static String makeName() {
        return Integer.toString(RND.nextInt());
    }

    public static MyUnBean makeUnBean() {
        return new MyUnBean(RND.nextInt(), makeName());
    }

    public static MyBean makeBean() {
        return new MyBean(RND.nextInt(), makeName(), makeUnBean());
    }

    /**
     * Make a list of MyBeans
     * @return
     */
    public static List<MyBean> makeBeanList() {
        List<MyBean> holder = new ArrayList<MyBean>();
        for (int i = 0; i < LIST_SIZE; i++) {
            holder.add(makeBean());
        }
        return holder;
    }

    public static SQLContext getSqlContext() {
        SparkConf sparkConf = new SparkConf();
        sparkConf.setAppName("BeanTest");
        Option<String> option = sparkConf.getOption("spark.master");
        if (!option.isDefined()) {   // default to local if no master was supplied
            sparkConf.setMaster("local[*]");
        }
        JavaSparkContext ctx = new JavaSparkContext(sparkConf);
        return new SQLContext(ctx);
    }


    public static void main(String[] args) {
        SQLContext sqlContext = getSqlContext();

        Encoder<MyBean> evidence = Encoders.bean(MyBean.class);
        // An encoder for the non-bean field can be created on its own,
        // but there is no way to tell the bean encoder above to use it.
        Encoder<MyUnBean> evidence2 = Encoders.javaSerialization(MyUnBean.class);

        List<MyBean> holder = makeBeanList();
        // The next line fails with:
        // Exception in thread "main" java.lang.UnsupportedOperationException:
        //   no encoder found for com.lordjoe.testing.MyUnBean
        Dataset<MyBean> beanSet = sqlContext.createDataset(holder, evidence);

        long count = beanSet.count();
        if (count != LIST_SIZE)
            throw new IllegalStateException("bad count");

    }
}
{code}
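
For completeness: the only workaround I can see today is to give up on columns entirely and encode the whole bean opaquely. A minimal, untested sketch, reusing getSqlContext() and makeBeanList() from DatasetTest above (Encoders.javaSerialization and Encoders.kryo do exist in 1.6):
{code}
SQLContext sqlContext = getSqlContext();
// Serializes the whole MyBean, including the nested MyUnBean, but the
// resulting Dataset has a single binary column rather than one column per
// bean property - which is exactly what this request is trying to avoid.
Encoder<MyBean> opaque = Encoders.javaSerialization(MyBean.class);   // or Encoders.kryo(MyBean.class)
Dataset<MyBean> beanSet = sqlContext.createDataset(makeBeanList(), opaque);
long count = beanSet.count();   // succeeds; count == LIST_SIZE
{code}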

> Bean encoder cannot handle non-bean properties - no way to encode non-bean Java 
> objects with columns
> --------------------------------------------------------------------------------------------------
>
>                 Key: SPARK-13605
>                 URL: https://issues.apache.org/jira/browse/SPARK-13605
>             Project: Spark
>          Issue Type: New Feature
>          Components: Java API, SQL
>    Affects Versions: 1.6.0
>         Environment: Any
>            Reporter: Steven Lewis
>              Labels: easytest, features
>             Fix For: 1.6.0
>
>


