Try changing the code in the evaluate method to something like:
// Resolve everything that does not change between iterations once, up front:
// the deferred list value, the struct field handle and the primitive inspector.
Object list = arguments[0].get();
StructField prodCatField = structOI.getStructFieldRef("productCategory");
PrimitiveObjectInspector prodCatPrimitiveOI = (PrimitiveObjectInspector) prodCatOI;
for (int i = 0; i < numElements; i++) {
    // Let the object inspectors interpret the data instead of casting it to a
    // concrete class such as LazyString; the concrete class depends on the SerDe.
    Object element = listOI.getListElement(list, i);
    Object product = structOI.getStructFieldData(element, prodCatField);
    ret.add(prodCatPrimitiveOI.getPrimitiveWritableObject(product));
}
2013/3/29 Peter Chu <[email protected]>:
> Sorry, the test should be the following (changed extract_shas to
> extract_product_category):
>
> import org.apache.hadoop.hive.ql.metadata.HiveException;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
> import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
> import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
> import
> org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
> import org.testng.annotations.Test;
>
> import java.util.ArrayList;
> import java.util.List;
>
> public class TestGenericUDFExtractProductCategory
> {
> ArrayList<String> fieldNames = new ArrayList<String>();
> ArrayList<ObjectInspector> fieldObjectInspectors = new
> ArrayList<ObjectInspector>();
>
> @Test
> public void simpleTest()
> throws Exception
> {
> ListObjectInspector firstInspector = new MyListObjectInspector();
>
> ArrayList test = new ArrayList();
> test.add("test");
>
> ArrayList test2 = new ArrayList();
> test2.add(test);
>
> StructObjectInspector soi =
> ObjectInspectorFactory.getStandardStructObjectInspector(test, test2);
>
> fieldNames.add("productCategory");
>
> fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
>
> GenericUDF.DeferredObject firstDeferredObject = new
> MyDeferredObject(test2);
>
> GenericUDF extract_product_category = new
> GenericUDFExtractProductCategory();
>
> extract_product_category.initialize(new
> ObjectInspector[]{firstInspector});
>
> extract_product_category.evaluate(new
> DeferredObject[]{firstDeferredObject});
> }
>
> public class MyDeferredObject implements DeferredObject
> {
> private Object value;
>
> public MyDeferredObject(Object value) {
> this.value = value;
> }
>
> @Override
> public Object get() throws HiveException
> {
> return value;
> }
> }
>
> private class MyListObjectInspector implements ListObjectInspector
> {
> @Override
> public ObjectInspector getListElementObjectInspector()
> {
> return
> ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
> fieldObjectInspectors);
> }
>
> @Override
> public Object getListElement(Object data, int index)
> {
> List myList = (List) data;
> if (myList == null || index > myList.size()) {
> return null;
> }
> return myList.get(index);
> }
>
> @Override
> public int getListLength(Object data)
> {
> if (data == null) {
> return -1;
> }
> return ((List) data).size();
> }
>
> @Override
> public List<?> getList(Object data)
> {
> return (List) data;
> }
>
> @Override
> public String getTypeName()
> {
> return null; //To change body of implemented methods use File |
> Settings | File Templates.
> }
>
> @Override
> public Category getCategory()
> {
> return Category.LIST;
> }
> }
> }
>
> ________________________________
> From: [email protected]
> To: [email protected]
> Subject: A GenericUDF Function to Extract a Field From an Array of Structs
> Date: Thu, 28 Mar 2013 14:16:33 -0700
>
> I am trying to write a GenericUDF function that collects the values of a
> specific struct field from every element of an array, for each record, and
> returns them in an array as well.
>
> I wrote the UDF (as below), and it seems to work but:
>
> 1) It does not work when I run it against an external table, although it
> works fine on a managed table. Any idea why?
>
> 2) I am having a tough time writing a test for this. I have attached the
> test I have so far, and it does not work; I always get either
> 'java.util.ArrayList cannot be cast to
> org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector' or 'cannot
> cast String to LazyString'.
> My question is: how do I supply a list of structs to the evaluate method?
>
> Any help will be greatly appreciated.
>
> Thanks,
> Peter
>
> The table:
>
> CREATE EXTERNAL TABLE FOO (
> TS string,
> customerId string,
> products array< struct<productCategory:string> >
> )
> PARTITIONED BY (ds string)
> ROW FORMAT SERDE 'some.serde'
> WITH SERDEPROPERTIES ('error.ignore'='true')
> LOCATION 'some_locations'
> ;
>
> A sample row holds:
> 1340321132000, 'some_company',
> [{"productCategory":"footwear"},{"productCategory":"eyewear"}]
>
> This is my code:
>
> import org.apache.hadoop.hive.ql.exec.Description;
> import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
> import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
> import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
> import org.apache.hadoop.hive.ql.metadata.HiveException;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
> import org.apache.hadoop.hive.serde2.lazy.LazyString;
> import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
> import
> org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
> import
> org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.StructField;
> import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
> import
> org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
> import
> org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
> import org.apache.hadoop.io.Text;
>
> import java.util.ArrayList;
>
> @Description(name = "extract_product_category",
> value = "_FUNC_( array< struct<productCategory:string> > ) - Collect
> all product category field values inside an array of struct(s), and return
> the results in an array<string>",
> extended = "Example:\n SELECT
> _FUNC_(array_of_structs_with_product_category_field)")
> public class GenericUDFExtractProductCategory
> extends GenericUDF
> {
> private ArrayList ret;
>
> private ListObjectInspector listOI;
> private StructObjectInspector structOI;
> private ObjectInspector prodCatOI;
>
> @Override
> public ObjectInspector initialize(ObjectInspector[] args)
> throws UDFArgumentException
> {
> if (args.length != 1) {
> throw new UDFArgumentLengthException("The function
> extract_product_category() requires exactly one argument.");
> }
>
> if (args[0].getCategory() != Category.LIST) {
> throw new UDFArgumentTypeException(0, "Type array<struct> is
> expected to be the argument for extract_product_category but " +
> args[0].getTypeName() + " is found instead");
> }
>
> listOI = ((ListObjectInspector) args[0]);
> structOI = ((StructObjectInspector)
> listOI.getListElementObjectInspector());
>
> if (structOI.getAllStructFieldRefs().size() != 1) {
> throw new UDFArgumentTypeException(0, "Incorrect number of
> fields in the struct, should be one");
> }
>
> StructField productCategoryField =
> structOI.getStructFieldRef("productCategory");
> //If not, throw exception
> if (productCategoryField == null) {
> throw new UDFArgumentTypeException(0, "NO \"productCategory\"
> field in input structure");
> }
>
> //Are they of the correct types?
> //We store these object inspectors for use in the evaluate() method
> prodCatOI = productCategoryField.getFieldObjectInspector();
>
> //First are they primitives
> if (prodCatOI.getCategory() != Category.PRIMITIVE) {
> throw new UDFArgumentTypeException(0, "productCategory field
> must be of string type");
> }
>
> //Are they of the correct primitives?
> if (((PrimitiveObjectInspector)prodCatOI).getPrimitiveCategory() !=
> PrimitiveObjectInspector.PrimitiveCategory.STRING) {
> throw new UDFArgumentTypeException(0, "productCategory field
> must be of string type");
> }
>
> ret = new ArrayList();
>
> return
> ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
> }
>
> @Override
> public ArrayList evaluate(DeferredObject[] arguments)
> throws HiveException
> {
> ret.clear();
>
> if (arguments.length != 1) {
> return null;
> }
>
> if (arguments[0].get() == null) {
> return null;
> }
>
> int numElements = listOI.getListLength(arguments[0].get());
>
> for (int i = 0; i < numElements; i++) {
> LazyString prodCatDataObject = (LazyString)
> (structOI.getStructFieldData(listOI.getListElement(arguments[0].get(), i),
> structOI.getStructFieldRef("productCategory")));
> Text productCategoryValue = ((StringObjectInspector)
> prodCatOI).getPrimitiveWritableObject(prodCatDataObject);
> ret.add(productCategoryValue);
> }
> return ret;
> }
>
> @Override
> public String getDisplayString(String[] strings)
> {
> assert (strings.length > 0);
> StringBuilder sb = new StringBuilder();
> sb.append("extract_product_category(");
> sb.append(strings[0]);
> sb.append(")");
> return sb.toString();
> }
> }
>
>
> My Test:
>
> import org.apache.hadoop.hive.ql.metadata.HiveException;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
> import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
> import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
> import
> org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
> import org.testng.annotations.Test;
>
> import java.util.ArrayList;
> import java.util.List;
>
> public class TestGenericUDFExtractShas
> {
> ArrayList<String> fieldNames = new ArrayList<String>();
> ArrayList<ObjectInspector> fieldObjectInspectors = new
> ArrayList<ObjectInspector>();
>
> @Test
> public void simpleTest()
> throws Exception
> {
> ListObjectInspector firstInspector = new MyListObjectInspector();
>
> ArrayList test = new ArrayList();
> test.add("test");
>
> ArrayList test2 = new ArrayList();
> test2.add(test);
>
> StructObjectInspector soi =
> ObjectInspectorFactory.getStandardStructObjectInspector(test, test2);
>
> fieldNames.add("productCategory");
>
> fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
>
> GenericUDF.DeferredObject firstDeferredObject = new
> MyDeferredObject(test2);
>
> GenericUDF extract_shas = new GenericUDFExtractShas();
>
> extract_shas.initialize(new ObjectInspector[]{firstInspector});
>
> extract_shas.evaluate(new DeferredObject[]{firstDeferredObject});
> }
>
> public class MyDeferredObject implements DeferredObject
> {
> private Object value;
>
> public MyDeferredObject(Object value) {
> this.value = value;
> }
>
> @Override
> public Object get() throws HiveException
> {
> return value;
> }
> }
>
> private class MyListObjectInspector implements ListObjectInspector
> {
> @Override
> public ObjectInspector getListElementObjectInspector()
> {
> return
> ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
> fieldObjectInspectors);
> }
>
> @Override
> public Object getListElement(Object data, int index)
> {
> List myList = (List) data;
> if (myList == null || index > myList.size()) {
> return null;
> }
> return myList.get(index);
> }
>
> @Override
> public int getListLength(Object data)
> {
> if (data == null) {
> return -1;
> }
> return ((List) data).size();
> }
>
> @Override
> public List<?> getList(Object data)
> {
> return (List) data;
> }
>
> @Override
> public String getTypeName()
> {
> return null; //To change body of implemented methods use File |
> Settings | File Templates.
> }
>
> @Override
> public Category getCategory()
> {
> return Category.LIST;
> }
> }
> }