It's just experience.

2014-10-16 13:51 GMT+04:00 Jakub Stransky <[email protected]>:

> I tried to pass schema directly as follows:
>
> STORE finaldata INTO '/user/pig/outputTest/20120422' USING AvroStorage(
>
> 'schema','{"type":"record","name":"PosData","namespace":"com.ncr.bigdata.dm.pif.avro","fields":[{"name":"Version","type":"int"},{"name":"Dob","type":{"type":"record","name":"DobType","fields":[{"name":"Value","type":"int"}]}},{"name":"StoreId","type":"string"},{"name":"TransactionBlockNumber","type":["null","int"],"default":null},{"name":"TransactionData","type":["null",{"type":"array","items":{"type":"record","name":"TransactionData","fields":[{"name":"TransactionHeader","type":{"type":"record","name":"TransactionHeader","fields":[{"name":"Dob","type":"DobType"},{"name":"StoreId","type":"string"},{"name":"TransactionId","type":"int"},{"name":"TransactionTime","type":{"type":"record","name":"Date","fields":[{"name":"UnixUtcTime","type":"long"},{"name":"OffsetMinutes","type":"int"}]}},{"name":"TerminalId","type":["null","string"],"default":null},{"name":"ResponsibleEmployees","type":["null",{"type":"record","name":"ResponsibleEmployees","fields":[{"name":"Employee","type":{"type":"record","name":"Employee","fields":[{"name":"Id","type":"string"},{"name":"Name","type":"string"}]}},{"name":"Manager","type":["null","Employee"],"default":null}]}],"default":null}]}},{"name":"CustomData","type":["null",{"type":"array","items":{"type":"record","name":"KeyValue","fields":[{"name":"Key","type":"string"},{"name":"Value","type":"string"}]}}],"default":null},{"name":"StoreInfo","type":["null",{"type":"record","name":"StoreInfo","fields":[{"name":"IsQuickService","type":"boolean"},{"name":"CurrencyIsoCode","type":"string"}]}],"default":null},{"name":"NewChecks","type":["null",{"type":"array","items":{"type":"record","name":"NewCheckData","fields":[{"name":"CheckId","type":"string"},{"name":"CheckHeader","type":{"type":"record","name":"CheckHeader","fields":[{"name":"CarriedOver","type":["null","boolean"],"default":null},{"name":"TerminalId","type":["null","string"],"default":null},{"name":"Training","type":["null","boolean"],"default":null},{"name":"Period","type":{"type":"reco
rd","name":"LabeledId","fields":[{"name":"Id","type":"string"},{"name":"Label","type":"string"}]}},{"name":"GroupInfo","type":{"type":"record","name":"GroupInfo","fields":[{"name":"Id","type":"string"},{"name":"Label","type":"LabeledId"},{"name":"IsTable","type":"boolean"}]}},{"name":"Events","type":["null",{"type":"array","items":{"type":"record","name":"CheckEvent","fields":[{"name":"CustomEventLabel","type":["null","string"],"default":null},{"name":"Time","type":"Date"},{"name":"CheckEventType","type":{"type":"enum","name":"EventType","symbols":["CheckClose","CheckOpen","Custom","CheckPrint"]}}]}}],"default":null},{"name":"CheckResponsibleEmployees","type":["null",{"type":"array","items":{"type":"record","name":"CheckResponsibleEmployee","fields":[{"name":"Employee","type":"Employee"},{"name":"Time","type":["null","Date"],"default":null}]}}],"default":null},{"name":"GuestCounting","type":{"type":"record","name":"GuestCounting","fields":[{"name":"Guests","type":{"type":"record","name":"DecimalNumber","fields":[{"name":"Value","type":"string"}]}},{"name":"Mode","type":{"type":"enum","name":"GuestCountingMode","symbols":["PerCheck","PerGroup"]}}]}},{"name":"PrintedCheckId","type":"string"},{"name":"RevenueCenter","type":["null","LabeledId"],"default":null},{"name":"Room","type":["null","LabeledId"],"default":null}]}}]}}],"default":null},{"name":"Checks","type":["null",{"type":"array","items":{"type":"record","name":"CheckData","fields":[{"name":"CheckId","type":"string"},{"name":"CheckHeaderUpdate","type":["null",{"type":"record","name":"CheckHeaderUpdate","fields":[{"name":"Period","type":["null","LabeledId"],"default":null},{"name":"GroupInfo","type":["null","GroupInfo"],"default":null},{"name":"Events","type":["null",{"type":"array","items":"CheckEvent"}],"default":null},{"name":"CheckResponsibleEmployees","type":["null",{"type":"array","items":"CheckResponsibleEmployee"}],"default":null},{"name":"GuestCounting","type":["null","GuestCounting"],"default":null},{"n
ame":"PrintedCheckId","type":["null","string"],"default":null},{"name":"RevenueCenter","type":["null","LabeledId"],"default":null},{"name":"Room","type":["null","LabeledId"],"default":null}]}],"default":null},{"name":"Summary","type":["null",{"type":"record","name":"CheckSummary","fields":[{"name":"NetAmount","type":"DecimalNumber"},{"name":"Total","type":"DecimalNumber"}]}],"default":null},{"name":"CheckItems","type":["null",{"type":"array","items":{"type":"record","name":"CheckItem","fields":[{"name":"AbstractCheckElement","type":{"type":"record","name":"AbstractCheckElement","fields":[{"name":"Amount","type":"DecimalNumber"},{"name":"ElementId","type":"string"},{"name":"ElementKind","type":"LabeledId"},{"name":"CreatedOn","type":"Date"},{"name":"ResponsibleEmployees","type":["null","ResponsibleEmployees"],"default":null}]}},{"name":"Categories","type":["null",{"type":"array","items":{"type":"record","name":"Category","fields":[{"name":"CategoryInfo","type":"LabeledId"},{"name":"Type","type":{"type":"enum","name":"CategoryType","symbols":["Sales","NonSales"]}}]}}],"default":null},{"name":"ModifierInfo","type":["null",{"type":"record","name":"ItemModifierInfo","fields":[{"name":"Label","type":"LabeledId"},{"name":"ItemModifierInfoType","type":{"type":"enum","name":"ModifierType","symbols":["ModifiedOrAdded","Removed"]}}]}],"default":null},{"name":"NetAmount","type":"DecimalNumber"},{"name":"OrderMode","type":["null","LabeledId"],"default":null},{"name":"OriginalPrice","type":"DecimalNumber"},{"name":"ParentItem","type":["null","string"],"default":null},{"name":"Quantity","type":["null","DecimalNumber"],"default":null},{"name":"Revenue","type":["null","boolean"],"default":null},{"name":"Seat","type":["null","int"],"default":null},{"name":"ProcessedInKitchen","type":["null","boolean"],"default":null},{"name":"GiftCard","type":["null","boolean"],"default":null},{"name":"SplitItemElementId","type":["null","string"],"default":null}]}}],"default":null},{"name":"Comps","t
ype":["null",{"type":"array","items":{"type":"record","name":"CheckComp","fields":[{"name":"AbstractCheckLinkedElement","type":{"type":"record","name":"AbstractCheckLinkedElement","fields":[{"name":"AbstractCheckElement","type":"AbstractCheckElement"},{"name":"Items","type":["null",{"type":"array","items":{"type":"record","name":"ItemAmount","fields":[{"name":"Amount","type":["null","DecimalNumber"],"default":null},{"name":"ElementId","type":"string"}]}}],"default":null}]}},{"name":"CheckCompType","type":{"type":"enum","name":"CompType","symbols":["Default","Loyalty"]}},{"name":"Note","type":["null","string"],"default":null}]}}],"default":null},{"name":"Payments","type":["null",{"type":"array","items":{"type":"record","name":"CheckPayment","fields":[{"name":"AbstractCheckElement","type":"AbstractCheckElement"},{"name":"ChangeBack","type":["null","DecimalNumber"],"default":null},{"name":"DocumentId","type":["null","string"],"default":null},{"name":"Rounding","type":["null","DecimalNumber"],"default":null},{"name":"Tip","type":["null","DecimalNumber"],"default":null},{"name":"CheckPaymentType","type":{"type":"enum","name":"PaymentType","symbols":["Cash","CardPayment","GiftCard","HouseAccount","Custom"]}},{"name":"Card","type":{"type":"enum","name":"CardVendor","symbols":["NotApplicable","Unknown","Amex","DinersCarteBlanche","Discover","EnRoute","Jcb","Mastercard","Private","Visa"]}}]}}],"default":null},{"name":"Promos","type":["null",{"type":"array","items":{"type":"record","name":"CheckPromo","fields":[{"name":"AbstractCheckLinkedElement","type":"AbstractCheckLinkedElement"},{"name":"Discount","type":["null","DecimalNumber"],"default":null},{"name":"CheckPromoType","type":{"type":"enum","name":"PromoType","symbols":["Default","Combo","Item","Loyalty"]}}]}}],"default":null},{"name":"Surcharges","type":["null",{"type":"array","items":{"type":"record","name":"CheckSurcharge","fields":[{"name":"AbstractCheckLinkedElement","type":"AbstractCheckLinkedElement"},{"name":"Rat
e","type":["null","DecimalNumber"],"default":null},{"name":"CheckSurchargeType","type":{"type":"enum","name":"SurchargeType","symbols":["Default","Gratuity","Tax"]}},{"name":"Accounting","type":{"type":"enum","name":"AccountingType","symbols":["Exclusive","Inclusive"]}}]}}],"default":null},{"name":"Voids","type":["null",{"type":"array","items":{"type":"record","name":"CheckVoid","fields":[{"name":"AbstractCheckLinkedElement","type":"AbstractCheckLinkedElement"},{"name":"CheckVoidType","type":{"type":"enum","name":"VoidType","symbols":["Default","Clear"]}},{"name":"Note","type":["null","string"],"default":null}]}}],"default":null},{"name":"RemovedElements","type":["null",{"type":"array","items":{"type":"record","name":"RemovedElement","fields":[{"name":"ElementId","type":"string"},{"name":"RemovedElementType","type":{"type":"enum","name":"CheckItemType","symbols":["Comp","Item","Payment","Promo","Surcharge","Void"]}}]}}],"default":null}]}}],"default":null},{"name":"LaborData","type":["null",{"type":"array","items":{"type":"record","name":"LaborData","fields":[{"name":"Shifts","type":["null",{"type":"array","items":{"type":"record","name":"Shift","fields":[{"name":"State","type":{"type":"enum","name":"ShiftState","symbols":["ClockedIn","ClockedOut","ClockedOutBySystem"]}},{"name":"StartDate","type":"Date"},{"name":"EndDate","type":["null","Date"],"default":null},{"name":"TotalPay","type":["null","DecimalNumber"],"default":null},{"name":"PayRates","type":["null",{"type":"array","items":{"type":"record","name":"ShiftPayRate","fields":[{"name":"AfterHours","type":"int"},{"name":"HourlyRate","type":"DecimalNumber"},{"name":"IsOvertime","type":"boolean"}]}}],"default":null},{"name":"ShiftNumber","type":"int"},{"name":"Job","type":"LabeledId"},{"name":"Breaks","type":["null",{"type":"array","items":{"type":"record","name":"Break","fields":[{"name":"Paid","type":"boolean"},{"name":"StartDate","type":"Date"},{"name":"EndDate","type":["null","Date"],"default":null}]}}],"defaul
t":null},{"name":"IsManager","type":"boolean"}]}}],"default":null},{"name":"Employee","type":"Employee"}]}}],"default":null}]}}],"default":null},{"name":"Created","type":"Date"}]}'
> );
>
> >> );
> 2014-10-16 04:53:54,616 [main] INFO
>  org.apache.pig.tools.pigstats.ScriptState - Pig features used in the
> script: HASH_JOIN,GROUP_BY,DISTINCT,FILTER
> 2014-10-16 04:53:54,619 [main] INFO
>  org.apache.pig.newplan.logical.optimizer.LogicalPlanOptimizer -
> {RULES_ENABLED=[AddForEach, ColumnMapKeyPrune,
> DuplicateForEachColumnRewrite, GroupByConstParallelSetter,
> ImplicitSplitInserter, LimitOptimizer, LoadTypeCastInserter, MergeFilter,
> MergeForEach, NewPartitionFilterOptimizer, PartitionFilterOptimizer,
> PushDownForEachFlatten, PushUpFilter, SplitFilter, StreamTypeCastInserter],
> RULES_DISABLED=[FilterLogicExpressionSimplifier]}
> 2014-10-16 04:53:54,631 [main] ERROR org.apache.pig.tools.grunt.Grunt -
> ERROR 2116:
> <line 930, column 0> Output Location Validation Failed for:
> '/user/pig/outputTest/20120422 More info to follow:
> *Pig Schema contains a name that is not allowed in Avro*
> Details at logfile: /home/pig/scripts/pig_1413452533120.log
>
> *It seems that the Pig schema is checked before the provided schema is applied, or
> what am I doing wrong?*
> You mentioned an interesting aspect:
>  *If you pass schema as url you MUST map names in relation to schema.*
>     If you pass schema directly as json, you don't have to map relation
> fields by names.
>
> Could you point me to some example or documentation how that would be
> performed? I don't see any such documentation on AvroStorage wiki.
>
> Thanks
> jakub
>
> On 16 October 2014 10:09, Serega Sheypak <[email protected]> wrote:
>
> > Try not to pass the schema as a file; specify the schema JSON directly in
> AvroStorage
> > configuration.
> > I tried both approaches.
> > 1. It's more pleasant to pass schema as url
> > 2. If you pass schema as url you MUST map names in relation to schema.
> >     If you pass schema directly as json, you don't have to map relation
> > fields by names. It works for piggy-bank 0.11
> > I don't know which version you use.
> >
> > 2014-10-16 11:21 GMT+04:00 Jakub Stransky <[email protected]>:
> >
> > > STORE finaldata INTO '$OUT' USING AvroStorage('schema_uri','$SCHEMA');
> > >
> > > OUT=/user/pig/outputTest/20120422  that is the location where I would
> > like
> > > to get the final data stored. And under the $SCHEMA variable I tried
> > various
> > > combinations:
> > > $SCHEMA=hdfs://namenodeha:8020/user/pig/outputTest/pif.json  - that is
> > the
> > > avro schema I would like to use for store. The only difference should
> be
> > > that the schema doesn't contain the disambiguate operator (::)
> > > Because that is the only problem in schema - the :: operator is used
> here
> > >  with no reason. Just need to get rid of that dirtydata::
> > >
> > > finaldata: {*dirtydata::*Version: int,*dirtydata::*Dob: (Value: int)
> > > *,dirtydata::*StoreId: chararray,*dirtydata::*TransactionBlockNumber:
> > int,
> > > *dirtydata::*TransactionData: {TransactionData: (TransactionHeader:
> (Dob:
> > > (Value: int),StoreId: chararray,TransactionId: int,TransactionTime:
> > > (UnixUtcTime: long,OffsetMinutes: int),TerminalId:
> > > chararray,ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > chararray),Manager: (Id: chararray,Name: chararray))),CustomData:
> > > {KeyValue: (Key: chararray,Value: chararray)},StoreInfo:
> (IsQuickService:
> > > boolean,CurrencyIsoCode: chararray),NewChecks: {NewCheckData: (CheckId:
> > > chararray,CheckHeader: (CarriedOver: boolean,TerminalId:
> > > chararray,Training: boolean,Period: (Id: chararray,Label:
> > > chararray),GroupInfo: (Id: chararray,Label: (Id: chararray,Label:
> > > chararray),IsTable: boolean),Events: {CheckEvent: (CustomEventLabel:
> > > chararray,Time: (UnixUtcTime: long,OffsetMinutes: int),CheckEventType:
> > > chararray)},CheckResponsibleEmployees: {CheckResponsibleEmployee:
> > > (Employee: (Id: chararray,Name: chararray),Time: (UnixUtcTime:
> > > long,OffsetMinutes: int))},GuestCounting: (Guests: (Value:
> > chararray),Mode:
> > > chararray),PrintedCheckId: chararray,RevenueCenter: (Id:
> chararray,Label:
> > > chararray),Room: (Id: chararray,Label: chararray)))},Checks:
> {CheckData:
> > > (CheckId: chararray,CheckHeaderUpdate: (Period: (Id: chararray,Label:
> > > chararray),GroupInfo: (Id: chararray,Label: (Id: chararray,Label:
> > > chararray),IsTable: boolean),Events: {CheckEvent: (CustomEventLabel:
> > > chararray,Time: (UnixUtcTime: long,OffsetMinutes: int),CheckEventType:
> > > chararray)},CheckResponsibleEmployees: {CheckResponsibleEmployee:
> > > (Employee: (Id: chararray,Name: chararray),Time: (UnixUtcTime:
> > > long,OffsetMinutes: int))},GuestCounting: (Guests: (Value:
> > chararray),Mode:
> > > chararray),PrintedCheckId: chararray,RevenueCenter: (Id:
> chararray,Label:
> > > chararray),Room: (Id: chararray,Label: chararray)),Summary: (NetAmount:
> > > (Value: chararray),Total: (Value: chararray)),CheckItems: {CheckItem:
> > > (AbstractCheckElement: (Amount: (Value: chararray),ElementId:
> > > chararray,ElementKind: (Id: chararray,Label: chararray),CreatedOn:
> > > (UnixUtcTime: long,OffsetMinutes: int),ResponsibleEmployees: (Employee:
> > > (Id: chararray,Name: chararray),Manager: (Id: chararray,Name:
> > > chararray))),Categories: {Category: (CategoryInfo: (Id:
> chararray,Label:
> > > chararray),Type: chararray)},ModifierInfo: (Label: (Id:
> chararray,Label:
> > > chararray),ItemModifierInfoType: chararray),NetAmount: (Value:
> > > chararray),OrderMode: (Id: chararray,Label: chararray),OriginalPrice:
> > > (Value: chararray),ParentItem: chararray,Quantity: (Value:
> > > chararray),Revenue: boolean,Seat: int,ProcessedInKitchen:
> > boolean,GiftCard:
> > > boolean,SplitItemElementId: chararray)},Comps: {CheckComp:
> > > (AbstractCheckLinkedElement: (AbstractCheckElement: (Amount: (Value:
> > > chararray),ElementId: chararray,ElementKind: (Id: chararray,Label:
> > > chararray),CreatedOn: (UnixUtcTime: long,OffsetMinutes:
> > > int),ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > chararray),Manager: (Id: chararray,Name: chararray))),Items:
> {ItemAmount:
> > > (Amount: (Value: chararray),ElementId: chararray)}),CheckCompType:
> > > chararray,Note: chararray)},Payments: {CheckPayment:
> > (AbstractCheckElement:
> > > (Amount: (Value: chararray),ElementId: chararray,ElementKind: (Id:
> > > chararray,Label: chararray),CreatedOn: (UnixUtcTime:
> long,OffsetMinutes:
> > > int),ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > chararray),Manager: (Id: chararray,Name: chararray))),ChangeBack:
> (Value:
> > > chararray),DocumentId: chararray,Rounding: (Value: chararray),Tip:
> > (Value:
> > > chararray),CheckPaymentType: chararray,Card: chararray)},Promos:
> > > {CheckPromo: (AbstractCheckLinkedElement: (AbstractCheckElement:
> (Amount:
> > > (Value: chararray),ElementId: chararray,ElementKind: (Id:
> > chararray,Label:
> > > chararray),CreatedOn: (UnixUtcTime: long,OffsetMinutes:
> > > int),ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > chararray),Manager: (Id: chararray,Name: chararray))),Items:
> {ItemAmount:
> > > (Amount: (Value: chararray),ElementId: chararray)}),Discount: (Value:
> > > chararray),CheckPromoType: chararray)},Surcharges: {CheckSurcharge:
> > > (AbstractCheckLinkedElement: (AbstractCheckElement: (Amount: (Value:
> > > chararray),ElementId: chararray,ElementKind: (Id: chararray,Label:
> > > chararray),CreatedOn: (UnixUtcTime: long,OffsetMinutes:
> > > int),ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > chararray),Manager: (Id: chararray,Name: chararray))),Items:
> {ItemAmount:
> > > (Amount: (Value: chararray),ElementId: chararray)}),Rate: (Value:
> > > chararray),CheckSurchargeType: chararray,Accounting: chararray)},Voids:
> > > {CheckVoid: (AbstractCheckLinkedElement: (AbstractCheckElement:
> (Amount:
> > > (Value: chararray),ElementId: chararray,ElementKind: (Id:
> > chararray,Label:
> > > chararray),CreatedOn: (UnixUtcTime: long,OffsetMinutes:
> > > int),ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > chararray),Manager: (Id: chararray,Name: chararray))),Items:
> {ItemAmount:
> > > (Amount: (Value: chararray),ElementId: chararray)}),CheckVoidType:
> > > chararray,Note: chararray)},RemovedElements: {RemovedElement:
> (ElementId:
> > > chararray,RemovedElementType: chararray)})},LaborData: {LaborData:
> > (Shifts:
> > > {Shift: (State: chararray,StartDate: (UnixUtcTime: long,OffsetMinutes:
> > > int),EndDate: (UnixUtcTime: long,OffsetMinutes: int),TotalPay: (Value:
> > > chararray),PayRates: {ShiftPayRate: (AfterHours: int,HourlyRate:
> (Value:
> > > chararray),IsOvertime: boolean)},ShiftNumber: int,Job: (Id:
> > > chararray,Label: chararray),Breaks: {Break: (Paid: boolean,StartDate:
> > > (UnixUtcTime: long,OffsetMinutes: int),EndDate: (UnixUtcTime:
> > > long,OffsetMinutes: int))},IsManager: boolean)},Employee: (Id:
> > > chararray,Name: chararray))})},*dirtydata::Created*: (UnixUtcTime:
> > > long,OffsetMinutes: int)}
> > >
> > >
> > >
> > >
> > > On 15 October 2014 19:00, Serega Sheypak <[email protected]>
> > wrote:
> > >
> > > > what are values for these variables:
> > > > STORE finaldata INTO '$OUT' USING
> AvroStorage('schema_uri','$SCHEMA');
> > > >
> > > > 2014-10-15 17:51 GMT+04:00 Jakub Stransky <[email protected]>:
> > > >
> > > > > No_schema_check doesn't help. Essentially we need either to remove
> > > > relation
> > > > > name or to ensure that schema is used during store. Here it seems
> > that
> > > > even
> > > > > schema is supplied the internal schema take precedence. And that
> > causes
> > > > > problems
> > > > >
> > > > > On 15 October 2014 15:41, praveenesh kumar <[email protected]>
> > > wrote:
> > > > >
> > > > > > Not really sure, but can you try adding 'no_schema_check' while
> > using
> > > > > > AvroStorage in Store function.
> > > > > >
> > > > > > On Wed, Oct 15, 2014 at 1:59 PM, Jakub Stransky <
> > > [email protected]
> > > > >
> > > > > > wrote:
> > > > > >
> > > > > > > Hello experienced users,
> > > > > > >
> > > > > > > I am working with avro data files using AvroStorage and I am
> > facing
> > > > > > > following issue. I cannot store the data of my result back to
> > avro
> > > > data
> > > > > > > file.
> > > > > > >
> > > > > > > I have following script
> > > > > > > inputdata = load '$INP' using AvroStorage();
> > > > > > > dirtydata = DISTINCT inputdata;
> > > > > > > sodtr = FILTER dirtydata BY TransactionBlockNumber == 1;
> > > > > > > sto   = FOREACH sodtr GENERATE Dob.Value AS Dob,StoreId,
> > > > > > > Created.UnixUtcTime;
> > > > > > > g     = GROUP sto BY  (Dob,StoreId);
> > > > > > > sodtime = FOREACH g GENERATE group.Dob AS Dob, group.StoreId AS
> > > > > StoreId,
> > > > > > > MAX(sto.UnixUtcTime) AS latestStartOfDayTime;
> > > > > > >
> > > > > > > joined = JOIN dirtydata BY (Dob.Value, StoreId) LEFT OUTER,
> > sodtime
> > > > BY
> > > > > > > (Dob, StoreId);
> > > > > > >
> > > > > > > cleandata = FILTER joined BY dirtydata::Created.UnixUtcTime >=
> > > > > > > sodtime.latestStartOfDayTime; --1412864846
> > > > > > > finaldata = FOREACH cleandata GENERATE dirtydata::Version ..
> > > > > > > dirtydata::Created;
> > > > > > >
> > > > > > > STORE finaldata INTO '$OUT' USING
> > > > AvroStorage('schema_uri','$SCHEMA');
> > > > > > >
> > > > > > > Where $SCHEMA contains exactly the same schema as inputdata. By
> > pig
> > > > > > > operations I got several nested relation, columns etc. Those
> > should
> > > > be
> > > > > > > removed by .. operator. Resulting schema using describe
> > > > > > >
> > > > > > >
> > > > > > > finaldata: {dirtydata*::*Version: int,dirtydata::Dob: (Value:
> > > > > > > int),dirtydata::StoreId:
> > > chararray,dirtydata::TransactionBlockNumber:
> > > > > > > int,dirtydata::TransactionData: {TransactionData:
> > > (TransactionHeader:
> > > > > > (Dob:
> > > > > > > (Value: int),StoreId: chararray,TransactionId:
> > int,TransactionTime:
> > > > > > > (UnixUtcTime: long,OffsetMinutes: int),TerminalId:
> > > > > > > chararray,ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > > > > > chararray),Manager: (Id: chararray,Name:
> chararray))),CustomData:
> > > > > > > {KeyValue: (Key: chararray,Value: chararray)},StoreInfo:
> > > > > (IsQuickService:
> > > > > > > boolean,CurrencyIsoCode: chararray),NewChecks: {NewCheckData:
> > > > (CheckId:
> > > > > > > chararray,CheckHeader: (CarriedOver: boolean,TerminalId:
> > > > > > > chararray,Training: boolean,Period: (Id: chararray,Label:
> > > > > > > chararray),GroupInfo: (Id: chararray,Label: (Id:
> chararray,Label:
> > > > > > > chararray),IsTable: boolean),Events: {CheckEvent:
> > > (CustomEventLabel:
> > > > > > > chararray,Time: (UnixUtcTime: long,OffsetMinutes:
> > > > int),CheckEventType:
> > > > > > > chararray)},CheckResponsibleEmployees:
> {CheckResponsibleEmployee:
> > > > > > > (Employee: (Id: chararray,Name: chararray),Time: (UnixUtcTime:
> > > > > > > long,OffsetMinutes: int))},GuestCounting: (Guests: (Value:
> > > > > > chararray),Mode:
> > > > > > > chararray),PrintedCheckId: chararray,RevenueCenter: (Id:
> > > > > chararray,Label:
> > > > > > > chararray),Room: (Id: chararray,Label: chararray)))},Checks:
> > > > > {CheckData:
> > > > > > > (CheckId: chararray,CheckHeaderUpdate: (Period: (Id:
> > > chararray,Label:
> > > > > > > chararray),GroupInfo: (Id: chararray,Label: (Id:
> chararray,Label:
> > > > > > > chararray),IsTable: boolean),Events: {CheckEvent:
> > > (CustomEventLabel:
> > > > > > > chararray,Time: (UnixUtcTime: long,OffsetMinutes:
> > > > int),CheckEventType:
> > > > > > > chararray)},CheckResponsibleEmployees:
> {CheckResponsibleEmployee:
> > > > > > > (Employee: (Id: chararray,Name: chararray),Time: (UnixUtcTime:
> > > > > > > long,OffsetMinutes: int))},GuestCounting: (Guests: (Value:
> > > > > > chararray),Mode:
> > > > > > > chararray),PrintedCheckId: chararray,RevenueCenter: (Id:
> > > > > chararray,Label:
> > > > > > > chararray),Room: (Id: chararray,Label: chararray)),Summary:
> > > > (NetAmount:
> > > > > > > (Value: chararray),Total: (Value: chararray)),CheckItems:
> > > {CheckItem:
> > > > > > > (AbstractCheckElement: (Amount: (Value: chararray),ElementId:
> > > > > > > chararray,ElementKind: (Id: chararray,Label:
> > chararray),CreatedOn:
> > > > > > > (UnixUtcTime: long,OffsetMinutes: int),ResponsibleEmployees:
> > > > (Employee:
> > > > > > > (Id: chararray,Name: chararray),Manager: (Id: chararray,Name:
> > > > > > > chararray))),Categories: {Category: (CategoryInfo: (Id:
> > > > > chararray,Label:
> > > > > > > chararray),Type: chararray)},ModifierInfo: (Label: (Id:
> > > > > chararray,Label:
> > > > > > > chararray),ItemModifierInfoType: chararray),NetAmount: (Value:
> > > > > > > chararray),OrderMode: (Id: chararray,Label:
> > > chararray),OriginalPrice:
> > > > > > > (Value: chararray),ParentItem: chararray,Quantity: (Value:
> > > > > > > chararray),Revenue: boolean,Seat: int,ProcessedInKitchen:
> > > > > > boolean,GiftCard:
> > > > > > > boolean,SplitItemElementId: chararray)},Comps: {CheckComp:
> > > > > > > (AbstractCheckLinkedElement: (AbstractCheckElement: (Amount:
> > > (Value:
> > > > > > > chararray),ElementId: chararray,ElementKind: (Id:
> > chararray,Label:
> > > > > > > chararray),CreatedOn: (UnixUtcTime: long,OffsetMinutes:
> > > > > > > int),ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > > > > > chararray),Manager: (Id: chararray,Name: chararray))),Items:
> > > > > {ItemAmount:
> > > > > > > (Amount: (Value: chararray),ElementId:
> > chararray)}),CheckCompType:
> > > > > > > chararray,Note: chararray)},Payments: {CheckPayment:
> > > > > > (AbstractCheckElement:
> > > > > > > (Amount: (Value: chararray),ElementId: chararray,ElementKind:
> > (Id:
> > > > > > > chararray,Label: chararray),CreatedOn: (UnixUtcTime:
> > > > > long,OffsetMinutes:
> > > > > > > int),ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > > > > > chararray),Manager: (Id: chararray,Name:
> chararray))),ChangeBack:
> > > > > (Value:
> > > > > > > chararray),DocumentId: chararray,Rounding: (Value:
> > chararray),Tip:
> > > > > > (Value:
> > > > > > > chararray),CheckPaymentType: chararray,Card:
> chararray)},Promos:
> > > > > > > {CheckPromo: (AbstractCheckLinkedElement:
> (AbstractCheckElement:
> > > > > (Amount:
> > > > > > > (Value: chararray),ElementId: chararray,ElementKind: (Id:
> > > > > > chararray,Label:
> > > > > > > chararray),CreatedOn: (UnixUtcTime: long,OffsetMinutes:
> > > > > > > int),ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > > > > > chararray),Manager: (Id: chararray,Name: chararray))),Items:
> > > > > {ItemAmount:
> > > > > > > (Amount: (Value: chararray),ElementId: chararray)}),Discount:
> > > (Value:
> > > > > > > chararray),CheckPromoType: chararray)},Surcharges:
> > {CheckSurcharge:
> > > > > > > (AbstractCheckLinkedElement: (AbstractCheckElement: (Amount:
> > > (Value:
> > > > > > > chararray),ElementId: chararray,ElementKind: (Id:
> > chararray,Label:
> > > > > > > chararray),CreatedOn: (UnixUtcTime: long,OffsetMinutes:
> > > > > > > int),ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > > > > > chararray),Manager: (Id: chararray,Name: chararray))),Items:
> > > > > {ItemAmount:
> > > > > > > (Amount: (Value: chararray),ElementId: chararray)}),Rate:
> (Value:
> > > > > > > chararray),CheckSurchargeType: chararray,Accounting:
> > > > chararray)},Voids:
> > > > > > > {CheckVoid: (AbstractCheckLinkedElement: (AbstractCheckElement:
> > > > > (Amount:
> > > > > > > (Value: chararray),ElementId: chararray,ElementKind: (Id:
> > > > > > chararray,Label:
> > > > > > > chararray),CreatedOn: (UnixUtcTime: long,OffsetMinutes:
> > > > > > > int),ResponsibleEmployees: (Employee: (Id: chararray,Name:
> > > > > > > chararray),Manager: (Id: chararray,Name: chararray))),Items:
> > > > > {ItemAmount:
> > > > > > > (Amount: (Value: chararray),ElementId:
> > chararray)}),CheckVoidType:
> > > > > > > chararray,Note: chararray)},RemovedElements: {RemovedElement:
> > > > > (ElementId:
> > > > > > > chararray,RemovedElementType: chararray)})},LaborData:
> > {LaborData:
> > > > > > (Shifts:
> > > > > > > {Shift: (State: chararray,StartDate: (UnixUtcTime:
> > > > long,OffsetMinutes:
> > > > > > > int),EndDate: (UnixUtcTime: long,OffsetMinutes: int),TotalPay:
> > > > (Value:
> > > > > > > chararray),PayRates: {ShiftPayRate: (AfterHours:
> int,HourlyRate:
> > > > > (Value:
> > > > > > > chararray),IsOvertime: boolean)},ShiftNumber: int,Job: (Id:
> > > > > > > chararray,Label: chararray),Breaks: {Break: (Paid:
> > > boolean,StartDate:
> > > > > > > (UnixUtcTime: long,OffsetMinutes: int),EndDate: (UnixUtcTime:
> > > > > > > long,OffsetMinutes: int))},IsManager: boolean)},Employee: (Id:
> > > > > > > chararray,Name: chararray))})},dirtydata::Created:
> (UnixUtcTime:
> > > > > > > long,OffsetMinutes: int)}
> > > > > > >
> > > > > > > *I am getting error: Pig Schema contains a name that is not
> > allowed
> > > > in
> > > > > > > Avro. Which is probably because of :: remains for dirtydata. Is
> > > > there a
> > > > > > way
> > > > > > > how to strip this off  (as now there is no point being there)
> > > > otherwise
> > > > > > > schema should be identical to input schema.*
> > > > > > >
> > > > > > > *Thanks for helping me out*
> > > > > > > *Jakub*
> > > > > > >
> > > > > >
> > > > >
> > > > >
> > > > >
> > > > > --
> > > > > Jakub Stransky
> > > > > cz.linkedin.com/in/jakubstransky
> > > > >
> > > >
> > >
> > >
> > >
> > > --
> > > Jakub Stransky
> > > cz.linkedin.com/in/jakubstransky
> > >
> >
>
>
>
> --
> Jakub Stransky
> cz.linkedin.com/in/jakubstransky
>

Reply via email to