Thanks, Nishant! I will also try the Pig JSON loader to meet this requirement. If you have a tutorial on extracting data from complex nested JSON arrays (like the example in my previous email), please send it.
Appreciate your help! Thanks, Joel On Tue, Oct 27, 2015 at 10:20 PM, Nishant Aggarwal <nishant....@gmail.com> wrote: > Hello Sam, > You can easily achieve this by using the elephant-bird jars in Pig. We are > also capturing tweets via Flume and filtering them using Pig and the elephant-bird jars. > You can find the related jars on the internet. > > Cheers, > Nishant Aggarwal > On 28 Oct 2015 00:50, "Sam Joe" <games2013....@gmail.com> wrote: > >> Hi, >> >> Is it possible to use the json_tuple function to extract data from JSON >> arrays (nested too)? I am trying to process JSON data as a string and avoid >> using SerDes, since user data may be malformed. >> >> Please see the sample JSON data given below: >> >> >> { >> >> "filter_level": "low", >> >> "retweeted": false, >> >> "in_reply_to_screen_name": null, >> >> "possibly_sensitive": false, >> >> "truncated": false, >> >> "lang": "en", >> >> "in_reply_to_status_id_str": null, >> >> "id": 654395184428515332, >> >> "extended_entities": { >> >> "media": [{ >> >> "sizes": { >> >> "thumb": { >> >> "w": 150, >> >> "resize": "crop", >> >> "h": 150 >> >> }, >> >> "small": { >> >> "w": 340, >> >> "resize": "fit", >> >> "h": 255 >> >> }, >> >> "large": { >> >> "w": 1024, >> >> "resize": "fit", >> >> "h": 768 >> >> }, >> >> "medium": { >> >> "w": 600, >> >> "resize": "fit", >> >> "h": 450 >> >> } >> >> }, >> >> "source_user_id": 16864598, >> >> "media_url": "http://pbs.twimg.com/media/CRSL2MPWsAAOnZo.jpg", >> >> "display_url": "pic.twitter.com/i3004WyF4g", >> >> "type": "photo", >> >> "url": "http://t.co/i3004WyF4g", >> >> "id": 654301608990388224, >> >> "media_url_https": "https://pbs.twimg.com/media/CRSL2MPWsAAOnZo.jpg", >> >> "expanded_url": " >> http://twitter.com/lordlancaster/status/654301626665189376/photo/1", >> >> "source_user_id_str": "16864598", >> >> "indices": [143, >> >> 144], >> >> "source_status_id_str": "654301626665189376", >> >> "source_status_id": 654301626665189376, >> >> "id_str": "654301608990388224" >> >> }, >> >> { >> >> 
"sizes": { >> >> "thumb": { >> >> "w": 150, >> >> "resize": "crop", >> >> "h": 150 >> >> }, >> >> "small": { >> >> "w": 340, >> >> "resize": "fit", >> >> "h": 255 >> >> }, >> >> "large": { >> >> "w": 1024, >> >> "resize": "fit", >> >> "h": 768 >> >> }, >> >> "medium": { >> >> "w": 600, >> >> "resize": "fit", >> >> "h": 450 >> >> } >> >> }, >> >> "source_user_id": 16864598, >> >> "media_url": "http://pbs.twimg.com/media/CRSL2MRWgAAGOcj.jpg", >> >> "display_url": "pic.twitter.com/i3004WyF4g", >> >> "type": "photo", >> >> "url": "http://t.co/i3004WyF4g", >> >> "id": 654301608998764544, >> >> "media_url_https": "https://pbs.twimg.com/media/CRSL2MRWgAAGOcj.jpg", >> >> "expanded_url": " >> http://twitter.com/lordlancaster/status/654301626665189376/photo/1", >> >> "source_user_id_str": "16864598", >> >> "indices": [143, >> >> 144], >> >> "source_status_id_str": "654301626665189376", >> >> "source_status_id": 654301626665189376, >> >> "id_str": "654301608998764544" >> >> }, >> >> { >> >> "sizes": { >> >> "thumb": { >> >> "w": 150, >> >> "resize": "crop", >> >> "h": 150 >> >> }, >> >> "small": { >> >> "w": 340, >> >> "resize": "fit", >> >> "h": 255 >> >> }, >> >> "large": { >> >> "w": 1024, >> >> "resize": "fit", >> >> "h": 768 >> >> }, >> >> "medium": { >> >> "w": 600, >> >> "resize": "fit", >> >> "h": 450 >> >> } >> >> }, >> >> "source_user_id": 16864598, >> >> "media_url": "http://pbs.twimg.com/media/CRSL2MQWwAAP4Qo.jpg", >> >> "display_url": "pic.twitter.com/i3004WyF4g", >> >> "type": "photo", >> >> "url": "http://t.co/i3004WyF4g", >> >> "id": 654301608994586624, >> >> "media_url_https": "https://pbs.twimg.com/media/CRSL2MQWwAAP4Qo.jpg", >> >> "expanded_url": " >> http://twitter.com/lordlancaster/status/654301626665189376/photo/1", >> >> "source_user_id_str": "16864598", >> >> "indices": [143, >> >> 144], >> >> "source_status_id_str": "654301626665189376", >> >> "source_status_id": 654301626665189376, >> >> "id_str": "654301608994586624" >> >> }, >> >> { >> >> "sizes": 
{ >> >> "thumb": { >> >> "w": 150, >> >> "resize": "crop", >> >> "h": 150 >> >> }, >> >> "small": { >> >> "w": 340, >> >> "resize": "fit", >> >> "h": 255 >> >> }, >> >> "large": { >> >> "w": 1024, >> >> "resize": "fit", >> >> "h": 768 >> >> }, >> >> "medium": { >> >> "w": 600, >> >> "resize": "fit", >> >> "h": 450 >> >> } >> >> }, >> >> "source_user_id": 16864598, >> >> "media_url": "http://pbs.twimg.com/media/CRSL2M8WcAEXowZ.jpg", >> >> "display_url": "pic.twitter.com/i3004WyF4g", >> >> "type": "photo", >> >> "url": "http://t.co/i3004WyF4g", >> >> "id": 654301609179115521, >> >> "media_url_https": "https://pbs.twimg.com/media/CRSL2M8WcAEXowZ.jpg", >> >> "expanded_url": " >> http://twitter.com/lordlancaster/status/654301626665189376/photo/1", >> >> "source_user_id_str": "16864598", >> >> "indices": [143, >> >> 144], >> >> "source_status_id_str": "654301626665189376", >> >> "source_status_id": 654301626665189376, >> >> "id_str": "654301609179115521" >> >> }] >> >> } >> >> } >> >> >> Appreciate any help! >> >> >> *Thanks,* >> >> *Joel* >> >> >> >>