Hello Sam, You can easily achieve this by using the elephant-bird jars in Pig. We are also capturing tweets via Flume and filtering them using Pig and the elephant-bird jars. You can find the related jars on the internet.
Cheers, Nishant Aggarwal On 28 Oct 2015 00:50, "Sam Joe" <[email protected]> wrote: > Hi, > > Is it possible to use json_tuple function to extract data from json arrays > (nested too). I am trying to process json data as string and avoid using > serdes since user data may be malformed. > > Please see a sample json data given below: > > > { > > "filter_level": "low", > > "retweeted": false, > > "in_reply_to_screen_name": null, > > "possibly_sensitive": false, > > "truncated": false, > > "lang": "en", > > "in_reply_to_status_id_str": null, > > "id": 654395184428515332, > > "extended_entities": { > > "media": [{ > > "sizes": { > > "thumb": { > > "w": 150, > > "resize": "crop", > > "h": 150 > > }, > > "small": { > > "w": 340, > > "resize": "fit", > > "h": 255 > > }, > > "large": { > > "w": 1024, > > "resize": "fit", > > "h": 768 > > }, > > "medium": { > > "w": 600, > > "resize": "fit", > > "h": 450 > > } > > }, > > "source_user_id": 16864598, > > "media_url": "http://pbs.twimg.com/media/CRSL2MPWsAAOnZo.jpg", > > "display_url": "pic.twitter.com/i3004WyF4g", > > "type": "photo", > > "url": "http://t.co/i3004WyF4g", > > "id": 654301608990388224, > > "media_url_https": "https://pbs.twimg.com/media/CRSL2MPWsAAOnZo.jpg", > > "expanded_url": " > http://twitter.com/lordlancaster/status/654301626665189376/photo/1", > > "source_user_id_str": "16864598", > > "indices": [143, > > 144], > > "source_status_id_str": "654301626665189376", > > "source_status_id": 654301626665189376, > > "id_str": "654301608990388224" > > }, > > { > > "sizes": { > > "thumb": { > > "w": 150, > > "resize": "crop", > > "h": 150 > > }, > > "small": { > > "w": 340, > > "resize": "fit", > > "h": 255 > > }, > > "large": { > > "w": 1024, > > "resize": "fit", > > "h": 768 > > }, > > "medium": { > > "w": 600, > > "resize": "fit", > > "h": 450 > > } > > }, > > "source_user_id": 16864598, > > "media_url": "http://pbs.twimg.com/media/CRSL2MRWgAAGOcj.jpg", > > "display_url": "pic.twitter.com/i3004WyF4g", > > "type": 
"photo", > > "url": "http://t.co/i3004WyF4g", > > "id": 654301608998764544, > > "media_url_https": "https://pbs.twimg.com/media/CRSL2MRWgAAGOcj.jpg", > > "expanded_url": " > http://twitter.com/lordlancaster/status/654301626665189376/photo/1", > > "source_user_id_str": "16864598", > > "indices": [143, > > 144], > > "source_status_id_str": "654301626665189376", > > "source_status_id": 654301626665189376, > > "id_str": "654301608998764544" > > }, > > { > > "sizes": { > > "thumb": { > > "w": 150, > > "resize": "crop", > > "h": 150 > > }, > > "small": { > > "w": 340, > > "resize": "fit", > > "h": 255 > > }, > > "large": { > > "w": 1024, > > "resize": "fit", > > "h": 768 > > }, > > "medium": { > > "w": 600, > > "resize": "fit", > > "h": 450 > > } > > }, > > "source_user_id": 16864598, > > "media_url": "http://pbs.twimg.com/media/CRSL2MQWwAAP4Qo.jpg", > > "display_url": "pic.twitter.com/i3004WyF4g", > > "type": "photo", > > "url": "http://t.co/i3004WyF4g", > > "id": 654301608994586624, > > "media_url_https": "https://pbs.twimg.com/media/CRSL2MQWwAAP4Qo.jpg", > > "expanded_url": " > http://twitter.com/lordlancaster/status/654301626665189376/photo/1", > > "source_user_id_str": "16864598", > > "indices": [143, > > 144], > > "source_status_id_str": "654301626665189376", > > "source_status_id": 654301626665189376, > > "id_str": "654301608994586624" > > }, > > { > > "sizes": { > > "thumb": { > > "w": 150, > > "resize": "crop", > > "h": 150 > > }, > > "small": { > > "w": 340, > > "resize": "fit", > > "h": 255 > > }, > > "large": { > > "w": 1024, > > "resize": "fit", > > "h": 768 > > }, > > "medium": { > > "w": 600, > > "resize": "fit", > > "h": 450 > > } > > }, > > "source_user_id": 16864598, > > "media_url": "http://pbs.twimg.com/media/CRSL2M8WcAEXowZ.jpg", > > "display_url": "pic.twitter.com/i3004WyF4g", > > "type": "photo", > > "url": "http://t.co/i3004WyF4g", > > "id": 654301609179115521, > > "media_url_https": "https://pbs.twimg.com/media/CRSL2M8WcAEXowZ.jpg", > > 
"expanded_url": " > http://twitter.com/lordlancaster/status/654301626665189376/photo/1", > > "source_user_id_str": "16864598", > > "indices": [143, > > 144], > > "source_status_id_str": "654301626665189376", > > "source_status_id": 654301626665189376, > > "id_str": "654301609179115521" > > }] > > } > > } > > > Appreciate any help! > > > *Thanks,* > > *Joel* > > > >
