Hi

I am using LZO compression in our Hive scripts, but one script is still 
failing with the following error:

Diagnostic Messages for this Task:
Error: java.io.IOException: java.io.EOFException: Premature EOF from inputStream
        at 
org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
        at 
org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
        at 
org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:243)
        at 
org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getRecordReader(CombineHiveInputFormat.java:522)
        at 
org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:160)
        at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:381)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
        at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
        at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
Caused by: java.io.EOFException: Premature EOF from inputStream
        at 
com.hadoop.compression.lzo.LzopInputStream.readFully(LzopInputStream.java:75)
        at 
com.hadoop.compression.lzo.LzopInputStream.readHeader(LzopInputStream.java:114)
        at 
com.hadoop.compression.lzo.LzopInputStream.<init>(LzopInputStream.java:54)
        at 
com.hadoop.compression.lzo.LzopCodec.createInputStream(LzopCodec.java:83)
        at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1871)
        at 
org.apache.hadoop.io.SequenceFile$Reader.initialize(SequenceFile.java:1765)
        at 
org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1714)
        at 
org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1728)
        at 
org.apache.hadoop.mapred.SequenceFileRecordReader.<init>(SequenceFileRecordReader.java:49)
        at 
org.apache.hadoop.mapred.SequenceFileInputFormat.getRecordReader(SequenceFileInputFormat.java:64)
        at 
org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:240)
        ... 9 more


SCRIPT
=======
-- Session settings: compression for shuffle data, intermediate stage files
-- and the final job output, plus task-count hints.
--
-- Fix 1: the first statement originally read "set hiveconf mapred..." with a
-- space. The hiveconf namespace is written with a colon (hiveconf:name) or
-- omitted entirely. With a space the key being set is not
-- mapred.output.compression.type, so BLOCK compression was never requested.
--
-- Fix 2: the reported failure is SequenceFile$Reader.init calling
-- LzopCodec.createInputStream, which dies reading an lzop header. LzopCodec
-- produces the lzop container format and is not usable inside a SequenceFile.
-- With hive.exec.compress.intermediate=true and no explicit intermediate
-- codec, the files passed between stages presumably inherit the job output
-- codec (LzopCodec). Pinning the intermediate codec to Snappy keeps LzopCodec
-- for the final directory output only.
-- NOTE(review): confirm that the Hive version in use honours
-- hive.intermediate.compression.codec. If the failing SequenceFiles are the
-- source tables rather than intermediate files, those tables need to be
-- rewritten with com.hadoop.compression.lzo.LzoCodec instead.
set mapred.output.compression.type=BLOCK;

-- Shuffle (map output) compression.
set mapred.map.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
set mapreduce.map.output.compress=true;

-- Final output: lzop-compatible files in the target directory.
set hive.exec.compress.output=true;
set mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzopCodec;
set mapreduce.output.fileoutputformat.compress=true;

-- Intermediate files between the stages of the query.
set hive.exec.compress.intermediate=true;
set hive.intermediate.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
set hive.intermediate.compression.type=BLOCK;

-- Task-count hints.
-- NOTE(review): the two tasktracker maximums look like daemon-level settings
-- and the stack trace shows YarnChild, so they may have no effect when set
-- per session. They are kept unchanged pending confirmation.
set mapreduce.job.maps=500;
set mapreduce.job.reduces=8;
set mapreduce.tasktracker.map.tasks.maximum=12;
set mapreduce.tasktracker.reduce.tasks.maximum=8;
-- Load the project UDF jar and register the temporary functions for this
-- session. Only isnextagip, isfrombot and getValidHiddenSellers are called by
-- the query in this script. collect and processblankkeyword are registered
-- but not referenced in the statements shown here.
ADD JAR /home/nextag/sasubramanian/mycode/impressions/jar/impressions-hiveudfs-1.0-20130615-155038.jar;

CREATE TEMPORARY FUNCTION collect
    AS 'com.wizecommerce.utils.hive.udf.GenericUDAFCollect';
CREATE TEMPORARY FUNCTION isnextagip
    AS 'com.wizecommerce.utils.hive.udf.IsNextagIP';
CREATE TEMPORARY FUNCTION isfrombot
    AS 'com.wizecommerce.utils.hive.udf.IsFromBot';
CREATE TEMPORARY FUNCTION processblankkeyword
    AS 'com.wizecommerce.utils.hive.udf.ProcessBlankKeyword';
CREATE TEMPORARY FUNCTION getValidHiddenSellers
    AS 'com.wizecommerce.utils.hive.udf.GetValidHiddenSellers';
-- Export of keyword impressions with their product-title sellers and hidden
-- sellers for partition 2013-03-19. One output row per group of
-- (header_date, impression_id, header_searchsessionid, cached_visit_id,
-- header_servername, cached_ip, header_adnode).
--
-- Derived tables:
--   h   header rows of the partition with header_rbabsentsellers = 1, a
--       non-null cached_recordid, and for which isnextagip(cached_ip) and
--       isfrombot(cached_visit_id) are both FALSE.
--   po  ptitle rows of the partition that have a seller_id.
--   pi  (impression_id, ptitle_ptitleid, ptitle_rank) groups of the same rows
--       that contain exactly one distinct combination of seller_id,
--       seller_pricetier, seller_price and ptitle_rank.
--   p   po restricted to the groups kept by pi.
--   hp  h left-joined to p on impression_id and header_date.
--   sh  hidden-seller rows of the partition. Only the six columns used by the
--       outer query are projected (the original used SELECT *).
--
-- Output columns 8 and 9:
--   8   comma-joined set of seller_id|seller_pricetier|seller_price|ptitle_rank
--       with -1 substituted for a NULL pricetier or rank. The value -1|-1 is
--       mapped to NULL. Presumably that is what a header row without any
--       matching ptitle row produces - confirm against concat_ws NULL handling.
--   9   comma-joined result of getValidHiddenSellers over the set of
--       seller_id|ptitle_id|tag_id|price_tier strings, with the empty string
--       mapped to NULL.
--
-- The date literal appears in the target path and in every partition filter.
-- All of them must be changed together when running for another day.
INSERT OVERWRITE DIRECTORY '/user/beeswax/warehouse/keyword_impressions_ptitles_log/2013-03-19'
SELECT
    hp.header_date,
    hp.impression_id,
    hp.header_searchsessionid,
    hp.cached_visit_id,
    split(hp.header_servername,'[\.]')[0],
    hp.cached_ip,
    hp.header_adnode,
    IF(
        concat_ws(',',
            collect_set(
                concat_ws('|',
                    cast(hp.seller_id as STRING),
                    cast(IF(hp.seller_pricetier IS NULL, -1L, hp.seller_pricetier) as STRING),
                    cast(hp.seller_price as STRING),
                    cast(IF(hp.ptitle_rank IS NULL, -1L, hp.ptitle_rank) as STRING)
                )
            )
        ) = '-1|-1',
        NULL,
        concat_ws(',',
            collect_set(
                concat_ws('|',
                    cast(hp.seller_id as STRING),
                    cast(IF(hp.seller_pricetier IS NULL, -1L, hp.seller_pricetier) as STRING),
                    cast(hp.seller_price as STRING),
                    cast(IF(hp.ptitle_rank IS NULL, -1L, hp.ptitle_rank) as STRING)
                )
            )
        )
    ),
    IF(
        concat_ws(',',
            getValidHiddenSellers(
                collect_set(
                    concat_ws('|',
                        cast(sh.seller_id as STRING),
                        cast(sh.ptitle_id as STRING),
                        cast(sh.tag_id as STRING),
                        cast(IF(sh.price_tier IS NULL, -1L, sh.price_tier) as STRING)
                    )
                )
            )
        ) = '',
        NULL,
        concat_ws(',',
            getValidHiddenSellers(
                collect_set(
                    concat_ws('|',
                        cast(sh.seller_id as STRING),
                        cast(sh.ptitle_id as STRING),
                        cast(sh.tag_id as STRING),
                        cast(IF(sh.price_tier IS NULL, -1L, sh.price_tier) as STRING)
                    )
                )
            )
        )
    )
FROM
    (
        SELECT
            h.header_date,
            h.header_servername,
            h.impression_id,
            h.header_searchsessionid,
            h.cached_visit_id,
            h.cached_ip,
            h.header_adnode,
            p.ptitle_ptitleid,
            p.seller_id,
            p.seller_pricetier,
            p.seller_price,
            p.ptitle_rank
        FROM
            (
                SELECT
                    header_date,
                    header_servername,
                    impression_id,
                    header_searchsessionid,
                    cached_ip,
                    header_adnode,
                    cached_recordid,
                    cached_visit_id
                FROM
                    outpdir_impressions_header
                WHERE
                    header_date_partition = '2013-03-19'
                    AND header_rbabsentsellers = 1L
                    AND cached_recordid IS NOT NULL
                    AND isnextagip(cached_ip) = FALSE
                    AND isfrombot(cached_visit_id) = FALSE
            ) h
        LEFT OUTER JOIN
            (
                SELECT
                    po.impression_id,
                    po.ptitle_ptitleid,
                    po.header_date,
                    po.seller_id,
                    po.seller_pricetier,
                    po.seller_price,
                    po.ptitle_rank
                FROM
                    (
                        SELECT
                            impression_id,
                            ptitle_ptitleid,
                            header_date,
                            seller_id,
                            seller_pricetier,
                            seller_price,
                            ptitle_rank
                        FROM
                            outpdir_impressions_ptitle
                        WHERE
                            header_date_partition = '2013-03-19'
                            AND seller_id IS NOT NULL
                    ) po
                JOIN
                    (
                        SELECT
                            impression_id,
                            ptitle_ptitleid,
                            ptitle_rank,
                            COUNT(DISTINCT seller_id, seller_pricetier, seller_price, ptitle_rank) AS distinct_seller_rows
                        FROM
                            outpdir_impressions_ptitle
                        WHERE
                            header_date_partition = '2013-03-19'
                            AND seller_id IS NOT NULL
                        GROUP BY
                            impression_id,
                            ptitle_ptitleid,
                            ptitle_rank
                        HAVING
                            COUNT(DISTINCT seller_id, seller_pricetier, seller_price, ptitle_rank) = 1
                    ) pi
                ON
                    po.impression_id = pi.impression_id
                    AND po.ptitle_ptitleid = pi.ptitle_ptitleid
                    AND po.ptitle_rank = pi.ptitle_rank
            ) p
        ON
            h.impression_id = p.impression_id
            AND h.header_date = p.header_date
    ) hp
LEFT OUTER JOIN
    (
        SELECT
            impression_id,
            header_date,
            seller_id,
            ptitle_id,
            tag_id,
            price_tier
        FROM
            outpdir_seller_hidden
        WHERE
            header_date_partition = '2013-03-19'
    ) sh
ON
    hp.impression_id = sh.impression_id
    AND hp.header_date = sh.header_date
GROUP BY
    hp.header_date,
    hp.impression_id,
    hp.header_searchsessionid,
    hp.cached_visit_id,
    hp.header_servername,
    hp.cached_ip,
    hp.header_adnode;


CONFIDENTIALITY NOTICE
======================
This email message and any attachments are for the exclusive use of the 
intended recipient(s) and may contain confidential and privileged information. 
Any unauthorized review, use, disclosure or distribution is prohibited. If you 
are not the intended recipient, please contact the sender by reply email and 
destroy all copies of the original message along with any attachments, from 
your computer system. If you are the intended recipient, please be advised that 
the content of this message is subject to access, review and disclosure by the 
sender's Email System Administrator.

Reply via email to