Hi,

Try adding this to spark-env.sh:
export JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:/usr/lib/hadoop-0.20-mapreduce/lib/native/Linux-amd64-64
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/hadoop-0.20-mapreduce/lib/native/Linux-amd64-64
export SPARK_LIBRARY_PATH=$SPARK_LIBRARY_PATH:/usr/lib/hadoop-0.20-mapreduce/lib/native/Linux-amd64-64
export SPARK_CLASSPATH=$SPARK_CLASSPATH:/usr/lib/hadoop-0.20-mapreduce/lib/snappy-java-1.0.4.1.jar

pointing to the equivalent Snappy / MapReduce native-library directory on your box.

Thanks,
Kapil Malik

From: Naveen Kumar Pokala [mailto:npok...@spcapitaliq.com]
Sent: 12 November 2014 19:59
To: user@spark.apache.org
Subject: Snappy error with Spark SQL

Hi,

I am facing the following problem when I try to save my RDD as a Parquet file.

14/11/12 07:43:59 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 1.0 (TID 48,): org.xerial.snappy.SnappyError: [FAILED_TO_LOAD_NATIVE_LIBRARY] null
    org.xerial.snappy.SnappyLoader.load(SnappyLoader.java:236)
    org.xerial.snappy.Snappy.<clinit>(Snappy.java:48)
    parquet.hadoop.codec.SnappyCompressor.compress(SnappyCompressor.java:64)
    org.apache.hadoop.io.compress.CompressorStream.compress(CompressorStream.java:81)
    org.apache.hadoop.io.compress.CompressorStream.finish(CompressorStream.java:92)
    parquet.hadoop.CodecFactory$BytesCompressor.compress(CodecFactory.java:109)
    parquet.hadoop.ColumnChunkPageWriteStore$ColumnChunkPageWriter.writePage(ColumnChunkPageWriteStore.java:70)
    parquet.column.impl.ColumnWriterImpl.writePage(ColumnWriterImpl.java:119)
    parquet.column.impl.ColumnWriterImpl.flush(ColumnWriterImpl.java:199)
    parquet.column.impl.ColumnWriteStoreImpl.flush(ColumnWriteStoreImpl.java:108)
    parquet.hadoop.InternalParquetRecordWriter.flushStore(InternalParquetRecordWriter.java:146)
    parquet.hadoop.InternalParquetRecordWriter.close(InternalParquetRecordWriter.java:110)
    parquet.hadoop.ParquetRecordWriter.close(ParquetRecordWriter.java:73)
    org.apache.spark.sql.parquet.InsertIntoParquetTable.org$apache$spark$sql$parquet$InsertIntoParquetTable$$writeShard$1(ParquetTableOperations.scala:305)
    org.apache.spark.sql.parquet.InsertIntoParquetTable$$anonfun$saveAsHadoopFile$1.apply(ParquetTableOperations.scala:318)
    org.apache.spark.sql.parquet.InsertIntoParquetTable$$anonfun$saveAsHadoopFile$1.apply(ParquetTableOperations.scala:318)
    org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
    org.apache.spark.scheduler.Task.run(Task.scala:54)
    org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
    java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    java.lang.Thread.run(Thread.java:745)

14/11/12 07:43:59 WARN scheduler.TaskSetManager: Lost task 3.0 in stage 1.0 (TID 51,): java.lang.NoClassDefFoundError: Could not initialize class org.xerial.snappy.Snappy
    parquet.hadoop.codec.SnappyCompressor.compress(SnappyCompressor.java:64)
    org.apache.hadoop.io.compress.CompressorStream.compress(CompressorStream.java:81)
    org.apache.hadoop.io.compress.CompressorStream.finish(CompressorStream.java:92)
    parquet.hadoop.CodecFactory$BytesCompressor.compress(CodecFactory.java:109)
    parquet.hadoop.ColumnChunkPageWriteStore$ColumnChunkPageWriter.writePage(ColumnChunkPageWriteStore.java:70)
    parquet.column.impl.ColumnWriterImpl.writePage(ColumnWriterImpl.java:119)
    parquet.column.impl.ColumnWriterImpl.flush(ColumnWriterImpl.java:199)
    parquet.column.impl.ColumnWriteStoreImpl.flush(ColumnWriteStoreImpl.java:108)
    parquet.hadoop.InternalParquetRecordWriter.flushStore(InternalParquetRecordWriter.java:146)
    parquet.hadoop.InternalParquetRecordWriter.close(InternalParquetRecordWriter.java:110)
    parquet.hadoop.ParquetRecordWriter.close(ParquetRecordWriter.java:73)
    org.apache.spark.sql.parquet.InsertIntoParquetTable.org$apache$spark$sql$parquet$InsertIntoParquetTable$$writeShard$1(ParquetTableOperations.scala:305)
    org.apache.spark.sql.parquet.InsertIntoParquetTable$$anonfun$saveAsHadoopFile$1.apply(ParquetTableOperations.scala:318)
    org.apache.spark.sql.parquet.InsertIntoParquetTable$$anonfun$saveAsHadoopFile$1.apply(ParquetTableOperations.scala:318)
    org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
    org.apache.spark.scheduler.Task.run(Task.scala:54)
    org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177)
    java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    java.lang.Thread.run(Thread.java:745)

Please help me.

Regards,
Naveen.
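If fixing the native-library path on every executor is not an option, the Snappy dependency can also be sidestepped by writing Parquet with a different codec. The sketch below is only an illustration, not part of the thread above: the Instrument case class, the sample rows, and the output path are hypothetical, and it assumes a Spark 1.1-era SQLContext (the API the stack trace points at) where the spark.sql.parquet.compression.codec setting is available.

    // Sketch only: Instrument, the sample rows, and the HDFS path are made-up placeholders.
    // Assumes an existing SparkContext `sc`, e.g. the one provided by spark-shell.
    import org.apache.spark.sql.SQLContext

    case class Instrument(id: Int, price: Double)

    val sqlContext = new SQLContext(sc)
    import sqlContext.createSchemaRDD   // implicit RDD -> SchemaRDD conversion in Spark 1.x

    // Write Parquet with gzip instead of snappy, so the native Snappy library is never loaded.
    sqlContext.setConf("spark.sql.parquet.compression.codec", "gzip")

    val instruments = sc.parallelize(Seq(Instrument(1, 10.5), Instrument(2, 20.0)))
    // This is the call that previously failed inside parquet.hadoop.codec.SnappyCompressor.
    instruments.saveAsParquetFile("hdfs:///tmp/instruments.parquet")

If Snappy output is actually required, the spark-env.sh exports above remain the fix; the codec switch only confirms that the rest of the Parquet write path is healthy.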