Hi all,

We have a DataStax/Cassandra cluster, and I am trying to see if I can get CarbonData working on it.
Below are the steps I tried in the Spark shell:

scala> import com.datastax.spark.connector._
scala> import org.apache.spark.sql.SaveMode
scala> import org.apache.spark.sql.CarbonContext
scala> import org.apache.spark.sql.types._
scala> val cc = new CarbonContext(sc, "cfs://127.0.0.1/opt/CarbonStore")
scala> val df = cc.read.parquet("file:///home/cassandra/testdata-30day/cassandra/zone.parquet")
scala> df.write.format("carbondata").option("tableName", "zone").option("compress", "true").option("TempCSV", "false").mode(SaveMode.Overwrite).save()

The exception below is thrown and the CarbonData table fails to be created. The full stack trace is attached. I would appreciate any pointers on where to look.

==============================================
java.io.FileNotFoundException: /opt/CarbonStore/default/zone/Metadata/schema (No such file or directory)
        at java.io.FileOutputStream.open0(Native Method)
        at java.io.FileOutputStream.open(FileOutputStream.java:270)
        at java.io.FileOutputStream.<init>(FileOutputStream.java:213)
        at java.io.FileOutputStream.<init>(FileOutputStream.java:133)
        at org.apache.carbondata.core.datastore.impl.FileFactory.getDataOutputStream(FileFactory.java:207)
        at org.apache.carbondata.core.writer.ThriftWriter.open(ThriftWriter.java:84)
        at org.apache.spark.sql.hive.CarbonMetastore.createTableFromThrift(CarbonMetastore.scala:293)
        at org.apache.spark.sql.execution.command.CreateTable.run(carbonTableSchema.scala:163)
        at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58)
        at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56)
        at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130)
        at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55)
        at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55)
        at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145)
        at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130)
        at org.apache.spark.sql.CarbonContext.sql(CarbonContext.scala:139)
        at org.apache.carbondata.spark.CarbonDataFrameWriter.saveAsCarbonFile(CarbonDataFrameWriter.scala:39)
        at org.apache.spark.sql.CarbonSource.createRelation(CarbonDatasourceRelation.scala:109)
        at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:222)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:148)
==============================================

Thanks,
Sanoj
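P.S. One observation from the trace, in case it helps narrow things down: FileFactory.getDataOutputStream ends up in a plain java.io.FileOutputStream, so the cfs:// store path appears to be treated as a local filesystem path. As a hypothetical next step (not something I have run yet, and assuming DSE's CFS Hadoop adapter is available on the spark-shell classpath), I was planning to check how the store URI resolves through Hadoop, roughly:

    // Hypothetical diagnostic: resolve the store URI through the same Hadoop
    // configuration the shell uses and see which FileSystem implementation answers.
    import org.apache.hadoop.fs.{FileSystem, Path}

    val storePath = new Path("cfs://127.0.0.1/opt/CarbonStore")
    val fs = FileSystem.get(storePath.toUri, sc.hadoopConfiguration)
    println(fs.getClass.getName)  // CFS implementation vs. org.apache.hadoop.fs.LocalFileSystem

    // Try creating the metadata directory CarbonData could not find, via that FileSystem.
    fs.mkdirs(new Path(storePath, "default/zone/Metadata"))

If mkdirs succeeds through Hadoop but CarbonData still tries to write /opt/CarbonStore/... on the local disk, the fallback is presumably happening in CarbonData's own path handling rather than in the CFS layer itself.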
cassandra@sanoj-OptiPlex-990:~/single-carbon/dse-5.0.4$ ./bin/dse spark SLF4J: Class path contains multiple SLF4J bindings. SLF4J: Found binding in [jar:file:/home/cassandra/single-carbon/dse-5.0.4/lib/carbondata_2.10-1.1.0-incubating-SNAPSHOT-shade-hadoop2.2.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/home/cassandra/single-carbon/dse-5.0.4/resources/cassandra/lib/logback-classic-1.1.3.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory] 17/04/04 16:17:31 INFO deploy.DseSparkSubmitBootstrapper: DSE Spark 17/04/04 16:17:32 WARN core.NettyUtil: Found Netty's native epoll transport in the classpath, but epoll is not available. Using NIO instead. 17/04/04 16:17:33 INFO core.Cluster: New Cassandra host /127.0.0.1:9042 added 17/04/04 16:17:33 INFO cql.CassandraConnector: Connected to Cassandra cluster: Test Cluster 17/04/04 16:17:33 INFO deploy.SparkNodeConfiguration: Trying to setup a server socket at /10.33.31.29:34923 to verify connectivity with DSE node... 17/04/04 16:17:33 INFO deploy.SparkNodeConfiguration: Successfully verified DSE Node -> this application connectivity on random port (34923) 17/04/04 16:17:33 INFO deploy.DseSparkSubmitBootstrapper: Starting Spark driver using SparkSubmit Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 1.6.2 /_/ Using Scala version 2.10.5 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_111) Type in expressions to have them evaluated. Type :help for more information. Initializing SparkContext with MASTER: spark://127.0.0.1:7077 17/04/04 16:17:36 INFO spark.SparkContext: Running Spark version 1.6.2 17/04/04 16:17:36 INFO spark.SecurityManager: Changing view acls to: cassandra 17/04/04 16:17:36 INFO spark.SecurityManager: Changing modify acls to: cassandra 17/04/04 16:17:36 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(cassandra); users with modify permissions: Set(cassandra) 17/04/04 16:17:37 INFO util.Utils: Successfully started service 'sparkDriver' on port 34668. 17/04/04 16:17:37 INFO slf4j.Slf4jLogger: Slf4jLogger started 17/04/04 16:17:37 INFO Remoting: Starting remoting 17/04/04 16:17:37 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriverActorSystem@10.33.31.29:39739] 17/04/04 16:17:37 INFO util.Utils: Successfully started service 'sparkDriverActorSystem' on port 39739. 17/04/04 16:17:37 INFO spark.SparkEnv: Registering MapOutputTracker 17/04/04 16:17:37 INFO spark.SparkEnv: Registering BlockManagerMaster 17/04/04 16:17:37 INFO storage.DiskBlockManager: Created local directory at /home/cassandra/single-carbon/spark/rdd/blockmgr-308a5366-7d6f-480b-b0c3-51070ab14a3a 17/04/04 16:17:37 INFO storage.MemoryStore: MemoryStore started with capacity 511.1 MB 17/04/04 16:17:37 INFO spark.SparkEnv: Registering OutputCommitCoordinator 17/04/04 16:17:38 INFO server.Server: jetty-8.y.z-SNAPSHOT 17/04/04 16:17:38 INFO server.AbstractConnector: Started SelectChannelConnector@0.0.0.0:4040 17/04/04 16:17:38 INFO util.Utils: Successfully started service 'SparkUI' on port 4040. 17/04/04 16:17:38 INFO ui.SparkUI: Started SparkUI at http://10.33.31.29:4040 17/04/04 16:17:38 INFO client.AppClient$ClientEndpoint: Connecting to master spark://127.0.0.1:7077... 
17/04/04 16:17:38 INFO cluster.SparkDeploySchedulerBackend: Connected to Spark cluster with app ID app-20170404161738-0002 17/04/04 16:17:38 INFO client.AppClient$ClientEndpoint: Executor added: app-20170404161738-0002/0 on worker-20170404145724-127.0.0.1-35677 (127.0.0.1:35677) with 3 cores 17/04/04 16:17:38 INFO cluster.SparkDeploySchedulerBackend: Granted executor ID app-20170404161738-0002/0 on hostPort 127.0.0.1:35677 with 3 cores, 1024.0 MB RAM 17/04/04 16:17:38 INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 42058. 17/04/04 16:17:38 INFO netty.NettyBlockTransferService: Server created on 42058 17/04/04 16:17:38 INFO storage.BlockManagerMaster: Trying to register BlockManager 17/04/04 16:17:38 INFO storage.BlockManagerMasterEndpoint: Registering block manager 10.33.31.29:42058 with 511.1 MB RAM, BlockManagerId(driver, 10.33.31.29, 42058) 17/04/04 16:17:38 INFO client.AppClient$ClientEndpoint: Executor updated: app-20170404161738-0002/0 is now RUNNING 17/04/04 16:17:38 INFO storage.BlockManagerMaster: Registered BlockManager 17/04/04 16:17:38 INFO spark.SparkContext: Registered listener com.datastax.bdp.spark.reporting.DseSparkListener 17/04/04 16:17:38 INFO cluster.SparkDeploySchedulerBackend: SchedulerBackend is ready for scheduling beginning after reached minRegisteredResourcesRatio: 0.0 Created spark context.. Spark context available as sc. Hive context available as sqlContext. Will be initialized on first use. scala> 17/04/04 16:17:40 INFO cql.CassandraConnector: Disconnected from Cassandra cluster: Test Cluster 17/04/04 16:17:41 INFO cluster.SparkDeploySchedulerBackend: Registered executor NettyRpcEndpointRef(null) (10.33.31.29:34220) with ID 0 17/04/04 16:17:41 INFO storage.BlockManagerMasterEndpoint: Registering block manager 10.33.31.29:34941 with 511.1 MB RAM, BlockManagerId(0, 10.33.31.29, 34941) scala> import com.datastax.spark.connector._ import com.datastax.spark.connector._ scala> import org.apache.spark.sql.SaveMode import org.apache.spark.sql.SaveMode scala> import org.apache.spark.sql.CarbonContext import org.apache.spark.sql.CarbonContext scala> import org.apache.spark.sql.types. | _ import org.apache.spark.sql.types._ scala> val cc = new CarbonContext(sc, "cfs://127.0.0.1/opt/CarbonStore") 17/04/04 16:18:45 INFO sql.CarbonContext: Initializing execution hive, version 1.2.1 17/04/04 16:18:45 INFO client.ClientWrapper: Inspected Hadoop version: 2.7.1.3 17/04/04 16:18:45 INFO client.ClientWrapper: Loaded org.apache.hadoop.hive.shims.Hadoop23Shims for Hadoop version 2.7.1.3 17/04/04 16:18:45 INFO Configuration.deprecation: fs.default.name is deprecated. 
Instead, use fs.defaultFS 17/04/04 16:18:45 INFO metastore.HiveMetaStore: 0: Opening raw store with implemenation class:org.apache.hadoop.hive.metastore.ObjectStore 17/04/04 16:18:45 INFO metastore.ObjectStore: ObjectStore, initialize called 17/04/04 16:18:45 INFO DataNucleus.Persistence: Property hive.metastore.integral.jdo.pushdown unknown - will be ignored 17/04/04 16:18:45 INFO DataNucleus.Persistence: Property datanucleus.cache.level2 unknown - will be ignored 17/04/04 16:18:46 INFO derby.LogbackBridge: ---------------------------------------------------------------- 17/04/04 16:18:46 INFO derby.LogbackBridge: Tue Apr 04 16:18:46 GST 2017: 17/04/04 16:18:46 INFO derby.LogbackBridge: Booting Derby version The Apache Software Foundation - Apache Derby - 10.10.2.0 - (1582446): instance a816c00e-015b-38e7-2d67-0000253b4218 17/04/04 16:18:46 INFO derby.LogbackBridge: on database directory /tmp/spark-d5036d7e-a7b1-4b6a-96e8-e0cb97f66333/metastore with class loader sun.misc.Launcher$AppClassLoader@6d6f6e28 17/04/04 16:18:46 INFO derby.LogbackBridge: Loaded from file:/home/cassandra/single-carbon/dse-5.0.4/resources/spark/lib/derby-10.10.2.0.jar 17/04/04 16:18:46 INFO derby.LogbackBridge: java.vendor=Oracle Corporation 17/04/04 16:18:46 INFO derby.LogbackBridge: java.runtime.version=1.8.0_111-b14 17/04/04 16:18:46 INFO derby.LogbackBridge: user.dir=/home/cassandra/single-carbon/dse-5.0.4 17/04/04 16:18:46 INFO derby.LogbackBridge: os.name=Linux 17/04/04 16:18:46 INFO derby.LogbackBridge: os.arch=amd64 17/04/04 16:18:46 INFO derby.LogbackBridge: os.version=4.4.0-71-generic 17/04/04 16:18:46 INFO derby.LogbackBridge: derby.system.home=null 17/04/04 16:18:46 INFO derby.LogbackBridge: derby.stream.error.method=com.datastax.bdp.derby.LogbackBridge.getLogger 17/04/04 16:18:46 INFO derby.LogbackBridge: Database Class Loader started - derby.database.classpath='' 17/04/04 16:18:53 INFO metastore.ObjectStore: Setting MetaStore object pin classes with hive.metastore.cache.pinobjtypes="Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order" 17/04/04 16:18:54 INFO DataNucleus.Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. 17/04/04 16:18:54 INFO DataNucleus.Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. 17/04/04 16:19:00 INFO DataNucleus.Datastore: The class "org.apache.hadoop.hive.metastore.model.MFieldSchema" is tagged as "embedded-only" so does not have its own datastore table. 17/04/04 16:19:00 INFO DataNucleus.Datastore: The class "org.apache.hadoop.hive.metastore.model.MOrder" is tagged as "embedded-only" so does not have its own datastore table. 17/04/04 16:19:01 INFO metastore.MetaStoreDirectSql: Using direct SQL, underlying DB is DERBY 17/04/04 16:19:01 INFO metastore.ObjectStore: Initialized ObjectStore 17/04/04 16:19:02 WARN metastore.ObjectStore: Version information not found in metastore. 
hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0 17/04/04 16:19:02 WARN metastore.ObjectStore: Failed to get database default, returning NoSuchObjectException 17/04/04 16:19:03 INFO metastore.HiveMetaStore: Added admin role in metastore 17/04/04 16:19:03 INFO metastore.HiveMetaStore: Added public role in metastore 17/04/04 16:19:03 INFO metastore.HiveMetaStore: No user is added in admin role, since config is empty 17/04/04 16:19:04 INFO metastore.HiveMetaStore: 0: get_all_databases 17/04/04 16:19:04 INFO HiveMetaStore.audit: ugi=cassandra ip=unknown-ip-addr cmd=get_all_databases 17/04/04 16:19:04 INFO metastore.HiveMetaStore: 0: get_functions: db=default pat=* 17/04/04 16:19:04 INFO HiveMetaStore.audit: ugi=cassandra ip=unknown-ip-addr cmd=get_functions: db=default pat=* 17/04/04 16:19:04 INFO DataNucleus.Datastore: The class "org.apache.hadoop.hive.metastore.model.MResourceUri" is tagged as "embedded-only" so does not have its own datastore table. 17/04/04 16:19:05 INFO session.SessionState: Created local directory: /tmp/a27fa796-1be6-4bd3-a6dd-fccb7ee6667a_resources 17/04/04 16:19:05 INFO session.SessionState: Created HDFS directory: /tmp/hive/cassandra/a27fa796-1be6-4bd3-a6dd-fccb7ee6667a 17/04/04 16:19:05 INFO session.SessionState: Created local directory: /tmp/cassandra/a27fa796-1be6-4bd3-a6dd-fccb7ee6667a 17/04/04 16:19:05 INFO session.SessionState: Created HDFS directory: /tmp/hive/cassandra/a27fa796-1be6-4bd3-a6dd-fccb7ee6667a/_tmp_space.db 17/04/04 16:19:05 INFO sql.CarbonContext: default warehouse location is cfs:///user/spark/warehouse 17/04/04 16:19:05 INFO sql.CarbonContext: Initializing HiveMetastoreConnection version 1.2.1 using Spark classes. 17/04/04 16:19:05 INFO client.ClientWrapper: Inspected Hadoop version: 2.7.1.3 17/04/04 16:19:05 INFO client.ClientWrapper: Loaded org.apache.hadoop.hive.shims.Hadoop23Shims for Hadoop version 2.7.1.3 17/04/04 16:19:05 INFO metastore.HiveMetaStore: 0: Opening raw store with implemenation class:com.datastax.bdp.hadoop.hive.metastore.CassandraHiveMetaStore 17/04/04 16:19:08 INFO metastore.HiveMetaStore: Added admin role in metastore 17/04/04 16:19:08 INFO metastore.HiveMetaStore: Added public role in metastore 17/04/04 16:19:08 INFO metastore.HiveMetaStore: No user is added in admin role, since config is empty 17/04/04 16:19:08 INFO metastore.HiveMetaStore: 0: get_all_databases 17/04/04 16:19:08 INFO HiveMetaStore.audit: ugi=cassandra ip=unknown-ip-addr cmd=get_all_databases 17/04/04 16:19:08 INFO metastore.SchemaManagerService: Updating Cassandra Keyspace to Metastore Database Mapping 17/04/04 16:19:08 INFO metastore.SchemaManagerService: Refresh cluster meta data 17/04/04 16:19:08 WARN core.NettyUtil: Found Netty's native epoll transport in the classpath, but epoll is not available. Using NIO instead. 
17/04/04 16:19:09 INFO policies.DCAwareRoundRobinPolicy: Using data-center name 'Analytics' for DCAwareRoundRobinPolicy (if this is incorrect, please provide the correct datacenter name with DCAwareRoundRobinPolicy constructor) 17/04/04 16:19:09 INFO core.Cluster: New Cassandra host /127.0.0.1:9042 added 17/04/04 16:19:09 INFO metastore.HiveMetaStore: 0: get_functions: db=default pat=* 17/04/04 16:19:09 INFO HiveMetaStore.audit: ugi=cassandra ip=unknown-ip-addr cmd=get_functions: db=default pat=* 17/04/04 16:19:09 INFO metastore.CassandraHiveMetaStore: in getFunctions with dbName: default and functionNamePattern: * 17/04/04 16:19:09 INFO cfs.CassandraFileSystem: CassandraFileSystem.uri : cfs://127.0.0.1/ 17/04/04 16:19:09 INFO cfs.CassandraFileSystem: Default block size: 33554432 17/04/04 16:19:09 INFO Configuration.deprecation: fs.default.name is deprecated. Instead, use fs.defaultFS 17/04/04 16:19:09 INFO cfs.CassandraFileSystemThriftStore: Consistency level for reads from cfs: LOCAL_QUORUM 17/04/04 16:19:09 INFO cfs.CassandraFileSystemThriftStore: Consistency level for writes into cfs: LOCAL_QUORUM 17/04/04 16:19:09 INFO Configuration.deprecation: dfs.permissions is deprecated. Instead, use dfs.permissions.enabled 17/04/04 16:19:09 INFO session.SessionState: Created local directory: /tmp/d1cea672-56e9-4ee3-bb5e-9a52f79e4e2b_resources 17/04/04 16:19:09 INFO session.SessionState: Created HDFS directory: /tmp/hive/cassandra/d1cea672-56e9-4ee3-bb5e-9a52f79e4e2b 17/04/04 16:19:09 INFO session.SessionState: Created local directory: /tmp/cassandra/d1cea672-56e9-4ee3-bb5e-9a52f79e4e2b 17/04/04 16:19:09 INFO session.SessionState: Created HDFS directory: /tmp/hive/cassandra/d1cea672-56e9-4ee3-bb5e-9a52f79e4e2b/_tmp_space.db 17/04/04 16:19:11 INFO util.CarbonProperties: main Property file path: /home/cassandra/single-carbon/dse-5.0.4/../../../conf/carbon.properties 17/04/04 16:19:11 INFO util.CarbonProperties: main ------Using Carbon.properties -------- 17/04/04 16:19:11 INFO util.CarbonProperties: main {} 17/04/04 16:19:11 INFO util.CarbonProperties: main Carbon Current data file version: V3 17/04/04 16:19:11 INFO util.CarbonProperties: main Executor start up wait time: 5 17/04/04 16:19:11 INFO util.CarbonProperties: main Blocklet Size Configured value is "64 cc: org.apache.spark.sql.CarbonContext = org.apache.spark.sql.CarbonContext@71fc80df scala> val df = cc.read.parquet("file:///home/cassandra/testdata-30day/cassandra/zone.parquet") 17/04/04 16:19:17 INFO parquet.ParquetRelation: Listing file:/home/cassandra/testdata-30day/cassandra/zone.parquet on driver 17/04/04 16:19:17 INFO spark.SparkContext: Starting job: parquet at <console>:70 17/04/04 16:19:17 INFO scheduler.DAGScheduler: Got job 0 (parquet at <console>:70) with 3 output partitions 17/04/04 16:19:17 INFO scheduler.DAGScheduler: Final stage: ResultStage 0 (parquet at <console>:70) 17/04/04 16:19:17 INFO scheduler.DAGScheduler: Parents of final stage: List() 17/04/04 16:19:17 INFO scheduler.DAGScheduler: Missing parents: List() 17/04/04 16:19:17 INFO scheduler.DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[1] at parquet at <console>:70), which has no missing parents 17/04/04 16:19:17 INFO storage.MemoryStore: Block broadcast_0 stored as values in memory (estimated size 70.6 KB, free 70.6 KB) 17/04/04 16:19:17 INFO storage.MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 24.4 KB, free 95.0 KB) 17/04/04 16:19:17 INFO storage.BlockManagerInfo: Added broadcast_0_piece0 in memory on 10.33.31.29:42058 
(size: 24.4 KB, free: 511.1 MB) 17/04/04 16:19:17 INFO spark.SparkContext: Created broadcast 0 from broadcast at DAGScheduler.scala:1006 17/04/04 16:19:17 INFO scheduler.DAGScheduler: Submitting 3 missing tasks from ResultStage 0 (MapPartitionsRDD[1] at parquet at <console>:70) 17/04/04 16:19:17 INFO scheduler.TaskSchedulerImpl: Adding task set 0.0 with 3 tasks 17/04/04 16:19:17 INFO scheduler.TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, 10.33.31.29, partition 0,PROCESS_LOCAL, 2076 bytes) 17/04/04 16:19:17 INFO scheduler.TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, 10.33.31.29, partition 1,PROCESS_LOCAL, 2076 bytes) 17/04/04 16:19:17 INFO scheduler.TaskSetManager: Starting task 2.0 in stage 0.0 (TID 2, 10.33.31.29, partition 2,PROCESS_LOCAL, 2217 bytes) 17/04/04 16:19:17 INFO storage.BlockManagerInfo: Added broadcast_0_piece0 in memory on 10.33.31.29:34941 (size: 24.4 KB, free: 511.1 MB) 17/04/04 16:19:18 INFO scheduler.TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 481 ms on 10.33.31.29 (1/3) 17/04/04 16:19:18 INFO scheduler.TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 511 ms on 10.33.31.29 (2/3) 17/04/04 16:19:18 INFO scheduler.TaskSetManager: Finished task 2.0 in stage 0.0 (TID 2) in 1212 ms on 10.33.31.29 (3/3) 17/04/04 16:19:18 INFO scheduler.DAGScheduler: ResultStage 0 (parquet at <console>:70) finished in 1.232 s 17/04/04 16:19:18 INFO scheduler.TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 17/04/04 16:19:18 INFO scheduler.DAGScheduler: Job 0 finished: parquet at <console>:70, took 1.361201 s df: org.apache.spark.sql.DataFrame = [Zone: int, Country: int, Description: string, WeeklyOff1: int, WeeklyOff2: int, TimeOffset: int] scala> df.write.format("carbondata").option("tableName", "zone").option("compress", "true").option("TempCSV","false").mode(SaveMode.Overwrite).save() 17/04/04 16:19:30 INFO storage.BlockManagerInfo: Removed broadcast_0_piece0 on 10.33.31.29:42058 in memory (size: 24.4 KB, free: 511.1 MB) 17/04/04 16:19:30 INFO storage.BlockManagerInfo: Removed broadcast_0_piece0 on 10.33.31.29:34941 in memory (size: 24.4 KB, free: 511.1 MB) 17/04/04 16:19:30 INFO sql.CarbonContext$: main Query [ CREATE TABLE IF NOT EXISTS DEFAULT.ZONE (ZONE INT, COUNTRY INT, DESCRIPTION STRING, WEEKLYOFF1 INT, WEEKLYOFF2 INT, TIMEOFFSET INT) STORED BY 'ORG.APACHE.CARBONDATA.FORMAT' ] 17/04/04 16:19:30 INFO spark.ContextCleaner: Cleaned accumulator 1 17/04/04 16:19:30 INFO parse.ParseDriver: Parsing command: CREATE TABLE IF NOT EXISTS default.zone (Zone INT, Country INT, Description STRING, WeeklyOff1 INT, WeeklyOff2 INT, TimeOffset INT) STORED BY 'org.apache.carbondata.format' 17/04/04 16:19:31 INFO parse.ParseDriver: Parse Completed 17/04/04 16:19:31 AUDIT command.CreateTable: [sanoj-OptiPlex-990][cassandra][Thread-1]Creating Table with Database name [default] and Table name [zone] 17/04/04 16:19:31 INFO metastore.HiveMetaStore: 0: get_tables: db=default pat=.* 17/04/04 16:19:31 INFO HiveMetaStore.audit: ugi=cassandra ip=unknown-ip-addr cmd=get_tables: db=default pat=.* 17/04/04 16:19:31 INFO metastore.CassandraHiveMetaStore: in getTables with dbName: default and tableNamePattern: .* 17/04/04 16:19:31 INFO metastore.SchemaManagerService: Refresh cluster meta data 17/04/04 16:19:31 INFO policies.DCAwareRoundRobinPolicy: Using data-center name 'Analytics' for DCAwareRoundRobinPolicy (if this is incorrect, please provide the correct datacenter name with DCAwareRoundRobinPolicy constructor) 17/04/04 16:19:31 INFO 
core.Cluster: New Cassandra host /127.0.0.1:9042 added 17/04/04 16:19:31 INFO metastore.SchemaManagerService: Create mapping in hive db: default, for unmapped tables from keyspace: null 17/04/04 16:19:31 INFO table.CarbonTable: main Table block size not specified for default_zone. Therefore considering the default value 1024 MB java.io.FileNotFoundException: /opt/CarbonStore/default/zone/Metadata/schema (No such file or directory) at java.io.FileOutputStream.open0(Native Method) at java.io.FileOutputStream.open(FileOutputStream.java:270) at java.io.FileOutputStream.<init>(FileOutputStream.java:213) at java.io.FileOutputStream.<init>(FileOutputStream.java:133) at org.apache.carbondata.core.datastore.impl.FileFactory.getDataOutputStream(FileFactory.java:207) at org.apache.carbondata.core.writer.ThriftWriter.open(ThriftWriter.java:84) at org.apache.spark.sql.hive.CarbonMetastore.createTableFromThrift(CarbonMetastore.scala:293) at org.apache.spark.sql.execution.command.CreateTable.run(carbonTableSchema.scala:163) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:58) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:56) at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:70) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:55) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:55) at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:145) at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:130) at org.apache.spark.sql.CarbonContext.sql(CarbonContext.scala:139) at org.apache.carbondata.spark.CarbonDataFrameWriter.saveAsCarbonFile(CarbonDataFrameWriter.scala:39) at org.apache.spark.sql.CarbonSource.createRelation(CarbonDatasourceRelation.scala:109) at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:222) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:148) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:73) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:78) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:80) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:82) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:84) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:86) at 
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:88) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:90) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:92) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:94) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:96) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:98) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:100) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:102) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:104) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:106) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:108) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:110) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:112) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:114) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:116) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:118) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:120) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:122) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:124) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:126) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:128) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:130) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:132) at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:134) at $iwC$$iwC$$iwC$$iwC.<init>(<console>:136) at $iwC$$iwC$$iwC.<init>(<console>:138) at $iwC$$iwC.<init>(<console>:140) at $iwC.<init>(<console>:142) at <init>(<console>:144) at .<init>(<console>:148) at .<clinit>(<console>) at .<init>(<console>:7) at .<clinit>(<console>) at $print(<console>) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1045) at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326) at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:821) at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:852) at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:800) at 
org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857) at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902) at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814) at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657) at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945) at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1064) at com.datastax.bdp.spark.SparkReplMain$.main(SparkReplMain.scala:16) at com.datastax.bdp.spark.SparkReplMain.main(SparkReplMain.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at org.apache.spark.deploy.DseSparkSubmitBootstrapper$.main(DseSparkSubmitBootstrapper.scala:48) at org.apache.spark.deploy.DseSparkSubmitBootstrapper.main(DseSparkSubmitBootstrapper.scala) scala> sc.setLogLevel("DEBUG")