[ https://issues.apache.org/jira/browse/METRON-2285?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Michael Miklavcic updated METRON-2285:
--------------------------------------
    Fix Version/s: Next + 1

> Batch Profiler Cannot Persist Data Sketches
> -------------------------------------------
>
>                 Key: METRON-2285
>                 URL: https://issues.apache.org/jira/browse/METRON-2285
>             Project: Metron
>          Issue Type: Bug
>    Affects Versions: 0.7.1
>            Reporter: Maxim Dashenko
>            Assignee: Nick Allen
>            Priority: Major
>             Fix For: Next + 1
>
>          Time Spent: 1h
>  Remaining Estimate: 0h
>
> Used command:
> {code}
> /usr/hdp/current/spark2-client/bin/spark-submit --class org.apache.metron.profiler.spark.cli.BatchProfilerCLI --properties-file /usr/hcp/current/metron/config/batch-profiler.properties ~/metron-profiler-spark-0.7.1.1.9.1.0-6.jar --config /usr/hcp/current/metron/config/batch-profiler.properties --profiles ~/profiler.json
> {code}
> cat /usr/hcp/current/metron/config/batch-profiler.properties
> {code}
> profiler.batch.input.path=/tmp/test_data.logs
> profiler.batch.input.format=json
> profiler.period.duration=15
> profiler.period.duration.units=MINUTES
> {code}
>
> cat ~/profiler.json
> {code}
> {
>    "profiles":[
>       {
>          "profile":"batchteststat",
>          "onlyif":"source.type == 'testsource' and devicehostname == 'windows9.something.com'",
>          "foreach":"devicehostname",
>          "update":{
>             "s":"STATS_ADD(s, devicehostname)"
>          },
>          "result":{
>             "profile":"s"
>          }
>       }
>    ],
>    "timestampField":"timestamp"
> }
> {code}
> cat test_data.logs
> {code}
> {"devicehostname": "windows9.something.com", "timestamp": 1567241981000, "source.type": "testsource"}
> {code}
> The command raises an exception:
> {code}
> Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 68 in stage 8.0 failed 1 times, most recent failure: Lost task 68.0 in stage 8.0 (TID 274, localhost, executor driver): com.esotericsoftware.kryo.KryoException: Unable to find class: org.apache.metron.statistics.OnlineStatisticsProvider
>       at
com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:156) > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:133) > at com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:670) > at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:781) > at > org.apache.metron.common.utils.SerDeUtils.fromBytes(SerDeUtils.java:262) > at > org.apache.metron.profiler.spark.ProfileMeasurementAdapter.toProfileMeasurement(ProfileMeasurementAdapter.java:85) > at > org.apache.metron.profiler.spark.function.HBaseWriterFunction.call(HBaseWriterFunction.java:124) > at org.apache.spark.sql.Dataset$$anonfun$48.apply(Dataset.scala:2266) > at org.apache.spark.sql.Dataset$$anonfun$48.apply(Dataset.scala:2266) > at > org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:196) > at > org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:193) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > at org.apache.spark.scheduler.Task.run(Task.scala:108) > at 
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.ClassNotFoundException: > org.apache.metron.statistics.OnlineStatisticsProvider > at java.net.URLClassLoader.findClass(URLClassLoader.java:381) > at java.lang.ClassLoader.loadClass(ClassLoader.java:424) > at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331) > at java.lang.ClassLoader.loadClass(ClassLoader.java:357) > at java.lang.Class.forName0(Native Method) > at java.lang.Class.forName(Class.java:348) > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:154) > ... 28 more > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1517) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1505) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1504) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1504) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814) > at scala.Option.foreach(Option.scala:257) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1732) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1687) > at 
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1676) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2029) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2050) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2069) > at > org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:336) > at > org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2861) > at > org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2150) > at > org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2150) > at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2842) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2841) > at org.apache.spark.sql.Dataset.head(Dataset.scala:2150) > at org.apache.spark.sql.Dataset.head(Dataset.scala:2157) > at > org.apache.metron.profiler.spark.BatchProfiler.run(BatchProfiler.java:103) > at > org.apache.metron.profiler.spark.cli.BatchProfilerCLI.main(BatchProfilerCLI.java:95) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:782) > at > org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205) > at 
org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: com.esotericsoftware.kryo.KryoException: Unable to find class: > org.apache.metron.statistics.OnlineStatisticsProvider > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:156) > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:133) > at com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:670) > at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:781) > at > org.apache.metron.common.utils.SerDeUtils.fromBytes(SerDeUtils.java:262) > at > org.apache.metron.profiler.spark.ProfileMeasurementAdapter.toProfileMeasurement(ProfileMeasurementAdapter.java:85) > at > org.apache.metron.profiler.spark.function.HBaseWriterFunction.call(HBaseWriterFunction.java:124) > at org.apache.spark.sql.Dataset$$anonfun$48.apply(Dataset.scala:2266) > at org.apache.spark.sql.Dataset$$anonfun$48.apply(Dataset.scala:2266) > at > org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:196) > at > org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:193) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at 
org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > at org.apache.spark.scheduler.Task.run(Task.scala:108) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.ClassNotFoundException: > org.apache.metron.statistics.OnlineStatisticsProvider > at java.net.URLClassLoader.findClass(URLClassLoader.java:381) > at java.lang.ClassLoader.loadClass(ClassLoader.java:424) > at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331) > at java.lang.ClassLoader.loadClass(ClassLoader.java:357) > at java.lang.Class.forName0(Native Method) > at java.lang.Class.forName(Class.java:348) > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:154) > ... 28 more > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)