[ https://issues.apache.org/jira/browse/METRON-2285?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16969352#comment-16969352 ]
Nick Allen edited comment on METRON-2285 at 11/7/19 3:35 PM: ------------------------------------------------------------- While you are able to use data sketches (like STATS_ADD, STATS_COUNT, etc) in your profiles, you cannot persist those in HBase right now. This is a bug that needs to be addressed. As a workaround, can you just store a numeric value in HBase, instead of attempting to store the data sketch itself? For example, if you want a mean, just store that. {code:java} { "profiles": [ { "profile": "batchteststat", "onlyif": "source.type == 'testsource' and devicehostname == 'windows9.something.com'", "foreach": "devicehostname", "update": { "s": "STATS_ADD(s, devicehostname)" }, "result": { "profile": "STATS_MEAN(s)" } } ], "timestampField": "timestamp" } {code} was (Author: nickwallen): While you are able to use data sketches (like STATS_ADD, STATS_COUNT, etc) in your profiles, you cannot persist those in HBase right now. This is a bug that needs addressed. As a work around can you just store a numeric value in HBase, instead of attempting to store the data sketch itself? For example, if you want a mean, just store that. 
{ "profiles":[ { "profile":"batchteststat", "onlyif":"source.type == 'testsource' and devicehostname == 'windows9.something.com'", "foreach":"devicehostname", "update":{ "s":"STATS_ADD(s, devicehostname)" }, "result":{ "profile":"STATS_MEAN(s)" } } ], "timestampField":"timestamp"} > Metron Profiler for Spark - Stellar function STATS_ADD can't be used > -------------------------------------------------------------------- > > Key: METRON-2285 > URL: https://issues.apache.org/jira/browse/METRON-2285 > Project: Metron > Issue Type: Bug > Affects Versions: 0.7.1 > Reporter: Maxim Dashenko > Priority: Major > > Used command: > {code} > /usr/hdp/current/spark2-client/bin/spark-submit --class > org.apache.metron.profiler.spark.cli.BatchProfilerCLI --properties-file > /usr/hcp/current/metron/config/batch-profiler.properties > ~/metron-profiler-spark-0.7.1.1.9.1.0-6.jar --config > /usr/hcp/current/metron/config/batch-profiler.properties --profiles > ~/profiler.json > {code} > cat /usr/hcp/current/metron/config/batch-profiler.properties > {code} > profiler.batch.input.path=/tmp/test_data.logs > profiler.batch.input.format=json > profiler.period.duration=15 > profiler.period.duration.units=MINUTES > {code} > > cat ~/profiler.json > {code} > { > "profiles":[ > { > "profile":"batchteststat", > "onlyif":"source.type == 'testsource' and devicehostname == > 'windows9.something.com'", > "foreach":"devicehostname", > "update":{ > "s":"STATS_ADD(s, devicehostname)" > }, > "result":{ > "profile":"s" > } > } > ], > "timestampField":"timestamp" > } > {code} > cat test_data.logs > {code} > {"devicehostname": "windows9.something.com", "timestamp": 1567241981000, > "source.type": "testsource"} > {code} > The command raises an exception: > {code} > Exception in thread "main" org.apache.spark.SparkException: Job aborted due > to stage failure: Task 68 in stage 8.0 failed 1 times, most recent failure: > Lost task 68.0 in stage 8.0 (TID 274, localhost, executor driver): > 
com.esotericsoftware.kryo.KryoException: Unable to find class: > org.apache.metron.statistics.OnlineStatisticsProvider > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:156) > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:133) > at com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:670) > at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:781) > at > org.apache.metron.common.utils.SerDeUtils.fromBytes(SerDeUtils.java:262) > at > org.apache.metron.profiler.spark.ProfileMeasurementAdapter.toProfileMeasurement(ProfileMeasurementAdapter.java:85) > at > org.apache.metron.profiler.spark.function.HBaseWriterFunction.call(HBaseWriterFunction.java:124) > at org.apache.spark.sql.Dataset$$anonfun$48.apply(Dataset.scala:2266) > at org.apache.spark.sql.Dataset$$anonfun$48.apply(Dataset.scala:2266) > at > org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:196) > at > org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:193) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) > at > 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > at org.apache.spark.scheduler.Task.run(Task.scala:108) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.ClassNotFoundException: > org.apache.metron.statistics.OnlineStatisticsProvider > at java.net.URLClassLoader.findClass(URLClassLoader.java:381) > at java.lang.ClassLoader.loadClass(ClassLoader.java:424) > at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331) > at java.lang.ClassLoader.loadClass(ClassLoader.java:357) > at java.lang.Class.forName0(Native Method) > at java.lang.Class.forName(Class.java:348) > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:154) > ... 28 more > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1517) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1505) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1504) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1504) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814) > at scala.Option.foreach(Option.scala:257) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814) > at > 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1732) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1687) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1676) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2029) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2050) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2069) > at > org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:336) > at > org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38) > at > org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:2861) > at > org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2150) > at > org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2150) > at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:2842) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:2841) > at org.apache.spark.sql.Dataset.head(Dataset.scala:2150) > at org.apache.spark.sql.Dataset.head(Dataset.scala:2157) > at > org.apache.metron.profiler.spark.BatchProfiler.run(BatchProfiler.java:103) > at > org.apache.metron.profiler.spark.cli.BatchProfilerCLI.main(BatchProfilerCLI.java:95) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:782) > at 
> org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180) > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:119) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > Caused by: com.esotericsoftware.kryo.KryoException: Unable to find class: > org.apache.metron.statistics.OnlineStatisticsProvider > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:156) > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:133) > at com.esotericsoftware.kryo.Kryo.readClass(Kryo.java:670) > at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:781) > at > org.apache.metron.common.utils.SerDeUtils.fromBytes(SerDeUtils.java:262) > at > org.apache.metron.profiler.spark.ProfileMeasurementAdapter.toProfileMeasurement(ProfileMeasurementAdapter.java:85) > at > org.apache.metron.profiler.spark.function.HBaseWriterFunction.call(HBaseWriterFunction.java:124) > at org.apache.spark.sql.Dataset$$anonfun$48.apply(Dataset.scala:2266) > at org.apache.spark.sql.Dataset$$anonfun$48.apply(Dataset.scala:2266) > at > org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:196) > at > org.apache.spark.sql.execution.MapPartitionsExec$$anonfun$6.apply(objects.scala:193) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$25.apply(RDD.scala:827) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:287) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > at org.apache.spark.scheduler.Task.run(Task.scala:108) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.ClassNotFoundException: > org.apache.metron.statistics.OnlineStatisticsProvider > at java.net.URLClassLoader.findClass(URLClassLoader.java:381) > at java.lang.ClassLoader.loadClass(ClassLoader.java:424) > at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331) > at java.lang.ClassLoader.loadClass(ClassLoader.java:357) > at java.lang.Class.forName0(Native Method) > at java.lang.Class.forName(Class.java:348) > at > com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:154) > ... 28 more > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)