[ https://issues.apache.org/jira/browse/METRON-690?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15877024#comment-15877024 ]
ASF GitHub Bot commented on METRON-690: --------------------------------------- Github user cestella commented on a diff in the pull request: https://github.com/apache/incubator-metron/pull/450#discussion_r102348247 --- Diff: metron-analytics/metron-profiler-client/src/main/java/org/apache/metron/profiler/client/window/WindowProcessor.java --- @@ -0,0 +1,321 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.metron.profiler.client.window; + +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.tree.ParseTree; +import org.apache.metron.common.dsl.ErrorListener; +import org.apache.metron.common.dsl.GrammarUtils; +import org.apache.metron.common.dsl.ParseException; +import org.apache.metron.common.dsl.Token; +import org.apache.metron.common.utils.ConversionUtils; +import org.apache.metron.profiler.client.window.generated.WindowBaseListener; +import org.apache.metron.profiler.client.window.generated.WindowLexer; +import org.apache.metron.profiler.client.window.generated.WindowParser; +import org.apache.metron.profiler.client.window.predicates.DayPredicates; + +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.function.Predicate; + +import static org.apache.commons.lang3.StringUtils.isEmpty; + +public class WindowProcessor extends WindowBaseListener { + private Throwable throwable; + private Deque<Token<?>> stack; + private static final Token<Object> LIST_MARKER = new Token<>(null, Object.class); + private static final Token<Object> DAY_SPECIFIER_MARKER = new Token<>(null, Object.class); + private Window window; + + public WindowProcessor() { + this.stack = new ArrayDeque<>(); + this.window = new Window(); + } + + public Window getWindow() { + return window; + } + + private void enterList() { + stack.push(LIST_MARKER); + } + + private List<Function<Long, Predicate<Long>>> getPredicates() { + LinkedList<Function<Long, Predicate<Long>>> predicates = new LinkedList<>(); + while (true) { + Token<?> token = stack.pop(); + if (token == LIST_MARKER) { + break; + } else { + predicates.addFirst((Function<Long, Predicate<Long>>) token.getValue()); + } + } + return predicates; + } + + @Override + public void exitIdentifier(WindowParser.IdentifierContext ctx) { + if(checkForException(ctx)) { + return; + } + stack.push(new Token<>(ctx.getText().substring(1), String.class)); + } + + 
@Override + public void enterSpecifier(WindowParser.SpecifierContext ctx) { + if(checkForException(ctx)) { + return; + } + stack.push(DAY_SPECIFIER_MARKER); + } + + @Override + public void exitSpecifier(WindowParser.SpecifierContext ctx) { + LinkedList<String> args = new LinkedList<>(); + + while (true) { + Token<?> token = stack.pop(); + if (token == DAY_SPECIFIER_MARKER) { + break; + } else { + args.addFirst((String) token.getValue()); + } + } + String specifier = args.removeFirst(); + List<String> arg = args.size() > 0?args:new ArrayList<>(); + Function<Long, Predicate<Long>> predicate = null; + try { + if (specifier.equals("THIS DAY OF THE WEEK") || specifier.equals("THIS DAY OF WEEK")) { + predicate = now -> DayPredicates.dayOfWeekPredicate(DayPredicates.getDayOfWeek(now)); + } else { + final Predicate<Long> dayOfWeekPredicate = DayPredicates.create(specifier, arg); + predicate = now -> dayOfWeekPredicate; + } + stack.push(new Token<>(predicate, Function.class)); + } + catch(Throwable t) { + throwable = t; + } + } + + @Override + public void exitDay_specifier(WindowParser.Day_specifierContext ctx) { + if(checkForException(ctx)) { + return; + } + String specifier = ctx.getText().toUpperCase(); + if(specifier.length() == 0 && ctx.exception != null){ + IllegalStateException ise = new IllegalStateException("Invalid day specifier: " + ctx.getStart().getText(), ctx.exception); + throwable = ise; + throw ise; + } + if(specifier.endsWith("S")) { + specifier = specifier.substring(0, specifier.length() - 1); + } + stack.push(new Token<>(specifier, String.class)); + } + + @Override + public void enterExcluding_specifier(WindowParser.Excluding_specifierContext ctx) { + if(checkForException(ctx)) { + return; + } + enterList(); + } + + @Override + public void exitExcluding_specifier(WindowParser.Excluding_specifierContext ctx) { + if(checkForException(ctx)) { + return; + } + window.setExcludes(getPredicates()); + } + + @Override + public void 
enterIncluding_specifier(WindowParser.Including_specifierContext ctx) { + if(checkForException(ctx)) { + return; + } + enterList(); + } + + @Override + public void exitIncluding_specifier(WindowParser.Including_specifierContext ctx) { + if(checkForException(ctx)) { + return; + } + window.setIncludes(getPredicates()); + } + + private void setFromTo(int from, int to) { + window.setEndMillis(now -> now - Math.min(to, from)); + window.setStartMillis(now -> now - Math.max(from, to)); + } + + @Override + public void exitFromToDuration(org.apache.metron.profiler.client.window.generated.WindowParser.FromToDurationContext ctx) { + if(checkForException(ctx)) { + return; + } + Token<?> toInterval = stack.pop(); + Token<?> fromInterval = stack.pop(); + Integer to = (Integer)toInterval.getValue(); + Integer from = (Integer)fromInterval.getValue(); + setFromTo(from, to); + } + + @Override + public void exitFromDuration(org.apache.metron.profiler.client.window.generated.WindowParser.FromDurationContext ctx) { + if(checkForException(ctx)) { + return; + } + Token<?> timeInterval = stack.pop(); + Integer from = (Integer)timeInterval.getValue(); + setFromTo(from, 0); + } + + @Override + public void exitSkipDistance(org.apache.metron.profiler.client.window.generated.WindowParser.SkipDistanceContext ctx) { + if(checkForException(ctx)) { + return; + } + Token<?> timeInterval = stack.pop(); + Integer width = (Integer)timeInterval.getValue(); + window.setSkipDistance(width); + } + + @Override + public void exitWindowWidth(org.apache.metron.profiler.client.window.generated.WindowParser.WindowWidthContext ctx) { + if(checkForException(ctx)) { + return; + } + Token<?> timeInterval = stack.pop(); + Integer width = (Integer)timeInterval.getValue(); + window.setBinWidth(width); + window.setStartMillis(now -> now - width); + window.setEndMillis(now -> now); + } + + @Override + public void exitTimeInterval(org.apache.metron.profiler.client.window.generated.WindowParser.TimeIntervalContext ctx) { 
+ if(checkForException(ctx)) { + return; + } + Token<?> timeUnit = stack.pop(); + Token<?> timeDuration = stack.pop(); + int duration = ConversionUtils.convert(timeDuration.getValue(), Integer.class); + TimeUnit unit = (TimeUnit) timeUnit.getValue(); + stack.push(new Token<>((int)unit.toMillis(duration), Integer.class)); + } + + @Override + public void exitTimeAmount(org.apache.metron.profiler.client.window.generated.WindowParser.TimeAmountContext ctx) { + if(checkForException(ctx)) { + return; + } + if(ctx.getText().length() == 0) { + throwable = new IllegalStateException("Unable to parse empty string."); + return; + } + int duration = Integer.parseInt(ctx.getText()); + stack.push(new Token<>(duration, Integer.class)); + } + + @Override + public void exitTimeUnit(org.apache.metron.profiler.client.window.generated.WindowParser.TimeUnitContext ctx) { + checkForException(ctx); + switch(normalizeTimeUnit(ctx.getText())) { + case "DAY": + stack.push(new Token<>(TimeUnit.DAYS, TimeUnit.class)); + break; + case "HOUR": + stack.push(new Token<>(TimeUnit.HOURS, TimeUnit.class)); + break; + case "MINUTE": + stack.push(new Token<>(TimeUnit.MINUTES, TimeUnit.class)); + break; + case "SECOND": + stack.push(new Token<>(TimeUnit.SECONDS, TimeUnit.class)); + break; + default: + throw new IllegalStateException("Unsupported time unit: " + ctx.getText() + + ". 
Supported units are limited to: day, hour, minute, second " + + "with any pluralization or capitalization."); + } + } + + private boolean checkForException(ParserRuleContext ctx) { + if(throwable != null) { + return true; + } + else if(ctx.exception != null) { + return true; + } + return false; + } + + private static String normalizeTimeUnit(String s) { + String ret = s.toUpperCase().replaceAll("[^A-Z]", ""); + if(ret.endsWith("S")) { + return ret.substring(0, ret.length() - 1); + } + return ret; + } + + public static Window parse(String statement) throws ParseException { + if (statement == null || isEmpty(statement.trim())) { + return null; + } + statement = statement.trim(); + ANTLRInputStream input = new ANTLRInputStream(statement); + WindowLexer lexer = new WindowLexer(input); + lexer.removeErrorListeners(); + lexer.addErrorListener(new ErrorListener()); + TokenStream tokens = new CommonTokenStream(lexer); + WindowParser parser = new WindowParser(tokens); + WindowProcessor treeBuilder = new WindowProcessor(); + parser.addParseListener(treeBuilder); + parser.removeErrorListeners(); + parser.addErrorListener(new ErrorListener()); + parser.window(); + if(treeBuilder.throwable != null) { + throw new ParseException(treeBuilder.throwable.getMessage(), treeBuilder.throwable); + } + return treeBuilder.getWindow(); + } + + public static String syntaxTree(String statement) { --- End diff -- Nobody is using syntaxTree, that's currently only for debugging issues with the grammar going forward for people adapting the language for new use-cases. Also, @mattf-horton asked for it during the dev discussion. It was marginally useful during the construction of the grammar to understand how things were being parsed. I'll document it as such. 
> Create a DSL-based timestamp lookup for profiler to enable sparse windows > ------------------------------------------------------------------------- > > Key: METRON-690 > URL: https://issues.apache.org/jira/browse/METRON-690 > Project: Metron > Issue Type: New Feature > Reporter: Casey Stella > > I propose that we support the following features: > * A starting point that is not current time > * Sparse bins (i.e. the last hour for every tuesday for the last month) > * The ability to skip events (e.g. weekends, holidays) > This would result in a new function with the following arguments: > from - The lookback starting point (default to now) > fromUnits - The units for the lookback starting point > to - The ending point for the lookback window (default to from + binSize) > toUnits - The units for the lookback ending point > including - A list of conditions which we would include. > weekend > holiday > sunday through saturday > excluding - A list of conditions which we would skip. > weekend > holiday > sunday through saturday > binSize - The size of the lookback bin > binUnits - The units of the lookback bin > Given the number of arguments and their complexity and the fact that many, > many are optional, > PROFILE_LOOKBACK should accept a string backed by a DSL to express these criteria > Base Case: A lookback of 1 hour ago > PROFILE_LOOKBACK( '1 hour bins from now') > Example 1: The same time window every tuesday for the last month starting one > hour ago > Just to make this as clear as possible, if this is run at 3PM on Monday > January 23rd, 2017, it would include the following bins: > January 17th, 2PM - 3PM > January 10th, 2PM - 3PM > January 3rd, 2PM - 3PM > December 27th, 2PM - 3PM > PROFILE_LOOKBACK( '1 hour bins from 1 hour to 1 month including tuesdays') > Example 2: The same time window every sunday for the last month starting one > hour ago skipping holidays > Just to make this as clear as possible, if this is run at 3PM on Monday > January 22nd, 2017, it would 
include the following bins: > January 16th, 2PM - 3PM > January 9th, 2PM - 3PM > January 2nd, 2PM - 3PM > NOT December 25th > PROFILE_LOOKBACK( '1 hour bins from 1 hour to 1 month including tuesdays > excluding holidays') -- This message was sent by Atlassian JIRA (v6.3.15#6346)