betodealmeida commented on code in PR #26767: URL: https://github.com/apache/superset/pull/26767#discussion_r1482092216
########## superset/sql_parse.py: ########## @@ -252,6 +253,182 @@ def __eq__(self, __o: object) -> bool: return str(self) == str(__o) +def extract_tables_from_statement( + statement: exp.Expression, + dialect: Optional[Dialects], +) -> set[Table]: + """ + Extract all table references in a single statement. + + Please not that this is not trivial; consider the following queries: + + DESCRIBE some_table; + SHOW PARTITIONS FROM some_table; + WITH masked_name AS (SELECT * FROM some_table) SELECT * FROM masked_name; + + See the unit tests for other tricky cases. + """ + sources: Iterable[exp.Table] + + if isinstance(statement, exp.Describe): + # A `DESCRIBE` query has no sources in sqlglot, so we need to explicitly + # query for all tables. + sources = statement.find_all(exp.Table) + elif isinstance(statement, exp.Command): + # Commands, like `SHOW COLUMNS FROM foo`, have to be converted into a + # `SELECT` statetement in order to extract tables. + literal = statement.find(exp.Literal) + if not literal: + return set() + + pseudo_query = parse_one(f"SELECT {literal.this}", dialect=dialect) + sources = pseudo_query.find_all(exp.Table) + else: + sources = [ + source + for scope in traverse_scope(statement) + for source in scope.sources.values() + if isinstance(source, exp.Table) and not is_cte(source, scope) + ] + + return { + Table( + source.name, + source.db if source.db != "" else None, + source.catalog if source.catalog != "" else None, + ) + for source in sources + } + + +def is_cte(source: exp.Table, scope: Scope) -> bool: + """ + Is the source a CTE? + + CTEs in the parent scope look like tables (and are represented by + exp.Table objects), but should not be considered as such; + otherwise a user with access to table `foo` could access any table + with a query like this: + + WITH foo AS (SELECT * FROM target_table) SELECT * FROM foo + + """ + parent_sources = scope.parent.sources if scope.parent else {} + ctes_in_scope = { + name + for name, parent_scope in parent_sources.items() + if isinstance(parent_scope, Scope) and parent_scope.scope_type == ScopeType.CTE + } + + return source.name in ctes_in_scope + + +class SQLQuery: + """ + A SQL query, with 0+ statements. + """ + + def __init__( + self, + query: str, + engine: Optional[str] = None, + ): + dialect = SQLGLOT_DIALECTS.get(engine) if engine else None + + self.statements = [ + SQLStatement(statement, engine=engine) + for statement in parse(query, dialect=dialect) + if statement + ] + + def format(self, comments: bool = True) -> str: + """ + Pretty-format the SQL query. + """ + return ";\n".join(statement.format(comments) for statement in self.statements) + + def get_settings(self) -> dict[str, str]: + """ + Return the settings for the SQL query. + + >>> statement = SQLQuery("SET foo = 'bar'; SET foo = 'baz'") + >>> statement.get_settings() + {"foo": "'baz'"} + + """ + settings: dict[str, str] = {} + for statement in self.statements: + settings.update(statement.get_settings()) + + return settings + + +class SQLStatement: Review Comment: Not just the first. The hierarchy is that a `SQLQuery` has 0+ `SQLStatement`s. When you pass a SQL query to `SQLQuery` it will split the query into statements, and instantiate one `SQLStatement` for each one. The `SQLStatement._parse_statement` validates that the SQL passed corresponds to exactly one statement, which is why it looks like it's extracting the first one. This is done because today in the codebase there's an implicit assumption that a `ParsedQuery` represents a single statement, but because this is not enforced there are many places in the code that pass a full query to it when using methods that really don't care. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org For additional commands, e-mail: notifications-h...@superset.apache.org