Github user ChinmaySKulkarni commented on a diff in the pull request: https://github.com/apache/phoenix/pull/404#discussion_r238036374 --- Diff: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/OrphanViewTool.java --- @@ -0,0 +1,812 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.phoenix.mapreduce; + +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.COLUMN_FAMILY; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.COLUMN_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.LINK_TYPE; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.SYSTEM_CATALOG_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.SYSTEM_CHILD_LINK_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_SCHEM; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_TYPE; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TENANT_ID; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.VIEW_TYPE; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Properties; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.phoenix.jdbc.PhoenixConnection; +import org.apache.phoenix.mapreduce.util.ConnectionUtil; +import org.apache.phoenix.parse.DropTableStatement; +import org.apache.phoenix.query.QueryServices; +import org.apache.phoenix.query.QueryServicesOptions; +import org.apache.phoenix.schema.MetaDataClient; +import org.apache.phoenix.schema.PTable; +import org.apache.phoenix.schema.PTableType; +import org.apache.phoenix.schema.TableNotFoundException; +import org.apache.phoenix.util.PhoenixRuntime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A tool to identify orphan views and links, and drop them + * + */ +public class OrphanViewTool extends Configured implements Tool { + private static final Logger LOG = LoggerFactory.getLogger(OrphanViewTool.class); + // Query all the views that are not "MAPPED" views + private static final String viewQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + "," + + TABLE_NAME + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ TABLE_TYPE + " = '" + PTableType.VIEW.getSerializedValue() +"' AND NOT " + + VIEW_TYPE + " = " + PTable.ViewType.MAPPED.getSerializedValue(); + // Query all physical links + private static final String physicalLinkQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + ", " + + COLUMN_NAME + " AS PHYSICAL_TABLE_TENANT_ID, " + + COLUMN_FAMILY + " AS PHYSICAL_TABLE_FULL_NAME " + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ LINK_TYPE + " = " + + PTable.LinkType.PHYSICAL_TABLE.getSerializedValue(); + // Query all child-parent links + private static final String childParentLinkQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + ", " + + COLUMN_NAME + " AS PARENT_VIEW_TENANT_ID, " + + COLUMN_FAMILY + " AS PARENT_VIEW_FULL_NAME " + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ LINK_TYPE + " = " + + PTable.LinkType.PARENT_TABLE.getSerializedValue(); + // Query all parent-child links + private static final String parentChildLinkQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + ", " + + COLUMN_NAME + " AS CHILD_VIEW_TENANT_ID, " + + COLUMN_FAMILY + " AS CHILD_VIEW_FULL_NAME " + + " FROM " + SYSTEM_CHILD_LINK_NAME + + " WHERE "+ LINK_TYPE + " = " + + PTable.LinkType.CHILD_TABLE.getSerializedValue(); + + // Query all the tables that can be a base table + private static final String candidateBaseTableQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ " NOT " + TABLE_TYPE + " = '" + PTableType.VIEW.getSerializedValue() + "'"; + + private String outputPath; + private String inputPath; + private boolean clean = false; + private int maxViewLevel = 0; + private static final long defaultAge = 24*60*60*1000; // 1 day + private long age = 0; + + private static final byte VIEW = 0; + private static final byte PHYSICAL_TABLE_LINK = 1; + private static final byte PARENT_TABLE_LINK = 2; + private static final byte CHILD_TABLE_LINK = 3; + private static final byte ORPHAN_TYPE_COUNT = 4; + + BufferedWriter writer[] = new BufferedWriter[ORPHAN_TYPE_COUNT]; + BufferedReader reader[] = new BufferedReader[ORPHAN_TYPE_COUNT]; + HashMap<Key, View> orphanViewSet = new HashMap<>(); + List<HashMap<Key, View>> viewSetArray = new ArrayList<HashMap<Key, View>>(); + HashMap<Key, Base> baseSet = new HashMap<>(); + HashSet<Link> orphanLinkSet = new HashSet<>(); + + private static final String fileName[] = {"OrphanView.txt", "OrphanPhysicalTableLink.txt", "OrphanParentTableLink", "OrphanChildTableLink"}; + private static final Option OUTPUT_PATH_OPTION = new Option("op", "output-path", true, + "Output path where the files listing orphan views and links are written"); + private static final Option INPUT_PATH_OPTION = new Option("ip", "input-path", true, + "Input path where the files listing orphan views and links are read"); + private static final Option CLEAN_ORPHAN_VIEWS_OPTION = new Option("c", "clean", false, + "If specified, cleans orphan views and links"); + private static final Option IDENTIFY_ORPHAN_VIEWS_OPTION = new Option("i", "identify", false, + "If specified, identifies orphan views and links"); + private static final Option AGE_OPTION = new Option("a", "age", true, + "The minimum age (in milliseconds) for the views (default value is " + Long.toString(defaultAge) + ", i.e. 1 day)"); + private static final Option HELP_OPTION = new Option("h", "help", false, "Help"); + + private Options getOptions() { + final Options options = new Options(); + options.addOption(OUTPUT_PATH_OPTION); + options.addOption(INPUT_PATH_OPTION); + options.addOption(CLEAN_ORPHAN_VIEWS_OPTION); + options.addOption(IDENTIFY_ORPHAN_VIEWS_OPTION); + options.addOption(AGE_OPTION); + options.addOption(HELP_OPTION); + return options; + } + + /** + * Parses the commandline arguments, throws IllegalStateException if mandatory arguments are + * missing. + * @param args supplied command line arguments + */ + private void parseOptions(String[] args) throws Exception { + + final Options options = getOptions(); + + CommandLineParser parser = new PosixParser(); + CommandLine cmdLine = null; + try { + cmdLine = parser.parse(options, args); + } catch (ParseException e) { + printHelpAndExit("Error parsing command line options: " + e.getMessage(), options); + } + if (cmdLine.hasOption(HELP_OPTION.getOpt())) { + printHelpAndExit(options, 0); + } + if (cmdLine.hasOption(OUTPUT_PATH_OPTION.getOpt()) && cmdLine.hasOption(INPUT_PATH_OPTION.getOpt())) { + throw new IllegalStateException("Specify either " + OUTPUT_PATH_OPTION.getLongOpt() + " or " + + INPUT_PATH_OPTION.getOpt()); + } + if (cmdLine.hasOption(INPUT_PATH_OPTION.getOpt()) && !cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt())) { + throw new IllegalStateException(INPUT_PATH_OPTION.getLongOpt() + " is only used with " + + IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt()); + } + if (cmdLine.hasOption(IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt()) && cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt())) { + throw new IllegalStateException("Specify either " + IDENTIFY_ORPHAN_VIEWS_OPTION.getLongOpt() + " or " + + IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt()); + } + if (cmdLine.hasOption(OUTPUT_PATH_OPTION.getOpt()) && (!cmdLine.hasOption(IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt()) && + !cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt()))) { + throw new IllegalStateException(OUTPUT_PATH_OPTION.getLongOpt() + " requires either " + + IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt() + " or " + CLEAN_ORPHAN_VIEWS_OPTION.getOpt()); + } + if (cmdLine.hasOption(CLEAN_ORPHAN_VIEWS_OPTION.getOpt())) { + clean = true; + } + else if (!cmdLine.hasOption(IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt())) { + throw new IllegalStateException("Specify either " + + IDENTIFY_ORPHAN_VIEWS_OPTION.getOpt() + " or " + CLEAN_ORPHAN_VIEWS_OPTION.getOpt()); + } + if (cmdLine.hasOption(AGE_OPTION.getOpt())) { + age = Long.valueOf(cmdLine.getOptionValue(AGE_OPTION.getOpt())); + } + + outputPath = cmdLine.getOptionValue(OUTPUT_PATH_OPTION.getOpt()); + inputPath = cmdLine.getOptionValue(INPUT_PATH_OPTION.getOpt()); + } + + private void printHelpAndExit(String errorMessage, Options options) { + System.err.println(errorMessage); + printHelpAndExit(options, 1); + } + + private void printHelpAndExit(Options options, int exitCode) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("help", options); + System.exit(exitCode); + } + + private class Key { + private String tenantId; + private String schemaName; + private String tableName; + + public Key (String tenantId, String schemaName, String tableName) { + this.tenantId = tenantId; + this.schemaName = schemaName; + this.tableName = tableName; + } + + public Key (String tenantId, String fullTableName) { + this.tenantId = tenantId; + String[] columns = fullTableName.split("\\."); --- End diff -- You should use existing Phoenix APIs which parse the fullTableName using either '.' or ':' based on the value of the `isNamespaceMapped` property. Check out `SchemaUtil.java`
---