Github user ChinmaySKulkarni commented on a diff in the pull request: https://github.com/apache/phoenix/pull/404#discussion_r238034368 --- Diff: phoenix-core/src/main/java/org/apache/phoenix/mapreduce/OrphanViewTool.java --- @@ -0,0 +1,812 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.phoenix.mapreduce; + +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.COLUMN_FAMILY; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.COLUMN_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.LINK_TYPE; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.SYSTEM_CATALOG_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.SYSTEM_CHILD_LINK_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_NAME; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_SCHEM; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TABLE_TYPE; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.TENANT_ID; +import static org.apache.phoenix.jdbc.PhoenixDatabaseMetaData.VIEW_TYPE; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Properties; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.phoenix.jdbc.PhoenixConnection; +import org.apache.phoenix.mapreduce.util.ConnectionUtil; +import org.apache.phoenix.parse.DropTableStatement; +import org.apache.phoenix.query.QueryServices; +import org.apache.phoenix.query.QueryServicesOptions; +import org.apache.phoenix.schema.MetaDataClient; +import org.apache.phoenix.schema.PTable; +import org.apache.phoenix.schema.PTableType; +import org.apache.phoenix.schema.TableNotFoundException; +import org.apache.phoenix.util.PhoenixRuntime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A tool to identify orphan views and links, and drop them + * + */ +public class OrphanViewTool extends Configured implements Tool { + private static final Logger LOG = LoggerFactory.getLogger(OrphanViewTool.class); + // Query all the views that are not "MAPPED" views + private static final String viewQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + "," + + TABLE_NAME + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ TABLE_TYPE + " = '" + PTableType.VIEW.getSerializedValue() +"' AND NOT " + + VIEW_TYPE + " = " + PTable.ViewType.MAPPED.getSerializedValue(); + // Query all physical links + private static final String physicalLinkQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + ", " + + COLUMN_NAME + " AS PHYSICAL_TABLE_TENANT_ID, " + + COLUMN_FAMILY + " AS PHYSICAL_TABLE_FULL_NAME " + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ LINK_TYPE + " = " + + PTable.LinkType.PHYSICAL_TABLE.getSerializedValue(); + // Query all child-parent links + private static final String childParentLinkQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + ", " + + COLUMN_NAME + " AS PARENT_VIEW_TENANT_ID, " + + COLUMN_FAMILY + " AS PARENT_VIEW_FULL_NAME " + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ LINK_TYPE + " = " + + PTable.LinkType.PARENT_TABLE.getSerializedValue(); + // Query all parent-child links + private static final String parentChildLinkQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + ", " + + COLUMN_NAME + " AS CHILD_VIEW_TENANT_ID, " + + COLUMN_FAMILY + " AS CHILD_VIEW_FULL_NAME " + + " FROM " + SYSTEM_CHILD_LINK_NAME + + " WHERE "+ LINK_TYPE + " = " + + PTable.LinkType.CHILD_TABLE.getSerializedValue(); + + // Query all the tables that can be a base table + private static final String candidateBaseTableQuery = "SELECT " + + TENANT_ID + ", " + + TABLE_SCHEM + ", " + + TABLE_NAME + + " FROM " + SYSTEM_CATALOG_NAME + + " WHERE "+ " NOT " + TABLE_TYPE + " = '" + PTableType.VIEW.getSerializedValue() + "'"; + + private String outputPath; + private String inputPath; + private boolean clean = false; + private int maxViewLevel = 0; + private static final long defaultAge = 24*60*60*1000; // 1 day + private long age = 0; + + private static final byte VIEW = 0; + private static final byte PHYSICAL_TABLE_LINK = 1; + private static final byte PARENT_TABLE_LINK = 2; + private static final byte CHILD_TABLE_LINK = 3; + private static final byte ORPHAN_TYPE_COUNT = 4; + + BufferedWriter writer[] = new BufferedWriter[ORPHAN_TYPE_COUNT]; + BufferedReader reader[] = new BufferedReader[ORPHAN_TYPE_COUNT]; + HashMap<Key, View> orphanViewSet = new HashMap<>(); --- End diff -- Can you add some comments here about what is the data organization i.e. what is the expected key, value, list of what, etc.
---