yihua commented on code in PR #8518: URL: https://github.com/apache/hudi/pull/8518#discussion_r1240373903
########## hudi-utils/pom.xml: ########## @@ -22,7 +22,7 @@ <groupId>org.apache.hudi</groupId> <artifactId>hudi-utils</artifactId> <packaging>jar</packaging> - <version>1.0-SNAPSHOT</version> + <version>0.14.0-SNAPSHOT</version> Review Comment: Do we still want to keep the `hudi-utils` version different from Hudi jars? ########## hudi-utils/src/main/java/org/apache/hudi/utils/HoodieConfigDocGenerator.java: ########## @@ -71,435 +61,531 @@ public class HoodieConfigDocGenerator { private static final String LINE_BREAK = "<br></br>\n"; private static final String DOUBLE_NEWLINE = "\n\n"; private static final String SUMMARY = "This page covers the different ways of configuring " + - "your job to write/read Hudi tables. " + - "At a high level, you can control behaviour at few levels."; + "your job to write/read Hudi tables. " + + "At a high level, you can control behaviour at few levels."; private static final String FLINK_CONFIG_CLASS_NAME = "org.apache.hudi.configuration.FlinkOptions"; - private static final String CONFIG_PATH = "/tmp/configurations.md"; + private static final String ALL_CONFIGS_PATH = "/tmp/configurations.md"; private static final String EXTERNALIZED_CONFIGS = "## Externalized Config File\n" + "Instead of directly passing configuration settings to every Hudi job, you can also centrally set them in a configuration\n" + "file `hudi-default.conf`. By default, Hudi would load the configuration file under `/etc/hudi/conf` directory. You can\n" + "specify a different configuration directory location by setting the `HUDI_CONF_DIR` environment variable. This can be\n" + "useful for uniformly enforcing repeated configs (like Hive sync or write/index tuning), across your entire data lake."; + private static final String DEFAULT_FOOTER_MARKUP = new StringBuilder().append(NEWLINE).append(new HorizontalRule(3)).append(DOUBLE_NEWLINE).toString(); private static final Integer DEFAULT_CONFIG_GROUP_HEADING_LEVEL = 2; private static final Integer DEFAULT_CONFIG_PARAM_HEADING_LEVEL = 3; + private static final TableRow<String> DEFAULT_TABLE_HEADER_ROW = new TableRow<>(new ArrayList<>(Arrays.asList("Config Name", "Default", "Description", "Since Version"))); public static void main(String[] args) { Reflections reflections = new Reflections("org.apache.hudi"); - // Scan and collect meta info of all HoodieConfig superclasses by using reflection - List<HoodieConfigClassMetaInfo> hoodieConfigClassMetaInfos = getSortedListOfHoodieConfigClassMetaInfo(reflections.getSubTypesOf(HoodieConfig.class)); - - // Top heading - StringBuilder mainDocBuilder = new StringBuilder(); - generateHeader(mainDocBuilder); - - ListBuilder contentTableBuilder = new ListBuilder(); - Map<ConfigGroups.Names, StringBuilder> contentMap = generateContentTableAndMainHeadings(contentTableBuilder); - - // Special casing Spark Configs since the class does not extend HoodieConfig - // and also does not use ConfigClassProperty - populateSparkConfigs(contentMap); - - // generate Docs from the config classes - ConfigGroups.SubGroupNames prevSubGroupName = NONE; - boolean isPartOfSubGroup = false; - int configParamHeadingLevel = DEFAULT_CONFIG_PARAM_HEADING_LEVEL; - for (HoodieConfigClassMetaInfo configClassMetaInfo: hoodieConfigClassMetaInfos) { - Class<? extends HoodieConfig> subType = configClassMetaInfo.subType; - ConfigClassProperty configClassProperty = subType.getAnnotation(ConfigClassProperty.class); - StringBuilder groupOrSubGroupStringBuilder = contentMap.get(configClassProperty.groupName()); - LOG.info("Processing params for config class: " + subType.getName() + " " + configClassProperty.name() - + " " + configClassProperty.description()); - if (configClassMetaInfo.subGroupName == NONE){ - isPartOfSubGroup = false; - configParamHeadingLevel = DEFAULT_CONFIG_PARAM_HEADING_LEVEL; - } else if (configClassMetaInfo.subGroupName == prevSubGroupName) { - // Continuation of more HoodieConfig classes that are part of the same subgroup - isPartOfSubGroup = true; - groupOrSubGroupStringBuilder = new StringBuilder(); - configParamHeadingLevel = DEFAULT_CONFIG_PARAM_HEADING_LEVEL + 1; - } else if (configClassMetaInfo.hasCommonConfigs) { - // This is a new valid Subgroup encountered. Add description for the subgroup. - isPartOfSubGroup = true; - groupOrSubGroupStringBuilder = new StringBuilder(); - generateConfigGroupSummary(groupOrSubGroupStringBuilder, - configClassMetaInfo.subGroupName.name, - configClassMetaInfo.subGroupName.name(), - configClassProperty.subGroupName().getDescription(), - DEFAULT_CONFIG_GROUP_HEADING_LEVEL + 1); - configParamHeadingLevel = DEFAULT_CONFIG_PARAM_HEADING_LEVEL + 1; - } - prevSubGroupName = configClassMetaInfo.subGroupName; - generateConfigGroupSummary(groupOrSubGroupStringBuilder, - configClassProperty.name(), - configClassProperty.name().replace(" ", "-"), - configClassProperty.description(), - configParamHeadingLevel); - groupOrSubGroupStringBuilder - .append("`") - .append(new Text("Config Class")) - .append("`") - .append(": ") - .append(subType.getName()).append(LINE_BREAK); - - // Special casing Flink Configs since the class does not use ConfigClassProperty - // Also, we need to split Flink Options into Flink Read Options, Write Options... - if (subType.getName().equals(FLINK_CONFIG_CLASS_NAME)) { - generateFlinkConfigMarkup(subType, groupOrSubGroupStringBuilder); - } else { - generateAllOtherConfigs(subType, isPartOfSubGroup, groupOrSubGroupStringBuilder); - if (isPartOfSubGroup) { - // If the config class is part of a subgroup, close the string builder for subgroup and append it to the main group builder. - contentMap.get(configClassProperty.groupName()).append(groupOrSubGroupStringBuilder.toString()); - } - } - } - - try { - LOG.info("Generating markdown file"); - mainDocBuilder.append(contentTableBuilder.build()).append(DOUBLE_NEWLINE); - mainDocBuilder.append(generateExternalizedConfigs()); - contentMap.forEach((k, v) -> mainDocBuilder.append(v)); - Files.write(Paths.get(CONFIG_PATH), mainDocBuilder.toString().getBytes(StandardCharsets.UTF_8)); - } catch (IOException e) { - LOG.error("Error while writing to markdown file ", e); - } + // Create a Treemap of [config group/subgroup/commonconfigs/configclass] -> [configclass markup] + NavigableMap<ConfigClassMeta, ConfigClassMarkups> configClassTreeMap = new TreeMap<>(getConfigClassMetaComparator()); + initConfigClassTreeMap(reflections.getSubTypesOf(HoodieConfig.class), configClassTreeMap); + buildConfigMarkup(configClassTreeMap); + initAndBuildSparkConfigMarkup(configClassTreeMap); + generateAllConfigurationPages(configClassTreeMap); } - private static List<HoodieConfigClassMetaInfo> getSortedListOfHoodieConfigClassMetaInfo(Set<Class<? extends HoodieConfig>> subTypes) { - // Scan and collect meta info of all HoodieConfig superclasses by using reflection - List<HoodieConfigClassMetaInfo> hoodieConfigClassMetaInfos = new ArrayList<>(); - for (Class<? extends HoodieConfig> subType : subTypes) { - // sub-heading using the annotation - ConfigClassProperty configClassProperty = subType.getAnnotation(ConfigClassProperty.class); - try{ - if (configClassProperty != null) { - hoodieConfigClassMetaInfos.add(new HoodieConfigClassMetaInfo(configClassProperty.groupName(), configClassProperty.subGroupName(), configClassProperty.areCommonConfigs(), subType)); - } else { - LOG.error("FATAL error Please add `ConfigClassProperty` annotation for " + subType.getName()); - } - } catch (Exception e) { - LOG.error("FATAL error while processing config class: " + subType.getName(), e); - } - } + private static StringBuilder generateExternalizedConfigs() { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append(EXTERNALIZED_CONFIGS); + stringBuilder.append(DOUBLE_NEWLINE); + return stringBuilder; + } - // Now sort them based on these columns in the order - groupname, subgroupname (reverse order) and areCommonConfigs (reverse order) - // We want to list all groups with no subgroups first. Followed by groups with subgroups and among them list the - // class that has common configs first. - hoodieConfigClassMetaInfos.sort(Comparator.comparing(HoodieConfigClassMetaInfo::getGroupName) - .thenComparing(HoodieConfigClassMetaInfo::getSubGroupName, Comparator.reverseOrder()) - .thenComparing(HoodieConfigClassMetaInfo::areCommonConfigs, Comparator.reverseOrder())); - return hoodieConfigClassMetaInfos; + /** + * Generated main content headings for every config group. + */ + private static void generateMainHeadings(ListBuilder builder) { + EnumSet.allOf(ConfigGroups.Names.class).forEach(groupName -> builder.append( + new Link(new BoldText(groupName.name), + "#" + groupName.name()) + + ": " + ConfigGroups.getDescription(groupName))); } - private static void generateHeader(StringBuilder builder) { + /** + * Returns the header meta for the all configs doc page. This will be a .mdx page. + */ + private static void generateAllConfigsHeader(StringBuilder builder) { /* --- title: Configurations keywords: [configurations, default, flink options, spark, configs, parameters] permalink: /docs/configurations.html summary: This section offers an overview of tools available to operate an ecosystem of Hudi - toc: true - toc_min_heading_level: 2 - toc_max_heading_level: 4 last_modified_at: 2019-12-30T15:59:57-04:00 + hide_table_of_contents: true + --- + import TOCInline from '@theme/TOCInline'; + + <TOCInline toc={toc} minHeadingLevel={2} maxHeadingLevel={5}/> + --- */ LocalDateTime now = LocalDateTime.now(); builder.append(new HorizontalRule()).append(NEWLINE) - .append("title: ").append("All Configurations").append(NEWLINE) - .append("keywords: [ configurations, default, flink options, spark, configs, parameters ] ").append(NEWLINE) - .append("permalink: /docs/configurations.html").append(NEWLINE) - .append("summary: " + SUMMARY).append(NEWLINE) - .append("toc: true").append(NEWLINE) - .append("toc_min_heading_level: 2").append(NEWLINE) - .append("toc_max_heading_level: 4").append(NEWLINE) - .append("last_modified_at: " + DateTimeFormatter.ISO_DATE_TIME.format(now)).append(NEWLINE) - .append(new HorizontalRule()) - .append(DOUBLE_NEWLINE); + .append("title: ").append("All Configurations").append(NEWLINE) + .append("keywords: [ configurations, default, flink options, spark, configs, parameters ] ").append(NEWLINE) + .append("permalink: /docs/configurations.html").append(NEWLINE) + .append("summary: " + SUMMARY).append(NEWLINE) + .append("last_modified_at: " + DateTimeFormatter.ISO_DATE_TIME.format(now)).append(NEWLINE) + .append("hide_table_of_contents: true").append(NEWLINE) + .append(new HorizontalRule()).append(NEWLINE) + .append("import TOCInline from '@theme/TOCInline';") + .append(DOUBLE_NEWLINE) + .append("<TOCInline toc={toc} minHeadingLevel={2} maxHeadingLevel={5}/>") + .append(DOUBLE_NEWLINE) + .append(new HorizontalRule()) + .append(DOUBLE_NEWLINE); // Description builder.append(SUMMARY).append(DOUBLE_NEWLINE); } - private static Map<ConfigGroups.Names, StringBuilder> generateContentTableAndMainHeadings(ListBuilder builder) { - EnumSet.allOf(ConfigGroups.Names.class).forEach(groupName -> builder.append( - new Link(new BoldText(groupName.name), - "#" + groupName.name()) - + ": " + ConfigGroups.getDescription(groupName))); - Map<ConfigGroups.Names, StringBuilder> contentMap = new LinkedHashMap<>(); - EnumSet.allOf(ConfigGroups.Names.class).forEach(groupName -> { - StringBuilder stringBuilder = new StringBuilder(); - generateConfigGroupSummary(stringBuilder, groupName.name, groupName.name(), ConfigGroups.getDescription(groupName), DEFAULT_CONFIG_GROUP_HEADING_LEVEL); - contentMap.put(groupName, stringBuilder); - }); - return contentMap; - } + /** + * Generate a ConfigTableRow for the given config + */ + private static ConfigTableRow generateConfigTableRow(Class subType, Field field, Object object) { + try { + ConfigProperty cfgProperty = (ConfigProperty) field.get(object); + List<String> columns = new ArrayList<>(); - private static void generateConfigGroupSummary(StringBuilder stringBuilder, String friendlyName, String groupName, String description, int headingSize) { - stringBuilder.append(getHeadingSizeMarkup(headingSize)) - .append(friendlyName) - .append(" {" + "#").append(groupName).append("}") - .append(NEWLINE) - .append(description) - .append(DOUBLE_NEWLINE); + if (StringUtils.isNullOrEmpty(cfgProperty.doc())) { + LOG.warn("Found empty or null description for config class = " + + subType.getName() + + " for param = " + + field.getName()); + } + + // Config Key + String configKeyWithAnchorLink = "[" + cfgProperty.key() + "](#" + cfgProperty.key().replace(" ", "-").replace(".", "") + ")"; + columns.add(configKeyWithAnchorLink); + + // Default value + Object defaultValue = cfgProperty.hasDefaultValue() ? cfgProperty.defaultValue() : (cfgProperty.hasInferFunction() ? "" : null); + if (defaultValue != null) { + columns.add(defaultValue + " (Optional)"); + } else { + columns.add("N/A " + new BoldText("(Required)")); + } + boolean isConfigRequired = (defaultValue == null); + + // Description + String description = StringUtils.isNullOrEmpty(cfgProperty.doc()) ? "" : cfgProperty.doc().replaceAll("[\\t\\n\\r]+", " ").replaceAll("&", "&").replaceAll("\\|", " | ").replaceAll("<", "<").replaceAll(">", ">"); + columns.add(description); + + // First version + if (cfgProperty.getSinceVersion().isPresent()) { + String sinceVersion = String.valueOf(cfgProperty.getSinceVersion().get()); + String deprecatedVersion = ""; + if (cfgProperty.getDeprecatedVersion().isPresent()) { + deprecatedVersion = ". Deprecated since: " + String.valueOf(cfgProperty.getDeprecatedVersion().get()); + } + columns.add(sinceVersion + deprecatedVersion); + } else { + columns.add(" "); + } + + return new ConfigTableRow(cfgProperty.key(), new TableRow<>(columns), isConfigRequired, cfgProperty.isAdvanced()); + } catch (IllegalAccessException e) { + LOG.error("Error while getting field through reflection for config class: " + subType.getName(), e); + throw new IllegalArgumentException("Error while getting field through reflection for config class: " + subType.getName(), e); + } } - private static String getHeadingSizeMarkup(int headingSize){ + /** + * Returns the markup heading string for given heading size + */ + private static String getHeadingSizeMarkup(int headingSize) { StringBuilder stringBuilder = new StringBuilder(); for (int i = 0; i < headingSize; i++) { stringBuilder.append("#"); } stringBuilder.append(" "); return stringBuilder.toString(); } - private static StringBuilder generateExternalizedConfigs() { + + /** + * Returns the formatted summary for main Config group. + */ + private static String generateConfigGroupSummary(String friendlyName, String anchorString, String description, int headingSize) { StringBuilder stringBuilder = new StringBuilder(); - stringBuilder.append(EXTERNALIZED_CONFIGS); - stringBuilder.append(DOUBLE_NEWLINE); - return stringBuilder; + stringBuilder.append(getHeadingSizeMarkup(headingSize)) + .append(friendlyName) + .append(" {" + "#").append(anchorString).append("}") + .append(NEWLINE) + .append(description) + .append(DOUBLE_NEWLINE); + return stringBuilder.toString(); } - private static void populateSparkConfigs(Map<ConfigGroups.Names, StringBuilder> contentMap) { - StringBuilder configParamsBuilder = contentMap.get(ConfigGroups.Names.SPARK_DATASOURCE); - - for (Object sparkConfigObject : HoodieSparkConfigs.getSparkConfigObjects()) { - String configName = HoodieSparkConfigs.name(sparkConfigObject); - LOG.info("Processing params for config class: " + configName + " desc: " + HoodieSparkConfigs.description(sparkConfigObject)); - - configParamsBuilder.append("### ").append(configName) - .append(" {" + "#").append(configName.replace(" ", "-")).append("}") - .append(DOUBLE_NEWLINE); - configParamsBuilder.append(HoodieSparkConfigs.description(sparkConfigObject)).append(DOUBLE_NEWLINE); - - configParamsBuilder - .append("`") - .append(new Text("Config Class")) - .append("`") - .append(": ") - .append(HoodieSparkConfigs.className()).append(LINE_BREAK); + private static String generateConfigClassParam(String subTypeName) { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append("`") + .append(new Text("Config Class")) + .append("`") + .append(": ") + .append(subTypeName).append(LINE_BREAK); + return stringBuilder.toString(); + } + /** + * Generates the basic/advanced configs from the given list of configs + */ + private static String generateBasicOrAdvancedConfigsFrom(List<ConfigTableRow> configs, String configClassPrefix, boolean basicConfigs) { + StringBuilder stringBuilder = new StringBuilder(); + String anchorString; + String configTableHeading; + if (basicConfigs) { + anchorString = configClassPrefix + "-basic-configs"; + configTableHeading = "Basic Configs"; + } else { + anchorString = configClassPrefix + "-advanced-configs"; + configTableHeading = "Advanced Configs"; + } + Table.Builder configsTable = new Table.Builder(); + configsTable.addRow(DEFAULT_TABLE_HEADER_ROW); + for (ConfigTableRow config : configs) { + configsTable.addRow(config.getColumns()); + } + stringBuilder.append(DOUBLE_NEWLINE) + .append("[" + new BoldText(configTableHeading) + "]") + .append("(" + "#").append(anchorString).append(")") + .append(NEWLINE) + .append(DOUBLE_NEWLINE); + stringBuilder.append(configsTable.build()); + return stringBuilder.toString(); + } - Set<Field> hardcodedFields = ReflectionUtils.getAllFields(sparkConfigObject.getClass(), withTypeAssignableTo(ConfigProperty.class)); + /** + * Comparator to be used when ordering the config classes. Used to sort the tree map based on these columns in the + * order - groupname, subgroupname (reverse order), areCommonConfigs (reverse order) and the config class name. We + * want to list all groups with no subgroups first. Followed by groups with subgroups and among them list the class + * that has common configs first. The group name and subgroup name are based on ENUM position instead of actual + * string. + */ + public static Comparator<ConfigClassMeta> getConfigClassMetaComparator() { + return Comparator.comparing(ConfigClassMeta::getGroupName) + .thenComparing(ConfigClassMeta::getSubGroupName, Comparator.reverseOrder()) + .thenComparing(ConfigClassMeta::areCommonConfigs, Comparator.reverseOrder()) + .thenComparing(ConfigClassMeta::getClassName); + } - List<ConfigMarkup> allConfigs = new ArrayList<>(); - for (Field field : hardcodedFields) { - field.setAccessible(true); - ConfigMarkup configMarkup = generateConfigMarkup(sparkConfigObject.getClass(), field, sparkConfigObject, DEFAULT_CONFIG_PARAM_HEADING_LEVEL); - allConfigs.add(configMarkup); + /** + * Initializes the tree map with empty objects for config markup sections. + * + * @param subTypes + * @param configClassTreeMap + */ + private static void initConfigClassTreeMap(Set<Class<? extends HoodieConfig>> subTypes, NavigableMap<ConfigClassMeta, ConfigClassMarkups> configClassTreeMap) { + for (Class<? extends HoodieConfig> subType : subTypes) { + // sub-heading using the annotation + ConfigClassProperty configClassProperty = subType.getAnnotation(ConfigClassProperty.class); + LOG.info(subType.getName()); + try { + if (configClassProperty != null) { + ConfigClassMeta configClassMeta = new ConfigClassMeta(configClassProperty.groupName(), configClassProperty.subGroupName(), configClassProperty.areCommonConfigs(), subType); + configClassTreeMap.put(configClassMeta, new ConfigClassMarkups()); + } else { + LOG.error("FATAL error Please add `ConfigClassProperty` annotation for " + subType.getName()); + } + } catch (Exception e) { + LOG.error("FATAL error while processing config class: " + subType.getName(), e); } - // sort the configs based on config key prefix and add to the configParamsBuilder - allConfigs.sort(Comparator.comparing(ConfigMarkup::isConfigRequired).reversed() - .thenComparing(ConfigMarkup::getConfigKey)); - allConfigs.forEach(cfg -> configParamsBuilder.append(cfg.configMarkupString)); } } - private static void generateFlinkConfigMarkup(Class subType, StringBuilder configParamsBuilder) { - try { - List<ConfigMarkup> allConfigs = new ArrayList(); - Set<Field> fields = getAllFields(FlinkOptions.class, withTypeAssignableTo(ConfigOption.class)); - for (Field field : fields) { - StringBuilder tmpConfigParamBuilder = new StringBuilder(); - ConfigOption cfgProperty = (ConfigOption) field.get(null); - String description = new HtmlFormatter().format(cfgProperty.description()); - if (description.isEmpty()) { - LOG.warn("Found empty or null description for config class = " - + subType.getName() - + " for param = " - + field.getName()); - } - // Config Header - tmpConfigParamBuilder.append("> ").append("#### ").append(new Text(cfgProperty.key())).append(NEWLINE); - - // Description - tmpConfigParamBuilder - .append("> ") - .append(description) - .append(LINE_BREAK); - - // Default value - Object defaultValue = cfgProperty.hasDefaultValue() ? cfgProperty.defaultValue() : null; - addDefaultValue(tmpConfigParamBuilder, defaultValue); - boolean isConfigRequired = (defaultValue == null); - - // TODO: Custom config tags like "Doc on Default Value:" cannot be added for Flink. - // ConfigOption is a Flink class. In order to support custom config apis like getDocOnDefaultValue - // this class needs to be wrapped in Hudi first. - - // Config param name - generateConfigKeyValue(tmpConfigParamBuilder, "Config Param", field.getName()); - - tmpConfigParamBuilder - .append(NEWLINE) - .append(new HorizontalRule(3)) - .append(DOUBLE_NEWLINE); - - ConfigMarkup configMarkup = new ConfigMarkup(cfgProperty.key(), isConfigRequired, tmpConfigParamBuilder.toString()); - allConfigs.add(configMarkup); + /** + * Builds the config markups for all Config classes (Flink included) except Spark based ones. + * This can be reused later to generate config tables in individual doc pages. + */ + private static void buildConfigMarkup(NavigableMap<ConfigClassMeta, ConfigClassMarkups> configClassTreeMap) { + // generate Docs from the config classes + ConfigGroups.Names prevGroupName = ConfigGroups.Names.ENVIRONMENT_CONFIG; + ConfigGroups.SubGroupNames prevSubGroupName = NONE; + int configParamHeadingLevel = DEFAULT_CONFIG_PARAM_HEADING_LEVEL; + Set<ConfigClassMeta> keySet = configClassTreeMap.keySet(); + + for (ConfigClassMeta configClassMetaInfo : keySet) { + ConfigClassMarkups configClassMarkup = configClassTreeMap.get(configClassMetaInfo); + Class<? extends HoodieConfig> subType = configClassMetaInfo.subType; + ConfigClassProperty configClassProperty = subType.getAnnotation(ConfigClassProperty.class); + ConfigGroups.Names groupName = configClassProperty.groupName(); + + /* + We need to handle an exceptiion for the ConfigGroup SPARK_DATASOURCE since HoodiePreCommitValidatorConfig that Review Comment: ```suggestion We need to handle an exception for the ConfigGroup SPARK_DATASOURCE since HoodiePreCommitValidatorConfig that ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org