[ https://issues.apache.org/jira/browse/SPARK-21485?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hyukjin Kwon updated SPARK-21485:
---------------------------------
    Description: 
It looks like we can generate the documentation from {{ExpressionDescription}} and {{ExpressionInfo}} for Spark's SQL function documentation.

I had some time to play with this, so I made a rough version here: https://spark-test.github.io/sparksqldoc/

The code I used is below.

In the {{pyspark}} shell:

{code}
from collections import namedtuple

ExpressionInfo = namedtuple("ExpressionInfo", "className usage name extended")

jinfos = spark.sparkContext._jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listBuiltinFunctions()

infos = []
for jinfo in jinfos:
    name = jinfo.getName()
    usage = jinfo.getUsage()
    usage = usage.replace("_FUNC_", name) if usage is not None else usage
    extended = jinfo.getExtended()
    extended = extended.replace("_FUNC_", name) if extended is not None else extended
    infos.append(ExpressionInfo(
        className=jinfo.getClassName(),
        usage=usage,
        name=name,
        extended=extended))

with open("index.md", 'w') as mdfile:
    strip = lambda s: "\n".join(map(lambda u: u.strip(), s.split("\n")))
    for info in sorted(infos, key=lambda i: i.name):
        mdfile.write("### %s\n\n" % info.name)
        if info.usage is not None:
            mdfile.write("%s\n\n" % strip(info.usage))
        if info.extended is not None:
            mdfile.write("```%s```\n\n" % strip(info.extended))
{code}

This change had to be made first, before running the code above:

{code:none}
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
@@ -17,9 +17,15 @@
 package org.apache.spark.sql.api.python
 
+import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
+import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.types.DataType
 
 private[sql] object PythonSQLUtils {
   def parseDataType(typeText: String): DataType = CatalystSqlParser.parseDataType(typeText)
+
+  def listBuiltinFunctions(): Array[ExpressionInfo] = {
+    FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
+  }
 }
{code}

And then, I ran this:

{code}
mkdir docs
echo "site_name: Spark SQL 2.3.0" >> mkdocs.yml
echo "theme: readthedocs" >> mkdocs.yml
mv index.md docs/index.md
mkdocs serve
{code}
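As a side note, a quick way to sanity-check the output before writing the whole {{index.md}} is to preview a single function's entry in the {{pyspark}} shell. A minimal sketch (it assumes the same patched build with the {{listBuiltinFunctions()}} helper from the diff above, and picks {{abs}} only as an example):

{code}
# Preview the generated Markdown entry for one function (here "abs")
# before writing the full index.md. Requires the listBuiltinFunctions()
# helper added to PythonSQLUtils in the diff above.
jinfos = spark.sparkContext._jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listBuiltinFunctions()
for jinfo in jinfos:
    if jinfo.getName() == "abs":
        usage = (jinfo.getUsage() or "").replace("_FUNC_", "abs")
        extended = (jinfo.getExtended() or "").replace("_FUNC_", "abs")
        print("### abs\n")
        print(usage + "\n")
        print("```" + extended + "```")
{code}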
> API Documentation for Spark SQL functions
> -----------------------------------------
>
>                 Key: SPARK-21485
>                 URL: https://issues.apache.org/jira/browse/SPARK-21485
>             Project: Spark
>          Issue Type: Documentation
>          Components: Documentation, SQL
>    Affects Versions: 2.3.0
>            Reporter: Hyukjin Kwon