[ https://issues.apache.org/jira/browse/SPARK-24839?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
marios iliofotou updated SPARK-24839: ------------------------------------- Description: The problem shows up when joining on a column that has a constant value. As seen from the logical plan in the exception, the literal column gets dropped, which results in joining two DataFrames on a column that does not exist, which correctly results in a Cartesian join. {code:java} scala> val df1 = spark.createDataFrame(Seq((1, 2), (2, 4))).withColumn("index", lit("a")) scala> df1.show +---+---+-----+ | _1| _2|index| +---+---+-----+ | 1| 2| a| | 2| 4| a| +---+---+-----+ scala> val df2 = spark.createDataFrame(Seq(("a", 1),("b", 2))).toDF("index", "someval") scala> df2.show() +-----+-------+ |index|someval| +-----+-------+ | a| 1| | b| 2| +-----+-------+ scala> df1.join(df2).show() org.apache.spark.sql.AnalysisException: Detected implicit cartesian product for INNER join between logical plans LocalRelation [_1#370, _2#371] and LocalRelation [index#335, someval#336] Join condition is missing or trivial. 
Either: use the CROSS JOIN syntax to allow cartesian products between these relations, or: enable implicit cartesian products by setting the configuration variable spark.sql.crossJoin.enabled=true; at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$$anonfun$apply$21.applyOrElse(Optimizer.scala:1124) at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$$anonfun$apply$21.applyOrElse(Optimizer.scala:1121) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267) at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:266) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at 
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:256) at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$.apply(Optimizer.scala:1121) at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$.apply(Optimizer.scala:1103) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84) at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57) at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66) at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76) at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:66) at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:66) at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72) at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68) at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77) at 
org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3249) at org.apache.spark.sql.Dataset.head(Dataset.scala:2484) at org.apache.spark.sql.Dataset.take(Dataset.scala:2698) at org.apache.spark.sql.Dataset.showString(Dataset.scala:254) at org.apache.spark.sql.Dataset.show(Dataset.scala:723) at org.apache.spark.sql.Dataset.show(Dataset.scala:682) at org.apache.spark.sql.Dataset.show(Dataset.scala:691) ... 42 elided {code} Same error shows up even when we are not using a natural join: {code:java} df1.join(df2, df1("index") === df2("index") ).show(){code} Same if you change the joining order: {code:java} scala> df2.join(df1, df2("index") === df1("index"), "left_outer").show() org.apache.spark.sql.AnalysisException: Detected implicit cartesian product for LEFT OUTER join between logical plans LocalRelation [index#12, someval#13] and LocalRelation [_1#0, _2#1, index#4] Join condition is missing or trivial. Either: use the CROSS JOIN syntax to allow cartesian products between these relations, or: enable implicit cartesian products by setting the configuration variable spark.sql.crossJoin.enabled=true; {code} Same if you have the literal column to not match any of the keys in the second DataFrame: {code:java} scala> val df1 = spark.createDataFrame(Seq((1, 2), (2, 4))).withColumn("index", lit("x")) df1: org.apache.spark.sql.DataFrame = [_1: int, _2: int ... 1 more field] scala> val df2 = spark.createDataFrame(Seq(("a", 1),("b", 2))).toDF("index", "someval") df2: org.apache.spark.sql.DataFrame = [index: string, someval: int] scala> df2.join(df1, df2("index") === df1("index"), "left_outer").show() org.apache.spark.sql.AnalysisException: Detected implicit cartesian product for LEFT OUTER join between logical plans LocalRelation [index#142, someval#143] and LocalRelation [_1#130, _2#131, index#134] Join condition is missing or trivial. 
{code} was: The problem shows up when joining a column that has constant value. As seen from the exception in the logical plan, the literal column gets dropped, which results in joining two on column that does not exist, which correctly results in a Cartesian join. {code:java} scala> val df1 = spark.createDataFrame(Seq((1, 2), (2, 4))).withColumn("index", lit("a")) scala> df1.show +---+---+-----+ | _1| _2|index| +---+---+-----+ | 1| 2| a| | 2| 4| a| +---+---+-----+ scala> val df2 = spark.createDataFrame(Seq(("a", 1),("b", 2))).toDF("index", "someval") scala> df2.show() +-----+-------+ |index|someval| +-----+-------+ | a| 1| | b| 2| +-----+-------+ scala> df1.join(df2).show() org.apache.spark.sql.AnalysisException: Detected implicit cartesian product for INNER join between logical plans LocalRelation [_1#370, _2#371|#370, _2#371] and LocalRelation [index#335, someval#336|#335, someval#336] Join condition is missing or trivial. Either: use the CROSS JOIN syntax to allow cartesian products between these relations, or: enable implicit cartesian products by setting the configuration variable spark.sql.crossJoin.enabled=true; at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$$anonfun$apply$21.applyOrElse(Optimizer.scala:1124) at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$$anonfun$apply$21.applyOrElse(Optimizer.scala:1121) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267) at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:266) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) at 
org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272) at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:256) at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$.apply(Optimizer.scala:1121) at org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$.apply(Optimizer.scala:1103) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84) at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57) at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66) at 
scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84) at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76) at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:66) at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:66) at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72) at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68) at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77) at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3249) at org.apache.spark.sql.Dataset.head(Dataset.scala:2484) at org.apache.spark.sql.Dataset.take(Dataset.scala:2698) at org.apache.spark.sql.Dataset.showString(Dataset.scala:254) at org.apache.spark.sql.Dataset.show(Dataset.scala:723) at org.apache.spark.sql.Dataset.show(Dataset.scala:682) at org.apache.spark.sql.Dataset.show(Dataset.scala:691) ... 42 elided {code} Same error shows up even when we are not using a natural join: {code:java} df1.join(df2, df1("index") === df2("index") ).show(){code} Same if you change the joining order: {code} scala> df2.join(df1, df2("index") === df1("index"), "left_outer").show() org.apache.spark.sql.AnalysisException: Detected implicit cartesian product for LEFT OUTER join between logical plans LocalRelation [index#12, someval#13] and LocalRelation [_1#0, _2#1, index#4] Join condition is missing or trivial. 
Either: use the CROSS JOIN syntax to allow cartesian products between these relations, or: enable implicit cartesian products by setting the configuration variable spark.sql.crossJoin.enabled=true; {code} Same if you have the literal column to not match any of the keys in the second DataFrame: {code} scala> val df1 = spark.createDataFrame(Seq((1, 2), (2, 4))).withColumn("index", lit("x")) df1: org.apache.spark.sql.DataFrame = [_1: int, _2: int ... 1 more field] scala> val df2 = spark.createDataFrame(Seq(("a", 1),("b", 2))).toDF("index", "someval") df2: org.apache.spark.sql.DataFrame = [index: string, someval: int] scala> df2.join(df1, df2("index") === df1("index"), "left_outer").show() org.apache.spark.sql.AnalysisException: Detected implicit cartesian product for LEFT OUTER join between logical plans LocalRelation [index#142, someval#143] and LocalRelation [_1#130, _2#131, index#134] Join condition is missing or trivial. {code} > Incorrect drop of lit() column results in cross join > ---------------------------------------------------- > > Key: SPARK-24839 > URL: https://issues.apache.org/jira/browse/SPARK-24839 > Project: Spark > Issue Type: Bug > Components: Optimizer > Affects Versions: 2.3.1 > Reporter: marios iliofotou > Priority: Major > > The problem shows up when joining a column that has constant value. As seen > from the exception in the logical plan, the literal column gets dropped, > which results in joining two DF on a column that does not exist, which > correctly results in a Cartesian join. 
> > {code:java} > scala> val df1 = spark.createDataFrame(Seq((1, 2), (2, > 4))).withColumn("index", lit("a")) > scala> df1.show > +---+---+-----+ > | _1| _2|index| > +---+---+-----+ > | 1| 2| a| > | 2| 4| a| > +---+---+-----+ > > scala> val df2 = spark.createDataFrame(Seq(("a", 1),("b", 2))).toDF("index", > "someval") > scala> df2.show() > +-----+-------+ > |index|someval| > +-----+-------+ > | a| 1| > | b| 2| > +-----+-------+ > scala> df1.join(df2).show() > org.apache.spark.sql.AnalysisException: Detected implicit cartesian product > for INNER join between logical plans > LocalRelation [_1#370, _2#371|#370, _2#371] > and > LocalRelation [index#335, someval#336|#335, someval#336] > Join condition is missing or trivial. > Either: use the CROSS JOIN syntax to allow cartesian products between these > relations, or: enable implicit cartesian products by setting the > configuration > variable spark.sql.crossJoin.enabled=true; > at > org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$$anonfun$apply$21.applyOrElse(Optimizer.scala:1124) > at > org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$$anonfun$apply$21.applyOrElse(Optimizer.scala:1121) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:267) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:266) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) > at > 
org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:272) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:272) > at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:256) > at > org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$.apply(Optimizer.scala:1121) > at > org.apache.spark.sql.catalyst.optimizer.CheckCartesianProducts$.apply(Optimizer.scala:1103) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84) > at > scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57) > at > scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66) > at 
scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76) > at scala.collection.immutable.List.foreach(List.scala:392) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76) > at > org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:66) > at > org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:66) > at > org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72) > at > org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68) > at > org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77) > at > org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3249) > at org.apache.spark.sql.Dataset.head(Dataset.scala:2484) > at org.apache.spark.sql.Dataset.take(Dataset.scala:2698) > at org.apache.spark.sql.Dataset.showString(Dataset.scala:254) > at org.apache.spark.sql.Dataset.show(Dataset.scala:723) > at org.apache.spark.sql.Dataset.show(Dataset.scala:682) > at org.apache.spark.sql.Dataset.show(Dataset.scala:691) > ... 42 elided > {code} > > Same error shows up even when we are not using a natural join: > {code:java} > df1.join(df2, df1("index") === df2("index") ).show(){code} > > Same if you change the joining order: > > {code:java} > scala> df2.join(df1, df2("index") === df1("index"), "left_outer").show() > org.apache.spark.sql.AnalysisException: Detected implicit cartesian product > for LEFT OUTER join between logical plans > LocalRelation [index#12, someval#13] > and > LocalRelation [_1#0, _2#1, index#4] > Join condition is missing or trivial. 
> Either: use the CROSS JOIN syntax to allow cartesian products between these > relations, or: enable implicit cartesian products by setting the configuration > variable spark.sql.crossJoin.enabled=true; > {code} > > Same if you have the literal column to not match any of the keys in the > second DataFrame: > {code:java} > scala> val df1 = spark.createDataFrame(Seq((1, 2), (2, > 4))).withColumn("index", lit("x")) > df1: org.apache.spark.sql.DataFrame = [_1: int, _2: int ... 1 more field] > scala> val df2 = spark.createDataFrame(Seq(("a", 1),("b", 2))).toDF("index", > "someval") > df2: org.apache.spark.sql.DataFrame = [index: string, someval: int] > scala> df2.join(df1, df2("index") === df1("index"), "left_outer").show() > org.apache.spark.sql.AnalysisException: Detected implicit cartesian product > for LEFT OUTER join between logical plans > LocalRelation [index#142, someval#143] > and > LocalRelation [_1#130, _2#131, index#134] > Join condition is missing or trivial. > {code} > -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org