[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-14 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334569158
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.slf4j.LoggerFactory
+
+import org.apache.spark.annotation.Evolving
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+import org.apache.spark.sql.types.{BooleanType, StructType}
+
+/**
+ * Contains constants used for convention based column naming.
+ */
+@Evolving
+object CypherSession {
+
+  /**
+   * Naming convention for identifier columns, both node and relationship identifiers.
+   */
+  val ID_COLUMN = "$ID"
+
+  /**
+   * Naming convention for relationship source identifier.
+   */
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+
+  /**
+   * Naming convention for relationship target identifier.
+   */
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+
+  /**
+   * Naming convention both for node label and relationship type prefixes.
+   */
+  val LABEL_COLUMN_PREFIX = ":"
+
+  /**
+   * Extracts [[NodeDataset]]s from a [[Dataset]] using column name conventions.
+   *
+   * For information about naming conventions, see [[CypherSession.createGraph]].
+   *
+   * @param nodes node dataset
+   * @since 3.0.0
+   */
+  def extractNodeDataset(nodes: Dataset[Row]): Set[NodeDataset] = {
+    val labelColumns = nodes.columns.filter(_.startsWith(LABEL_COLUMN_PREFIX)).toSet
+    validateLabelColumns(nodes.schema, labelColumns)
+
+    val nodeProperties = (nodes.columns.toSet - ID_COLUMN -- labelColumns)
+      .map(col => col -> col)
+      .toMap
+
+    val labelCount = labelColumns.size
+    if (labelCount > 5) {
+      LoggerFactory.getLogger(CypherSession.getClass).warn(
+        s"$labelCount label columns will result in ${Math.pow(labelCount, 2)} node frames.")
+      if (labelCount > 10) {
+        throw new IllegalArgumentException(
+          s"Expected number of label columns to be less than or equal to 10, was $labelCount.")
+      }
+    }
+
+    val labelSets = labelColumns.subsets().toSet
+
+    labelSets.map { labelSet =>
+      val predicate = labelColumns
+        .map { labelColumn =>
+          if (labelSet.contains(labelColumn)) {
+            nodes.col(labelColumn)
+          } else {
+            !nodes.col(labelColumn)
+          }
+        }
+        .reduce(_ && _)
+
+      NodeDataset(nodes.filter(predicate), ID_COLUMN, labelSet.map(_.substring(1)), nodeProperties)
+    }
+  }
+
+  /**
+   * Extracts [[RelationshipDataset]]s from a [[Dataset]] using column name conventions.
+   *
+   * For information about naming conventions, see [[CypherSession.createGraph]].
+   *
+   * @param relationships relationship dataset
+   * @since 3.0.0
+   */
+  def extractRelationshipDataset(relationships: Dataset[Row]): Set[RelationshipDataset] = {
 
 Review comment:
   +1 for your suggestion, `extractRelationshipDataset` -> 
`extractRelationshipDatasets`.
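
As a minimal sketch of the relationship-side conventions these helpers read (illustrative only; `spark` is an assumed local SparkSession, and the plural method name below simply follows the rename suggested above):

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    import spark.implicits._

    // A relationship Dataset following the conventions: "$ID", "$SOURCE_ID",
    // "$TARGET_ID", plus one boolean column per relationship type (":KNOWS").
    val rels = Seq(
      (0L, 10L, 11L, true),
      (1L, 11L, 12L, true)
    ).toDF("$ID", "$SOURCE_ID", "$TARGET_ID", ":KNOWS")

    // With the suggested plural name this call would become:
    // val relationshipDatasets = CypherSession.extractRelationshipDatasets(rels)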





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-14 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334569057
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.slf4j.LoggerFactory
+
+import org.apache.spark.annotation.Evolving
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+import org.apache.spark.sql.types.{BooleanType, StructType}
+
+/**
+ * Contains constants used for convention based column naming.
+ */
+@Evolving
+object CypherSession {
+
+  /**
+   * Naming convention for identifier columns, both node and relationship identifiers.
+   */
+  val ID_COLUMN = "$ID"
+
+  /**
+   * Naming convention for relationship source identifier.
+   */
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+
+  /**
+   * Naming convention for relationship target identifier.
+   */
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+
+  /**
+   * Naming convention both for node label and relationship type prefixes.
+   */
+  val LABEL_COLUMN_PREFIX = ":"
+
+  /**
+   * Extracts [[NodeDataset]]s from a [[Dataset]] using column name conventions.
+   *
+   * For information about naming conventions, see [[CypherSession.createGraph]].
+   *
+   * @param nodes node dataset
+   * @since 3.0.0
+   */
+  def extractNodeDataset(nodes: Dataset[Row]): Set[NodeDataset] = {
 
 Review comment:
   +1 for your suggestion. `extractNodeDataset` -> `extractNodeDatasets`.
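
A matching sketch for the node side (illustrative only; `spark` is an assumed local SparkSession, and the plural method name follows the suggestion above):

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    import spark.implicits._

    // A node Dataset following the conventions: "$ID" plus one boolean column
    // per label. Extraction considers every subset of the label columns
    // (2^n subsets for n label columns), yielding one NodeDataset per subset.
    val nodes = Seq(
      (0L, true, false),  // :Person
      (1L, true, true)    // :Person:Employee
    ).toDF("$ID", ":Person", ":Employee")

    // With the suggested plural name this call would become:
    // val nodeDatasets = CypherSession.extractNodeDatasets(nodes)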





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334068831
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+import org.apache.spark.sql.types.{BooleanType, StructType}
+
+/**
+ * Contains constants used for convention based column naming.
+ */
+object CypherSession {
+
+  /**
+   * Naming convention for identifier columns, both node and relationship identifiers.
+   */
+  val ID_COLUMN = "$ID"
+
+  /**
+   * Naming convention for relationship source identifier.
+   */
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+
+  /**
+   * Naming convention for relationship target identifier.
+   */
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+
+  /**
+   * Naming convention both for node label and relationship type prefixes.
+   */
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * A CypherSession allows for creating, storing and loading [[PropertyGraph]] instances as well as
+ * executing Cypher queries on them.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession extends Logging {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph [[PropertyGraph]] on which the query is executed
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String, parameters: Map[String, Any]): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(
+      graph: PropertyGraph,
+      query: String,
+      parameters: java.util.Map[String, Object]): CypherResult = {
+    cypher(graph, query, parameters.asScala.toMap)
+  }
+
+  /**
+   * Creates a [[PropertyGraph]] from a sequence of [[NodeFrame]]s and [[RelationshipFrame]]s.
+   * At least one [[NodeFrame]] has to be provided.
+   *
+   * For each label set and relationship type there can be at most one [[NodeFrame]] and at most one
+   * [[RelationshipFrame]], respectively.
+   *
+   * @param nodes NodeFrames that define the nodes in the graph
+   * @param relationships RelationshipFrames that define the relationships in the graph
+   * @since 3.0.0
+   */
+  def createGraph(nodes: Array[NodeFrame], relationships: Array[RelationshipFrame]): PropertyGraph
+
+  /**
+   * Creates a [[PropertyGraph]] from nodes and relationships.
+   *
+   * The given dataset needs to adhere to the following column naming conventions:
+   *
+   * {{{
+   * Id column:        `$ID`            (nodes and relationships)
+   * SourceId column:  `$SOURCE_ID`     (relationships)
+   * TargetId column:  `$TARGET_ID`     (relationships)
+   *
+   * Label columns:    `:{LABEL_NAME}`  (nodes)
+   * RelType columns:  
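
As a rough usage sketch of the parameterized `cypher` overloads declared above (illustrative only; how a concrete `CypherSession` implementation is obtained is outside this excerpt):

    import org.apache.spark.graph.api.{CypherResult, CypherSession, PropertyGraph}

    // session and graph are assumed to be provided by a CypherSession implementation.
    def findPerson(session: CypherSession, graph: PropertyGraph): CypherResult = {
      val parameters: Map[String, Any] = Map("name" -> "Alice")
      session.cypher(graph,
        """MATCH (n:Person)
          |WHERE n.name = $name
          |RETURN n""".stripMargin,
        parameters)
    }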

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334067613
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
 
 Review comment:
  Thanks. Then it's okay! We can keep it.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334041482
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrameBuilder.scala
 ##
 @@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Interface used to build a [[RelationshipFrame]].
+ *
+ * @param df DataFrame containing a single relationship in each row
+ * @since 3.0.0
+ */
+final class RelationshipFrameBuilder(val df: Dataset[Row]) {
 
 Review comment:
   `var df:` -> `var ds:`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334041397
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrameBuilder.scala
 ##
 @@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Interface used to build a [[RelationshipFrame]].
+ *
+ * @param df DataFrame containing a single relationship in each row
 
 Review comment:
   `@param df` -> `@param ds`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334041260
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrame.scala
 ##
 @@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to relationships.
+ *
+ * Each row in the DataFrame represents a relationship with the given relationship type.
+ *
+ * @param df               DataFrame containing a single relationship in each row
+ * @param idColumn         column that contains the relationship identifier
+ * @param sourceIdColumn   column that contains the source node identifier of the relationship
+ * @param targetIdColumn   column that contains the target node identifier of the relationship
+ * @param relationshipType relationship type that is assigned to all relationships
+ * @param properties       mapping from property keys to corresponding columns
+ * @since 3.0.0
+ */
+case class RelationshipFrame private[graph] (
+df: Dataset[Row],
 
 Review comment:
   `df:` -> `ds:`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334040428
 
 

 ##
 File path: graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrame.scala
 ##
 @@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to nodes.
+ *
+ * Each row in the DataFrame represents a node which has exactly the labels defined by the given
+ * label set.
+ *
+ * @param df DataFrame containing a single node in each row
+ * @param idColumn   column that contains the node identifier
+ * @param labelSet   labels that are assigned to all nodes
+ * @param properties mapping from property keys to corresponding columns
+ * @since 3.0.0
+ */
+case class NodeFrame private[graph] (
+df: Dataset[Row],
 
 Review comment:
   `df:` -> `ds:`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334040579
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrameBuilder.scala
 ##
 @@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Interface used to build a [[NodeFrame]].
+ *
+ * @param df DataFrame containing a single node in each row
 
 Review comment:
   `@param df` -> `@param ds`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334040710
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrameBuilder.scala
 ##
 @@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Interface used to build a [[NodeFrame]].
+ *
+ * @param df DataFrame containing a single node in each row
+ * @since 3.0.0
+ */
+final class NodeFrameBuilder(var df: Dataset[Row]) {
 
 Review comment:
   `var df:` -> `var ds:`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334040332
 
 

 ##
 File path: graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrame.scala
 ##
 @@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to nodes.
+ *
+ * Each row in the DataFrame represents a node which has exactly the labels 
defined by the given
+ * label set.
+ *
+ * @param df DataFrame containing a single node in each row
 
 Review comment:
   `@param df` -> `@param ds`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334040201
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/GraphElementFrame.scala
 ##
 @@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A [[PropertyGraph]] is created from GraphElementFrames.
+ *
+ * A graph element is either a node or a relationship.
+ * A GraphElementFrame wraps a DataFrame and describes how it maps to graph elements.
+ *
+ * @since 3.0.0
+ */
+abstract class GraphElementFrame {
+
+  /**
+   * Initial DataFrame that can still contain unmapped, arbitrarily ordered columns.
+   *
+   * @since 3.0.0
+   */
+  def df: Dataset[Row]
 
 Review comment:
   `def df` -> `def ds`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334039878
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherResult.scala
 ##
 @@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Result of a Cypher query.
+ *
+ * Wraps a DataFrame that contains the result rows.
+ *
+ * @since 3.0.0
+ */
+sealed trait CypherResult {
+  // Note that representing the CypherResult as a trait allows for future extensions
+  // (e.g. returning graphs in addition to tables).
+
+  /**
+   * Contains the result rows.
+   *
+   * The column names are aligned with the return item names specified within the Cypher query,
+   * (e.g. `RETURN foo, bar AS baz` results in the columns `foo` and `baz`).
+   *
+   * @note Dot characters (i.e. `.`) within return item names are replaced by an underscore (`_`),
+   *   (e.g. `MATCH (n:Person) RETURN n` results in the columns `n`, `n:Person` and `n_name`).
+   * @since 3.0.0
+   */
+  def df: Dataset[Row]
 
 Review comment:
   `df` -> `ds`.
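
To make the column-naming note above concrete, a small sketch (illustrative only; `graph` is an assumed PropertyGraph instance, and `df` would become `ds` under the rename suggested above):

    // RETURN foo, bar AS baz     => result columns "foo" and "baz"
    // MATCH (n:Person) RETURN n  => columns such as "n", "n:Person", "n_name",
    //                               since dots in return items become underscores.
    // graph: an assumed, already constructed PropertyGraph.
    val result = graph.cypher("MATCH (n:Person) RETURN n")
    result.df.select("n_name").show()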





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334039302
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrameBuilder.scala
 ##
 @@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Interface used to build a [[RelationshipFrame]].
+ *
+ * @param df DataFrame containing a single relationship in each row
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334039032
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrame.scala
 ##
 @@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to relationships.
+ *
+ * Each row in the DataFrame represents a relationship with the given relationship type.
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334039113
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrame.scala
 ##
 @@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to relationships.
+ *
+ * Each row in the DataFrame represents a relationship with the given relationship type.
+ *
+ * @param df   DataFrame containing a single relationship in each row
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334038844
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/GraphElementFrame.scala
 ##
 @@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A [[PropertyGraph]] is created from GraphElementFrames.
+ *
+ * A graph element is either a node or a relationship.
+ * A GraphElementFrame wraps a DataFrame and describes how it maps to graph elements.
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334033319
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/GraphElementFrame.scala
 ##
 @@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A [[PropertyGraph]] is created from GraphElementFrames.
+ *
+ * A graph element is either a node or a relationship.
+ * A GraphElementFrame wraps a DataFrame and describes how it maps to graph elements.
+ *
+ * @since 3.0.0
+ */
+abstract class GraphElementFrame {
+
+  /**
+   * Initial DataFrame that can still contain unmapped, arbitrarily ordered columns.
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334027537
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherResult.scala
 ##
 @@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Result of a Cypher query.
+ *
+ * Wraps a DataFrame that contains the result rows.
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334033463
 
 

 ##
 File path: graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrame.scala
 ##
 @@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to nodes.
 
 Review comment:
   `DataFrame` -> `Dataset`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334033601
 
 

 ##
 File path: graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrame.scala
 ##
 @@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to nodes.
+ *
+ * Each row in the DataFrame represents a node which has exactly the labels defined by the given
 
 Review comment:
   `DataFrame` -> `Dataset`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334037084
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
+ * @see <a href="https://dl.acm.org/citation.cfm?id=3183713.3190657">Property Graph Model</a>
+ * @since 3.0.0
+ */
+abstract class PropertyGraph {
+
+  /**
+   * The schema (graph type) describes the structure of this graph.
+   *
+   * @since 3.0.0
+   */
+  def schema: PropertyGraphType
+
+  /**
+   * The session in which this graph is managed.
+   *
+   * @since 3.0.0
+   */
+  def cypherSession: CypherSession
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using this graph as
+   * the input graph.
+   *
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(query: String): CypherResult = cypher(query, Map.empty[String, Any])
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: Map[String, Any]): CypherResult =
+    cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Executes a Cypher query in the [[CypherSession]] that manages this graph, using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: java.util.Map[String, Object]): CypherResult =
+    cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Returns the [[NodeFrame]] for a given node label set.
+   *
+   * @param labelSet Label set used for NodeFrame lookup
+   * @return NodeFrame for the given label set
+   * @since 3.0.0
+   */
+  def nodeFrame(labelSet: Array[String]): NodeFrame
+
+  /**
+   * Returns the [[RelationshipFrame]] for a given relationship type.
+   *
+   * @param relationshipType Relationship type used for RelationshipFrame lookup
+   * @return RelationshipFrame for the given relationship type
+   * @since 3.0.0
+   */
+  def relationshipFrame(relationshipType: String): RelationshipFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each node in this graph.
+   *
+   * The DataFrame adheres to the following column naming conventions:
 
 Review comment:
   `DataFrame` -> `Dataset`.
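
A short sketch of the accessors quoted above (illustrative only; the `PropertyGraph` instance is assumed to already exist):

    import org.apache.spark.graph.api.{NodeFrame, PropertyGraph, RelationshipFrame}

    def inspect(graph: PropertyGraph): Unit = {
      // Query the graph through the CypherSession that manages it.
      val people = graph.cypher("MATCH (p:Person) RETURN p.name AS name")
      people.df.show()

      // Look up the frames backing a particular label set or relationship type.
      val personFrame: NodeFrame = graph.nodeFrame(Array("Person"))
      val knowsFrame: RelationshipFrame = graph.relationshipFrame("KNOWS")
    }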





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334036998
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
+ * @see <a href="https://dl.acm.org/citation.cfm?id=3183713.3190657">Property Graph Model</a>
+ * @since 3.0.0
+ */
+abstract class PropertyGraph {
+
+  /**
+   * The schema (graph type) describes the structure of this graph.
+   *
+   * @since 3.0.0
+   */
+  def schema: PropertyGraphType
+
+  /**
+   * The session in which this graph is managed.
+   *
+   * @since 3.0.0
+   */
+  def cypherSession: CypherSession
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using this graph as
+   * the input graph.
+   *
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(query: String): CypherResult = cypher(query, Map.empty[String, Any])
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: Map[String, Any]): CypherResult =
+    cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Executes a Cypher query in the [[CypherSession]] that manages this graph, using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: java.util.Map[String, Object]): CypherResult =
+    cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Returns the [[NodeFrame]] for a given node label set.
+   *
+   * @param labelSet Label set used for NodeFrame lookup
+   * @return NodeFrame for the given label set
+   * @since 3.0.0
+   */
+  def nodeFrame(labelSet: Array[String]): NodeFrame
+
+  /**
+   * Returns the [[RelationshipFrame]] for a given relationship type.
+   *
+   * @param relationshipType Relationship type used for RelationshipFrame 
lookup
+   * @return RelationshipFrame for the given relationship type
+   * @since 3.0.0
+   */
+  def relationshipFrame(relationshipType: String): RelationshipFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each node in this graph.
 
 Review comment:
   `DataFrame` -> `Dataset`.
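
For illustration, a minimal sketch of the `cypher` overloads quoted above (assuming an existing PropertyGraph value named `graph`; the query text and parameter name are made up for the example):

    // Parameterised Cypher query; `$minAge` is resolved from the parameter map.
    val result: CypherResult = graph.cypher(
      "MATCH (p:Person) WHERE p.age > $minAge RETURN p.name",
      Map("minAge" -> 21))

    // A Java-friendly overload taking java.util.Map[String, Object] is also declared above.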





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334037192
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
+ * @see <a href="https://dl.acm.org/citation.cfm?id=3183713.3190657">Property Graph Model</a>
+ * @since 3.0.0
+ */
+abstract class PropertyGraph {
+
+  /**
+   * The schema (graph type) describes the structure of this graph.
+   *
+   * @since 3.0.0
+   */
+  def schema: PropertyGraphType
+
+  /**
+   * The session in which this graph is managed.
+   *
+   * @since 3.0.0
+   */
+  def cypherSession: CypherSession
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(query: String): CypherResult = cypher(query, Map.empty[String, 
Any])
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: Map[String, Any]): CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Executes a Cypher query in the [[CypherSession]] that manages this graph, 
using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: java.util.Map[String, Object]): 
CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Returns the [[NodeFrame]] for a given node label set.
+   *
+   * @param labelSet Label set used for NodeFrame lookup
+   * @return NodeFrame for the given label set
+   * @since 3.0.0
+   */
+  def nodeFrame(labelSet: Array[String]): NodeFrame
+
+  /**
+   * Returns the [[RelationshipFrame]] for a given relationship type.
+   *
+   * @param relationshipType Relationship type used for RelationshipFrame 
lookup
+   * @return RelationshipFrame for the given relationship type
+   * @since 3.0.0
+   */
+  def relationshipFrame(relationshipType: String): RelationshipFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each node in this graph.
+   *
+   * The DataFrame adheres to the following column naming conventions:
+   *
+   * {{{
+   * Id column:`$ID`
+   * Label columns:`:{LABEL_NAME}`
+   * Property columns: `{Property_Key}`
+   * }}}
+   *
+   * @see `org.apache.spark.graph.api.CypherSession.createGraph(nodes, 
relationships)`
+   * @since 3.0.0
+   */
+  def nodes: Dataset[Row]
+
+  /**
+   * Returns a DataFrame that contains a row for each relationship in this
 
 Review comment:
   `DataFrame` -> `Dataset`.
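
For illustration, a small node table that follows the column naming conventions quoted above (assuming an existing SparkSession named `spark`; the data is made up for the example):

    import spark.implicits._

    // One row per node: identifier column, one boolean column per label, then properties.
    val nodeRows = Seq(
      (0L, true, false, "Alice"),
      (1L, false, true, "ACME Corp")
    ).toDF("$ID", ":Person", ":Company", "name")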





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334033749
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrameBuilder.scala
 ##
 @@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Interface used to build a [[NodeFrame]].
+ *
+ * @param df DataFrame containing a single node in each row
 
 Review comment:
   `DataFrame` -> `Dataset`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334033633
 
 

 ##
 File path: graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrame.scala
 ##
 @@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to nodes.
+ *
+ * Each row in the DataFrame represents a node which has exactly the labels 
defined by the given
+ * label set.
+ *
+ * @param df DataFrame containing a single node in each row
 
 Review comment:
   `DataFrame` -> `Dataset`
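
For illustration, a sketch of the kind of input such a mapping expects (assuming an existing SparkSession named `spark`; apart from the `df` parameter quoted above, the NodeFrame parameter names below are assumptions made for the example):

    import spark.implicits._

    // Each row is one node; every node in this frame carries exactly the labels
    // of the frame's label set.
    val persons = Seq((0L, "Alice", 42), (1L, "Bob", 23)).toDF("id", "name", "age")

    // Hypothetical construction; only `df` is documented in the excerpt above.
    val personFrame = NodeFrame(df = persons, idColumn = "id", labelSet = Set("Person"))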





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334037273
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
+ * @see <a href="https://dl.acm.org/citation.cfm?id=3183713.3190657">Property Graph Model</a>
+ * @since 3.0.0
+ */
+abstract class PropertyGraph {
+
+  /**
+   * The schema (graph type) describes the structure of this graph.
+   *
+   * @since 3.0.0
+   */
+  def schema: PropertyGraphType
+
+  /**
+   * The session in which this graph is managed.
+   *
+   * @since 3.0.0
+   */
+  def cypherSession: CypherSession
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(query: String): CypherResult = cypher(query, Map.empty[String, 
Any])
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: Map[String, Any]): CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Executes a Cypher query in the [[CypherSession]] that manages this graph, 
using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: java.util.Map[String, Object]): 
CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Returns the [[NodeFrame]] for a given node label set.
+   *
+   * @param labelSet Label set used for NodeFrame lookup
+   * @return NodeFrame for the given label set
+   * @since 3.0.0
+   */
+  def nodeFrame(labelSet: Array[String]): NodeFrame
+
+  /**
+   * Returns the [[RelationshipFrame]] for a given relationship type.
+   *
+   * @param relationshipType Relationship type used for RelationshipFrame 
lookup
+   * @return RelationshipFrame for the given relationship type
+   * @since 3.0.0
+   */
+  def relationshipFrame(relationshipType: String): RelationshipFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each node in this graph.
+   *
+   * The DataFrame adheres to the following column naming conventions:
+   *
+   * {{{
+   * Id column:`$ID`
+   * Label columns:`:{LABEL_NAME}`
+   * Property columns: `{Property_Key}`
+   * }}}
+   *
+   * @see `org.apache.spark.graph.api.CypherSession.createGraph(nodes, 
relationships)`
+   * @since 3.0.0
+   */
+  def nodes: Dataset[Row]
+
+  /**
+   * Returns a DataFrame that contains a row for each relationship in this
+   * graph.
+   *
+   * The DataFrame adheres to column naming conventions:
 
 Review comment:
   `DataFrame` -> `Dataset`.
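
For illustration, a relationship table that follows the same conventions, using the identifier columns defined on CypherSession (`$ID`, `$SOURCE_ID`, `$TARGET_ID`); the data is made up for the example and an existing SparkSession named `spark` is assumed:

    import spark.implicits._

    // One row per relationship: id, source node id, target node id,
    // one boolean column per relationship type, then properties.
    val relRows = Seq(
      (0L, 0L, 1L, true, 2015)
    ).toDF("$ID", "$SOURCE_ID", "$TARGET_ID", ":KNOWS", "since")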





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334037966
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrame.scala
 ##
 @@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to relationships.
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334037273
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
+ * @see <a href="https://dl.acm.org/citation.cfm?id=3183713.3190657">Property Graph Model</a>
+ * @since 3.0.0
+ */
+abstract class PropertyGraph {
+
+  /**
+   * The schema (graph type) describes the structure of this graph.
+   *
+   * @since 3.0.0
+   */
+  def schema: PropertyGraphType
+
+  /**
+   * The session in which this graph is managed.
+   *
+   * @since 3.0.0
+   */
+  def cypherSession: CypherSession
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(query: String): CypherResult = cypher(query, Map.empty[String, 
Any])
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: Map[String, Any]): CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Executes a Cypher query in the [[CypherSession]] that manages this graph, 
using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: java.util.Map[String, Object]): 
CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Returns the [[NodeFrame]] for a given node label set.
+   *
+   * @param labelSet Label set used for NodeFrame lookup
+   * @return NodeFrame for the given label set
+   * @since 3.0.0
+   */
+  def nodeFrame(labelSet: Array[String]): NodeFrame
+
+  /**
+   * Returns the [[RelationshipFrame]] for a given relationship type.
+   *
+   * @param relationshipType Relationship type used for RelationshipFrame 
lookup
+   * @return RelationshipFrame for the given relationship type
+   * @since 3.0.0
+   */
+  def relationshipFrame(relationshipType: String): RelationshipFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each node in this graph.
+   *
+   * The DataFrame adheres to the following column naming conventions:
+   *
+   * {{{
+   * Id column:`$ID`
+   * Label columns:`:{LABEL_NAME}`
+   * Property columns: `{Property_Key}`
+   * }}}
+   *
+   * @see `org.apache.spark.graph.api.CypherSession.createGraph(nodes, 
relationships)`
+   * @since 3.0.0
+   */
+  def nodes: Dataset[Row]
+
+  /**
+   * Returns a DataFrame that contains a row for each relationship in this
+   * graph.
+   *
+   * The DataFrame adheres to column naming conventions:
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334037192
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
+ * @see <a href="https://dl.acm.org/citation.cfm?id=3183713.3190657">Property Graph Model</a>
+ * @since 3.0.0
+ */
+abstract class PropertyGraph {
+
+  /**
+   * The schema (graph type) describes the structure of this graph.
+   *
+   * @since 3.0.0
+   */
+  def schema: PropertyGraphType
+
+  /**
+   * The session in which this graph is managed.
+   *
+   * @since 3.0.0
+   */
+  def cypherSession: CypherSession
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(query: String): CypherResult = cypher(query, Map.empty[String, 
Any])
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: Map[String, Any]): CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Executes a Cypher query in the [[CypherSession]] that manages this graph, 
using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: java.util.Map[String, Object]): 
CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Returns the [[NodeFrame]] for a given node label set.
+   *
+   * @param labelSet Label set used for NodeFrame lookup
+   * @return NodeFrame for the given label set
+   * @since 3.0.0
+   */
+  def nodeFrame(labelSet: Array[String]): NodeFrame
+
+  /**
+   * Returns the [[RelationshipFrame]] for a given relationship type.
+   *
+   * @param relationshipType Relationship type used for RelationshipFrame 
lookup
+   * @return RelationshipFrame for the given relationship type
+   * @since 3.0.0
+   */
+  def relationshipFrame(relationshipType: String): RelationshipFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each node in this graph.
+   *
+   * The DataFrame adheres to the following column naming conventions:
+   *
+   * {{{
+   * Id column:`$ID`
+   * Label columns:`:{LABEL_NAME}`
+   * Property columns: `{Property_Key}`
+   * }}}
+   *
+   * @see `org.apache.spark.graph.api.CypherSession.createGraph(nodes, 
relationships)`
+   * @since 3.0.0
+   */
+  def nodes: Dataset[Row]
+
+  /**
+   * Returns a DataFrame that contains a row for each relationship in this
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334036998
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
+ * @see <a href="https://dl.acm.org/citation.cfm?id=3183713.3190657">Property Graph Model</a>
+ * @since 3.0.0
+ */
+abstract class PropertyGraph {
+
+  /**
+   * The schema (graph type) describes the structure of this graph.
+   *
+   * @since 3.0.0
+   */
+  def schema: PropertyGraphType
+
+  /**
+   * The session in which this graph is managed.
+   *
+   * @since 3.0.0
+   */
+  def cypherSession: CypherSession
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(query: String): CypherResult = cypher(query, Map.empty[String, 
Any])
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: Map[String, Any]): CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Executes a Cypher query in the [[CypherSession]] that manages this graph, 
using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: java.util.Map[String, Object]): 
CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Returns the [[NodeFrame]] for a given node label set.
+   *
+   * @param labelSet Label set used for NodeFrame lookup
+   * @return NodeFrame for the given label set
+   * @since 3.0.0
+   */
+  def nodeFrame(labelSet: Array[String]): NodeFrame
+
+  /**
+   * Returns the [[RelationshipFrame]] for a given relationship type.
+   *
+   * @param relationshipType Relationship type used for RelationshipFrame 
lookup
+   * @return RelationshipFrame for the given relationship type
+   * @since 3.0.0
+   */
+  def relationshipFrame(relationshipType: String): RelationshipFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each node in this graph.
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334037084
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
+ * @see <a href="https://dl.acm.org/citation.cfm?id=3183713.3190657">Property Graph Model</a>
+ * @since 3.0.0
+ */
+abstract class PropertyGraph {
+
+  /**
+   * The schema (graph type) describes the structure of this graph.
+   *
+   * @since 3.0.0
+   */
+  def schema: PropertyGraphType
+
+  /**
+   * The session in which this graph is managed.
+   *
+   * @since 3.0.0
+   */
+  def cypherSession: CypherSession
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(query: String): CypherResult = cypher(query, Map.empty[String, 
Any])
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: Map[String, Any]): CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Executes a Cypher query in the [[CypherSession]] that manages this graph, 
using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: java.util.Map[String, Object]): 
CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Returns the [[NodeFrame]] for a given node label set.
+   *
+   * @param labelSet Label set used for NodeFrame lookup
+   * @return NodeFrame for the given label set
+   * @since 3.0.0
+   */
+  def nodeFrame(labelSet: Array[String]): NodeFrame
+
+  /**
+   * Returns the [[RelationshipFrame]] for a given relationship type.
+   *
+   * @param relationshipType Relationship type used for RelationshipFrame 
lookup
+   * @return RelationshipFrame for the given relationship type
+   * @since 3.0.0
+   */
+  def relationshipFrame(relationshipType: String): RelationshipFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each node in this graph.
+   *
+   * The DataFrame adheres to the following column naming conventions:
 
 Review comment:
   `DataFrame` -> `Dataset`.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334035972
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
 
 Review comment:
   Hmm. I don't know the history, but it seems that the site itself announces false news.
   According to our dev mailing list, there is no evidence for the following. The Apache Spark community decided by VOTE, not `Databricks`.
   > Databricks and Neo4j contributors have proposed to integrate the 
openCypher language into Apache Spark 3.0 to become a central piece of a new 
graph module in Apache Spark 3.0, driven through the openCypher project Cypher 
for Apache Spark
   
   In particular, @mengxr already told us that he is working on this as an individual. So, if you want to keep this link, you must remove the announcement on the site. In general, I know that this is beyond the scope of this PR, so you can remove the link from the PR for now.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334035972
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
 
 Review comment:
   Hmm. I don't know the history, but it seems that the site itself announces false news.
   According to our dev mailing list, there is no evidence for the following. AFAIK, the Apache Spark community decided by VOTE.
   > Databricks and Neo4j contributors have proposed to integrate the 
openCypher language into Apache Spark 3.0 to become a central piece of a new 
graph module in Apache Spark 3.0, driven through the openCypher project Cypher 
for Apache Spark
   
   In particular, @mengxr already told us that he is working on this as an individual. So, if you want to keep this link, you must remove the announcement on the site. In general, I know that this is beyond the scope of this PR, so you can remove the link from the PR for now.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334035972
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
 
 Review comment:
   Hmm. I don't know the history, but it seems that the site itself announces false news.
   According to our dev mailing list, there is no evidence for the following.
   > Databricks and Neo4j contributors have proposed to integrate the 
openCypher language into Apache Spark 3.0 to become a central piece of a new 
graph module in Apache Spark 3.0, driven through the openCypher project Cypher 
for Apache Spark
   
   @mengxr already told us that he is working on this as an individual. So, if you want to keep this link, you must remove the announcement on the site. In general, I know this is beyond the scope of this PR, so you can remove the link from the PR for now.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334035972
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
 
 Review comment:
   Hmm. I don't know the history, but the site itself announces false news.
   According to our dev mailing list, there is no evidence for the following.
   > Databricks and Neo4j contributors have proposed to integrate the 
openCypher language into Apache Spark 3.0 to become a central piece of a new 
graph module in Apache Spark 3.0, driven through the openCypher project Cypher 
for Apache Spark
   
   @mengxr already told us that he is working on this as an individual. So, if you want to keep this link, you must remove the announcement on the site. In general, I know this is beyond the scope of this PR, so you can remove the link from the PR for now.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334035972
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
 
 Review comment:
   Hmm. I understand the history, but the site itself announces false news.
   According to our dev mailing list, there is no evidence for the following.
   > Databricks and Neo4j contributors have proposed to integrate the 
openCypher language into Apache Spark 3.0 to become a central piece of a new 
graph module in Apache Spark 3.0, driven through the openCypher project Cypher 
for Apache Spark
   
   @mengxr already told us that he is working on this as an individual. So, if you want to keep this link, you must remove the announcement on the site. In general, I know this is beyond the scope of this PR, so you can remove the link from the PR for now.





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334033749
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrameBuilder.scala
 ##
 @@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Interface used to build a [[NodeFrame]].
+ *
+ * @param df DataFrame containing a single node in each row
 
 Review comment:
   `DataFrame` -> `Dataset`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334033601
 
 

 ##
 File path: graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrame.scala
 ##
 @@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to nodes.
+ *
+ * Each row in the DataFrame represents a node which has exactly the labels 
defined by the given
 
 Review comment:
   `DataFrame` -> `Dataset`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334033633
 
 

 ##
 File path: graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrame.scala
 ##
 @@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to nodes.
+ *
+ * Each row in the DataFrame represents a node which has exactly the labels 
defined by the given
+ * label set.
+ *
+ * @param df DataFrame containing a single node in each row
 
 Review comment:
   `DataFrame` -> `Dataset`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334033463
 
 

 ##
 File path: graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrame.scala
 ##
 @@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Describes how to map a DataFrame to nodes.
 
 Review comment:
   `DataFrame` -> `Dataset`





[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334033319
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/GraphElementFrame.scala
 ##
 @@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * A [[PropertyGraph]] is created from GraphElementFrames.
+ *
+ * A graph element is either a node or a relationship.
+ * A GraphElementFrame wraps a DataFrame and describes how it maps to graph elements.
+ *
+ * @since 3.0.0
+ */
+abstract class GraphElementFrame {
+
+  /**
+   * Initial DataFrame that can still contain unmapped, arbitrarily ordered columns.
 
 Review comment:
   `DataFrame` -> `Dataset`.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334032955
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+import org.apache.spark.sql.types.{BooleanType, StructType}
+
+/**
+ * Contains constants used for convention based column naming.
+ */
+object CypherSession {
+
+  /**
+   * Naming convention for identifier columns, both node and relationship 
identifiers.
+   */
+  val ID_COLUMN = "$ID"
+
+  /**
+   * Naming convention for relationship source identifier.
+   */
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+
+  /**
+   * Naming convention for relationship target identifier.
+   */
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+
+  /**
+   * Naming convention both for node label and relationship type prefixes.
+   */
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * A CypherSession allows for creating, storing and loading [[PropertyGraph]] instances as well as
+ * executing Cypher queries on them.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession extends Logging {
 
 Review comment:
   After moving the [helper logic](https://github.com/apache/spark/pull/24851/files#r334031051), you can remove `extends Logging` here.
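A minimal sketch of the suggested direction, assuming the warning-emitting helper moves into the companion object and logs through slf4j directly; this mirrors common practice rather than the PR's final code:

```scala
import org.slf4j.LoggerFactory

import org.apache.spark.sql.SparkSession

object CypherSession {
  // The helper logic that needs to emit warnings owns its own logger, so the
  // public trait no longer has to mix in org.apache.spark.internal.Logging.
  private lazy val logger = LoggerFactory.getLogger(CypherSession.getClass)
}

trait CypherSession {  // no `extends Logging`
  def sparkSession: SparkSession
}
```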


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334031051
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+import org.apache.spark.sql.types.{BooleanType, StructType}
+
+/**
+ * Contains constants used for convention based column naming.
+ */
+object CypherSession {
+
+  /**
+   * Naming convention for identifier columns, both node and relationship 
identifiers.
+   */
+  val ID_COLUMN = "$ID"
+
+  /**
+   * Naming convention for relationship source identifier.
+   */
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+
+  /**
+   * Naming convention for relationship target identifier.
+   */
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+
+  /**
+   * Naming convention both for node label and relationship type prefixes.
+   */
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * A CypherSession allows for creating, storing and loading [[PropertyGraph]] instances as well as
+ * executing Cypher queries on them.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession extends Logging {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph [[PropertyGraph]] on which the query is executed
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String, parameters: Map[String, Any]): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(
+  graph: PropertyGraph,
+  query: String,
+  parameters: java.util.Map[String, Object]): CypherResult = {
+cypher(graph, query, parameters.asScala.toMap)
+  }
+
+  /**
+   * Creates a [[PropertyGraph]] from a sequence of [[NodeFrame]]s and [[RelationshipFrame]]s.
+   * At least one [[NodeFrame]] has to be provided.
+   *
+   * For each label set and relationship type there can be at most one [[NodeFrame]] and at most one
+   * [[RelationshipFrame]], respectively.
+   *
+   * @param nodes NodeFrames that define the nodes in the graph
+   * @param relationships RelationshipFrames that define the relationships in the graph
+   * @since 3.0.0
+   */
+  def createGraph(nodes: Array[NodeFrame], relationships: Array[RelationshipFrame]): PropertyGraph
+
+  /**
+   * Creates a [[PropertyGraph]] from nodes and relationships.
+   *
+   * The given dataset needs to adhere to the following column naming conventions:
+   *
+   * {{{
+   * Id column:`$ID`(nodes and relationships)
+   * SourceId column:  `$SOURCE_ID` (relationships)
+   * TargetId column:  `$TARGET_ID` (relationships)
+   *
+   * Label columns:`:{LABEL_NAME}`  (nodes)
+   * RelType columns:  
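The excerpt is cut off by the archive at this point. As a usage illustration for the parameterized overload declared above, a minimal sketch, assuming `session: CypherSession` and `graph: PropertyGraph` already exist:

```scala
// The parameter map is passed separately from the query text and referenced via `$name`.
val result: CypherResult = session.cypher(
  graph,
  "MATCH (n:Person) WHERE n.name = $name RETURN n",
  Map[String, Any]("name" -> "Alice"))
```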

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-11 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r334027537
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherResult.scala
 ##
 @@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.{Dataset, Row}
+
+/**
+ * Result of a Cypher query.
+ *
+ * Wraps a DataFrame that contains the result rows.
 
 Review comment:
   `DataFrame` -> `Dataset`.
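Since the wrapped result is tabular, downstream code is ordinary Dataset code. A sketch, assuming `result: CypherResult` and assuming the accessor for the wrapped rows is called `df` (the accessor itself is not visible in this excerpt):

```scala
// Any Dataset[Row] operation applies to the wrapped result, e.g. selecting one column.
val names = result.df.select("name").collect()
```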


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333815564
 
 

 ##
 File path: 
graph/api/src/test/scala/org/apache/spark/graph/api/PropertyGraphSuite.scala
 ##
 @@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.scalatest.Matchers
+
+import org.apache.spark.graph.api.CypherSession.{ID_COLUMN, LABEL_COLUMN_PREFIX, SOURCE_ID_COLUMN, TARGET_ID_COLUMN}
+import org.apache.spark.sql.{DataFrame, QueryTest}
+import org.apache.spark.sql.test.SharedSparkSession
+
+abstract class PropertyGraphSuite extends QueryTest with SharedSparkSession with Matchers {
+
+  // Override in spark-cypher
+  type IdType = Long
+  def convertId(inputId: Long): IdType
+
+  def cypherSession: CypherSession
+
+  lazy val nodes: DataFrame = spark
+.createDataFrame(
+  Seq(
+(0L, true, true, false, false, Some(42), Some("Alice"), None, None),
+(1L, true, true, false, false, Some(23), Some("Bob"), None, None),
+(2L, true, false, true, false, Some(22), Some("Carol"), Some("CS"), None),
+(3L, true, true, false, false, Some(19), Some("Eve"), None, None),
+(4L, false, false, false, true, None, None, None, Some("UC Berkeley")),
+(5L, false, false, false, true, None, None, None, Some("Stanford"))))
+.toDF(
+  ID_COLUMN,
+  label("Person"),
+  label("Student"),
+  label("Teacher"),
+  label("University"),
+  "age",
+  "name",
+  "subject",
+  "title")
+
+  lazy val relationships: DataFrame = spark
 
 Review comment:
   ```scala
   -  lazy val relationships: DataFrame = spark
   +  lazy val relationships: Dataset[Row] = spark
   ```
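The test above calls a `label` helper that is not part of the quoted excerpt; given the imported `LABEL_COLUMN_PREFIX` convention, a plausible definition (an assumption, not necessarily the PR's code) is:

```scala
// Prefixes a label name so it matches the convention-based column naming,
// e.g. label("Person") == ":Person".
def label(name: String): String = LABEL_COLUMN_PREFIX + name
```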


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333815513
 
 

 ##
 File path: 
graph/api/src/test/scala/org/apache/spark/graph/api/PropertyGraphSuite.scala
 ##
 @@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.scalatest.Matchers
+
+import org.apache.spark.graph.api.CypherSession.{ID_COLUMN, 
LABEL_COLUMN_PREFIX, SOURCE_ID_COLUMN, TARGET_ID_COLUMN}
+import org.apache.spark.sql.{DataFrame, QueryTest}
+import org.apache.spark.sql.test.SharedSparkSession
+
+abstract class PropertyGraphSuite extends QueryTest with SharedSparkSession 
with Matchers {
+
+  // Override in spark-cypher
+  type IdType = Long
+  def convertId(inputId: Long): IdType
+
+  def cypherSession: CypherSession
+
+  lazy val nodes: DataFrame = spark
 
 Review comment:
   ```scala
   -  lazy val nodes: DataFrame = spark
   +  lazy val nodes: Dataset[Row] = spark
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333815461
 
 

 ##
 File path: 
graph/api/src/test/scala/org/apache/spark/graph/api/PropertyGraphSuite.scala
 ##
 @@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.scalatest.Matchers
+
+import org.apache.spark.graph.api.CypherSession.{ID_COLUMN, 
LABEL_COLUMN_PREFIX, SOURCE_ID_COLUMN, TARGET_ID_COLUMN}
+import org.apache.spark.sql.{DataFrame, QueryTest}
 
 Review comment:
   ```scala
   -import org.apache.spark.sql.{DataFrame, QueryTest}
   +import org.apache.spark.sql.{Dataset, QueryTest, Row}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333815301
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrameBuilder.scala
 ##
 @@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * Interface used to build a [[RelationshipFrame]].
+ *
+ * @param df DataFrame containing a single relationship in each row
+ * @since 3.0.0
+ */
+final class RelationshipFrameBuilder(val df: DataFrame) {
 
 Review comment:
   ```scala
   -import org.apache.spark.sql.DataFrame
   +import org.apache.spark.sql.{Dataset, Row}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333815222
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrameBuilder.scala
 ##
 @@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.DataFrame
 
 Review comment:
   ```scala
   -import org.apache.spark.sql.DataFrame
   +import org.apache.spark.sql.{Dataset, Row}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333815189
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrame.scala
 ##
 @@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * Describes how to map a DataFrame to relationships.
+ *
+ * Each row in the DataFrame represents a relationship with the given relationship type.
+ *
+ * @param df   DataFrame containing a single relationship in each row
+ * @param idColumn column that contains the relationship identifier
+ * @param sourceIdColumn   column that contains the source node identifier of the relationship
+ * @param targetIdColumn   column that contains the target node identifier of the relationship
+ * @param relationshipType relationship type that is assigned to all relationships
+ * @param properties   mapping from property keys to corresponding columns
+ * @since 3.0.0
+ */
+case class RelationshipFrame private[graph] (
+df: DataFrame,
 
 Review comment:
   ```scala
   -df: DataFrame,
   +df: Dataset[Row],
   ```
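To make the parameter list above concrete, an illustrative mapping with invented column names (the constructor is package-private, so user code would normally go through a builder or the session):

```scala
// Example: a frame of KNOWS relationships stored in columns id, src, dst, since.
//
//   df               -> edges: Dataset[Row] with columns ("id", "src", "dst", "since")
//   idColumn         -> "id"
//   sourceIdColumn   -> "src"
//   targetIdColumn   -> "dst"
//   relationshipType -> "KNOWS"
//   properties       -> Map("since" -> "since")   // property key -> column name
```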


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333815075
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
+ * @see <a href="https://dl.acm.org/citation.cfm?id=3183713.3190657">Property Graph Model</a>
+ * @since 3.0.0
+ */
+abstract class PropertyGraph {
+
+  /**
+   * The schema (graph type) describes the structure of this graph.
+   *
+   * @since 3.0.0
+   */
+  def schema: PropertyGraphType
+
+  /**
+   * The session in which this graph is managed.
+   *
+   * @since 3.0.0
+   */
+  def cypherSession: CypherSession
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(query: String): CypherResult = cypher(query, Map.empty[String, 
Any])
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: Map[String, Any]): CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Executes a Cypher query in the [[CypherSession]] that manages this graph, 
using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: java.util.Map[String, Object]): 
CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Returns the [[NodeFrame]] for a given node label set.
+   *
+   * @param labelSet Label set used for NodeFrame lookup
+   * @return NodeFrame for the given label set
+   * @since 3.0.0
+   */
+  def nodeFrame(labelSet: Array[String]): NodeFrame
+
+  /**
+   * Returns the [[RelationshipFrame]] for a given relationship type.
+   *
+   * @param relationshipType Relationship type used for RelationshipFrame 
lookup
+   * @return RelationshipFrame for the given relationship type
+   * @since 3.0.0
+   */
+  def relationshipFrame(relationshipType: String): RelationshipFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each node in this graph.
+   *
+   * The DataFrame adheres to the following column naming conventions:
+   *
+   * {{{
+   * Id column:`$ID`
+   * Label columns:`:{LABEL_NAME}`
+   * Property columns: `{Property_Key}`
+   * }}}
+   *
+   * @see `org.apache.spark.graph.api.CypherSession.createGraph(nodes, 
relationships)`
+   * @since 3.0.0
+   */
+  def nodes: DataFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each relationship in this
+   * graph.
+   *
+   * The DataFrame adheres to column naming conventions:
+   *
+   * {{{
+   * Id column:`$ID`
+   * SourceId column:  `$SOURCE_ID`
+   * TargetId column:  `$TARGET_ID`
+   * RelType columns:  `:{REL_TYPE}`
+   * Property columns: `{Property_Key}`
+   * }}}
+   *
+   * @see `org.apache.spark.graph.api.CypherSession.createGraph(nodes, 
relationships)`
+   * @since 3.0.0
+   */
+  def relationships: DataFrame
 
 Review comment:
   ```scala
   -  def relationships: DataFrame
   +  def relationships: Dataset[Row]
   ```
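A consumption sketch for the convention-named columns described above, assuming `graph: PropertyGraph` exists and that label columns are boolean flags, as in the test data quoted earlier in this thread:

```scala
// Select the ids and names of all nodes carrying the Person label.
val nodeDf = graph.nodes
val personNames = nodeDf
  .filter(nodeDf.col(":Person"))
  .select(nodeDf.col("$ID"), nodeDf.col("name"))
```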


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With 

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333815049
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * A Property Graph as defined by the openCypher Property Graph Data Model.
+ *
+ * A graph is always tied to and managed by a [[CypherSession]].
+ * The lifetime of a graph is bound by the session lifetime.
+ *
+ * @see <a href="http://www.opencypher.org/">openCypher project</a>
+ * @see <a href="https://dl.acm.org/citation.cfm?id=3183713.3190657">Property Graph Model</a>
+ * @since 3.0.0
+ */
+abstract class PropertyGraph {
+
+  /**
+   * The schema (graph type) describes the structure of this graph.
+   *
+   * @since 3.0.0
+   */
+  def schema: PropertyGraphType
+
+  /**
+   * The session in which this graph is managed.
+   *
+   * @since 3.0.0
+   */
+  def cypherSession: CypherSession
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(query: String): CypherResult = cypher(query, Map.empty[String, 
Any])
+
+  /**
+   * Executes a Cypher query in the session that manages this graph, using 
this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: Map[String, Any]): CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Executes a Cypher query in the [[CypherSession]] that manages this graph, 
using this graph as
+   * the input graph.
+   *
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(query: String, parameters: java.util.Map[String, Object]): 
CypherResult =
+cypherSession.cypher(this, query, parameters)
+
+  /**
+   * Returns the [[NodeFrame]] for a given node label set.
+   *
+   * @param labelSet Label set used for NodeFrame lookup
+   * @return NodeFrame for the given label set
+   * @since 3.0.0
+   */
+  def nodeFrame(labelSet: Array[String]): NodeFrame
+
+  /**
+   * Returns the [[RelationshipFrame]] for a given relationship type.
+   *
+   * @param relationshipType Relationship type used for RelationshipFrame 
lookup
+   * @return RelationshipFrame for the given relationship type
+   * @since 3.0.0
+   */
+  def relationshipFrame(relationshipType: String): RelationshipFrame
+
+  /**
+   * Returns a DataFrame that contains a row for each node in this graph.
+   *
+   * The DataFrame adheres to the following column naming conventions:
+   *
+   * {{{
+   * Id column:`$ID`
+   * Label columns:`:{LABEL_NAME}`
+   * Property columns: `{Property_Key}`
+   * }}}
+   *
+   * @see `org.apache.spark.graph.api.CypherSession.createGraph(nodes, 
relationships)`
+   * @since 3.0.0
+   */
+  def nodes: DataFrame
 
 Review comment:
   ```scala
   -  def nodes: DataFrame
   +  def nodes: Dataset[Row]
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333815169
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrame.scala
 ##
 @@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
 
 Review comment:
   ```scala
   -import org.apache.spark.sql.DataFrame
   +import org.apache.spark.sql.{Dataset, Row}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814921
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrameBuilder.scala
 ##
 @@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * Interface used to build a [[NodeFrame]].
+ *
+ * @param df DataFrame containing a single node in each row
+ * @since 3.0.0
+ */
+final class NodeFrameBuilder(var df: DataFrame) {
 
 Review comment:
   ```scala
   -final class NodeFrameBuilder(var df: DataFrame) {
   +final class NodeFrameBuilder(var df: Dataset[Row]) {
   ```
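A hypothetical usage sketch for the builder; the method names (`idColumn`, `labelSet`, `build`) are assumed for illustration and are not shown in the quoted excerpt:

```scala
// personsDf is assumed to be a Dataset[Row] with columns ("id", "name", "age").
val personNodes: NodeFrame = new NodeFrameBuilder(personsDf)
  .idColumn("id")                // column holding the node identifier
  .labelSet(Array("Person"))     // labels assigned to every node in this frame
  .build()
```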


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814998
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraph.scala
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
 
 Review comment:
   ```scala
   -import org.apache.spark.sql.DataFrame
   +import org.apache.spark.sql.{Dataset, Row}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814906
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrameBuilder.scala
 ##
 @@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.DataFrame
 
 Review comment:
   ```scala
   -import org.apache.spark.sql.DataFrame
   +import org.apache.spark.sql.{Dataset, Row}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814839
 
 

 ##
 File path: graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrame.scala
 ##
 @@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
 
 Review comment:
   ```scala
   -import org.apache.spark.sql.DataFrame
   +import org.apache.spark.sql.{Dataset, Row}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814876
 
 

 ##
 File path: graph/api/src/main/scala/org/apache/spark/graph/api/NodeFrame.scala
 ##
 @@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * Describes how to map a DataFrame to nodes.
+ *
+ * Each row in the DataFrame represents a node which has exactly the labels defined by the given
+ * label set.
+ *
+ * @param df DataFrame containing a single node in each row
+ * @param idColumn   column that contains the node identifier
+ * @param labelSet   labels that are assigned to all nodes
+ * @param properties mapping from property keys to corresponding columns
+ * @since 3.0.0
+ */
+case class NodeFrame private[graph] (
+df: DataFrame,
 
 Review comment:
   ```scala
   -df: DataFrame,
   +df: Dataset[Row],
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814769
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/GraphElementFrame.scala
 ##
 @@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
 
 Review comment:
   ```scala
   -import org.apache.spark.sql.DataFrame
   +import org.apache.spark.sql.{Dataset, Row}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814743
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
+import org.apache.spark.sql.types.{BooleanType, StructType}
+
+/**
+ * Contains constants used for convention based column naming.
+ */
+object CypherSession {
+
+  /**
+   * Naming convention for identifier columns, both node and relationship 
identifiers.
+   */
+  val ID_COLUMN = "$ID"
+
+  /**
+   * Naming convention for relationship source identifier.
+   */
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+
+  /**
+   * Naming convention for relationship target identifier.
+   */
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+
+  /**
+   * Naming convention both for node label and relationship type prefixes.
+   */
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * A CypherSession allows for creating, storing and loading [[PropertyGraph]] 
instances as well as
+ * executing Cypher queries on them.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession extends Logging {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph [[PropertyGraph]] on which the query is executed
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String, parameters: Map[String, 
Any]): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(
+  graph: PropertyGraph,
+  query: String,
+  parameters: java.util.Map[String, Object]): CypherResult = {
+cypher(graph, query, parameters.asScala.toMap)
+  }
+
+  /**
+   * Creates a [[PropertyGraph]] from a sequence of [[NodeFrame]]s and 
[[RelationshipFrame]]s.
+   * At least one [[NodeFrame]] has to be provided.
+   *
+   * For each label set and relationship type there can be at most one 
[[NodeFrame]] and at most one
+   * [[RelationshipFrame]], respectively.
+   *
+   * @param nodes NodeFrames that define the nodes in the graph
+   * @param relationships RelationshipFrames that define the relationships in 
the graph
+   * @since 3.0.0
+   */
+  def createGraph(nodes: Array[NodeFrame], relationships: 
Array[RelationshipFrame]): PropertyGraph
+
+  /**
+   * Creates a [[PropertyGraph]] from nodes and relationships.
+   *
+   * The given DataFrames need to adhere to the following column naming 
conventions:
+   *
+   * {{{
+   * Id column:`$ID`(nodes and relationships)
+   * SourceId column:  `$SOURCE_ID` (relationships)
+   * TargetId column:  `$TARGET_ID` (relationships)
+   *
+   * Label columns:`:{LABEL_NAME}`  (nodes)
+   * RelType columns:  

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814802
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/GraphElementFrame.scala
 ##
 @@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * A [[PropertyGraph]] is created from GraphElementFrames.
+ *
+ * A graph element is either a node or a relationship.
+ * A GraphElementFrame wraps a DataFrame and describes how it maps to graph 
elements.
+ *
+ * @since 3.0.0
+ */
+abstract class GraphElementFrame {
+
+  /**
+   * Initial DataFrame that can still contain unmapped, arbitrarily ordered 
columns.
+   *
+   * @since 3.0.0
+   */
+  def df: DataFrame
 
 Review comment:
   ```scala
   -  def df: DataFrame
   +  def df: Dataset[Row]
   ```
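For context, `DataFrame` is defined in Spark SQL as a type alias for `Dataset[Row]`, so the suggested signature spells out the same type explicitly. A minimal sketch of that relationship (not code from this PR):

```scala
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}

// `DataFrame` is just an alias for `Dataset[Row]`, so both bindings below refer
// to the same value; only the spelling in the published signature differs.
val spark = SparkSession.builder().master("local[*]").getOrCreate()
val asAlias: DataFrame = spark.range(3).toDF("id")
val asDataset: Dataset[Row] = asAlias
```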


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814672
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
+
+object CypherSession {
+  val ID_COLUMN = "$ID"
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * The entry point for using property graphs in Spark.
+ *
+ * Provides factory methods for creating [[PropertyGraph]] instances.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph [[PropertyGraph]] on which the query is executed
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String, parameters: Map[String, 
Any]): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph,
+ query: String,
+ parameters: java.util.Map[String, Object]): CypherResult = {
+cypher(graph, query, parameters.asScala.toMap)
+  }
+
+  /**
+   * Creates a [[PropertyGraph]] from a sequence of [[NodeFrame]]s and 
[[RelationshipFrame]]s.
+   * At least one [[NodeFrame]] has to be provided.
+   *
+   * For each label set and relationship type there can be at most one 
[[NodeFrame]] and at most one
+   * [[RelationshipFrame]], respectively.
+   *
+   * @param nodes NodeFrames that define the nodes in the graph
+   * @param relationships RelationshipFrames that define the relationships in 
the graph
+   * @since 3.0.0
+   */
+  def createGraph(nodes: Seq[NodeFrame], relationships: 
Seq[RelationshipFrame]): PropertyGraph
+
+  /**
+   * Creates a [[PropertyGraph]] from a sequence of [[NodeFrame]]s and 
[[RelationshipFrame]]s.
+   * At least one [[NodeFrame]] has to be provided.
+   *
+   * For each label set and relationship type there can be at most one 
[[NodeFrame]] and at most one
+   * [[RelationshipFrame]], respectively.
+   *
+   * @param nodes NodeFrames that define the nodes in the graph
+   * @param relationships RelationshipFrames that define the relationships in 
the graph
+   * @since 3.0.0
+   */
+  def createGraph(
+  nodes: java.util.List[NodeFrame],
+  relationships: java.util.List[RelationshipFrame]): PropertyGraph = {
+createGraph(nodes.asScala, relationships.asScala)
+  }
+
+  /**
+   * Creates a [[PropertyGraph]] from nodes and relationships.
+   *
+   * The given DataFrames need to adhere to the following column naming 
conventions:
+   *
+   * {{{
+   * Id column:`$ID`(nodes and relationships)
+   * SourceId column:  `$SOURCE_ID` (relationships)
+   * TargetId column:  `$TARGET_ID` (relationships)
+   *
+   * Label columns:`:{LABEL_NAME}`  (nodes)
+   * RelType columns:  `:{REL_TYPE}`(relationships)
+   *
+   * Property columns: `{Property_Key}` (nodes and relationships)
+   * }}}
+   *
+   * @see [[CypherSession]]
+   * @param nodes node DataFrame
+   * @param relationships relationship DataFrame
+   * @since 3.0.0
+   */
+  def createGraph(nodes: DataFrame, relationships: DataFrame): PropertyGraph = 
{
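As an illustration of the convention-based overload above, a minimal usage sketch with DataFrames that follow the `$ID`/`$SOURCE_ID`/`$TARGET_ID` and label-column conventions (`spark` and `session` are assumed to exist; this is not code from the PR):

```scala
// Hypothetical inputs: boolean label columns for nodes, a boolean type column
// for relationships, and plain columns for properties.
val nodes = spark.createDataFrame(Seq(
  (0L, true, false, "Alice"),   // a Student node
  (1L, false, true, "Bob")      // a Teacher node
)).toDF("$ID", ":Student", ":Teacher", "name")

val relationships = spark.createDataFrame(Seq(
  (0L, 0L, 1L, true, 1984)      // Alice KNOWS Bob since 1984
)).toDF("$ID", "$SOURCE_ID", "$TARGET_ID", ":KNOWS", "since")

val graph = session.createGraph(nodes, relationships)
```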
 
 Review comment:
   ```scala
   -  

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814596
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
 
 Review comment:
   Actually, use the following for the final update.
   ```scala
   -import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
   +import org.apache.spark.sql.{Dataset, Row, SparkSession}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814713
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
+import org.apache.spark.sql.types.{BooleanType, StructType}
+
+/**
+ * Contains constants used for convention based column naming.
+ */
+object CypherSession {
+
+  /**
+   * Naming convention for identifier columns, both node and relationship 
identifiers.
+   */
+  val ID_COLUMN = "$ID"
+
+  /**
+   * Naming convention for relationship source identifier.
+   */
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+
+  /**
+   * Naming convention for relationship target identifier.
+   */
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+
+  /**
+   * Naming convention both for node label and relationship type prefixes.
+   */
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * A CypherSession allows for creating, storing and loading [[PropertyGraph]] 
instances as well as
+ * executing Cypher queries on them.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession extends Logging {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph [[PropertyGraph]] on which the query is executed
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String, parameters: Map[String, 
Any]): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(
+  graph: PropertyGraph,
+  query: String,
+  parameters: java.util.Map[String, Object]): CypherResult = {
+cypher(graph, query, parameters.asScala.toMap)
+  }
+
+  /**
+   * Creates a [[PropertyGraph]] from a sequence of [[NodeFrame]]s and 
[[RelationshipFrame]]s.
+   * At least one [[NodeFrame]] has to be provided.
+   *
+   * For each label set and relationship type there can be at most one 
[[NodeFrame]] and at most one
+   * [[RelationshipFrame]], respectively.
+   *
+   * @param nodes NodeFrames that define the nodes in the graph
+   * @param relationships RelationshipFrames that define the relationships in 
the graph
+   * @since 3.0.0
+   */
+  def createGraph(nodes: Array[NodeFrame], relationships: 
Array[RelationshipFrame]): PropertyGraph
+
+  /**
+   * Creates a [[PropertyGraph]] from nodes and relationships.
+   *
+   * The given DataFrames need to adhere to the following column naming 
conventions:
+   *
+   * {{{
+   * Id column:`$ID`(nodes and relationships)
+   * SourceId column:  `$SOURCE_ID` (relationships)
+   * TargetId column:  `$TARGET_ID` (relationships)
+   *
+   * Label columns:`:{LABEL_NAME}`  (nodes)
+   * RelType columns:  

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814508
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherResult.scala
 ##
 @@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * Result of a Cypher query.
+ *
+ * Wraps a DataFrame that contains the result rows.
+ *
+ * @since 3.0.0
+ */
+trait CypherResult {
+
+  /**
+   * Contains the result rows.
+   *
+   * The column names are aligned with the return item names specified within 
the Cypher query,
+   * (e.g. `RETURN foo, bar AS baz` results in the columns `foo` and `baz`).
+   *
+   * @note Dot characters (i.e. `.`) within return item names are replaced by 
an underscore (`_`),
+   *   (e.g. `MATCH (n:Person) RETURN n` results in the columns `n`, 
`n:Person` and `n_name`).
+   * @since 3.0.0
+   */
+  def df: DataFrame
 
 Review comment:
   ```scala
   -  def df: DataFrame
   +  def df: Dataset[Row]
   ```
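For reference, the naming behaviour described in that scaladoc can be seen directly on the wrapped result; a hedged usage sketch (`session` and `graph` are assumed to exist):

```scala
// Result columns follow the Cypher return items; dots in return item names are
// replaced by underscores (e.g. `n.name` would surface as `n_name`).
val result = session.cypher(graph, "MATCH (n:Person) RETURN n.name AS name")
result.df.select("name").show()
```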


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333814464
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherResult.scala
 ##
 @@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
 
 Review comment:
   ```scala
   -import org.apache.spark.sql.DataFrame
   +import org.apache.spark.sql.{Dataset, Row}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333813221
 
 

 ##
 File path: 
graph/api/src/test/scala/org/apache/spark/graph/api/PropertyGraphSuite.scala
 ##
 @@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.scalatest.Matchers
+
+import org.apache.spark.graph.api.CypherSession.{ID_COLUMN, 
LABEL_COLUMN_PREFIX, SOURCE_ID_COLUMN, TARGET_ID_COLUMN}
+import org.apache.spark.sql.{DataFrame, QueryTest}
+import org.apache.spark.sql.test.SharedSparkSession
+
+abstract class PropertyGraphSuite extends QueryTest with SharedSparkSession 
with Matchers {
+
+  // Override in spark-cypher
 
 Review comment:
   There is not much information about this. Could you explain this a little bit more?
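For readers following along, the members behind that comment (visible elsewhere in the diff) are an `IdType` alias, a `convertId` conversion and a `cypherSession` accessor; a purely hypothetical override in an implementing suite could look like this (the real spark-cypher implementation may differ):

```scala
// Hypothetical illustration only; everything except the overridden members is made up.
class ExamplePropertyGraphSuite extends PropertyGraphSuite {
  override def convertId(inputId: Long): IdType = inputId
  override def cypherSession: CypherSession = ???  // supplied by the implementing module
}
```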


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333812890
 
 

 ##
 File path: 
graph/api/src/test/java/org/apache/spark/graph/api/JavaPropertyGraphSuite.java
 ##
 @@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api;
+
+import com.google.common.collect.Lists;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.test.TestSparkSession;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import static org.apache.spark.sql.types.DataTypes.*;
+
+public abstract class JavaPropertyGraphSuite implements Serializable {
+  private transient TestSparkSession spark;
+  private transient CypherSession cypherSession;
+
+  abstract CypherSession getCypherSession(SparkSession sparkSession);
+
+  @Before
+  public void setUp() {
+spark = new TestSparkSession();
+cypherSession = getCypherSession(spark);
+  }
+
+  @After
+  public void tearDown() {
+spark.stop();
+spark = null;
+  }
+
+  @Test
+  public void testCreateFromNodeFrame() {
+StructType personSchema = createSchema(
+Lists.newArrayList("id", "name"),
 
 Review comment:
   We use 2-space indentation in Java code. Please update all the other 
instances. You can reference the existing code, `Java8APISuite.java`.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333811998
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/RelationshipFrame.scala
 ##
 @@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * Describes how to map a DataFrame to relationships.
+ *
+ * Each row in the DataFrame represents a relationship with the given 
relationship type.
+ *
+ * @param df   DataFrame containing a single relationship in each 
row
+ * @param idColumn column that contains the relationship identifier
+ * @param sourceIdColumn   column that contains the source node identifier of 
the relationship
+ * @param targetIdColumn   column that contains the target node identifier of 
the relationship
+ * @param relationshipType relationship type that is assigned to all 
relationships
+ * @param properties   mapping from property keys to corresponding columns
+ * @since 3.0.0
+ */
+case class RelationshipFrame private[graph] (
+df: DataFrame,
+idColumn: String,
+sourceIdColumn: String,
+targetIdColumn: String,
+relationshipType: String,
+properties: Map[String, String])
+extends GraphElementFrame {
 
 Review comment:
   Indentation?
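For comparison, a layout that would answer that question under Spark's usual declaration style (4-space parameter indentation, 2-space `extends` clause) might look like this; shown only as an illustration, not as the merged code:

```scala
import org.apache.spark.sql.DataFrame

case class RelationshipFrame private[graph] (
    df: DataFrame,
    idColumn: String,
    sourceIdColumn: String,
    targetIdColumn: String,
    relationshipType: String,
    properties: Map[String, String])
  extends GraphElementFrame
```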


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r333811076
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,258 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
 
 Review comment:
   ```scala
   -import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
   +import org.apache.spark.sql.{DataFrame, SparkSession}
   ```


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r42983
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraphWriter.scala
 ##
 @@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import java.util.Locale
+
+import org.apache.spark.sql.SaveMode
+
+abstract class PropertyGraphWriter(val graph: PropertyGraph) {
+
+  protected var saveMode: SaveMode = SaveMode.ErrorIfExists
+  protected var format: String =
+graph.cypherSession.sparkSession.sessionState.conf.defaultDataSourceName
+
+  /**
+   * Specifies the behavior when the graph already exists. Options include:
+   * 
+   * `SaveMode.Overwrite`: overwrite the existing data.
+   * `SaveMode.Ignore`: ignore the operation (i.e. no-op).
+   * `SaveMode.ErrorIfExists`: throw an exception at runtime.
+   * 
+   * 
+   * When writing the default option is `ErrorIfExists`.
+   *
+   * @since 3.0.0
+   */
+  def mode(mode: SaveMode): PropertyGraphWriter = {
+mode match {
+  case SaveMode.Append =>
+throw new IllegalArgumentException(s"Unsupported save mode: $mode. " +
+  "Accepted save modes are 'overwrite', 'ignore', 'error', 
'errorifexists'.")
+  case _ =>
+this.saveMode = mode
+}
+this
+  }
+
+  /**
+   * Specifies the behavior when the graph already exists. Options include:
+   * 
+   * `overwrite`: overwrite the existing graph.
+   * `ignore`: ignore the operation (i.e. no-op).
+   * `error` or `errorifexists`: default option, throw an exception at 
runtime.
+   * 
+   *
+   * @since 3.0.0
+   */
+  def mode(saveMode: String): PropertyGraphWriter = {
+saveMode.toLowerCase(Locale.ROOT) match {
+  case "overwrite" => mode(SaveMode.Overwrite)
+  case "ignore" => mode(SaveMode.Ignore)
+  case "error" | "errorifexists" => mode(SaveMode.ErrorIfExists)
+  case "default" => this
 
 Review comment:
   Ur, this can be wrong in the case of `mode("overwrite").mode("default")`. Please merge `default` into the line 67 case. Please add a test case for this, too.
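To make the suggestion concrete, a minimal standalone sketch of the string-to-`SaveMode` mapping with `default` folded into the error case, plus the kind of check the reviewer asks for (this mirrors, but is not, the PR code):

```scala
import java.util.Locale
import org.apache.spark.sql.SaveMode

// Sketch only: "default" resolves to ErrorIfExists instead of silently keeping
// whatever mode was set by an earlier call.
def resolveSaveMode(saveMode: String): SaveMode =
  saveMode.toLowerCase(Locale.ROOT) match {
    case "overwrite" => SaveMode.Overwrite
    case "ignore" => SaveMode.Ignore
    case "error" | "errorifexists" | "default" => SaveMode.ErrorIfExists
    case other => throw new IllegalArgumentException(
      s"Unknown save mode: $other. Accepted save modes are " +
        "'overwrite', 'ignore', 'error', 'errorifexists', 'default'.")
  }

// With this mapping, mode("overwrite").mode("default") ends up as ErrorIfExists
// rather than retaining Overwrite:
assert(resolveSaveMode("default") == SaveMode.ErrorIfExists)
```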


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r48648
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/GraphElementFrame.scala
 ##
 @@ -0,0 +1,264 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * A [[PropertyGraph]] is created from GraphElementFrames.
+ *
+ * A graph element is either a node or a relationship.
+ * A GraphElementFrame wraps a DataFrame and describes how it maps to graph 
elements.
+ *
+ * @since 3.0.0
+ */
+abstract class GraphElementFrame {
+
+  /**
+   * Initial DataFrame that can still contain unmapped, arbitrarily ordered 
columns.
+   *
+   * @since 3.0.0
+   */
+  def df: DataFrame
+
+  /**
+   * Name of the column that contains the graph element identifier.
+   *
+   * @since 3.0.0
+   */
+  def idColumn: String
+
+  /**
+   * Name of all columns that contain graph element identifiers.
+   *
+   * @since 3.0.0
+   */
+  def idColumns: Seq[String] = Seq(idColumn)
+
+  /**
+   * Mapping from graph element property keys to the columns that contain the 
corresponding property
+   * values.
+   *
+   * @since 3.0.0
+   */
+  def properties: Map[String, String]
+
+}
+
+/**
+ * Interface used to build a [[NodeFrame]].
+ *
+ * @param df DataFrame containing a single node in each row
+ * @since 3.0.0
+ */
+final class NodeFrameBuilder(var df: DataFrame) {
 
 Review comment:
   Although this is not a big file at 264 lines, let's split this single file into multiple files.
   - `GraphElementFrame.scala`
   - `NodeFrameBuilder.scala`
   - `NodeFrame.scala`
   - `RelationshipFrameBuilder.scala`
   - `RelationshipFrame.scala`.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r47342
 
 

 ##
 File path: 
graph/api/src/test/scala/org/apache/spark/graph/api/PropertyGraphSuite.scala
 ##
 @@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.scalatest.Matchers
+
+import org.apache.spark.graph.api.CypherSession.{ID_COLUMN, 
LABEL_COLUMN_PREFIX, SOURCE_ID_COLUMN, TARGET_ID_COLUMN}
+import org.apache.spark.sql.{DataFrame, QueryTest}
+import org.apache.spark.sql.test.SharedSparkSession
+
+abstract class PropertyGraphSuite extends QueryTest with SharedSparkSession 
with Matchers {
+
+  // Override in spark-cypher
+  type IdType = Long
+  def convertId(inputId: Long): IdType
+
+  def cypherSession: CypherSession
+
+  test("create graph from NodeFrame") {
+val nodeData = spark.createDataFrame(Seq(0L -> "Alice", 1L -> 
"Bob")).toDF("id", "name")
+val nodeFrame = cypherSession.buildNodeFrame(nodeData)
+  .idColumn("id")
+  .labelSet(Array("Person"))
+  .properties(Map("name" -> "name"))
+  .build()
+val graph = cypherSession.createGraph(Array(nodeFrame), 
Array.empty[RelationshipFrame])
+
+val expectedDf = spark
+  .createDataFrame(Seq((convertId(0L), true, "Alice"), (convertId(1L), 
true, "Bob")))
+  .toDF(ID_COLUMN, label("Person"), "name")
+
+checkAnswer(graph.nodes, expectedDf)
+  }
+
+  test("create graph from NodeFrame and RelationshipFrame") {
+val nodeData = spark.createDataFrame(Seq(0L -> "Alice", 1L -> 
"Bob")).toDF("id", "name")
+val nodeFrame = cypherSession.buildNodeFrame(nodeData)
+  .idColumn("id")
+  .labelSet(Array("Person"))
+  .properties(Map("name" -> "name"))
+  .build()
+val relationshipData = spark
+  .createDataFrame(Seq((0L, 0L, 1L, 1984)))
+  .toDF("id", "source", "target", "since")
+val relationshipFrame = 
cypherSession.buildRelationshipFrame(relationshipData)
+  .idColumn("id")
+  .sourceIdColumn("source")
+  .targetIdColumn("target")
+  .relationshipType("KNOWS")
+  .properties(Map("since" -> "since"))
+  .build()
+
+val graph = cypherSession.createGraph(Array(nodeFrame), 
Array(relationshipFrame))
+
+val expectedNodeDf = spark
+  .createDataFrame(Seq((convertId(0L), true, "Alice"), (convertId(1L), 
true, "Bob")))
+  .toDF(ID_COLUMN, label("Person"), "name")
+
+val expectedRelDf = spark
+  .createDataFrame(Seq((convertId(0L), convertId(0L), convertId(1L), true, 
1984)))
+  .toDF(ID_COLUMN, SOURCE_ID_COLUMN, TARGET_ID_COLUMN, label("KNOWS"), 
"since")
+
+checkAnswer(graph.nodes, expectedNodeDf)
+checkAnswer(graph.relationships, expectedRelDf)
+  }
+
+  test("create graph with multiple node and relationship types") {
+val studentDF = spark
+  .createDataFrame(Seq((0L, "Alice", 42), (1L, "Bob", 23)))
+  .toDF("id", "name", "age")
+val teacherDF = spark
+  .createDataFrame(Seq((2L, "Eve", "CS")))
+  .toDF("id", "name", "subject")
+
+val studentNF = cypherSession.buildNodeFrame(studentDF)
+.idColumn("id")
+.labelSet(Array("Person", "Student"))
+.properties(Map("name" -> "name", "age" -> "age"))
+.build()
+
+val teacherNF = cypherSession.buildNodeFrame(teacherDF)
+  .idColumn("id")
+  .labelSet(Array("Person", "Teacher"))
+  .properties(Map("name" -> "name", "subject" -> "subject"))
+  .build()
+
+val knowsDF = spark
+  .createDataFrame(Seq((0L, 0L, 1L, 1984)))
+  .toDF("id", "source", "target", "since")
+val teachesDF = spark
+  .createDataFrame(Seq((1L, 2L, 1L)))
+  .toDF("id", "source", "target")
+
+val knowsRF = cypherSession.buildRelationshipFrame(knowsDF)
+  .idColumn("id")
+  .sourceIdColumn("source")
+  .targetIdColumn("target")
+  .relationshipType("KNOWS")
+  .properties(Map("since" -> "since"))
+  .build()
+val teachesRF = cypherSession.buildRelationshipFrame(teachesDF)
+  .idColumn("id")
+  .sourceIdColumn("source")
+  .targetIdColumn("target")
+  

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r45707
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
+import org.apache.spark.sql.types.{BooleanType, StructType}
+
+/**
+ * Contains constants used for convention based column naming.
+ */
+object CypherSession {
+
+  /**
+   * Naming convention for identifier columns, both node and relationship 
identifiers.
+   */
+  val ID_COLUMN = "$ID"
+
+  /**
+   * Naming convention for relationship source identifier.
+   */
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+
+  /**
+   * Naming convention for relationship target identifier.
+   */
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+
+  /**
+   * Naming convention both for node label and relationship type prefixes.
+   */
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * A CypherSession allows for creating, storing and loading [[PropertyGraph]] 
instances as well as
+ * executing Cypher queries on them.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession extends Logging {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @see <a href="https://neo4j.com/docs/cypher-manual/current/">Cypher Manual</a>
+   * @param graph [[PropertyGraph]] on which the query is executed
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @see <a href="https://neo4j.com/docs/cypher-manual/current/">Cypher Manual</a>
+   * @see <a href="https://neo4j.com/docs/cypher-manual/current/syntax/parameters/">Parameters</a>
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String, parameters: Map[String, 
Any]): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @see <a href="https://neo4j.com/docs/cypher-manual/current/">Cypher Manual</a>
+   * @see <a href="https://neo4j.com/docs/cypher-manual/current/syntax/parameters/">Parameters</a>
 
 Review comment:
   Let's remove these two lines.
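Independent of the `@see` links, the parameterized overload documented above would be exercised roughly like this (a hedged usage sketch; `session` and `graph` are assumed to exist):

```scala
// Parameters are passed as a Scala Map; the Java-friendly overload accepts a
// java.util.Map instead. `$name` here is Cypher syntax, not string interpolation.
val result = session.cypher(
  graph,
  "MATCH (n:Person) WHERE n.name = $name RETURN n",
  Map[String, Any]("name" -> "Alice"))
result.df.show()
```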


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r45612
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
+import org.apache.spark.sql.types.{BooleanType, StructType}
+
+/**
+ * Contains constants used for convention based column naming.
+ */
+object CypherSession {
+
+  /**
+   * Naming convention for identifier columns, both node and relationship 
identifiers.
+   */
+  val ID_COLUMN = "$ID"
+
+  /**
+   * Naming convention for relationship source identifier.
+   */
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+
+  /**
+   * Naming convention for relationship target identifier.
+   */
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+
+  /**
+   * Naming convention both for node label and relationship type prefixes.
+   */
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * A CypherSession allows for creating, storing and loading [[PropertyGraph]] 
instances as well as
+ * executing Cypher queries on them.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession extends Logging {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @see <a href="https://neo4j.com/docs/cypher-manual/current/">Cypher Manual</a>
+   * @param graph [[PropertyGraph]] on which the query is executed
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * Note that queries can take optional parameters:
+   *
+   * {{{
+   * Parameters:
+   *
+   * {
+   *"name" : "Alice"
+   * }
+   *
+   * Query:
+   *
+   * MATCH (n:Person)
+   * WHERE n.name = $name
+   * RETURN n
+   * }}}
+   *
+   * @see <a href="https://neo4j.com/docs/cypher-manual/current/">Cypher Manual</a>
+   * @see <a href="https://neo4j.com/docs/cypher-manual/current/syntax/parameters/">Parameters</a>
 
 Review comment:
   Let's remove these two lines.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r45466
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
+import org.apache.spark.sql.types.{BooleanType, StructType}
+
+/**
+ * Contains constants used for convention based column naming.
+ */
+object CypherSession {
+
+  /**
+   * Naming convention for identifier columns, both node and relationship 
identifiers.
+   */
+  val ID_COLUMN = "$ID"
+
+  /**
+   * Naming convention for relationship source identifier.
+   */
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+
+  /**
+   * Naming convention for relationship target identifier.
+   */
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+
+  /**
+   * Naming convention both for node label and relationship type prefixes.
+   */
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * A CypherSession allows for creating, storing and loading [[PropertyGraph]] 
instances as well as
+ * executing Cypher queries on them.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession extends Logging {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @see <a href="https://neo4j.com/docs/cypher-manual/current/">Cypher Manual</a>
 
 Review comment:
   @s1ck . Please remove `neo4j.com` reference.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r44565
 
 

 ##
 File path: 
graph/api/src/test/java/org/apache/spark/graph/api/JavaPropertyGraphSuite.java
 ##
 @@ -74,9 +73,12 @@ public void testCreateFromNodeFrame() {
 
 List<Row> knowsData = Collections.singletonList(RowFactory.create(0L, 0L, 
1L, 1984));
 
-Dataset<Row> personDf = spark.createDataFrame(personData, personSchema);
-NodeFrame personNodeFrame = NodeFrame
-.create(personDf, "id", Sets.newHashSet("Person"));
+Dataset<Row> personDf = spark.createDataFrame(personData, 
personSchema);
 
 Review comment:
   Indentation?


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r43840
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraphWriter.scala
 ##
 @@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import java.util.Locale
+
+import org.apache.spark.sql.SaveMode
+
+abstract class PropertyGraphWriter(val graph: PropertyGraph) {
+
+  protected var saveMode: SaveMode = SaveMode.ErrorIfExists
+  protected var format: String =
+graph.cypherSession.sparkSession.sessionState.conf.defaultDataSourceName
+
+  /**
+   * Specifies the behavior when the graph already exists. Options include:
+   * 
+   * `SaveMode.Overwrite`: overwrite the existing data.
+   * `SaveMode.Ignore`: ignore the operation (i.e. no-op).
+   * `SaveMode.ErrorIfExists`: throw an exception at runtime.
+   * 
+   * 
+   * When writing the default option is `ErrorIfExists`.
+   *
+   * @since 3.0.0
+   */
+  def mode(mode: SaveMode): PropertyGraphWriter = {
+mode match {
+  case SaveMode.Append =>
+throw new IllegalArgumentException(s"Unsupported save mode: $mode. " +
+  "Accepted save modes are 'overwrite', 'ignore', 'error', 
'errorifexists'.")
+  case _ =>
+this.saveMode = mode
+}
+this
+  }
+
+  /**
+   * Specifies the behavior when the graph already exists. Options include:
+   * 
+   * `overwrite`: overwrite the existing graph.
+   * `ignore`: ignore the operation (i.e. no-op).
+   * `error` or `errorifexists`: default option, throw an exception at 
runtime.
+   * 
+   *
+   * @since 3.0.0
+   */
+  def mode(saveMode: String): PropertyGraphWriter = {
+saveMode.toLowerCase(Locale.ROOT) match {
+  case "overwrite" => mode(SaveMode.Overwrite)
+  case "ignore" => mode(SaveMode.Ignore)
+  case "error" | "errorifexists" => mode(SaveMode.ErrorIfExists)
+  case "default" => this
+  case "append" => mode(SaveMode.Append)
 
 Review comment:
  Although this will eventually raise an exception, let's remove this case to match lines 70 and 71. That will improve the consistency of this function.
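
  For reference, a rough sketch of the string overload with the explicit `append` case dropped. The quoted snippet is truncated here, so the catch-all branch and its error message below are assumptions, not the PR's exact code:

  def mode(saveMode: String): PropertyGraphWriter = {
    saveMode.toLowerCase(Locale.ROOT) match {
      case "overwrite" => mode(SaveMode.Overwrite)
      case "ignore" => mode(SaveMode.Ignore)
      case "error" | "errorifexists" => mode(SaveMode.ErrorIfExists)
      case "default" => this
      // No explicit "append" branch: an unsupported string now falls through to
      // the same catch-all as any other unknown mode and fails in a single place.
      case _ =>
        throw new IllegalArgumentException(
          s"Unknown save mode: $saveMode. " +
            "Accepted save modes are 'overwrite', 'ignore', 'error', 'errorifexists'.")
    }
  }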


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r42983
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraphWriter.scala
 ##
 @@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import java.util.Locale
+
+import org.apache.spark.sql.SaveMode
+
+abstract class PropertyGraphWriter(val graph: PropertyGraph) {
+
+  protected var saveMode: SaveMode = SaveMode.ErrorIfExists
+  protected var format: String =
+graph.cypherSession.sparkSession.sessionState.conf.defaultDataSourceName
+
+  /**
+   * Specifies the behavior when the graph already exists. Options include:
+   * 
+   * `SaveMode.Overwrite`: overwrite the existing data.
+   * `SaveMode.Ignore`: ignore the operation (i.e. no-op).
+   * `SaveMode.ErrorIfExists`: throw an exception at runtime.
+   * 
+   * 
+   * When writing the default option is `ErrorIfExists`.
+   *
+   * @since 3.0.0
+   */
+  def mode(mode: SaveMode): PropertyGraphWriter = {
+mode match {
+  case SaveMode.Append =>
+throw new IllegalArgumentException(s"Unsupported save mode: $mode. " +
+  "Accepted save modes are 'overwrite', 'ignore', 'error', 
'errorifexists'.")
+  case _ =>
+this.saveMode = mode
+}
+this
+  }
+
+  /**
+   * Specifies the behavior when the graph already exists. Options include:
+   * 
+   * `overwrite`: overwrite the existing graph.
+   * `ignore`: ignore the operation (i.e. no-op).
+   * `error` or `errorifexists`: default option, throw an exception at 
runtime.
+   * 
+   *
+   * @since 3.0.0
+   */
+  def mode(saveMode: String): PropertyGraphWriter = {
+saveMode.toLowerCase(Locale.ROOT) match {
+  case "overwrite" => mode(SaveMode.Overwrite)
+  case "ignore" => mode(SaveMode.Ignore)
+  case "error" | "errorifexists" => mode(SaveMode.ErrorIfExists)
+  case "default" => this
 
 Review comment:
  Ur, this can be wrong in the case of `mode("overwrite").mode("default")`. Please merge `default` into line 67, and please add a test case for this, too.
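
  A sketch of the suggested merge plus a possible regression test. The `graph.write`, `save`, and `existingGraphPath` names below are placeholders rather than the PR's actual writer API, and the assertion style assumes a ScalaTest suite:

  // In mode(saveMode: String), fold "default" into the branch that already maps to
  // SaveMode.ErrorIfExists, so mode("overwrite").mode("default") resets the writer:
  case "error" | "errorifexists" | "default" => mode(SaveMode.ErrorIfExists)

  // Hypothetical regression test; `graph.write`, `save` and `existingGraphPath`
  // are placeholder names, not necessarily the PR's actual writer API.
  test("mode(\"default\") resets a previously set save mode") {
    val writer = graph.write.mode("overwrite").mode("default")
    // Back in the default ErrorIfExists mode, saving over an existing graph fails.
    intercept[Exception](writer.save(existingGraphPath))
  }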


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r42983
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/PropertyGraphWriter.scala
 ##
 @@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import java.util.Locale
+
+import org.apache.spark.sql.SaveMode
+
+abstract class PropertyGraphWriter(val graph: PropertyGraph) {
+
+  protected var saveMode: SaveMode = SaveMode.ErrorIfExists
+  protected var format: String =
+graph.cypherSession.sparkSession.sessionState.conf.defaultDataSourceName
+
+  /**
+   * Specifies the behavior when the graph already exists. Options include:
+   * 
+   * `SaveMode.Overwrite`: overwrite the existing data.
+   * `SaveMode.Ignore`: ignore the operation (i.e. no-op).
+   * `SaveMode.ErrorIfExists`: throw an exception at runtime.
+   * 
+   * 
+   * When writing the default option is `ErrorIfExists`.
+   *
+   * @since 3.0.0
+   */
+  def mode(mode: SaveMode): PropertyGraphWriter = {
+mode match {
+  case SaveMode.Append =>
+throw new IllegalArgumentException(s"Unsupported save mode: $mode. " +
+  "Accepted save modes are 'overwrite', 'ignore', 'error', 
'errorifexists'.")
+  case _ =>
+this.saveMode = mode
+}
+this
+  }
+
+  /**
+   * Specifies the behavior when the graph already exists. Options include:
+   * 
+   * `overwrite`: overwrite the existing graph.
+   * `ignore`: ignore the operation (i.e. no-op).
+   * `error` or `errorifexists`: default option, throw an exception at 
runtime.
+   * 
+   *
+   * @since 3.0.0
+   */
+  def mode(saveMode: String): PropertyGraphWriter = {
+saveMode.toLowerCase(Locale.ROOT) match {
+  case "overwrite" => mode(SaveMode.Overwrite)
+  case "ignore" => mode(SaveMode.Ignore)
+  case "error" | "errorifexists" => mode(SaveMode.ErrorIfExists)
+  case "default" => this
 
 Review comment:
  Ur, this can be wrong in the case of `mode("overwrite").mode("default")`. Please merge `default` into line 67.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-05 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r331764101
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/GraphElementFrame.scala
 ##
 @@ -0,0 +1,260 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.DataFrame
+
+/**
+ * A [[PropertyGraph]] is created from GraphElementFrames.
+ *
+ * A graph element is either a node or a relationship.
+ * A GraphElementFrame wraps a DataFrame and describes how it maps to graph 
elements.
+ *
+ * @since 3.0.0
+ */
+abstract class GraphElementFrame {
+
+  /**
+   * Initial DataFrame that can still contain unmapped, arbitrarily ordered 
columns.
+   *
+   * @since 3.0.0
+   */
+  def df: DataFrame
+
+  /**
+   * Name of the column that contains the graph element identifier.
+   *
+   * @since 3.0.0
+   */
+  def idColumn: String
+
+  /**
+   * Name of all columns that contain graph element identifiers.
+   *
+   * @since 3.0.0
+   */
+  def idColumns: Seq[String] = Seq(idColumn)
+
+  /**
+   * Mapping from graph element property keys to the columns that contain the 
corresponding property
+   * values.
+   *
+   * @since 3.0.0
+   */
+  def properties: Map[String, String]
+
+}
+
+object NodeFrame {
+
+  /**
+   * Describes how to map an initial DataFrame to nodes.
+   *
+   * All columns apart from the given `idColumn` are mapped to node properties.
+   *
+   * @param dfDataFrame containing a single node in each row
+   * @param idColumn  column that contains the node identifier
+   * @param labelSet  labels that are assigned to all nodes
+   * @since 3.0.0
+   */
+  def create(df: DataFrame, idColumn: String, labelSet: Set[String]): 
NodeFrame = {
+val properties = (df.columns.toSet - idColumn)
+  .map(columnName => columnName -> columnName)
+  .toMap
+create(df, idColumn, labelSet, properties)
+  }
+
+  /**
+   * Describes how to map an initial DataFrame to nodes.
+   *
+   * All columns apart from the given `idColumn` are mapped to node properties.
+   *
+   * @param dfDataFrame containing a single node in each row
+   * @param idColumn  column that contains the node identifier
+   * @param labelSet  labels that are assigned to all nodes
+   * @param properties mapping from property keys to corresponding columns
+   * @since 3.0.0
+   */
+  def create(
+  df: DataFrame,
+  idColumn: String,
+  labelSet: Set[String],
+  properties: Map[String, String]): NodeFrame = {
+NodeFrame(df, idColumn, labelSet, properties)
+  }
+
+  /**
+   * Describes how to map an initial DataFrame to nodes.
+   *
+   * All columns apart from the given `idColumn` are mapped to node properties.
+   *
+   * @param dfDataFrame containing a single node in each row
+   * @param idColumn  column that contains the node identifier
+   * @param labelSet  labels that are assigned to all nodes
+   * @since 3.0.0
+   */
+  def create(df: DataFrame, idColumn: String, labelSet: 
java.util.Set[String]): NodeFrame = {
+create(df, idColumn, labelSet.asScala.toSet)
+  }
+
+  /**
+   * Describes how to map an initial DataFrame to nodes.
+   *
+   * All columns apart from the given `idColumn` are mapped to node properties.
+   *
+   * @param dfDataFrame containing a single node in each row
+   * @param idColumn  column that contains the node identifier
+   * @param labelSet  labels that are assigned to all nodes
+   * @param properties mapping from property keys to corresponding columns
+   * @since 3.0.0
+   */
+  def create(
 
 Review comment:
  Yep. Please follow @mengxr's advice.
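
  For context, a minimal sketch of how the Scala `NodeFrame.create` overload shown above is meant to be used; the data and column names are invented for illustration and assume the graph/api module is on the classpath:

  import org.apache.spark.graph.api.NodeFrame
  import org.apache.spark.sql.SparkSession

  val spark = SparkSession.builder().appName("node-frame-example").master("local[*]").getOrCreate()

  // Two Person nodes; every column other than "id" becomes a node property.
  val personDf = spark
    .createDataFrame(Seq((0L, "Alice", 42), (1L, "Bob", 23)))
    .toDF("id", "name", "age")

  val personNodeFrame = NodeFrame.create(personDf, "id", Set("Person"))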


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: 

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-05 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r331764027
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
+
+object CypherSession {
+  val ID_COLUMN = "$ID"
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * The entry point for using property graphs in Spark.
+ *
+ * Provides factory methods for creating [[PropertyGraph]] instances.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph [[PropertyGraph]] on which the query is executed
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String, parameters: Map[String, 
Any]): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph,
+ query: String,
+ parameters: java.util.Map[String, Object]): CypherResult = {
+cypher(graph, query, parameters.asScala.toMap)
+  }
+
+  /**
+   * Creates a [[PropertyGraph]] from a sequence of [[NodeFrame]]s and 
[[RelationshipFrame]]s.
+   * At least one [[NodeFrame]] has to be provided.
+   *
+   * For each label set and relationship type there can be at most one 
[[NodeFrame]] and at most one
+   * [[RelationshipFrame]], respectively.
+   *
+   * @param nodes NodeFrames that define the nodes in the graph
+   * @param relationships RelationshipFrames that define the relationships in 
the graph
+   * @since 3.0.0
+   */
+  def createGraph(nodes: Seq[NodeFrame], relationships: 
Seq[RelationshipFrame]): PropertyGraph
+
+  /**
+   * Creates a [[PropertyGraph]] from a sequence of [[NodeFrame]]s and 
[[RelationshipFrame]]s.
+   * At least one [[NodeFrame]] has to be provided.
+   *
+   * For each label set and relationship type there can be at most one 
[[NodeFrame]] and at most one
+   * [[RelationshipFrame]], respectively.
+   *
+   * @param nodes NodeFrames that define the nodes in the graph
+   * @param relationships RelationshipFrames that define the relationships in 
the graph
+   * @since 3.0.0
+   */
+  def createGraph(
+  nodes: java.util.List[NodeFrame],
+  relationships: java.util.List[RelationshipFrame]): PropertyGraph = {
+createGraph(nodes.asScala, relationships.asScala)
+  }
+
+  /**
+   * Creates a [[PropertyGraph]] from nodes and relationships.
+   *
+   * The given DataFrames need to adhere to the following column naming 
conventions:
+   *
+   * {{{
+   * Id column:`$ID`(nodes and relationships)
+   * SourceId column:  `$SOURCE_ID` (relationships)
+   * TargetId column:  `$TARGET_ID` (relationships)
+   *
+   * Label columns:`:{LABEL_NAME}`  (nodes)
+   * RelType columns:  `:{REL_TYPE}`(relationships)
+   *
+   * Property columns: `{Property_Key}` (nodes and relationships)
+   * }}}
+   *
+   * @see [[CypherSession]]
+   * @param nodes node DataFrame
+   * @param relationships relationship DataFrame
+   * @since 3.0.0
+   */
+  def createGraph(nodes: DataFrame, relationships: DataFrame): PropertyGraph = 
{
+val idColumn = 

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-10-05 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r331763974
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
+
+object CypherSession {
+  val ID_COLUMN = "$ID"
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * The entry point for using property graphs in Spark.
+ *
+ * Provides factory methods for creating [[PropertyGraph]] instances.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph [[PropertyGraph]] on which the query is executed
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String, parameters: Map[String, 
Any]): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph,
+ query: String,
+ parameters: java.util.Map[String, Object]): CypherResult = {
+cypher(graph, query, parameters.asScala.toMap)
+  }
+
+  /**
+   * Creates a [[PropertyGraph]] from a sequence of [[NodeFrame]]s and 
[[RelationshipFrame]]s.
+   * At least one [[NodeFrame]] has to be provided.
+   *
+   * For each label set and relationship type there can be at most one 
[[NodeFrame]] and at most one
+   * [[RelationshipFrame]], respectively.
+   *
+   * @param nodes NodeFrames that define the nodes in the graph
+   * @param relationships RelationshipFrames that define the relationships in 
the graph
+   * @since 3.0.0
+   */
+  def createGraph(nodes: Seq[NodeFrame], relationships: 
Seq[RelationshipFrame]): PropertyGraph
+
+  /**
+   * Creates a [[PropertyGraph]] from a sequence of [[NodeFrame]]s and 
[[RelationshipFrame]]s.
+   * At least one [[NodeFrame]] has to be provided.
+   *
+   * For each label set and relationship type there can be at most one 
[[NodeFrame]] and at most one
+   * [[RelationshipFrame]], respectively.
+   *
+   * @param nodes NodeFrames that define the nodes in the graph
+   * @param relationships RelationshipFrames that define the relationships in 
the graph
+   * @since 3.0.0
+   */
+  def createGraph(
+  nodes: java.util.List[NodeFrame],
+  relationships: java.util.List[RelationshipFrame]): PropertyGraph = {
+createGraph(nodes.asScala, relationships.asScala)
+  }
+
+  /**
+   * Creates a [[PropertyGraph]] from nodes and relationships.
+   *
+   * The given DataFrames need to adhere to the following column naming 
conventions:
+   *
+   * {{{
+   * Id column:`$ID`(nodes and relationships)
+   * SourceId column:  `$SOURCE_ID` (relationships)
+   * TargetId column:  `$TARGET_ID` (relationships)
+   *
+   * Label columns:`:{LABEL_NAME}`  (nodes)
+   * RelType columns:  `:{REL_TYPE}`(relationships)
+   *
+   * Property columns: `{Property_Key}` (nodes and relationships)
+   * }}}
+   *
+   * @see [[CypherSession]]
+   * @param nodes node DataFrame
+   * @param relationships relationship DataFrame
+   * @since 3.0.0
+   */
+  def createGraph(nodes: DataFrame, relationships: DataFrame): PropertyGraph = 
{
 
 Review comment:
   I understand the 

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-07-12 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r303137999
 
 

 ##
 File path: 
graph/api/src/test/scala/org/apache/spark/graph/api/PropertyGraphSuite.scala
 ##
 @@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import org.scalatest.Matchers
+
+import org.apache.spark.graph.api.CypherSession.{
+  ID_COLUMN,
+  LABEL_COLUMN_PREFIX,
+  SOURCE_ID_COLUMN,
+  TARGET_ID_COLUMN
+}
 
 Review comment:
  In the Apache Spark project, this should be a one-line `import`. We don't enforce the maximum line length for `import` statements.
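
  That is, a sketch of the same import collapsed onto a single line:

  import org.apache.spark.graph.api.CypherSession.{ID_COLUMN, LABEL_COLUMN_PREFIX, SOURCE_ID_COLUMN, TARGET_ID_COLUMN}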


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] dongjoon-hyun commented on a change in pull request #24851: [SPARK-27303][GRAPH] Add Spark Graph API

2019-07-10 Thread GitBox
dongjoon-hyun commented on a change in pull request #24851: 
[SPARK-27303][GRAPH] Add Spark Graph API
URL: https://github.com/apache/spark/pull/24851#discussion_r302151123
 
 

 ##
 File path: 
graph/api/src/main/scala/org/apache/spark/graph/api/CypherSession.scala
 ##
 @@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graph.api
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
+
+object CypherSession {
+  val ID_COLUMN = "$ID"
+  val SOURCE_ID_COLUMN = "$SOURCE_ID"
+  val TARGET_ID_COLUMN = "$TARGET_ID"
+  val LABEL_COLUMN_PREFIX = ":"
+}
+
+/**
+ * The entry point for using property graphs in Spark.
+ *
+ * Provides factory methods for creating [[PropertyGraph]] instances.
+ *
+ * Wraps a [[org.apache.spark.sql.SparkSession]].
+ *
+ * @since 3.0.0
+ */
+trait CypherSession {
+
+  def sparkSession: SparkSession
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph [[PropertyGraph]] on which the query is executed
+   * @param query Cypher query to execute
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph, query: String, parameters: Map[String, 
Any]): CypherResult
+
+  /**
+   * Executes a Cypher query on the given input graph.
+   *
+   * @param graph  [[PropertyGraph]] on which the query is executed
+   * @param query  Cypher query to execute
+   * @param parameters parameters used by the Cypher query
+   * @since 3.0.0
+   */
+  def cypher(graph: PropertyGraph,
+ query: String,
 
 Review comment:
  Indentation.
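
  Presumably something like the usual Spark continuation style, sketched here with a 4-space indent for the multi-line parameter list (the body is taken from the quoted code above):

  def cypher(
      graph: PropertyGraph,
      query: String,
      parameters: java.util.Map[String, Object]): CypherResult = {
    cypher(graph, query, parameters.asScala.toMap)
  }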


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org