This is an automated email from the ASF dual-hosted git repository.
mengw15 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new b919fcd336 feat: add parallel coordinates plot visualization operator
(#4413)
b919fcd336 is described below
commit b919fcd336bfe95a8ad8497d3957c675d8ea2654
Author: KyleKDang <[email protected]>
AuthorDate: Mon Apr 20 12:44:32 2026 -0700
feat: add parallel coordinates plot visualization operator (#4413)
### What changes were proposed in this PR?
This PR adds a new visualization operator: **Parallel Coordinates Plot**.
The operator enables users to visualize multivariate numeric data by
mapping multiple numeric columns onto parallel axes, allowing comparison
of values across different dimensions for each row. It also supports
optional grouping via a color column.
#### Inputs
- **Dimensions (required):** A list of numeric columns used as axes in
the parallel coordinates plot.
- **Color (optional):** A column used to assign colors to lines in the
visualization.
#### Behavior
- Filters out rows with missing values in the selected dimension columns (and in the color column, when one is selected)
- Creates a Plotly parallel coordinates figure from the processed data
- Renders the resulting plot as HTML in the Texera UI result panel
### Any related issues, documentation, discussions?
No
### How was this PR tested?
- Ran `sbt "scalafixAll --check"` and `sbt scalafmtCheckAll`
successfully
- Ran `sbt test` and confirmed all backend tests pass
- Manually tested in Texera UI using a CSV dataset:
[test_parallel_coordinates.csv](https://github.com/user-attachments/files/26865635/test_parallel_coordinates.csv)
#### Sample Output
<img width="1470" height="797" alt="test_parallel_coordinates"
src="https://github.com/user-attachments/assets/8bc288c5-7e25-4285-8091-445c800a84a4"
/>
Verified:
- multiple numeric columns render correctly as axes
- optional color grouping works
- plot renders successfully in result panel
- no runtime errors in browser console
### Was this PR authored or co-authored using generative AI tooling?
Reviewed by ChatGPT (OpenAI GPT-5.3)
---------
Co-authored-by: Kyle Dang <[email protected]>
Co-authored-by: Meng Wang <[email protected]>
---
.../apache/texera/amber/operator/LogicalOp.scala | 2 +
.../ParallelCoordinatesPlotOpDesc.scala | 132 +++++++++++++++++++++
.../operator_images/ParallelCoordinatesPlot.png | Bin 0 -> 4583 bytes
3 files changed, 134 insertions(+)
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
index d9b9cd9f10..3949b67be9 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala
@@ -132,6 +132,7 @@ import
org.apache.texera.amber.operator.visualization.scatter3DChart.Scatter3dCh
import
org.apache.texera.amber.operator.visualization.scatterplot.ScatterplotOpDesc
import
org.apache.texera.amber.operator.visualization.tablesChart.TablesPlotOpDesc
import
org.apache.texera.amber.operator.visualization.ternaryPlot.TernaryPlotOpDesc
+import
org.apache.texera.amber.operator.visualization.parallelCoordinatesPlot.ParallelCoordinatesPlotOpDesc
import
org.apache.texera.amber.operator.visualization.polarChart.PolarChartOpDesc
import
org.apache.texera.amber.operator.visualization.timeSeriesplot.TimeSeriesOpDesc
import org.apache.texera.amber.operator.visualization.treeplot.TreePlotOpDesc
@@ -202,6 +203,7 @@ trait StateTransferFunc
new Type(value = classOf[QuiverPlotOpDesc], name = "QuiverPlot"),
new Type(value = classOf[RadarPlotOpDesc], name = "RadarPlot"),
new Type(value = classOf[RadarChartOpDesc], name = "RadarChart"),
+ new Type(value = classOf[ParallelCoordinatesPlotOpDesc], name =
"ParallelCoordinatesPlot"),
new Type(value = classOf[WordCloudOpDesc], name = "WordCloud"),
new Type(value = classOf[HtmlVizOpDesc], name = "HTMLVisualizer"),
new Type(value = classOf[UrlVizOpDesc], name = "URLVisualizer"),
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/parallelCoordinatesPlot/ParallelCoordinatesPlotOpDesc.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/parallelCoordinatesPlot/ParallelCoordinatesPlotOpDesc.scala
new file mode 100644
index 0000000000..ef7730e7cc
--- /dev/null
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/visualization/parallelCoordinatesPlot/ParallelCoordinatesPlotOpDesc.scala
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.visualization.parallelCoordinatesPlot
+
+import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
+import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject,
JsonSchemaTitle}
+import org.apache.texera.amber.core.tuple.{AttributeType, Schema}
+import org.apache.texera.amber.core.workflow.PortIdentity
+import org.apache.texera.amber.operator.PythonOperatorDescriptor
+import org.apache.texera.amber.operator.metadata.annotations.{
+ AutofillAttributeName,
+ AutofillAttributeNameList
+}
+import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants,
OperatorInfo}
+import org.apache.texera.amber.pybuilder.PyStringTypes.EncodableString
+import org.apache.texera.amber.pybuilder.PythonTemplateBuilder
+import
org.apache.texera.amber.pybuilder.PythonTemplateBuilder.PythonTemplateBuilderStringContext
+
+import javax.validation.constraints.{NotNull, Size}
+
+// type constraint: columns chosen for "dimensions" must be of type integer, long, or double
+@JsonSchemaInject(json = """
+{
+ "attributeTypeRules": {
+ "dimensions": {
+ "enum": ["integer", "long", "double"]
+ }
+ }
+}
+""")
+class ParallelCoordinatesPlotOpDesc extends PythonOperatorDescriptor { // descriptor that generates Python (Plotly Express) code rendering a parallel coordinates plot as HTML
+
+ @JsonProperty(value = "dimensions", required = true)
+ @JsonSchemaTitle("Dimensions")
+ @JsonPropertyDescription("List of numeric columns to visualize as parallel
axes")
+ @AutofillAttributeNameList
+ @NotNull(message = "Dimensions cannot be empty")
+ @Size(min = 1, message = "At least one dimension is required")
+ var dimensions: List[EncodableString] = List() // column names used as the plot's axes; starts as an empty list
+
+ @JsonProperty(value = "color", required = false)
+ @JsonSchemaTitle("Color Column")
+ @JsonPropertyDescription("Column used to color or group the lines")
+ @AutofillAttributeName
+ var color: EncodableString = _ // optional; the methods below treat null or empty as "no color column"
+
+ override def operatorInfo: OperatorInfo = // display strings plus the VISUALIZATION_SCIENTIFIC_GROUP constant
+ OperatorInfo.forVisualization(
+ "Parallel Coordinates Plot",
+ "Visualize multivariate data using parallel coordinate axes",
+ OperatorGroupConstants.VISUALIZATION_SCIENTIFIC_GROUP
+ )
+
+ override def getOutputSchemas( // output: one STRING attribute "html-content" on the first output port; inputSchemas is not consulted
+ inputSchemas: Map[PortIdentity, Schema]
+ ): Map[PortIdentity, Schema] = {
+ val outputSchema = Schema()
+ .add("html-content", AttributeType.STRING)
+ Map(operatorInfo.outputPorts.head.id -> outputSchema)
+ }
+
+ def manipulateTable(): PythonTemplateBuilder = { // Python snippet: keep rows non-null in every dimension column (and in the color column, if set), then copy
+ val dimCols = dimensions.map(c => pyb"$c").mkString(",")
+ val colorFilter =
+ if (color != null && color.nonEmpty) pyb"&(table[$color].notnull())"
+ else ""
+ pyb"""
+ | table =
table[table[[$dimCols]].notnull().all(axis=1)$colorFilter].copy()
+ |"""
+ }
+
+ def createPlotlyFigure(): PythonTemplateBuilder = { // Python snippet: fig = px.parallel_coordinates(table, dimensions=[...]) with color=<column> appended only when set
+ val dimCols = dimensions.map(c => pyb"$c").mkString(",")
+ val colorArg =
+ if (color != null && color.nonEmpty) pyb", color=$color"
+ else ""
+ pyb"""
+ | fig = px.parallel_coordinates(
+ | table,
+ | dimensions=[$dimCols]$colorArg
+ | )
+ |"""
+ }
+
+ override def generatePythonCode(): String = { // full UDF source: yields error HTML if the table is empty before or after filtering, otherwise the figure's HTML
+ val finalcode =
+ pyb"""
+ |from pytexera import *
+ |
+ |import plotly.express as px
+ |import plotly.io
+ |
+ |class ProcessTableOperator(UDFTableOperator):
+ |
+ | def render_error(self, error_msg):
+ | return '''<h1>Parallel coordinates plot is not
available.</h1>
+ | <p>Reason is: {} </p>
+ | '''.format(error_msg)
+ |
+ | @overrides
+ | def process_table(self, table: Table, port: int) ->
Iterator[Optional[TableLike]]:
+ | if table.empty:
+ | yield {'html-content': self.render_error("Input table is
empty.")}
+ | return
+ | ${manipulateTable()}
+ | if table.empty:
+ | yield {'html-content': self.render_error("No valid rows
after filtering.")}
+ | return
+ | ${createPlotlyFigure()}
+ | html = plotly.io.to_html(fig, include_plotlyjs='cdn',
auto_play=False)
+ | yield {'html-content': html}
+ |"""
+ finalcode.encode
+ }
+}
diff --git a/frontend/src/assets/operator_images/ParallelCoordinatesPlot.png
b/frontend/src/assets/operator_images/ParallelCoordinatesPlot.png
new file mode 100644
index 0000000000..ba91db41d1
Binary files /dev/null and
b/frontend/src/assets/operator_images/ParallelCoordinatesPlot.png differ