Copilot commented on code in PR #5924: URL: https://github.com/apache/texera/pull/5924#discussion_r3464473035
########## common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/apis/reddit/RedditSearchSourceOpDescSpec.scala: ########## @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.operator.source.apis.reddit + +import org.apache.texera.amber.core.tuple.AttributeType +import org.apache.texera.amber.operator.LogicalOp +import org.apache.texera.amber.operator.metadata.OperatorGroupConstants +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class RedditSearchSourceOpDescSpec extends AnyFlatSpec with Matchers { + + "RedditSearchSourceOpDesc.operatorInfo" should + "advertise the Reddit Search source in the External API group" in { + val info = (new RedditSearchSourceOpDesc).operatorInfo + info.userFriendlyName shouldBe "Reddit Search" + info.operatorDescription shouldBe "Search for recent posts with python-wrapped Reddit API, PRAW" + info.operatorGroupName shouldBe OperatorGroupConstants.API_GROUP + info.inputPorts shouldBe empty + info.outputPorts should have length 1 + } + + "RedditSearchSourceOpDesc" should "be a source and default its fields (limit is 100)" in { + val d = new RedditSearchSourceOpDesc + d.asSource() shouldBe true + d.limit.intValue shouldBe 100 + d.clientId shouldBe null + d.clientSecret shouldBe null + d.query shouldBe null + d.sorting shouldBe null + } + + "RedditSearchSourceOpDesc.sourceSchema" should "describe the fixed 17-column post schema" in { + val schema = (new RedditSearchSourceOpDesc).sourceSchema() + schema.getAttributes should have length 17 + schema.getAttribute("id").getType shouldBe AttributeType.STRING + schema.getAttribute("created_utc").getType shouldBe AttributeType.TIMESTAMP + schema.getAttribute("is_self").getType shouldBe AttributeType.BOOLEAN + schema.getAttribute("score").getType shouldBe AttributeType.INTEGER + schema.getAttribute("upvote_ratio").getType shouldBe AttributeType.DOUBLE + } + + "RedditSearchSourceOpDesc.getOutputSchemas" should + "expose the source schema keyed by the declared output port" in { + val d = new RedditSearchSourceOpDesc + val out = d.getOutputSchemas(Map.empty) + out(d.operatorInfo.outputPorts.head.id).getAttributes should have length 17 + } + + "RedditSearchSourceOpDesc.generatePythonCode" should + "emit the PRAW source operator honoring the sorting method" in { + val d = new RedditSearchSourceOpDesc + d.clientId = "id" + d.clientSecret = "secret" + d.query = "texera" + d.sorting = RedditSourceOperatorFunction.Hot + val code = d.generatePythonCode() + code should include("import praw") + code should include("class ProcessTupleOperator(UDFSourceOperator)") + code should include("sorting = 'hot'") + code should include("subreddit('all').search") + } + + it should "throw when generated without the required credentials" in { + intercept[NullPointerException](new RedditSearchSourceOpDesc().generatePythonCode()) + } Review Comment: The missing-credentials case currently asserts a `NullPointerException`, which is an implementation artifact (Scala null dereference during `.replace`) rather than a meaningful contract. A more robust test is to assert the generated Python includes the explicit runtime validation/ValueError checks for empty required fields. ########## common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterFullArchiveSearchSourceOpDescSpec.scala: ########## @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.operator.source.apis.twitter.v2 + +import org.apache.texera.amber.core.tuple.AttributeType +import org.apache.texera.amber.operator.LogicalOp +import org.apache.texera.amber.operator.metadata.OperatorGroupConstants +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import scala.annotation.nowarn + +// The Twitter source operators are @deprecated (no longer executable) but retained so +// legacy workflows still deserialize; the coverage below pins that backward-compatible contract. +@nowarn("cat=deprecation") +class TwitterFullArchiveSearchSourceOpDescSpec extends AnyFlatSpec with Matchers { + + "TwitterFullArchiveSearchSourceOpDesc.operatorInfo" should + "advertise the Twitter Full Archive Search API source in the External API group" in { + val info = (new TwitterFullArchiveSearchSourceOpDesc).operatorInfo + info.userFriendlyName shouldBe "Twitter Full Archive Search API" + info.operatorDescription shouldBe "Retrieve data from Twitter Full Archive Search API" + info.operatorGroupName shouldBe OperatorGroupConstants.API_GROUP + info.inputPorts shouldBe empty + info.outputPorts should have length 1 + } + + "TwitterFullArchiveSearchSourceOpDesc" should + "default its query/date/credential fields (limit is 0, dates null)" in { + val d = new TwitterFullArchiveSearchSourceOpDesc + d.searchQuery shouldBe null + d.fromDateTime shouldBe null + d.toDateTime shouldBe null + d.limit shouldBe 0 + d.apiKey shouldBe null + d.apiSecretKey shouldBe null + d.stopWhenRateLimited shouldBe false + d.APIName shouldBe Some("Full Archive Search") + } + + "TwitterFullArchiveSearchSourceOpDesc.sourceSchema" should + "describe the fixed 33-column tweet schema" in { + val schema = (new TwitterFullArchiveSearchSourceOpDesc).sourceSchema() + schema.getAttributes should have length 33 + schema.getAttribute("id").getType shouldBe AttributeType.STRING + schema.getAttribute("created_at").getType shouldBe AttributeType.TIMESTAMP + schema.getAttribute("retweet_count").getType shouldBe AttributeType.LONG + schema.getAttribute("user_verified").getType shouldBe AttributeType.BOOLEAN + } + + "TwitterFullArchiveSearchSourceOpDesc" should + "round-trip its config fields through the polymorphic base" in { + val d = new TwitterFullArchiveSearchSourceOpDesc + d.searchQuery = "texera" + d.fromDateTime = "2021-04-01T00:00:00Z" + d.toDateTime = "2021-05-01T00:00:00Z" + d.limit = 50 + d.apiKey = "k" + d.apiSecretKey = "s" + val json = objectMapper.writeValueAsString(d) + json should include("\"operatorType\":\"TwitterFullArchiveSearch\"") + val restored = objectMapper.readValue(json, classOf[LogicalOp]) + restored shouldBe a[TwitterFullArchiveSearchSourceOpDesc] + val r = restored.asInstanceOf[TwitterFullArchiveSearchSourceOpDesc] + r.searchQuery shouldBe "texera" + r.fromDateTime shouldBe "2021-04-01T00:00:00Z" + r.toDateTime shouldBe "2021-05-01T00:00:00Z" + r.limit shouldBe 50 + r.apiKey shouldBe "k" + } Review Comment: The round-trip test sets `apiSecretKey` but never asserts it after deserialization, so it doesn't actually pin that field's backward-compatible JSON contract. ########## common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/apis/reddit/RedditSearchSourceOpDescSpec.scala: ########## @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.operator.source.apis.reddit + +import org.apache.texera.amber.core.tuple.AttributeType +import org.apache.texera.amber.operator.LogicalOp +import org.apache.texera.amber.operator.metadata.OperatorGroupConstants +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class RedditSearchSourceOpDescSpec extends AnyFlatSpec with Matchers { + + "RedditSearchSourceOpDesc.operatorInfo" should + "advertise the Reddit Search source in the External API group" in { + val info = (new RedditSearchSourceOpDesc).operatorInfo + info.userFriendlyName shouldBe "Reddit Search" + info.operatorDescription shouldBe "Search for recent posts with python-wrapped Reddit API, PRAW" + info.operatorGroupName shouldBe OperatorGroupConstants.API_GROUP + info.inputPorts shouldBe empty + info.outputPorts should have length 1 + } + + "RedditSearchSourceOpDesc" should "be a source and default its fields (limit is 100)" in { + val d = new RedditSearchSourceOpDesc + d.asSource() shouldBe true + d.limit.intValue shouldBe 100 + d.clientId shouldBe null + d.clientSecret shouldBe null + d.query shouldBe null + d.sorting shouldBe null + } + + "RedditSearchSourceOpDesc.sourceSchema" should "describe the fixed 17-column post schema" in { + val schema = (new RedditSearchSourceOpDesc).sourceSchema() + schema.getAttributes should have length 17 + schema.getAttribute("id").getType shouldBe AttributeType.STRING + schema.getAttribute("created_utc").getType shouldBe AttributeType.TIMESTAMP + schema.getAttribute("is_self").getType shouldBe AttributeType.BOOLEAN + schema.getAttribute("score").getType shouldBe AttributeType.INTEGER + schema.getAttribute("upvote_ratio").getType shouldBe AttributeType.DOUBLE + } + + "RedditSearchSourceOpDesc.getOutputSchemas" should + "expose the source schema keyed by the declared output port" in { + val d = new RedditSearchSourceOpDesc + val out = d.getOutputSchemas(Map.empty) + out(d.operatorInfo.outputPorts.head.id).getAttributes should have length 17 + } + + "RedditSearchSourceOpDesc.generatePythonCode" should + "emit the PRAW source operator honoring the sorting method" in { + val d = new RedditSearchSourceOpDesc + d.clientId = "id" + d.clientSecret = "secret" + d.query = "texera" + d.sorting = RedditSourceOperatorFunction.Hot + val code = d.generatePythonCode() + code should include("import praw") + code should include("class ProcessTupleOperator(UDFSourceOperator)") + code should include("sorting = 'hot'") + code should include("subreddit('all').search") + } + + it should "throw when generated without the required credentials" in { + intercept[NullPointerException](new RedditSearchSourceOpDesc().generatePythonCode()) + } + + "RedditSearchSourceOpDesc" should "round-trip its config fields through the polymorphic base" in { + val d = new RedditSearchSourceOpDesc + d.clientId = "id" + d.clientSecret = "secret" + d.query = "texera" + d.limit = 50 + d.sorting = RedditSourceOperatorFunction.New + val json = objectMapper.writeValueAsString(d) + json should include("\"operatorType\":\"RedditSearch\"") + val restored = objectMapper.readValue(json, classOf[LogicalOp]) + restored shouldBe a[RedditSearchSourceOpDesc] + val r = restored.asInstanceOf[RedditSearchSourceOpDesc] + r.clientId shouldBe "id" + r.query shouldBe "texera" + r.limit.intValue shouldBe 50 + r.sorting shouldBe RedditSourceOperatorFunction.New + } Review Comment: This round-trip test sets `clientSecret` but never asserts it after deserialization, so it doesn't fully verify config preservation through `LogicalOp`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
