This is an automated email from the ASF dual-hosted git repository.
Yicong-Huang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new 786a920796 refactor(auth): align JWT claim parsing across
microservices and amber (#4896)
786a920796 is described below
commit 786a9207962858e37c29d726e2df290dcaf4a267
Author: Yicong Huang <[email protected]>
AuthorDate: Sun May 3 22:41:10 2026 -0700
refactor(auth): align JWT claim parsing across microservices and amber
(#4896)
### What changes were proposed in this PR?
Two JWT parsing paths drifted apart, and the codebase had two
`JwtConsumer` instances built with byte-identical config:
- `common/auth/JwtParser` (microservices) read 5 claims and dropped
`googleAvatar` — even though `JwtAuth.jwtClaims` writes it on every
token.
- `amber/.../UserAuthenticator` read those 5 plus `comment`,
`accountCreation` — neither of which the issuer ever writes, so always
`null` in real tokens. Also dropped `googleAvatar`.
Consolidate into a single `JwtParser.claimsToSessionUser(JwtClaims):
SessionUser` in `common/auth`. Make `JwtParser.parseToken` reuse
`JwtAuth.jwtConsumer` (one consumer instance left in the codebase).
Rewrite `UserAuthenticator.authenticate` as a 1-line delegate so amber
and the microservices produce identical `SessionUser` objects from the
same token.
`UserAuthenticator` itself stays in amber — it's a Dropwizard 1.3
`Authenticator[JwtContext, SessionUser]` adapter for the toastshaman
library. Once amber's Dropwizard upgrade unblocks the move to the common
`JwtAuthFilter`, this whole adapter goes away.
Side robustness fix: jose4j returns `Long` for integer claims after JSON
round-trip but `setClaim` keeps `Integer` in memory. Widening `userId`
via `getClaimValue("userId", classOf[Number]).intValue()` makes
directly-built and parsed claims behave the same.
### Any related issues, documentation, discussions?
Closes #4895
### How was this PR tested?
`JwtParserSpec` (new, 4 cases): asserts every issued claim is populated
(including `googleAvatar`), the non-issued `User` slots stay `null`, an
end-to-end round-trip via `JwtAuth.jwtToken -> JwtParser.parseToken`
reconstructs the user, and a structurally invalid token returns empty.
`sbt Auth/test` 11/11 green; `AccessControlService`, `ConfigService`,
`FileService`, `ComputingUnitManagingService`,
`WorkflowExecutionService` all compile clean; `Auth/scalafmtCheckAll`
and `Auth/scalafixAll --check` clean.
### Was this PR authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Opus 4.7)
---
.../apache/texera/web/auth/UserAuthenticator.scala | 39 +---
.../texera/web/auth/UserAuthenticatorSpec.scala | 78 ++++++++
.../scala/org/apache/texera/auth/JwtParser.scala | 62 ++++---
.../org/apache/texera/auth/JwtParserSpec.scala | 200 +++++++++++++++++++++
4 files changed, 324 insertions(+), 55 deletions(-)
diff --git
a/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala
b/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala
index e7fe67ca10..8111e44290 100644
--- a/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala
+++ b/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala
@@ -21,48 +21,25 @@ package org.apache.texera.web.auth
import com.typesafe.scalalogging.LazyLogging
import io.dropwizard.auth.Authenticator
-import org.apache.texera.auth.SessionUser
-import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum
-import org.apache.texera.dao.jooq.generated.tables.pojos.User
+import org.apache.texera.auth.{JwtParser, SessionUser}
import org.jose4j.jwt.consumer.JwtContext
-import java.time.OffsetDateTime
import java.util.Optional
+/** Adapter for the toastshaman Dropwizard JWT filter. The filter has already
+ * verified the signature by the time this is invoked, so the work here is
+ * pure claim extraction — delegated to [[JwtParser.claimsToSessionUser]]
+ * so amber and the microservices produce identical [[SessionUser]] objects
+ * from the same token.
+ */
object UserAuthenticator extends Authenticator[JwtContext, SessionUser] with
LazyLogging {
override def authenticate(context: JwtContext): Optional[SessionUser] = {
- // This method will be called once the token's signature has been verified,
- // including the token secret and the expiration time
try {
- val userName = context.getJwtClaims.getSubject
- val email =
context.getJwtClaims.getClaimValue("email").asInstanceOf[String]
- val userId =
context.getJwtClaims.getClaimValue("userId").asInstanceOf[Long].toInt
- val role =
-
UserRoleEnum.valueOf(context.getJwtClaims.getClaimValue("role").asInstanceOf[String])
- val googleId =
context.getJwtClaims.getClaimValue("googleId").asInstanceOf[String]
- val comment =
context.getJwtClaims.getClaimValue("comment").asInstanceOf[String]
- val accountCreation =
-
context.getJwtClaims.getClaimValue("accountCreation").asInstanceOf[OffsetDateTime]
- val user =
- new User(
- userId,
- userName,
- email,
- null,
- googleId,
- null,
- role,
- comment,
- accountCreation,
- null,
- null
- )
- Optional.of(new SessionUser(user))
+ Optional.of(JwtParser.claimsToSessionUser(context.getJwtClaims))
} catch {
case e: Exception =>
logger.error("Failed to authenticate the JwtContext", e)
Optional.empty()
}
-
}
}
diff --git
a/amber/src/test/scala/org/apache/texera/web/auth/UserAuthenticatorSpec.scala
b/amber/src/test/scala/org/apache/texera/web/auth/UserAuthenticatorSpec.scala
new file mode 100644
index 0000000000..d185669caa
--- /dev/null
+++
b/amber/src/test/scala/org/apache/texera/web/auth/UserAuthenticatorSpec.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.web.auth
+
+import org.apache.texera.auth.JwtAuth
+import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum
+import org.jose4j.jwt.JwtClaims
+import org.jose4j.jwt.consumer.JwtContext
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class UserAuthenticatorSpec extends AnyFlatSpec with Matchers {
+
+ // Mirror exactly what JwtAuth.jwtClaims would write at issue time, so
+ // the spec doubles as a contract check between the issuer and the
+ // amber-side authenticator.
+ private def buildClaims(): JwtClaims = {
+ val claims = new JwtClaims
+ claims.setSubject("alice")
+ claims.setClaim("userId", 42)
+ claims.setClaim("googleId", "g-123")
+ claims.setClaim("email", "[email protected]")
+ claims.setClaim("role", UserRoleEnum.ADMIN.name)
+ claims.setClaim("googleAvatar", "avatar-blob")
+ claims.setExpirationTimeMinutesInTheFuture(10f)
+ claims
+ }
+
+ // Run a token through the production consumer to get a real JwtContext —
+ // matches what the toastshaman filter hands the authenticator at runtime.
+ private def contextFor(claims: JwtClaims): JwtContext =
+ JwtAuth.jwtConsumer.process(JwtAuth.jwtToken(claims))
+
+ "UserAuthenticator.authenticate" should "delegate to JwtParser and return a
populated SessionUser" in {
+ val result = UserAuthenticator.authenticate(contextFor(buildClaims()))
+ result.isPresent shouldBe true
+ val u = result.get().getUser
+ u.getUid shouldBe 42
+ u.getName shouldBe "alice"
+ u.getEmail shouldBe "[email protected]"
+ u.getGoogleId shouldBe "g-123"
+ u.getGoogleAvatar shouldBe "avatar-blob"
+ u.getRole shouldBe UserRoleEnum.ADMIN
+ }
+
+ it should "return empty when a required custom claim (userId) is missing" in
{
+ val claims = new JwtClaims
+ claims.setSubject("alice")
+ claims.setClaim("role", UserRoleEnum.ADMIN.name)
+ claims.setExpirationTimeMinutesInTheFuture(10f)
+ UserAuthenticator.authenticate(contextFor(claims)).isPresent shouldBe false
+ }
+
+ it should "return empty when a required custom claim (role) is missing" in {
+ val claims = new JwtClaims
+ claims.setSubject("alice")
+ claims.setClaim("userId", 42)
+ claims.setExpirationTimeMinutesInTheFuture(10f)
+ UserAuthenticator.authenticate(contextFor(claims)).isPresent shouldBe false
+ }
+}
diff --git a/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala
b/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala
index 79e331cc64..bb139e7093 100644
--- a/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala
+++ b/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala
@@ -20,41 +20,25 @@
package org.apache.texera.auth
import com.typesafe.scalalogging.LazyLogging
-import org.apache.texera.config.AuthConfig
import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum
import org.apache.texera.dao.jooq.generated.tables.pojos.User
import org.jose4j.jwt.JwtClaims
-import org.jose4j.jwt.consumer.{JwtConsumer, JwtConsumerBuilder}
-import org.jose4j.keys.HmacKey
import org.jose4j.lang.UnresolvableKeyException
-import java.nio.charset.StandardCharsets
import java.util.Optional
+/** Single source of truth for converting a verified JWT into a
[[SessionUser]].
+ *
+ * Verification reuses [[JwtAuth.jwtConsumer]] (same secret, same clock-skew
+ * config). The claim set extracted here mirrors what [[JwtAuth.jwtClaims]]
+ * writes when issuing a token.
+ */
object JwtParser extends LazyLogging {
- private val TOKEN_SECRET = AuthConfig.jwtSecretKey
-
- private val jwtConsumer: JwtConsumer = new JwtConsumerBuilder()
- .setAllowedClockSkewInSeconds(30)
- .setRequireExpirationTime()
- .setRequireSubject()
- .setVerificationKey(new
HmacKey(TOKEN_SECRET.getBytes(StandardCharsets.UTF_8)))
- .setRelaxVerificationKeyValidation()
- .build()
-
+ /** Verify and parse a Bearer token string. */
def parseToken(token: String): Optional[SessionUser] = {
try {
- val jwtClaims: JwtClaims = jwtConsumer.processToClaims(token)
- val userName = jwtClaims.getSubject
- val email = jwtClaims.getClaimValue("email", classOf[String])
- val userId = jwtClaims.getClaimValue("userId").asInstanceOf[Long].toInt
- val role =
UserRoleEnum.valueOf(jwtClaims.getClaimValue("role").asInstanceOf[String])
- val googleId = jwtClaims.getClaimValue("googleId", classOf[String])
-
- val user =
- new User(userId, userName, email, null, googleId, null, role, null,
null, null, null)
- Optional.of(new SessionUser(user))
+
Optional.of(claimsToSessionUser(JwtAuth.jwtConsumer.processToClaims(token)))
} catch {
case _: UnresolvableKeyException =>
logger.error("Invalid JWT Signature")
@@ -64,4 +48,34 @@ object JwtParser extends LazyLogging {
Optional.empty()
}
}
+
+ /** Build a [[SessionUser]] from already-verified claims. Used by both
+ * [[parseToken]] (which verifies then calls this) and amber's
+ * `UserAuthenticator` (which the toastshaman filter calls after its own
+ * signature verification).
+ */
+ def claimsToSessionUser(claims: JwtClaims): SessionUser = {
+ val userName = claims.getSubject
+ val email = claims.getClaimValue("email", classOf[String])
+ // jose4j returns Long after JSON round-trip but the original setClaim
+ // call writes Integer; widen via Number to handle both cases.
+ val userId = claims.getClaimValue("userId", classOf[Number]).intValue()
+ val role =
UserRoleEnum.valueOf(claims.getClaimValue("role").asInstanceOf[String])
+ val googleId = claims.getClaimValue("googleId", classOf[String])
+ val googleAvatar = claims.getClaimValue("googleAvatar", classOf[String])
+ val user = new User(
+ userId,
+ userName,
+ email,
+ null,
+ googleId,
+ googleAvatar,
+ role,
+ null,
+ null,
+ null,
+ null
+ )
+ new SessionUser(user)
+ }
}
diff --git
a/common/auth/src/test/scala/org/apache/texera/auth/JwtParserSpec.scala
b/common/auth/src/test/scala/org/apache/texera/auth/JwtParserSpec.scala
new file mode 100644
index 0000000000..dc91de4d64
--- /dev/null
+++ b/common/auth/src/test/scala/org/apache/texera/auth/JwtParserSpec.scala
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.auth
+
+import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum
+import org.apache.texera.dao.jooq.generated.tables.pojos.User
+import org.jose4j.jws.AlgorithmIdentifiers.HMAC_SHA256
+import org.jose4j.jws.JsonWebSignature
+import org.jose4j.jwt.JwtClaims
+import org.jose4j.keys.HmacKey
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+import java.nio.charset.StandardCharsets
+
+class JwtParserSpec extends AnyFlatSpec with Matchers {
+
+ private def buildClaims(): JwtClaims = {
+ // Mirror exactly what JwtAuth.jwtClaims would write at issue time, so
+ // this spec doubles as a contract test between the issuer and parser.
+ val claims = new JwtClaims
+ claims.setSubject("alice")
+ claims.setClaim("userId", 42)
+ claims.setClaim("googleId", "g-123")
+ claims.setClaim("email", "[email protected]")
+ claims.setClaim("role", UserRoleEnum.ADMIN.name)
+ claims.setClaim("googleAvatar", "avatar-blob")
+ claims.setExpirationTimeMinutesInTheFuture(10f)
+ claims
+ }
+
+ "JwtParser.claimsToSessionUser" should "populate every issued claim
including googleAvatar" in {
+ val user: User = JwtParser.claimsToSessionUser(buildClaims()).getUser
+ user.getUid shouldBe 42
+ user.getName shouldBe "alice"
+ user.getEmail shouldBe "[email protected]"
+ user.getGoogleId shouldBe "g-123"
+ user.getGoogleAvatar shouldBe "avatar-blob"
+ user.getRole shouldBe UserRoleEnum.ADMIN
+ }
+
+ it should "leave non-issued slots null (password, comment, accountCreation,
affiliation, joiningReason)" in {
+ val user: User = JwtParser.claimsToSessionUser(buildClaims()).getUser
+ user.getPassword shouldBe null
+ user.getComment shouldBe null
+ user.getAccountCreationTime shouldBe null
+ user.getAffiliation shouldBe null
+ user.getJoiningReason shouldBe null
+ }
+
+ it should "round-trip a token issued by JwtAuth.jwtToken" in {
+ val token = JwtAuth.jwtToken(buildClaims())
+ val parsed = JwtParser.parseToken(token)
+ parsed.isPresent shouldBe true
+ val u = parsed.get().getUser
+ u.getUid shouldBe 42
+ u.getGoogleAvatar shouldBe "avatar-blob"
+ }
+
+ "JwtParser.parseToken" should "return empty on a structurally invalid token"
in {
+ JwtParser.parseToken("not-a-real-jwt").isPresent shouldBe false
+ }
+
+ it should "return empty when the token is signed with the wrong secret" in {
+ val token = signWith(buildClaims(),
"definitely-not-the-real-secret-for-testing-only")
+ JwtParser.parseToken(token).isPresent shouldBe false
+ }
+
+ it should "return empty when the token is expired" in {
+ val claims = new JwtClaims
+ claims.setSubject("alice")
+ claims.setClaim("userId", 42)
+ claims.setClaim("role", UserRoleEnum.ADMIN.name)
+ claims.setExpirationTimeMinutesInTheFuture(-10f) // expired 10 minutes ago
+ val token = JwtAuth.jwtToken(claims)
+ JwtParser.parseToken(token).isPresent shouldBe false
+ }
+
+ it should "return empty when the token has no subject claim" in {
+ val claims = new JwtClaims
+ claims.setClaim("userId", 42)
+ claims.setClaim("role", UserRoleEnum.ADMIN.name)
+ claims.setExpirationTimeMinutesInTheFuture(10f)
+ val token = JwtAuth.jwtToken(claims)
+ JwtParser.parseToken(token).isPresent shouldBe false
+ }
+
+ it should "return empty when the token has no exp claim" in {
+ // The shared consumer is built with setRequireExpirationTime(); a token
+ // missing exp must be rejected even if the signature is valid.
+ val claims = new JwtClaims
+ claims.setSubject("alice")
+ claims.setClaim("userId", 42)
+ claims.setClaim("role", UserRoleEnum.ADMIN.name)
+ val token = JwtAuth.jwtToken(claims)
+ JwtParser.parseToken(token).isPresent shouldBe false
+ }
+
+ it should "still accept a token expired within the 30s clock-skew window" in
{
+ val claims = buildClaims()
+ // 5 seconds ago — well inside setAllowedClockSkewInSeconds(30).
+ claims.setExpirationTimeMinutesInTheFuture(-0.083f)
+ val token = JwtAuth.jwtToken(claims)
+ JwtParser.parseToken(token).isPresent shouldBe true
+ }
+
+ it should "reject a token expired beyond the 30s clock-skew window" in {
+ val claims = buildClaims()
+ // 90 seconds ago — past the 30s allowance.
+ claims.setExpirationTimeMinutesInTheFuture(-1.5f)
+ val token = JwtAuth.jwtToken(claims)
+ JwtParser.parseToken(token).isPresent shouldBe false
+ }
+
+ it should "return empty when the signature segment is tampered" in {
+ val token = JwtAuth.jwtToken(buildClaims())
+ val parts = token.split('.')
+ parts.length shouldBe 3
+ val tampered = s"${parts(0)}.${parts(1)}.${parts(2).reverse}"
+ JwtParser.parseToken(tampered).isPresent shouldBe false
+ }
+
+ it should "return empty when the payload segment is tampered" in {
+ // Re-base64 a claim with a different userId; the signature in parts(2)
+ // covers parts(0).parts(1), so the rebuilt token won't verify.
+ val token = JwtAuth.jwtToken(buildClaims())
+ val parts = token.split('.')
+ val swappedClaims = new JwtClaims
+ swappedClaims.setSubject("mallory")
+ swappedClaims.setClaim("userId", 99999)
+ swappedClaims.setClaim("role", UserRoleEnum.ADMIN.name)
+ swappedClaims.setExpirationTimeMinutesInTheFuture(10f)
+ val rebuiltPayload =
java.util.Base64.getUrlEncoder.withoutPadding.encodeToString(
+ swappedClaims.toJson.getBytes(StandardCharsets.UTF_8)
+ )
+ val tampered = s"${parts(0)}.$rebuiltPayload.${parts(2)}"
+ JwtParser.parseToken(tampered).isPresent shouldBe false
+ }
+
+ "JwtParser life cycle" should "round-trip distinct users without state
leakage" in {
+ val alice = buildClaims()
+ val bob = new JwtClaims
+ bob.setSubject("bob")
+ bob.setClaim("userId", 7)
+ bob.setClaim("googleId", "g-bob")
+ bob.setClaim("email", "[email protected]")
+ bob.setClaim("role", UserRoleEnum.REGULAR.name)
+ bob.setClaim("googleAvatar", "bob-avatar")
+ bob.setExpirationTimeMinutesInTheFuture(10f)
+
+ val aliceUser = JwtParser.parseToken(JwtAuth.jwtToken(alice)).get().getUser
+ val bobUser = JwtParser.parseToken(JwtAuth.jwtToken(bob)).get().getUser
+
+ aliceUser.getUid shouldBe 42
+ aliceUser.getName shouldBe "alice"
+ aliceUser.getRole shouldBe UserRoleEnum.ADMIN
+ bobUser.getUid shouldBe 7
+ bobUser.getName shouldBe "bob"
+ bobUser.getRole shouldBe UserRoleEnum.REGULAR
+ }
+
+ it should "produce equivalent SessionUser objects when re-parsed multiple
times" in {
+ val token = JwtAuth.jwtToken(buildClaims())
+ val first = JwtParser.parseToken(token).get().getUser
+ val second = JwtParser.parseToken(token).get().getUser
+ first.getUid shouldBe second.getUid
+ first.getName shouldBe second.getName
+ first.getEmail shouldBe second.getEmail
+ first.getGoogleAvatar shouldBe second.getGoogleAvatar
+ first.getRole shouldBe second.getRole
+ }
+
+ /** Sign a JwtClaims payload with an arbitrary secret. Used to produce a
+ * token whose signature won't verify against the real consumer's key.
+ */
+ private def signWith(claims: JwtClaims, secret: String): String = {
+ val jws = new JsonWebSignature
+ jws.setPayload(claims.toJson)
+ jws.setAlgorithmHeaderValue(HMAC_SHA256)
+ jws.setKey(new HmacKey(secret.getBytes(StandardCharsets.UTF_8)))
+ jws.getCompactSerialization
+ }
+}