This is an automated email from the ASF dual-hosted git repository.

Yicong-Huang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new 786a920796 refactor(auth): align JWT claim parsing across 
microservices and amber (#4896)
786a920796 is described below

commit 786a9207962858e37c29d726e2df290dcaf4a267
Author: Yicong Huang <[email protected]>
AuthorDate: Sun May 3 22:41:10 2026 -0700

    refactor(auth): align JWT claim parsing across microservices and amber 
(#4896)
    
    ### What changes were proposed in this PR?
    
    Two JWT parsing paths drifted apart, and the codebase had two
    `JwtConsumer` instances built with byte-identical config:
    
    - `common/auth/JwtParser` (microservices) read 5 claims and dropped
    `googleAvatar` — even though `JwtAuth.jwtClaims` writes it on every
    token.
    - `amber/.../UserAuthenticator` read those 5 plus `comment`,
    `accountCreation` — neither of which the issuer ever writes, so always
    `null` in real tokens. Also dropped `googleAvatar`.
    
    Consolidate into a single `JwtParser.claimsToSessionUser(JwtClaims):
    SessionUser` in `common/auth`. Make `JwtParser.parseToken` reuse
    `JwtAuth.jwtConsumer` (one consumer instance left in the codebase).
    Rewrite `UserAuthenticator.authenticate` as a 1-line delegate so amber
    and the microservices produce identical `SessionUser` objects from the
    same token.
    
    `UserAuthenticator` itself stays in amber — it's a Dropwizard 1.3
    `Authenticator[JwtContext, SessionUser]` adapter for the toastshaman
    library. Once amber's Dropwizard upgrade unblocks the move to the common
    `JwtAuthFilter`, this whole adapter goes away.
    
    Side robustness fix: jose4j returns `Long` for integer claims after JSON
    round-trip but `setClaim` keeps `Integer` in memory. Widening `userId`
    via `getClaimValue("userId", classOf[Number]).intValue()` makes
    directly-built and parsed claims behave the same.
    
    ### Any related issues, documentation, discussions?
    
    Closes #4895
    
    ### How was this PR tested?
    
    `JwtParserSpec` (new, 4 cases): asserts every issued claim is populated
    (including `googleAvatar`), the non-issued `User` slots stay `null`, an
    end-to-end round-trip via `JwtAuth.jwtToken -> JwtParser.parseToken`
    reconstructs the user, and a structurally invalid token returns empty.
    
    `sbt Auth/test` 11/11 green; `AccessControlService`, `ConfigService`,
    `FileService`, `ComputingUnitManagingService`,
    `WorkflowExecutionService` all compile clean; `Auth/scalafmtCheckAll`
    and `Auth/scalafixAll --check` clean.
    
    ### Was this PR authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Opus 4.7)
---
 .../apache/texera/web/auth/UserAuthenticator.scala |  39 +---
 .../texera/web/auth/UserAuthenticatorSpec.scala    |  78 ++++++++
 .../scala/org/apache/texera/auth/JwtParser.scala   |  62 ++++---
 .../org/apache/texera/auth/JwtParserSpec.scala     | 200 +++++++++++++++++++++
 4 files changed, 324 insertions(+), 55 deletions(-)

diff --git 
a/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala 
b/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala
index e7fe67ca10..8111e44290 100644
--- a/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala
+++ b/amber/src/main/scala/org/apache/texera/web/auth/UserAuthenticator.scala
@@ -21,48 +21,25 @@ package org.apache.texera.web.auth
 
 import com.typesafe.scalalogging.LazyLogging
 import io.dropwizard.auth.Authenticator
-import org.apache.texera.auth.SessionUser
-import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum
-import org.apache.texera.dao.jooq.generated.tables.pojos.User
+import org.apache.texera.auth.{JwtParser, SessionUser}
 import org.jose4j.jwt.consumer.JwtContext
 
-import java.time.OffsetDateTime
 import java.util.Optional
 
+/** Adapter for the toastshaman Dropwizard JWT filter. The filter has already
+  * verified the signature by the time this is invoked, so the work here is
+  * pure claim extraction — delegated to [[JwtParser.claimsToSessionUser]]
+  * so amber and the microservices produce identical [[SessionUser]] objects
+  * from the same token.
+  */
 object UserAuthenticator extends Authenticator[JwtContext, SessionUser] with 
LazyLogging {
   override def authenticate(context: JwtContext): Optional[SessionUser] = {
-    // This method will be called once the token's signature has been verified,
-    // including the token secret and the expiration time
     try {
-      val userName = context.getJwtClaims.getSubject
-      val email = 
context.getJwtClaims.getClaimValue("email").asInstanceOf[String]
-      val userId = 
context.getJwtClaims.getClaimValue("userId").asInstanceOf[Long].toInt
-      val role =
-        
UserRoleEnum.valueOf(context.getJwtClaims.getClaimValue("role").asInstanceOf[String])
-      val googleId = 
context.getJwtClaims.getClaimValue("googleId").asInstanceOf[String]
-      val comment = 
context.getJwtClaims.getClaimValue("comment").asInstanceOf[String]
-      val accountCreation =
-        
context.getJwtClaims.getClaimValue("accountCreation").asInstanceOf[OffsetDateTime]
-      val user =
-        new User(
-          userId,
-          userName,
-          email,
-          null,
-          googleId,
-          null,
-          role,
-          comment,
-          accountCreation,
-          null,
-          null
-        )
-      Optional.of(new SessionUser(user))
+      Optional.of(JwtParser.claimsToSessionUser(context.getJwtClaims))
     } catch {
       case e: Exception =>
         logger.error("Failed to authenticate the JwtContext", e)
         Optional.empty()
     }
-
   }
 }
diff --git 
a/amber/src/test/scala/org/apache/texera/web/auth/UserAuthenticatorSpec.scala 
b/amber/src/test/scala/org/apache/texera/web/auth/UserAuthenticatorSpec.scala
new file mode 100644
index 0000000000..d185669caa
--- /dev/null
+++ 
b/amber/src/test/scala/org/apache/texera/web/auth/UserAuthenticatorSpec.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.web.auth
+
+import org.apache.texera.auth.JwtAuth
+import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum
+import org.jose4j.jwt.JwtClaims
+import org.jose4j.jwt.consumer.JwtContext
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class UserAuthenticatorSpec extends AnyFlatSpec with Matchers {
+
+  // Mirror exactly what JwtAuth.jwtClaims would write at issue time, so
+  // the spec doubles as a contract check between the issuer and the
+  // amber-side authenticator.
+  private def buildClaims(): JwtClaims = {
+    val claims = new JwtClaims
+    claims.setSubject("alice")
+    claims.setClaim("userId", 42)
+    claims.setClaim("googleId", "g-123")
+    claims.setClaim("email", "[email protected]")
+    claims.setClaim("role", UserRoleEnum.ADMIN.name)
+    claims.setClaim("googleAvatar", "avatar-blob")
+    claims.setExpirationTimeMinutesInTheFuture(10f)
+    claims
+  }
+
+  // Run a token through the production consumer to get a real JwtContext —
+  // matches what the toastshaman filter hands the authenticator at runtime.
+  private def contextFor(claims: JwtClaims): JwtContext =
+    JwtAuth.jwtConsumer.process(JwtAuth.jwtToken(claims))
+
+  "UserAuthenticator.authenticate" should "delegate to JwtParser and return a 
populated SessionUser" in {
+    val result = UserAuthenticator.authenticate(contextFor(buildClaims()))
+    result.isPresent shouldBe true
+    val u = result.get().getUser
+    u.getUid shouldBe 42
+    u.getName shouldBe "alice"
+    u.getEmail shouldBe "[email protected]"
+    u.getGoogleId shouldBe "g-123"
+    u.getGoogleAvatar shouldBe "avatar-blob"
+    u.getRole shouldBe UserRoleEnum.ADMIN
+  }
+
+  it should "return empty when a required custom claim (userId) is missing" in 
{
+    val claims = new JwtClaims
+    claims.setSubject("alice")
+    claims.setClaim("role", UserRoleEnum.ADMIN.name)
+    claims.setExpirationTimeMinutesInTheFuture(10f)
+    UserAuthenticator.authenticate(contextFor(claims)).isPresent shouldBe false
+  }
+
+  it should "return empty when a required custom claim (role) is missing" in {
+    val claims = new JwtClaims
+    claims.setSubject("alice")
+    claims.setClaim("userId", 42)
+    claims.setExpirationTimeMinutesInTheFuture(10f)
+    UserAuthenticator.authenticate(contextFor(claims)).isPresent shouldBe false
+  }
+}
diff --git a/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala 
b/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala
index 79e331cc64..bb139e7093 100644
--- a/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala
+++ b/common/auth/src/main/scala/org/apache/texera/auth/JwtParser.scala
@@ -20,41 +20,25 @@
 package org.apache.texera.auth
 
 import com.typesafe.scalalogging.LazyLogging
-import org.apache.texera.config.AuthConfig
 import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum
 import org.apache.texera.dao.jooq.generated.tables.pojos.User
 import org.jose4j.jwt.JwtClaims
-import org.jose4j.jwt.consumer.{JwtConsumer, JwtConsumerBuilder}
-import org.jose4j.keys.HmacKey
 import org.jose4j.lang.UnresolvableKeyException
 
-import java.nio.charset.StandardCharsets
 import java.util.Optional
 
+/** Single source of truth for converting a verified JWT into a 
[[SessionUser]].
+  *
+  * Verification reuses [[JwtAuth.jwtConsumer]] (same secret, same clock-skew
+  * config). The claim set extracted here mirrors what [[JwtAuth.jwtClaims]]
+  * writes when issuing a token.
+  */
 object JwtParser extends LazyLogging {
 
-  private val TOKEN_SECRET = AuthConfig.jwtSecretKey
-
-  private val jwtConsumer: JwtConsumer = new JwtConsumerBuilder()
-    .setAllowedClockSkewInSeconds(30)
-    .setRequireExpirationTime()
-    .setRequireSubject()
-    .setVerificationKey(new 
HmacKey(TOKEN_SECRET.getBytes(StandardCharsets.UTF_8)))
-    .setRelaxVerificationKeyValidation()
-    .build()
-
+  /** Verify and parse a Bearer token string. */
   def parseToken(token: String): Optional[SessionUser] = {
     try {
-      val jwtClaims: JwtClaims = jwtConsumer.processToClaims(token)
-      val userName = jwtClaims.getSubject
-      val email = jwtClaims.getClaimValue("email", classOf[String])
-      val userId = jwtClaims.getClaimValue("userId").asInstanceOf[Long].toInt
-      val role = 
UserRoleEnum.valueOf(jwtClaims.getClaimValue("role").asInstanceOf[String])
-      val googleId = jwtClaims.getClaimValue("googleId", classOf[String])
-
-      val user =
-        new User(userId, userName, email, null, googleId, null, role, null, 
null, null, null)
-      Optional.of(new SessionUser(user))
+      
Optional.of(claimsToSessionUser(JwtAuth.jwtConsumer.processToClaims(token)))
     } catch {
       case _: UnresolvableKeyException =>
         logger.error("Invalid JWT Signature")
@@ -64,4 +48,34 @@ object JwtParser extends LazyLogging {
         Optional.empty()
     }
   }
+
+  /** Build a [[SessionUser]] from already-verified claims. Used by both
+    * [[parseToken]] (which verifies then calls this) and amber's
+    * `UserAuthenticator` (which the toastshaman filter calls after its own
+    * signature verification).
+    */
+  def claimsToSessionUser(claims: JwtClaims): SessionUser = {
+    val userName = claims.getSubject
+    val email = claims.getClaimValue("email", classOf[String])
+    // jose4j returns Long after JSON round-trip but the original setClaim
+    // call writes Integer; widen via Number to handle both cases.
+    val userId = claims.getClaimValue("userId", classOf[Number]).intValue()
+    val role = 
UserRoleEnum.valueOf(claims.getClaimValue("role").asInstanceOf[String])
+    val googleId = claims.getClaimValue("googleId", classOf[String])
+    val googleAvatar = claims.getClaimValue("googleAvatar", classOf[String])
+    val user = new User(
+      userId,
+      userName,
+      email,
+      null,
+      googleId,
+      googleAvatar,
+      role,
+      null,
+      null,
+      null,
+      null
+    )
+    new SessionUser(user)
+  }
 }
diff --git 
a/common/auth/src/test/scala/org/apache/texera/auth/JwtParserSpec.scala 
b/common/auth/src/test/scala/org/apache/texera/auth/JwtParserSpec.scala
new file mode 100644
index 0000000000..dc91de4d64
--- /dev/null
+++ b/common/auth/src/test/scala/org/apache/texera/auth/JwtParserSpec.scala
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.auth
+
+import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum
+import org.apache.texera.dao.jooq.generated.tables.pojos.User
+import org.jose4j.jws.AlgorithmIdentifiers.HMAC_SHA256
+import org.jose4j.jws.JsonWebSignature
+import org.jose4j.jwt.JwtClaims
+import org.jose4j.keys.HmacKey
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+import java.nio.charset.StandardCharsets
+
+class JwtParserSpec extends AnyFlatSpec with Matchers {
+
+  private def buildClaims(): JwtClaims = {
+    // Mirror exactly what JwtAuth.jwtClaims would write at issue time, so
+    // this spec doubles as a contract test between the issuer and parser.
+    val claims = new JwtClaims
+    claims.setSubject("alice")
+    claims.setClaim("userId", 42)
+    claims.setClaim("googleId", "g-123")
+    claims.setClaim("email", "[email protected]")
+    claims.setClaim("role", UserRoleEnum.ADMIN.name)
+    claims.setClaim("googleAvatar", "avatar-blob")
+    claims.setExpirationTimeMinutesInTheFuture(10f)
+    claims
+  }
+
+  "JwtParser.claimsToSessionUser" should "populate every issued claim 
including googleAvatar" in {
+    val user: User = JwtParser.claimsToSessionUser(buildClaims()).getUser
+    user.getUid shouldBe 42
+    user.getName shouldBe "alice"
+    user.getEmail shouldBe "[email protected]"
+    user.getGoogleId shouldBe "g-123"
+    user.getGoogleAvatar shouldBe "avatar-blob"
+    user.getRole shouldBe UserRoleEnum.ADMIN
+  }
+
+  it should "leave non-issued slots null (password, comment, accountCreation, 
affiliation, joiningReason)" in {
+    val user: User = JwtParser.claimsToSessionUser(buildClaims()).getUser
+    user.getPassword shouldBe null
+    user.getComment shouldBe null
+    user.getAccountCreationTime shouldBe null
+    user.getAffiliation shouldBe null
+    user.getJoiningReason shouldBe null
+  }
+
+  it should "round-trip a token issued by JwtAuth.jwtToken" in {
+    val token = JwtAuth.jwtToken(buildClaims())
+    val parsed = JwtParser.parseToken(token)
+    parsed.isPresent shouldBe true
+    val u = parsed.get().getUser
+    u.getUid shouldBe 42
+    u.getGoogleAvatar shouldBe "avatar-blob"
+  }
+
+  "JwtParser.parseToken" should "return empty on a structurally invalid token" 
in {
+    JwtParser.parseToken("not-a-real-jwt").isPresent shouldBe false
+  }
+
+  it should "return empty when the token is signed with the wrong secret" in {
+    val token = signWith(buildClaims(), 
"definitely-not-the-real-secret-for-testing-only")
+    JwtParser.parseToken(token).isPresent shouldBe false
+  }
+
+  it should "return empty when the token is expired" in {
+    val claims = new JwtClaims
+    claims.setSubject("alice")
+    claims.setClaim("userId", 42)
+    claims.setClaim("role", UserRoleEnum.ADMIN.name)
+    claims.setExpirationTimeMinutesInTheFuture(-10f) // expired 10 minutes ago
+    val token = JwtAuth.jwtToken(claims)
+    JwtParser.parseToken(token).isPresent shouldBe false
+  }
+
+  it should "return empty when the token has no subject claim" in {
+    val claims = new JwtClaims
+    claims.setClaim("userId", 42)
+    claims.setClaim("role", UserRoleEnum.ADMIN.name)
+    claims.setExpirationTimeMinutesInTheFuture(10f)
+    val token = JwtAuth.jwtToken(claims)
+    JwtParser.parseToken(token).isPresent shouldBe false
+  }
+
+  it should "return empty when the token has no exp claim" in {
+    // The shared consumer is built with setRequireExpirationTime(); a token
+    // missing exp must be rejected even if the signature is valid.
+    val claims = new JwtClaims
+    claims.setSubject("alice")
+    claims.setClaim("userId", 42)
+    claims.setClaim("role", UserRoleEnum.ADMIN.name)
+    val token = JwtAuth.jwtToken(claims)
+    JwtParser.parseToken(token).isPresent shouldBe false
+  }
+
+  it should "still accept a token expired within the 30s clock-skew window" in 
{
+    val claims = buildClaims()
+    // 5 seconds ago — well inside setAllowedClockSkewInSeconds(30).
+    claims.setExpirationTimeMinutesInTheFuture(-0.083f)
+    val token = JwtAuth.jwtToken(claims)
+    JwtParser.parseToken(token).isPresent shouldBe true
+  }
+
+  it should "reject a token expired beyond the 30s clock-skew window" in {
+    val claims = buildClaims()
+    // 90 seconds ago — past the 30s allowance.
+    claims.setExpirationTimeMinutesInTheFuture(-1.5f)
+    val token = JwtAuth.jwtToken(claims)
+    JwtParser.parseToken(token).isPresent shouldBe false
+  }
+
+  it should "return empty when the signature segment is tampered" in {
+    val token = JwtAuth.jwtToken(buildClaims())
+    val parts = token.split('.')
+    parts.length shouldBe 3
+    val tampered = s"${parts(0)}.${parts(1)}.${parts(2).reverse}"
+    JwtParser.parseToken(tampered).isPresent shouldBe false
+  }
+
+  it should "return empty when the payload segment is tampered" in {
+    // Re-base64 a claim with a different userId; the signature in parts(2)
+    // covers parts(0).parts(1), so the rebuilt token won't verify.
+    val token = JwtAuth.jwtToken(buildClaims())
+    val parts = token.split('.')
+    val swappedClaims = new JwtClaims
+    swappedClaims.setSubject("mallory")
+    swappedClaims.setClaim("userId", 99999)
+    swappedClaims.setClaim("role", UserRoleEnum.ADMIN.name)
+    swappedClaims.setExpirationTimeMinutesInTheFuture(10f)
+    val rebuiltPayload = 
java.util.Base64.getUrlEncoder.withoutPadding.encodeToString(
+      swappedClaims.toJson.getBytes(StandardCharsets.UTF_8)
+    )
+    val tampered = s"${parts(0)}.$rebuiltPayload.${parts(2)}"
+    JwtParser.parseToken(tampered).isPresent shouldBe false
+  }
+
+  "JwtParser life cycle" should "round-trip distinct users without state 
leakage" in {
+    val alice = buildClaims()
+    val bob = new JwtClaims
+    bob.setSubject("bob")
+    bob.setClaim("userId", 7)
+    bob.setClaim("googleId", "g-bob")
+    bob.setClaim("email", "[email protected]")
+    bob.setClaim("role", UserRoleEnum.REGULAR.name)
+    bob.setClaim("googleAvatar", "bob-avatar")
+    bob.setExpirationTimeMinutesInTheFuture(10f)
+
+    val aliceUser = JwtParser.parseToken(JwtAuth.jwtToken(alice)).get().getUser
+    val bobUser = JwtParser.parseToken(JwtAuth.jwtToken(bob)).get().getUser
+
+    aliceUser.getUid shouldBe 42
+    aliceUser.getName shouldBe "alice"
+    aliceUser.getRole shouldBe UserRoleEnum.ADMIN
+    bobUser.getUid shouldBe 7
+    bobUser.getName shouldBe "bob"
+    bobUser.getRole shouldBe UserRoleEnum.REGULAR
+  }
+
+  it should "produce equivalent SessionUser objects when re-parsed multiple 
times" in {
+    val token = JwtAuth.jwtToken(buildClaims())
+    val first = JwtParser.parseToken(token).get().getUser
+    val second = JwtParser.parseToken(token).get().getUser
+    first.getUid shouldBe second.getUid
+    first.getName shouldBe second.getName
+    first.getEmail shouldBe second.getEmail
+    first.getGoogleAvatar shouldBe second.getGoogleAvatar
+    first.getRole shouldBe second.getRole
+  }
+
+  /** Sign a JwtClaims payload with an arbitrary secret. Used to produce a
+    * token whose signature won't verify against the real consumer's key.
+    */
+  private def signWith(claims: JwtClaims, secret: String): String = {
+    val jws = new JsonWebSignature
+    jws.setPayload(claims.toJson)
+    jws.setAlgorithmHeaderValue(HMAC_SHA256)
+    jws.setKey(new HmacKey(secret.getBytes(StandardCharsets.UTF_8)))
+    jws.getCompactSerialization
+  }
+}

Reply via email to