Craig Condit created YUNIKORN-1228:
--------------------------------------

             Summary: Race condition when serializing K8s objects
                 Key: YUNIKORN-1228
                 URL: https://issues.apache.org/jira/browse/YUNIKORN-1228
             Project: Apache YuniKorn
          Issue Type: Bug
          Components: shim - kubernetes
            Reporter: Craig Condit
            Assignee: Craig Condit


A race was recently uncovered during testing:

 
{noformat}
==================
WARNING: DATA RACE
Read at 0x00c000581810 by goroutine 46:
  k8s.io/apimachinery/pkg/apis/meta/v1.(*TypeMeta).GroupVersionKind()
      
/home/testuser/go/pkg/mod/k8s.io/apimachinery@v0.20.11/pkg/apis/meta/v1/meta.go:128
 +0x64
  k8s.io/client-go/tools/reference.GetReference()
      
/home/testuser/go/pkg/mod/k8s.io/client-go@v0.20.11/tools/reference/ref.go:59 
+0x17d
  k8s.io/client-go/tools/events.(*recorderImpl).Eventf()
      
/home/testuser/go/pkg/mod/k8s.io/client-go@v0.20.11/tools/events/event_recorder.go:46
 +0x119
  
github.com/apache/yunikorn-k8shim/pkg/plugin/predicates.(*predicateManagerImpl).predicatesAllocate()
      
/home/testuser/repos/incubator-yunikorn-k8shim/pkg/plugin/predicates/predicate_manager.go:80
 +0x36f
  
github.com/apache/yunikorn-k8shim/pkg/plugin/predicates.(*predicateManagerImpl).Predicates()
      
/home/testuser/repos/incubator-yunikorn-k8shim/pkg/plugin/predicates/predicate_manager.go:64
 +0x52
  github.com/apache/yunikorn-k8shim/pkg/cache.(*Context).IsPodFitNode()
      /home/testuser/repos/incubator-yunikorn-k8shim/pkg/cache/context.go:341 
+0x241
  github.com/apache/yunikorn-k8shim/pkg/callback.(*AsyncRMCallback).Predicates()
      
/home/testuser/repos/incubator-yunikorn-k8shim/pkg/callback/scheduler_callback.go:187
 +0xb7
  github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Node).preConditions()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/objects/node.go:386 
+0x1c7
  
github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Node).preAllocateConditions()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/objects/node.go:368 
+0xe4
  github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Application).tryNode()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/objects/application.go:1190
 +0xe7
  
github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Application).tryNodes()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/objects/application.go:1112
 +0x7c4
  
github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Application).tryAllocate()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/objects/application.go:849
 +0x7a4
  github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).TryAllocate()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/objects/queue.go:1070
 +0x18c
  github.com/apache/yunikorn-core/pkg/scheduler/objects.(*Queue).TryAllocate()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/objects/queue.go:1082
 +0xf7
  
github.com/apache/yunikorn-core/pkg/scheduler.(*PartitionContext).tryAllocate()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/partition.go:831 
+0x15c
  github.com/apache/yunikorn-core/pkg/scheduler.(*ClusterContext).schedule()
      /home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/context.go:137 
+0x1b6
  github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).internalSchedule()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/scheduler.go:77 +0x47
  
github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService.func2()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/scheduler.go:67 
+0x39Previous write at 0x00c000581810 by goroutine 47:
  k8s.io/apimachinery/pkg/apis/meta/v1.(*TypeMeta).SetGroupVersionKind()
      
/home/testuser/go/pkg/mod/k8s.io/apimachinery@v0.20.11/pkg/apis/meta/v1/meta.go:123
 +0x190
  k8s.io/apimachinery/pkg/runtime.WithVersionEncoder.Encode()
      
/home/testuser/go/pkg/mod/k8s.io/apimachinery@v0.20.11/pkg/runtime/helper.go:241
 +0x408
  k8s.io/apimachinery/pkg/runtime.(*WithVersionEncoder).Encode()
      <autogenerated>:1 +0xfb
  k8s.io/apimachinery/pkg/runtime.Encode()
      
/home/testuser/go/pkg/mod/k8s.io/apimachinery@v0.20.11/pkg/runtime/codec.go:50 
+0xb3
  k8s.io/client-go/rest.(*Request).Body()
      /home/testuser/go/pkg/mod/k8s.io/client-go@v0.20.11/rest/request.go:453 
+0x87c
  k8s.io/client-go/kubernetes/typed/core/v1.(*pods).UpdateStatus()
      
/home/testuser/go/pkg/mod/k8s.io/client-go@v0.20.11/kubernetes/typed/core/v1/pod.go:152
 +0x2ec
  github.com/apache/yunikorn-k8shim/pkg/cache.(*Context).updatePodCondition()
      /home/testuser/repos/incubator-yunikorn-k8shim/pkg/cache/context.go:821 
+0x83c
  
github.com/apache/yunikorn-k8shim/pkg/cache.(*Context).HandleContainerStateUpdate()
      /home/testuser/repos/incubator-yunikorn-k8shim/pkg/cache/context.go:858 
+0x2c6
  
github.com/apache/yunikorn-k8shim/pkg/callback.(*AsyncRMCallback).UpdateContainerSchedulingState()
      
/home/testuser/repos/incubator-yunikorn-k8shim/pkg/callback/scheduler_callback.go:198
 +0x45
  
github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).inspectOutstandingRequests()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/scheduler.go:179 
+0x535
  
github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).internalInspectOutstandingRequests()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/scheduler.go:94 +0x38
  
github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService.func3()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/scheduler.go:68 
+0x39Goroutine 46 (running) created at:
  github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/scheduler.go:67 
+0x384
  
github.com/apache/yunikorn-core/pkg/entrypoint.startAllServicesWithParameters()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/entrypoint/entrypoint.go:90 
+0x624
  github.com/apache/yunikorn-core/pkg/entrypoint.StartAllServices()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/entrypoint/entrypoint.go:44 
+0x4f
  github.com/apache/yunikorn-core/pkg/entrypoint.StartAllServicesWithLogger()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/entrypoint/entrypoint.go:55 
+0x3b
  main.main()
      /home/testuser/repos/incubator-yunikorn-k8shim/pkg/cmd/shim/main.go:50 
+0x4c4Goroutine 47 (running) created at:
  github.com/apache/yunikorn-core/pkg/scheduler.(*Scheduler).StartService()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/scheduler/scheduler.go:68 
+0x3f0
  
github.com/apache/yunikorn-core/pkg/entrypoint.startAllServicesWithParameters()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/entrypoint/entrypoint.go:90 
+0x624
  github.com/apache/yunikorn-core/pkg/entrypoint.StartAllServices()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/entrypoint/entrypoint.go:44 
+0x4f
  github.com/apache/yunikorn-core/pkg/entrypoint.StartAllServicesWithLogger()
      
/home/testuser/repos/incubator-yunikorn-core/pkg/entrypoint/entrypoint.go:55 
+0x3b
  main.main()
      /home/testuser/repos/incubator-yunikorn-k8shim/pkg/cmd/shim/main.go:50 
+0x4c4
=================={noformat}
This seems to be due to the serialization that K8s does whens ending objects 
over the wire. If the version component of a GroupKindVersion object (which is 
a member of every K8s object) is not set, it is updated in-line. This 
transforms what would seemingly be a read operation into a read-write 
operation. We need to create defensive copies before calling methods which will 
serialize a K8s object over the wire, including event generation and updates to 
nodes or pods.



--
This message was sent by Atlassian Jira
(v8.20.7#820007)

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@yunikorn.apache.org
For additional commands, e-mail: dev-h...@yunikorn.apache.org

Reply via email to