[ 
https://issues.apache.org/jira/browse/FLINK-3544?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15206128#comment-15206128
 ] 

ASF GitHub Bot commented on FLINK-3544:
---------------------------------------

Github user mxm commented on a diff in the pull request:

    https://github.com/apache/flink/pull/1741#discussion_r56963220
  
    --- Diff: 
flink-runtime/src/main/scala/org/apache/flink/runtime/jobmanager/JobManager.scala
 ---
    @@ -312,59 +323,125 @@ class JobManager(
     
           leaderSessionID = None
     
    -    case RegisterTaskManager(
    -      connectionInfo,
    -      hardwareInformation,
    -      numberOfSlots) =>
    +    case msg: RegisterResourceManager =>
    +      log.debug(s"Resource manager registration: $msg")
    +
    +      // ditch current resource manager (if any)
    +      currentResourceManager = Option(msg.resourceManager())
    +
    +      val taskManagerResources = 
instanceManager.getAllRegisteredInstances.asScala.map(
    +        instance => instance.getResourceId).toList.asJava
    +
    +      // confirm registration and send known task managers with their 
resource ids
    +      sender ! decorateMessage(new RegisterResourceManagerSuccessful(self, 
taskManagerResources))
    +
    +    case msg: DisconnectResourceManager =>
    +      log.debug(s"Resource manager disconnect: $msg")
    +
    +      currentResourceManager match {
    +        case Some(rm) if rm.equals(msg.resourceManager()) =>
    +          // we should ditch the current resource manager
    +          log.debug(s"Disconnecting resource manager $rm.")
    +          // send the old one a disconnect message
    +          rm ! decorateMessage(new TriggerRegistrationAtJobManager(self))
    +          currentResourceManager = None
    +        case None =>
    +          // not connected, thus ignoring this message
    +          log.warn(s"No resource manager ${msg.resourceManager()} 
connected. Can't disconnect.")
    +      }
    +
    +    case msg @ RegisterTaskManager(
    +          resourceId,
    +          connectionInfo,
    +          hardwareInformation,
    +          numberOfSlots) =>
    +      // we are being informed by the ResourceManager that a new task 
manager is available
    +      log.debug(s"RegisterTaskManager: $msg")
     
           val taskManager = sender()
     
    +      currentResourceManager match {
    +        case Some(rm) =>
    +          val future = (rm ? decorateMessage(new 
RegisterResource(taskManager, msg)))(timeout)
    +          future.onComplete {
    +            case scala.util.Success(response) =>
    +              // the resource manager is available and answered
    +              self ! response
    +            case scala.util.Failure(t) =>
    +              // slow or unreachable resource manager, register anyway and 
let the rm reconnect
    +              self ! decorateMessage(new 
RegisterResourceSuccessful(taskManager, msg))
    +              self ! decorateMessage(new DisconnectResourceManager(rm))
    +          }(context.dispatcher)
    +
    +        case None =>
    +          log.info("Task Manager Registration but not connected to 
ResourceManager")
    +          // ResourceManager not yet available
    +          // sending task manager information later upon ResourceManager 
registration
    +          self ! decorateMessage(new 
RegisterResourceSuccessful(taskManager, msg))
    +      }
    +
    +    case msg: RegisterResourceSuccessful =>
    +
    +      val originalMsg = msg.getRegistrationMessage
    +      val taskManager = msg.getTaskManager
    +
    +      // ResourceManager knows about the resource, now let's try to 
register TaskManager
           if (instanceManager.isRegistered(taskManager)) {
             val instanceID = 
instanceManager.getRegisteredInstance(taskManager).getId
     
    -        // IMPORTANT: Send the response to the "sender", which is not the
    -        //            TaskManager actor, but the ask future!
    -        sender() ! decorateMessage(
    +        taskManager ! decorateMessage(
               AlreadyRegistered(
                 instanceID,
    -            libraryCacheManager.getBlobServerPort)
    -        )
    -      }
    -      else {
    +            libraryCacheManager.getBlobServerPort))
    +      } else {
             try {
               val instanceID = instanceManager.registerTaskManager(
                 taskManager,
    -            connectionInfo,
    -            hardwareInformation,
    -            numberOfSlots,
    +            originalMsg.resourceId,
    +            originalMsg.connectionInfo,
    +            originalMsg.resources,
    +            originalMsg.numberOfSlots,
                 leaderSessionID.orNull)
     
    -          // IMPORTANT: Send the response to the "sender", which is not the
    -          //            TaskManager actor, but the ask future!
    -          sender() ! decorateMessage(
    -            AcknowledgeRegistration(
    -              instanceID,
    -              libraryCacheManager.getBlobServerPort)
    -          )
    +          taskManager ! decorateMessage(
    +            AcknowledgeRegistration(instanceID, 
libraryCacheManager.getBlobServerPort))
     
               // to be notified when the taskManager is no longer reachable
               context.watch(taskManager)
    -        }
    -        catch {
    +        } catch {
               // registerTaskManager throws an IllegalStateException if it is 
already shut down
               // let the actor crash and restart itself in this case
               case e: Exception =>
                 log.error("Failed to register TaskManager at instance 
manager", e)
     
    -            // IMPORTANT: Send the response to the "sender", which is not 
the
    -            //            TaskManager actor, but the ask future!
    -            sender() ! decorateMessage(
    +            taskManager ! decorateMessage(
                   RefuseRegistration(
    -                ExceptionUtils.stringifyException(e))
    -            )
    +                ExceptionUtils.stringifyException(e)))
             }
           }
     
    +    case msg: RegisterResourceFailed =>
    +
    +      val taskManager = msg.getTaskManager
    +      val resourceId = msg.getResourceID
    +      log.warn(s"TaskManager's resource id $resourceId is not registered 
with ResourceManager. " +
    +        s"Refusing registration.")
    +
    +      taskManager ! decorateMessage(
    +        RefuseRegistration(
    +          ExceptionUtils.stringifyException(new IllegalStateException(
    --- End diff --
    
    All I'm saying is that I'm not fully aware of which subtle bugs this change 
would introduce. It seems like String was deliberately chosen to avoid 
ClassNotFoundExceptions (think of different Flink versions talking to each 
other). Thus, I'd rather not change it.


> ResourceManager runtime components
> ----------------------------------
>
>                 Key: FLINK-3544
>                 URL: https://issues.apache.org/jira/browse/FLINK-3544
>             Project: Flink
>          Issue Type: Sub-task
>          Components: ResourceManager
>    Affects Versions: 1.1.0
>            Reporter: Maximilian Michels
>            Assignee: Maximilian Michels
>             Fix For: 1.1.0
>
>




--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to