[ https://issues.apache.org/jira/browse/MESOS-5799?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15365484#comment-15365484 ]
Yubo Li commented on MESOS-5799: -------------------------------- docker 1.9.1 and mesos master branch synced yesterday. > docker::inspect() may get wrong output when a docker container is not in > "running" state > ---------------------------------------------------------------------------------------- > > Key: MESOS-5799 > URL: https://issues.apache.org/jira/browse/MESOS-5799 > Project: Mesos > Issue Type: Bug > Components: containerization, docker > Reporter: Kevin Klues > Assignee: Benjamin Mahler > Labels: containerizer, docker > Fix For: 1.0.0 > > > I (klueska) am copying the text from an email I got about a bug report from > Yubo Li at IBM. > docker::inspect() may get wrong output when the docker container is not in > "running" state. In this case, the "docker inspect" will fail to parse > data, and the system cannot enter TASK_RUNNING status. > I attached the related logs from stderr, in which I printed the docker inspect output. The > inspected output shows that the docker container is in "created" status, not "running", > so many of the inspect fields are invalid. > Possible Fix: detect the "State->Running" field, and return success when > "State->Running" is true. 
> {noformat} > I0706 09:01:05.342895 2975 docker.cpp:780] Running docker -H > unix:///var/run/docker.sock run --cpu-shares 512 --memory 536870912 -e > MARATHON_APP_VERSION=2016-07-06T08:15:02.610Z -e HOST=9.186.57.67 -e > MARATHON_APP_RESOURCE_CPUS=0.5 -e MARATHON_APP_RESOURCE_GPUS=1 -e > MARATHON_APP_DOCKER_IMAGE=cuda_test_v0.1 -e PORT_10000=31435 -e > MESOS_TASK_ID=ubuntu-gpu-32520.29f083bf-4358-11e6-b886-2ee1446b5607 -e > PORT=31435 -e MARATHON_APP_RESOURCE_MEM=512.0 -e PORTS=31435 -e > MARATHON_APP_RESOURCE_DISK=0.0 -e MARATHON_APP_LABELS= -e > MARATHON_APP_ID=/ubuntu-gpu-32520 -e PORT0=31435 -e > MESOS_SANDBOX=/mnt/mesos/sandbox -e > MESOS_CONTAINER_NAME=mesos-1875c0d3-9712-43c3-9d58-572c89fac50b-S1.cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439 > -v > /var/run/mesos/slaves/1875c0d3-9712-43c3-9d58-572c89fac50b-S1/frameworks/aee07017-f8e6-4ed5-8008-b4ea3a090282-0000/executors/ubuntu-gpu-32520.29f083bf-4358-11e6-b886-2ee1446b5607/runs/cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439:/mnt/mesos/sandbox > --net host --device=/dev/nvidiactl:/dev/nvidiactl:rwm > --device=/dev/nvidia-uvm:/dev/nvidia-uvm:rwm > --device=/dev/nvidia0:/dev/nvidia0:rwm --entrypoint /bin/sh --name > mesos-1875c0d3-9712-43c3-9d58-572c89fac50b-S1.cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439 > cuda_test_v0.1 -c nvidia-smi && sleep 60s > I0706 09:01:05.345935 2975 docker.cpp:943] Running docker -H > unix:///var/run/docker.sock inspect > mesos-1875c0d3-9712-43c3-9d58-572c89fac50b-S1.cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439 > I0706 09:01:05.548992 2976 docker.cpp:249] Docker inspect: [ > { > "Id": "5a4dc17e739b60593c04abf310f2485dddea832476e83007387b612839933f5a", > "Created": "2016-07-06T09:01:05.531216924Z", > "Path": "/bin/sh", > "Args": [ > "-c", > "nvidia-smi \u0026\u0026 sleep 60s" > ], > "State": { > "Status": "created", > "Running": false, > "Paused": false, > "Restarting": false, > "OOMKilled": false, > "Dead": false, > "Pid": 0, > "ExitCode": 0, > "Error": "", > "StartedAt": "0001-01-01T00:00:00Z", > "FinishedAt": 
"0001-01-01T00:00:00Z" > }, > "Image": > "8cf6c8da7045ec24b1e561906dfa54ab0276753ec617e139a7b2da3ef72d245e", > "ResolvConfPath": "", > "HostnamePath": "", > "HostsPath": "", > "LogPath": "", > "Name": > "/mesos-1875c0d3-9712-43c3-9d58-572c89fac50b-S1.cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439", > "RestartCount": 0, > "Driver": "aufs", > "ExecDriver": "native-0.2", > "MountLabel": "", > "ProcessLabel": "", > "AppArmorProfile": "", > "ExecIDs": null, > "HostConfig": { > "Binds": null, > "ContainerIDFile": "", > "LxcConf": null, > "Memory": 0, > "MemoryReservation": 0, > "MemorySwap": 0, > "KernelMemory": 0, > "CpuShares": 0, > "CpuPeriod": 0, > "CpusetCpus": "", > "CpusetMems": "", > "CpuQuota": 0, > "BlkioWeight": 0, > "OomKillDisable": false, > "MemorySwappiness": null, > "Privileged": false, > "PortBindings": null, > "Links": null, > "PublishAllPorts": false, > "Dns": null, > "DnsOptions": null, > "DnsSearch": null, > "ExtraHosts": null, > "VolumesFrom": null, > "Devices": null, > "NetworkMode": "", > "IpcMode": "", > "PidMode": "", > "UTSMode": "", > "CapAdd": null, > "CapDrop": null, > "GroupAdd": null, > "RestartPolicy": { > "Name": "", > "MaximumRetryCount": 0 > }, > "SecurityOpt": null, > "ReadonlyRootfs": false, > "Ulimits": null, > "LogConfig": { > "Type": "json-file", > "Config": {} > }, > "CgroupParent": "", > "ConsoleSize": [ > 0, > 0 > ], > "VolumeDriver": "" > }, > "GraphDriver": { > "Name": "aufs", > "Data": null > }, > "Mounts": [], > "Config": { > "Hostname": "5a4dc17e739b", > "Domainname": "", > "User": "", > "AttachStdin": false, > "AttachStdout": true, > "AttachStderr": true, > "Tty": false, > "OpenStdin": false, > "StdinOnce": false, > "Env": [ > "MARATHON_APP_VERSION=2016-07-06T08:15:02.610Z", > "HOST=9.186.57.67", > "MARATHON_APP_RESOURCE_CPUS=0.5", > "MARATHON_APP_RESOURCE_GPUS=1", > "MARATHON_APP_DOCKER_IMAGE=cuda_test_v0.1", > "PORT_10000=31435", > > "MESOS_TASK_ID=ubuntu-gpu-32520.29f083bf-4358-11e6-b886-2ee1446b5607", > "PORT=31435", > 
"MARATHON_APP_RESOURCE_MEM=512.0", > "PORTS=31435", > "MARATHON_APP_RESOURCE_DISK=0.0", > "MARATHON_APP_LABELS=", > "MARATHON_APP_ID=/ubuntu-gpu-32520", > "PORT0=31435", > "MESOS_SANDBOX=/mnt/mesos/sandbox", > > "MESOS_CONTAINER_NAME=mesos-1875c0d3-9712-43c3-9d58-572c89fac50b-S1.cfe287a0-8a37-4a0f-8ffb-55eb0e6e4439", > > "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" > ], > "Cmd": [ > "-c", > "nvidia-smi \u0026\u0026 sleep 60s" > ], > "Image": "cuda_test_v0.1", > "Volumes": null, > "WorkingDir": "", > "Entrypoint": [ > "/bin/sh" > ], > "OnBuild": null, > "Labels": {}, > "StopSignal": "SIGTERM" > }, > "NetworkSettings": { > "Bridge": "", > "SandboxID": "", > "HairpinMode": false, > "LinkLocalIPv6Address": "", > "LinkLocalIPv6PrefixLen": 0, > "Ports": null, > "SandboxKey": "", > "SecondaryIPAddresses": null, > "SecondaryIPv6Addresses": null, > "EndpointID": "", > "Gateway": "", > "GlobalIPv6Address": "", > "GlobalIPv6PrefixLen": 0, > "IPAddress": "", > "IPPrefixLen": 0, > "IPv6Gateway": "", > "MacAddress": "", > "Networks": null > } > } > ] > I0706 09:01:05.549659 2976 docker.cpp:335] Unable to detect IP Address at > 'NetworkSettings.Networks..IPAddress', attempting deprecated field > WARNING: Your kernel does not support swap limit capabilities, memory limited > without swap. > I0706 09:01:52.983609 2973 exec.cpp:486] Agent exited, but framework has > checkpointing enabled. 
Waiting 15mins to reconnect with agent > 1875c0d3-9712-43c3-9d58-572c89fac50b-S1 > I0706 09:02:06.057607 2978 exec.cpp:549] Executor sending status update > TASK_FINISHED (UUID: 2cff35f2-9512-4120-b912-74a82c197696) for task > ubuntu-gpu-32520.29f083bf-4358-11e6-b886-2ee1446b5607 of framework > aee07017-f8e6-4ed5-8008-b4ea3a090282-0000 > I0706 09:02:06.058717 2980 poll_socket.cpp:131] Socket error while connecting > I0706 09:02:06.058815 2980 process.cpp:1799] Failed to send > 'mesos.internal.StatusUpdateMessage' to '127.0.1.1:5051', connect: Socket > error while connecting > E0706 09:02:06.058931 2980 process.cpp:2104] Failed to shutdown socket with > fd 6: Transport endpoint is not connected > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)