This is an automated email from the ASF dual-hosted git repository.
pingsutw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/submarine.git
The following commit(s) were added to refs/heads/master by this push:
new 55e4584 SUBMARINE-730. Store tensorboard data in NFS
55e4584 is described below
commit 55e4584aaf904b4a8651775c8ed11dacb848e00e
Author: ByronHsu <[email protected]>
AuthorDate: Wed Feb 17 19:33:39 2021 +0800
SUBMARINE-730. Store tensorboard data in NFS
### What is this PR for?
This PR is a follow-up to
[SUBMARINE-717](https://github.com/apache/submarine/pull/498).
It makes the following changes:
1. Move the hostPath persistent storage from `/.submarine` to
`/tmp/submarine` to avoid a permission problem (thanks to xunliu).
2. Support storing tensorboard data in NFS.
### What type of PR is it?
[Improvement]
### Todos
* [ ] - Store notebook data in NFS
### What is the Jira issue?
https://issues.apache.org/jira/projects/SUBMARINE/issues/SUBMARINE-730
### How should this be tested?
### Screenshots (if appropriate)
On the NFS server, the data is stored in the following structure:
```
[root@nfs-server-bd4ddb8d4-qps5j exports]# tree -L 2
.
|-- index.html
|-- submarine-database
| |-- auto.cnf
| |-- ca-key.pem
| |-- ca.pem
| |-- client-cert.pem
| |-- client-key.pem
| |-- dummy
| |-- ib_buffer_pool
| |-- ib_logfile0
| |-- ib_logfile1
| |-- ibdata1
| |-- ibtmp1
| |-- metastore
| |-- metastore_test
| |-- mysql
| |-- performance_schema
| |-- private_key.pem
| |-- public_key.pem
| |-- server-cert.pem
| |-- server-key.pem
| |-- submarine
| |-- submarine_test
| `-- sys
`-- submarine-tensorboard
|-- tensorflow-tensorboard-dist-mnist
`-- tensorflow-tensorboard-dist-mnist-2
```
### Questions:
* Do the license files need updating? No
* Are there breaking changes for older versions? No
* Does this need documentation? No
Author: ByronHsu <[email protected]>
Author: Byron <[email protected]>
Signed-off-by: Kevin <[email protected]>
Closes #504 from ByronHsu/NFS_TFBOARD and squashes the following commits:
77bdc620 [ByronHsu] merge conflict
2352a946 [ByronHsu] default should be host volume
a21369d4 [ByronHsu] store tensorboard data in nfs
e3daf696 [ByronHsu] solve conflict
3f93fb8c [ByronHsu] default is host volume
40ec1bfc [ByronHsu] remove persistent volume in nfs-server
ea2ee8a8 [ByronHsu] update path
f12b45f5 [Byron] minor fix
a9196b85 [Byron] add nfs support for mysql
---
.../submarine/templates/submarine-database.yaml | 7 +---
.../submarine/templates/submarine-tensorboard.yaml | 45 +++++++++++++---------
helm-charts/submarine/values.yaml | 10 ++++-
.../server/submitter/k8s/K8sSubmitter.java | 4 +-
.../submitter/k8s/parser/ExperimentSpecParser.java | 7 +++-
5 files changed, 44 insertions(+), 29 deletions(-)
diff --git a/helm-charts/submarine/templates/submarine-database.yaml
b/helm-charts/submarine/templates/submarine-database.yaml
index 562503c..530029b 100644
--- a/helm-charts/submarine/templates/submarine-database.yaml
+++ b/helm-charts/submarine/templates/submarine-database.yaml
@@ -23,19 +23,16 @@ spec:
- ReadWriteMany
capacity:
storage: 1Gi
-
{{- with .Values.submarine.storage }}
-
{{- if eq (.type | lower) "nfs" }}
nfs:
server: {{ .nfs.ip }}
- path: "/"
+ path: {{ .nfs.path }}
{{- else }}
hostPath:
- path: "{{ .host.root }}"
+ path: "{{ .host.path }}"
type: DirectoryOrCreate
{{- end }}
-
{{- end}}
---
apiVersion: v1
diff --git a/helm-charts/submarine/templates/submarine-tensorboard.yaml
b/helm-charts/submarine/templates/submarine-tensorboard.yaml
index ea71d00..ff32ee1 100644
--- a/helm-charts/submarine/templates/submarine-tensorboard.yaml
+++ b/helm-charts/submarine/templates/submarine-tensorboard.yaml
@@ -17,67 +17,76 @@
apiVersion: v1
kind: PersistentVolume
metadata:
- name: tensorboard-pv
+ name: submarine-tensorboard-pv
spec:
accessModes:
- ReadWriteMany
capacity:
storage: "{{ .Values.submarine.tensorboard.storage }}"
- storageClassName: standard
+{{- with .Values.submarine.storage }}
+ {{- if eq (.type | lower) "nfs" }}
+ nfs:
+ server: {{ .nfs.ip }}
+ path: {{ .nfs.path }}
+ {{- else }}
hostPath:
- path: "{{ .Values.submarine.tensorboard.hostPath }}"
+ path: "{{ .host.path }}"
+ type: DirectoryOrCreate
+ {{- end }}
+{{- end}}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
- name: tensorboard-pvc
+ name: submarine-tensorboard-pvc
spec:
accessModes:
- ReadWriteMany
- storageClassName: standard
+ storageClassName: ""
resources:
requests:
storage: "{{ .Values.submarine.tensorboard.storage }}"
- volumeName: tensorboard-pv # bind to specific pv
+ volumeName: submarine-tensorboard-pv # bind to specific pv
---
apiVersion: apps/v1
kind: Deployment
metadata:
- name: tensorboard
+ name: submarine-tensorboard
spec:
selector:
matchLabels:
- app: tensorboard-pod
+ app: submarine-tensorboard-pod
template:
metadata:
labels:
- app: tensorboard-pod
+ app: submarine-tensorboard-pod
spec:
containers:
- - name: tensorboard-container
+ - name: submarine-tensorboard-container
image: tensorflow/tensorflow:1.11.0
command:
- "tensorboard"
- "--logdir=/logs"
- - "--path_prefix=/tensorboard"
+ - "--path_prefix={{ .Values.submarine.tensorboard.ingressPath }}"
imagePullPolicy: IfNotPresent
ports:
- containerPort: 6006
volumeMounts:
- mountPath: "/logs"
name: "volume"
+ subPath: "submarine-tensorboard"
volumes:
- name: "volume"
persistentVolumeClaim:
- claimName: "tensorboard-pvc"
+ claimName: "submarine-tensorboard-pvc"
---
apiVersion: v1
kind: Service
metadata:
- name: tensorboard-service
+ name: submarine-tensorboard-service
spec:
selector:
- app: tensorboard-pod
+ app: submarine-tensorboard-pod
ports:
- protocol: TCP
port: 8080
@@ -86,14 +95,14 @@ spec:
apiVersion: traefik.containo.us/v1alpha1
kind: IngressRoute
metadata:
- name: tensorboard-ingressroute
+ name: submarine-tensorboard-ingressroute
spec:
entryPoints:
- web
routes:
- kind: Rule
- match: "{{ .Values.submarine.tensorboard.ingressPath }}"
+ match: "PathPrefix(`{{ .Values.submarine.tensorboard.ingressPath }}`)"
services:
- kind: Service
- name: tensorboard-service
- port: 8080
+ name: submarine-tensorboard-service
+ port: 8080
\ No newline at end of file
diff --git a/helm-charts/submarine/values.yaml
b/helm-charts/submarine/values.yaml
index cbd5479..41327e2 100644
--- a/helm-charts/submarine/values.yaml
+++ b/helm-charts/submarine/values.yaml
@@ -41,5 +41,11 @@ submarine:
ip: 10.96.0.2
tensorboard:
storage: 10Gi
- hostPath: /tmp/tfboard/
- ingressPath: PathPrefix(`/tensorboard`)
+ ingressPath: "/tensorboard"
+ storage:
+ type: host # "host" or "nfs"
+ host:
+ path: "/tmp/submarine/host"
+ nfs:
+ ip: 10.96.0.2
+ path: "/"
diff --git
a/submarine-server/server-submitter/submitter-k8s/src/main/java/org/apache/submarine/server/submitter/k8s/K8sSubmitter.java
b/submarine-server/server-submitter/submitter-k8s/src/main/java/org/apache/submarine/server/submitter/k8s/K8sSubmitter.java
index 3935acf..885e94c 100644
---
a/submarine-server/server-submitter/submitter-k8s/src/main/java/org/apache/submarine/server/submitter/k8s/K8sSubmitter.java
+++
b/submarine-server/server-submitter/submitter-k8s/src/main/java/org/apache/submarine/server/submitter/k8s/K8sSubmitter.java
@@ -262,9 +262,9 @@ public class K8sSubmitter implements Submitter {
@Override
public TensorboardInfo getTensorboardInfo() throws SubmarineRuntimeException
{
- final String name = "tensorboard";
+ final String name = "submarine-tensorboard";
final String namespace = "default";
- final String ingressRouteName = "tensorboard-ingressroute";
+ final String ingressRouteName = "submarine-tensorboard-ingressroute";
try {
V1Deployment deploy = appsV1Api.readNamespacedDeploymentStatus(name,
namespace, "true");
diff --git
a/submarine-server/server-submitter/submitter-k8s/src/main/java/org/apache/submarine/server/submitter/k8s/parser/ExperimentSpecParser.java
b/submarine-server/server-submitter/submitter-k8s/src/main/java/org/apache/submarine/server/submitter/k8s/parser/ExperimentSpecParser.java
index 8b8df40..0a56b02 100644
---
a/submarine-server/server-submitter/submitter-k8s/src/main/java/org/apache/submarine/server/submitter/k8s/parser/ExperimentSpecParser.java
+++
b/submarine-server/server-submitter/submitter-k8s/src/main/java/org/apache/submarine/server/submitter/k8s/parser/ExperimentSpecParser.java
@@ -180,12 +180,15 @@ public class ExperimentSpecParser {
final String name = experimentSpec.getMeta().getName();
// volumeMount
- container.addVolumeMountsItem(new
V1VolumeMount().mountPath("/logs").name("volume").subPath(name));
+ container.addVolumeMountsItem(
+ new V1VolumeMount().mountPath("/logs").
+ name("volume").subPath("submarine-tensorboard/" + name)
+ );
// volume
V1Volume podVolume = new V1Volume().name("volume");
podVolume.setPersistentVolumeClaim(
- new
V1PersistentVolumeClaimVolumeSource().claimName("tensorboard-pvc")
+ new
V1PersistentVolumeClaimVolumeSource().claimName("submarine-tensorboard-pvc")
);
podSpec.addVolumesItem(podVolume);
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]