# deployment_tensorrt-inference-server-v100.yaml

---
# Deployment "tensorrt-inference-server-v100" in the "trt" namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  # Explicit empty map: a bare `annotations:` key parses as null, not as an
  # empty mapping.
  annotations: {}
  labels:
    app: inference-server-v100
  name: tensorrt-inference-server-v100
  namespace: trt
spec:
  # Seconds a rollout may go without progress before it is reported as failed.
  progressDeadlineSeconds: 600
  # No pods are requested by this manifest; raise the count to start serving.
  replicas: 0
  revisionHistoryLimit: 10
  # Must match the pod template labels below.
  selector:
    matchLabels:
      app: inference-server-v100
  strategy:
    # Recreate terminates existing pods before creating replacements.
    # NOTE(review): presumably chosen because each pod requests 4 GPUs and two
    # generations cannot be scheduled side by side — confirm.
    type: Recreate
  template:
    metadata:
      labels:
        app: inference-server-v100
    spec:
      containers:
      - args:
        - trtserver
        # NOTE(review): the model repository flag below is disabled, so no
        # model repository is passed to trtserver. Confirm this is intentional
        # before scaling the Deployment above zero replicas.
        # - --model-store=gs://ailab-model-repo/model_repository
        - --allow-metrics=true
        - --log-verbose=5
        - --grpc-infer-thread-count=64
        - --grpc-stream-infer-thread-count=64
        env:
        # Path of the service-account key file; it is provided by the
        # "authkey" volume mount further down.
        - name: GOOGLE_APPLICATION_CREDENTIALS
          value: /var/harrisgroup-bucket-auth.json
        - name: SA
          value: kubeflow-deploy-admin
        image: nvcr.io/nvidia/tensorrtserver:20.01-py3
        imagePullPolicy: Always
        name: tensorrt-inference-server-v100
        ports:
        - containerPort: 8000
          name: http
          protocol: TCP
        - containerPort: 8001
          name: grpc
          protocol: TCP
        - containerPort: 8002
          name: metrics
          protocol: TCP
        # Keep the pod out of Service endpoints until the server reports that
        # it is ready to accept inference requests on the HTTP port.
        readinessProbe:
          httpGet:
            path: /api/health/ready
            port: http
          initialDelaySeconds: 5
          periodSeconds: 5
        resources:
          limits:
            cpu: "15"
            memory: 40Gi
            nvidia.com/gpu: "4"
          requests:
            cpu: "14"
            memory: 20Gi
            nvidia.com/gpu: "4"
        securityContext:
          # Run the container as UID 1000.
          runAsUser: 1000
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
        volumeMounts:
        # subPath mounts only the single key file instead of the whole secret
        # directory.
        - mountPath: /var/harrisgroup-bucket-auth.json
          name: authkey
          subPath: harrisgroup-bucket-auth.json
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
      volumes:
      - name: authkey
        secret:
          # 420 is the decimal form of octal 0644 (rw-r--r--).
          defaultMode: 420
          items:
          - key: harrisgroup-bucket-auth.json
            path: harrisgroup-bucket-auth.json
          secretName: bucket-auth
# NOTE(review): this stanza looks like state exported from a live cluster.
# Status is normally written by the Deployment controller rather than by
# users — consider dropping it from the checked-in manifest.
status:
  conditions:
  - lastTransitionTime: '2020-03-26T22:07:53Z'
    lastUpdateTime: '2020-03-30T14:58:22Z'
    message: >-
      ReplicaSet "tensorrt-inference-server-v100-7d5fc8fdb6" has successfully
      progressed.
    reason: NewReplicaSetAvailable
    status: 'True'
    type: Progressing
  - lastTransitionTime: '2020-03-30T15:00:07Z'
    lastUpdateTime: '2020-03-30T15:00:07Z'
    message: Deployment has minimum availability.
    reason: MinimumReplicasAvailable
    status: 'True'
    type: Available
  observedGeneration: 17