Skip to content

Commit

Permalink
[Test] Use GcsFaultToleranceOptions in test and backward compatibility (
Browse files Browse the repository at this point in the history
  • Loading branch information
fscnick authored Feb 7, 2025
1 parent 4b27b44 commit 165a351
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 46 deletions.
198 changes: 198 additions & 0 deletions ray-operator/config/samples/ray-cluster.deprecate-gcs-ft.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
# RayCluster sample that turns on Ray GCS fault tolerance (FT) via the
# `ray.io/ft-enabled` annotation, the RAY_REDIS_ADDRESS / REDIS_PASSWORD
# environment variables on the head container, and the `redis-password`
# entry in `rayStartParams`.
# NOTE(review): the file name (ray-cluster.deprecate-gcs-ft.yaml) suggests this
# style is kept for backward compatibility only -- confirm before copying it.
apiVersion: ray.io/v1
kind: RayCluster
metadata:
  annotations:
    ray.io/ft-enabled: "true"  # enable Ray GCS FT
    # In most cases, you don't need to set `ray.io/external-storage-namespace` because KubeRay will
    # automatically set it to the UID of RayCluster. Only modify this annotation if you fully understand
    # the behaviors of the Ray GCS FT and RayService to avoid misconfiguration.
    # [Example]:
    # ray.io/external-storage-namespace: "my-raycluster-storage"
  name: raycluster-external-redis
spec:
  rayVersion: '2.41.0'
  headGroupSpec:
    # The `rayStartParams` are used to configure the `ray start` command.
    # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay.
    # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`.
    rayStartParams:
      # Setting "num-cpus: 0" to avoid any Ray actors or tasks being scheduled on the Ray head Pod.
      num-cpus: "0"
      # redis-password must match "requirepass" in redis.conf, which is provided by the
      # `redis-config` ConfigMap defined later in this file.
      # Ray 2.3.0 changes the default redis password from "5241590000000000" to "".
      redis-password: $REDIS_PASSWORD
    # Pod template
    template:
      spec:
        containers:
          - name: ray-head
            image: rayproject/ray:2.41.0
            resources:
              limits:
                cpu: "1"
              requests:
                cpu: "1"
            env:
              # Ray will read the RAY_REDIS_ADDRESS environment variable to establish
              # a connection with the Redis server. In this instance, we use the "redis"
              # Kubernetes ClusterIP service name, also created by this YAML, as the
              # connection point to the Redis server.
              - name: RAY_REDIS_ADDRESS
                value: "redis:6379"
              # This environment variable is used in the `rayStartParams` above.
              - name: REDIS_PASSWORD
                valueFrom:
                  secretKeyRef:
                    name: redis-password-secret
                    key: password
            ports:
              - containerPort: 6379
                name: redis
              - containerPort: 8265
                name: dashboard
              - containerPort: 10001
                name: client
            volumeMounts:
              - mountPath: /tmp/ray
                name: ray-logs
              # Exposes the sample scripts from the `ray-example` ConfigMap.
              - mountPath: /home/ray/samples
                name: ray-example-configmap
        volumes:
          - name: ray-logs
            emptyDir: {}
          - name: ray-example-configmap
            configMap:
              name: ray-example
              # File mode for the projected scripts, written in octal notation.
              defaultMode: 0777
              items:
                - key: detached_actor.py
                  path: detached_actor.py
                - key: increment_counter.py
                  path: increment_counter.py
  workerGroupSpecs:
    # Replica count (with its min/max bounds) for the worker Pods in this group.
    - replicas: 1
      minReplicas: 1
      maxReplicas: 10
      groupName: small-group
      # The `rayStartParams` are used to configure the `ray start` command.
      # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay.
      # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`.
      rayStartParams: {}
      # Pod template
      template:
        spec:
          containers:
            - name: ray-worker
              image: rayproject/ray:2.41.0
              volumeMounts:
                - mountPath: /tmp/ray
                  name: ray-logs
              resources:
                limits:
                  cpu: "1"
                requests:
                  cpu: "1"
          volumes:
            - name: ray-logs
              emptyDir: {}
---
# Redis server configuration; the `redis` Deployment in this file mounts the
# `redis.conf` key as /usr/local/etc/redis/redis.conf.
kind: ConfigMap
apiVersion: v1
metadata:
  name: redis-config
  labels:
    app: redis
data:
  # `requirepass` below is the plain-text form of the password stored
  # base64-encoded in the `redis-password-secret` Secret; keep them in sync.
  redis.conf: |-
    dir /data
    port 6379
    bind 0.0.0.0
    appendonly yes
    protected-mode no
    requirepass 5241590000000000
    pidfile /data/redis-6379.pid
---
# ClusterIP Service named `redis` that selects Pods labelled `app: redis` and
# exposes port 6379; the Ray head container reaches Redis at `redis:6379`.
apiVersion: v1
kind: Service
metadata:
  name: redis
  labels:
    app: redis
spec:
  type: ClusterIP
  ports:
    - name: redis
      port: 6379
  selector:
    app: redis
---
# Single-replica Redis Deployment started with the configuration file taken
# from the `redis-config` ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  labels:
    app: redis
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:7.4.0
          # Start the server with the mounted configuration file.
          command:
            - "sh"
            - "-c"
            - "redis-server /usr/local/etc/redis/redis.conf"
          ports:
            - containerPort: 6379
          volumeMounts:
            # `subPath` mounts only the `redis.conf` key at the file path below.
            - name: config
              mountPath: /usr/local/etc/redis/redis.conf
              subPath: redis.conf
      volumes:
        - name: config
          configMap:
            name: redis-config
---
# Redis password, referenced by the Ray head container through
# `secretKeyRef` (name: redis-password-secret, key: password).
apiVersion: v1
kind: Secret
metadata:
  name: redis-password-secret
type: Opaque
data:
  # echo -n "5241590000000000" | base64
  password: NTI0MTU5MDAwMDAwMDAwMA==
---
# Sample Python scripts; the RayCluster head Pod in this file mounts them
# under /home/ray/samples.
apiVersion: v1
kind: ConfigMap
metadata:
  name: ray-example
data:
  # Creates a detached actor named "counter_actor" in "default_namespace".
  detached_actor.py: |
    import ray

    @ray.remote(num_cpus=1)
    class Counter:
        def __init__(self):
            self.value = 0

        def increment(self):
            self.value += 1
            return self.value

    ray.init(namespace="default_namespace")
    Counter.options(name="counter_actor", lifetime="detached").remote()
  # Looks up "counter_actor", increments it once and prints the new value.
  increment_counter.py: |
    import ray

    ray.init(namespace="default_namespace")
    counter = ray.get_actor("counter_actor")
    print(ray.get(counter.increment.remote()))
35 changes: 12 additions & 23 deletions ray-operator/config/samples/ray-cluster.persistent-redis.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
apiVersion: ray.io/v1
kind: RayCluster
metadata:
annotations:
ray.io/ft-enabled: "true" # enable Ray GCS FT
# In most cases, you don't need to set `ray.io/external-storage-namespace` because KubeRay will
# automatically set it to the UID of RayCluster. Only modify this annotation if you fully understand
# the behaviors of the Ray GCS FT and RayService to avoid misconfiguration.
# [Example]:
# ray.io/external-storage-namespace: "my-raycluster-storage"
name: raycluster-external-redis
spec:
rayVersion: '2.41.0'
gcsFaultToleranceOptions:
# In most cases, you don't need to set `externalStorageNamespace` because KubeRay will
# automatically set it to the UID of RayCluster. Only modify this field if you fully understand
# the behaviors of the Ray GCS FT and RayService to avoid misconfiguration.
# [Example]:
# externalStorageNamespace: "my-raycluster-storage"
redisAddress: "redis:6379"
redisPassword:
valueFrom:
secretKeyRef:
name: redis-password-secret
key: password
headGroupSpec:
# The `rayStartParams` are used to configure the `ray start` command.
# See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay.
# See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`.
rayStartParams:
# Setting "num-cpus: 0" to avoid any Ray actors or tasks being scheduled on the Ray head Pod.
num-cpus: "0"
# redis-password should match "requirepass" in redis.conf in the ConfigMap above.
# Ray 2.3.0 changes the default redis password from "5241590000000000" to "".
redis-password: $REDIS_PASSWORD
# Pod template
template:
spec:
Expand All @@ -32,19 +34,6 @@ spec:
cpu: "1"
requests:
cpu: "1"
env:
# Ray will read the RAY_REDIS_ADDRESS environment variable to establish
# a connection with the Redis server. In this instance, we use the "redis"
# Kubernetes ClusterIP service name, also created by this YAML, as the
# connection point to the Redis server.
- name: RAY_REDIS_ADDRESS
value: redis:6379
# This environment variable is used in the `rayStartParams` above.
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: redis-password-secret
key: password
ports:
- containerPort: 6379
name: redis
Expand Down
24 changes: 12 additions & 12 deletions ray-operator/config/samples/ray-service.high-availability.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ apiVersion: ray.io/v1
kind: RayService
metadata:
name: rayservice-ha
annotations:
ray.io/ft-enabled: "true"
spec:
serveConfigV2: |
applications:
Expand Down Expand Up @@ -44,25 +42,27 @@ spec:
rayVersion: '2.41.0' # should match the Ray version in the image of the containers
######################headGroupSpecs#################################
# Ray head pod template.
gcsFaultToleranceOptions:
# In most cases, you don't need to set `externalStorageNamespace` because KubeRay will
# automatically set it to the UID of RayCluster. Only modify this field if you fully understand
# the behaviors of the Ray GCS FT and RayService to avoid misconfiguration.
# [Example]:
# externalStorageNamespace: "my-raycluster-storage"
redisAddress: "redis:6379"
redisPassword:
valueFrom:
secretKeyRef:
name: redis-password-secret
key: password
headGroupSpec:
rayStartParams:
num-cpus: "0"
redis-password: $REDIS_PASSWORD
#pod template
template:
spec:
containers:
- name: ray-head
image: rayproject/ray:2.41.0
env:
- name: RAY_REDIS_ADDRESS
value: redis:6379
# This environment variable is used in the `rayStartParams` above.
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: redis-password-secret
key: password
resources:
limits:
cpu: 1
Expand Down
23 changes: 12 additions & 11 deletions ray-operator/test/e2erayservice/testdata/ray-service.ft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ apiVersion: ray.io/v1
kind: RayService
metadata:
name: test-rayservice
annotations:
ray.io/ft-enabled: "true"
spec:
excludeHeadPodFromServeSvc: true
serveConfigV2: |
Expand All @@ -23,24 +21,27 @@ spec:
num_cpus: 1
rayClusterConfig:
rayVersion: "2.41.0"
gcsFaultToleranceOptions:
# In most cases, you don't need to set `externalStorageNamespace` because KubeRay will
# automatically set it to the UID of RayCluster. Only modify this field if you fully understand
# the behaviors of the Ray GCS FT and RayService to avoid misconfiguration.
# [Example]:
# externalStorageNamespace: "my-raycluster-storage"
redisAddress: "redis:6379"
redisPassword:
valueFrom:
secretKeyRef:
name: redis-password-secret
key: password
headGroupSpec:
rayStartParams:
num-cpus: "0"
redis-password: $REDIS_PASSWORD
template:
spec:
containers:
- name: ray-head
image: rayproject/ray:2.41.0
env:
- name: RAY_REDIS_ADDRESS
value: redis:6379
# This environment variable is used in the `rayStartParams` above.
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: redis-password-secret
key: password
- name: RAY_gcs_rpc_server_reconnect_timeout_s
value: "20"
resources:
Expand Down
6 changes: 6 additions & 0 deletions ray-operator/test/sampleyaml/raycluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ func TestRayCluster(t *testing.T) {
{
name: "ray-cluster.custom-head-service.yaml",
},
{
name: "ray-cluster.deprecate-gcs-ft.yaml",
},
{
name: "ray-cluster.persistent-redis.yaml",
},
{
name: "ray-cluster.embed-grafana.yaml",
},
Expand Down

0 comments on commit 165a351

Please sign in to comment.