-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdocker-compose.prod.yml
114 lines (106 loc) · 2.88 KB
/
docker-compose.prod.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Shared settings for every container that runs the RADIS application image.
# Services pull these in with the `<<: *default-app` merge key.
x-app: &default-app
  # An image reference has the form `[registry/]repository[:tag]`. It must not
  # carry a URL scheme such as `https://`; Docker rejects that as an invalid
  # reference format.
  image: ghcr.io/openradx/radis:latest
  volumes:
    - web_data:/var/www/web
    # `${VAR:?}` makes the variable mandatory: interpolation fails with an
    # error when it is unset or empty.
    - ${SSL_CERT_FILE:?}:/etc/web/ssl/cert.pem
    - ${SSL_KEY_FILE:?}:/etc/web/ssl/key.pem
  environment:
    DJANGO_EMAIL_URL: ${DJANGO_EMAIL_URL:?}
    DJANGO_SETTINGS_MODULE: "radis.settings.production"
    DJANGO_STATIC_ROOT: "/var/www/web/static/"
    POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?}
# Baseline `deploy` settings. Each service merges them into its own `deploy`
# section with `<<: *deploy` and may override individual keys.
x-deploy: &deploy
  # One instance unless a service overrides `replicas`.
  replicas: 1
  restart_policy:
    # Restart a container only after it fails, and give up after three tries.
    max_attempts: 3
    condition: on-failure
services:
  # The manage commands below must not run inside the web containers in
  # production, because the web service may have multiple replicas. They are
  # run exactly once here instead; the web containers wait until this service
  # answers on port 8000 before they start.
  init:
    <<: *default-app
    hostname: init.local
    # Inside a folded scalar (`>`), lines indented deeper than the first line
    # keep their newlines, so bash receives this script line by line.
    command: >
      bash -c "
        wait-for-it -s postgres.local:5432 -t 120 &&
        ./manage.py migrate &&
        ./manage.py collectstatic --no-input &&
        ./manage.py create_superuser &&
        wait-for-it -s llamacpp.local:8080 -t 60 &&
        ./manage.py ok_server --host 0.0.0.0 --port 8000
      "
    deploy:
      <<: *deploy

  # Serves the application over HTTP (80) and HTTPS (443) with daphne.
  web:
    <<: *default-app
    build:
      target: production
    ports:
      # Host ports default to 80/443 and can be overridden via the environment.
      - "${WEB_HTTP_PORT:-80}:80"
      - "${WEB_HTTPS_PORT:-443}:443"
    # Blocks until the init service is reachable, then starts the server.
    # NOTE(review): `\\` is presumably reduced to a single line-continuation
    # backslash when the command string is split - confirm before changing it.
    command: >
      bash -c "
        wait-for-it -s init.local:8000 -t 300 &&
        echo 'Starting web server ...' &&
        daphne -b 0.0.0.0 -p 80 \\
          -e ssl:443:privateKey=/etc/web/ssl/key.pem:certKey=/etc/web/ssl/cert.pem \\
          radis.asgi:application
      "
    deploy:
      <<: *deploy
      # Explicit keys win over merged ones, so this overrides `replicas: 1`.
      replicas: 3

  # Background worker for the `default` queue.
  default_worker:
    <<: *default-app
    command: >
      bash -c "
        wait-for-it -s postgres.local:5432 -t 60 &&
        ./manage.py bg_worker -q default
      "
    deploy:
      <<: *deploy

  # Background worker for the `llm` queue.
  llm_worker:
    <<: *default-app
    command: >
      bash -c "
        wait-for-it -s postgres.local:5432 -t 60 &&
        ./manage.py bg_worker -q llm
      "
    deploy:
      <<: *deploy

  # NOTE(review): no image is set here - presumably this entry only overrides
  # a postgres service defined in another compose file; confirm.
  postgres:
    environment:
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?}
    deploy:
      <<: *deploy

  # llama.cpp server (CUDA image), reachable by the other services as
  # llamacpp.local:8080.
  llamacpp_gpu:
    image: ghcr.io/ggerganov/llama.cpp:server-cuda
    hostname: llamacpp.local
    environment:
      # Proxy settings are optional; `${VAR:-}` falls back to an empty string.
      HTTP_PROXY: ${HTTP_PROXY:-}
      HTTPS_PROXY: ${HTTPS_PROXY:-}
      LLAMA_CACHE: "/models"
      NO_PROXY: ${NO_PROXY:-}
    volumes:
      - models_data:/models
    # LLM_MODEL_URL is mandatory (`:?`), like the other required variables in
    # this file. Without the guard, an unset variable is replaced by an empty
    # string and `--model-url` is followed directly by the next flag.
    entrypoint: >
      bash -c "
        /app/llama-server \\
          --model-url ${LLM_MODEL_URL:?} \\
          --host 0.0.0.0 \\
          --port 8080 \\
          --ctx-size 8192 \\
          --gpu-layers 99
      "
    deploy:
      <<: *deploy
      resources:
        reservations:
          # https://gist.github.com/medihack/6a6d24dc6376939e1919f32409c2119f
          generic_resources:
            - discrete_resource_spec:
                kind: "gpu"
                value: 1
# Named volumes mounted by the services above. An empty mapping declares the
# volume with default settings.
volumes:
  models_data: {}
  web_data: {}