diff --git a/deployment/bare_metal/Readme.md b/deployment/bare_metal/Readme.md new file mode 100644 index 000000000..34d791e0f --- /dev/null +++ b/deployment/bare_metal/Readme.md @@ -0,0 +1,46 @@ +# Deployment for bare metal servers +This setup is designed for **bare metal servers**, offering maximum performance and reliability for Gradido deployments. However, it can also work on **virtual servers (VPS)** – if properly configured. + +## 🧠 Memory Considerations on VServers + +We have observed that some VServer providers apply **aggressive virtual memory constraints** or overcommit strategies that may cause **random crashes** of Node.js processes – even when total RAM appears sufficient. + +### Important Notes: + +- A single Node.js process may **allocate 10–12 GB of virtual memory** (VIRT), even if **real memory usage (RES)** stays below 200 MB. +- Some VPS environments **panic or kill processes** when virtual memory allocation exceeds certain invisible thresholds. + +## 🛡️ Rate Limiting (API Protection) + +This deployment includes built-in **rate limiting** for public-facing endpoints to prevent abuse and denial-of-service attacks. + +### 🔒 NGINX Rate & Connection Limits Overview + +| Path | Zone | Rate Limit | Burst | Max Connections | Notes | +|----------------------------|----------|----------------|-------|------------------|--------------------------------| +| `/` | frontend | 15 requests/s | 150 | 60 | Public frontend | +| `/admin` | frontend | 15 requests/s | 30 | 20 | Admin frontend | +| `/graphql` | backend | 20 requests/s | 40 | 20 | Main backend GraphQL API | +| `/hook` | backend | 20 requests/s | 20 | 20 | Internal backend webhooks | +| `/hooks/` | backend | 20 requests/s | 20 | 20 | Reverse proxy for webhooks | +| `/api/$FEDERATION_APIVERSION` | api | 30 requests/s | 60 | 30 | Federation GraphQL API | + +- `$FEDERATION_APIVERSION`: placeholder for the federation API version +- All zones use `$binary_remote_addr` for client identification. 
+- `nodelay` ensures requests within the burst allowance are served immediately instead of being queued; requests beyond it are rejected. +- Global connection zone: `limit_conn_zone $binary_remote_addr zone=addr:10m;` + +This setup helps protect public and internal interfaces from abuse, while ensuring smooth parallel access during high load periods (e.g., UI builds or cluster sync). + +These limits work like a traffic cop at each route: +- **Rate limits** (`limit_req`) define how many requests per second a single client can send. +- **Burst values** allow short spikes without blocking – like a temporary buffer. +- **Connection limits** (`limit_conn`) cap how many concurrent connections a single IP can keep open. + +Each route (frontend, backend, API, etc.) has its own configuration depending on its expected traffic pattern and sensitivity. For example: +- The public frontend allows higher bursts (many assets load at once). +- The GraphQL backend and admin interfaces are more tightly controlled. + +This ensures fairness, avoids accidental DoS scenarios, and keeps overall latency low, even under high usage. 
+ + diff --git a/deployment/bare_metal/nginx/common/limit_requests.conf b/deployment/bare_metal/nginx/common/limit_requests.conf index c9501fd64..022a6d3a3 100644 --- a/deployment/bare_metal/nginx/common/limit_requests.conf +++ b/deployment/bare_metal/nginx/common/limit_requests.conf @@ -1,4 +1,4 @@ -limit_req_zone $binary_remote_addr zone=frontend:20m rate=5r/s; -limit_req_zone $binary_remote_addr zone=backend:25m rate=15r/s; +limit_req_zone $binary_remote_addr zone=frontend:20m rate=15r/s; +limit_req_zone $binary_remote_addr zone=backend:25m rate=20r/s; limit_req_zone $binary_remote_addr zone=api:5m rate=30r/s; limit_conn_zone $binary_remote_addr zone=addr:10m; \ No newline at end of file diff --git a/deployment/bare_metal/nginx/conf.d/logging.conf b/deployment/bare_metal/nginx/conf.d/logging.conf index a76e8fae7..b8ed225a8 100644 --- a/deployment/bare_metal/nginx/conf.d/logging.conf +++ b/deployment/bare_metal/nginx/conf.d/logging.conf @@ -1,4 +1,6 @@ log_format gradido_log '$remote_addr - $remote_user [$time_local] ' '"$request_method $status $request_uri"' ' "$http_referer" "$http_user_agent"' - ' $server_protocol $body_bytes_sent $request_time'; \ No newline at end of file + ' $server_protocol $body_bytes_sent $request_time' + ' limit status: $limit_req_status' + ; \ No newline at end of file diff --git a/deployment/bare_metal/nginx/sites-available/gradido-federation.conf.template b/deployment/bare_metal/nginx/sites-available/gradido-federation.conf.template index cf5f53b25..5123deb5e 100644 --- a/deployment/bare_metal/nginx/sites-available/gradido-federation.conf.template +++ b/deployment/bare_metal/nginx/sites-available/gradido-federation.conf.template @@ -1,7 +1,7 @@ location /api/$FEDERATION_APIVERSION { - #limit_req zone=api burst=60 nodelay; - #limit_conn addr 30; + limit_req zone=api burst=60 nodelay; + limit_conn addr 30; proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; diff --git 
a/deployment/bare_metal/nginx/sites-available/gradido.conf.ssl.template b/deployment/bare_metal/nginx/sites-available/gradido.conf.ssl.template index 294e9f8a0..1eb01f09e 100644 --- a/deployment/bare_metal/nginx/sites-available/gradido.conf.ssl.template +++ b/deployment/bare_metal/nginx/sites-available/gradido.conf.ssl.template @@ -25,8 +25,9 @@ server { include /etc/nginx/common/protect_add_header.conf; # protect from slow loris - #client_body_timeout 10s; - #client_header_timeout 10s; + client_body_timeout 10s; + client_header_timeout 10s; + send_timeout 10s; # protect from range attack (in http header) if ($http_range ~ "d{9,}") { @@ -53,12 +54,30 @@ server { # Frontend (default) location / { - - #limit_req zone=frontend burst=40 nodelay; - #limit_conn addr 40; + limit_req zone=frontend burst=150 nodelay; + limit_conn addr 60; root $PROJECT_ROOT/frontend/build/; index index.html; - try_files $uri $uri/ /index.html = 404; + + # caching rules for assets + # static assets + location ~* \.(?:woff2?|ttf|otf|eot|jpg|jpeg|png|gif|svg|webp|ico)$ { + # keep assets for a week + add_header Cache-Control "public, max-age=604800"; + try_files $uri =404; + } + # hashed assets + location ~* \.(?:js|css|json)$ { + add_header Cache-Control "public, max-age=31536000, immutable"; + try_files $uri =404; + } + + try_files $uri $uri/ /index.html = 404; + + # don't cache index.html + add_header Cache-Control "no-cache, no-store, must-revalidate"; + add_header Pragma "no-cache"; + add_header Expires 0; access_log $GRADIDO_LOG_PATH/nginx-access.frontend.log gradido_log; error_log $GRADIDO_LOG_PATH/nginx-error.frontend.log warn; @@ -66,8 +85,8 @@ server { # Backend location /graphql { - #limit_req zone=backend burst=10 nodelay; - #limit_conn addr 10; + limit_req zone=backend burst=40 nodelay; + limit_conn addr 20; proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection 'upgrade'; @@ -84,8 +103,8 @@ server { # Backend webhooks location /hook { - 
#limit_req zone=backend burst=10; - #limit_conn addr 10; + limit_req zone=backend burst=20 nodelay; + limit_conn addr 20; proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection 'upgrade'; @@ -102,8 +121,8 @@ server { # Webhook reverse proxy location /hooks/ { - #limit_req zone=backend burst=10; - #limit_conn addr 10; + limit_req zone=backend burst=20 nodelay; + limit_conn addr 20; proxy_pass http://127.0.0.1:9000/hooks/; access_log $GRADIDO_LOG_PATH/nginx-access.hooks.log gradido_log; @@ -112,13 +131,32 @@ server { # Admin Frontend location /admin { - #limit_req zone=frontend burst=30 nodelay; - #limit_conn addr 40; - rewrite ^/admin/(.*)$ /$1 break; - root $PROJECT_ROOT/admin/build/; + limit_req zone=frontend burst=30 nodelay; + limit_conn addr 20; + #rewrite ^/admin/(.*)$ /$1 break; + alias $PROJECT_ROOT/admin/build/; index index.html; + + # caching rules for assets + # static assets + location ~* \.(?:woff2?|ttf|otf|eot|jpg|jpeg|png|gif|svg|webp|ico)$ { + # keep assets for a week + add_header Cache-Control "public, max-age=604800"; + try_files $uri =404; + } + # hashed assets + location ~* \.(?:js|css|json)$ { + add_header Cache-Control "public, max-age=31536000, immutable"; + try_files $uri =404; + } + try_files $uri $uri/ /index.html = 404; + # don't cache index.html + add_header Cache-Control "no-cache, no-store, must-revalidate"; + add_header Pragma "no-cache"; + add_header Expires 0; + access_log $GRADIDO_LOG_PATH/nginx-access.admin.log gradido_log; error_log $GRADIDO_LOG_PATH/nginx-error.admin.log warn; } diff --git a/deployment/bare_metal/nginx/sites-available/gradido.conf.template b/deployment/bare_metal/nginx/sites-available/gradido.conf.template index 7bd28b228..1f5ca2304 100644 --- a/deployment/bare_metal/nginx/sites-available/gradido.conf.template +++ b/deployment/bare_metal/nginx/sites-available/gradido.conf.template @@ -10,8 +10,9 @@ server { include /etc/nginx/common/protect_add_header.conf; # protect from slow 
loris - #client_body_timeout 10s; - #client_header_timeout 10s; + client_body_timeout 10s; + client_header_timeout 10s; + send_timeout 10s; # protect from range attack (in http header) if ($http_range ~ "d{9,}") { @@ -38,20 +39,39 @@ server { # Frontend (default) location / { - #limit_req zone=frontend burst=40 nodelay; - #limit_conn addr 40; + limit_req zone=frontend burst=150 nodelay; + limit_conn addr 60; root $PROJECT_ROOT/frontend/build/; index index.html; + + # caching rules for assets + # static assets + location ~* \.(?:woff2?|ttf|otf|eot|jpg|jpeg|png|gif|svg|webp|ico)$ { + # keep assets for a week + add_header Cache-Control "public, max-age=604800"; + try_files $uri =404; + } + # hashed assets + location ~* \.(?:js|css|json)$ { + add_header Cache-Control "public, max-age=31536000, immutable"; + try_files $uri =404; + } + try_files $uri $uri/ /index.html = 404; + # don't cache index.html + add_header Cache-Control "no-cache, no-store, must-revalidate"; + add_header Pragma "no-cache"; + add_header Expires 0; + access_log $GRADIDO_LOG_PATH/nginx-access.frontend.log gradido_log; error_log $GRADIDO_LOG_PATH/nginx-error.frontend.log warn; } # Backend location /graphql { - #limit_req zone=backend burst=10 nodelay; - #limit_conn addr 10; + limit_req zone=backend burst=40 nodelay; + limit_conn addr 20; proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection 'upgrade'; @@ -68,8 +88,8 @@ server { # Backend webhooks location /hook { - #limit_req zone=backend burst=10; - #limit_conn addr 10; + limit_req zone=backend burst=20 nodelay; + limit_conn addr 20; proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection 'upgrade'; @@ -86,8 +106,8 @@ server { # Webhook reverse proxy location /hooks/ { - #limit_req zone=backend burst=10; - #limit_conn addr 10; + limit_req zone=backend burst=20 nodelay; + limit_conn addr 20; proxy_pass http://127.0.0.1:9000/hooks/; access_log 
$GRADIDO_LOG_PATH/nginx-access.hooks.log gradido_log; @@ -96,13 +116,32 @@ server { # Admin Frontend location /admin { - #limit_req zone=frontend burst=30 nodelay; - #limit_conn addr 40; + limit_req zone=frontend burst=30 nodelay; + limit_conn addr 20; rewrite ^/admin/(.*)$ /$1 break; root $PROJECT_ROOT/admin/build/; index index.html; + + # caching rules for assets + # static assets + location ~* \.(?:woff2?|ttf|otf|eot|jpg|jpeg|png|gif|svg|webp|ico)$ { + # keep assets for a week + add_header Cache-Control "public, max-age=604800"; + # try_files $uri =404; + } + # hashed assets + location ~* \.(?:js|css|json)$ { + add_header Cache-Control "public, max-age=31536000, immutable"; + # try_files $uri =404; + } + try_files $uri $uri/ /index.html = 404; + # don't cache index.html + add_header Cache-Control "no-cache, no-store, must-revalidate"; + add_header Pragma "no-cache"; + add_header Expires 0; + access_log $GRADIDO_LOG_PATH/nginx-access.admin.log gradido_log; error_log $GRADIDO_LOG_PATH/nginx-error.admin.log warn; } diff --git a/deployment/bare_metal/start.sh b/deployment/bare_metal/start.sh index f38621943..0eda8958a 100755 --- a/deployment/bare_metal/start.sh +++ b/deployment/bare_metal/start.sh @@ -101,7 +101,15 @@ TODAY=$(date +"%Y-%m-%d") # Create a new updating.html from the template \cp $SCRIPT_DIR/nginx/update-page/updating.html.template $UPDATE_HTML -# redirect all output of the script to the UPDATE_HTML and also have things on console +# store real console stream in fd 3 +if test -t 1; then + # stdout is a TTY - normal console + exec 3> /dev/tty +else + # stdout is not a TTY - probably Docker or CI + exec 3> /proc/$$/fd/1 +fi +# redirect all output of the script to the UPDATE_HTML # TODO: this might pose a security risk exec > >(tee -a $UPDATE_HTML) 2>&1 @@ -109,34 +117,36 @@ exec > >(tee -a $UPDATE_HTML) 2>&1 echo 'Configuring nginx to serve the update-page' nginx_restart() { sudo /etc/init.d/nginx restart || { - echo -e "\e[33mwarn: nginx restart failed, 
will try to fix with 'sudo systemctl reset-failed nginx' and 'sudo systemctl start nginx'\e[0m" > /dev/tty + echo -e "\e[33mwarn: nginx restart failed\e[0m" >&3 + # run nginx -t to show problem but ignore exit code to prevent trap + { sudo nginx -t || true; } >&3 + echo -e "\e[33mwarn: will try to fix with 'sudo systemctl reset-failed nginx' and 'sudo systemctl start nginx'\e[0m" >&3 sudo systemctl reset-failed nginx sudo systemctl start nginx } } -nginx_restart ln -sf $SCRIPT_DIR/nginx/sites-available/update-page.conf $SCRIPT_DIR/nginx/sites-enabled/default - +nginx_restart # helper functions log_step() { local message="$1" - echo -e "\e[34m$message\e[0m" # > /dev/tty # blue in console + echo -e "\e[34m$message\e[0m" >&3 # blue in console echo "

$message

" >> "$UPDATE_HTML" # blue in html } log_error() { local message="$1" - echo -e "\e[31m$message\e[0m" # > /dev/tty # red in console + echo -e "\e[31m$message\e[0m" >&3 # red in console echo "$message" >> "$UPDATE_HTML" # red in html } log_warn() { local message="$1" - echo -e "\e[33m$message\e[0m" # > /dev/tty # orange in console + echo -e "\e[33m$message\e[0m" >&3 # orange in console echo "$message" >> "$UPDATE_HTML" # orange in html } log_success() { local message="$1" - echo -e "\e[32m$message\e[0m" # > /dev/tty # green in console + echo -e "\e[32m$message\e[0m" >&3 # green in console echo "

$message

" >> "$UPDATE_HTML" # green in html } @@ -287,12 +297,19 @@ else fi # start after building all to use up less ressources -pm2 start --name gradido-backend "turbo backend#start --env-mode=loose" -l $GRADIDO_LOG_PATH/pm2.backend.$TODAY.log --log-date-format 'YYYY-MM-DD HH:mm:ss.SSS' -#pm2 start --name gradido-frontend "yarn --cwd $PROJECT_ROOT/frontend start" -l $GRADIDO_LOG_PATH/pm2.frontend.$TODAY.log --log-date-format 'YYYY-MM-DD HH:mm:ss.SSS' -#pm2 start --name gradido-admin "yarn --cwd $PROJECT_ROOT/admin start" -l $GRADIDO_LOG_PATH/pm2.admin.$TODAY.log --log-date-format 'YYYY-MM-DD HH:mm:ss.SSS' +pm2 start --name gradido-backend \ + "env TZ=UTC NODE_ENV=production node ./build/index.js" \ + --cwd $PROJECT_ROOT/backend \ + -l $GRADIDO_LOG_PATH/pm2.backend.$TODAY.log \ + --log-date-format 'YYYY-MM-DD HH:mm:ss.SSS' + pm2 save if [ ! -z $FEDERATION_DHT_TOPIC ]; then - pm2 start --name gradido-dht-node "turbo dht-node#start --env-mode=loose" -l $GRADIDO_LOG_PATH/pm2.dht-node.$TODAY.log --log-date-format 'YYYY-MM-DD HH:mm:ss.SSS' + pm2 start --name gradido-dht-node \ + "env TZ=UTC NODE_ENV=production node ./build/index.js" \ + --cwd $PROJECT_ROOT/dht-node \ + -l $GRADIDO_LOG_PATH/pm2.dht-node.$TODAY.log \ + --log-date-format 'YYYY-MM-DD HH:mm:ss.SSS' pm2 save else log_step "=====================================================================" @@ -316,7 +333,11 @@ do log_step "====================================================" log_step " start $MODULENAME listening on port=$FEDERATION_PORT" log_step "====================================================" - pm2 start --name $MODULENAME "turbo federation#start --env-mode=loose" -l $GRADIDO_LOG_PATH/pm2.$MODULENAME.$TODAY.log --log-date-format 'YYYY-MM-DD HH:mm:ss.SSS' + pm2 start --name $MODULENAME \ + "env TZ=UTC NODE_ENV=production node ./build/index.js" \ + --cwd $PROJECT_ROOT/federation \ + -l $GRADIDO_LOG_PATH/pm2.$MODULENAME.$TODAY.log \ + --log-date-format 'YYYY-MM-DD HH:mm:ss.SSS' pm2 save done