updatews
This commit is contained in:
parent
e0b4840a99
commit
ba3a481627
3 changed files with 163 additions and 21 deletions
138
start.sh
138
start.sh
|
|
@ -172,11 +172,15 @@ local traefik_dir="/opt/services/traefik"
|
||||||
cp "$SCRIPT_DIR/templates/traefik/tls.yml" "$traefik_dir/dynamic/tls.yml"
|
cp "$SCRIPT_DIR/templates/traefik/tls.yml" "$traefik_dir/dynamic/tls.yml"
|
||||||
|
|
||||||
# Create acme.json with proper permissions
|
# Create acme.json with proper permissions
|
||||||
touch "$traefik_dir/data/acme.json"
|
touch "$traefik_dir/data/acme.json"
|
||||||
chmod 600 "$traefik_dir/data/acme.json"
|
chmod 600 "$traefik_dir/data/acme.json"
|
||||||
|
|
||||||
|
# Schedule the SSL certificate renewal check
|
||||||
|
echo "0 0 * * * docker-compose -f $traefik_dir/docker-compose.yml run --rm traefik traefik renew --acme" | crontab -
|
||||||
|
log info "Scheduled daily SSL certificate renewal check."
|
||||||
|
|
||||||
# Create traefik_proxy network
|
# Create traefik_proxy network
|
||||||
docker network create traefik_proxy 2>/dev/null || true
|
docker network create traefik_proxy 2e/dev/null || true
|
||||||
|
|
||||||
log info "Uruchamianie kontenera Traefik..."
|
log info "Uruchamianie kontenera Traefik..."
|
||||||
(cd "$traefik_dir" && docker-compose up -d)
|
(cd "$traefik_dir" && docker-compose up -d)
|
||||||
|
|
@ -191,10 +195,50 @@ cmd_deploy_monitoring() {
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
log info "Wdrażam stos monitoringu..."
|
log info "Wdrażam stos monitoringu..."
|
||||||
# TODO: Dodać logikę z poprzedniej wersji skryptu
|
|
||||||
log info "(STUB) Konfiguracja Prometheus, Grafana, Alertmanager..."
|
local monitoring_dir="/opt/services/monitoring"
|
||||||
log info "(STUB) Uruchamianie kontenerów monitoringu..."
|
mkdir -p "$monitoring_dir/prometheus"
|
||||||
log info "Wdrożenie monitoringu zakończone."
|
mkdir -p "$monitoring_dir/grafana/provisioning/datasources"
|
||||||
|
mkdir -p "$monitoring_dir/grafana/provisioning/dashboards"
|
||||||
|
mkdir -p "$monitoring_dir/alertmanager"
|
||||||
|
mkdir -p "$monitoring_dir/blackbox"
|
||||||
|
|
||||||
|
# Copy configuration files
|
||||||
|
cp "$SCRIPT_DIR/templates/monitoring/docker-compose.yml" "$monitoring_dir/docker-compose.yml"
|
||||||
|
cp "$SCRIPT_DIR/templates/monitoring/prometheus.yml" "$monitoring_dir/prometheus/prometheus.yml"
|
||||||
|
cp "$SCRIPT_DIR/templates/monitoring/alertmanager.yml" "$monitoring_dir/alertmanager/alertmanager.yml"
|
||||||
|
cp "$SCRIPT_DIR/templates/monitoring/blackbox.yml" "$monitoring_dir/blackbox/blackbox.yml"
|
||||||
|
cp "$SCRIPT_DIR/templates/monitoring/alerts.yml" "$monitoring_dir/prometheus/alerts.yml"
|
||||||
|
cp "$SCRIPT_DIR/templates/monitoring/prometheus-datasource.yml" "$monitoring_dir/grafana/provisioning/datasources/prometheus.yaml"
|
||||||
|
cp "$SCRIPT_DIR/templates/monitoring/promtail-config.yml" "$monitoring_dir/promtail-config.yml"
|
||||||
|
|
||||||
|
# Generate random passwords if not set
|
||||||
|
if [[ -z "$GRAFANA_ADMIN_PASSWORD" ]]; then
|
||||||
|
GRAFANA_ADMIN_PASSWORD=$(head -c 32 /dev/urandom | base64 | tr -d '\n' | tr '/+' 'AB')
|
||||||
|
log info "Generated Grafana admin password: $GRAFANA_ADMIN_PASSWORD"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Create SMTP password file for Alertmanager
|
||||||
|
mkdir -p "$(dirname "$ALERT_SMTP_PASS_PATH")"
|
||||||
|
echo "$ALERT_SMTP_PASS" > "$ALERT_SMTP_PASS_PATH"
|
||||||
|
chmod 600 "$ALERT_SMTP_PASS_PATH"
|
||||||
|
|
||||||
|
# Export environment variables for docker-compose
|
||||||
|
export PRIMARY_DOMAIN PROMETHEUS_VER GRAFANA_VER ALERTMANAGER_VER NODE_EXPORTER_VER CADVISOR_VER BLACKBOX_VER LOKI_VER PROMTAIL_VER GRAFANA_ADMIN_PASSWORD ALERT_SMTP_PASS_PATH
|
||||||
|
|
||||||
|
# Substitute variables in configuration files
|
||||||
|
envsubst < "$monitoring_dir/prometheus/prometheus.yml" > "$monitoring_dir/prometheus/prometheus.yml.tmp" && mv "$monitoring_dir/prometheus/prometheus.yml.tmp" "$monitoring_dir/prometheus/prometheus.yml"
|
||||||
|
envsubst < "$monitoring_dir/alertmanager/alertmanager.yml" > "$monitoring_dir/alertmanager/alertmanager.yml.tmp" && mv "$monitoring_dir/alertmanager/alertmanager.yml.tmp" "$monitoring_dir/alertmanager/alertmanager.yml"
|
||||||
|
envsubst < "$monitoring_dir/docker-compose.yml" > "$monitoring_dir/docker-compose.yml.tmp" && mv "$monitoring_dir/docker-compose.yml.tmp" "$monitoring_dir/docker-compose.yml"
|
||||||
|
|
||||||
|
log info "Uruchamianie kontenerów monitoringu..."
|
||||||
|
(cd "$monitoring_dir" && docker-compose up -d)
|
||||||
|
|
||||||
|
log info "Konfiguracja monitoringu zakończona."
|
||||||
|
log info "Grafana dostępna pod: https://grafana.${PRIMARY_DOMAIN}"
|
||||||
|
log info "Prometheus dostępny pod: https://prometheus.${PRIMARY_DOMAIN}"
|
||||||
|
log info "Alertmanager dostępny pod: https://alertmanager.${PRIMARY_DOMAIN}"
|
||||||
|
|
||||||
add_receipt 'monitoring'
|
add_receipt 'monitoring'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -265,6 +309,8 @@ main() {
|
||||||
backup:run) cmd_backup "run" ;;
|
backup:run) cmd_backup "run" ;;
|
||||||
backup:list) cmd_backup "list" ;;
|
backup:list) cmd_backup "list" ;;
|
||||||
backup:restore) cmd_backup "restore" "$@" ;;
|
backup:restore) cmd_backup "restore" "$@" ;;
|
||||||
|
ssl:renew) cmd_ssl_renew "$@" ;;
|
||||||
|
ssl:status) cmd_ssl_status "$@" ;;
|
||||||
self-update) cmd_self_update "$@" ;;
|
self-update) cmd_self_update "$@" ;;
|
||||||
uninstall) cmd_uninstall "$@" ;;
|
uninstall) cmd_uninstall "$@" ;;
|
||||||
help|*) cmd_help ;;
|
help|*) cmd_help ;;
|
||||||
|
|
@ -279,6 +325,7 @@ cmd_help() {
|
||||||
echo " deploy_mastodon, deploy_traefik, deploy_monitoring"
|
echo " deploy_mastodon, deploy_traefik, deploy_monitoring"
|
||||||
echo " secrets:edit <service>, secrets:view <service>"
|
echo " secrets:edit <service>, secrets:view <service>"
|
||||||
echo " backup:init, backup:run, backup:list, backup:restore <snapshot_id>"
|
echo " backup:init, backup:run, backup:list, backup:restore <snapshot_id>"
|
||||||
|
echo " ssl:renew, ssl:status"
|
||||||
echo " self-update, uninstall, help"
|
echo " self-update, uninstall, help"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -324,6 +371,83 @@ cmd_backup() {
|
||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
# ... (reszta funkcji)
|
# SSL Certificate Management
|
||||||
|
cmd_ssl_renew() {
|
||||||
|
log info "Ręczne odnawianie certyfikatów SSL..."
|
||||||
|
local traefik_dir="/opt/services/traefik"
|
||||||
|
if [[ ! -d "$traefik_dir" ]]; then
|
||||||
|
log error "Traefik nie jest zainstalowany. Uruchom najpierw deploy_traefik."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
log info "Wymuszenie odnowienia certyfikatów przez Traefik..."
|
||||||
|
(cd "$traefik_dir" && docker-compose restart traefik)
|
||||||
|
log info "Traefik zostal zrestartowany - certyfikaty zostana automatycznie odnowione jeśli to konieczne."
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd_ssl_status() {
|
||||||
|
log info "Sprawdzanie statusu certyfikatów SSL..."
|
||||||
|
local traefik_dir="/opt/services/traefik"
|
||||||
|
if [[ ! -d "$traefik_dir" ]]; then
|
||||||
|
log error "Traefik nie jest zainstalowany. Uruchom najpierw deploy_traefik."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
log info "Status certyfikatów dla domen:"
|
||||||
|
|
||||||
|
# Check certificate expiry for each domain
|
||||||
|
local domains=("$PRIMARY_DOMAIN" "grafana.$PRIMARY_DOMAIN" "prometheus.$PRIMARY_DOMAIN" "alertmanager.$PRIMARY_DOMAIN")
|
||||||
|
|
||||||
|
for domain in "${domains[@]}"; do
|
||||||
|
local expiry_date=$(echo | openssl s_client -servername "$domain" -connect "$domain:443" 2>/dev/null | openssl x509 -noout -dates 2>/dev/null | grep notAfter | cut -d= -f2)
|
||||||
|
if [[ -n "$expiry_date" ]]; then
|
||||||
|
local expiry_timestamp=$(date -d "$expiry_date" +%s 2>/dev/null || echo "0")
|
||||||
|
local current_timestamp=$(date +%s)
|
||||||
|
local days_until_expiry=$(( (expiry_timestamp - current_timestamp) / 86400 ))
|
||||||
|
|
||||||
|
if [[ $days_until_expiry -gt 7 ]]; then
|
||||||
|
log info "✓ $domain: Certyfikat ważny jeszcze $days_until_expiry dni (wygasa: $expiry_date)"
|
||||||
|
elif [[ $days_until_expiry -gt 0 ]]; then
|
||||||
|
log warn "⚠ $domain: Certyfikat wygasa za $days_until_expiry dni (wygasa: $expiry_date)"
|
||||||
|
else
|
||||||
|
log error "✗ $domain: Certyfikat wygasł lub nie można go sprawdzić"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
log error "✗ $domain: Nie można pobrać informacji o certyfikacie"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
# Enhanced logging function with centralized log rotation
|
||||||
|
setup_log_rotation() {
|
||||||
|
local logrotate_config="/etc/logrotate.d/autoscript"
|
||||||
|
|
||||||
|
cat > "$logrotate_config" << EOF
|
||||||
|
$LOG_FILE {
|
||||||
|
daily
|
||||||
|
rotate 30
|
||||||
|
compress
|
||||||
|
delaycompress
|
||||||
|
missingok
|
||||||
|
notifempty
|
||||||
|
create 640 root adm
|
||||||
|
postrotate
|
||||||
|
systemctl reload rsyslog > /dev/null 2>&1 || true
|
||||||
|
endscript
|
||||||
|
}
|
||||||
|
|
||||||
|
/opt/services/*/logs/*.log {
|
||||||
|
daily
|
||||||
|
rotate 7
|
||||||
|
compress
|
||||||
|
delaycompress
|
||||||
|
missingok
|
||||||
|
notifempty
|
||||||
|
create 644 root root
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
log info "Konfiguracja rotacji logów została utworzona w $logrotate_config"
|
||||||
|
}
|
||||||
|
|
||||||
main "$@"
|
main "$@"
|
||||||
|
|
@ -1,17 +1,27 @@
|
||||||
global:
|
global:
|
||||||
smtp_smarthost: '$ALERT_SMTP_HOST'
|
smtp_smarthost: '${ALERT_SMTP_HOST}:587'
|
||||||
smtp_from: '$ALERT_SMTP_FROM'
|
smtp_from: '${ADMIN_EMAIL}'
|
||||||
smtp_auth_username: '$ALERT_SMTP_USER'
|
smtp_auth_username: '${ALERT_SMTP_USER}'
|
||||||
smtp_auth_password_file: '/etc/alertmanager/secrets/smtp_pass'
|
smtp_auth_password_file: '/etc/alertmanager/secrets/smtp_pass'
|
||||||
|
|
||||||
route:
|
route:
|
||||||
group_by: ['alertname']
|
group_by: ['alertname']
|
||||||
group_wait: 10s
|
group_wait: 10s
|
||||||
group_interval: 10s
|
group_interval: 10s
|
||||||
repeat_interval: 1h
|
repeat_interval: 1h
|
||||||
receiver: 'email'
|
receiver: 'email-notification'
|
||||||
|
|
||||||
receivers:
|
receivers:
|
||||||
- name: 'email'
|
- name: 'email-notification'
|
||||||
email_configs:
|
email_configs:
|
||||||
- to: '$ADMIN_EMAIL'
|
- to: '${ADMIN_EMAIL}'
|
||||||
subject: 'Alert: {{ "{{" }} .GroupLabels.alertname {{ "}}" }}'
|
subject: 'AutoScript Alert: {{ .GroupLabels.alertname }}'
|
||||||
|
text: "Alert: {{ .Annotations.summary }}\nDescription: {{ .Annotations.description }}\n"
|
||||||
send_resolved: true
|
send_resolved: true
|
||||||
|
|
||||||
|
inhibit_rules:
|
||||||
|
- source_match:
|
||||||
|
severity: 'critical'
|
||||||
|
target_match:
|
||||||
|
severity: 'warning'
|
||||||
|
equal: ['alertname', 'dev', 'instance']
|
||||||
|
|
|
||||||
|
|
@ -7,24 +7,32 @@ groups:
|
||||||
labels: { severity: critical }
|
labels: { severity: critical }
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Instance {{ $labels.instance }} down"
|
summary: "Instance {{ $labels.instance }} down"
|
||||||
description: "{{ $labels.instance }} of job {{ $labels.job }} down >2m"
|
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes"
|
||||||
- alert: HighCPUUsage
|
- alert: HighCPUUsage
|
||||||
expr: 100 - (avg by(instance)(rate(node_cpu_seconds_total{mode="idle"}[5m]))*100) > 80
|
expr: 100 - (avg by(instance)(rate(node_cpu_seconds_total{mode="idle"}[5m]))*100) > 80
|
||||||
for: 5m
|
for: 5m
|
||||||
labels: { severity: warning }
|
labels: { severity: warning }
|
||||||
annotations: { summary: "High CPU {{ $labels.instance }}" }
|
annotations:
|
||||||
|
summary: "High CPU usage on {{ $labels.instance }}"
|
||||||
|
description: "CPU usage is above 80% for more than 5 minutes on {{ $labels.instance }}"
|
||||||
- alert: HighMemoryUsage
|
- alert: HighMemoryUsage
|
||||||
expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)/node_memory_MemTotal_bytes*100 > 90
|
expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)/node_memory_MemTotal_bytes*100 > 90
|
||||||
for: 5m
|
for: 5m
|
||||||
labels: { severity: warning }
|
labels: { severity: warning }
|
||||||
annotations: { summary: "High memory {{ $labels.instance }}" }
|
annotations:
|
||||||
|
summary: "High memory usage on {{ $labels.instance }}"
|
||||||
|
description: "Memory usage is above 90% for more than 5 minutes on {{ $labels.instance }}"
|
||||||
- alert: DiskSpaceLow
|
- alert: DiskSpaceLow
|
||||||
expr: (node_filesystem_avail_bytes{fstype!~"tmpfs|overlay"}/node_filesystem_size_bytes) < 0.1
|
expr: (node_filesystem_avail_bytes{fstype!~"tmpfs|overlay"}/node_filesystem_size_bytes) < 0.1
|
||||||
for: 10m
|
for: 10m
|
||||||
labels: { severity: warning }
|
labels: { severity: warning }
|
||||||
annotations: { summary: "Low disk {{ $labels.instance }}" }
|
annotations:
|
||||||
|
summary: "Low disk space on {{ $labels.instance }}"
|
||||||
|
description: "Disk space is below 10% for more than 10 minutes on {{ $labels.instance }}"
|
||||||
- alert: CertificateExpiration
|
- alert: CertificateExpiration
|
||||||
expr: probe_ssl_earliest_cert_expiry - time() < 604800
|
expr: probe_ssl_earliest_cert_expiry - time() < 604800
|
||||||
for: 0m
|
for: 0m
|
||||||
labels: { severity: warning }
|
labels: { severity: warning }
|
||||||
annotations: { summary: "Cert expires soon {{ $labels.instance }}" }
|
annotations:
|
||||||
|
summary: "SSL certificate expiring soon on {{ $labels.instance }}"
|
||||||
|
description: "SSL certificate for {{ $labels.instance }} expires in less than 7 days"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue