Prometheus 监控 Kafka 和 Redis 配置文档

1. 上传镜像

将 kafka_exporter.tar 和 redis_exporter.tar 上传到服务器的 /tmp/promls 目录,然后执行以下命令:
# Load the exporter images from the uploaded tarballs.
cd /tmp/promls
docker load < kafka_exporter.tar
docker load < redis_exporter.tar

# Re-tag the exporter images for the private registry at 172.31.10.118.
docker tag danielqsj/kafka-exporter:latest 172.31.10.118/monitor/kafka-exporter:v1.0
docker tag oliver006/redis_exporter:latest 172.31.10.118/monitor/redis_exporter:v1.0

# Re-tag and push the Prometheus stack images to the registry at 172.31.70.135:8443.
# NOTE(review): these images are not loaded from the tarballs above, so they
# must already exist locally, and no `docker login` for this registry is shown
# here -- confirm both before running.
docker tag prom/grafana 172.31.70.135:8443/prom/grafana:v1.0
docker push 172.31.70.135:8443/prom/grafana:v1.0
docker tag prom/node-exporter:v0.15.2 172.31.70.135:8443/prom/node-exporter:v0.15.2
docker push 172.31.70.135:8443/prom/node-exporter:v0.15.2
docker tag prom/prometheus:v2.2.1 172.31.70.135:8443/prom/prometheus:v2.2.1
docker push 172.31.70.135:8443/prom/prometheus:v2.2.1

# Log in to the 172.31.10.118 registry, then push the two exporter images.
docker login 172.31.10.118
docker push 172.31.10.118/monitor/kafka-exporter:v1.0
docker push 172.31.10.118/monitor/redis_exporter:v1.0
2. 使用 docker 部署 kafka_exporter(在 172.31.10.127 上执行)
# Start kafka_exporter in the background on the host network, restarting it
# automatically, and point it at the Kafka broker on 172.31.10.127:9092.
# Prometheus scrapes this exporter at 172.31.10.127:9308.
docker run -d --restart=always --net=host --name kafka_exporter \
  172.31.10.118/monitor/kafka-exporter:v1.0 \
  --kafka.server=172.31.10.127:9092
3. 使用 docker 部署 redis_exporter(在 172.31.10.123 上执行)
# Start redis_exporter in the background, publishing its metrics port 9121 on
# the host, and point it at the Redis instance on 172.31.10.123:6379.
# --restart=always matches the kafka_exporter container so the exporter comes
# back after a Docker daemon or host restart.
# 'redis密码' is a placeholder: replace it with the real Redis password.
# NOTE(review): a password passed as an argument is visible in `docker inspect`;
# consider supplying it through the exporter's REDIS_PASSWORD environment
# variable instead -- confirm against the exporter version in use.
docker run -d --restart=always --name redis_exporter -p 9121:9121 \
  172.31.10.118/monitor/redis_exporter:v1.0 \
  --redis.addr 172.31.10.123:6379 \
  --redis.password 'redis密码'
4. 修改 Prometheus 配置

kubectl edit cm -n monitoring monitor-prometheus-server
增加以下配置(告警规则追加到告警规则组的 rules 列表下,job 追加到 prometheus.yml 的 scrape_configs 列表下):
# ---- Alerting rules: append these items to the `rules:` list of an alert rule group ----

# Fires when redis_up has been 0 for 5 minutes.
- alert: RedisDown
  expr: redis_up == 0
  for: 5m
  labels:
    severity: error
  annotations:
    summary: "Redis down (instance {{ $labels.instance }} )"
    description: "Redis 挂了啊,mmp\n VALUE = {{ $value }} \n LABELS: {{ $labels }} "

# Fires when the last RDB save is more than 24 hours old (60 * 60 * 24 seconds).
- alert: MissingBackup
  expr: time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 24
  for: 5m
  labels:
    severity: error
  annotations:
    summary: "Missing backup (instance {{ $labels.instance }} )"
    description: "Redis has not been backuped for 24 hours\n VALUE = {{ $value }} \n LABELS: {{ $labels }} "

# Fires when Redis memory usage exceeds 90% of total system memory.
- alert: OutOfMemory
  expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 90
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "Out of memory (instance {{ $labels.instance }} )"
    description: "Redis is running out of memory (> 90%)\n VALUE = {{ $value }} \n LABELS: {{ $labels }} "

# Fires immediately when the in-sync replica count summed per topic drops below 3.
# The expression aggregates `by (topic)`, so `topic` is the only label left on
# the alert; the summary uses it instead of `instance`, which would render empty.
# NOTE(review): the sum runs over all partitions of a topic, so a topic with
# several partitions can stay at or above 3 even when individual partitions are
# under-replicated -- confirm the threshold/aggregation is what is intended.
- alert: '告警!Kafka Topics 副本数少于3'
  expr: sum(kafka_topic_partition_in_sync_replica) by (topic) < 3
  for: 0m
  labels:
    severity: 严重告警
  annotations:
    summary: "{{ $labels.topic }} Kafka topics 副本数少于3"
    description: "Kafka topic 分区不同步\n 当前值 = {{ $value }} "

# Fires when a consumer group's total lag stays above 50 for 1 minute.
# The expression aggregates `by (consumergroup)`, so the summary uses that
# label instead of `instance`, which would render empty.
- alert: '告警!KafkaConsumersGroup'
  expr: sum(kafka_consumergroup_lag) by (consumergroup) > 50
  for: 1m
  labels:
    severity: 严重告警
  annotations:
    summary: "{{ $labels.consumergroup }} Kafka consumers group"
    description: "Kafka consumers group\n 当前值 = {{ $value }} "

# ---- Scrape jobs: append these items to the `scrape_configs:` list in prometheus.yml ----

# kafka_exporter running on 172.31.10.127.
- job_name: 'kafka'
  scrape_interval: 30s
  static_configs:
    - targets: ['172.31.10.127:9308']

# redis_exporter running on 172.31.10.123 (published port 9121).
- job_name: 'redis'
  scrape_interval: 30s
  static_configs:
    - targets: ['172.31.10.123:9121']
5. 增加 Grafana 仪表盘

Kafka 的模板 ID:12326
Redis 的模板 ID:11692