Rules

APAC-SG

6.762s ago

761.6ms

Rule State Error Last Evaluation Evaluation Time
alert: http_server_error expr: sum(irate(haproxy_backend_http_responses_total{backend!~".*nobid-cluster",code="5xx",job="haproxy"}[5m])) > sum(irate(haproxy_backend_http_responses_total{backend!~".*nobid-cluster",job="haproxy"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: HTTP 5XX errors more than 3% for more than 5 minutes. summary: '[APAC-SG] HTTP 5XX error' ok 6.762s ago 439.7ms
alert: ssp_error expr: sum(rate(haproxy_backend_http_responses_total{backend="ssp-cluster",code=~"[45]xx"}[5m])) > sum(rate(haproxy_backend_http_responses_total{backend="ssp-cluster"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: SSP Cluster responds HTTP 4xx/5xx more than 3% of total responses for more than 5 minutes. summary: '[APAC-SG] SSP Cluster Crashes' ok 6.323s ago 21.86ms
alert: ssp-cluster_haproxy_response_time_exception expr: haproxy_backend_http_response_time_average_seconds{backend="ssp-cluster"} > 0.5 for: 10m labels: datacenter: APAC-SG service: system severity: page annotations: description: SSP Cluster response time exception for more than 10 minutes. summary: '[APAC-SG] SSP Cluster response time exception' ok 6.301s ago 3.246ms
alert: joox-ssp_error expr: sum(rate(haproxy_backend_http_responses_total{backend="joox-ssp-cluster",code=~"[45]xx"}[5m])) > sum(rate(haproxy_backend_http_responses_total{backend="joox-ssp-cluster"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: Joox-SSP Cluster responds HTTP 4xx/5xx more than 3% of total responses for more than 5 minutes. summary: '[APAC-SG] Joox-SSP Cluster Crashes' ok 6.298s ago 21.85ms
alert: joox-adx_error expr: sum(rate(haproxy_backend_http_responses_total{backend="joox-adx-cluster",code=~"[45]xx"}[5m])) > sum(rate(haproxy_backend_http_responses_total{backend="joox-adx-cluster"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: Joox-ADX Cluster responds HTTP 4xx/5xx more than 3% of total responses for more than 5 minutes. summary: '[APAC-SG] Joox-ADX Cluster Crashes' ok 6.277s ago 20.98ms
alert: joox-adx-cluster_haproxy_response_time_exception expr: haproxy_backend_http_response_time_average_seconds{backend="joox-adx-cluster"} > 0.5 for: 10m labels: datacenter: APAC-SG service: system severity: page annotations: description: Joox-ADX Cluster response time exception for more than 10 minutes. summary: '[APAC-SG] Joox-ADX Cluster response time exception' ok 6.256s ago 2.539ms
alert: adtrack_error expr: sum(rate(haproxy_backend_http_responses_total{backend="adtrack-cluster",code=~"[45]xx"}[5m])) > sum(rate(haproxy_backend_http_responses_total{backend="adtrack-cluster"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: ADTRACK Cluster responds HTTP 4xx/5xx more than 3% of total responses for more than 5 minutes. summary: '[APAC-SG] ADTRACK Cluster Crashes' ok 6.254s ago 32.01ms
alert: adtrack-cluster_haproxy_response_time_exception expr: haproxy_backend_http_response_time_average_seconds{backend="adtrack-cluster"} > 0.1 for: 10m labels: datacenter: APAC-SG service: system severity: page annotations: description: Adtrack Cluster response time exception for more than 10 minutes. summary: '[APAC-SG] Adtrack Cluster response time exception' ok 6.222s ago 3.66ms
alert: idsync_error expr: sum(rate(haproxy_backend_http_responses_total{backend="idsync-cluster",code=~"[45]xx"}[5m])) > sum(rate(haproxy_backend_http_responses_total{backend="idsync-cluster"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: Idsync Cluster responds HTTP 4xx/5xx more than 3% of total responses for more than 5 minutes. summary: '[APAC-SG] Idsync Cluster Crashes' ok 6.219s ago 28.5ms
alert: idsync-cluster_haproxy_response_time_exception expr: haproxy_backend_http_response_time_average_seconds{backend="idsync-cluster"} > 0.1 for: 10m labels: datacenter: APAC-SG service: system severity: page annotations: description: Idsync Cluster response time exception for more than 10 minutes. summary: '[APAC-SG] Idsync Cluster response time exception' ok 6.19s ago 3.747ms
alert: logserver_error expr: sum(rate(haproxy_backend_http_responses_total{backend="log-cluster",code=~"[45]xx"}[5m])) > sum(rate(haproxy_backend_http_responses_total{backend="log-cluster"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: LogServer Cluster responds HTTP 4xx/5xx more than 3% of total responses for more than 5 minutes. summary: '[APAC-SG] LogServer Cluster Crashes' ok 6.187s ago 27.86ms
alert: log-cluster_haproxy_response_time_exception expr: haproxy_backend_http_response_time_average_seconds{backend="log-cluster"} > 0.1 for: 10m labels: datacenter: APAC-SG service: system severity: page annotations: description: Log Cluster response time exception for more than 10 minutes. summary: '[APAC-SG] Log Cluster response time exception' ok 6.159s ago 3.116ms
alert: dsp_error expr: sum(rate(haproxy_backend_http_responses_total{backend="dsp-cluster",code=~"[45]xx"}[5m])) > sum(rate(haproxy_backend_http_responses_total{backend="dsp-cluster"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: DSP Cluster responds HTTP 4xx/5xx more than 3% of total responses for more than 5 minutes. summary: '[APAC-SG] DSP Cluster Crashes' ok 6.156s ago 27.58ms
alert: dsp-cluster_haproxy_response_time_exception expr: haproxy_backend_http_response_time_average_seconds{backend="dsp-cluster"} > 0.15 for: 10m labels: datacenter: APAC-SG service: system severity: page annotations: description: DSP Cluster response time exception for more than 10 minutes. summary: '[APAC-SG] DSP Cluster response time exception' ok 6.129s ago 3.332ms
alert: adx_error expr: sum(rate(haproxy_backend_http_responses_total{backend="adx-cluster",code=~"[45]xx"}[5m])) > sum(rate(haproxy_backend_http_responses_total{backend="adx-cluster"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: ADX Cluster responds HTTP 4xx/5xx more than 3% of total responses for more than 5 minutes. summary: '[APAC-SG] ADX Cluster Crashes' ok 6.126s ago 28.19ms
alert: adx-cluster_haproxy_response_time_exception expr: haproxy_backend_http_response_time_average_seconds{backend="adx-cluster"} > 0.3 for: 10m labels: datacenter: APAC-SG service: system severity: page annotations: description: ADX Cluster response time exception for more than 10 minutes. summary: '[APAC-SG] ADX Cluster response time exception' ok 6.098s ago 4.291ms
alert: hb_error expr: sum(rate(haproxy_backend_http_responses_total{backend="hb-cluster",code=~"[45]xx"}[5m])) > sum(rate(haproxy_backend_http_responses_total{backend="hb-cluster"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: HB Cluster responds HTTP 4xx/5xx more than 3% of total responses for more than 5 minutes. summary: '[APAC-SG] HB Cluster Crashes' ok 6.094s ago 32.24ms
alert: hb-cluster_haproxy_response_time_exception expr: haproxy_backend_http_response_time_average_seconds{backend="hb-cluster"} > 0.5 for: 10m labels: datacenter: APAC-SG service: system severity: page annotations: description: HB Cluster response time exception for more than 10 minutes. summary: '[APAC-SG] HB Cluster response time exception' ok 6.062s ago 3.437ms
alert: hbwa_error expr: sum(rate(haproxy_backend_http_responses_total{backend="hbwa-cluster",code=~"[5]xx"}[5m])) > sum(rate(haproxy_backend_http_responses_total{backend="hbwa-cluster"}[5m])) * 0.03 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: HBWA Cluster responds HTTP 5xx more than 3% of total responses for more than 5 minutes. summary: '[APAC-SG] HBWA Cluster Crashes' ok 6.058s ago 19.23ms
alert: hbwa-cluster_haproxy_response_time_exception expr: haproxy_backend_http_response_time_average_seconds{backend="hbwa-cluster"} > 0.1 for: 10m labels: datacenter: APAC-SG service: system severity: page annotations: description: HBWA Cluster response time exception for more than 10 minutes. summary: '[APAC-SG] HBWA Cluster response time exception' ok 6.039s ago 2.416ms
alert: InstanceDown expr: up == 0 for: 1m labels: datacenter: APAC-SG service: system severity: page annotations: description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.' summary: '[APAC-SG] Instance {{ $labels.instance }} down' ok 6.037s ago 3.001ms
alert: short_free_storage_error expr: node_filesystem_free_bytes{mountpoint="/etc/hosts"} / node_filesystem_size_bytes < 0.1 for: 5m labels: datacenter: APAC-SG service: server severity: page annotations: description: 'Instance {{ $labels.instance }} has low free storage (current values: {{ $value }})' summary: '[APAC-SG] Instance {{ $labels.instance }} has low free storage' ok 6.034s ago 1.303ms
alert: low_avail_mem_error expr: (node_memory_MemFree_bytes{job="node"} + node_memory_Cached_bytes{job="node"} + node_memory_Buffers_bytes{job="node"}) / node_memory_MemTotal_bytes < 0.1 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: 'Instance {{ $labels.instance }} has low available memory (current values: {{ $value }})' summary: '[APAC-SG] Instance {{ $labels.instance }} has low available memory' ok 6.033s ago 1.433ms
alert: too_much_mongo_connection expr: (mongodb_connections{job="mongo",state="current"} or mongodb_ss_connections{conn_type="current"}) > 10000 for: 5m labels: datacenter: APAC-SG service: system severity: page annotations: description: 'MongoDB instance {{ $labels.instance }} has too many connections (current values: {{ $value }})' summary: '[APAC-SG] MongoDB instance {{ $labels.instance }} has too many connections' ok 6.032s ago 412.9us
alert: too_high_cpu_usage expr: 100 * (1 - avg by(instance) (irate(node_cpu_seconds_total{job="node",mode="idle"}[5m]))) > 85 for: 10m labels: datacenter: APAC-SG service: system severity: page annotations: description: 'Instance {{ $labels.instance }} has too high CPU usage (current values: {{ $value }})' summary: '[APAC-SG] Instance {{ $labels.instance }} CPU usage exceeds 85%' ok 6.032s ago 22.99ms
alert: mongodb_replication_lag expr: ((mongodb_replset_my_replica_lag) or (mongodb_mongod_replset_member_replication_lag{state!="ARBITER"})) > 7200 for: 30m labels: datacenter: APAC-SG service: system severity: page annotations: description: 'MongoDB instance {{ $labels.instance }} replication lag is too high (current values: {{ $value }})' summary: '[APAC-SG] MongoDB instance {{ $labels.instance }} replication lag is too high' ok 6.009s ago 680.4us
alert: redis_master_last_io_seconds expr: (redis_master_last_io_seconds) == -1 for: 30m labels: datacenter: APAC-SG service: system severity: page annotations: description: 'Redis instance {{ $labels.instance }} Redis connection with master lost (current values: {{ $value }})' summary: '[APAC-SG] Redis instance {{ $labels.instance }} Redis connection with master lost' ok 6.009s ago 205.9us
alert: Redis_Down expr: redis_up != 1 for: 1m labels: datacenter: APAC-SG service: system severity: page ok 6.009s ago 290.2us
alert: node_high_disk_io_usage expr: (rate(node_disk_io_time_seconds_total[5m]) or irate(node_disk_io_time_seconds_total[5m])) * 100 > 90 for: 5m labels: datacenter: APAC-SG service: server severity: page annotations: description: 'Node instance {{ $labels.instance }} Node has high disk io usage (current values: {{ $value }})' summary: '[APAC-SG] Node instance {{ $labels.instance }} Node has high disk io usage' ok 6.009s ago 1.265ms