From 7413045f0e6cdc80b9364bef7946e999087f7a42 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sat, 11 Jun 2022 00:50:31 -0700 Subject: [PATCH] fix: add missing minio_s3_requests_total (#15070) PR #15052 caused a regression, add the missing metrics back. Bonus: - internode information should be only for distributed setups - update the dashboard to include 4xx and 5xx error panels. --- cmd/generic-handlers.go | 4 +- cmd/http-stats.go | 2 + cmd/metrics-v2.go | 26 ++- .../prometheus/grafana/minio-dashboard.json | 216 +++++++++++++++++- 4 files changed, 223 insertions(+), 25 deletions(-) diff --git a/cmd/generic-handlers.go b/cmd/generic-handlers.go index 5479dced4..99f747c96 100644 --- a/cmd/generic-handlers.go +++ b/cmd/generic-handlers.go @@ -282,7 +282,9 @@ func setHTTPStatsHandler(h http.Handler) http.Handler { r.Body = meteredRequest h.ServeHTTP(meteredResponse, r) - if strings.HasPrefix(r.URL.Path, minioReservedBucketPath) { + if strings.HasPrefix(r.URL.Path, storageRESTPrefix) || + strings.HasPrefix(r.URL.Path, peerRESTPrefix) || + strings.HasPrefix(r.URL.Path, lockRESTPrefix) { globalConnStats.incInputBytes(meteredRequest.BytesRead()) globalConnStats.incOutputBytes(meteredResponse.BytesWritten()) } else { diff --git a/cmd/http-stats.go b/cmd/http-stats.go index 7b9361250..6cae30bd8 100644 --- a/cmd/http-stats.go +++ b/cmd/http-stats.go @@ -199,6 +199,8 @@ func (st *HTTPStats) updateStats(api string, r *http.Request, w *logger.Response return } + st.totalS3Requests.Inc(api) + // Increment the prometheus http request response histogram with appropriate label httpRequestsDuration.With(prometheus.Labels{"api": api}).Observe(w.TimeToFirstByte.Seconds()) diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go index 05746a3ba..cc43a2000 100644 --- a/cmd/metrics-v2.go +++ b/cmd/metrics-v2.go @@ -1587,19 +1587,21 @@ func getNetworkMetrics() *MetricsGroup { mg := &MetricsGroup{} mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { metrics = make([]Metric, 0, 10) - metrics = append(metrics, Metric{ - Description: getInternodeFailedRequests(), - Value: float64(loadAndResetRPCNetworkErrsCounter()), - }) connStats := globalConnStats.toServerConnStats() - metrics = append(metrics, Metric{ - Description: getInterNodeSentBytesMD(), - Value: float64(connStats.TotalOutputBytes), - }) - metrics = append(metrics, Metric{ - Description: getInterNodeReceivedBytesMD(), - Value: float64(connStats.TotalInputBytes), - }) + if globalIsDistErasure { + metrics = append(metrics, Metric{ + Description: getInternodeFailedRequests(), + Value: float64(loadAndResetRPCNetworkErrsCounter()), + }) + metrics = append(metrics, Metric{ + Description: getInterNodeSentBytesMD(), + Value: float64(connStats.TotalOutputBytes), + }) + metrics = append(metrics, Metric{ + Description: getInterNodeReceivedBytesMD(), + Value: float64(connStats.TotalInputBytes), + }) + } metrics = append(metrics, Metric{ Description: getS3SentBytesMD(), Value: float64(connStats.S3OutputBytes), diff --git a/docs/metrics/prometheus/grafana/minio-dashboard.json b/docs/metrics/prometheus/grafana/minio-dashboard.json index 5edff5e28..696bfb15a 100644 --- a/docs/metrics/prometheus/grafana/minio-dashboard.json +++ b/docs/metrics/prometheus/grafana/minio-dashboard.json @@ -72,7 +72,7 @@ "gnetId": 13502, "graphTooltip": 0, "id": null, - "iteration": 1654578559812, + "iteration": 1654921222878, "links": [ { "icon": "external link", @@ -1711,6 +1711,198 @@ "alignLevel": null } }, + { + "aliasColors": { + "S3 Errors": "light-red", + "S3 Requests": "light-green" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 22 + }, + "hiddenSeries": false, + "id": 86, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum by (server,api) (increase(minio_s3_requests_5xx_errors_total{job=\"$scrape_jobs\"}[$__rate_interval]))", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{server,api}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "S3 API Request Error Rate (5xx)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:331", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:332", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "S3 Errors": "light-red", + "S3 Requests": "light-green" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 22 + }, + "hiddenSeries": false, + "id": 88, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum by (server,api) (increase(minio_s3_requests_4xx_errors_total{job=\"$scrape_jobs\"}[$__rate_interval]))", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{server,api}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "S3 API Request Error Rate (4xx)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:331", + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:332", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": { "10.13.1.25:9000 DELETE": "red", @@ -1734,7 +1926,7 @@ "h": 9, "w": 12, "x": 0, - "y": 22 + "y": 32 }, "hiddenSeries": false, "id": 17, @@ -1839,7 +2031,7 @@ "h": 9, "w": 12, "x": 12, - "y": 22 + "y": 32 }, "hiddenSeries": false, "id": 84, @@ -1947,7 +2139,7 @@ "h": 9, "w": 12, "x": 0, - "y": 31 + "y": 41 }, "hiddenSeries": false, "id": 77, @@ -2039,7 +2231,7 @@ "h": 9, "w": 12, "x": 12, - "y": 31 + "y": 41 }, "hiddenSeries": false, "id": 76, @@ -2131,7 +2323,7 @@ "h": 8, "w": 12, "x": 0, - "y": 40 + "y": 50 }, "hiddenSeries": false, "id": 74, @@ -2225,7 +2417,7 @@ "h": 8, "w": 12, "x": 12, - "y": 40 + "y": 50 }, "hiddenSeries": false, "id": 82, @@ -2330,7 +2522,7 @@ "h": 9, "w": 12, "x": 0, - "y": 48 + "y": 58 }, "hiddenSeries": false, "id": 11, @@ -2447,7 +2639,7 @@ "h": 9, "w": 12, "x": 12, - "y": 48 + "y": 58 }, "hiddenSeries": false, "id": 8, @@ -2542,7 +2734,7 @@ "h": 7, "w": 24, "x": 0, - "y": 57 + "y": 67 }, "hiddenSeries": false, "id": 73, @@ -2666,7 +2858,7 @@ ] }, "time": { - "from": "now-3h", + "from": "now-1h", "to": "now" }, "timepicker": { @@ -2696,5 +2888,5 @@ "timezone": "", "title": "MinIO Dashboard", "uid": "TgmJnqnnk", - "version": 8 + "version": 10 }