diff --git a/cmd/storage-datatypes.go b/cmd/storage-datatypes.go index 8373c6bc3..8e1d28b99 100644 --- a/cmd/storage-datatypes.go +++ b/cmd/storage-datatypes.go @@ -61,8 +61,10 @@ type DiskInfo struct { // the number of calls of each API and the moving average of // the duration of each API. type DiskMetrics struct { - LastMinute map[string]AccElem `json:"apiLatencies,omitempty"` - APICalls map[string]uint64 `json:"apiCalls,omitempty"` + LastMinute map[string]AccElem `json:"apiLatencies,omitempty"` + APICalls map[string]uint64 `json:"apiCalls,omitempty"` + TotalErrorsAvailability uint64 `json:"totalErrsAvailability"` + TotalErrorsTimeout uint64 `json:"totalErrsTimeout"` } // VolsInfo is a collection of volume(bucket) information diff --git a/cmd/storage-datatypes_gen.go b/cmd/storage-datatypes_gen.go index ee4da0f85..07d19c9d9 100644 --- a/cmd/storage-datatypes_gen.go +++ b/cmd/storage-datatypes_gen.go @@ -399,6 +399,18 @@ func (z *DiskMetrics) DecodeMsg(dc *msgp.Reader) (err error) { } z.APICalls[za0003] = za0004 } + case "TotalErrorsAvailability": + z.TotalErrorsAvailability, err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, "TotalErrorsAvailability") + return + } + case "TotalErrorsTimeout": + z.TotalErrorsTimeout, err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, "TotalErrorsTimeout") + return + } default: err = dc.Skip() if err != nil { @@ -412,9 +424,9 @@ func (z *DiskMetrics) DecodeMsg(dc *msgp.Reader) (err error) { // EncodeMsg implements msgp.Encodable func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) { - // map header, size 2 + // map header, size 4 // write "LastMinute" - err = en.Append(0x82, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65) + err = en.Append(0x84, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65) if err != nil { return } @@ -457,15 +469,35 @@ func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) { return } } + // write "TotalErrorsAvailability" + err 
= en.Append(0xb7, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79) + if err != nil { + return + } + err = en.WriteUint64(z.TotalErrorsAvailability) + if err != nil { + err = msgp.WrapError(err, "TotalErrorsAvailability") + return + } + // write "TotalErrorsTimeout" + err = en.Append(0xb2, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74) + if err != nil { + return + } + err = en.WriteUint64(z.TotalErrorsTimeout) + if err != nil { + err = msgp.WrapError(err, "TotalErrorsTimeout") + return + } return } // MarshalMsg implements msgp.Marshaler func (z *DiskMetrics) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) - // map header, size 2 + // map header, size 4 // string "LastMinute" - o = append(o, 0x82, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65) + o = append(o, 0x84, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65) o = msgp.AppendMapHeader(o, uint32(len(z.LastMinute))) for za0001, za0002 := range z.LastMinute { o = msgp.AppendString(o, za0001) @@ -482,6 +514,12 @@ func (z *DiskMetrics) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.AppendString(o, za0003) o = msgp.AppendUint64(o, za0004) } + // string "TotalErrorsAvailability" + o = append(o, 0xb7, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79) + o = msgp.AppendUint64(o, z.TotalErrorsAvailability) + // string "TotalErrorsTimeout" + o = append(o, 0xb2, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x54, 0x69, 0x6d, 0x65, 0x6f, 0x75, 0x74) + o = msgp.AppendUint64(o, z.TotalErrorsTimeout) return } @@ -563,6 +601,18 @@ func (z *DiskMetrics) UnmarshalMsg(bts []byte) (o []byte, err error) { } z.APICalls[za0003] = za0004 } + case "TotalErrorsAvailability": + z.TotalErrorsAvailability, bts, err = 
msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "TotalErrorsAvailability") + return + } + case "TotalErrorsTimeout": + z.TotalErrorsTimeout, bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "TotalErrorsTimeout") + return + } default: bts, err = msgp.Skip(bts) if err != nil { @@ -591,6 +641,7 @@ func (z *DiskMetrics) Msgsize() (s int) { s += msgp.StringPrefixSize + len(za0003) + msgp.Uint64Size } } + s += 24 + msgp.Uint64Size + 19 + msgp.Uint64Size return } diff --git a/cmd/xl-storage-disk-id-check.go b/cmd/xl-storage-disk-id-check.go index 574cc2c47..eef9ccd84 100644 --- a/cmd/xl-storage-disk-id-check.go +++ b/cmd/xl-storage-disk-id-check.go @@ -77,6 +77,8 @@ const ( // Detects change in underlying disk. type xlStorageDiskIDCheck struct { + totalErrsAvailability uint64 // Captures all data availability errors such as permission denied, faulty disk and timeout errors. + totalErrsTimeout uint64 // Captures timeout-only errors (context deadline exceeded or canceled). // apiCalls should be placed first so alignment is guaranteed for atomic operations. 
apiCalls [storageMetricLast]uint64 apiLatencies [storageMetricLast]*lockedLastMinuteLatency @@ -102,6 +104,8 @@ func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics { for i := range p.apiCalls { diskMetric.APICalls[storageMetric(i).String()] = atomic.LoadUint64(&p.apiCalls[i]) } + diskMetric.TotalErrorsAvailability = atomic.LoadUint64(&p.totalErrsAvailability) + diskMetric.TotalErrorsTimeout = atomic.LoadUint64(&p.totalErrsTimeout) return diskMetric, nil } }) @@ -661,15 +665,34 @@ func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric, paths ...st return func(errp *error) { duration := time.Since(startTime) + var err error + if errp != nil && *errp != nil { + err = *errp + } + atomic.AddUint64(&p.apiCalls[s], 1) + if IsErr(err, []error{ + errVolumeAccessDenied, + errFileAccessDenied, + errDiskAccessDenied, + errFaultyDisk, + errFaultyRemoteDisk, + context.DeadlineExceeded, + context.Canceled, + }...) { + atomic.AddUint64(&p.totalErrsAvailability, 1) + if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { + atomic.AddUint64(&p.totalErrsTimeout, 1) + } + } p.apiLatencies[s].add(duration) if trace { - var errStr string - if errp != nil && *errp != nil { - errStr = (*errp).Error() - } paths = append([]string{p.String()}, paths...) + var errStr string + if err != nil { + errStr = err.Error() + } globalTrace.Publish(storageTrace(s, startTime, duration, strings.Join(paths, " "), errStr)) } }