Skip to content

Commit 601775e

Browse files
committed
fix: support transient SMART failures (upstream PR AnalogJ#375)
2 parents 0328ddc + 2e04c0f commit 601775e

File tree

12 files changed

+132
-31
lines changed

12 files changed

+132
-31
lines changed

collector/pkg/config/interface.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ type Interface interface {
2020
GetInt(key string) int
2121
GetString(key string) string
2222
GetStringSlice(key string) []string
23+
GetIntSlice(key string) []int
2324
UnmarshalKey(key string, rawVal interface{}, decoderOpts ...viper.DecoderConfigOption) error
2425

2526
GetDeviceOverrides() []models.ScanOverride

collector/pkg/config/mock/mock_config.go

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

example.scrutiny.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,10 @@ log:
5757
file: '' #absolute or relative paths allowed, eg. web.log
5858
level: INFO
5959

60+
failures:
61+
transient:
62+
ata:
63+
- 195 # Hardware_ECC_Recovered, see https://superuser.com/a/1511916/169872
6064

6165
# Notification "urls" look like the following. For more information about service specific configuration see
6266
# Shoutrrr's documentation: https://containrrr.dev/shoutrrr/services/overview/

webapp/backend/pkg/config/config.go

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,13 @@
11
package config
22

33
import (
4-
"github.com/analogj/go-util/utils"
5-
"github.com/analogj/scrutiny/webapp/backend/pkg/errors"
6-
"github.com/spf13/viper"
74
"log"
85
"os"
96
"strings"
7+
8+
"github.com/analogj/go-util/utils"
9+
"github.com/analogj/scrutiny/webapp/backend/pkg/errors"
10+
"github.com/spf13/viper"
1011
)
1112

1213
const DB_USER_SETTINGS_SUBKEY = "user"
@@ -52,6 +53,8 @@ func (c *configuration) Init() error {
5253
c.SetDefault("web.influxdb.tls.insecure_skip_verify", false)
5354
c.SetDefault("web.influxdb.retention_policy", true)
5455

56+
c.SetDefault("failures.transient.ata", []int{195})
57+
5558
//c.SetDefault("disks.include", []string{})
5659
//c.SetDefault("disks.exclude", []string{})
5760

webapp/backend/pkg/config/interface.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,5 +25,6 @@ type Interface interface {
2525
GetInt64(key string) int64
2626
GetString(key string) string
2727
GetStringSlice(key string) []string
28+
GetIntSlice(key string) []int
2829
UnmarshalKey(key string, rawVal interface{}, decoderOpts ...viper.DecoderConfigOption) error
2930
}

webapp/backend/pkg/config/mock/mock_config.go

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

webapp/backend/pkg/constants.go

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,9 @@ const DeviceProtocolAta = "ATA"
44
const DeviceProtocolScsi = "SCSI"
55
const DeviceProtocolNvme = "NVMe"
66

7-
//go:generate stringer -type=AttributeStatus
87
// AttributeStatus bitwise flag, 1,2,4,8,16,32,etc
8+
//
9+
//go:generate stringer -type=AttributeStatus
910
type AttributeStatus uint8
1011

1112
const (
@@ -23,8 +24,9 @@ func AttributeStatusClear(b, flag AttributeStatus) AttributeStatus { return b &
2324
func AttributeStatusToggle(b, flag AttributeStatus) AttributeStatus { return b ^ flag }
2425
func AttributeStatusHas(b, flag AttributeStatus) bool { return b&flag != 0 }
2526

26-
//go:generate stringer -type=DeviceStatus
2727
// DeviceStatus bitwise flag, 1,2,4,8,16,32,etc
28+
//
29+
//go:generate stringer -type=DeviceStatus
2830
type DeviceStatus uint8
2931

3032
const (

webapp/backend/pkg/database/scrutiny_repository_device_smart_attributes.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,12 +13,12 @@ import (
1313
log "github.com/sirupsen/logrus"
1414
)
1515

16-
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
16+
// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1717
// SMART
18-
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
18+
// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1919
func (sr *scrutinyRepository) SaveSmartAttributes(ctx context.Context, wwn string, collectorSmartData collector.SmartInfo) (measurements.Smart, error) {
2020
deviceSmartData := measurements.Smart{}
21-
err := deviceSmartData.FromCollectorSmartInfo(wwn, collectorSmartData)
21+
err := deviceSmartData.FromCollectorSmartInfo(sr.appConfig, wwn, collectorSmartData)
2222
if err != nil {
2323
sr.logger.Errorln("Could not process SMART metrics", err)
2424
return measurements.Smart{}, err

webapp/backend/pkg/database/scrutiny_repository_migrations.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -529,7 +529,7 @@ func m20201107210306_FromPreInfluxDBSmartResultsCreatePostInfluxDBSmartResults(d
529529
})
530530
}
531531

532-
postDeviceSmartData.ProcessAtaSmartInfo(preAtaSmartAttributesTable)
532+
postDeviceSmartData.ProcessAtaSmartInfo(nil, preAtaSmartAttributesTable)
533533

534534
} else if preDevice.IsNvme() {
535535
//info collector.SmartInfo

webapp/backend/pkg/models/measurements/smart.go

Lines changed: 25 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,15 @@ package measurements
22

33
import (
44
"fmt"
5-
"github.com/analogj/scrutiny/webapp/backend/pkg"
6-
"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
7-
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
85
"log"
96
"strconv"
107
"strings"
118
"time"
9+
10+
"github.com/analogj/scrutiny/webapp/backend/pkg"
11+
"github.com/analogj/scrutiny/webapp/backend/pkg/config"
12+
"github.com/analogj/scrutiny/webapp/backend/pkg/models/collector"
13+
"github.com/analogj/scrutiny/webapp/backend/pkg/thresholds"
1214
)
1315

1416
type Smart struct {
@@ -100,8 +102,8 @@ func NewSmartFromInfluxDB(attrs map[string]interface{}) (*Smart, error) {
100102
return &sm, nil
101103
}
102104

103-
//Parse Collector SMART data results and create Smart object (and associated SmartAtaAttribute entries)
104-
func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) error {
105+
// Parse Collector SMART data results and create Smart object (and associated SmartAtaAttribute entries)
106+
func (sm *Smart) FromCollectorSmartInfo(cfg config.Interface, wwn string, info collector.SmartInfo) error {
105107
sm.DeviceWWN = wwn
106108
sm.Date = time.Unix(info.LocalTime.TimeT, 0)
107109

@@ -117,7 +119,7 @@ func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) er
117119
// process ATA/NVME/SCSI protocol data
118120
sm.Attributes = map[string]SmartAttribute{}
119121
if sm.DeviceProtocol == pkg.DeviceProtocolAta {
120-
sm.ProcessAtaSmartInfo(info.AtaSmartAttributes.Table)
122+
sm.ProcessAtaSmartInfo(cfg, info.AtaSmartAttributes.Table)
121123
} else if sm.DeviceProtocol == pkg.DeviceProtocolNvme {
122124
sm.ProcessNvmeSmartInfo(info.NvmeSmartHealthInformationLog)
123125
} else if sm.DeviceProtocol == pkg.DeviceProtocolScsi {
@@ -127,8 +129,8 @@ func (sm *Smart) FromCollectorSmartInfo(wwn string, info collector.SmartInfo) er
127129
return nil
128130
}
129131

130-
//generate SmartAtaAttribute entries from Scrutiny Collector Smart data.
131-
func (sm *Smart) ProcessAtaSmartInfo(tableItems []collector.AtaSmartAttributesTableItem) {
132+
// generate SmartAtaAttribute entries from Scrutiny Collector Smart data.
133+
func (sm *Smart) ProcessAtaSmartInfo(cfg config.Interface, tableItems []collector.AtaSmartAttributesTableItem) {
132134
for _, collectorAttr := range tableItems {
133135
attrModel := SmartAtaAttribute{
134136
AttributeId: collectorAttr.ID,
@@ -149,13 +151,25 @@ func (sm *Smart) ProcessAtaSmartInfo(tableItems []collector.AtaSmartAttributesTa
149151
attrModel.PopulateAttributeStatus()
150152
sm.Attributes[strconv.Itoa(collectorAttr.ID)] = &attrModel
151153

152-
if pkg.AttributeStatusHas(attrModel.Status, pkg.AttributeStatusFailedScrutiny) {
154+
var transient bool
155+
156+
if cfg != nil {
157+
transients := cfg.GetIntSlice("failures.transient.ata")
158+
for i := range transients {
159+
if collectorAttr.ID == transients[i] {
160+
transient = true
161+
break
162+
}
163+
}
164+
}
165+
166+
if pkg.AttributeStatusHas(attrModel.Status, pkg.AttributeStatusFailedScrutiny) && !transient {
153167
sm.Status = pkg.DeviceStatusSet(sm.Status, pkg.DeviceStatusFailedScrutiny)
154168
}
155169
}
156170
}
157171

158-
//generate SmartNvmeAttribute entries from Scrutiny Collector Smart data.
172+
// generate SmartNvmeAttribute entries from Scrutiny Collector Smart data.
159173
func (sm *Smart) ProcessNvmeSmartInfo(nvmeSmartHealthInformationLog collector.NvmeSmartHealthInformationLog) {
160174

161175
sm.Attributes = map[string]SmartAttribute{
@@ -185,7 +199,7 @@ func (sm *Smart) ProcessNvmeSmartInfo(nvmeSmartHealthInformationLog collector.Nv
185199
}
186200
}
187201

188-
//generate SmartScsiAttribute entries from Scrutiny Collector Smart data.
202+
// generate SmartScsiAttribute entries from Scrutiny Collector Smart data.
189203
func (sm *Smart) ProcessScsiSmartInfo(defectGrownList int64, scsiErrorCounterLog collector.ScsiErrorCounterLog) {
190204
sm.Attributes = map[string]SmartAttribute{
191205
"scsi_grown_defect_list": (&SmartScsiAttribute{AttributeId: "scsi_grown_defect_list", Value: defectGrownList, Threshold: 0}).PopulateAttributeStatus(),

0 commit comments

Comments
 (0)