blob: b1db7603cbeec240f1437d2504f839f9b9db8bfb [file] [log] [blame]
// Copyright 2022 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package util
import (
"context"
"os"
"time"
"chromiumos/tast/errors"
"chromiumos/tast/testing"
)
const (
// Main storage device has to be >= 16GB.
mainStorageDeviceMinSize = 16 * 1024 * 1024 * 1024
// Max number of retries for a sub-test of a universal test block.
maxSubtestRetry = 3
// DefaultStressBlockTimeout is the duration of the stress sub-test.
DefaultStressBlockTimeout = 240 * time.Minute
// DefaultSlcStressBlockTimeout is the duration of the slc-stress sub-test.
DefaultSlcStressBlockTimeout = 240 * time.Minute
// DefaultRetentionBlockTimeout is the duration of the retention sub-test.
DefaultRetentionBlockTimeout = 20 * time.Minute
// DefaultSuspendBlockTimeout is the total duration of the suspend sub-test.
DefaultSuspendBlockTimeout = 10 * time.Minute
)
// SetupBenchmarks captures and records bandwidth and latency disk benchmarks at the
// beginning and the end of the test suite.
func SetupBenchmarks(ctx context.Context, s *testing.State, rw *FioResultWriter, testParam QualParam) {
testConfig := &TestConfig{ResultWriter: rw}
// Run tests to collect metrics for boot device.
runFioStress(ctx, s, testConfig.WithPath(testParam.TestDevice).WithJob("seq_write"))
runFioStress(ctx, s, testConfig.WithPath(testParam.TestDevice).WithJob("seq_read"))
runFioStress(ctx, s, testConfig.WithPath(testParam.TestDevice).WithJob("4k_write"))
runFioStress(ctx, s, testConfig.WithPath(testParam.TestDevice).WithJob("4k_write_qd4"))
runFioStress(ctx, s, testConfig.WithPath(testParam.TestDevice).WithJob("4k_read_qd4"))
runFioStress(ctx, s, testConfig.WithPath(testParam.TestDevice).WithJob("4k_read"))
runFioStress(ctx, s, testConfig.WithPath(testParam.TestDevice).WithJob("16k_write"))
runFioStress(ctx, s, testConfig.WithPath(testParam.TestDevice).WithJob("16k_read"))
if testParam.IsSlcEnabled {
// Run tests to collect metrics for Slc device.
runFioStress(ctx, s, testConfig.WithPath(testParam.SlcDevice).WithJob("4k_write_qd4"))
runFioStress(ctx, s, testConfig.WithPath(testParam.SlcDevice).WithJob("4k_read_qd4"))
}
}
// soakTestBlock runs long, write-intensive storage stresses.
func soakTestBlock(ctx context.Context, s *testing.State, rw *FioResultWriter, testParam QualParam) {
testConfigNoVerify := &TestConfig{}
testConfigVerify := &TestConfig{
VerifyOnly: true,
ResultWriter: rw,
}
stressTasks := []func(context.Context){
func(ctx context.Context) {
runFioStress(ctx, s, testConfigNoVerify.WithPath(testParam.TestDevice).WithJob("64k_stress").WithDuration(testParam.StressBlockTimeout))
// NoVerify surf block to exercise device. Run once. Duration can be found in data/recovery
runFioStress(ctx, s, testConfigNoVerify.WithPath(testParam.TestDevice).WithJob("recovery"))
// Verify surfing block for performance evaluation. Run once. Duration can be found in data/surfing
runFioStress(ctx, s, testConfigVerify.WithPath(testParam.TestDevice).WithJob("surfing"))
},
}
if testParam.IsSlcEnabled {
stressTasks = append(stressTasks,
func(ctx context.Context) {
runFioStress(ctx, s, testConfigNoVerify.WithPath(testParam.SlcDevice).WithJob("4k_write").WithDuration(DefaultSlcStressBlockTimeout/2))
runFioStress(ctx, s, testConfigVerify.WithPath(testParam.SlcDevice).WithJob("4k_write").WithDuration(DefaultSlcStressBlockTimeout/2))
})
}
runTasksInParallel(ctx, 0, stressTasks)
}
// retentionTestBlock reads and then validates the same data after multiple short suspend cycles.
func retentionTestBlock(ctx context.Context, s *testing.State, rw *FioResultWriter, testParam QualParam) {
writeConfig := TestConfig{
Job: "8k_async_randwrite",
Duration: testParam.RetentionBlockTimeout,
}
// Verify disk consistency written by the initial FIO test.
verifyConfig := TestConfig{
Job: "8k_async_randwrite",
VerifyOnly: true,
}
writeTasks := []func(context.Context){
func(ctx context.Context) {
runFioStress(ctx, s, writeConfig.WithPath(testParam.TestDevice))
},
}
verifyTasks := []func(context.Context){
func(ctx context.Context) {
runFioStress(ctx, s, verifyConfig.WithPath(testParam.TestDevice))
},
}
if testParam.IsSlcEnabled {
writeTasks = append(writeTasks,
func(ctx context.Context) {
runFioStress(ctx, s, writeConfig.WithPath(testParam.SlcDevice))
})
verifyTasks = append(verifyTasks,
func(ctx context.Context) {
runFioStress(ctx, s, verifyConfig.WithPath(testParam.SlcDevice))
})
}
runTasksInParallel(ctx, 0, writeTasks)
// Run Suspend repeatedly until the timeout.
pollOptions := &testing.PollOptions{
Timeout: testParam.RetentionBlockTimeout,
Interval: 30 * time.Second,
}
if err := testing.Poll(ctx, func(ctx context.Context) error {
if err := Suspend(ctx, testParam.SkipS0iXResidencyCheck); err != nil {
return testing.PollBreak(errors.Wrap(err, "failed to suspend DUT"))
}
return errors.New("retention test is still running normally")
}, pollOptions); err != nil && !errors.As(err, &context.DeadlineExceeded) {
s.Fatal("Failed running retention block: ", err)
}
runTasksInParallel(ctx, 0, verifyTasks)
}
// suspendTestBlock triggers periodic power suspends while running disk
// This test block doesn't validate consistency nor status of the disk stress, which
// is done by measuring storage degradation by the next soak iteration.
func suspendTestBlock(ctx context.Context, s *testing.State, rw *FioResultWriter, testParam QualParam) {
if deadline, _ := ctx.Deadline(); time.Until(deadline) < testParam.SuspendBlockTimeout {
s.Fatal("Context timeout occurs before suspend block timeout")
}
tasks := []func(context.Context){
func(ctx context.Context) {
runContinuousStorageStress(ctx, "write_stress", s.DataPath("write_stress"), rw, testParam.TestDevice)
},
func(ctx context.Context) {
runPeriodicPowerSuspend(ctx, testParam.SkipS0iXResidencyCheck)
},
}
if testParam.IsSlcEnabled {
tasks = append(tasks,
func(ctx context.Context) {
runContinuousStorageStress(ctx, "4k_write", s.DataPath("4k_write"), rw, testParam.SlcDevice)
})
}
runTasksInParallel(ctx, testParam.SuspendBlockTimeout, tasks)
}
// trimTestBlock is a dispatcher function to start trim test on the boot device
// and on the slc.
func trimTestBlock(ctx context.Context, s *testing.State, rw *FioResultWriter, testParam QualParam) {
trimTestBlockImpl(ctx, s, testParam.TestDevice, rw)
if testParam.IsSlcEnabled {
trimTestBlockImpl(ctx, s, testParam.SlcDevice, rw)
}
}
// trimTestBlockImpl performs data integrity trim test on an unmounted partition.
// This test will write 1 GB of data and verify that trimmed data are gone and untrimmed data are unaffected.
// The verification will be run in 5 passes with 0%, 25%, 50%, 75%, and 100% of data trimmed.
// Also, perform 4K random read QD32 before and after trim. We should see some speed / latency difference
// if the device firmware trim data properly.
func trimTestBlockImpl(ctx context.Context, s *testing.State, trimPath string, rw *FioResultWriter) {
filesize, err := PartitionSize(ctx, trimPath)
if err != nil {
s.Fatal("Failed to acquire size for partition: ", err)
}
// Make file size multiple of 4 * chunk size to account for all passes,
// i.e. 25% = 1/4, 75% = 3/4.
filesize = filesize - filesize%(4*TrimChunkSize)
s.Logf("Filename: %s, filesize: %d", trimPath, filesize)
f, err := os.OpenFile(trimPath, os.O_RDWR, 0666)
if err != nil {
s.Fatal("Failed to open device: ", err)
}
defer f.Close()
if err := RunTrim(f, 0, filesize); err != nil {
s.Fatal("Error running trim command: ", err)
}
zeroHash := ZeroHash()
oneHash := OneHash()
chunkCount := filesize / TrimChunkSize
// Write random data to disk
s.Log("Writing random data to disk: ", trimPath)
if err := WriteRandomData(trimPath, chunkCount); err != nil {
s.Fatal("Error writing random data to disk: ", err)
}
s.Log("Calculating initial hash values for all chunks")
initialHash, err := CalculateCurrentHashes(trimPath, chunkCount)
if err != nil {
s.Fatal("Error calculating hashes: ", err)
}
// Check read bandwidth/latency when reading real data.
resultWriter := &FioResultWriter{}
defer resultWriter.Save(ctx, s.OutDir(), true)
testConfig := &TestConfig{ResultWriter: resultWriter, Path: trimPath}
if err := runFioStress(ctx, s, testConfig.WithJob("4k_read_qd32")); err != nil {
s.Fatal("Timeout while running disk i/o stress: ", err)
}
dataVerifyCount := 0
dataVerifyMatch := 0
trimVerifyCount := 0
trimVerifyZero := 0
trimVerifyOne := 0
trimVerifyNonDelete := 0
for _, ratio := range []float64{0, 0.25, 0.5, 0.75, 1} {
trimmed := make([]bool, chunkCount)
for i := uint64(0); i < chunkCount; i++ {
if float64(i%4)/4 < ratio {
if err := RunTrim(f, i, TrimChunkSize); err != nil {
s.Fatal("Error running trim command: ", err)
}
trimmed[i] = true
trimVerifyCount++
}
}
currHashes, err := CalculateCurrentHashes(trimPath, chunkCount)
if err != nil {
s.Fatal("Error calculating current hashes: ", err)
}
dataVerifyCount = dataVerifyCount + int(chunkCount) - trimVerifyCount
for i := uint64(0); i < chunkCount; i++ {
if trimmed[i] {
if currHashes[i] == zeroHash {
trimVerifyZero++
} else if currHashes[i] == oneHash {
trimVerifyOne++
} else if currHashes[i] == initialHash[i] {
trimVerifyNonDelete++
}
} else {
if currHashes[i] == initialHash[i] {
dataVerifyMatch++
}
}
}
}
// Check final (trimmed) read bandwidth/latency.
if err := runFioStress(ctx, s, testConfig.WithJob("4k_read_qd32")); err != nil {
s.Fatal("Timeout while running disk i/o stress")
}
// Write out all metrics to an external keyval file.
if err := WriteKeyVals(s.OutDir(), map[string]float64{
"dataVerifyCount": float64(dataVerifyCount),
"dataVerifyMatch": float64(dataVerifyMatch),
"trimVerifyCount": float64(trimVerifyCount),
"trimVerifyZero": float64(trimVerifyZero),
"trimVerifyOne": float64(trimVerifyOne),
"trimVerifyNonDelete": float64(trimVerifyNonDelete),
}); err != nil {
s.Fatal("Error writing trim counters: ", err)
}
if dataVerifyMatch < dataVerifyCount {
s.Fatal("Fail to verify untrimmed data")
}
expectTrimZero := false
expectTrimOne := false
if IsEMMC(trimPath) {
testing.ContextLog(ctx, "Device is eMMC, trim behavior not specified")
}
if IsNVME(trimPath) {
var dlfeat string
dlfeat, err = GetNVMEIdNSFeature(ctx, trimPath, "dlfeat")
if err != nil {
testing.ContextLog(ctx, "Expected values for trimmed data not reported")
} else if dlfeat == "1" {
expectTrimZero = true
} else if dlfeat == "2" {
expectTrimOne = true
} else {
testing.ContextLog(ctx, "Expected values for trimmed data not specified")
}
}
if expectTrimZero && trimVerifyZero < trimVerifyCount {
s.Fatal("Trimmed data is not zeroed")
} else if expectTrimOne && trimVerifyOne < trimVerifyCount {
s.Fatal("Trimmed data is not set to one")
}
}
// runFioStress runs an fio job:
// If fio returns an error, this function will fail the Tast test.
func runFioStress(ctx context.Context, s *testing.State, testConfig TestConfig) error {
config := testConfig.WithJobFile(s.DataPath(testConfig.Job))
if err := RunFioStress(ctx, config); err != nil {
if errors.As(err, &context.DeadlineExceeded) {
return err
}
s.Fatal("FIO stress failed: ", err)
}
return nil
}