ethdb: add leveldb write delay statistic (#16499)
This commit is contained in:
parent
2a1fc3d155
commit
6b2b328cdb
|
@ -17,6 +17,7 @@
|
||||||
package ethdb
|
package ethdb
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -32,17 +33,25 @@ import (
|
||||||
"github.com/syndtr/goleveldb/leveldb/util"
|
"github.com/syndtr/goleveldb/leveldb/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
writeDelayNThreshold = 200
|
||||||
|
writeDelayThreshold = 350 * time.Millisecond
|
||||||
|
writeDelayWarningThrottler = 1 * time.Minute
|
||||||
|
)
|
||||||
|
|
||||||
var OpenFileLimit = 64
|
var OpenFileLimit = 64
|
||||||
|
|
||||||
type LDBDatabase struct {
|
type LDBDatabase struct {
|
||||||
fn string // filename for reporting
|
fn string // filename for reporting
|
||||||
db *leveldb.DB // LevelDB instance
|
db *leveldb.DB // LevelDB instance
|
||||||
|
|
||||||
compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction
|
compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction
|
||||||
compReadMeter metrics.Meter // Meter for measuring the data read during compaction
|
compReadMeter metrics.Meter // Meter for measuring the data read during compaction
|
||||||
compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
|
compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
|
||||||
diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read
|
writeDelayNMeter metrics.Meter // Meter for measuring the write delay number due to database compaction
|
||||||
diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
|
writeDelayMeter metrics.Meter // Meter for measuring the write delay duration due to database compaction
|
||||||
|
diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read
|
||||||
|
diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
|
||||||
|
|
||||||
quitLock sync.Mutex // Mutex protecting the quit channel access
|
quitLock sync.Mutex // Mutex protecting the quit channel access
|
||||||
quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
|
quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
|
||||||
|
@ -147,16 +156,17 @@ func (db *LDBDatabase) LDB() *leveldb.DB {
|
||||||
|
|
||||||
// Meter configures the database metrics collectors and
|
// Meter configures the database metrics collectors and
|
||||||
func (db *LDBDatabase) Meter(prefix string) {
|
func (db *LDBDatabase) Meter(prefix string) {
|
||||||
// Short circuit metering if the metrics system is disabled
|
if metrics.Enabled {
|
||||||
if !metrics.Enabled {
|
// Initialize all the metrics collector at the requested prefix
|
||||||
return
|
db.compTimeMeter = metrics.NewRegisteredMeter(prefix+"compact/time", nil)
|
||||||
|
db.compReadMeter = metrics.NewRegisteredMeter(prefix+"compact/input", nil)
|
||||||
|
db.compWriteMeter = metrics.NewRegisteredMeter(prefix+"compact/output", nil)
|
||||||
|
db.diskReadMeter = metrics.NewRegisteredMeter(prefix+"disk/read", nil)
|
||||||
|
db.diskWriteMeter = metrics.NewRegisteredMeter(prefix+"disk/write", nil)
|
||||||
}
|
}
|
||||||
// Initialize all the metrics collector at the requested prefix
|
// Initialize write delay metrics no matter we are in metric mode or not.
|
||||||
db.compTimeMeter = metrics.NewRegisteredMeter(prefix+"compact/time", nil)
|
db.writeDelayMeter = metrics.NewRegisteredMeter(prefix+"compact/writedelay/duration", nil)
|
||||||
db.compReadMeter = metrics.NewRegisteredMeter(prefix+"compact/input", nil)
|
db.writeDelayNMeter = metrics.NewRegisteredMeter(prefix+"compact/writedelay/counter", nil)
|
||||||
db.compWriteMeter = metrics.NewRegisteredMeter(prefix+"compact/output", nil)
|
|
||||||
db.diskReadMeter = metrics.NewRegisteredMeter(prefix+"disk/read", nil)
|
|
||||||
db.diskWriteMeter = metrics.NewRegisteredMeter(prefix+"disk/write", nil)
|
|
||||||
|
|
||||||
// Create a quit channel for the periodic collector and run it
|
// Create a quit channel for the periodic collector and run it
|
||||||
db.quitLock.Lock()
|
db.quitLock.Lock()
|
||||||
|
@ -178,6 +188,9 @@ func (db *LDBDatabase) Meter(prefix string) {
|
||||||
// 2 | 523 | 1000.37159 | 7.26059 | 66.86342 | 66.77884
|
// 2 | 523 | 1000.37159 | 7.26059 | 66.86342 | 66.77884
|
||||||
// 3 | 570 | 1113.18458 | 0.00000 | 0.00000 | 0.00000
|
// 3 | 570 | 1113.18458 | 0.00000 | 0.00000 | 0.00000
|
||||||
//
|
//
|
||||||
|
// This is how the write delay look like (currently):
|
||||||
|
// DelayN:5 Delay:406.604657ms
|
||||||
|
//
|
||||||
// This is how the iostats look like (currently):
|
// This is how the iostats look like (currently):
|
||||||
// Read(MB):3895.04860 Write(MB):3654.64712
|
// Read(MB):3895.04860 Write(MB):3654.64712
|
||||||
func (db *LDBDatabase) meter(refresh time.Duration) {
|
func (db *LDBDatabase) meter(refresh time.Duration) {
|
||||||
|
@ -188,6 +201,14 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
|
||||||
}
|
}
|
||||||
// Create storage for iostats.
|
// Create storage for iostats.
|
||||||
var iostats [2]float64
|
var iostats [2]float64
|
||||||
|
|
||||||
|
// Create storage and warning log tracer for write delay.
|
||||||
|
var (
|
||||||
|
delaystats [2]int64
|
||||||
|
lastWriteDelay time.Time
|
||||||
|
lastWriteDelayN time.Time
|
||||||
|
)
|
||||||
|
|
||||||
// Iterate ad infinitum and collect the stats
|
// Iterate ad infinitum and collect the stats
|
||||||
for i := 1; ; i++ {
|
for i := 1; ; i++ {
|
||||||
// Retrieve the database stats
|
// Retrieve the database stats
|
||||||
|
@ -236,6 +257,52 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
|
||||||
db.compWriteMeter.Mark(int64((compactions[i%2][2] - compactions[(i-1)%2][2]) * 1024 * 1024))
|
db.compWriteMeter.Mark(int64((compactions[i%2][2] - compactions[(i-1)%2][2]) * 1024 * 1024))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Retrieve the write delay statistic
|
||||||
|
writedelay, err := db.db.GetProperty("leveldb.writedelay")
|
||||||
|
if err != nil {
|
||||||
|
db.log.Error("Failed to read database write delay statistic", "err", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var (
|
||||||
|
delayN int64
|
||||||
|
delayDuration string
|
||||||
|
duration time.Duration
|
||||||
|
)
|
||||||
|
if n, err := fmt.Sscanf(writedelay, "DelayN:%d Delay:%s", &delayN, &delayDuration); n != 2 || err != nil {
|
||||||
|
db.log.Error("Write delay statistic not found")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
duration, err = time.ParseDuration(delayDuration)
|
||||||
|
if err != nil {
|
||||||
|
db.log.Error("Failed to parse delay duration", "err", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if db.writeDelayNMeter != nil {
|
||||||
|
db.writeDelayNMeter.Mark(delayN - delaystats[0])
|
||||||
|
// If the write delay number been collected in the last minute exceeds the predefined threshold,
|
||||||
|
// print a warning log here.
|
||||||
|
// If a warning that db performance is laggy has been displayed,
|
||||||
|
// any subsequent warnings will be withhold for 1 minute to don't overwhelm the user.
|
||||||
|
if int(db.writeDelayNMeter.Rate1()) > writeDelayNThreshold &&
|
||||||
|
time.Now().After(lastWriteDelayN.Add(writeDelayWarningThrottler)) {
|
||||||
|
db.log.Warn("Write delay number exceeds the threshold (200 per second) in the last minute")
|
||||||
|
lastWriteDelayN = time.Now()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if db.writeDelayMeter != nil {
|
||||||
|
db.writeDelayMeter.Mark(duration.Nanoseconds() - delaystats[1])
|
||||||
|
// If the write delay duration been collected in the last minute exceeds the predefined threshold,
|
||||||
|
// print a warning log here.
|
||||||
|
// If a warning that db performance is laggy has been displayed,
|
||||||
|
// any subsequent warnings will be withhold for 1 minute to don't overwhelm the user.
|
||||||
|
if int64(db.writeDelayMeter.Rate1()) > writeDelayThreshold.Nanoseconds() &&
|
||||||
|
time.Now().After(lastWriteDelay.Add(writeDelayWarningThrottler)) {
|
||||||
|
db.log.Warn("Write delay duration exceeds the threshold (35% of the time) in the last minute")
|
||||||
|
lastWriteDelay = time.Now()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delaystats[0], delaystats[1] = delayN, duration.Nanoseconds()
|
||||||
|
|
||||||
// Retrieve the database iostats.
|
// Retrieve the database iostats.
|
||||||
ioStats, err := db.db.GetProperty("leveldb.iostats")
|
ioStats, err := db.db.GetProperty("leveldb.iostats")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
Loading…
Reference in New Issue