read_bench.go
read_bench.go - Overview
This file implements a command-line tool for benchmarking read performance in a Badger database. It supports random reads using multiple goroutines, full database scans using iterators, and allows configuration of cache sizes and read-only mode.
Detailed Documentation
var readBenchCmd
// readBenchCmd is the cobra command named "read". Running it invokes
// readBench; init registers it as a subcommand of benchCmd.
var readBenchCmd = &cobra.Command{
Use: "read",
Short: "Read data from Badger randomly to benchmark read speed.",
Long: `
This command reads data from existing Badger database randomly using multiple go routines.`,
RunE: readBench,
}
- Purpose: Defines the cobra command `readBenchCmd` for benchmarking Badger read performance.
- Properties:
  - `Use`: The command's name, "read".
  - `Short`: A short description of the command.
  - `Long`: A longer description of the command.
  - `RunE`: The function, `readBench`, to execute when the command is run.
var sizeRead
// sizeRead is the running total of the estimated item sizes read so far. The
// read paths add to it and printStats samples it.
var sizeRead atomic.Uint64 // will store size read till now
- Purpose: An atomic unsigned 64-bit integer to store the total size of data read during the benchmark.
var entriesRead
// entriesRead is the running count of entries read so far. The read paths
// increment it and printStats samples it.
var entriesRead atomic.Uint64 // will store entries read till now
- Purpose: An atomic unsigned 64-bit integer to store the total number of entries read during the benchmark.
var startTime
// startTime is the instant the benchmark began; printStats measures elapsed
// time and throughput relative to it.
var startTime time.Time // start time of read benchmarking
- Purpose: Stores the starting time of the read benchmark.
var ro
// ro holds the options of the read benchmark. Each field is bound to a
// command-line flag in init.
var ro = struct {
blockCacheSize int64 // --block-cache: max size of the block cache, in MB
indexCacheSize int64 // --index-cache: max size of the index cache, in MB
sampleSize int // --sample-size: number of keys sampled for random lookups
keysOnly bool // --keys-only: per its help text, values are also read when false
readOnly bool // --read-only: open the DB in read-only mode
fullScan bool // --full-scan: scan the whole DB with an iterator instead of random lookups
}{}
- Purpose: Defines an anonymous struct variable, `ro`, that holds the read options for the benchmark.
- Fields:
  - `blockCacheSize`: Size of the block cache in MB.
  - `indexCacheSize`: Size of the index cache in MB.
  - `sampleSize`: Number of keys to sample for random lookups.
  - `keysOnly`: Flag to indicate if only keys should be read (values skipped).
  - `readOnly`: Flag to open the database in read-only mode.
  - `fullScan`: Flag to perform a full database scan.
func init()
// init attaches readBenchCmd to benchCmd and declares every command-line flag
// the read benchmark accepts.
func init() {
	benchCmd.AddCommand(readBenchCmd)

	flags := readBenchCmd.Flags()

	// Workload shape.
	flags.IntVarP(&numGoroutines, "goroutines", "g", 16,
		"Number of goroutines to run for reading.")
	flags.StringVarP(&duration, "duration", "d", "1m",
		"How long to run the benchmark.")
	flags.IntVar(&ro.sampleSize, "sample-size", 1000000,
		"Keys sample size to be used for random lookup.")

	// Read behaviour.
	flags.BoolVar(&ro.keysOnly, "keys-only", false,
		"If false, values will also be read.")
	flags.BoolVar(&ro.readOnly, "read-only", true,
		"If true, DB will be opened in read only mode.")
	flags.BoolVar(&ro.fullScan, "full-scan", false,
		"If true, full db will be scanned using iterators.")

	// Cache sizing; both values are given in MB.
	flags.Int64Var(&ro.blockCacheSize, "block-cache", 256,
		"Max size of block cache in MB")
	flags.Int64Var(&ro.indexCacheSize, "index-cache", 0,
		"Max size of index cache in MB")
}
- Purpose: Initializes `readBenchCmd` by adding it to `benchCmd` and defining its command-line flags.
- Functionality:
  - Adds `readBenchCmd` as a subcommand to `benchCmd`.
  - Defines flags for:
    - Number of goroutines (`goroutines`, `-g`)
    - Duration of the benchmark (`duration`, `-d`)
    - Sample size for random lookups (`sample-size`)
    - Whether to read only keys (`keys-only`)
    - Whether to open the database in read-only mode (`read-only`)
    - Whether to perform a full scan (`full-scan`)
    - Block cache size (`block-cache`)
    - Index cache size (`index-cache`)
func fullScanDB(db *badger.DB)
// fullScanDB iterates over every item the default iterator yields for db,
// counting each one in entriesRead and adding its estimated size to sizeRead.
// A printStats goroutine reports progress while the scan runs and is stopped
// before the function returns.
//
// The scan uses a transaction created with NewTransactionAt(math.MaxUint64,
// false) and discards it on return.
func fullScanDB(db *badger.DB) {
	txn := db.NewTransactionAt(math.MaxUint64, false)
	defer txn.Discard()

	startTime = time.Now()

	// Print the stats while the scan is in progress.
	c := z.NewCloser(0)
	c.AddRunning(1)
	go printStats(c)
	// Signal the reporter and wait for it to exit. Without this the goroutine
	// outlives the scan and may print while the caller is closing the DB.
	defer c.SignalAndWait()

	it := txn.NewIterator(badger.DefaultIteratorOptions)
	defer it.Close()
	for it.Rewind(); it.Valid(); it.Next() {
		item := it.Item()
		entriesRead.Add(1)
		sizeRead.Add(uint64(item.EstimatedSize()))
	}
}
- Purpose: Scans the entire Badger database using iterators and collects read statistics.
- Parameters:
  - `db`: A pointer to the Badger database instance.
- Functionality:
  - Creates a new transaction.
  - Records the start time.
  - Starts a goroutine to print statistics using `printStats`.
  - Creates an iterator with default options.
  - Iterates through all items in the database:
    - Increments the `entriesRead` counter.
    - Adds the estimated size of each item to the `sizeRead` counter.
func readBench(cmd *cobra.Command, args []string) error
// readBench is the RunE handler of readBenchCmd. It parses the duration flag,
// validates the goroutine count, opens the database in managed mode with the
// configured options, and then runs either a full scan (when ro.fullScan is
// set) or the random-read test for the parsed duration.
//
// It returns an error when the duration cannot be parsed, when the goroutine
// count is not positive, or when the database cannot be opened.
func readBench(cmd *cobra.Command, args []string) error {
	rand.Seed(time.Now().Unix())

	dur, err := time.ParseDuration(duration)
	if err != nil {
		return y.Wrapf(err, "unable to parse duration")
	}
	// The goroutine count comes straight from a flag, so report a bad value as
	// an ordinary, descriptive error instead of failing an assertion.
	if numGoroutines <= 0 {
		return fmt.Errorf("number of goroutines must be positive, got %d", numGoroutines)
	}

	// Cache sizes are supplied in MB; shift by 20 to convert to bytes.
	opt := badger.DefaultOptions(sstDir).
		WithValueDir(vlogDir).
		WithReadOnly(ro.readOnly).
		WithBlockCacheSize(ro.blockCacheSize << 20).
		WithIndexCacheSize(ro.indexCacheSize << 20)
	fmt.Printf("Opening badger with options = %+v\n", opt)

	db, err := badger.OpenManaged(opt)
	if err != nil {
		return y.Wrapf(err, "unable to open DB")
	}
	defer db.Close()

	fmt.Println("*********************************************************")
	fmt.Println("Starting to benchmark Reads")
	fmt.Println("*********************************************************")

	// If fullScan is true then do a complete scan of the db and return.
	if ro.fullScan {
		fullScanDB(db)
		return nil
	}
	readTest(db, dur)
	return nil
}
- Purpose: Implements the main logic for the read benchmark command.
- Parameters:
  - `cmd`: A pointer to the cobra command.
  - `args`: Command-line arguments.
- Returns: An error, if any.
- Functionality:
  - Seeds the random number generator.
  - Parses the benchmark duration from the `duration` flag.
  - Checks that the number of goroutines is positive.
  - Creates Badger database options based on command-line flags.
  - Opens the Badger database.
  - If `ro.fullScan` is true, performs a full database scan using `fullScanDB` and returns.
  - Otherwise, calls `readTest` to perform the read benchmark.
func printStats(c *z.Closer)
// printStats prints the benchmark's progress once per second until c is
// closed: elapsed time since startTime, total bytes and entries read, and the
// corresponding per-second rates. It calls c.Done when it exits.
func printStats(c *z.Closer) {
	defer c.Done()

	t := time.NewTicker(time.Second)
	defer t.Stop()
	for {
		select {
		case <-c.HasBeenClosed():
			return
		case <-t.C:
			// Sample the clock once so the printed duration and the rates
			// are computed from the same instant.
			dur := time.Since(startTime)
			sz := sizeRead.Load()
			entries := entriesRead.Load()

			// Rates are per whole elapsed second. Clamp to 1 so that a tick
			// arriving less than a second after startTime cannot cause an
			// integer division by zero.
			secs := uint64(dur.Seconds())
			if secs == 0 {
				secs = 1
			}
			bytesRate := sz / secs
			entriesRate := entries / secs

			fmt.Printf("Time elapsed: %s, bytes read: %s, speed: %s/sec, "+
				"entries read: %d, speed: %d/sec\n", y.FixedDuration(dur),
				humanize.IBytes(sz), humanize.IBytes(bytesRate), entries, entriesRate)
		}
	}
}
- Purpose: Prints read statistics periodically.
- Parameters:
  - `c`: A pointer to a `z.Closer` for managing the goroutine's lifecycle.
- Functionality:
  - Creates a ticker that fires every second.
  - In a loop:
    - If the closer is closed, exits the goroutine.
    - Otherwise, on each tick, calculates and prints the elapsed time, total bytes read, read speed (bytes/sec), total entries read, and read speed (entries/sec).
func readKeys(db *badger.DB, c *z.Closer, keys [][]byte)
// readKeys repeatedly picks a random key from keys, looks it up with
// lookupForKey, and records the result in sizeRead and entriesRead. It runs
// until c is closed and calls c.Done when it exits.
//
// If keys is empty there is nothing to look up and it returns immediately.
func readKeys(db *badger.DB, c *z.Closer, keys [][]byte) {
	defer c.Done()

	// The random index helpers panic on a zero bound, so bail out early.
	if len(keys) == 0 {
		return
	}

	// Seed each worker from the package-level source. Seeding from the wall
	// clock in seconds gives every worker started in the same second the same
	// seed, so they would all replay an identical key sequence.
	r := rand.New(rand.NewSource(rand.Int63()))
	for {
		select {
		case <-c.HasBeenClosed():
			return
		default:
			// Intn takes an int bound, so large key sets do not overflow.
			key := keys[r.Intn(len(keys))]
			sizeRead.Add(lookupForKey(db, key))
			entriesRead.Add(1)
		}
	}
}
- Purpose: Reads keys from the database randomly.
- Parameters:
  - `db`: A pointer to the Badger database instance.
  - `c`: A pointer to a `z.Closer` for managing the goroutine's lifecycle.
  - `keys`: A slice of byte slices representing the keys to read.
- Functionality:
  - Creates a new random number generator.
  - In a loop:
    - If the closer is closed, exits the goroutine.
    - Otherwise, selects a random key from the `keys` slice.
    - Looks up the selected key using `lookupForKey`.
    - Adds the size returned by the lookup to `sizeRead`.
    - Increments the `entriesRead` counter.
func lookupForKey(db *badger.DB, key []byte) (sz uint64)
// lookupForKey walks the stored versions of key inside a db.View transaction
// and returns the sum of their estimated sizes. At most 10 versions are
// visited, and values are not prefetched. Any error from db.View is passed to
// y.Check.
func lookupForKey(db *badger.DB, key []byte) (sz uint64) {
	// Upper bound on the number of versions inspected per lookup.
	const maxVersions = 10

	sumVersions := func(txn *badger.Txn) error {
		opts := badger.DefaultIteratorOptions
		opts.AllVersions = true
		opts.PrefetchValues = false

		itr := txn.NewKeyIterator(key, opts)
		defer itr.Close()

		seen := 0
		itr.Seek(key)
		for itr.Valid() {
			sz += uint64(itr.Item().EstimatedSize())
			seen++
			if seen == maxVersions {
				break
			}
			itr.Next()
		}
		return nil
	}

	y.Check(db.View(sumVersions))
	return sz
}
- Purpose: Looks up a key in the database and returns the combined estimated size of its versions (at most 10).
- Parameters:
  - `db`: A pointer to the Badger database instance.
  - `key`: The key to look up.
- Returns: The sum of the estimated sizes of the versions visited.
- Functionality:
  - Starts a read-only transaction.
  - Creates a new `KeyIterator` with `AllVersions` set to true and `PrefetchValues` set to false.
  - Seeks to the provided `key`.
  - Iterates through a maximum of 10 versions of the key.
  - Accumulates the estimated size of each item in the `sz` variable.
func getSampleKeys(db *badger.DB, sampleSize int) ([][]byte, error)
// getSampleKeys collects up to sampleSize keys from db using the stream
// framework and returns them in shuffled order.
//
// Streaming is stopped by cancelling the stream's context once sampleSize
// keys have been gathered, so the resulting context.Canceled error is not
// reported as a failure. Any other error from the stream is returned together
// with a nil slice.
func getSampleKeys(db *badger.DB, sampleSize int) ([][]byte, error) {
	var keys [][]byte

	stream := db.NewStreamAt(math.MaxUint64)

	// Override stream.KeyToList as we only want keys. Also
	// we can take only first version for the key.
	stream.KeyToList = func(key []byte, itr *badger.Iterator) (*pb.KVList, error) {
		// Since stream framework copies the item's key while calling
		// KeyToList, we can directly append key to list.
		return &pb.KVList{Kv: []*pb.KV{{Key: key}}}, nil
	}

	// errStop aborts SliceIterate early once enough keys are collected; it
	// never leaves this function.
	errStop := errors.New("Stop iterating")
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	stream.Send = func(buf *z.Buffer) error {
		// Batches may still arrive after cancellation; ignore them. The
		// limit is the sampleSize argument, the same one used below.
		if len(keys) >= sampleSize {
			return nil
		}
		err := buf.SliceIterate(func(s []byte) error {
			var kv pb.KV
			if err := proto.Unmarshal(s, &kv); err != nil {
				return err
			}
			keys = append(keys, kv.Key)
			if len(keys) >= sampleSize {
				cancel()
				return errStop
			}
			return nil
		})
		if errors.Is(err, errStop) {
			return nil
		}
		return err
	}

	if err := stream.Orchestrate(ctx); err != nil && !errors.Is(err, context.Canceled) {
		return nil, err
	}

	// Shuffle keys before returning to minimise locality
	// of keys coming from stream framework.
	rand.Shuffle(len(keys), func(i, j int) {
		keys[i], keys[j] = keys[j], keys[i]
	})
	return keys, nil
}
- Purpose: Retrieves a sample of keys from the database using the stream framework.
- Parameters:
  - `db`: A pointer to the Badger database instance.
  - `sampleSize`: The number of keys to sample.
- Returns:
  - A slice of byte slices representing the sampled keys.
  - An error, if any.
- Functionality:
  - Creates a new stream.
  - Overrides the `KeyToList` function to only extract keys from the stream.
  - Defines a `Send` function that unmarshals the data, appends keys to a list, and cancels the context when the sample size is reached.
  - Orchestrates the stream and handles potential errors.
  - Shuffles the keys to minimize locality.
Code Examples
None.