Skip to main content

read_bench.go

read_bench.go - Overview

This file implements a command-line tool for benchmarking read performance in a Badger database. It supports random reads using multiple goroutines, full database scans using iterators, and allows configuration of cache sizes and read-only mode.

Detailed Documentation

var readBenchCmd

// readBenchCmd is the `read` subcommand of benchCmd. It benchmarks read
// performance against an existing Badger database; the actual work is
// done by readBench (see RunE).
var readBenchCmd = &cobra.Command{
	Use:   "read",
	Short: "Read data from Badger randomly to benchmark read speed.",
	Long: `
This command reads data from existing Badger database randomly using multiple go routines.`,
	RunE: readBench,
}
  • Purpose: Defines a cobra command readBenchCmd for benchmarking Badger read performance.
  • Properties:
    • Use: The command's name is "read".
    • Short: A short description of the command.
    • Long: A longer description of the command.
    • RunE: Specifies the function readBench to execute when the command is run.

var sizeRead

var sizeRead atomic.Uint64 // will store size read till now
  • Purpose: An atomic unsigned 64-bit integer to store the total size of data read during the benchmark.

var entriesRead

var entriesRead atomic.Uint64 // will store entries read till now
  • Purpose: An atomic unsigned 64-bit integer to store the total number of entries read during the benchmark.

var startTime

var startTime time.Time     // start time of read benchmarking
  • Purpose: Stores the starting time of the read benchmark.

var ro

// ro holds the read-benchmark options populated from command-line flags
// in init.
var ro = struct {
	blockCacheSize int64 // max block cache size in MB (shifted to bytes when opening the DB)
	indexCacheSize int64 // max index cache size in MB (shifted to bytes when opening the DB)

	sampleSize int  // number of keys to sample for random lookups
	keysOnly   bool // if true, read keys only and skip values
	readOnly   bool // open the DB in read-only mode
	fullScan   bool // scan the whole DB with iterators instead of random reads
}{}
  • Purpose: Defines ro, a package-level variable of an anonymous struct type that groups the read-benchmark options populated from command-line flags.
  • Fields:
    • blockCacheSize: Size of the block cache in MB.
    • indexCacheSize: Size of the index cache in MB.
    • sampleSize: Number of keys to sample for random lookups.
    • keysOnly: Flag to indicate if only keys should be read (values skipped).
    • readOnly: Flag to open the database in read-only mode.
    • fullScan: Flag to perform a full database scan.

func init()

// init wires readBenchCmd into benchCmd and declares its command-line flags.
func init() {
	benchCmd.AddCommand(readBenchCmd)

	flags := readBenchCmd.Flags()
	flags.IntVarP(
		&numGoroutines, "goroutines", "g", 16, "Number of goroutines to run for reading.")
	flags.StringVarP(
		&duration, "duration", "d", "1m", "How long to run the benchmark.")
	flags.IntVar(
		&ro.sampleSize, "sample-size", 1000000, "Keys sample size to be used for random lookup.")
	flags.BoolVar(
		&ro.keysOnly, "keys-only", false, "If false, values will also be read.")
	flags.BoolVar(
		&ro.readOnly, "read-only", true, "If true, DB will be opened in read only mode.")
	flags.BoolVar(
		&ro.fullScan, "full-scan", false, "If true, full db will be scanned using iterators.")
	flags.Int64Var(&ro.blockCacheSize, "block-cache", 256, "Max size of block cache in MB")
	flags.Int64Var(&ro.indexCacheSize, "index-cache", 0, "Max size of index cache in MB")
}
  • Purpose: Initializes the readBenchCmd by adding it to the benchCmd and defining its command-line flags.
  • Functionality:
    • Adds the readBenchCmd as a subcommand to benchCmd.
    • Defines flags for:
      • Number of goroutines (goroutines, -g)
      • Duration of the benchmark (duration, -d)
      • Sample size for random lookups (sample-size)
      • Whether to read only keys (keys-only)
      • Whether to open the database in read-only mode (read-only)
      • Whether to perform a full scan (full-scan)
      • Block cache size (block-cache)
      • Index cache size (index-cache)

func fullScanDB(db *badger.DB)

// fullScanDB iterates over every item in the DB once, accumulating entry
// count and estimated size into the global counters that printStats
// reports every second.
func fullScanDB(db *badger.DB) {
	// Read at the maximum version so every key is visible (read-only txn).
	txn := db.NewTransactionAt(math.MaxUint64, false)
	defer txn.Discard()

	startTime = time.Now()
	// Print the stats from a background goroutine. The closer is created
	// with one running task and signalled (and waited on) when the scan
	// finishes, so the stats goroutine does not leak past this function.
	c := z.NewCloser(1)
	go printStats(c)
	defer c.SignalAndWait()

	it := txn.NewIterator(badger.DefaultIteratorOptions)
	defer it.Close()
	for it.Rewind(); it.Valid(); it.Next() {
		item := it.Item()
		entriesRead.Add(1)
		sizeRead.Add(uint64(item.EstimatedSize()))
	}
}
  • Purpose: Scans the entire Badger database using iterators and collects read statistics.
  • Parameters:
    • db: A pointer to the Badger database instance.
  • Functionality:
    • Creates a new read-only transaction at the maximum version (math.MaxUint64), so every key is visible.
    • Records the start time.
    • Starts a goroutine to print statistics using printStats.
    • Creates an iterator with default options.
    • Iterates through all items in the database:
      • Increments the entriesRead counter.
      • Adds the estimated size of each item to the sizeRead counter.

func readBench(cmd *cobra.Command, args []string) error

// readBench is the RunE handler for the "read" subcommand. It parses the
// benchmark duration, opens the DB with the configured cache sizes and
// read-only mode, then either performs one full scan or runs the random
// read benchmark for the requested duration.
func readBench(cmd *cobra.Command, args []string) error {
	rand.Seed(time.Now().Unix())

	dur, err := time.ParseDuration(duration)
	if err != nil {
		return y.Wrapf(err, "unable to parse duration")
	}
	y.AssertTrue(numGoroutines > 0)

	// Build options step by step; cache sizes are given in MB, so shift
	// left by 20 to convert to bytes.
	opt := badger.DefaultOptions(sstDir)
	opt = opt.WithValueDir(vlogDir)
	opt = opt.WithReadOnly(ro.readOnly)
	opt = opt.WithBlockCacheSize(ro.blockCacheSize << 20)
	opt = opt.WithIndexCacheSize(ro.indexCacheSize << 20)
	fmt.Printf("Opening badger with options = %+v\n", opt)

	db, err := badger.OpenManaged(opt)
	if err != nil {
		return y.Wrapf(err, "unable to open DB")
	}
	defer db.Close()

	fmt.Println("*********************************************************")
	fmt.Println("Starting to benchmark Reads")
	fmt.Println("*********************************************************")

	// if fullScan is true then do a complete scan of the db and return
	if ro.fullScan {
		fullScanDB(db)
		return nil
	}
	readTest(db, dur)
	return nil
}
  • Purpose: Implements the main logic for the read benchmark command.
  • Parameters:
    • cmd: A pointer to the cobra command.
    • args: Command-line arguments.
  • Returns: An error, if any.
  • Functionality:
    • Seeds the random number generator.
    • Parses the duration from the command-line arguments.
    • Creates Badger database options based on command-line flags.
    • Opens the Badger database.
    • If ro.fullScan is true, performs a full database scan using fullScanDB and returns.
    • Otherwise, calls readTest to perform the read benchmark.

func printStats(c *z.Closer)

// printStats reports benchmark progress once per second until c is
// closed: elapsed time, total bytes read, byte rate, entries read, and
// entry rate. It calls c.Done on exit.
func printStats(c *z.Closer) {
	defer c.Done()

	t := time.NewTicker(time.Second)
	defer t.Stop()
	for {
		select {
		case <-c.HasBeenClosed():
			return
		case <-t.C:
			dur := time.Since(startTime)
			sz := sizeRead.Load()
			entries := entriesRead.Load()
			// Guard against a divide-by-zero panic: uint64 truncation of
			// dur.Seconds() yields 0 when less than a full second has
			// elapsed (e.g. if startTime is set after this goroutine starts).
			secs := uint64(dur.Seconds())
			if secs == 0 {
				secs = 1
			}
			bytesRate := sz / secs
			entriesRate := entries / secs
			fmt.Printf("Time elapsed: %s, bytes read: %s, speed: %s/sec, "+
				"entries read: %d, speed: %d/sec\n", y.FixedDuration(dur),
				humanize.IBytes(sz), humanize.IBytes(bytesRate), entries, entriesRate)
		}
	}
}
  • Purpose: Prints read statistics periodically.
  • Parameters:
    • c: A pointer to a z.Closer for managing the goroutine's lifecycle.
  • Functionality:
    • Creates a ticker that fires every second.
    • In a loop:
      • If the closer is closed, exits the goroutine.
      • Otherwise, calculates and prints the elapsed time, total bytes read, read speed (bytes/sec), total entries read, and read speed (entries/sec).

func readKeys(db *badger.DB, c *z.Closer, keys [][]byte)

// readKeys loops forever, looking up keys picked at random from the
// provided sample and updating the global read counters, until c is
// closed. It calls c.Done on exit.
func readKeys(db *badger.DB, c *z.Closer, keys [][]byte) {
	defer c.Done()
	rng := rand.New(rand.NewSource(time.Now().Unix()))
	n := int32(len(keys)) // loop-invariant; hoisted out of the hot loop
	for {
		select {
		case <-c.HasBeenClosed():
			return
		default:
		}
		k := keys[rng.Int31n(n)]
		sizeRead.Add(lookupForKey(db, k))
		entriesRead.Add(1)
	}
}
  • Purpose: Reads keys from the database randomly.
  • Parameters:
    • db: A pointer to the Badger database instance.
    • c: A pointer to a z.Closer for managing the goroutine's lifecycle.
    • keys: A slice of byte slices representing the keys to read.
  • Functionality:
    • Creates a new random number generator.
    • In a loop:
      • If the closer is closed, exits the goroutine.
      • Otherwise, selects a random key from the keys slice.
      • Looks up the value for the selected key using lookupForKey.
      • Adds the size of the read value to sizeRead.
      • Increments the entriesRead counter.

func lookupForKey(db *badger.DB, key []byte) (sz uint64)

// lookupForKey reads up to 10 versions of key inside a read-only
// transaction and returns their combined estimated size.
func lookupForKey(db *badger.DB, key []byte) (sz uint64) {
	const maxVersions = 10
	err := db.View(func(txn *badger.Txn) error {
		opts := badger.DefaultIteratorOptions
		opts.AllVersions = true
		opts.PrefetchValues = false // only sizes are needed; skip value reads

		it := txn.NewKeyIterator(key, opts)
		defer it.Close()

		seen := 0
		for it.Seek(key); it.Valid(); it.Next() {
			sz += uint64(it.Item().EstimatedSize())
			// Stop after maxVersions versions of this key.
			if seen++; seen == maxVersions {
				break
			}
		}
		return nil
	})
	y.Check(err)
	return sz
}
  • Purpose: Looks up a key in the database and returns the combined estimated size of up to 10 versions of that key.
  • Parameters:
    • db: A pointer to the Badger database instance.
    • key: The key to look up.
  • Returns: The estimated size of the item.
  • Functionality:
    • Starts a read-only transaction.
    • Creates a new KeyIterator with AllVersions set to true and PrefetchValues set to false.
    • Seeks to the provided key
    • Iterates through a maximum of 10 versions of the key
    • Accumulates the size of each item in the sz variable.

func getSampleKeys(db *badger.DB, sampleSize int) ([][]byte, error)

// getSampleKeys collects up to sampleSize keys from the DB using the
// stream framework and returns them in shuffled order, so callers doing
// random lookups don't benefit from key locality.
func getSampleKeys(db *badger.DB, sampleSize int) ([][]byte, error) {
	var keys [][]byte
	count := 0
	stream := db.NewStreamAt(math.MaxUint64)

	// Override stream.KeyToList as we only want keys. Also
	// we can take only the first version for the key.
	stream.KeyToList = func(key []byte, itr *badger.Iterator) (*pb.KVList, error) {
		l := &pb.KVList{}
		// Since the stream framework copies the item's key while calling
		// KeyToList, we can directly append key to the list.
		l.Kv = append(l.Kv, &pb.KV{Key: key})
		return l, nil
	}

	errStop := errors.New("Stop iterating")
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	stream.Send = func(buf *z.Buffer) error {
		// Fix: this early-exit previously compared against the global
		// ro.sampleSize instead of the sampleSize parameter, so the two
		// thresholds in this function could disagree.
		if count >= sampleSize {
			return nil
		}
		err := buf.SliceIterate(func(s []byte) error {
			var kv pb.KV
			if err := proto.Unmarshal(s, &kv); err != nil {
				return err
			}
			keys = append(keys, kv.Key)
			count++
			if count >= sampleSize {
				// Enough samples collected; cancel the stream and use the
				// sentinel to stop this buffer's iteration cleanly.
				cancel()
				return errStop
			}
			return nil
		})
		if err == nil || errors.Is(err, errStop) {
			return nil
		}
		return err
	}

	// context.Canceled is the expected outcome when we stop early.
	if err := stream.Orchestrate(ctx); err != nil && !errors.Is(err, context.Canceled) {
		return nil, err
	}

	// Shuffle keys before returning to minimise locality
	// of keys coming from stream framework.
	rand.Shuffle(len(keys), func(i, j int) {
		keys[i], keys[j] = keys[j], keys[i]
	})

	return keys, nil
}
  • Purpose: Retrieves a sample of keys from the database using the stream framework.
  • Parameters:
    • db: A pointer to the Badger database instance.
    • sampleSize: The number of keys to sample.
  • Returns:
    • A slice of byte slices representing the sampled keys.
    • An error, if any.
  • Functionality:
    • Creates a new stream.
    • Overrides the KeyToList function to only extract keys from the stream.
    • Defines a Send function that unmarshals the data, appends keys to a list, and cancels the context when the sample size is reached.
    • Orchestrates the stream and handles potential errors.
    • Shuffles the keys to minimize locality.

Code Examples

None.

Getting Started Relevance