Skip to main content

read_bench.go

read_bench.go - Overview

This file implements a command-line tool for benchmarking read performance in a Badger database. It supports random reads using multiple goroutines, full database scans using iterators, and allows configuration of cache sizes and read-only mode.

Detailed Documentation

var readBenchCmd

// readBenchCmd is the `read` subcommand of benchCmd. It benchmarks read
// performance against an existing Badger database; the actual work is
// done by readBench (see RunE).
var readBenchCmd = &cobra.Command{
	Use:   "read",
	Short: "Read data from Badger randomly to benchmark read speed.",
	Long: `
This command reads data from existing Badger database randomly using multiple go routines.`,
	RunE: readBench,
}
  • Purpose: Defines a cobra command readBenchCmd for benchmarking Badger read performance.
  • Properties:
    • Use: The command's name is "read".
    • Short: A short description of the command.
    • Long: A longer description of the command.
    • RunE: Specifies the function readBench to execute when the command is run.

var sizeRead

var sizeRead atomic.Uint64 // will store size read till now
  • Purpose: An atomic unsigned 64-bit integer to store the total size of data read during the benchmark.

var entriesRead

var entriesRead atomic.Uint64 // will store entries read till now
  • Purpose: An atomic unsigned 64-bit integer to store the total number of entries read during the benchmark.

var startTime

var startTime time.Time     // start time of read benchmarking
  • Purpose: Stores the starting time of the read benchmark.

var ro

// ro holds the read-benchmark options populated from command-line flags
// in init.
var ro = struct {
	blockCacheSize int64 // max block cache size in MB (shifted to bytes when opening the DB)
	indexCacheSize int64 // max index cache size in MB (shifted to bytes when opening the DB)

	sampleSize int  // number of keys to sample for random lookups
	keysOnly   bool // if true, read keys only and skip values
	readOnly   bool // open the DB in read-only mode
	fullScan   bool // scan the whole DB with iterators instead of random reads
}{}
  • Purpose: Defines ro, a package-level variable of an anonymous struct type that groups the read-benchmark options populated from command-line flags.
  • Fields:
    • blockCacheSize: Size of the block cache in MB.
    • indexCacheSize: Size of the index cache in MB.
    • sampleSize: Number of keys to sample for random lookups.
    • keysOnly: Flag to indicate if only keys should be read (values skipped).
    • readOnly: Flag to open the database in read-only mode.
    • fullScan: Flag to perform a full database scan.

func init()

// init wires readBenchCmd into benchCmd and declares its command-line flags.
func init() {
	benchCmd.AddCommand(readBenchCmd)

	flags := readBenchCmd.Flags()
	flags.IntVarP(
		&numGoroutines, "goroutines", "g", 16, "Number of goroutines to run for reading.")
	flags.StringVarP(
		&duration, "duration", "d", "1m", "How long to run the benchmark.")
	flags.IntVar(
		&ro.sampleSize, "sample-size", 1000000, "Keys sample size to be used for random lookup.")
	flags.BoolVar(
		&ro.keysOnly, "keys-only", false, "If false, values will also be read.")
	flags.BoolVar(
		&ro.readOnly, "read-only", true, "If true, DB will be opened in read only mode.")
	flags.BoolVar(
		&ro.fullScan, "full-scan", false, "If true, full db will be scanned using iterators.")
	flags.Int64Var(&ro.blockCacheSize, "block-cache", 256, "Max size of block cache in MB")
	flags.Int64Var(&ro.indexCacheSize, "index-cache", 0, "Max size of index cache in MB")
}
  • Purpose: Initializes the readBenchCmd by adding it to the benchCmd and defining its command-line flags.
  • Functionality:
    • Adds the readBenchCmd as a subcommand to benchCmd.
    • Defines flags for:
      • Number of goroutines (goroutines, -g)
      • Duration of the benchmark (duration, -d)
      • Sample size for random lookups (sample-size)
      • Whether to read only keys (keys-only)
      • Whether to open the database in read-only mode (read-only)
      • Whether to perform a full scan (full-scan)
      • Block cache size (block-cache)
      • Index cache size (index-cache)

func fullScanDB(db *badger.DB)

// fullScanDB iterates over every item in the DB once, accumulating entry
// count and estimated size into the global counters that printStats
// reports every second.
func fullScanDB(db *badger.DB) {
	// Read at the maximum version so every key is visible (read-only txn).
	txn := db.NewTransactionAt(math.MaxUint64, false)
	defer txn.Discard()

	startTime = time.Now()
	// Print the stats from a background goroutine. The closer is created
	// with one running task and signalled (and waited on) when the scan
	// finishes, so the stats goroutine does not leak past this function.
	c := z.NewCloser(1)
	go printStats(c)
	defer c.SignalAndWait()

	it := txn.NewIterator(badger.DefaultIteratorOptions)
	defer it.Close()
	for it.Rewind(); it.Valid(); it.Next() {
		item := it.Item()
		entriesRead.Add(1)
		sizeRead.Add(uint64(item.EstimatedSize()))
	}
}
  • Purpose: Scans the entire Badger database using iterators and collects read statistics.
  • Parameters:
    • db: A pointer to the Badger database instance.
  • Functionality:
    • Creates a new read-only transaction at the maximum version (math.MaxUint64), so every key is visible.
    • Records the start time.
    • Starts a goroutine to print statistics using printStats.
    • Creates an iterator with default options.
    • Iterates through all items in the database:
      • Increments the entriesRead counter.
      • Adds the estimated size of each item to the sizeRead counter.

func readBench(cmd *cobra.Command, args []string) error

// readBench is the RunE handler for the "read" subcommand. It parses the
// benchmark duration, opens the DB with the configured cache sizes and
// read-only mode, then either performs one full scan or runs the random
// read benchmark for the requested duration.
func readBench(cmd *cobra.Command, args []string) error {
	rand.Seed(time.Now().Unix())

	dur, err := time.ParseDuration(duration)
	if err != nil {
		return y.Wrapf(err, "unable to parse duration")
	}
	y.AssertTrue(numGoroutines > 0)

	// Build options step by step; cache sizes are given in MB, so shift
	// left by 20 to convert to bytes.
	opt := badger.DefaultOptions(sstDir)
	opt = opt.WithValueDir(vlogDir)
	opt = opt.WithReadOnly(ro.readOnly)
	opt = opt.WithBlockCacheSize(ro.blockCacheSize << 20)
	opt = opt.WithIndexCacheSize(ro.indexCacheSize << 20)
	fmt.Printf("Opening badger with options = %+v\n", opt)

	db, err := badger.OpenManaged(opt)
	if err != nil {
		return y.Wrapf(err, "unable to open DB")
	}
	defer db.Close()

	fmt.Println("*********************************************************")
	fmt.Println("Starting to benchmark Reads")
	fmt.Println("*********************************************************")

	// if fullScan is true then do a complete scan of the db and return
	if ro.fullScan {
		fullScanDB(db)
		return nil
	}
	readTest(db, dur)
	return nil
}
  • Purpose: Implements the main logic for the read benchmark command.
  • Parameters:
    • cmd: A pointer to the cobra command.
    • args: Command-line arguments.
  • Returns: An error, if any.
  • Functionality:
    • Seeds the random number generator.
    • Parses the duration from the command-line arguments.
    • Creates Badger database options based on command-line flags.
    • Opens the Badger database.
    • If ro.fullScan is true, performs a full database scan using fullScanDB and returns.
    • Otherwise, calls readTest to perform the read benchmark.

func printStats(c *z.Closer)

// printStats reports benchmark progress once per second until c is
// closed: elapsed time, total bytes read, byte rate, entries read, and
// entry rate. It calls c.Done on exit.
func printStats(c *z.Closer) {
	defer c.Done()

	t := time.NewTicker(time.Second)
	defer t.Stop()
	for {
		select {
		case <-c.HasBeenClosed():
			return
		case <-t.C:
			dur := time.Since(startTime)
			sz := sizeRead.Load()
			entries := entriesRead.Load()
			// Guard against a divide-by-zero panic: uint64 truncation of
			// dur.Seconds() yields 0 when less than a full second has
			// elapsed (e.g. if startTime is set after this goroutine starts).
			secs := uint64(dur.Seconds())
			if secs == 0 {
				secs = 1
			}
			bytesRate := sz / secs
			entriesRate := entries / secs
			fmt.Printf("Time elapsed: %s, bytes read: %s, speed: %s/sec, "+
				"entries read: %d, speed: %d/sec\n", y.FixedDuration(dur),
				humanize.IBytes(sz), humanize.IBytes(bytesRate), entries, entriesRate)
		}
	}
}
  • Purpose: Prints read statistics periodically.
  • Parameters:
    • c: A pointer to a z.Closer for managing the goroutine's lifecycle.
  • Functionality:
    • Creates a ticker that fires every second.
    • In a loop:
      • If the closer is closed, exits the goroutine.
      • Otherwise, calculates and prints the elapsed time, total bytes read, read speed (bytes/sec), total entries read, and read speed (entries/sec).

func readKeys(db *badger.DB, c *z.Closer, keys [][]byte)

// readKeys loops forever, looking up keys picked at random from the
// provided sample and updating the global read counters, until c is
// closed. It calls c.Done on exit.
func readKeys(db *badger.DB, c *z.Closer, keys [][]byte) {
	defer c.Done()
	rng := rand.New(rand.NewSource(time.Now().Unix()))
	n := int32(len(keys)) // loop-invariant; hoisted out of the hot loop
	for {
		select {
		case <-c.HasBeenClosed():
			return
		default:
		}
		k := keys[rng.Int31n(n)]
		sizeRead.Add(lookupForKey(db, k))
		entriesRead.Add(1)
	}
}
  • Purpose: Reads keys from the database randomly.
  • Parameters:
    • db: A pointer to the Badger database instance.
    • c: A pointer to a z.Closer for managing the goroutine's lifecycle.
    • keys: A slice of byte slices representing the keys to read.
  • Functionality:
    • Creates a new random number generator.
    • In a loop:
      • If the closer is closed, exits the goroutine.
      • Otherwise, selects a random key from the keys slice.
      • Looks up the value for the selected key using lookupForKey.
      • Adds the size of the read value to sizeRead.
      • Increments the entriesRead counter.

func lookupForKey(db *badger.DB, key []byte) (sz uint64)

// lookupForKey reads up to 10 versions of key inside a read-only
// transaction and returns their combined estimated size.
func lookupForKey(db *badger.DB, key []byte) (sz uint64) {
	const maxVersions = 10
	err := db.View(func(txn *badger.Txn) error {
		opts := badger.DefaultIteratorOptions
		opts.AllVersions = true
		opts.PrefetchValues = false // only sizes are needed; skip value reads

		it := txn.NewKeyIterator(key, opts)
		defer it.Close()

		seen := 0
		for it.Seek(key); it.Valid(); it.Next() {
			sz += uint64(it.Item().EstimatedSize())
			// Stop after maxVersions versions of this key.
			if seen++; seen == maxVersions {
				break
			}
		}
		return nil
	})
	y.Check(err)
	return sz
}
  • Purpose: Looks up a key in the database and returns the combined estimated size of up to 10 versions of that key.
  • Parameters:
    • db: A pointer to the Badger database instance.
    • key: The key to look up.
  • Returns: The estimated size of the item.
  • Functionality:
    • Starts a read-only transaction.
    • Creates a new KeyIterator with AllVersions set to true and PrefetchValues set to false.
    • Seeks to the provided key
    • Iterates through a maximum of 10 versions of the key
    • Accumulates the size of each item in the sz variable.

func getSampleKeys(db *badger.DB, sampleSize int) ([][]byte, error)

// getSampleKeys collects up to sampleSize keys from the DB using the
// stream framework and returns them in shuffled order, so callers doing
// random lookups don't benefit from key locality.
func getSampleKeys(db *badger.DB, sampleSize int) ([][]byte, error) {
	var keys [][]byte
	count := 0
	stream := db.NewStreamAt(math.MaxUint64)

	// Override stream.KeyToList as we only want keys. Also
	// we can take only the first version for the key.
	stream.KeyToList = func(key []byte, itr *badger.Iterator) (*pb.KVList, error) {
		l := &pb.KVList{}
		// Since the stream framework copies the item's key while calling
		// KeyToList, we can directly append key to the list.
		l.Kv = append(l.Kv, &pb.KV{Key: key})
		return l, nil
	}

	errStop := errors.New("Stop iterating")
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	stream.Send = func(buf *z.Buffer) error {
		// Fix: this early-exit previously compared against the global
		// ro.sampleSize instead of the sampleSize parameter, so the two
		// thresholds in this function could disagree.
		if count >= sampleSize {
			return nil
		}
		err := buf.SliceIterate(func(s []byte) error {
			var kv pb.KV
			if err := proto.Unmarshal(s, &kv); err != nil {
				return err
			}
			keys = append(keys, kv.Key)
			count++
			if count >= sampleSize {
				// Enough samples collected; cancel the stream and use the
				// sentinel to stop this buffer's iteration cleanly.
				cancel()
				return errStop
			}
			return nil
		})
		if err == nil || errors.Is(err, errStop) {
			return nil
		}
		return err
	}

	// context.Canceled is the expected outcome when we stop early.
	if err := stream.Orchestrate(ctx); err != nil && !errors.Is(err, context.Canceled) {
		return nil, err
	}

	// Shuffle keys before returning to minimise locality
	// of keys coming from stream framework.
	rand.Shuffle(len(keys), func(i, j int) {
		keys[i], keys[j] = keys[j], keys[i]
	})

	return keys, nil
}
  • Purpose: Retrieves a sample of keys from the database using the stream framework.
  • Parameters:
    • db: A pointer to the Badger database instance.
    • sampleSize: The number of keys to sample.
  • Returns:
    • A slice of byte slices representing the sampled keys.
    • An error, if any.
  • Functionality:
    • Creates a new stream.
    • Overrides the KeyToList function to only extract keys from the stream.
    • Defines a Send function that unmarshals the data, appends keys to a list, and cancels the context when the sample size is reached.
    • Orchestrates the stream and handles potential errors.
    • Shuffles the keys to minimize locality.

Code Examples

None.

Getting Started Relevance