Skip to main content

pick_table_bench.go

pick_table_bench.go - Overview

This file defines a benchmark command picktable for simulating and benchmarking the pickTables function used in BadgerDB iterators. It focuses on evaluating the performance of table selection based on key prefixes.

Detailed Documentation

pickBenchCmd

var pickBenchCmd = &cobra.Command{
Use: "picktable",
Short: "Benchmark pick tables.",
Long: `This command simulates pickTables used in iterators.`,
RunE: pickTableBench,
}
  • Purpose: Defines a cobra command named picktable to benchmark table picking logic.
    • Use: Command name is "picktable".
    • Short: Short description: "Benchmark pick tables."
    • Long: Long description explaining it simulates pickTables used in iterators.
    • RunE: Specifies the function to execute when the command is run, which is pickTableBench.

pickOpts

var pickOpts = struct {
readOnly bool
sampleSize int
cpuprofile string
}{}
  • Purpose: Defines a struct to hold options for the picktable benchmark.
    • readOnly: A boolean indicating whether the database should be opened in read-only mode.
    • sampleSize: An integer representing the sample size of keys used for lookup.
    • cpuprofile: A string specifying the file path for CPU profiling.

keys

var keys    [][]byte
  • Purpose: Declares a global variable keys to store a slice of byte slices, presumably keys used in the benchmark.

handler

var handler levelHandler
  • Purpose: Declares a global variable handler of type levelHandler, likely used to manage the tables during the benchmark.

init()

func init() {
benchCmd.AddCommand(pickBenchCmd)
pickBenchCmd.Flags().BoolVar(
&pickOpts.readOnly, "read-only", true, "If true, DB will be opened in read only mode.")
pickBenchCmd.Flags().IntVar(
&pickOpts.sampleSize, "sample-size", 1000000, "Sample size of keys to be used for lookup.")
pickBenchCmd.Flags().StringVar(
&pickOpts.cpuprofile, "cpuprofile", "", "Write CPU profile to file.")
}
  • Purpose: Initializes the pickBenchCmd by adding it to the benchCmd and defining flags for it.
    • Adds the pickBenchCmd to the benchCmd.
    • Defines a boolean flag read-only for the pickOpts.readOnly field, defaulting to true.
    • Defines an integer flag sample-size for the pickOpts.sampleSize field, defaulting to 1000000.
    • Defines a string flag cpuprofile for the pickOpts.cpuprofile field.

pickTableBench(cmd *cobra.Command, args []string) error

func pickTableBench(cmd *cobra.Command, args []string) error {
opt := badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithReadOnly(pickOpts.readOnly)
fmt.Printf("Opening badger with options = %+v\n", opt)
db, err := badger.OpenManaged(opt)
if err != nil {
return y.Wrapf(err, "unable to open DB")
}
defer func() {
y.Check(db.Close())
}()

boundaries := getBoundaries(db)
tables := genTables(boundaries)
defer func() {
for _, tbl := range tables {
if err := tbl.DecrRef(); err != nil {
panic(err)
}
}
}()
handler.init(tables)
keys, err = getSampleKeys(db, pickOpts.sampleSize)
y.Check(err)
fmt.Println("Running benchmark...")
fmt.Println("***** BenchmarkPickTables *****")
fmt.Println(testing.Benchmark(BenchmarkPickTables))
fmt.Println("*******************************")
return nil
}
  • Purpose: Executes the pick table benchmark.
    • Parameters:
      • cmd: A pointer to a cobra.Command object.
      • args: A slice of strings representing command line arguments.
    • Returns: An error, if any occurred during the benchmark.
    • Opens a BadgerDB instance with specified options.
    • Retrieves table boundaries, generates mock tables, initializes the level handler, gets sample keys, and runs the BenchmarkPickTables benchmark.

BenchmarkPickTables(b *testing.B)

func BenchmarkPickTables(b *testing.B) {
if len(pickOpts.cpuprofile) > 0 {
f, err := os.Create(pickOpts.cpuprofile)
y.Check(err)
err = pprof.StartCPUProfile(f)
y.Check(err)
defer pprof.StopCPUProfile()
}
b.ResetTimer()
iopts := iteratorOptions{prefixIsKey: true}
for i := 0; i < b.N; i++ {
for _, key := range keys {
iopts.Prefix = key
_ = handler.pickTables(iopts)
}
}
}
  • Purpose: Benchmarks the pickTables function.
    • Parameters:
      • b: A pointer to a testing.B object, used for benchmarking.
    • If pickOpts.cpuprofile is set, it starts CPU profiling.
    • Resets the benchmark timer.
    • Iterates b.N times, and for each iteration, it iterates over the keys and calls handler.pickTables with the current key as the prefix.

iteratorOptions

type iteratorOptions struct {
prefixIsKey bool // If set, use the prefix for bloom filter lookup.
Prefix []byte // Only iterate over this given prefix.
SinceTs uint64 // Only read data that has version > SinceTs.
}
  • Purpose: Defines options for iterators, specifically related to prefix-based filtering.
    • prefixIsKey: A boolean indicating whether the prefix should be used for bloom filter lookup.
    • Prefix: A byte slice representing the prefix to iterate over.
    • SinceTs: A uint64 representing the timestamp; only data with a version greater than this timestamp will be read.

compareToPrefix(key []byte) int

func (opt *iteratorOptions) compareToPrefix(key []byte) int {
// We should compare key without timestamp. For example key - a[TS] might be > "aa" prefix.
key = y.ParseKey(key)
if len(key) > len(opt.Prefix) {
key = key[:len(opt.Prefix)]
}
return bytes.Compare(key, opt.Prefix)
}
  • Purpose: Compares a key to the prefix, considering only the part of the key relevant to the prefix.
    • Parameters:
      • key: A byte slice representing the key to compare.
    • Returns: An integer representing the result of the comparison (similar to bytes.Compare).
    • Parses the key to remove the timestamp.
    • Shortens the key to the length of the prefix if it's longer.
    • Compares the key and the prefix using bytes.Compare.

levelHandler

type levelHandler struct {
tables []*table.Table
}
  • Purpose: Manages tables at a specific level.
    • tables: A slice of pointers to table.Table objects.

init(tables []*table.Table)

func (s *levelHandler) init(tables []*table.Table) {
fmt.Println("Initializing level handler...")
s.tables = tables
}
  • Purpose: Initializes the levelHandler with a slice of tables.
    • Parameters:
      • tables: A slice of pointers to table.Table objects.
    • Assigns the provided tables to the s.tables field.

pickTables(opt iteratorOptions) []*table.Table

func (s *levelHandler) pickTables(opt iteratorOptions) []*table.Table {
filterTables := func(tables []*table.Table) []*table.Table {
if opt.SinceTs > 0 {
tmp := tables[:0]
for _, t := range tables {
if t.MaxVersion() &lt; opt.SinceTs {
continue
}
tmp = append(tmp, t)
}
tables = tmp
}
return tables
}

all := s.tables
if len(opt.Prefix) == 0 {
out := make([]*table.Table, len(all))
copy(out, all)
return filterTables(out)
}
sIdx := sort.Search(len(all), func(i int) bool {
// table.Biggest >= opt.prefix
// if opt.Prefix &lt; table.Biggest, then surely it is not in any of the preceding tables.
return opt.compareToPrefix(all[i].Biggest()) >= 0
})
if sIdx == len(all) {
// Not found.
return []*table.Table{}
}

filtered := all[sIdx:]
if !opt.prefixIsKey {
eIdx := sort.Search(len(filtered), func(i int) bool {
return opt.compareToPrefix(filtered[i].Smallest()) > 0
})
out := make([]*table.Table, len(filtered[:eIdx]))
copy(out, filtered[:eIdx])
return filterTables(out)
}

// opt.prefixIsKey == true. This code is optimizing for opt.prefixIsKey part.
var out []*table.Table
// hash := y.Hash(opt.Prefix)
for _, t := range filtered {
// When we encounter the first table whose smallest key is higher than opt.Prefix, we can
// stop. This is an IMPORTANT optimization, just considering how often we call
// NewKeyIterator.
if opt.compareToPrefix(t.Smallest()) > 0 {
// if table.Smallest > opt.Prefix, then this and all tables after this can be ignored.
break
}
out = append(out, t)
}
return filterTables(out)
}
  • Purpose: Selects tables based on the provided iterator options, particularly the prefix.
    • Parameters:
      • opt: iteratorOptions which contains options for picking tables.
    • Returns: A slice of pointers to table.Table objects that match the criteria.
    • Implements logic similar to the master branch, filtering tables based on prefix and timestamp.

genTables(boundaries [][]byte) []*table.Table

func genTables(boundaries [][]byte) []*table.Table {
buildTable := func(k1, k2 []byte) *table.Table {
opts := table.Options{
ChkMode: options.NoVerification,
}
b := table.NewTableBuilder(opts)
defer b.Close()
// Add one key so that we can open this table.
b.Add(y.KeyWithTs(k1, 1), y.ValueStruct{}, 0)
b.Add(y.KeyWithTs(k2, 1), y.ValueStruct{}, 0)
tab, err := table.OpenInMemoryTable(b.Finish(), 0, &opts)
y.Check(err)
return tab
}

sort.Slice(boundaries, func(i, j int) bool {
return bytes.Compare(boundaries[i], boundaries[j]) &lt; 0
})
out := make([]*table.Table, 0, len(boundaries))
for i := range boundaries {
var j int
if i != 0 {
j = i - 1
}
out = append(out, buildTable(boundaries[i], boundaries[j]))
}
fmt.Printf("Created %d mock tables.\n", len(out))
return out
}
  • Purpose: Generates mock tables based on the given boundaries.
    • Parameters:
      • boundaries: A slice of byte slices representing the boundaries of the tables.
    • Returns: A slice of pointers to table.Table objects.
    • Sorts the boundaries, builds a table for each boundary (using adjacent boundaries as min/max keys), and returns the resulting slice of tables.

getBoundaries(db *badger.DB) [][]byte

func getBoundaries(db *badger.DB) [][]byte {
fmt.Println("Getting the table boundaries...")
tables := db.Tables()
out := make([][]byte, 0, 2*len(tables))
for _, t := range tables {
out = append(out, t.Left, t.Right)
}
return out
}
  • Purpose: Retrieves the boundaries (left and right keys) of the existing tables in the database.
    • Parameters:
      • db: A pointer to a badger.DB object.
    • Returns: A slice of byte slices, where each pair of elements represents the left and right boundaries of a table.
    • Gets all tables from the DB, then iterates through them and saves the left and right keys of the table to the out slice.

Code Examples

None

Getting Started Relevance