pick_table_bench.go
pick_table_bench.go - Overview
This file defines a benchmark command picktable
for simulating and benchmarking the pickTables
function used in BadgerDB iterators. It focuses on evaluating the performance of table selection based on key prefixes.
Detailed Documentation
pickBenchCmd
var pickBenchCmd = &cobra.Command{
Use: "picktable",
Short: "Benchmark pick tables.",
Long: `This command simulates pickTables used in iterators.`,
RunE: pickTableBench,
}
- Purpose: Defines a cobra command named
picktable
to benchmark table picking logic.Use
: Command name is "picktable".Short
: Short description: "Benchmark pick tables."Long
: Long description explaining it simulatespickTables
used in iterators.RunE
: Specifies the function to execute when the command is run, which ispickTableBench
.
pickOpts
var pickOpts = struct {
readOnly bool
sampleSize int
cpuprofile string
}{}
- Purpose: Defines a struct to hold options for the
picktable
benchmark.readOnly
: A boolean indicating whether the database should be opened in read-only mode.sampleSize
: An integer representing the sample size of keys used for lookup.cpuprofile
: A string specifying the file path for CPU profiling.
keys
var keys [][]byte
- Purpose: Declares a global variable
keys
to store a slice of byte slices, presumably keys used in the benchmark.
handler
var handler levelHandler
- Purpose: Declares a global variable
handler
of typelevelHandler
, likely used to manage the tables during the benchmark.
init()
func init() {
benchCmd.AddCommand(pickBenchCmd)
pickBenchCmd.Flags().BoolVar(
&pickOpts.readOnly, "read-only", true, "If true, DB will be opened in read only mode.")
pickBenchCmd.Flags().IntVar(
&pickOpts.sampleSize, "sample-size", 1000000, "Sample size of keys to be used for lookup.")
pickBenchCmd.Flags().StringVar(
&pickOpts.cpuprofile, "cpuprofile", "", "Write CPU profile to file.")
}
- Purpose: Initializes the
pickBenchCmd
by adding it to thebenchCmd
and defining flags for it.- Adds the
pickBenchCmd
to thebenchCmd
. - Defines a boolean flag
read-only
for thepickOpts.readOnly
field, defaulting totrue
. - Defines an integer flag
sample-size
for thepickOpts.sampleSize
field, defaulting to 1000000. - Defines a string flag
cpuprofile
for thepickOpts.cpuprofile
field.
- Adds the
pickTableBench(cmd *cobra.Command, args []string) error
func pickTableBench(cmd *cobra.Command, args []string) error {
opt := badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithReadOnly(pickOpts.readOnly)
fmt.Printf("Opening badger with options = %+v\n", opt)
db, err := badger.OpenManaged(opt)
if err != nil {
return y.Wrapf(err, "unable to open DB")
}
defer func() {
y.Check(db.Close())
}()
boundaries := getBoundaries(db)
tables := genTables(boundaries)
defer func() {
for _, tbl := range tables {
if err := tbl.DecrRef(); err != nil {
panic(err)
}
}
}()
handler.init(tables)
keys, err = getSampleKeys(db, pickOpts.sampleSize)
y.Check(err)
fmt.Println("Running benchmark...")
fmt.Println("***** BenchmarkPickTables *****")
fmt.Println(testing.Benchmark(BenchmarkPickTables))
fmt.Println("*******************************")
return nil
}
- Purpose: Executes the pick table benchmark.
- Parameters:
cmd
: A pointer to acobra.Command
object.args
: A slice of strings representing command line arguments.
- Returns: An error, if any occurred during the benchmark.
- Opens a BadgerDB instance with specified options.
- Retrieves table boundaries, generates mock tables, initializes the level handler, gets sample keys, and runs the
BenchmarkPickTables
benchmark.
- Parameters:
BenchmarkPickTables(b *testing.B)
func BenchmarkPickTables(b *testing.B) {
if len(pickOpts.cpuprofile) > 0 {
f, err := os.Create(pickOpts.cpuprofile)
y.Check(err)
err = pprof.StartCPUProfile(f)
y.Check(err)
defer pprof.StopCPUProfile()
}
b.ResetTimer()
iopts := iteratorOptions{prefixIsKey: true}
for i := 0; i < b.N; i++ {
for _, key := range keys {
iopts.Prefix = key
_ = handler.pickTables(iopts)
}
}
}
- Purpose: Benchmarks the
pickTables
function.- Parameters:
b
: A pointer to atesting.B
object, used for benchmarking.
- If
pickOpts.cpuprofile
is set, it starts CPU profiling. - Resets the benchmark timer.
- Iterates
b.N
times, and for each iteration, it iterates over thekeys
and callshandler.pickTables
with the current key as the prefix.
- Parameters:
iteratorOptions
type iteratorOptions struct {
prefixIsKey bool // If set, use the prefix for bloom filter lookup.
Prefix []byte // Only iterate over this given prefix.
SinceTs uint64 // Only read data that has version > SinceTs.
}
- Purpose: Defines options for iterators, specifically related to prefix-based filtering.
prefixIsKey
: A boolean indicating whether the prefix should be used for bloom filter lookup.Prefix
: A byte slice representing the prefix to iterate over.SinceTs
: A uint64 representing the timestamp; only data with a version greater than this timestamp will be read.
compareToPrefix(key []byte) int
func (opt *iteratorOptions) compareToPrefix(key []byte) int {
// We should compare key without timestamp. For example key - a[TS] might be > "aa" prefix.
key = y.ParseKey(key)
if len(key) > len(opt.Prefix) {
key = key[:len(opt.Prefix)]
}
return bytes.Compare(key, opt.Prefix)
}
- Purpose: Compares a key to the prefix, considering only the part of the key relevant to the prefix.
- Parameters:
key
: A byte slice representing the key to compare.
- Returns: An integer representing the result of the comparison (similar to
bytes.Compare
). - Parses the key to remove the timestamp.
- Shortens the key to the length of the prefix if it's longer.
- Compares the key and the prefix using
bytes.Compare
.
- Parameters:
levelHandler
type levelHandler struct {
tables []*table.Table
}
- Purpose: Manages tables at a specific level.
tables
: A slice of pointers totable.Table
objects.
init(tables []*table.Table)
func (s *levelHandler) init(tables []*table.Table) {
fmt.Println("Initializing level handler...")
s.tables = tables
}
- Purpose: Initializes the
levelHandler
with a slice of tables.- Parameters:
tables
: A slice of pointers totable.Table
objects.
- Assigns the provided
tables
to thes.tables
field.
- Parameters:
pickTables(opt iteratorOptions) []*table.Table
func (s *levelHandler) pickTables(opt iteratorOptions) []*table.Table {
filterTables := func(tables []*table.Table) []*table.Table {
if opt.SinceTs > 0 {
tmp := tables[:0]
for _, t := range tables {
if t.MaxVersion() < opt.SinceTs {
continue
}
tmp = append(tmp, t)
}
tables = tmp
}
return tables
}
all := s.tables
if len(opt.Prefix) == 0 {
out := make([]*table.Table, len(all))
copy(out, all)
return filterTables(out)
}
sIdx := sort.Search(len(all), func(i int) bool {
// table.Biggest >= opt.prefix
// if opt.Prefix < table.Biggest, then surely it is not in any of the preceding tables.
return opt.compareToPrefix(all[i].Biggest()) >= 0
})
if sIdx == len(all) {
// Not found.
return []*table.Table{}
}
filtered := all[sIdx:]
if !opt.prefixIsKey {
eIdx := sort.Search(len(filtered), func(i int) bool {
return opt.compareToPrefix(filtered[i].Smallest()) > 0
})
out := make([]*table.Table, len(filtered[:eIdx]))
copy(out, filtered[:eIdx])
return filterTables(out)
}
// opt.prefixIsKey == true. This code is optimizing for opt.prefixIsKey part.
var out []*table.Table
// hash := y.Hash(opt.Prefix)
for _, t := range filtered {
// When we encounter the first table whose smallest key is higher than opt.Prefix, we can
// stop. This is an IMPORTANT optimization, just considering how often we call
// NewKeyIterator.
if opt.compareToPrefix(t.Smallest()) > 0 {
// if table.Smallest > opt.Prefix, then this and all tables after this can be ignored.
break
}
out = append(out, t)
}
return filterTables(out)
}
- Purpose: Selects tables based on the provided iterator options, particularly the prefix.
- Parameters:
opt
:iteratorOptions
which contains options for picking tables.
- Returns: A slice of pointers to
table.Table
objects that match the criteria. - Implements logic similar to the master branch, filtering tables based on prefix and timestamp.
- Parameters:
genTables(boundaries [][]byte) []*table.Table
func genTables(boundaries [][]byte) []*table.Table {
buildTable := func(k1, k2 []byte) *table.Table {
opts := table.Options{
ChkMode: options.NoVerification,
}
b := table.NewTableBuilder(opts)
defer b.Close()
// Add one key so that we can open this table.
b.Add(y.KeyWithTs(k1, 1), y.ValueStruct{}, 0)
b.Add(y.KeyWithTs(k2, 1), y.ValueStruct{}, 0)
tab, err := table.OpenInMemoryTable(b.Finish(), 0, &opts)
y.Check(err)
return tab
}
sort.Slice(boundaries, func(i, j int) bool {
return bytes.Compare(boundaries[i], boundaries[j]) < 0
})
out := make([]*table.Table, 0, len(boundaries))
for i := range boundaries {
var j int
if i != 0 {
j = i - 1
}
out = append(out, buildTable(boundaries[i], boundaries[j]))
}
fmt.Printf("Created %d mock tables.\n", len(out))
return out
}
- Purpose: Generates mock tables based on the given boundaries.
- Parameters:
boundaries
: A slice of byte slices representing the boundaries of the tables.
- Returns: A slice of pointers to
table.Table
objects. - Sorts the boundaries, builds a table for each boundary (using adjacent boundaries as min/max keys), and returns the resulting slice of tables.
- Parameters:
getBoundaries(db *badger.DB) [][]byte
func getBoundaries(db *badger.DB) [][]byte {
fmt.Println("Getting the table boundaries...")
tables := db.Tables()
out := make([][]byte, 0, 2*len(tables))
for _, t := range tables {
out = append(out, t.Left, t.Right)
}
return out
}
- Purpose: Retrieves the boundaries (left and right keys) of the existing tables in the database.
- Parameters:
db
: A pointer to abadger.DB
object.
- Returns: A slice of byte slices, where each pair of elements represents the left and right boundaries of a table.
- Gets all tables from the DB, then iterates through them and saves the left and right keys of the table to the
out
slice.
- Parameters:
Code Examples
None