Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
321 changes: 317 additions & 4 deletions pkg/keyspace/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,18 @@
package keyspace

import (
"bytes"
"container/heap"
"encoding/binary"
"encoding/hex"
"regexp"
"slices"
"strconv"
"strings"

"github.com/gogo/protobuf/proto"
"github.com/google/btree"

"github.com/pingcap/errors"
"github.com/pingcap/kvproto/pkg/keyspacepb"

Expand All @@ -30,6 +35,8 @@ import (
"github.com/tikv/pd/pkg/keyspace/constant"
"github.com/tikv/pd/pkg/schedule/labeler"
"github.com/tikv/pd/pkg/storage/endpoint"
"github.com/tikv/pd/pkg/utils/keyutil"
"github.com/tikv/pd/pkg/utils/syncutil"
"github.com/tikv/pd/pkg/versioninfo/kerneltype"
)

Expand All @@ -52,6 +59,9 @@ var (
}
// Only keyspaces in the state specified by allowChangeConfig are allowed to change their config.
allowChangeConfig = []keyspacepb.KeyspaceState{keyspacepb.KeyspaceState_ENABLED, keyspacepb.KeyspaceState_DISABLED}

rawPrefix = []byte{'r'}
txnPrefix = []byte{'x'}
)

// validateID check if keyspace falls within the acceptable range.
Expand Down Expand Up @@ -119,10 +129,10 @@ func MakeRegionBound(id uint32) *RegionBound {
binary.BigEndian.PutUint32(keyspaceIDBytes, id)
binary.BigEndian.PutUint32(nextKeyspaceIDBytes, id+1)
return &RegionBound{
RawLeftBound: codec.EncodeBytes(append([]byte{'r'}, keyspaceIDBytes[1:]...)),
RawRightBound: codec.EncodeBytes(append([]byte{'r'}, nextKeyspaceIDBytes[1:]...)),
TxnLeftBound: codec.EncodeBytes(append([]byte{'x'}, keyspaceIDBytes[1:]...)),
TxnRightBound: codec.EncodeBytes(append([]byte{'x'}, nextKeyspaceIDBytes[1:]...)),
RawLeftBound: codec.EncodeBytes(append(rawPrefix, keyspaceIDBytes[1:]...)),
RawRightBound: codec.EncodeBytes(append(rawPrefix, nextKeyspaceIDBytes[1:]...)),
TxnLeftBound: codec.EncodeBytes(append(txnPrefix, keyspaceIDBytes[1:]...)),
TxnRightBound: codec.EncodeBytes(append(txnPrefix, nextKeyspaceIDBytes[1:]...)),
}
}

Expand Down Expand Up @@ -334,3 +344,306 @@ func isProtectedKeyspaceName(name string) bool {
}
return name == constant.DefaultKeyspaceName
}

// KeyType classifies a region key by the mode prefix found after decoding it:
// raw, txn, or classical (no recognizable keyspace prefix).
type KeyType int

const (
// KeyTypeRaw marks a key in raw mode (decoded key starts with 'r'), used for KV operations without transactions.
KeyTypeRaw KeyType = iota
// KeyTypeTxn marks a key in txn mode (decoded key starts with 'x'), used for KV operations with transactions.
KeyTypeTxn
// KeyTypeClassical marks a key that is not part of the keyspace system: it could not be decoded,
// is too short to carry a keyspace ID, or has an unknown mode prefix.
KeyTypeClassical
)

// ExtractKeyspaceID decodes a region key and reports which keyspace it belongs to.
//
// Once decoded, a keyspace-aware key has the layout
// [mode_prefix][keyspace_id: 3 bytes, big-endian][...], where mode_prefix is
// 'r' for raw keys and 'x' for txn keys.
//
// It returns the keyspace ID together with the KeyType of the key:
//   - an empty key yields (constant.MaxValidKeyspaceID, KeyTypeTxn);
//   - a key that fails to decode, decodes to fewer than 4 bytes, or carries an
//     unknown mode prefix yields (0, KeyTypeClassical);
//   - otherwise the 3-byte keyspace ID and KeyTypeRaw or KeyTypeTxn.
func ExtractKeyspaceID(key []byte) (uint32, KeyType) {
	if len(key) == 0 {
		return constant.MaxValidKeyspaceID, KeyTypeTxn
	}

	_, body, err := codec.DecodeBytes(key)
	// A keyspace-aware key needs at least the mode prefix plus 3 ID bytes.
	if err != nil || len(body) < 4 {
		return 0, KeyTypeClassical
	}

	var keyType KeyType
	switch mode := body[0]; mode {
	case rawPrefix[0]:
		keyType = KeyTypeRaw
	case txnPrefix[0]:
		keyType = KeyTypeTxn
	default:
		return 0, KeyTypeClassical
	}

	// Read prefix+ID as one big-endian word, then mask off the prefix byte so
	// only the 24-bit keyspace ID remains.
	id := binary.BigEndian.Uint32(body[:4]) & 0x00FFFFFF
	return id, keyType
}

// Checker is an interface to check keyspace existence.
type Checker interface {
// GetKeyspaceIDInRange returns the keyspace IDs in the closed range [start, end].
// The IDs are returned in descending order, together with a boolean indicating
// whether any keyspace was found in the range.
// NOTE(review): the Cache implementation in this file caps the result at limit
// entries only when limit > 0; confirm other implementations treat limit the same way.
GetKeyspaceIDInRange(start, end uint32, limit int) ([]uint32, bool)
// KeyspaceExist returns whether the keyspace with the given ID exists.
KeyspaceExist(keyspaceID uint32) bool
}

// RegionSpansMultipleKeyspaces reports whether the region [startKey, endKey)
// crosses a keyspace boundary involving a keyspace known to checker.
// endKey is exclusive.
//
// An empty startKey is treated as (constant.StartKeyspaceID, KeyTypeRaw); an
// empty endKey is classified by ExtractKeyspaceID as
// (constant.MaxValidKeyspaceID, KeyTypeTxn).
//
// The decision is taken in this order:
//  1. A nil checker yields false.
//  2. A classical startKey with a classical or empty endKey yields false.
//  3. Identical keyspace ID and key type on both ends yields false.
//  4. A raw startKey with a txn endKey yields true.
//  5. An endKey that is exactly the raw or txn right bound of startKey's
//     keyspace yields false.
//  6. Otherwise the result is true when the keyspace of either end exists
//     according to checker.
func RegionSpansMultipleKeyspaces(startKey, endKey []byte, checker Checker) bool {
	if checker == nil {
		return false
	}

	var (
		startID   uint32
		startType KeyType
	)
	if len(startKey) != 0 {
		startID, startType = ExtractKeyspaceID(startKey)
	} else {
		startID, startType = constant.StartKeyspaceID, KeyTypeRaw
	}
	endID, endType := ExtractKeyspaceID(endKey)

	switch {
	case startType == KeyTypeClassical && (endType == KeyTypeClassical || len(endKey) == 0):
		// Neither end carries a keyspace prefix (or the region is open-ended
		// from a classical start): nothing keyspace-related to report.
		return false
	case startID == endID && startType == endType:
		// Both ends resolve to the same keyspace and mode.
		return false
	case startType == KeyTypeRaw && endType == KeyTypeTxn:
		// Raw keys ('r') sort before txn keys ('x'), so a region such as
		// ['r200', 'x100') runs through the raw/txn divide and may cover
		// keyspaces (200, MaxValidKeyspaceID].
		return true
	}

	// When endKey lands in the very next keyspace it may simply be the right
	// bound of startKey's keyspace, i.e. the region is
	// [startKey, rightBound) and still sits inside a single keyspace.
	if endID == startID+1 {
		bound := MakeRegionBound(startID)
		if bytes.Equal(endKey, bound.TxnRightBound) || bytes.Equal(endKey, bound.RawRightBound) {
			return false
		}
	}

	// Both lookups are always performed; if neither keyspace exists the
	// region is not considered to span keyspaces.
	startKnown := checker.KeyspaceExist(startID)
	endKnown := checker.KeyspaceExist(endID)
	return startKnown || endKnown
}

// scanLimit is the limit GetKeyspaceSplitKeys passes to
// Checker.GetKeyspaceIDInRange on each call.
const scanLimit = 10

// GetKeyspaceSplitKeys returns the keys at which the region [startKey, endKey)
// should be split so that keyspaces end up in separate regions.
//
// Only boundaries of keyspaces reported by checker are considered, and at most
// scanLimit keyspaces are requested per call. The result is sorted ascending
// with duplicates removed; nil is returned when checker is nil or no split is
// needed.
//
// An empty startKey is treated as (constant.StartKeyspaceID, KeyTypeRaw).
func GetKeyspaceSplitKeys(startKey, endKey []byte, checker Checker) [][]byte {
	// Without a checker keyspace existence cannot be verified.
	if checker == nil {
		return nil
	}

	var (
		startID   uint32
		startType KeyType
	)
	if len(startKey) != 0 {
		startID, startType = ExtractKeyspaceID(startKey)
	} else {
		startID, startType = constant.StartKeyspaceID, KeyTypeRaw
	}
	endID, endType := ExtractKeyspaceID(endKey)

	if endType == KeyTypeClassical {
		// Both ends classical: the region is outside the keyspace system.
		if startType == KeyTypeClassical {
			return nil
		}
		// Only the end is classical: scan every keyspace from startID upwards.
		endID = constant.MaxValidKeyspaceID
	}

	// Same keyspace and same mode on both ends: nothing to split.
	if startID == endID && startType == endType {
		return nil
	}

	// Raw keys ('r') sort before txn keys ('x'), so a region such as
	// ['r200', 'x100') can cover raw data of every keyspace after 200; widen
	// the scan to (200, MaxValidKeyspaceID].
	if startType == KeyTypeRaw && endType == KeyTypeTxn {
		endID = constant.MaxValidKeyspaceID
	}

	// When endKey is exactly the right bound of startKey's keyspace, the
	// region is [startKey, rightBound) and already sits inside one keyspace.
	if endID == startID+1 {
		bound := MakeRegionBound(startID)
		if bytes.Equal(endKey, bound.TxnRightBound) || bytes.Equal(endKey, bound.RawRightBound) {
			return nil
		}
	}

	ids, ok := checker.GetKeyspaceIDInRange(startID, endID, scanLimit)
	if !ok || ids == nil {
		return nil
	}

	// Collect every raw/txn bound of the reported keyspaces that
	// keyutil.Between accepts for this region.
	var splitKeys [][]byte
	for _, id := range ids {
		bound := MakeRegionBound(id)
		candidates := [][]byte{
			bound.RawLeftBound,
			bound.RawRightBound,
			bound.TxnLeftBound,
			bound.TxnRightBound,
		}
		for _, candidate := range candidates {
			if keyutil.Between(startKey, endKey, candidate) {
				splitKeys = append(splitKeys, candidate)
			}
		}
	}
	if len(splitKeys) == 0 {
		return nil
	}

	// Adjacent keyspaces share a bound (right of N == left of N+1), so sort
	// and drop the duplicates.
	slices.SortFunc(splitKeys, bytes.Compare)
	return slices.CompactFunc(splitKeys, bytes.Equal)
}

// keyspaceItem is the entry type stored in Cache's B-tree, one per keyspace.
type keyspaceItem struct {
// keyspaceID is the only field compared by Less, so it determines tree order and identity.
keyspaceID uint32
// name is the keyspace name supplied to Cache.Save.
name string
// state is the keyspace state supplied to Cache.Save; Cache lookups treat TOMBSTONE entries as absent.
state keyspacepb.KeyspaceState
}

// Less reports whether the receiver orders before than. Only the keyspace ID
// takes part in the comparison.
func (item *keyspaceItem) Less(than keyspaceItem) bool {
	return than.keyspaceID > item.keyspaceID
}

// Cache is a cache for keyspace information, which is used to quickly determine keyspace existence
// and to look up a keyspace's name and state by ID.
type Cache struct {
// RWMutex guards tree; every Cache method in this file acquires it before touching tree.
syncutil.RWMutex
// tree stores keyspaceItem entries ordered by keyspace ID.
tree *btree.BTreeG[keyspaceItem]
}

// NewCache returns an empty Cache whose entries are kept ordered by keyspace ID.
func NewCache() *Cache {
	// Ordering is delegated to keyspaceItem.Less.
	byKeyspaceID := func(a, b keyspaceItem) bool {
		return a.Less(b)
	}
	return &Cache{tree: btree.NewG(2, byKeyspaceID)}
}

// getKeyspaceByID looks up the cached entry for keyspaceID under the read
// lock. The boolean result is false when no entry is stored for that ID.
func (s *Cache) getKeyspaceByID(keyspaceID uint32) (keyspaceItem, bool) {
	probe := keyspaceItem{keyspaceID: keyspaceID}
	s.RLock()
	defer s.RUnlock()
	return s.tree.Get(probe)
}

// Save stores the keyspace's name and state in the cache under keyspaceID,
// replacing any entry already stored for that ID.
func (s *Cache) Save(keyspaceID uint32, name string, state keyspacepb.KeyspaceState) {
	s.Lock()
	defer s.Unlock()
	s.tree.ReplaceOrInsert(keyspaceItem{
		keyspaceID: keyspaceID,
		name:       name,
		state:      state,
	})
}

// DeleteKeyspace removes the entry for keyspaceID from the cache, if present.
func (s *Cache) DeleteKeyspace(keyspaceID uint32) {
	// Only the ID matters for locating the entry; other fields stay zero.
	target := keyspaceItem{keyspaceID: keyspaceID}
	s.Lock()
	defer s.Unlock()
	s.tree.Delete(target)
}

// scanAllKeyspaces calls f for each cached keyspace in ascending keyspace ID
// order while holding the read lock. Iteration stops as soon as f returns false.
func (s *Cache) scanAllKeyspaces(f func(keyspaceID uint32, name string) bool) {
	visit := func(entry keyspaceItem) bool {
		return f(entry.keyspaceID, entry.name)
	}
	s.RLock()
	defer s.RUnlock()
	s.tree.Ascend(visit)
}

// KeyspaceExist reports whether a keyspace with the given ID is cached and is
// not in the TOMBSTONE state.
func (s *Cache) KeyspaceExist(id uint32) bool {
	// getKeyspaceByID takes the read lock for the lookup.
	entry, ok := s.getKeyspaceByID(id)
	return ok && entry.state != keyspacepb.KeyspaceState_TOMBSTONE
}

// GetKeyspaceIDInRange returns the IDs of cached, non-TOMBSTONE keyspaces in
// the closed range [start, end], in descending ID order.
//
// When limit > 0 at most limit IDs are returned; a non-positive limit means no
// cap. The boolean result is true when at least one ID was collected. The
// returned slice is never nil.
func (s *Cache) GetKeyspaceIDInRange(start, end uint32, limit int) ([]uint32, bool) {
	s.RLock()
	defer s.RUnlock()
	ids := make([]uint32, 0)
	s.tree.DescendLessOrEqual(keyspaceItem{keyspaceID: end}, func(item keyspaceItem) bool {
		// Items are visited in descending ID order, so once one falls below
		// start every remaining item does too. Stop here instead of walking
		// the rest of the tree while holding the lock.
		if item.keyspaceID < start {
			return false
		}
		// Tombstoned keyspaces are skipped and do not count towards limit.
		if item.state == keyspacepb.KeyspaceState_TOMBSTONE {
			return true
		}
		ids = append(ids, item.keyspaceID)
		// Keep going only while the cap (if any) has not been reached.
		return limit <= 0 || len(ids) < limit
	})
	return ids, len(ids) > 0
}

// NewKeyspaceMeta builds a KeyspaceMeta by protobuf-unmarshalling the bytes of
// data. It returns a wrapped errs.ErrProtoUnmarshal error when unmarshalling fails.
func NewKeyspaceMeta(data string) (*keyspacepb.KeyspaceMeta, error) {
	var meta keyspacepb.KeyspaceMeta
	err := proto.Unmarshal([]byte(data), &meta)
	if err != nil {
		return nil, errs.ErrProtoUnmarshal.Wrap(err).GenWithStackByCause()
	}
	return &meta, nil
}
Loading
Loading