Safe Haskell	Safe-Inferred
Language	GHC2021

ScheduledMerges

Contents

Main API
Test and trace
Invariants
Run sizes
Level capacity

Description

A prototype of an LSM with explicitly scheduled incremental merges.

Scheduling the incremental merges is about ensuring that the merging work (CPU and I/O) can be spread out evenly over time. This also means the LSM update operations have worst case complexity rather than amortised complexity, because they do a fixed amount of merging work each.

Another thing this prototype demonstrates is a design for duplicating tables and sharing ongoing incremental merges.

Finally, it demonstrates a design for table unions, including a representation for in-progress merging trees.

The merging policy that this prototype uses is "lazy levelling". Each level is T times bigger than the previous level. Lazy levelling means we use tiering for every level except the last level which uses levelling. Though note that the first level always uses tiering, even if the first level is also the last level. This is to simplify flushing the write buffer: if we used levelling on the first level we would need a code path for merging the write buffer into the first level.

Synopsis

data LSM s
newtype TableId = TableId Int
data LSMConfig = LSMConfig {
- configMaxWriteBufferSize :: !Int
- configSizeRatio :: !Int
}
newtype Key = K Int
newtype Value = V Int
resolveValue :: Value -> Value -> Value
newtype Blob = B Int
new :: Tracer (ST s) Event -> TableId -> ST s (LSM s)
newWith :: Tracer (ST s) Event -> TableId -> LSMConfig -> ST s (LSM s)
data LookupResult v b
- = NotFound
- | Found !v !(Maybe b)
lookup :: Tracer (ST s) Event -> LSM s -> Key -> ST s (LookupResult Value Blob)
lookups :: LSM s -> [Key] -> ST s [LookupResult Value Blob]
type Entry = Update Value Blob
data Update v b
- = Insert !v !(Maybe b)
- | Mupsert !v
- | Delete
update :: Tracer (ST s) Event -> LSM s -> Key -> Entry -> ST s ()
updates :: Tracer (ST s) Event -> LSM s -> [(Key, Entry)] -> ST s ()
insert :: Tracer (ST s) Event -> LSM s -> Key -> Value -> Maybe Blob -> ST s ()
inserts :: Tracer (ST s) Event -> LSM s -> [(Key, Value, Maybe Blob)] -> ST s ()
delete :: Tracer (ST s) Event -> LSM s -> Key -> ST s ()
deletes :: Tracer (ST s) Event -> LSM s -> [Key] -> ST s ()
mupsert :: Tracer (ST s) Event -> LSM s -> Key -> Value -> ST s ()
mupserts :: Tracer (ST s) Event -> LSM s -> [(Key, Value)] -> ST s ()
supplyMergeCredits :: LSM s -> NominalCredit -> ST s ()
duplicate :: Tracer (ST s) Event -> TableId -> LSM s -> ST s (LSM s)
unions :: Tracer (ST s) Event -> TableId -> [LSM s] -> ST s (LSM s)
type Credit = Int
type Debt = Int
remainingUnionDebt :: LSM s -> ST s UnionDebt
supplyUnionCredits :: LSM s -> UnionCredits -> ST s UnionCredits
data MTree r
- = MLeaf r
- | MNode TreeMergeType [MTree r]
logicalValue :: LSM s -> ST s (Map Key (Value, Maybe Blob))
type Representation = (Run, [LevelRepresentation], Maybe (MTree Run))
dumpRepresentation :: LSM s -> ST s Representation
representationShape :: Representation -> (Int, [([Int], [Int])], Maybe (MTree Int))
data Event
data EventAt e = EventAt {
- eventAtStep :: Counter
- eventAtLevel :: Int
- eventDetail :: e
}
data EventDetail
- = AddLevelEvent
- | AddRunEvent {
  - runsAtLevel :: [Run]
  }
- | NewLevelMergeEvent {
  - mergePolicy :: MergePolicyForLevel
  - mergeType :: LevelMergeType
  - mergeDebt :: Debt
  - mergeRuns :: [Run]
  }
- | NewSingleRunEvent Run
- | LevelMergeCompletedEvent {
  - mergePolicy :: MergePolicyForLevel
  - mergeType :: LevelMergeType
  - mergeSize :: Int
  }
- | SingleRunCompletedEvent Run
- | RunTooSmallForLevelEvent MergePolicyForLevel Run
- | LevelIsFullEvent MergePolicyForLevel
- | LevelIsNotFullEvent MergePolicyForLevel
newtype MergingTree s = MergingTree (STRef s (MergingTreeState s))
data MergingTreeState s
- = CompletedTreeMerge !Run
- | OngoingTreeMerge !(MergingRun TreeMergeType s)
- | PendingTreeMerge !(PendingMerge s)
data PendingMerge s
- = PendingLevelMerge ![PreExistingRun s] !(Maybe (MergingTree s))
- | PendingUnionMerge ![MergingTree s]
data PreExistingRun s
- = PreExistingRun !Run
- | PreExistingMergingRun !(MergingRun LevelMergeType s)
data MergingRun t s = MergingRun !t !MergeDebt !(STRef s MergingRunState)
data MergingRunState
- = CompletedMerge !Run
- | OngoingMerge !MergeCredit ![Run] Run
data MergePolicyForLevel
- = LevelTiering
- | LevelLevelling
class Show t => IsMergeType t where
- isLastLevel :: t -> Bool
- isUnion :: t -> Bool
data TreeMergeType
- = MergeLevel
- | MergeUnion
data LevelMergeType
- = MergeMidLevel
- | MergeLastLevel
data MergeCredit = MergeCredit {
- spentCredits :: !Credit
- unspentCredits :: !Credit
}
newtype MergeDebt = MergeDebt {
- totalDebt :: Debt
}
newtype NominalCredit = NominalCredit Credit
newtype NominalDebt = NominalDebt Credit
type Run = Map Key Entry
runSize :: Run -> Int
newtype UnionCredits = UnionCredits Credit
supplyCreditsMergingTree :: Credit -> MergingTree s -> ST s Credit
newtype UnionDebt = UnionDebt Debt
remainingDebtMergingTree :: MergingTree s -> ST s (Debt, Size)
mergek :: IsMergeType t => t -> [Run] -> Run
mergeBatchSize :: Int
type Invariant s = ExceptT String (ST s)
evalInvariant :: Invariant s a -> ST s (Either String a)
treeInvariant :: MergingTree s -> Invariant s ()
mergeDebtInvariant :: MergeDebt -> MergeCredit -> Bool
levelNumberToMaxRunSize :: HasCallStack => MergePolicyForLevel -> LSMConfig -> LevelNo -> Int
runSizeToLevelNumber :: HasCallStack => MergePolicyForLevel -> LSMConfig -> Int -> LevelNo
maxWriteBufferSize :: HasCallStack => LSMConfig -> Int
runSizeFitsInLevel :: HasCallStack => MergePolicyForLevel -> LSMConfig -> LevelNo -> Int -> Bool
runSizeTooSmallForLevel :: HasCallStack => MergePolicyForLevel -> LSMConfig -> LevelNo -> Int -> Bool
runSizeTooLargeForLevel :: HasCallStack => MergePolicyForLevel -> LSMConfig -> LevelNo -> Int -> Bool
levelIsFull :: MergePolicyForLevel -> LSMConfig -> LevelNo -> [Run] -> [Run] -> Bool

Main API

data LSM s Source #

newtype TableId Source #

Identifiers for LSM tables

Constructors

TableId Int

Instances

Instances details

Enum TableId Source #
Instance details Defined in ScheduledMerges Methods succ :: TableId -> TableId # pred :: TableId -> TableId # toEnum :: Int -> TableId # fromEnum :: TableId -> Int # enumFrom :: TableId -> [TableId] # enumFromThen :: TableId -> TableId -> [TableId] # enumFromTo :: TableId -> TableId -> [TableId] # enumFromThenTo :: TableId -> TableId -> TableId -> [TableId] #
Show TableId Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> TableId -> ShowS # show :: TableId -> String # showList :: [TableId] -> ShowS #
Eq TableId Source #
Instance details Defined in ScheduledMerges Methods (==) :: TableId -> TableId -> Bool # (/=) :: TableId -> TableId -> Bool #
Ord TableId Source #
Instance details Defined in ScheduledMerges Methods compare :: TableId -> TableId -> Ordering # (<) :: TableId -> TableId -> Bool # (<=) :: TableId -> TableId -> Bool # (>) :: TableId -> TableId -> Bool # (>=) :: TableId -> TableId -> Bool # max :: TableId -> TableId -> TableId # min :: TableId -> TableId -> TableId #
Prim TableId Source #
Instance details Defined in ScheduledMerges Methods sizeOfType# :: Proxy TableId -> Int# Source # sizeOf# :: TableId -> Int# Source # alignmentOfType# :: Proxy TableId -> Int# Source # alignment# :: TableId -> Int# Source # indexByteArray# :: ByteArray# -> Int# -> TableId Source # readByteArray# :: MutableByteArray# s -> Int# -> State# s -> (# State# s, TableId #) Source # writeByteArray# :: MutableByteArray# s -> Int# -> TableId -> State# s -> State# s Source # setByteArray# :: MutableByteArray# s -> Int# -> Int# -> TableId -> State# s -> State# s Source # indexOffAddr# :: Addr# -> Int# -> TableId Source # readOffAddr# :: Addr# -> Int# -> State# s -> (# State# s, TableId #) Source # writeOffAddr# :: Addr# -> Int# -> TableId -> State# s -> State# s Source # setOffAddr# :: Addr# -> Int# -> Int# -> TableId -> State# s -> State# s Source #

data LSMConfig Source #

Configuration options for individual LSM tables.

Constructors

LSMConfig
Fields configMaxWriteBufferSize :: !Int configSizeRatio :: !Int Also known as the parameter `T`

Instances

Instances details

Show LSMConfig Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> LSMConfig -> ShowS # show :: LSMConfig -> String # showList :: [LSMConfig] -> ShowS #
Eq LSMConfig Source #
Instance details Defined in ScheduledMerges Methods (==) :: LSMConfig -> LSMConfig -> Bool # (/=) :: LSMConfig -> LSMConfig -> Bool #

newtype Key Source #

Constructors

K Int

Instances

Instances details

Arbitrary Key Source #
Instance details Defined in ScheduledMerges Methods arbitrary :: Gen Key Source # shrink :: Key -> [Key] Source #
Enum Key Source #
Instance details Defined in ScheduledMerges Methods succ :: Key -> Key # pred :: Key -> Key # toEnum :: Int -> Key # fromEnum :: Key -> Int # enumFrom :: Key -> [Key] # enumFromThen :: Key -> Key -> [Key] # enumFromTo :: Key -> Key -> [Key] # enumFromThenTo :: Key -> Key -> Key -> [Key] #
Show Key Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> Key -> ShowS # show :: Key -> String # showList :: [Key] -> ShowS #
Eq Key Source #
Instance details Defined in ScheduledMerges Methods (==) :: Key -> Key -> Bool # (/=) :: Key -> Key -> Bool #
Ord Key Source #
Instance details Defined in ScheduledMerges Methods compare :: Key -> Key -> Ordering # (<) :: Key -> Key -> Bool # (<=) :: Key -> Key -> Bool # (>) :: Key -> Key -> Bool # (>=) :: Key -> Key -> Bool # max :: Key -> Key -> Key # min :: Key -> Key -> Key #

newtype Value Source #

Constructors

V Int

Instances

Instances details

Arbitrary Value Source #
Instance details Defined in ScheduledMerges Methods arbitrary :: Gen Value Source # shrink :: Value -> [Value] Source #
Show Value Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> Value -> ShowS # show :: Value -> String # showList :: [Value] -> ShowS #
Eq Value Source #
Instance details Defined in ScheduledMerges Methods (==) :: Value -> Value -> Bool # (/=) :: Value -> Value -> Bool #

resolveValue :: Value -> Value -> Value Source #

newtype Blob Source #

Constructors

B Int

Instances

Instances details

Arbitrary Blob Source #
Instance details Defined in ScheduledMerges Methods arbitrary :: Gen Blob Source # shrink :: Blob -> [Blob] Source #
Show Blob Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> Blob -> ShowS # show :: Blob -> String # showList :: [Blob] -> ShowS #
Eq Blob Source #
Instance details Defined in ScheduledMerges Methods (==) :: Blob -> Blob -> Bool # (/=) :: Blob -> Blob -> Bool #

new :: Tracer (ST s) Event -> TableId -> ST s (LSM s) Source #

newWith :: Tracer (ST s) Event -> TableId -> LSMConfig -> ST s (LSM s) Source #

data LookupResult v b Source #

Constructors

NotFound
Found !v !(Maybe b)

Instances

Instances details

(Show v, Show b) => Show (LookupResult v b) Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> LookupResult v b -> ShowS # show :: LookupResult v b -> String # showList :: [LookupResult v b] -> ShowS #
(Eq v, Eq b) => Eq (LookupResult v b) Source #
Instance details Defined in ScheduledMerges Methods (==) :: LookupResult v b -> LookupResult v b -> Bool # (/=) :: LookupResult v b -> LookupResult v b -> Bool #

lookup :: Tracer (ST s) Event -> LSM s -> Key -> ST s (LookupResult Value Blob) Source #

lookups :: LSM s -> [Key] -> ST s [LookupResult Value Blob] Source #

type Entry = Update Value Blob Source #

data Update v b Source #

Constructors

Insert !v !(Maybe b)
Mupsert !v
Delete

Instances

Instances details

(Arbitrary v, Arbitrary b) => Arbitrary (Update v b) Source #
Instance details Defined in ScheduledMerges Methods arbitrary :: Gen (Update v b) Source # shrink :: Update v b -> [Update v b] Source #
(Show b, Show v) => Show (Update v b) Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> Update v b -> ShowS # show :: Update v b -> String # showList :: [Update v b] -> ShowS #
(Eq b, Eq v) => Eq (Update v b) Source #
Instance details Defined in ScheduledMerges Methods (==) :: Update v b -> Update v b -> Bool # (/=) :: Update v b -> Update v b -> Bool #

update :: Tracer (ST s) Event -> LSM s -> Key -> Entry -> ST s () Source #

updates :: Tracer (ST s) Event -> LSM s -> [(Key, Entry)] -> ST s () Source #

insert :: Tracer (ST s) Event -> LSM s -> Key -> Value -> Maybe Blob -> ST s () Source #

inserts :: Tracer (ST s) Event -> LSM s -> [(Key, Value, Maybe Blob)] -> ST s () Source #

delete :: Tracer (ST s) Event -> LSM s -> Key -> ST s () Source #

deletes :: Tracer (ST s) Event -> LSM s -> [Key] -> ST s () Source #

mupsert :: Tracer (ST s) Event -> LSM s -> Key -> Value -> ST s () Source #

mupserts :: Tracer (ST s) Event -> LSM s -> [(Key, Value)] -> ST s () Source #

supplyMergeCredits :: LSM s -> NominalCredit -> ST s () Source #

duplicate :: Tracer (ST s) Event -> TableId -> LSM s -> ST s (LSM s) Source #

unions :: Tracer (ST s) Event -> TableId -> [LSM s] -> ST s (LSM s) Source #

Similar to Data.Map.unionWith.

A call to unions itself is not expensive, as the input tables are not immediately merged. Instead, it creates a representation of an in-progress merge that can be performed incrementally (somewhat similar to a thunk).

The more merge work remains, the more expensive are lookups on the table.

type Credit = Int Source #

Credits for keeping track of merge progress. These credits correspond directly to merge steps performed.

We also call these "physical" credits (since they correspond to steps done), as opposed to the "nominal" credits in NominalCredit and NominalDebt.

type Debt = Int Source #

Debt for keeping track of the total merge work to do.

remainingUnionDebt :: LSM s -> ST s UnionDebt Source #

Return the current union debt. This debt can be reduced until it is paid off using supplyUnionCredits.

supplyUnionCredits :: LSM s -> UnionCredits -> ST s UnionCredits Source #

Supply union credits to reduce union debt.

Supplying union credits leads to union merging work being performed in batches. This reduces the union debt returned by remainingUnionDebt. Union debt will be reduced by at least the number of supplied union credits. It is therefore advisable to query remainingUnionDebt every once in a while to see what the current debt is.

This function returns any surplus of union credits as leftover credits when a union has finished. In particular, if the returned number of credits is non-negative, then the union is finished.

Test and trace

data MTree r Source #

Constructors

MLeaf r
MNode TreeMergeType [MTree r]

Instances

Instances details

Foldable MTree Source #
Instance details Defined in ScheduledMerges Methods fold :: Monoid m => MTree m -> m # foldMap :: Monoid m => (a -> m) -> MTree a -> m # foldMap' :: Monoid m => (a -> m) -> MTree a -> m # foldr :: (a -> b -> b) -> b -> MTree a -> b # foldr' :: (a -> b -> b) -> b -> MTree a -> b # foldl :: (b -> a -> b) -> b -> MTree a -> b # foldl' :: (b -> a -> b) -> b -> MTree a -> b # foldr1 :: (a -> a -> a) -> MTree a -> a # foldl1 :: (a -> a -> a) -> MTree a -> a # toList :: MTree a -> [a] # null :: MTree a -> Bool # length :: MTree a -> Int # elem :: Eq a => a -> MTree a -> Bool # maximum :: Ord a => MTree a -> a # minimum :: Ord a => MTree a -> a # sum :: Num a => MTree a -> a # product :: Num a => MTree a -> a #
Functor MTree Source #
Instance details Defined in ScheduledMerges Methods fmap :: (a -> b) -> MTree a -> MTree b # (<$) :: a -> MTree b -> MTree a #
Show r => Show (MTree r) Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> MTree r -> ShowS # show :: MTree r -> String # showList :: [MTree r] -> ShowS #
Eq r => Eq (MTree r) Source #
Instance details Defined in ScheduledMerges Methods (==) :: MTree r -> MTree r -> Bool # (/=) :: MTree r -> MTree r -> Bool #

logicalValue :: LSM s -> ST s (Map Key (Value, Maybe Blob)) Source #

type Representation = (Run, [LevelRepresentation], Maybe (MTree Run)) Source #

dumpRepresentation :: LSM s -> ST s Representation Source #

representationShape :: Representation -> (Int, [([Int], [Int])], Maybe (MTree Int)) Source #

data Event Source #

Instances

Instances details

Show Event Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> Event -> ShowS # show :: Event -> String # showList :: [Event] -> ShowS #

data EventAt e Source #

Constructors

EventAt
Fields eventAtStep :: Counter eventAtLevel :: Int eventDetail :: e

Instances

Instances details

Show e => Show (EventAt e) Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> EventAt e -> ShowS # show :: EventAt e -> String # showList :: [EventAt e] -> ShowS #

data EventDetail Source #

Constructors

AddLevelEvent
AddRunEvent
Fields runsAtLevel :: [Run]
NewLevelMergeEvent
Fields mergePolicy :: MergePolicyForLevel mergeType :: LevelMergeType mergeDebt :: Debt mergeRuns :: [Run]
NewSingleRunEvent Run
LevelMergeCompletedEvent
Fields mergePolicy :: MergePolicyForLevel mergeType :: LevelMergeType mergeSize :: Int
SingleRunCompletedEvent Run
RunTooSmallForLevelEvent MergePolicyForLevel Run
LevelIsFullEvent MergePolicyForLevel
LevelIsNotFullEvent MergePolicyForLevel

Instances

Instances details

Show EventDetail Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> EventDetail -> ShowS # show :: EventDetail -> String # showList :: [EventDetail] -> ShowS #

newtype MergingTree s Source #

A "merging tree" is a mutable representation of an incremental tree-shaped nested merge. This allows representing union merges of entire tables, each of which itself first needs to be merged to become a single run.

Trees have to support arbitrarily deep nesting, since each input to union might already contain an in-progress merging tree (which then becomes shared between multiple tables).

See Note [Table Unions].

Constructors

MergingTree (STRef s (MergingTreeState s))

data MergingTreeState s Source #

Constructors

CompletedTreeMerge !Run
OngoingTreeMerge !(MergingRun TreeMergeType s)	Reuses MergingRun (with its STRef) to allow sharing existing merges.
PendingTreeMerge !(PendingMerge s)

data PendingMerge s Source #

A merge that is waiting for its inputs to complete.

The inputs can themselves be MergingTrees (each with its STRef) to allow sharing existing unions.

Constructors

PendingLevelMerge ![PreExistingRun s] !(Maybe (MergingTree s))	The inputs are the entire content of a table, i.e. its (merging) runs and finally a union merge (if that table already contained a union).
PendingUnionMerge ![MergingTree s]	Each input is a level merge of the entire content of a table.

data PreExistingRun s Source #

This is much like an IncomingRun, and is created from one, but contains only the essential information needed in a PendingLevelMerge.

Constructors

PreExistingRun !Run
PreExistingMergingRun !(MergingRun LevelMergeType s)

data MergingRun t s Source #

A "merging run" is a mutable representation of an incremental merge. It is also a unit of sharing between duplicated tables.

Constructors

MergingRun !t !MergeDebt !(STRef s MergingRunState)

data MergingRunState Source #

Constructors

CompletedMerge !Run
OngoingMerge
Fields !MergeCredit ![Run] inputs of the merge Run output of the merge (lazily evaluated)

data MergePolicyForLevel Source #

The merge policy for an LSM level can be either tiering or levelling. In this design we use levelling for the last level, and tiering for all other levels. The first level always uses tiering however, even if it's also the last level. So MergePolicyForLevel and LevelMergeType are orthogonal, all combinations are possible.

Constructors

LevelTiering
LevelLevelling

Instances

Instances details

Show MergePolicyForLevel Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> MergePolicyForLevel -> ShowS # show :: MergePolicyForLevel -> String # showList :: [MergePolicyForLevel] -> ShowS #
Eq MergePolicyForLevel Source #
Instance details Defined in ScheduledMerges Methods (==) :: MergePolicyForLevel -> MergePolicyForLevel -> Bool # (/=) :: MergePolicyForLevel -> MergePolicyForLevel -> Bool #

class Show t => IsMergeType t where Source #

Merges can exist in different parts of the LSM, each with different options for the exact merge operation performed.

Methods

isLastLevel :: t -> Bool Source #

isUnion :: t -> Bool Source #

Instances

Instances details

IsMergeType LevelMergeType Source #
Instance details Defined in ScheduledMerges Methods isLastLevel :: LevelMergeType -> Bool Source # isUnion :: LevelMergeType -> Bool Source #
IsMergeType TreeMergeType Source #
Instance details Defined in ScheduledMerges Methods isLastLevel :: TreeMergeType -> Bool Source # isUnion :: TreeMergeType -> Bool Source #

data TreeMergeType Source #

Different types of merges created as part of the merging tree.

Union merges follow the semantics of Data.Map.unionWith (<>). Since the input runs are semantically treated like Data.Maps, deletes are ignored and inserts act like mupserts, so they need to be merged monoidally using resolveValue.

Trees can only exist on the union level, which is the last. Therefore, node merges can always drop deletes.

Constructors

MergeLevel
MergeUnion

Instances

Instances details

Arbitrary TreeMergeType Source #
Instance details Defined in ScheduledMerges Methods arbitrary :: Gen TreeMergeType Source # shrink :: TreeMergeType -> [TreeMergeType] Source #
Show TreeMergeType Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> TreeMergeType -> ShowS # show :: TreeMergeType -> String # showList :: [TreeMergeType] -> ShowS #
Eq TreeMergeType Source #
Instance details Defined in ScheduledMerges Methods (==) :: TreeMergeType -> TreeMergeType -> Bool # (/=) :: TreeMergeType -> TreeMergeType -> Bool #
IsMergeType TreeMergeType Source #
Instance details Defined in ScheduledMerges Methods isLastLevel :: TreeMergeType -> Bool Source # isUnion :: TreeMergeType -> Bool Source #

data LevelMergeType Source #

Different types of merges created as part of a regular (non-union) level.

A last level merge behaves differently from a mid-level merge: last level merges can actually remove delete entries, whereas mid-level merges must preserve them. This is orthogonal to the MergePolicyForLevel.

Constructors

MergeMidLevel
MergeLastLevel

Instances

Instances details

Arbitrary LevelMergeType Source #
Instance details Defined in ScheduledMerges Methods arbitrary :: Gen LevelMergeType Source # shrink :: LevelMergeType -> [LevelMergeType] Source #
Show LevelMergeType Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> LevelMergeType -> ShowS # show :: LevelMergeType -> String # showList :: [LevelMergeType] -> ShowS #
Eq LevelMergeType Source #
Instance details Defined in ScheduledMerges Methods (==) :: LevelMergeType -> LevelMergeType -> Bool # (/=) :: LevelMergeType -> LevelMergeType -> Bool #
IsMergeType LevelMergeType Source #
Instance details Defined in ScheduledMerges Methods isLastLevel :: LevelMergeType -> Bool Source # isUnion :: LevelMergeType -> Bool Source #

data MergeCredit Source #

Constructors

MergeCredit
Fields spentCredits :: !Credit unspentCredits :: !Credit

Instances

Instances details

Show MergeCredit Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> MergeCredit -> ShowS # show :: MergeCredit -> String # showList :: [MergeCredit] -> ShowS #

newtype MergeDebt Source #

Constructors

MergeDebt
Fields totalDebt :: Debt

Instances

Instances details

Show MergeDebt Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> MergeDebt -> ShowS # show :: MergeDebt -> String # showList :: [MergeDebt] -> ShowS #

newtype NominalCredit Source #

Nominal credit is the credit supplied to each level as we insert update entries, one credit per update entry inserted.

Nominal credit must be supplied up to the NominalDebt to ensure the merge is complete.

Nominal credits are a similar order of magnitude to physical credits (see Credit) but not the same, and we have to scale linearly to convert between them. Physical credits are the actual number of inputs to the merge, which may be somewhat more or somewhat less than the number of update entries we will insert before we need the merge to be complete.

Constructors

NominalCredit Credit

Instances

Instances details

Show NominalCredit Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> NominalCredit -> ShowS # show :: NominalCredit -> String # showList :: [NominalCredit] -> ShowS #

newtype NominalDebt Source #

The nominal debt for a merging run is the worst case (minimum) number of update entries we expect to insert before we expect the merge to be complete.

We require that an equal amount of nominal credit is supplied before we can expect a merge to be complete.

We scale linearly to convert nominal credits to physical credits, such that the nominal debt and physical debt are both considered "100%", and so that both debts are paid off at exactly the same time.

Constructors

NominalDebt Credit

Instances

Instances details

Show NominalDebt Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> NominalDebt -> ShowS # show :: NominalDebt -> String # showList :: [NominalDebt] -> ShowS #

type Run = Map Key Entry Source #

runSize :: Run -> Int Source #

newtype UnionCredits Source #

Credits are used to pay off UnionDebt, completing a union in the process.

A union credit corresponds to a single merging step being performed.

Constructors

UnionCredits Credit

Instances

Instances details

Num UnionCredits Source #
Instance details Defined in ScheduledMerges Methods (+) :: UnionCredits -> UnionCredits -> UnionCredits # (-) :: UnionCredits -> UnionCredits -> UnionCredits # (*) :: UnionCredits -> UnionCredits -> UnionCredits # negate :: UnionCredits -> UnionCredits # abs :: UnionCredits -> UnionCredits # signum :: UnionCredits -> UnionCredits # fromInteger :: Integer -> UnionCredits #
Show UnionCredits Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> UnionCredits -> ShowS # show :: UnionCredits -> String # showList :: [UnionCredits] -> ShowS #
Eq UnionCredits Source #
Instance details Defined in ScheduledMerges Methods (==) :: UnionCredits -> UnionCredits -> Bool # (/=) :: UnionCredits -> UnionCredits -> Bool #
Ord UnionCredits Source #
Instance details Defined in ScheduledMerges Methods compare :: UnionCredits -> UnionCredits -> Ordering # (<) :: UnionCredits -> UnionCredits -> Bool # (<=) :: UnionCredits -> UnionCredits -> Bool # (>) :: UnionCredits -> UnionCredits -> Bool # (>=) :: UnionCredits -> UnionCredits -> Bool # max :: UnionCredits -> UnionCredits -> UnionCredits # min :: UnionCredits -> UnionCredits -> UnionCredits #

supplyCreditsMergingTree :: Credit -> MergingTree s -> ST s Credit Source #

newtype UnionDebt Source #

The current upper bound on the number of UnionCredits that have to be supplied before a union is completed.

The union debt is the number of merging steps that need to be performed at most until the delayed work of performing a union is completed. This includes the cost of completing merges that were part of the union's input tables.

Constructors

UnionDebt Debt

Instances

Instances details

Num UnionDebt Source #
Instance details Defined in ScheduledMerges Methods (+) :: UnionDebt -> UnionDebt -> UnionDebt # (-) :: UnionDebt -> UnionDebt -> UnionDebt # (*) :: UnionDebt -> UnionDebt -> UnionDebt # negate :: UnionDebt -> UnionDebt # abs :: UnionDebt -> UnionDebt # signum :: UnionDebt -> UnionDebt # fromInteger :: Integer -> UnionDebt #
Show UnionDebt Source #
Instance details Defined in ScheduledMerges Methods showsPrec :: Int -> UnionDebt -> ShowS # show :: UnionDebt -> String # showList :: [UnionDebt] -> ShowS #
Eq UnionDebt Source #
Instance details Defined in ScheduledMerges Methods (==) :: UnionDebt -> UnionDebt -> Bool # (/=) :: UnionDebt -> UnionDebt -> Bool #
Ord UnionDebt Source #
Instance details Defined in ScheduledMerges Methods compare :: UnionDebt -> UnionDebt -> Ordering # (<) :: UnionDebt -> UnionDebt -> Bool # (<=) :: UnionDebt -> UnionDebt -> Bool # (>) :: UnionDebt -> UnionDebt -> Bool # (>=) :: UnionDebt -> UnionDebt -> Bool # max :: UnionDebt -> UnionDebt -> UnionDebt # min :: UnionDebt -> UnionDebt -> UnionDebt #

remainingDebtMergingTree :: MergingTree s -> ST s (Debt, Size) Source #

mergek :: IsMergeType t => t -> [Run] -> Run Source #

mergeBatchSize :: Int Source #

Invariants

type Invariant s = ExceptT String (ST s) Source #

evalInvariant :: Invariant s a -> ST s (Either String a) Source #

treeInvariant :: MergingTree s -> Invariant s () Source #

mergeDebtInvariant :: MergeDebt -> MergeCredit -> Bool Source #

Run sizes

levelNumberToMaxRunSize :: HasCallStack => MergePolicyForLevel -> LSMConfig -> LevelNo -> Int Source #

Compute the maximum size of a run for a given level.

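The size of a tiering run at each level (from level 1 upwards) is allowed to be at most bufferSize*sizeRatio^(level-1), and must exceed the maximum for the previous level; that is, for level >= 2, bufferSize*sizeRatio^(level-2) < size <= bufferSize*sizeRatio^(level-1).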

>>> levelNumberToMaxRunSize LevelTiering (LSMConfig 2 4) <$> [0, 1, 2, 3, 4]
[0,2,8,32,128]

The size of a levelling run at each level (from level 1 upwards) is allowed to be at most bufferSize*sizeRatio^level, and must exceed the maximum for the previous level; that is, for level >= 2, bufferSize*sizeRatio^(level-1) < size <= bufferSize*sizeRatio^level. A levelling run can take up a whole level, so the maximum size of a run is sizeRatio times larger than the maximum size of a tiering run on the same level.

>>> levelNumberToMaxRunSize LevelLevelling (LSMConfig 2 4) <$> [0, 1, 2, 3, 4]
[0,8,32,128,512]

runSizeToLevelNumber :: HasCallStack => MergePolicyForLevel -> LSMConfig -> Int -> LevelNo Source #

Compute the appropriate level for the size of the given run.

See levelNumberToMaxRunSize for the bounds on (tiering or levelling) run sizes at each level.

>>> runSizeToLevelNumber LevelTiering (LSMConfig 2 4) <$> [0,2,8,32,128]
[0,1,2,3,4]

>>> runSizeToLevelNumber LevelLevelling (LSMConfig 2 4) <$> [0,8,32,128,512]
[0,1,2,3,4]

maxWriteBufferSize :: HasCallStack => LSMConfig -> Int Source #

runSizeFitsInLevel :: HasCallStack => MergePolicyForLevel -> LSMConfig -> LevelNo -> Int -> Bool Source #

Check whether a run of the given size fits in the given level.

See levelNumberToMaxRunSize for the bounds on (tiering or levelling) run sizes at each level.

>>> runSizeFitsInLevel LevelTiering (LSMConfig 2 4) 3 <$> [8,9,16,32,33]
[False,True,True,True,False]

>>> runSizeFitsInLevel LevelLevelling (LSMConfig 2 4) 2 <$> [8,9,16,32,33]
[False,True,True,True,False]

runSizeTooSmallForLevel :: HasCallStack => MergePolicyForLevel -> LSMConfig -> LevelNo -> Int -> Bool Source #

Check whether a run of the given size is too small for the given level.

See levelNumberToMaxRunSize for the bounds on (tiering or levelling) run sizes at each level.

>>> runSizeTooSmallForLevel LevelTiering (LSMConfig 2 4) 3 <$> [8,9]
[True,False]

>>> runSizeTooSmallForLevel LevelLevelling (LSMConfig 2 4) 2 <$> [8,9]
[True,False]

runSizeTooLargeForLevel :: HasCallStack => MergePolicyForLevel -> LSMConfig -> LevelNo -> Int -> Bool Source #

Check whether a run of the given size is too large for the given level.

See levelNumberToMaxRunSize for the bounds on (tiering or levelling) run sizes at each level.

>>> runSizeTooLargeForLevel LevelTiering (LSMConfig 2 4) 2 <$> [8,9]
[False,True]

>>> runSizeTooLargeForLevel LevelLevelling (LSMConfig 2 4) 1 <$> [8,9]
[False,True]

Level capacity

levelIsFull :: MergePolicyForLevel -> LSMConfig -> LevelNo -> [Run] -> [Run] -> Bool Source #