{-# LANGUAGE CPP             #-}
{-# LANGUAGE MagicHash       #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE UnboxedTuples   #-}
{-# OPTIONS_HADDOCK not-home #-}

module Database.LSMTree.Internal.BloomFilterQuery1 (
  bloomQueries,
  RunIxKeyIx(RunIxKeyIx),
  RunIx, KeyIx,
) where

import           Data.Bits
import qualified Data.Primitive as P
import qualified Data.Vector as V
import qualified Data.Vector.Primitive as VP
import qualified Data.Vector.Primitive.Mutable as VPM
import           Data.Word (Word32)

import           Control.Exception (assert)
import           Control.Monad.ST (ST)

import           Data.BloomFilter (Bloom)
import qualified Data.BloomFilter as Bloom
import qualified Data.BloomFilter.Hash as Bloom

import           Database.LSMTree.Internal.Serialise (SerialisedKey)


-- Bulk query
-----------------------------------------------------------

type KeyIx = Int
type RunIx = Int

-- | A 'RunIxKeyIx' is a (compact) pair of a 'RunIx' and a 'KeyIx'.
--
-- We represent it as a 32bit word, using:
--
-- * 16 bits for the run\/filter index (MSB)
-- * 16 bits for the key index (LSB)
--
newtype RunIxKeyIx = MkRunIxKeyIx Word32
  deriving stock RunIxKeyIx -> RunIxKeyIx -> Bool
(RunIxKeyIx -> RunIxKeyIx -> Bool)
-> (RunIxKeyIx -> RunIxKeyIx -> Bool) -> Eq RunIxKeyIx
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: RunIxKeyIx -> RunIxKeyIx -> Bool
== :: RunIxKeyIx -> RunIxKeyIx -> Bool
$c/= :: RunIxKeyIx -> RunIxKeyIx -> Bool
/= :: RunIxKeyIx -> RunIxKeyIx -> Bool
Eq
#if MIN_VERSION_GLASGOW_HASKELL(9,0,0,0)
  deriving newtype Addr# -> Int# -> RunIxKeyIx
ByteArray# -> Int# -> RunIxKeyIx
Proxy RunIxKeyIx -> Int#
RunIxKeyIx -> Int#
(Proxy RunIxKeyIx -> Int#)
-> (RunIxKeyIx -> Int#)
-> (Proxy RunIxKeyIx -> Int#)
-> (RunIxKeyIx -> Int#)
-> (ByteArray# -> Int# -> RunIxKeyIx)
-> (forall s.
    MutableByteArray# s
    -> Int# -> State# s -> (# State# s, RunIxKeyIx #))
-> (forall s.
    MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> (forall s.
    MutableByteArray# s
    -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> (Addr# -> Int# -> RunIxKeyIx)
-> (forall s.
    Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #))
-> (forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> (forall s.
    Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> Prim RunIxKeyIx
forall s.
Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
forall s. Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #)
forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s
forall s.
MutableByteArray# s
-> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, RunIxKeyIx #)
forall s.
MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s
forall a.
(Proxy a -> Int#)
-> (a -> Int#)
-> (Proxy a -> Int#)
-> (a -> Int#)
-> (ByteArray# -> Int# -> a)
-> (forall s.
    MutableByteArray# s -> Int# -> State# s -> (# State# s, a #))
-> (forall s.
    MutableByteArray# s -> Int# -> a -> State# s -> State# s)
-> (forall s.
    MutableByteArray# s -> Int# -> Int# -> a -> State# s -> State# s)
-> (Addr# -> Int# -> a)
-> (forall s. Addr# -> Int# -> State# s -> (# State# s, a #))
-> (forall s. Addr# -> Int# -> a -> State# s -> State# s)
-> (forall s. Addr# -> Int# -> Int# -> a -> State# s -> State# s)
-> Prim a
$csizeOfType# :: Proxy RunIxKeyIx -> Int#
sizeOfType# :: Proxy RunIxKeyIx -> Int#
$csizeOf# :: RunIxKeyIx -> Int#
sizeOf# :: RunIxKeyIx -> Int#
$calignmentOfType# :: Proxy RunIxKeyIx -> Int#
alignmentOfType# :: Proxy RunIxKeyIx -> Int#
$calignment# :: RunIxKeyIx -> Int#
alignment# :: RunIxKeyIx -> Int#
$cindexByteArray# :: ByteArray# -> Int# -> RunIxKeyIx
indexByteArray# :: ByteArray# -> Int# -> RunIxKeyIx
$creadByteArray# :: forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, RunIxKeyIx #)
readByteArray# :: forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, RunIxKeyIx #)
$cwriteByteArray# :: forall s.
MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s
writeByteArray# :: forall s.
MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s
$csetByteArray# :: forall s.
MutableByteArray# s
-> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
setByteArray# :: forall s.
MutableByteArray# s
-> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
$cindexOffAddr# :: Addr# -> Int# -> RunIxKeyIx
indexOffAddr# :: Addr# -> Int# -> RunIxKeyIx
$creadOffAddr# :: forall s. Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #)
readOffAddr# :: forall s. Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #)
$cwriteOffAddr# :: forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s
writeOffAddr# :: forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s
$csetOffAddr# :: forall s.
Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
setOffAddr# :: forall s.
Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
P.Prim
#else
instance P.Prim RunIxKeyIx where
    sizeOfType# _ = 4#
    alignmentOfType# _ = 4#

    indexByteArray# ba i =
      MkRunIxKeyIx (P.indexByteArray# ba i)
    readByteArray# ba i s =
      case P.readByteArray# ba i s of
        (# s', w #) -> (# s', MkRunIxKeyIx w #)
    writeByteArray# ba i (MkRunIxKeyIx w) s =
      P.writeByteArray# ba i w s

    indexOffAddr# ba i =
      MkRunIxKeyIx (P.indexOffAddr# ba i)
    readOffAddr# ba i s =
      case P.readOffAddr# ba i s of
        (# s', w #) -> (# s', MkRunIxKeyIx w #)
    writeOffAddr# ba i (MkRunIxKeyIx w) s =
      P.writeOffAddr# ba i w s
#endif

pattern RunIxKeyIx :: RunIx -> KeyIx -> RunIxKeyIx
pattern $mRunIxKeyIx :: forall {r}. RunIxKeyIx -> (Int -> Int -> r) -> ((# #) -> r) -> r
$bRunIxKeyIx :: Int -> Int -> RunIxKeyIx
RunIxKeyIx r k <- (unpackRunIxKeyIx -> (r, k))
  where
    RunIxKeyIx Int
r Int
k = Int -> Int -> RunIxKeyIx
packRunIxKeyIx Int
r Int
k
#if MIN_VERSION_GLASGOW_HASKELL(9,0,0,0)
{-# INLINE RunIxKeyIx #-}
#endif
{-# COMPLETE RunIxKeyIx #-}

packRunIxKeyIx :: Int -> Int -> RunIxKeyIx
packRunIxKeyIx :: Int -> Int -> RunIxKeyIx
packRunIxKeyIx Int
r Int
k =
    Bool -> RunIxKeyIx -> RunIxKeyIx
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xffff
         Bool -> Bool -> Bool
&& Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xffff) (RunIxKeyIx -> RunIxKeyIx) -> RunIxKeyIx -> RunIxKeyIx
forall a b. (a -> b) -> a -> b
$
    Word32 -> RunIxKeyIx
MkRunIxKeyIx (Word32 -> RunIxKeyIx) -> Word32 -> RunIxKeyIx
forall a b. (a -> b) -> a -> b
$
      (Word -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral :: Word -> Word32) (Word -> Word32) -> Word -> Word32
forall a b. (a -> b) -> a -> b
$
        (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
r Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
16)
     Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k
{-# INLINE packRunIxKeyIx #-}

unpackRunIxKeyIx :: RunIxKeyIx -> (Int, Int)
unpackRunIxKeyIx :: RunIxKeyIx -> (Int, Int)
unpackRunIxKeyIx (MkRunIxKeyIx Word32
c) =
    ( Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
c Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
16)
    , Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
c Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0xfff)
    )
{-# INLINE unpackRunIxKeyIx #-}

instance Show RunIxKeyIx where
  showsPrec :: Int -> RunIxKeyIx -> ShowS
showsPrec Int
_ (RunIxKeyIx Int
r Int
k) =
    String -> ShowS
showString String
"RunIxKeyIx " ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
11 Int
r
              ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
' ' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
11 Int
k

type ResIx = Int -- Result index

-- | Perform a batch of bloom queries. The result is a tuple of indexes into the
-- vector of runs and vector of keys respectively.
--
-- The result vector can be of variable length. The initial estimate is 2x the
-- number of keys but this is grown if needed (using a doubling strategy).
--
bloomQueries ::
     V.Vector (Bloom SerialisedKey)
  -> V.Vector SerialisedKey
  -> VP.Vector RunIxKeyIx
bloomQueries :: Vector (Bloom SerialisedKey)
-> Vector SerialisedKey -> Vector RunIxKeyIx
bloomQueries !Vector (Bloom SerialisedKey)
blooms !Vector SerialisedKey
ks
  | Int
rsN Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
|| Int
ksN Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Vector RunIxKeyIx
forall a. Prim a => Vector a
VP.empty
  | Bool
otherwise            = (forall s. ST s (MVector s RunIxKeyIx)) -> Vector RunIxKeyIx
forall a. Prim a => (forall s. ST s (MVector s a)) -> Vector a
VP.create ((forall s. ST s (MVector s RunIxKeyIx)) -> Vector RunIxKeyIx)
-> (forall s. ST s (MVector s RunIxKeyIx)) -> Vector RunIxKeyIx
forall a b. (a -> b) -> a -> b
$ do
      MVector s RunIxKeyIx
res <- Int -> ST s (MVector (PrimState (ST s)) RunIxKeyIx)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
Int -> m (MVector (PrimState m) a)
VPM.unsafeNew (Vector SerialisedKey -> Int
forall a. Vector a -> Int
V.length Vector SerialisedKey
ks Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2)
      MVector s RunIxKeyIx -> Int -> Int -> ST s (MVector s RunIxKeyIx)
forall s.
MVector s RunIxKeyIx -> Int -> Int -> ST s (MVector s RunIxKeyIx)
loop1 MVector s RunIxKeyIx
res Int
0 Int
0
  where
    !rsN :: Int
rsN = Vector (Bloom SerialisedKey) -> Int
forall a. Vector a -> Int
V.length Vector (Bloom SerialisedKey)
blooms
    !ksN :: Int
ksN = Vector SerialisedKey -> Int
forall a. Vector a -> Int
V.length Vector SerialisedKey
ks

    hs :: VP.Vector (Bloom.CheapHashes SerialisedKey)
    !hs :: Vector (CheapHashes SerialisedKey)
hs  = Int
-> (Int -> CheapHashes SerialisedKey)
-> Vector (CheapHashes SerialisedKey)
forall a. Prim a => Int -> (Int -> a) -> Vector a
VP.generate Int
ksN ((Int -> CheapHashes SerialisedKey)
 -> Vector (CheapHashes SerialisedKey))
-> (Int -> CheapHashes SerialisedKey)
-> Vector (CheapHashes SerialisedKey)
forall a b. (a -> b) -> a -> b
$ \Int
i -> SerialisedKey -> CheapHashes SerialisedKey
forall a. Hashable a => a -> CheapHashes a
Bloom.makeCheapHashes (Vector SerialisedKey -> Int -> SerialisedKey
forall a. Vector a -> Int -> a
V.unsafeIndex Vector SerialisedKey
ks Int
i)

    -- Loop over all run indexes
    loop1 ::
         VPM.MVector s RunIxKeyIx
      -> ResIx
      -> RunIx
      -> ST s (VPM.MVector s RunIxKeyIx)
    loop1 :: forall s.
MVector s RunIxKeyIx -> Int -> Int -> ST s (MVector s RunIxKeyIx)
loop1 !MVector s RunIxKeyIx
res1 !Int
resix1 !Int
rix
      | Int
rix Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
rsN = MVector s RunIxKeyIx -> ST s (MVector s RunIxKeyIx)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MVector s RunIxKeyIx -> ST s (MVector s RunIxKeyIx))
-> MVector s RunIxKeyIx -> ST s (MVector s RunIxKeyIx)
forall a b. (a -> b) -> a -> b
$ Int -> Int -> MVector s RunIxKeyIx -> MVector s RunIxKeyIx
forall a s. Prim a => Int -> Int -> MVector s a -> MVector s a
VPM.slice Int
0 Int
resix1 MVector s RunIxKeyIx
res1
      | Bool
otherwise
      = do
          (MVector s RunIxKeyIx
res1', Int
resix1') <- MVector s RunIxKeyIx
-> Int
-> Int
-> Bloom SerialisedKey
-> ST s (MVector s RunIxKeyIx, Int)
forall s.
MVector s RunIxKeyIx
-> Int
-> Int
-> Bloom SerialisedKey
-> ST s (MVector s RunIxKeyIx, Int)
loop2 MVector s RunIxKeyIx
res1 Int
resix1 Int
0 (Vector (Bloom SerialisedKey)
blooms Vector (Bloom SerialisedKey) -> Int -> Bloom SerialisedKey
forall a. Vector a -> Int -> a
`V.unsafeIndex` Int
rix)
          MVector s RunIxKeyIx -> Int -> Int -> ST s (MVector s RunIxKeyIx)
forall s.
MVector s RunIxKeyIx -> Int -> Int -> ST s (MVector s RunIxKeyIx)
loop1 MVector s RunIxKeyIx
res1' Int
resix1' (Int
rixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
      where
        -- Loop over all key indexes
        loop2 ::
             VPM.MVector s RunIxKeyIx
          -> ResIx
          -> KeyIx
          -> Bloom SerialisedKey
          -> ST s (VPM.MVector s RunIxKeyIx, ResIx)
        loop2 :: forall s.
MVector s RunIxKeyIx
-> Int
-> Int
-> Bloom SerialisedKey
-> ST s (MVector s RunIxKeyIx, Int)
loop2 !MVector s RunIxKeyIx
res2 !Int
resix2 !Int
kix !Bloom SerialisedKey
b
          | Int
kix Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
ksN = (MVector s RunIxKeyIx, Int) -> ST s (MVector s RunIxKeyIx, Int)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MVector s RunIxKeyIx
res2, Int
resix2)
          | let !h :: CheapHashes SerialisedKey
h = Vector (CheapHashes SerialisedKey)
hs Vector (CheapHashes SerialisedKey)
-> Int -> CheapHashes SerialisedKey
forall a. Prim a => Vector a -> Int -> a
`VP.unsafeIndex` Int
kix
          , CheapHashes SerialisedKey -> Bloom SerialisedKey -> Bool
forall (h :: * -> *) a. Hashes h => h a -> Bloom' h a -> Bool
Bloom.elemHashes CheapHashes SerialisedKey
h Bloom SerialisedKey
b = do
              -- Double the vector if we've reached the end.
              -- Note unsafeGrow takes the number to grow by, not the new size.
              MVector s RunIxKeyIx
res2' <- if Int
resix2 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== MVector s RunIxKeyIx -> Int
forall a s. Prim a => MVector s a -> Int
VPM.length MVector s RunIxKeyIx
res2
                        then MVector (PrimState (ST s)) RunIxKeyIx
-> Int -> ST s (MVector (PrimState (ST s)) RunIxKeyIx)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
VPM.unsafeGrow MVector s RunIxKeyIx
MVector (PrimState (ST s)) RunIxKeyIx
res2 (MVector s RunIxKeyIx -> Int
forall a s. Prim a => MVector s a -> Int
VPM.length MVector s RunIxKeyIx
res2)
                        else MVector s RunIxKeyIx -> ST s (MVector s RunIxKeyIx)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure MVector s RunIxKeyIx
res2
              MVector (PrimState (ST s)) RunIxKeyIx
-> Int -> RunIxKeyIx -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VPM.unsafeWrite MVector s RunIxKeyIx
MVector (PrimState (ST s)) RunIxKeyIx
res2' Int
resix2 (Int -> Int -> RunIxKeyIx
RunIxKeyIx Int
rix Int
kix)
              MVector s RunIxKeyIx
-> Int
-> Int
-> Bloom SerialisedKey
-> ST s (MVector s RunIxKeyIx, Int)
forall s.
MVector s RunIxKeyIx
-> Int
-> Int
-> Bloom SerialisedKey
-> ST s (MVector s RunIxKeyIx, Int)
loop2 MVector s RunIxKeyIx
res2' (Int
resix2Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
kixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Bloom SerialisedKey
b
          | Bool
otherwise = MVector s RunIxKeyIx
-> Int
-> Int
-> Bloom SerialisedKey
-> ST s (MVector s RunIxKeyIx, Int)
forall s.
MVector s RunIxKeyIx
-> Int
-> Int
-> Bloom SerialisedKey
-> ST s (MVector s RunIxKeyIx, Int)
loop2 MVector s RunIxKeyIx
res2 Int
resix2 (Int
kixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Bloom SerialisedKey
b