{-# LANGUAGE CPP             #-}
{-# LANGUAGE MagicHash       #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE UnboxedTuples   #-}
{-# OPTIONS_HADDOCK not-home #-}

module Database.LSMTree.Internal.BloomFilter (
  -- * Types
  Bloom.Bloom,
  Bloom.MBloom,

  -- * Bulk query
  bloomQueries,
  RunIxKeyIx(RunIxKeyIx),
  RunIx, KeyIx,

  -- * Serialisation
  bloomFilterVersion,
  bloomFilterToLBS,
  bloomFilterFromFile,
) where

import           Data.Bits
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder.Extra as B
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Primitive as P
import qualified Data.Vector as V
import qualified Data.Vector.Primitive as VP
import           Data.Word (Word32, Word64, byteSwap32)

import           Control.Exception (assert)
import           Control.Monad (void, when)
import           Control.Monad.Class.MonadThrow
import           Control.Monad.Primitive (PrimMonad)
import           Control.Monad.ST (ST, runST)
import           System.FS.API

import           Data.BloomFilter.Blocked (Bloom)
import qualified Data.BloomFilter.Blocked as Bloom
import           Database.LSMTree.Internal.ByteString (byteArrayToByteString)
import           Database.LSMTree.Internal.CRC32C (FileCorruptedError (..),
                     FileFormat (..))
import           Database.LSMTree.Internal.Serialise (SerialisedKey)
import qualified Database.LSMTree.Internal.Vector as P

#ifdef HAVE_STRICT_ARRAY
import qualified Database.LSMTree.Internal.StrictArray as P
#endif

import           Prelude hiding (filter)

-- Bulk query
-----------------------------------------------------------

type KeyIx = Int
type RunIx = Int

-- | A 'RunIxKeyIx' is a (compact) pair of a 'RunIx' and a 'KeyIx'.
--
-- We represent it as a 32bit word, using:
--
-- * 16 bits for the run\/filter index (MSB)
-- * 16 bits for the key index (LSB)
--
newtype RunIxKeyIx = MkRunIxKeyIx Word32
  deriving stock RunIxKeyIx -> RunIxKeyIx -> Bool
(RunIxKeyIx -> RunIxKeyIx -> Bool)
-> (RunIxKeyIx -> RunIxKeyIx -> Bool) -> Eq RunIxKeyIx
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: RunIxKeyIx -> RunIxKeyIx -> Bool
== :: RunIxKeyIx -> RunIxKeyIx -> Bool
$c/= :: RunIxKeyIx -> RunIxKeyIx -> Bool
/= :: RunIxKeyIx -> RunIxKeyIx -> Bool
Eq
  deriving newtype Addr# -> Int# -> RunIxKeyIx
ByteArray# -> Int# -> RunIxKeyIx
Proxy RunIxKeyIx -> Int#
RunIxKeyIx -> Int#
(Proxy RunIxKeyIx -> Int#)
-> (RunIxKeyIx -> Int#)
-> (Proxy RunIxKeyIx -> Int#)
-> (RunIxKeyIx -> Int#)
-> (ByteArray# -> Int# -> RunIxKeyIx)
-> (forall s.
    MutableByteArray# s
    -> Int# -> State# s -> (# State# s, RunIxKeyIx #))
-> (forall s.
    MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> (forall s.
    MutableByteArray# s
    -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> (Addr# -> Int# -> RunIxKeyIx)
-> (forall s.
    Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #))
-> (forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> (forall s.
    Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> Prim RunIxKeyIx
forall s.
Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
forall s. Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #)
forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s
forall s.
MutableByteArray# s
-> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, RunIxKeyIx #)
forall s.
MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s
forall a.
(Proxy a -> Int#)
-> (a -> Int#)
-> (Proxy a -> Int#)
-> (a -> Int#)
-> (ByteArray# -> Int# -> a)
-> (forall s.
    MutableByteArray# s -> Int# -> State# s -> (# State# s, a #))
-> (forall s.
    MutableByteArray# s -> Int# -> a -> State# s -> State# s)
-> (forall s.
    MutableByteArray# s -> Int# -> Int# -> a -> State# s -> State# s)
-> (Addr# -> Int# -> a)
-> (forall s. Addr# -> Int# -> State# s -> (# State# s, a #))
-> (forall s. Addr# -> Int# -> a -> State# s -> State# s)
-> (forall s. Addr# -> Int# -> Int# -> a -> State# s -> State# s)
-> Prim a
$csizeOfType# :: Proxy RunIxKeyIx -> Int#
sizeOfType# :: Proxy RunIxKeyIx -> Int#
$csizeOf# :: RunIxKeyIx -> Int#
sizeOf# :: RunIxKeyIx -> Int#
$calignmentOfType# :: Proxy RunIxKeyIx -> Int#
alignmentOfType# :: Proxy RunIxKeyIx -> Int#
$calignment# :: RunIxKeyIx -> Int#
alignment# :: RunIxKeyIx -> Int#
$cindexByteArray# :: ByteArray# -> Int# -> RunIxKeyIx
indexByteArray# :: ByteArray# -> Int# -> RunIxKeyIx
$creadByteArray# :: forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, RunIxKeyIx #)
readByteArray# :: forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, RunIxKeyIx #)
$cwriteByteArray# :: forall s.
MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s
writeByteArray# :: forall s.
MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s
$csetByteArray# :: forall s.
MutableByteArray# s
-> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
setByteArray# :: forall s.
MutableByteArray# s
-> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
$cindexOffAddr# :: Addr# -> Int# -> RunIxKeyIx
indexOffAddr# :: Addr# -> Int# -> RunIxKeyIx
$creadOffAddr# :: forall s. Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #)
readOffAddr# :: forall s. Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #)
$cwriteOffAddr# :: forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s
writeOffAddr# :: forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s
$csetOffAddr# :: forall s.
Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
setOffAddr# :: forall s.
Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
P.Prim

pattern RunIxKeyIx :: RunIx -> KeyIx -> RunIxKeyIx
pattern $mRunIxKeyIx :: forall {r}. RunIxKeyIx -> (Int -> Int -> r) -> ((# #) -> r) -> r
$bRunIxKeyIx :: Int -> Int -> RunIxKeyIx
RunIxKeyIx r k <- (unpackRunIxKeyIx -> (r, k))
  where
    RunIxKeyIx Int
r Int
k = Int -> Int -> RunIxKeyIx
packRunIxKeyIx Int
r Int
k
{-# INLINE RunIxKeyIx #-}
{-# COMPLETE RunIxKeyIx #-}

packRunIxKeyIx :: Int -> Int -> RunIxKeyIx
packRunIxKeyIx :: Int -> Int -> RunIxKeyIx
packRunIxKeyIx Int
r Int
k =
    Bool -> RunIxKeyIx -> RunIxKeyIx
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xffff
         Bool -> Bool -> Bool
&& Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xffff) (RunIxKeyIx -> RunIxKeyIx) -> RunIxKeyIx -> RunIxKeyIx
forall a b. (a -> b) -> a -> b
$
    Word32 -> RunIxKeyIx
MkRunIxKeyIx (Word32 -> RunIxKeyIx) -> Word32 -> RunIxKeyIx
forall a b. (a -> b) -> a -> b
$
      (Word -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral :: Word -> Word32) (Word -> Word32) -> Word -> Word32
forall a b. (a -> b) -> a -> b
$
        (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
r Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
16)
     Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k
{-# INLINE packRunIxKeyIx #-}

unpackRunIxKeyIx :: RunIxKeyIx -> (Int, Int)
unpackRunIxKeyIx :: RunIxKeyIx -> (Int, Int)
unpackRunIxKeyIx (MkRunIxKeyIx Word32
c) =
    ( Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
c Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
16)
    , Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
c Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0xfff)
    )
{-# INLINE unpackRunIxKeyIx #-}

instance Show RunIxKeyIx where
  showsPrec :: Int -> RunIxKeyIx -> ShowS
showsPrec Int
_ (RunIxKeyIx Int
r Int
k) =
    String -> ShowS
showString String
"RunIxKeyIx " ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
11 Int
r
              ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
' ' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
11 Int
k

type ResIx = Int -- Result index

{-# NOINLINE bloomQueries #-}
-- | Perform a batch of bloom queries. The result is a tuple of indexes into the
-- vector of runs and vector of keys respectively. The order of keys and
-- runs\/filters in the input is maintained in the output. This implementation
-- produces results in key-major order.
--
-- The result vector can be of variable length. The initial estimate is 2x the
-- number of keys but this is grown if needed (using a doubling strategy).
--
bloomQueries ::
     V.Vector (Bloom SerialisedKey)
  -> V.Vector SerialisedKey
  -> VP.Vector RunIxKeyIx
bloomQueries :: Vector (Bloom SerialisedKey)
-> Vector SerialisedKey -> Vector RunIxKeyIx
bloomQueries !Vector (Bloom SerialisedKey)
filters !Vector SerialisedKey
keys
  | Vector (Bloom SerialisedKey) -> Bool
forall a. Vector a -> Bool
V.null Vector (Bloom SerialisedKey)
filters Bool -> Bool -> Bool
|| Vector SerialisedKey -> Bool
forall a. Vector a -> Bool
V.null Vector SerialisedKey
keys = Vector RunIxKeyIx
forall a. Prim a => Vector a
VP.empty
bloomQueries !Vector (Bloom SerialisedKey)
filters !Vector SerialisedKey
keys =
    (forall s. ST s (Vector RunIxKeyIx)) -> Vector RunIxKeyIx
forall a. (forall s. ST s a) -> a
runST (BloomFilters
-> PrimArray (Hashes SerialisedKey) -> ST s (Vector RunIxKeyIx)
forall s.
BloomFilters
-> PrimArray (Hashes SerialisedKey) -> ST s (Vector RunIxKeyIx)
bloomQueries_loop1 BloomFilters
filters' PrimArray (Hashes SerialisedKey)
keyhashes)
  where
    filters' :: BloomFilters
filters'  = Vector (Bloom SerialisedKey) -> BloomFilters
toFiltersArray Vector (Bloom SerialisedKey)
filters
    keyhashes :: PrimArray (Hashes SerialisedKey)
keyhashes = Int
-> (Int -> Hashes SerialisedKey)
-> PrimArray (Hashes SerialisedKey)
forall a. Prim a => Int -> (Int -> a) -> PrimArray a
P.generatePrimArray (Vector SerialisedKey -> Int
forall a. Vector a -> Int
V.length Vector SerialisedKey
keys) ((Int -> Hashes SerialisedKey) -> PrimArray (Hashes SerialisedKey))
-> (Int -> Hashes SerialisedKey)
-> PrimArray (Hashes SerialisedKey)
forall a b. (a -> b) -> a -> b
$ \Int
i ->
                  SerialisedKey -> Hashes SerialisedKey
forall a. Hashable a => a -> Hashes a
Bloom.hashes (Vector SerialisedKey -> Int -> SerialisedKey
forall a. Vector a -> Int -> a
V.unsafeIndex Vector SerialisedKey
keys Int
i)

-- loop over all keys
bloomQueries_loop1 ::
     BloomFilters
  -> P.PrimArray (Bloom.Hashes SerialisedKey)
  -> ST s (VP.Vector RunIxKeyIx)
bloomQueries_loop1 :: forall s.
BloomFilters
-> PrimArray (Hashes SerialisedKey) -> ST s (Vector RunIxKeyIx)
bloomQueries_loop1 !BloomFilters
filters !PrimArray (Hashes SerialisedKey)
keyhashes = do
    MutablePrimArray s RunIxKeyIx
res <- Int -> ST s (MutablePrimArray (PrimState (ST s)) RunIxKeyIx)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
Int -> m (MutablePrimArray (PrimState m) a)
P.newPrimArray (PrimArray (Hashes SerialisedKey) -> Int
forall a. Prim a => PrimArray a -> Int
P.sizeofPrimArray PrimArray (Hashes SerialisedKey)
keyhashes Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2)
    (MutablePrimArray s RunIxKeyIx
res', Int
resix') <- MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go MutablePrimArray s RunIxKeyIx
res Int
0 Int
0
    PrimArray RunIxKeyIx
parr <- MutablePrimArray s RunIxKeyIx -> ST s (PrimArray RunIxKeyIx)
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
-> ST s (PrimArray RunIxKeyIx)
forall (m :: * -> *) a.
PrimMonad m =>
MutablePrimArray (PrimState m) a -> m (PrimArray a)
P.unsafeFreezePrimArray (MutablePrimArray s RunIxKeyIx -> ST s (PrimArray RunIxKeyIx))
-> ST s (MutablePrimArray s RunIxKeyIx)
-> ST s (PrimArray RunIxKeyIx)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< MutablePrimArray (PrimState (ST s)) RunIxKeyIx
-> Int -> ST s (MutablePrimArray (PrimState (ST s)) RunIxKeyIx)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a
-> Int -> m (MutablePrimArray (PrimState m) a)
P.resizeMutablePrimArray MutablePrimArray s RunIxKeyIx
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
res' Int
resix'
    Vector RunIxKeyIx -> ST s (Vector RunIxKeyIx)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Vector RunIxKeyIx -> ST s (Vector RunIxKeyIx))
-> Vector RunIxKeyIx -> ST s (Vector RunIxKeyIx)
forall a b. (a -> b) -> a -> b
$! PrimArray RunIxKeyIx -> Vector RunIxKeyIx
forall a. Prim a => PrimArray a -> Vector a
P.primArrayToPrimVector PrimArray RunIxKeyIx
parr
  where
    go :: MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go !MutablePrimArray s RunIxKeyIx
res !Int
resix !Int
kix
      | Int
kix Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== PrimArray (Hashes SerialisedKey) -> Int
forall a. Prim a => PrimArray a -> Int
P.sizeofPrimArray PrimArray (Hashes SerialisedKey)
keyhashes = (MutablePrimArray s RunIxKeyIx, Int)
-> ST s (MutablePrimArray s RunIxKeyIx, Int)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MutablePrimArray s RunIxKeyIx
res, Int
resix)
      | Bool
otherwise = do
        let !keyhash :: Hashes SerialisedKey
keyhash = PrimArray (Hashes SerialisedKey) -> Int -> Hashes SerialisedKey
forall a. Prim a => PrimArray a -> Int -> a
P.indexPrimArray PrimArray (Hashes SerialisedKey)
keyhashes Int
kix
        BloomFilters -> Hashes SerialisedKey -> Int -> ST s ()
forall s. BloomFilters -> Hashes SerialisedKey -> Int -> ST s ()
bloomQueries_loop2_prefetch BloomFilters
filters Hashes SerialisedKey
keyhash Int
0
        (MutablePrimArray s RunIxKeyIx
res', Int
resix') <- BloomFilters
-> Hashes SerialisedKey
-> Int
-> MutablePrimArray s RunIxKeyIx
-> Int
-> Int
-> ST s (MutablePrimArray s RunIxKeyIx, Int)
forall s.
BloomFilters
-> Hashes SerialisedKey
-> Int
-> MutablePrimArray s RunIxKeyIx
-> Int
-> Int
-> ST s (MutablePrimArray s RunIxKeyIx, Int)
bloomQueries_loop2 BloomFilters
filters Hashes SerialisedKey
keyhash Int
kix MutablePrimArray s RunIxKeyIx
res Int
resix Int
0
        MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go MutablePrimArray s RunIxKeyIx
res' Int
resix' (Int
kixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

-- loop over all filters
bloomQueries_loop2 ::
     BloomFilters
  -> Bloom.Hashes SerialisedKey
  -> KeyIx
  -> P.MutablePrimArray s RunIxKeyIx
  -> ResIx
  -> RunIx
  -> ST s (P.MutablePrimArray s RunIxKeyIx, ResIx)
bloomQueries_loop2 :: forall s.
BloomFilters
-> Hashes SerialisedKey
-> Int
-> MutablePrimArray s RunIxKeyIx
-> Int
-> Int
-> ST s (MutablePrimArray s RunIxKeyIx, Int)
bloomQueries_loop2 !BloomFilters
filters !Hashes SerialisedKey
keyhash !Int
kix = MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go
  where
    go :: MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go MutablePrimArray s RunIxKeyIx
res2 Int
resix2 Int
rix
      | Int
rix Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== BloomFilters -> Int
lengthFiltersArray BloomFilters
filters = (MutablePrimArray s RunIxKeyIx, Int)
-> ST s (MutablePrimArray s RunIxKeyIx, Int)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MutablePrimArray s RunIxKeyIx
res2, Int
resix2)
      | let !filter :: Bloom SerialisedKey
filter = BloomFilters -> Int -> Bloom SerialisedKey
indexFiltersArray BloomFilters
filters Int
rix
      , Bloom SerialisedKey -> Hashes SerialisedKey -> Bool
forall a. Bloom a -> Hashes a -> Bool
Bloom.elemHashes Bloom SerialisedKey
filter Hashes SerialisedKey
keyhash = do
          MutablePrimArray (PrimState (ST s)) RunIxKeyIx
-> Int -> RunIxKeyIx -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutablePrimArray (PrimState m) a -> Int -> a -> m ()
P.writePrimArray MutablePrimArray s RunIxKeyIx
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
res2 Int
resix2 (Int -> Int -> RunIxKeyIx
RunIxKeyIx Int
rix Int
kix)
          Int
ressz2 <- MutablePrimArray (PrimState (ST s)) RunIxKeyIx -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a -> m Int
P.getSizeofMutablePrimArray MutablePrimArray s RunIxKeyIx
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
res2
          MutablePrimArray s RunIxKeyIx
res2'  <- if Int
resix2Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
ressz2
                     then MutablePrimArray s RunIxKeyIx
-> ST s (MutablePrimArray s RunIxKeyIx)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure MutablePrimArray s RunIxKeyIx
res2
                     else MutablePrimArray (PrimState (ST s)) RunIxKeyIx
-> Int -> ST s (MutablePrimArray (PrimState (ST s)) RunIxKeyIx)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a
-> Int -> m (MutablePrimArray (PrimState m) a)
P.resizeMutablePrimArray MutablePrimArray s RunIxKeyIx
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
res2 (Int
ressz2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2)
          MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go MutablePrimArray s RunIxKeyIx
res2' (Int
resix2Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
rixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

      | Bool
otherwise =
          MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go MutablePrimArray s RunIxKeyIx
res2 Int
resix2 (Int
rixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

bloomQueries_loop2_prefetch ::
     BloomFilters
  -> Bloom.Hashes SerialisedKey
  -> RunIx
  -> ST s ()
bloomQueries_loop2_prefetch :: forall s. BloomFilters -> Hashes SerialisedKey -> Int -> ST s ()
bloomQueries_loop2_prefetch !BloomFilters
filters !Hashes SerialisedKey
keyhash = Int -> ST s ()
go
  where
    go :: Int -> ST s ()
go !Int
rix
      | Int
rix Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== BloomFilters -> Int
lengthFiltersArray BloomFilters
filters = () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
      | Bool
otherwise  = do
          let !filter :: Bloom SerialisedKey
filter = BloomFilters -> Int -> Bloom SerialisedKey
indexFiltersArray BloomFilters
filters Int
rix
          Bloom SerialisedKey -> Hashes SerialisedKey -> ST s ()
forall a s. Bloom a -> Hashes a -> ST s ()
Bloom.prefetchElem Bloom SerialisedKey
filter Hashes SerialisedKey
keyhash
          Int -> ST s ()
go (Int
rixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

type BloomFilters =
#ifdef HAVE_STRICT_ARRAY
  P.StrictArray (Bloom SerialisedKey)
#else
  V.Vector (Bloom SerialisedKey)
#endif

{-# INLINE toFiltersArray #-}
toFiltersArray :: V.Vector (Bloom SerialisedKey) -> BloomFilters

{-# INLINE indexFiltersArray #-}
indexFiltersArray :: BloomFilters -> Int -> Bloom SerialisedKey

{-# INLINE lengthFiltersArray #-}
lengthFiltersArray :: BloomFilters -> Int

#ifdef HAVE_STRICT_ARRAY
toFiltersArray :: Vector (Bloom SerialisedKey) -> BloomFilters
toFiltersArray     = Vector (Bloom SerialisedKey) -> BloomFilters
forall a. Vector a -> StrictArray a
P.vectorToStrictArray
indexFiltersArray :: BloomFilters -> Int -> Bloom SerialisedKey
indexFiltersArray  = BloomFilters -> Int -> Bloom SerialisedKey
forall a. StrictArray a -> Int -> a
P.indexStrictArray
lengthFiltersArray :: BloomFilters -> Int
lengthFiltersArray = BloomFilters -> Int
forall a. StrictArray a -> Int
P.sizeofStrictArray
#else
toFiltersArray     = id
indexFiltersArray  = V.unsafeIndex
lengthFiltersArray = V.length
#endif


-- serialising
-----------------------------------------------------------

-- | By writing out the version in host endianness, we also indicate endianness.
-- During deserialisation, we would discover an endianness mismatch.
--
-- We base our version number on the 'Bloom.formatVersion' from the @bloomfilter@
-- library, plus our own version here. This accounts both for changes in the
-- format code here, and changes in the library.
--
bloomFilterVersion :: Word32
bloomFilterVersion :: Word32
bloomFilterVersion = Word32
1 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
Bloom.formatVersion

bloomFilterToLBS :: Bloom a -> LBS.ByteString
bloomFilterToLBS :: forall a. Bloom a -> ByteString
bloomFilterToLBS Bloom a
bf =
    let (BloomSize
size, ByteArray
ba, Int
off, Int
len) = Bloom a -> (BloomSize, ByteArray, Int, Int)
forall a. Bloom a -> (BloomSize, ByteArray, Int, Int)
Bloom.serialise Bloom a
bf
     in BloomSize -> ByteString
header BloomSize
size ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteArray -> Int -> Int -> ByteString
byteArrayToLBS ByteArray
ba Int
off Int
len
  where
    header :: BloomSize -> ByteString
header Bloom.BloomSize { Int
sizeBits :: Int
sizeBits :: BloomSize -> Int
sizeBits, Int
sizeHashes :: Int
sizeHashes :: BloomSize -> Int
sizeHashes } =
        -- creates a single 16 byte chunk
        AllocationStrategy -> ByteString -> Builder -> ByteString
B.toLazyByteStringWith (Int -> Int -> AllocationStrategy
B.safeStrategy Int
16 Int
B.smallChunkSize) ByteString
forall a. Monoid a => a
mempty (Builder -> ByteString) -> Builder -> ByteString
forall a b. (a -> b) -> a -> b
$
             Word32 -> Builder
B.word32Host Word32
bloomFilterVersion
          Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word32 -> Builder
B.word32Host (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
sizeHashes)
          Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word64 -> Builder
B.word64Host (Int -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
sizeBits)

    byteArrayToLBS :: P.ByteArray -> Int -> Int -> LBS.ByteString
    byteArrayToLBS :: ByteArray -> Int -> Int -> ByteString
byteArrayToLBS ByteArray
ba Int
off Int
len =
      ByteString -> ByteString
LBS.fromStrict (Int -> Int -> ByteArray -> ByteString
byteArrayToByteString Int
off Int
len ByteArray
ba)

-- deserialising
-----------------------------------------------------------

{-# SPECIALISE bloomFilterFromFile ::
     HasFS IO h
  -> Handle h
  -> IO (Bloom a) #-}
-- | Read a 'Bloom' from a file.
--
bloomFilterFromFile ::
     (PrimMonad m, MonadCatch m)
  => HasFS m h
  -> Handle h  -- ^ The open file, in read mode
  -> m (Bloom a)
bloomFilterFromFile :: forall (m :: * -> *) h a.
(PrimMonad m, MonadCatch m) =>
HasFS m h -> Handle h -> m (Bloom a)
bloomFilterFromFile HasFS m h
hfs Handle h
h = do
    ByteArray
header <- String -> m ByteArray -> m ByteArray
forall {m :: * -> *} {a}. MonadCatch m => String -> m a -> m a
rethrowEOFError String
"Doesn't contain a header" (m ByteArray -> m ByteArray) -> m ByteArray -> m ByteArray
forall a b. (a -> b) -> a -> b
$
              HasFS m h -> Handle h -> Int -> m ByteArray
forall (m :: * -> *) h.
(PrimMonad m, MonadThrow m) =>
HasFS m h -> Handle h -> Int -> m ByteArray
hGetByteArrayExactly HasFS m h
hfs Handle h
h Int
16

    let !version :: Word32
version = ByteArray -> Int -> Word32
forall a. Prim a => ByteArray -> Int -> a
P.indexByteArray ByteArray
header Int
0 :: Word32
        !nhashes :: Word32
nhashes = ByteArray -> Int -> Word32
forall a. Prim a => ByteArray -> Int -> a
P.indexByteArray ByteArray
header Int
1 :: Word32
        !nbits :: Word64
nbits   = ByteArray -> Int -> Word64
forall a. Prim a => ByteArray -> Int -> a
P.indexByteArray ByteArray
header Int
1 :: Word64

    Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Word32
version Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word32
bloomFilterVersion) (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m ()
throwFormatError (String -> m ()) -> String -> m ()
forall a b. (a -> b) -> a -> b
$
      if Word32 -> Word32
byteSwap32 Word32
version Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
bloomFilterVersion
      then String
"Different byte order"
      else String
"Unsupported version"

    Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Word64
nbits Word64 -> Word64 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word64
0) (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m ()
throwFormatError String
"Length is zero"

    -- limit to 2^48 bits
    Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Word64
nbits Word64 -> Word64 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word64
0x1_0000_0000_0000) (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m ()
throwFormatError String
"Too large bloomfilter"
    --TODO: get max size from bloomfilter lib

    -- read the filter data from the file directly into the bloom filter
    Bloom a
bloom <-
      BloomSize
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
-> m (Bloom a)
forall (m :: * -> *) a.
PrimMonad m =>
BloomSize
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
-> m (Bloom a)
Bloom.deserialise
        Bloom.BloomSize {
          sizeBits :: Int
Bloom.sizeBits   = Word64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
nbits,
          sizeHashes :: Int
Bloom.sizeHashes = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
nhashes
        }
        (\MutableByteArray (PrimState m)
buf Int
off Int
len ->
            String -> m () -> m ()
forall {m :: * -> *} {a}. MonadCatch m => String -> m a -> m a
rethrowEOFError String
"bloom filter file too short" (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$
              m ByteCount -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m ByteCount -> m ()) -> m ByteCount -> m ()
forall a b. (a -> b) -> a -> b
$ HasFS m h
-> Handle h
-> MutableByteArray (PrimState m)
-> BufferOffset
-> ByteCount
-> m ByteCount
forall (m :: * -> *) h.
(?callStack::CallStack, MonadThrow m) =>
HasFS m h
-> Handle h
-> MutableByteArray (PrimState m)
-> BufferOffset
-> ByteCount
-> m ByteCount
hGetBufExactly HasFS m h
hfs
                       Handle h
h MutableByteArray (PrimState m)
buf (Int -> BufferOffset
BufferOffset Int
off) (Int -> ByteCount
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len))

    -- check we're now at EOF
    ByteString
trailing <- HasFS m h
-> (?callStack::CallStack) => Handle h -> Word64 -> m ByteString
forall (m :: * -> *) h.
HasFS m h
-> (?callStack::CallStack) => Handle h -> Word64 -> m ByteString
hGetSome HasFS m h
hfs Handle h
h Word64
1
    Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (ByteString -> Bool
BS.null ByteString
trailing)) (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$
      String -> m ()
throwFormatError String
"Byte array is too large for components"
    Bloom a -> m (Bloom a)
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bloom a
bloom
  where
    throwFormatError :: String -> m ()
throwFormatError = FileCorruptedError -> m ()
forall e a. Exception e => e -> m a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwIO
                     (FileCorruptedError -> m ())
-> (String -> FileCorruptedError) -> String -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FsErrorPath -> FileFormat -> String -> FileCorruptedError
ErrFileFormatInvalid
                         (HasFS m h -> FsPath -> FsErrorPath
forall (m :: * -> *) h. HasFS m h -> FsPath -> FsErrorPath
mkFsErrorPath HasFS m h
hfs (Handle h -> FsPath
forall h. Handle h -> FsPath
handlePath Handle h
h))
                         FileFormat
FormatBloomFilterFile
    rethrowEOFError :: String -> m a -> m a
rethrowEOFError String
msg =
      (FsError -> Maybe FsError) -> (FsError -> m a) -> m a -> m a
forall e b a.
Exception e =>
(e -> Maybe b) -> (b -> m a) -> m a -> m a
forall (m :: * -> *) e b a.
(MonadCatch m, Exception e) =>
(e -> Maybe b) -> (b -> m a) -> m a -> m a
handleJust
        (\FsError
e -> if FsErrorType -> FsError -> Bool
isFsErrorType FsErrorType
FsReachedEOF FsError
e then FsError -> Maybe FsError
forall a. a -> Maybe a
Just FsError
e else Maybe FsError
forall a. Maybe a
Nothing)
        (\FsError
e -> FileCorruptedError -> m a
forall e a. Exception e => e -> m a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwIO (FileCorruptedError -> m a) -> FileCorruptedError -> m a
forall a b. (a -> b) -> a -> b
$
                 FsErrorPath -> FileFormat -> String -> FileCorruptedError
ErrFileFormatInvalid
                   (FsError -> FsErrorPath
fsErrorPath FsError
e) FileFormat
FormatBloomFilterFile String
msg)

{-# SPECIALISE hGetByteArrayExactly ::
     HasFS IO h
  -> Handle h
  -> Int
  -> IO P.ByteArray #-}
hGetByteArrayExactly ::
     (PrimMonad m, MonadThrow m)
  => HasFS m h
  -> Handle h
  -> Int
  -> m P.ByteArray
hGetByteArrayExactly :: forall (m :: * -> *) h.
(PrimMonad m, MonadThrow m) =>
HasFS m h -> Handle h -> Int -> m ByteArray
hGetByteArrayExactly HasFS m h
hfs Handle h
h Int
len = do
    MutableByteArray (PrimState m)
buf <- Int -> m (MutableByteArray (PrimState m))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
P.newByteArray Int
len
    ByteCount
_   <- HasFS m h
-> Handle h
-> MutableByteArray (PrimState m)
-> BufferOffset
-> ByteCount
-> m ByteCount
forall (m :: * -> *) h.
(?callStack::CallStack, MonadThrow m) =>
HasFS m h
-> Handle h
-> MutableByteArray (PrimState m)
-> BufferOffset
-> ByteCount
-> m ByteCount
hGetBufExactly HasFS m h
hfs Handle h
h MutableByteArray (PrimState m)
buf BufferOffset
0 (Int -> ByteCount
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
    MutableByteArray (PrimState m) -> m ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
P.unsafeFreezeByteArray MutableByteArray (PrimState m)
buf