{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_HADDOCK not-home #-}
module Database.LSMTree.Internal.BloomFilter (
Bloom.Bloom,
Bloom.MBloom,
bloomQueries,
RunIxKeyIx(RunIxKeyIx),
RunIx, KeyIx,
bloomFilterVersion,
bloomFilterToLBS,
bloomFilterFromFile,
) where
import Data.Bits
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder.Extra as B
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Primitive as P
import qualified Data.Vector as V
import qualified Data.Vector.Primitive as VP
import Data.Word (Word32, Word64, byteSwap32)
import Control.Exception (assert)
import Control.Monad (void, when)
import Control.Monad.Class.MonadThrow
import Control.Monad.Primitive (PrimMonad)
import Control.Monad.ST (ST, runST)
import System.FS.API
import Data.BloomFilter.Blocked (Bloom)
import qualified Data.BloomFilter.Blocked as Bloom
import Database.LSMTree.Internal.ByteString (byteArrayToByteString)
import Database.LSMTree.Internal.CRC32C (FileCorruptedError (..),
FileFormat (..))
import Database.LSMTree.Internal.Serialise (SerialisedKey)
import qualified Database.LSMTree.Internal.Vector as P
#ifdef HAVE_STRICT_ARRAY
import qualified Database.LSMTree.Internal.StrictArray as P
#endif
import Prelude hiding (filter)
type KeyIx = Int
type RunIx = Int
newtype RunIxKeyIx = MkRunIxKeyIx Word32
deriving stock RunIxKeyIx -> RunIxKeyIx -> Bool
(RunIxKeyIx -> RunIxKeyIx -> Bool)
-> (RunIxKeyIx -> RunIxKeyIx -> Bool) -> Eq RunIxKeyIx
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: RunIxKeyIx -> RunIxKeyIx -> Bool
== :: RunIxKeyIx -> RunIxKeyIx -> Bool
$c/= :: RunIxKeyIx -> RunIxKeyIx -> Bool
/= :: RunIxKeyIx -> RunIxKeyIx -> Bool
Eq
deriving newtype Addr# -> Int# -> RunIxKeyIx
ByteArray# -> Int# -> RunIxKeyIx
Proxy RunIxKeyIx -> Int#
RunIxKeyIx -> Int#
(Proxy RunIxKeyIx -> Int#)
-> (RunIxKeyIx -> Int#)
-> (Proxy RunIxKeyIx -> Int#)
-> (RunIxKeyIx -> Int#)
-> (ByteArray# -> Int# -> RunIxKeyIx)
-> (forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, RunIxKeyIx #))
-> (forall s.
MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> (forall s.
MutableByteArray# s
-> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> (Addr# -> Int# -> RunIxKeyIx)
-> (forall s.
Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #))
-> (forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> (forall s.
Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s)
-> Prim RunIxKeyIx
forall s.
Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
forall s. Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #)
forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s
forall s.
MutableByteArray# s
-> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, RunIxKeyIx #)
forall s.
MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s
forall a.
(Proxy a -> Int#)
-> (a -> Int#)
-> (Proxy a -> Int#)
-> (a -> Int#)
-> (ByteArray# -> Int# -> a)
-> (forall s.
MutableByteArray# s -> Int# -> State# s -> (# State# s, a #))
-> (forall s.
MutableByteArray# s -> Int# -> a -> State# s -> State# s)
-> (forall s.
MutableByteArray# s -> Int# -> Int# -> a -> State# s -> State# s)
-> (Addr# -> Int# -> a)
-> (forall s. Addr# -> Int# -> State# s -> (# State# s, a #))
-> (forall s. Addr# -> Int# -> a -> State# s -> State# s)
-> (forall s. Addr# -> Int# -> Int# -> a -> State# s -> State# s)
-> Prim a
$csizeOfType# :: Proxy RunIxKeyIx -> Int#
sizeOfType# :: Proxy RunIxKeyIx -> Int#
$csizeOf# :: RunIxKeyIx -> Int#
sizeOf# :: RunIxKeyIx -> Int#
$calignmentOfType# :: Proxy RunIxKeyIx -> Int#
alignmentOfType# :: Proxy RunIxKeyIx -> Int#
$calignment# :: RunIxKeyIx -> Int#
alignment# :: RunIxKeyIx -> Int#
$cindexByteArray# :: ByteArray# -> Int# -> RunIxKeyIx
indexByteArray# :: ByteArray# -> Int# -> RunIxKeyIx
$creadByteArray# :: forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, RunIxKeyIx #)
readByteArray# :: forall s.
MutableByteArray# s
-> Int# -> State# s -> (# State# s, RunIxKeyIx #)
$cwriteByteArray# :: forall s.
MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s
writeByteArray# :: forall s.
MutableByteArray# s -> Int# -> RunIxKeyIx -> State# s -> State# s
$csetByteArray# :: forall s.
MutableByteArray# s
-> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
setByteArray# :: forall s.
MutableByteArray# s
-> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
$cindexOffAddr# :: Addr# -> Int# -> RunIxKeyIx
indexOffAddr# :: Addr# -> Int# -> RunIxKeyIx
$creadOffAddr# :: forall s. Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #)
readOffAddr# :: forall s. Addr# -> Int# -> State# s -> (# State# s, RunIxKeyIx #)
$cwriteOffAddr# :: forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s
writeOffAddr# :: forall s. Addr# -> Int# -> RunIxKeyIx -> State# s -> State# s
$csetOffAddr# :: forall s.
Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
setOffAddr# :: forall s.
Addr# -> Int# -> Int# -> RunIxKeyIx -> State# s -> State# s
P.Prim
pattern RunIxKeyIx :: RunIx -> KeyIx -> RunIxKeyIx
pattern $mRunIxKeyIx :: forall {r}. RunIxKeyIx -> (Int -> Int -> r) -> ((# #) -> r) -> r
$bRunIxKeyIx :: Int -> Int -> RunIxKeyIx
RunIxKeyIx r k <- (unpackRunIxKeyIx -> (r, k))
where
RunIxKeyIx Int
r Int
k = Int -> Int -> RunIxKeyIx
packRunIxKeyIx Int
r Int
k
{-# INLINE RunIxKeyIx #-}
{-# COMPLETE RunIxKeyIx #-}
packRunIxKeyIx :: Int -> Int -> RunIxKeyIx
packRunIxKeyIx :: Int -> Int -> RunIxKeyIx
packRunIxKeyIx Int
r Int
k =
Bool -> RunIxKeyIx -> RunIxKeyIx
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xffff
Bool -> Bool -> Bool
&& Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xffff) (RunIxKeyIx -> RunIxKeyIx) -> RunIxKeyIx -> RunIxKeyIx
forall a b. (a -> b) -> a -> b
$
Word32 -> RunIxKeyIx
MkRunIxKeyIx (Word32 -> RunIxKeyIx) -> Word32 -> RunIxKeyIx
forall a b. (a -> b) -> a -> b
$
(Word -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral :: Word -> Word32) (Word -> Word32) -> Word -> Word32
forall a b. (a -> b) -> a -> b
$
(Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
r Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
16)
Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k
{-# INLINE packRunIxKeyIx #-}
unpackRunIxKeyIx :: RunIxKeyIx -> (Int, Int)
unpackRunIxKeyIx :: RunIxKeyIx -> (Int, Int)
unpackRunIxKeyIx (MkRunIxKeyIx Word32
c) =
( Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
c Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
16)
, Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
c Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0xfff)
)
{-# INLINE unpackRunIxKeyIx #-}
instance Show RunIxKeyIx where
showsPrec :: Int -> RunIxKeyIx -> ShowS
showsPrec Int
_ (RunIxKeyIx Int
r Int
k) =
String -> ShowS
showString String
"RunIxKeyIx " ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
11 Int
r
ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
' ' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
11 Int
k
type ResIx = Int
{-# NOINLINE bloomQueries #-}
bloomQueries ::
V.Vector (Bloom SerialisedKey)
-> V.Vector SerialisedKey
-> VP.Vector RunIxKeyIx
bloomQueries :: Vector (Bloom SerialisedKey)
-> Vector SerialisedKey -> Vector RunIxKeyIx
bloomQueries !Vector (Bloom SerialisedKey)
filters !Vector SerialisedKey
keys
| Vector (Bloom SerialisedKey) -> Bool
forall a. Vector a -> Bool
V.null Vector (Bloom SerialisedKey)
filters Bool -> Bool -> Bool
|| Vector SerialisedKey -> Bool
forall a. Vector a -> Bool
V.null Vector SerialisedKey
keys = Vector RunIxKeyIx
forall a. Prim a => Vector a
VP.empty
bloomQueries !Vector (Bloom SerialisedKey)
filters !Vector SerialisedKey
keys =
(forall s. ST s (Vector RunIxKeyIx)) -> Vector RunIxKeyIx
forall a. (forall s. ST s a) -> a
runST (BloomFilters
-> PrimArray (Hashes SerialisedKey) -> ST s (Vector RunIxKeyIx)
forall s.
BloomFilters
-> PrimArray (Hashes SerialisedKey) -> ST s (Vector RunIxKeyIx)
bloomQueries_loop1 BloomFilters
filters' PrimArray (Hashes SerialisedKey)
keyhashes)
where
filters' :: BloomFilters
filters' = Vector (Bloom SerialisedKey) -> BloomFilters
toFiltersArray Vector (Bloom SerialisedKey)
filters
keyhashes :: PrimArray (Hashes SerialisedKey)
keyhashes = Int
-> (Int -> Hashes SerialisedKey)
-> PrimArray (Hashes SerialisedKey)
forall a. Prim a => Int -> (Int -> a) -> PrimArray a
P.generatePrimArray (Vector SerialisedKey -> Int
forall a. Vector a -> Int
V.length Vector SerialisedKey
keys) ((Int -> Hashes SerialisedKey) -> PrimArray (Hashes SerialisedKey))
-> (Int -> Hashes SerialisedKey)
-> PrimArray (Hashes SerialisedKey)
forall a b. (a -> b) -> a -> b
$ \Int
i ->
SerialisedKey -> Hashes SerialisedKey
forall a. Hashable a => a -> Hashes a
Bloom.hashes (Vector SerialisedKey -> Int -> SerialisedKey
forall a. Vector a -> Int -> a
V.unsafeIndex Vector SerialisedKey
keys Int
i)
bloomQueries_loop1 ::
BloomFilters
-> P.PrimArray (Bloom.Hashes SerialisedKey)
-> ST s (VP.Vector RunIxKeyIx)
bloomQueries_loop1 :: forall s.
BloomFilters
-> PrimArray (Hashes SerialisedKey) -> ST s (Vector RunIxKeyIx)
bloomQueries_loop1 !BloomFilters
filters !PrimArray (Hashes SerialisedKey)
keyhashes = do
MutablePrimArray s RunIxKeyIx
res <- Int -> ST s (MutablePrimArray (PrimState (ST s)) RunIxKeyIx)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
Int -> m (MutablePrimArray (PrimState m) a)
P.newPrimArray (PrimArray (Hashes SerialisedKey) -> Int
forall a. Prim a => PrimArray a -> Int
P.sizeofPrimArray PrimArray (Hashes SerialisedKey)
keyhashes Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2)
(MutablePrimArray s RunIxKeyIx
res', Int
resix') <- MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go MutablePrimArray s RunIxKeyIx
res Int
0 Int
0
PrimArray RunIxKeyIx
parr <- MutablePrimArray s RunIxKeyIx -> ST s (PrimArray RunIxKeyIx)
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
-> ST s (PrimArray RunIxKeyIx)
forall (m :: * -> *) a.
PrimMonad m =>
MutablePrimArray (PrimState m) a -> m (PrimArray a)
P.unsafeFreezePrimArray (MutablePrimArray s RunIxKeyIx -> ST s (PrimArray RunIxKeyIx))
-> ST s (MutablePrimArray s RunIxKeyIx)
-> ST s (PrimArray RunIxKeyIx)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< MutablePrimArray (PrimState (ST s)) RunIxKeyIx
-> Int -> ST s (MutablePrimArray (PrimState (ST s)) RunIxKeyIx)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a
-> Int -> m (MutablePrimArray (PrimState m) a)
P.resizeMutablePrimArray MutablePrimArray s RunIxKeyIx
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
res' Int
resix'
Vector RunIxKeyIx -> ST s (Vector RunIxKeyIx)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Vector RunIxKeyIx -> ST s (Vector RunIxKeyIx))
-> Vector RunIxKeyIx -> ST s (Vector RunIxKeyIx)
forall a b. (a -> b) -> a -> b
$! PrimArray RunIxKeyIx -> Vector RunIxKeyIx
forall a. Prim a => PrimArray a -> Vector a
P.primArrayToPrimVector PrimArray RunIxKeyIx
parr
where
go :: MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go !MutablePrimArray s RunIxKeyIx
res !Int
resix !Int
kix
| Int
kix Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== PrimArray (Hashes SerialisedKey) -> Int
forall a. Prim a => PrimArray a -> Int
P.sizeofPrimArray PrimArray (Hashes SerialisedKey)
keyhashes = (MutablePrimArray s RunIxKeyIx, Int)
-> ST s (MutablePrimArray s RunIxKeyIx, Int)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MutablePrimArray s RunIxKeyIx
res, Int
resix)
| Bool
otherwise = do
let !keyhash :: Hashes SerialisedKey
keyhash = PrimArray (Hashes SerialisedKey) -> Int -> Hashes SerialisedKey
forall a. Prim a => PrimArray a -> Int -> a
P.indexPrimArray PrimArray (Hashes SerialisedKey)
keyhashes Int
kix
BloomFilters -> Hashes SerialisedKey -> Int -> ST s ()
forall s. BloomFilters -> Hashes SerialisedKey -> Int -> ST s ()
bloomQueries_loop2_prefetch BloomFilters
filters Hashes SerialisedKey
keyhash Int
0
(MutablePrimArray s RunIxKeyIx
res', Int
resix') <- BloomFilters
-> Hashes SerialisedKey
-> Int
-> MutablePrimArray s RunIxKeyIx
-> Int
-> Int
-> ST s (MutablePrimArray s RunIxKeyIx, Int)
forall s.
BloomFilters
-> Hashes SerialisedKey
-> Int
-> MutablePrimArray s RunIxKeyIx
-> Int
-> Int
-> ST s (MutablePrimArray s RunIxKeyIx, Int)
bloomQueries_loop2 BloomFilters
filters Hashes SerialisedKey
keyhash Int
kix MutablePrimArray s RunIxKeyIx
res Int
resix Int
0
MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go MutablePrimArray s RunIxKeyIx
res' Int
resix' (Int
kixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
bloomQueries_loop2 ::
BloomFilters
-> Bloom.Hashes SerialisedKey
-> KeyIx
-> P.MutablePrimArray s RunIxKeyIx
-> ResIx
-> RunIx
-> ST s (P.MutablePrimArray s RunIxKeyIx, ResIx)
bloomQueries_loop2 :: forall s.
BloomFilters
-> Hashes SerialisedKey
-> Int
-> MutablePrimArray s RunIxKeyIx
-> Int
-> Int
-> ST s (MutablePrimArray s RunIxKeyIx, Int)
bloomQueries_loop2 !BloomFilters
filters !Hashes SerialisedKey
keyhash !Int
kix = MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go
where
go :: MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go MutablePrimArray s RunIxKeyIx
res2 Int
resix2 Int
rix
| Int
rix Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== BloomFilters -> Int
lengthFiltersArray BloomFilters
filters = (MutablePrimArray s RunIxKeyIx, Int)
-> ST s (MutablePrimArray s RunIxKeyIx, Int)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (MutablePrimArray s RunIxKeyIx
res2, Int
resix2)
| let !filter :: Bloom SerialisedKey
filter = BloomFilters -> Int -> Bloom SerialisedKey
indexFiltersArray BloomFilters
filters Int
rix
, Bloom SerialisedKey -> Hashes SerialisedKey -> Bool
forall a. Bloom a -> Hashes a -> Bool
Bloom.elemHashes Bloom SerialisedKey
filter Hashes SerialisedKey
keyhash = do
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
-> Int -> RunIxKeyIx -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutablePrimArray (PrimState m) a -> Int -> a -> m ()
P.writePrimArray MutablePrimArray s RunIxKeyIx
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
res2 Int
resix2 (Int -> Int -> RunIxKeyIx
RunIxKeyIx Int
rix Int
kix)
Int
ressz2 <- MutablePrimArray (PrimState (ST s)) RunIxKeyIx -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a -> m Int
P.getSizeofMutablePrimArray MutablePrimArray s RunIxKeyIx
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
res2
MutablePrimArray s RunIxKeyIx
res2' <- if Int
resix2Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
ressz2
then MutablePrimArray s RunIxKeyIx
-> ST s (MutablePrimArray s RunIxKeyIx)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure MutablePrimArray s RunIxKeyIx
res2
else MutablePrimArray (PrimState (ST s)) RunIxKeyIx
-> Int -> ST s (MutablePrimArray (PrimState (ST s)) RunIxKeyIx)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a
-> Int -> m (MutablePrimArray (PrimState m) a)
P.resizeMutablePrimArray MutablePrimArray s RunIxKeyIx
MutablePrimArray (PrimState (ST s)) RunIxKeyIx
res2 (Int
ressz2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2)
MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go MutablePrimArray s RunIxKeyIx
res2' (Int
resix2Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
rixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
| Bool
otherwise =
MutablePrimArray s RunIxKeyIx
-> Int -> Int -> ST s (MutablePrimArray s RunIxKeyIx, Int)
go MutablePrimArray s RunIxKeyIx
res2 Int
resix2 (Int
rixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
bloomQueries_loop2_prefetch ::
BloomFilters
-> Bloom.Hashes SerialisedKey
-> RunIx
-> ST s ()
bloomQueries_loop2_prefetch :: forall s. BloomFilters -> Hashes SerialisedKey -> Int -> ST s ()
bloomQueries_loop2_prefetch !BloomFilters
filters !Hashes SerialisedKey
keyhash = Int -> ST s ()
go
where
go :: Int -> ST s ()
go !Int
rix
| Int
rix Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== BloomFilters -> Int
lengthFiltersArray BloomFilters
filters = () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
| Bool
otherwise = do
let !filter :: Bloom SerialisedKey
filter = BloomFilters -> Int -> Bloom SerialisedKey
indexFiltersArray BloomFilters
filters Int
rix
Bloom SerialisedKey -> Hashes SerialisedKey -> ST s ()
forall a s. Bloom a -> Hashes a -> ST s ()
Bloom.prefetchElem Bloom SerialisedKey
filter Hashes SerialisedKey
keyhash
Int -> ST s ()
go (Int
rixInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
type BloomFilters =
#ifdef HAVE_STRICT_ARRAY
P.StrictArray (Bloom SerialisedKey)
#else
V.Vector (Bloom SerialisedKey)
#endif
{-# INLINE toFiltersArray #-}
toFiltersArray :: V.Vector (Bloom SerialisedKey) -> BloomFilters
{-# INLINE indexFiltersArray #-}
indexFiltersArray :: BloomFilters -> Int -> Bloom SerialisedKey
{-# INLINE lengthFiltersArray #-}
lengthFiltersArray :: BloomFilters -> Int
#ifdef HAVE_STRICT_ARRAY
toFiltersArray :: Vector (Bloom SerialisedKey) -> BloomFilters
toFiltersArray = Vector (Bloom SerialisedKey) -> BloomFilters
forall a. Vector a -> StrictArray a
P.vectorToStrictArray
indexFiltersArray :: BloomFilters -> Int -> Bloom SerialisedKey
indexFiltersArray = BloomFilters -> Int -> Bloom SerialisedKey
forall a. StrictArray a -> Int -> a
P.indexStrictArray
lengthFiltersArray :: BloomFilters -> Int
lengthFiltersArray = BloomFilters -> Int
forall a. StrictArray a -> Int
P.sizeofStrictArray
#else
toFiltersArray = id
indexFiltersArray = V.unsafeIndex
lengthFiltersArray = V.length
#endif
bloomFilterVersion :: Word32
bloomFilterVersion :: Word32
bloomFilterVersion = Word32
1 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
Bloom.formatVersion
bloomFilterToLBS :: Bloom a -> LBS.ByteString
bloomFilterToLBS :: forall a. Bloom a -> ByteString
bloomFilterToLBS Bloom a
bf =
let (BloomSize
size, ByteArray
ba, Int
off, Int
len) = Bloom a -> (BloomSize, ByteArray, Int, Int)
forall a. Bloom a -> (BloomSize, ByteArray, Int, Int)
Bloom.serialise Bloom a
bf
in BloomSize -> ByteString
header BloomSize
size ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteArray -> Int -> Int -> ByteString
byteArrayToLBS ByteArray
ba Int
off Int
len
where
header :: BloomSize -> ByteString
header Bloom.BloomSize { Int
sizeBits :: Int
sizeBits :: BloomSize -> Int
sizeBits, Int
sizeHashes :: Int
sizeHashes :: BloomSize -> Int
sizeHashes } =
AllocationStrategy -> ByteString -> Builder -> ByteString
B.toLazyByteStringWith (Int -> Int -> AllocationStrategy
B.safeStrategy Int
16 Int
B.smallChunkSize) ByteString
forall a. Monoid a => a
mempty (Builder -> ByteString) -> Builder -> ByteString
forall a b. (a -> b) -> a -> b
$
Word32 -> Builder
B.word32Host Word32
bloomFilterVersion
Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word32 -> Builder
B.word32Host (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
sizeHashes)
Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word64 -> Builder
B.word64Host (Int -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
sizeBits)
byteArrayToLBS :: P.ByteArray -> Int -> Int -> LBS.ByteString
byteArrayToLBS :: ByteArray -> Int -> Int -> ByteString
byteArrayToLBS ByteArray
ba Int
off Int
len =
ByteString -> ByteString
LBS.fromStrict (Int -> Int -> ByteArray -> ByteString
byteArrayToByteString Int
off Int
len ByteArray
ba)
{-# SPECIALISE bloomFilterFromFile ::
HasFS IO h
-> Handle h
-> IO (Bloom a) #-}
bloomFilterFromFile ::
(PrimMonad m, MonadCatch m)
=> HasFS m h
-> Handle h
-> m (Bloom a)
bloomFilterFromFile :: forall (m :: * -> *) h a.
(PrimMonad m, MonadCatch m) =>
HasFS m h -> Handle h -> m (Bloom a)
bloomFilterFromFile HasFS m h
hfs Handle h
h = do
ByteArray
header <- String -> m ByteArray -> m ByteArray
forall {m :: * -> *} {a}. MonadCatch m => String -> m a -> m a
rethrowEOFError String
"Doesn't contain a header" (m ByteArray -> m ByteArray) -> m ByteArray -> m ByteArray
forall a b. (a -> b) -> a -> b
$
HasFS m h -> Handle h -> Int -> m ByteArray
forall (m :: * -> *) h.
(PrimMonad m, MonadThrow m) =>
HasFS m h -> Handle h -> Int -> m ByteArray
hGetByteArrayExactly HasFS m h
hfs Handle h
h Int
16
let !version :: Word32
version = ByteArray -> Int -> Word32
forall a. Prim a => ByteArray -> Int -> a
P.indexByteArray ByteArray
header Int
0 :: Word32
!nhashes :: Word32
nhashes = ByteArray -> Int -> Word32
forall a. Prim a => ByteArray -> Int -> a
P.indexByteArray ByteArray
header Int
1 :: Word32
!nbits :: Word64
nbits = ByteArray -> Int -> Word64
forall a. Prim a => ByteArray -> Int -> a
P.indexByteArray ByteArray
header Int
1 :: Word64
Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Word32
version Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word32
bloomFilterVersion) (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m ()
throwFormatError (String -> m ()) -> String -> m ()
forall a b. (a -> b) -> a -> b
$
if Word32 -> Word32
byteSwap32 Word32
version Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
bloomFilterVersion
then String
"Different byte order"
else String
"Unsupported version"
Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Word64
nbits Word64 -> Word64 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word64
0) (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m ()
throwFormatError String
"Length is zero"
Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Word64
nbits Word64 -> Word64 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word64
0x1_0000_0000_0000) (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m ()
throwFormatError String
"Too large bloomfilter"
Bloom a
bloom <-
BloomSize
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
-> m (Bloom a)
forall (m :: * -> *) a.
PrimMonad m =>
BloomSize
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
-> m (Bloom a)
Bloom.deserialise
Bloom.BloomSize {
sizeBits :: Int
Bloom.sizeBits = Word64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
nbits,
sizeHashes :: Int
Bloom.sizeHashes = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
nhashes
}
(\MutableByteArray (PrimState m)
buf Int
off Int
len ->
String -> m () -> m ()
forall {m :: * -> *} {a}. MonadCatch m => String -> m a -> m a
rethrowEOFError String
"bloom filter file too short" (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$
m ByteCount -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m ByteCount -> m ()) -> m ByteCount -> m ()
forall a b. (a -> b) -> a -> b
$ HasFS m h
-> Handle h
-> MutableByteArray (PrimState m)
-> BufferOffset
-> ByteCount
-> m ByteCount
forall (m :: * -> *) h.
(?callStack::CallStack, MonadThrow m) =>
HasFS m h
-> Handle h
-> MutableByteArray (PrimState m)
-> BufferOffset
-> ByteCount
-> m ByteCount
hGetBufExactly HasFS m h
hfs
Handle h
h MutableByteArray (PrimState m)
buf (Int -> BufferOffset
BufferOffset Int
off) (Int -> ByteCount
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len))
ByteString
trailing <- HasFS m h
-> (?callStack::CallStack) => Handle h -> Word64 -> m ByteString
forall (m :: * -> *) h.
HasFS m h
-> (?callStack::CallStack) => Handle h -> Word64 -> m ByteString
hGetSome HasFS m h
hfs Handle h
h Word64
1
Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not (ByteString -> Bool
BS.null ByteString
trailing)) (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$
String -> m ()
throwFormatError String
"Byte array is too large for components"
Bloom a -> m (Bloom a)
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bloom a
bloom
where
throwFormatError :: String -> m ()
throwFormatError = FileCorruptedError -> m ()
forall e a. Exception e => e -> m a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwIO
(FileCorruptedError -> m ())
-> (String -> FileCorruptedError) -> String -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FsErrorPath -> FileFormat -> String -> FileCorruptedError
ErrFileFormatInvalid
(HasFS m h -> FsPath -> FsErrorPath
forall (m :: * -> *) h. HasFS m h -> FsPath -> FsErrorPath
mkFsErrorPath HasFS m h
hfs (Handle h -> FsPath
forall h. Handle h -> FsPath
handlePath Handle h
h))
FileFormat
FormatBloomFilterFile
rethrowEOFError :: String -> m a -> m a
rethrowEOFError String
msg =
(FsError -> Maybe FsError) -> (FsError -> m a) -> m a -> m a
forall e b a.
Exception e =>
(e -> Maybe b) -> (b -> m a) -> m a -> m a
forall (m :: * -> *) e b a.
(MonadCatch m, Exception e) =>
(e -> Maybe b) -> (b -> m a) -> m a -> m a
handleJust
(\FsError
e -> if FsErrorType -> FsError -> Bool
isFsErrorType FsErrorType
FsReachedEOF FsError
e then FsError -> Maybe FsError
forall a. a -> Maybe a
Just FsError
e else Maybe FsError
forall a. Maybe a
Nothing)
(\FsError
e -> FileCorruptedError -> m a
forall e a. Exception e => e -> m a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwIO (FileCorruptedError -> m a) -> FileCorruptedError -> m a
forall a b. (a -> b) -> a -> b
$
FsErrorPath -> FileFormat -> String -> FileCorruptedError
ErrFileFormatInvalid
(FsError -> FsErrorPath
fsErrorPath FsError
e) FileFormat
FormatBloomFilterFile String
msg)
{-# SPECIALISE hGetByteArrayExactly ::
HasFS IO h
-> Handle h
-> Int
-> IO P.ByteArray #-}
hGetByteArrayExactly ::
(PrimMonad m, MonadThrow m)
=> HasFS m h
-> Handle h
-> Int
-> m P.ByteArray
hGetByteArrayExactly :: forall (m :: * -> *) h.
(PrimMonad m, MonadThrow m) =>
HasFS m h -> Handle h -> Int -> m ByteArray
hGetByteArrayExactly HasFS m h
hfs Handle h
h Int
len = do
MutableByteArray (PrimState m)
buf <- Int -> m (MutableByteArray (PrimState m))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
P.newByteArray Int
len
ByteCount
_ <- HasFS m h
-> Handle h
-> MutableByteArray (PrimState m)
-> BufferOffset
-> ByteCount
-> m ByteCount
forall (m :: * -> *) h.
(?callStack::CallStack, MonadThrow m) =>
HasFS m h
-> Handle h
-> MutableByteArray (PrimState m)
-> BufferOffset
-> ByteCount
-> m ByteCount
hGetBufExactly HasFS m h
hfs Handle h
h MutableByteArray (PrimState m)
buf BufferOffset
0 (Int -> ByteCount
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
MutableByteArray (PrimState m) -> m ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
P.unsafeFreezeByteArray MutableByteArray (PrimState m)
buf