{-# LANGUAGE CPP           #-}
{-# LANGUAGE MagicHash     #-}
{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_HADDOCK not-home #-}
-- | This module defines the 'Bloom' and 'MBloom' types and all the functions
-- that need direct knowledge of and access to the representation. This forms
-- the trusted base.
module Data.BloomFilter.Classic.Internal (
    -- * Mutable Bloom filters
    MBloom,
    new,
    maxSizeBits,

    -- * Immutable Bloom filters
    Bloom,
    bloomInvariant,
    size,

    -- * Hash-based operations
    Hashes,
    hashes,
    insertHashes,
    elemHashes,
    readHashes,

    -- * Conversion
    freeze,
    unsafeFreeze,
    thaw,

    -- * (De)Serialisation
    formatVersion,
    serialise,
    deserialise,
  ) where

import           Control.DeepSeq (NFData (..))
import           Control.Exception (assert)
import           Control.Monad.Primitive (PrimMonad, PrimState)
import           Control.Monad.ST (ST)
import           Data.Bits
import           Data.Kind (Type)
import           Data.Primitive.ByteArray
import           Data.Primitive.PrimArray
import           Data.Primitive.Types (Prim (..))
import           Data.Word (Word64)

import           GHC.Exts (Int (I#), Int#, int2Word#, timesWord2#,
                     uncheckedIShiftL#, word2Int#, (+#))
import qualified GHC.Exts as Exts
import           GHC.Word (Word64 (W64#))

import           Data.BloomFilter.Classic.BitArray (BitArray, MBitArray)
import qualified Data.BloomFilter.Classic.BitArray as BitArray
import           Data.BloomFilter.Classic.Calc
import           Data.BloomFilter.Hash

-- | The version of the format used by 'serialise' and 'deserialise'. The
-- format number will change when there is an incompatible change in the
-- library, such that deserialising and using the filter will not work.
-- This can include more than just changes to the serialised format, for
-- example changes to hash functions or how the hash is mapped to bits.
--
-- Note that the format produced does not include this version. Version
-- checking is the responsibility of the user of the library.
--
-- The library guarantes that the format version value for the classic
-- ("Data.BloomFilter.Classic") and blocked ("Data.BloomFilter.Blocked")
-- implementation will not overlap with each other or any previous value used
-- by either implementation. So switching between the two implementations will
-- always be detectable and unambigious.
--
-- History:
--
-- * Version 0: original
--
-- * Version 1: changed range reduction (of hash to bit index) from remainder
--   to method based on multiplication.
--
formatVersion :: Int
formatVersion :: Int
formatVersion = Int
1

-------------------------------------------------------------------------------
-- Mutable Bloom filters
--

type MBloom :: Type -> Type -> Type
-- | A mutable Bloom filter, for use within the 'ST' monad.
data MBloom s a = MBloom {
      forall s a. MBloom s a -> Int
mbNumBits   :: {-# UNPACK #-} !Int  -- ^ non-zero
    , forall s a. MBloom s a -> Int
mbNumHashes :: {-# UNPACK #-} !Int
    , forall s a. MBloom s a -> MBitArray s
mbBitArray  :: {-# UNPACK #-} !(MBitArray s)
    }
type role MBloom nominal nominal

instance Show (MBloom s a) where
    show :: MBloom s a -> String
show MBloom s a
mb = String
"MBloom { " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (MBloom s a -> Int
forall s a. MBloom s a -> Int
mbNumBits MBloom s a
mb) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" bits } "

instance NFData (MBloom s a) where
    rnf :: MBloom s a -> ()
rnf !MBloom s a
_ = ()

-- | Create a new mutable Bloom filter.
--
-- The filter size is capped at 'maxSizeBits'.
--
new :: BloomSize -> ST s (MBloom s a)
new :: forall s a. BloomSize -> ST s (MBloom s a)
new BloomSize { Int
sizeBits :: Int
sizeBits :: BloomSize -> Int
sizeBits, Int
sizeHashes :: Int
sizeHashes :: BloomSize -> Int
sizeHashes } = do
    let !mbNumBits :: Int
mbNumBits = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
maxSizeBits Int
sizeBits)
    MBitArray s
mbBitArray <- Int -> ST s (MBitArray s)
forall s. Int -> ST s (MBitArray s)
BitArray.new Int
mbNumBits
    MBloom s a -> ST s (MBloom s a)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure MBloom {
      Int
mbNumBits :: Int
mbNumBits :: Int
mbNumBits,
      mbNumHashes :: Int
mbNumHashes = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 Int
sizeHashes,
      MBitArray s
mbBitArray :: MBitArray s
mbBitArray :: MBitArray s
mbBitArray
    }

-- | The maximum filter size is $2^48$ bits. Tell us if you need bigger bloom
-- filters.
--
maxSizeBits :: Int
maxSizeBits :: Int
maxSizeBits = Int
0x1_0000_0000_0000

insertHashes :: MBloom s a -> Hashes a -> ST s ()
insertHashes :: forall s a. MBloom s a -> Hashes a -> ST s ()
insertHashes MBloom { Int
mbNumBits :: forall s a. MBloom s a -> Int
mbNumBits :: Int
mbNumBits, Int
mbNumHashes :: forall s a. MBloom s a -> Int
mbNumHashes :: Int
mbNumHashes, MBitArray s
mbBitArray :: forall s a. MBloom s a -> MBitArray s
mbBitArray :: MBitArray s
mbBitArray } !Hashes a
h =
    Int -> ST s ()
go Int
0
  where
    go :: Int -> ST s ()
go !Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
mbNumHashes = () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
    go !Int
i = do
      let probe :: Word64
          probe :: Word64
probe = Hashes a -> Int -> Word64
forall {k} (a :: k). Hashes a -> Int -> Word64
evalHashes Hashes a
h Int
i
          index :: Int
          index :: Int
index = Word64 -> Int -> Int
reduceRange64 Word64
probe Int
mbNumBits
      MBitArray s -> Int -> ST s ()
forall s. MBitArray s -> Int -> ST s ()
BitArray.unsafeSet MBitArray s
mbBitArray Int
index
      Int -> ST s ()
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

readHashes :: forall s a. MBloom s a -> Hashes a -> ST s Bool
readHashes :: forall s a. MBloom s a -> Hashes a -> ST s Bool
readHashes MBloom { Int
mbNumBits :: forall s a. MBloom s a -> Int
mbNumBits :: Int
mbNumBits, Int
mbNumHashes :: forall s a. MBloom s a -> Int
mbNumHashes :: Int
mbNumHashes, MBitArray s
mbBitArray :: forall s a. MBloom s a -> MBitArray s
mbBitArray :: MBitArray s
mbBitArray } !Hashes a
h =
    Int -> ST s Bool
go Int
0
  where
    go :: Int -> ST s Bool
    go :: Int -> ST s Bool
go !Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
mbNumHashes = Bool -> ST s Bool
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True
    go !Int
i = do
      let probe :: Word64
          probe :: Word64
probe = Hashes a -> Int -> Word64
forall {k} (a :: k). Hashes a -> Int -> Word64
evalHashes Hashes a
h Int
i
          index :: Int
          index :: Int
index = Word64 -> Int -> Int
reduceRange64 Word64
probe Int
mbNumBits
      Bool
b <- MBitArray s -> Int -> ST s Bool
forall s. MBitArray s -> Int -> ST s Bool
BitArray.unsafeRead MBitArray s
mbBitArray Int
index
      if Bool
b then Int -> ST s Bool
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
           else Bool -> ST s Bool
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False

{-# INLINE deserialise #-}
-- | Overwrite the filter's bit array. Use 'new' to create a filter of the
-- expected size and then use this function to fill in the bit data.
--
-- The callback is expected to write (exactly) the given number of bytes into
-- the given byte array buffer.
--
-- See also 'formatVersion' for compatibility advice.
--
deserialise :: PrimMonad m
            => MBloom (PrimState m) a
            -> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
            -> m ()
deserialise :: forall (m :: * -> *) a.
PrimMonad m =>
MBloom (PrimState m) a
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) -> m ()
deserialise MBloom {MBitArray (PrimState m)
mbBitArray :: forall s a. MBloom s a -> MBitArray s
mbBitArray :: MBitArray (PrimState m)
mbBitArray} MutableByteArray (PrimState m) -> Int -> Int -> m ()
fill =
    MBitArray (PrimState m)
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) -> m ()
forall (m :: * -> *).
PrimMonad m =>
MBitArray (PrimState m)
-> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) -> m ()
BitArray.deserialise MBitArray (PrimState m)
mbBitArray MutableByteArray (PrimState m) -> Int -> Int -> m ()
fill


-------------------------------------------------------------------------------
-- Immutable Bloom filters
--

type Bloom :: Type -> Type
-- | An immutable Bloom filter.
data Bloom a = Bloom {
      forall a. Bloom a -> Int
numBits   :: {-# UNPACK #-} !Int  -- ^ non-zero
    , forall a. Bloom a -> Int
numHashes :: {-# UNPACK #-} !Int
    , forall a. Bloom a -> BitArray
bitArray  :: {-# UNPACK #-} !BitArray
    }
  deriving stock Bloom a -> Bloom a -> Bool
(Bloom a -> Bloom a -> Bool)
-> (Bloom a -> Bloom a -> Bool) -> Eq (Bloom a)
forall a. Bloom a -> Bloom a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall a. Bloom a -> Bloom a -> Bool
== :: Bloom a -> Bloom a -> Bool
$c/= :: forall a. Bloom a -> Bloom a -> Bool
/= :: Bloom a -> Bloom a -> Bool
Eq
type role Bloom nominal

bloomInvariant :: Bloom a -> Bool
bloomInvariant :: forall a. Bloom a -> Bool
bloomInvariant Bloom { Int
numBits :: forall a. Bloom a -> Int
numBits :: Int
numBits, Int
numHashes :: forall a. Bloom a -> Int
numHashes :: Int
numHashes, bitArray :: forall a. Bloom a -> BitArray
bitArray = BitArray.BitArray PrimArray Word64
pa } =
       Int
numBits Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
    Bool -> Bool -> Bool
&& Int
numBits Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
2Int -> Int -> Int
forall a b. (Num a, Integral b) => a -> b -> a
^(Int
48 :: Int)
    Bool -> Bool -> Bool
&& Int -> Int
forall {a}. (Bits a, Num a) => a -> a
ceilDiv64 Int
numBits Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== PrimArray Word64 -> Int
forall a. Prim a => PrimArray a -> Int
sizeofPrimArray PrimArray Word64
pa
    Bool -> Bool -> Bool
&& Int
numHashes Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
  where
    ceilDiv64 :: a -> a
ceilDiv64 a
x = a -> Int -> a
forall a. Bits a => a -> Int -> a
unsafeShiftR (a
x a -> a -> a
forall a. Num a => a -> a -> a
+ a
63) Int
6

instance Show (Bloom a) where
    show :: Bloom a -> String
show Bloom a
mb = String
"Bloom { " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Bloom a -> Int
forall a. Bloom a -> Int
numBits Bloom a
mb) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" bits } "

instance NFData (Bloom a) where
    rnf :: Bloom a -> ()
rnf !Bloom a
_ = ()

-- | Return the size of the Bloom filter.
size :: Bloom a -> BloomSize
size :: forall a. Bloom a -> BloomSize
size Bloom { Int
numBits :: forall a. Bloom a -> Int
numBits :: Int
numBits, Int
numHashes :: forall a. Bloom a -> Int
numHashes :: Int
numHashes } =
    BloomSize {
      sizeBits :: Int
sizeBits   = Int
numBits,
      sizeHashes :: Int
sizeHashes = Int
numHashes
    }

-- | Query an immutable Bloom filter for membership using already constructed
-- 'Hashes' value.
elemHashes :: Bloom a -> Hashes a -> Bool
elemHashes :: forall a. Bloom a -> Hashes a -> Bool
elemHashes Bloom { Int
numBits :: forall a. Bloom a -> Int
numBits :: Int
numBits, Int
numHashes :: forall a. Bloom a -> Int
numHashes :: Int
numHashes, BitArray
bitArray :: forall a. Bloom a -> BitArray
bitArray :: BitArray
bitArray } !Hashes a
h =
    Int -> Bool
go Int
0
  where
    go :: Int -> Bool
    go :: Int -> Bool
go !Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
numHashes = Bool
True
    go !Int
i =
      let probe :: Word64
          probe :: Word64
probe = Hashes a -> Int -> Word64
forall {k} (a :: k). Hashes a -> Int -> Word64
evalHashes Hashes a
h Int
i
          index :: Int
          index :: Int
index = Word64 -> Int -> Int
reduceRange64 Word64
probe Int
numBits
       in if BitArray -> Int -> Bool
BitArray.unsafeIndex BitArray
bitArray Int
index
            then Int -> Bool
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
            else Bool
False

-- | Serialise the bloom filter to a 'BloomSize' (which is needed to
-- deserialise) and a 'ByteArray' along with the offset and length containing
-- the filter's bit data.
--
-- See also 'formatVersion' for compatibility advice.
--
serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int)
serialise :: forall a. Bloom a -> (BloomSize, ByteArray, Int, Int)
serialise b :: Bloom a
b@Bloom{BitArray
bitArray :: forall a. Bloom a -> BitArray
bitArray :: BitArray
bitArray} =
    (Bloom a -> BloomSize
forall a. Bloom a -> BloomSize
size Bloom a
b, ByteArray
ba, Int
off, Int
len)
  where
    (ByteArray
ba, Int
off, Int
len) = BitArray -> (ByteArray, Int, Int)
BitArray.serialise BitArray
bitArray


-------------------------------------------------------------------------------
-- Conversions between mutable and immutable Bloom filters
--

-- | Create an immutable Bloom filter from a mutable one.  The mutable
-- filter may be modified afterwards.
freeze :: MBloom s a -> ST s (Bloom a)
freeze :: forall s a. MBloom s a -> ST s (Bloom a)
freeze MBloom { Int
mbNumBits :: forall s a. MBloom s a -> Int
mbNumBits :: Int
mbNumBits, Int
mbNumHashes :: forall s a. MBloom s a -> Int
mbNumHashes :: Int
mbNumHashes, MBitArray s
mbBitArray :: forall s a. MBloom s a -> MBitArray s
mbBitArray :: MBitArray s
mbBitArray } = do
    BitArray
bitArray <- MBitArray s -> ST s BitArray
forall s. MBitArray s -> ST s BitArray
BitArray.freeze MBitArray s
mbBitArray
    let !bf :: Bloom a
bf = Bloom {
                numBits :: Int
numBits   = Int
mbNumBits,
                numHashes :: Int
numHashes = Int
mbNumHashes,
                BitArray
bitArray :: BitArray
bitArray :: BitArray
bitArray
              }
    Bool -> ST s (Bloom a) -> ST s (Bloom a)
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Bloom a -> Bool
forall a. Bloom a -> Bool
bloomInvariant Bloom a
bf) (ST s (Bloom a) -> ST s (Bloom a))
-> ST s (Bloom a) -> ST s (Bloom a)
forall a b. (a -> b) -> a -> b
$ Bloom a -> ST s (Bloom a)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bloom a
bf

-- | Create an immutable Bloom filter from a mutable one without copying. The
-- mutable filter /must not/ be modified afterwards. For a safer creation
-- interface, use 'freeze' or 'create'.
unsafeFreeze :: MBloom s a -> ST s (Bloom a)
unsafeFreeze :: forall s a. MBloom s a -> ST s (Bloom a)
unsafeFreeze MBloom { Int
mbNumBits :: forall s a. MBloom s a -> Int
mbNumBits :: Int
mbNumBits, Int
mbNumHashes :: forall s a. MBloom s a -> Int
mbNumHashes :: Int
mbNumHashes, MBitArray s
mbBitArray :: forall s a. MBloom s a -> MBitArray s
mbBitArray :: MBitArray s
mbBitArray } = do
    BitArray
bitArray <- MBitArray s -> ST s BitArray
forall s. MBitArray s -> ST s BitArray
BitArray.unsafeFreeze MBitArray s
mbBitArray
    let !bf :: Bloom a
bf = Bloom {
                numBits :: Int
numBits   = Int
mbNumBits,
                numHashes :: Int
numHashes = Int
mbNumHashes,
                BitArray
bitArray :: BitArray
bitArray :: BitArray
bitArray
              }
    Bool -> ST s (Bloom a) -> ST s (Bloom a)
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Bloom a -> Bool
forall a. Bloom a -> Bool
bloomInvariant Bloom a
bf) (ST s (Bloom a) -> ST s (Bloom a))
-> ST s (Bloom a) -> ST s (Bloom a)
forall a b. (a -> b) -> a -> b
$ Bloom a -> ST s (Bloom a)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bloom a
bf

-- | Copy an immutable Bloom filter to create a mutable one.  There is
-- no non-copying equivalent.
thaw :: Bloom a -> ST s (MBloom s a)
thaw :: forall a s. Bloom a -> ST s (MBloom s a)
thaw Bloom { Int
numBits :: forall a. Bloom a -> Int
numBits :: Int
numBits, Int
numHashes :: forall a. Bloom a -> Int
numHashes :: Int
numHashes, BitArray
bitArray :: forall a. Bloom a -> BitArray
bitArray :: BitArray
bitArray } = do
    MBitArray s
mbBitArray <- BitArray -> ST s (MBitArray s)
forall s. BitArray -> ST s (MBitArray s)
BitArray.thaw BitArray
bitArray
    MBloom s a -> ST s (MBloom s a)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure MBloom {
      mbNumBits :: Int
mbNumBits   = Int
numBits,
      mbNumHashes :: Int
mbNumHashes = Int
numHashes,
      MBitArray s
mbBitArray :: MBitArray s
mbBitArray :: MBitArray s
mbBitArray
    }


-------------------------------------------------------------------------------
-- Low level utils
--

-- | Given a word sampled uniformly from the full 'Word64' range, such as a
-- hash, reduce it fairly to a value in the range @[0,n)@.
--
-- See <https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/>
--
{-# INLINE reduceRange64 #-}
reduceRange64 :: Word64 -- ^ Sample from 0..2^64-1
              -> Int -- ^ upper bound of range [0,n)
              -> Int -- ^ result within range
reduceRange64 :: Word64 -> Int -> Int
reduceRange64 (W64# Word64#
x) (I# Int#
n) =
    -- Note that we use widening multiplication of two 64bit numbers, with a
    -- 128bit result. GHC provides a primop which returns the 128bit result as
    -- a pair of 64bit words. There are (as of 2025) no high level wrappers in
    -- the base or primitive packages, so we use the primops directly.
    case Word# -> Word# -> (# Word#, Word# #)
timesWord2# (Word64# -> Word#
word64ToWordShim# Word64#
x) (Int# -> Word#
int2Word# Int#
n) of
      (# Word#
high, Word#
_low #) -> Int# -> Int
I# (Word# -> Int#
word2Int# Word#
high)
    -- Note that while x can cover the full Word64 range, since the result is
    -- less than n, and since n was an Int then the result fits an Int too.

{-# INLINE word64ToWordShim# #-}

#if MIN_VERSION_base(4,17,0)
word64ToWordShim# :: Exts.Word64# -> Exts.Word#
word64ToWordShim# :: Word64# -> Word#
word64ToWordShim# = Word64# -> Word#
Exts.word64ToWord#
#else
word64ToWordShim# :: Exts.Word# -> Exts.Word#
word64ToWordShim# x# = x#
#endif

-------------------------------------------------------------------------------
-- Hashes
--

-- | A pair of hashes used for a double hashing scheme.
--
-- See 'evalHashes'.
data Hashes a = Hashes !Hash !Hash
type role Hashes nominal

instance Prim (Hashes a) where
    sizeOfType# :: Proxy (Hashes a) -> Int#
sizeOfType# Proxy (Hashes a)
_ = Int#
16#
    alignmentOfType# :: Proxy (Hashes a) -> Int#
alignmentOfType# Proxy (Hashes a)
_ = Int#
8#

    indexByteArray# :: ByteArray# -> Int# -> Hashes a
indexByteArray# ByteArray#
ba Int#
i = Word64 -> Word64 -> Hashes a
forall {k} (a :: k). Word64 -> Word64 -> Hashes a
Hashes
        (ByteArray# -> Int# -> Word64
forall a. Prim a => ByteArray# -> Int# -> a
indexByteArray# ByteArray#
ba (Int# -> Int#
indexLo Int#
i))
        (ByteArray# -> Int# -> Word64
forall a. Prim a => ByteArray# -> Int# -> a
indexByteArray# ByteArray#
ba (Int# -> Int#
indexHi Int#
i))
    readByteArray# :: forall s.
MutableByteArray# s -> Int# -> State# s -> (# State# s, Hashes a #)
readByteArray# MutableByteArray# s
ba Int#
i State# s
s1 =
        case MutableByteArray# s -> Int# -> State# s -> (# State# s, Word64 #)
forall s.
MutableByteArray# s -> Int# -> State# s -> (# State# s, Word64 #)
forall a s.
Prim a =>
MutableByteArray# s -> Int# -> State# s -> (# State# s, a #)
readByteArray# MutableByteArray# s
ba (Int# -> Int#
indexLo Int#
i) State# s
s1 of { (# State# s
s2, Word64
lo #) ->
        case MutableByteArray# s -> Int# -> State# s -> (# State# s, Word64 #)
forall s.
MutableByteArray# s -> Int# -> State# s -> (# State# s, Word64 #)
forall a s.
Prim a =>
MutableByteArray# s -> Int# -> State# s -> (# State# s, a #)
readByteArray# MutableByteArray# s
ba (Int# -> Int#
indexHi Int#
i) State# s
s2 of { (# State# s
s3, Word64
hi #) ->
        (# State# s
s3, Word64 -> Word64 -> Hashes a
forall {k} (a :: k). Word64 -> Word64 -> Hashes a
Hashes Word64
lo Word64
hi #)
        }}
    writeByteArray# :: forall s.
MutableByteArray# s -> Int# -> Hashes a -> State# s -> State# s
writeByteArray# MutableByteArray# s
ba Int#
i (Hashes Word64
lo Word64
hi) State# s
s =
        MutableByteArray# s -> Int# -> Word64 -> State# s -> State# s
forall s.
MutableByteArray# s -> Int# -> Word64 -> State# s -> State# s
forall a s.
Prim a =>
MutableByteArray# s -> Int# -> a -> State# s -> State# s
writeByteArray# MutableByteArray# s
ba (Int# -> Int#
indexHi Int#
i) Word64
hi (MutableByteArray# s -> Int# -> Word64 -> State# s -> State# s
forall s.
MutableByteArray# s -> Int# -> Word64 -> State# s -> State# s
forall a s.
Prim a =>
MutableByteArray# s -> Int# -> a -> State# s -> State# s
writeByteArray# MutableByteArray# s
ba (Int# -> Int#
indexLo Int#
i) Word64
lo State# s
s)

    indexOffAddr# :: Addr# -> Int# -> Hashes a
indexOffAddr# Addr#
ba Int#
i = Word64 -> Word64 -> Hashes a
forall {k} (a :: k). Word64 -> Word64 -> Hashes a
Hashes
        (Addr# -> Int# -> Word64
forall a. Prim a => Addr# -> Int# -> a
indexOffAddr# Addr#
ba (Int# -> Int#
indexLo Int#
i))
        (Addr# -> Int# -> Word64
forall a. Prim a => Addr# -> Int# -> a
indexOffAddr# Addr#
ba (Int# -> Int#
indexHi Int#
i))
    readOffAddr# :: forall s. Addr# -> Int# -> State# s -> (# State# s, Hashes a #)
readOffAddr# Addr#
ba Int#
i State# s
s1 =
        case Addr# -> Int# -> State# s -> (# State# s, Word64 #)
forall s. Addr# -> Int# -> State# s -> (# State# s, Word64 #)
forall a s.
Prim a =>
Addr# -> Int# -> State# s -> (# State# s, a #)
readOffAddr# Addr#
ba (Int# -> Int#
indexLo Int#
i) State# s
s1 of { (# State# s
s2, Word64
lo #) ->
        case Addr# -> Int# -> State# s -> (# State# s, Word64 #)
forall s. Addr# -> Int# -> State# s -> (# State# s, Word64 #)
forall a s.
Prim a =>
Addr# -> Int# -> State# s -> (# State# s, a #)
readOffAddr# Addr#
ba (Int# -> Int#
indexHi Int#
i) State# s
s2 of { (# State# s
s3, Word64
hi #) ->
        (# State# s
s3, Word64 -> Word64 -> Hashes a
forall {k} (a :: k). Word64 -> Word64 -> Hashes a
Hashes Word64
lo Word64
hi #)
        }}
    writeOffAddr# :: forall s. Addr# -> Int# -> Hashes a -> State# s -> State# s
writeOffAddr# Addr#
ba Int#
i (Hashes Word64
lo Word64
hi) State# s
s =
        Addr# -> Int# -> Word64 -> State# s -> State# s
forall s. Addr# -> Int# -> Word64 -> State# s -> State# s
forall a s. Prim a => Addr# -> Int# -> a -> State# s -> State# s
writeOffAddr# Addr#
ba (Int# -> Int#
indexHi Int#
i) Word64
hi (Addr# -> Int# -> Word64 -> State# s -> State# s
forall s. Addr# -> Int# -> Word64 -> State# s -> State# s
forall a s. Prim a => Addr# -> Int# -> a -> State# s -> State# s
writeOffAddr# Addr#
ba (Int# -> Int#
indexLo Int#
i) Word64
lo State# s
s)

indexLo :: Int# -> Int#
indexLo :: Int# -> Int#
indexLo Int#
i = Int# -> Int# -> Int#
uncheckedIShiftL# Int#
i Int#
1#

indexHi :: Int# -> Int#
indexHi :: Int# -> Int#
indexHi Int#
i = Int# -> Int# -> Int#
uncheckedIShiftL# Int#
i Int#
1# Int# -> Int# -> Int#
+# Int#
1#

{- Note [Original Hashes]

Compute a list of 32-bit hashes relatively cheaply.  The value to
hash is inspected at most twice, regardless of the number of hashes
requested.

We use a variant of Kirsch and Mitzenmacher's technique from \"Less
Hashing, Same Performance: Building a Better Bloom Filter\",
<http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf>.

Where Kirsch and Mitzenmacher multiply the second hash by a
coefficient, we shift right by the coefficient.  This offers better
performance (as a shift is much cheaper than a multiply), and the
low order bits of the final hash stay well mixed.

-}

{- Note: [Hashes]

On the first glance the 'evalHashes' scheme seems dubious.

Firstly, it's original performance motivation is dubious.

> multiply the second hash by a coefficient

While the scheme double hashing scheme is presented in
theoretical analysis as

    g(i) = a + i * b

In practice it's implemented in a loop which looks like

    g[0] = a
    for (i = 1; i < k; i++) {
        a += b;
        g[i] = a;
    }

I.e. with just an addition.

Secondly there is no analysis anywhere about the
'evalHashes' scheme.

Peter Dillinger's thesis (Adaptive Approximate State Storage)
discusses various fast hashing schemes (section 6.5),
mentioning why ordinary "double hashing" is weak scheme.

Issue 1: when second hash value is bad, e.g. not coprime with bloom filters size in bits,
we can get repetitions (worst case 0, or m/2).

Issue 2: in bloom filter scenario, whether we do a + i * b or h0 - i * b' (with b' = -b)
as we probe all indices (as set) doesn't matter, not sequentially (like in hash table).
So we lose one bit entropy.

Issue 3: the scheme is prone to partial overlap.
Two values with the same second hash value could overlap on many indices.

Then Dillinger discusses various schemes which solve this issue.

The Hashes scheme seems to avoid these cuprits.
This is probably because it uses most of the bits of the second hash, even in m = 2^n scenarios.
(normal double hashing and enhances double hashing don't use the high bits or original hash then).
TL;DR Hashes seems to work well in practice.

For the record: RocksDB uses an own scheme as well,
where first hash is used to pick a cache line, and second one to generate probes inside it.
https://github.com/facebook/rocksdb/blob/096fb9b67d19a9a180e7c906b4a0cdb2b2d0c1f6/util/bloom_impl.h

-}

-- | Evalute 'Hashes' family.
--
-- \[
-- g_i = h_0 + \left\lfloor h_1 / 2^i \right\rfloor
-- \]
--
evalHashes :: Hashes a -> Int -> Hash
evalHashes :: forall {k} (a :: k). Hashes a -> Int -> Word64
evalHashes (Hashes Word64
h1 Word64
h2) Int
i = Word64
h1 Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
+ (Word64
h2 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
i)

-- | Create 'Hashes' structure.
--
-- It's simply hashes the value twice using seed 0 and 1.
hashes :: Hashable a => a -> Hashes a
hashes :: forall a. Hashable a => a -> Hashes a
hashes a
v = Word64 -> Word64 -> Hashes a
forall {k} (a :: k). Word64 -> Word64 -> Hashes a
Hashes (Word64 -> a -> Word64
forall a. Hashable a => Word64 -> a -> Word64
hashSalt64 Word64
0 a
v) (Word64 -> a -> Word64
forall a. Hashable a => Word64 -> a -> Word64
hashSalt64 Word64
1 a
v)
{-# INLINE hashes #-}