-- |
-- A fast, space efficient Bloom filter implementation.  A Bloom
-- filter is a set-like data structure that provides a probabilistic
-- membership test.
--
-- * Queries do not give false negatives.  When an element is added to
--   a filter, a subsequent membership test will definitely return
--   'True'.
--
-- * False positives /are/ possible.  If an element has not been added
--   to a filter, a membership test /may/ nevertheless indicate that
--   the element is present.
--
-- This module provides low-level control.  For an easier to use
-- interface, see the "Data.BloomFilter.Easy" module.

module Data.BloomFilter.Mutable (
    -- * Overview
    -- $overview

    -- ** Ease of use
    -- $ease

    -- ** Performance
    -- $performance

    -- * Types
    Hash,
    MBloom,
    MBloom',
    CheapHashes,
    RealHashes,
    -- * Mutable Bloom filters

    -- ** Creation
    new,

    -- ** Accessors
    length,
    elem,

    -- ** Mutation
    insert,
) where

import           Control.Monad (liftM)
import           Control.Monad.ST (ST)
import           Data.BloomFilter.Hash (CheapHashes, Hash, Hashable,
                     Hashes (..), RealHashes)
import           Data.BloomFilter.Mutable.Internal
import           Data.Word (Word64)

import qualified Data.BloomFilter.BitVec64 as V

import           Prelude hiding (elem, length)

-- | Mutable Bloom filter using CheapHashes hashing scheme.
type MBloom s = MBloom' s CheapHashes

-- | Create a new mutable Bloom filter.
--
-- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters.
--
new :: Int                    -- ^ number of hash functions to use
    -> Word64                 -- ^ number of bits in filter
    -> ST s (MBloom' s h a)
new :: forall s (h :: * -> *) a. Int -> Word64 -> ST s (MBloom' s h a)
new Int
hash Word64
numBits = Int -> Word64 -> MBitVec64 s -> MBloom' s h a
forall s (h :: * -> *) a.
Int -> Word64 -> MBitVec64 s -> MBloom' s h a
MBloom Int
hash Word64
numBits' (MBitVec64 s -> MBloom' s h a)
-> ST s (MBitVec64 s) -> ST s (MBloom' s h a)
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Word64 -> ST s (MBitVec64 s)
forall s. Word64 -> ST s (MBitVec64 s)
V.new Word64
numBits'
  where numBits' :: Word64
numBits' | Word64
numBits Word64 -> Word64 -> Bool
forall a. Eq a => a -> a -> Bool
== Word64
0                = Word64
1
                 | Word64
numBits Word64 -> Word64 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word64
0xffff_ffff_ffff = Word64
0x1_0000_0000_0000
                 | Bool
otherwise                   = Word64
numBits

-- | Insert a value into a mutable Bloom filter.  Afterwards, a
-- membership query for the same value is guaranteed to return @True@.
insert :: (Hashes h, Hashable a) => MBloom' s h a -> a -> ST s ()
insert :: forall (h :: * -> *) a s.
(Hashes h, Hashable a) =>
MBloom' s h a -> a -> ST s ()
insert !MBloom' s h a
mb !a
x = MBloom' s h a -> h a -> ST s ()
forall (h :: * -> *) s a.
Hashes h =>
MBloom' s h a -> h a -> ST s ()
insertHashes MBloom' s h a
mb (a -> h a
forall a. Hashable a => a -> h a
forall (h :: * -> *) a. (Hashes h, Hashable a) => a -> h a
makeHashes a
x)

insertHashes :: Hashes h => MBloom' s h a -> h a -> ST s ()
insertHashes :: forall (h :: * -> *) s a.
Hashes h =>
MBloom' s h a -> h a -> ST s ()
insertHashes (MBloom Int
k Word64
m MBitVec64 s
v) !h a
h = Int -> ST s ()
go Int
0
  where
    go :: Int -> ST s ()
go !Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
k = () -> ST s ()
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
          | Bool
otherwise = let !idx :: Word64
idx = h a -> Int -> Word64
forall a. h a -> Int -> Word64
forall (h :: * -> *) a. Hashes h => h a -> Int -> Word64
evalHashes h a
h Int
i Word64 -> Word64 -> Word64
forall a. Integral a => a -> a -> a
`rem` Word64
m
                        in MBitVec64 s -> Word64 -> Bool -> ST s ()
forall s. MBitVec64 s -> Word64 -> Bool -> ST s ()
V.unsafeWrite MBitVec64 s
v Word64
idx Bool
True ST s () -> ST s () -> ST s ()
forall a b. ST s a -> ST s b -> ST s b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> ST s ()
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

-- | Query a mutable Bloom filter for membership.  If the value is
-- present, return @True@.  If the value is not present, there is
-- /still/ some possibility that @True@ will be returned.
elem :: (Hashes h, Hashable a) => a -> MBloom' s h a -> ST s Bool
elem :: forall (h :: * -> *) a s.
(Hashes h, Hashable a) =>
a -> MBloom' s h a -> ST s Bool
elem a
elt MBloom' s h a
mb = h a -> MBloom' s h a -> ST s Bool
forall (h :: * -> *) s a.
Hashes h =>
h a -> MBloom' s h a -> ST s Bool
elemHashes (a -> h a
forall a. Hashable a => a -> h a
forall (h :: * -> *) a. (Hashes h, Hashable a) => a -> h a
makeHashes a
elt) MBloom' s h a
mb

elemHashes :: forall h s a. Hashes h => h a -> MBloom' s h a -> ST s Bool
elemHashes :: forall (h :: * -> *) s a.
Hashes h =>
h a -> MBloom' s h a -> ST s Bool
elemHashes !h a
ch (MBloom Int
k Word64
m MBitVec64 s
v) = Int -> ST s Bool
go Int
0 where
    go :: Int -> ST s Bool
    go :: Int -> ST s Bool
go !Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
k    = Bool -> ST s Bool
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return Bool
True
          | Bool
otherwise = do let !idx' :: Word64
idx' = h a -> Int -> Word64
forall a. h a -> Int -> Word64
forall (h :: * -> *) a. Hashes h => h a -> Int -> Word64
evalHashes h a
ch Int
i
                           let !idx :: Word64
idx = Word64
idx' Word64 -> Word64 -> Word64
forall a. Integral a => a -> a -> a
`rem` Word64
m
                           Bool
b <- MBitVec64 s -> Word64 -> ST s Bool
forall s. MBitVec64 s -> Word64 -> ST s Bool
V.unsafeRead MBitVec64 s
v Word64
idx
                           if Bool
b
                           then Int -> ST s Bool
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
                           else Bool -> ST s Bool
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return Bool
False

-- | Return the size of a mutable Bloom filter, in bits.
length :: MBloom' s h a -> Word64
length :: forall s (h :: * -> *) a. MBloom' s h a -> Word64
length = MBloom' s h a -> Word64
forall s (h :: * -> *) a. MBloom' s h a -> Word64
size

-- $overview
--
-- Each of the functions for creating Bloom filters accepts two parameters:
--
-- * The number of bits that should be used for the filter.  Note that
--   a filter is fixed in size; it cannot be resized after creation.
--
-- * A number of hash functions, /k/, to be used for the filter.
--
-- By choosing these parameters with care, it is possible to tune for
-- a particular false positive rate.
-- The 'Data.BloomFilter.Easy.suggestSizing' function in
-- the "Data.BloomFilter.Easy" module calculates useful estimates for
-- these parameters.

-- $ease
--
-- This module provides both mutable interfaces for creating and
-- querying a Bloom filter.  It is most useful as a low-level way to
-- manage a Bloom filter with a custom set of characteristics.

-- $performance
--
-- The implementation has been carefully tuned for high performance
-- and low space consumption.