{-# LANGUAGE MagicHash     #-}
{-# LANGUAGE UnboxedTuples #-}
-- |
--
-- Fast hashing of Haskell values.
-- The hash used is XXH3 64bit.
--
module Data.BloomFilter.Hash (
    -- * Basic hash functionality
    Hash,
    Salt,
    Hashable(..),
    hash64,
    hashByteArray,
    -- * Incremental hashing
    Incremental (..),
    HashState,
    incrementalHash,
) where

import           Control.Monad (forM_)
import           Control.Monad.ST (ST, runST)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import           Data.Char (ord)
import qualified Data.Primitive.ByteArray as P
import           Data.Word (Word32, Word64)
import qualified XXH3

-- | A hash value is 64 bits wide.
type Hash = Word64

-- | The salt value to be used for hashes.
type Salt = Word64

-------------------------------------------------------------------------------
-- One shot hashing
-------------------------------------------------------------------------------

-- | The class of types that can be converted to a hash value.
--
-- The instances are meant to be stable, the hash values can be persisted.
--
class Hashable a where
    -- | Compute a 64-bit hash of a value.
    hashSalt64 ::
           Salt  -- ^ seed
        -> a     -- ^ value to hash
        -> Hash

-- | Compute a 64-bit hash.
hash64 :: Hashable a => a -> Hash
hash64 :: forall a. Hashable a => a -> Hash
hash64 = Hash -> a -> Hash
forall a. Hashable a => Hash -> a -> Hash
hashSalt64 Hash
0

instance Hashable () where
    hashSalt64 :: Hash -> () -> Hash
hashSalt64 Hash
salt ()
_ = Hash
salt

instance Hashable Char where
    -- Char's ordinal value should fit into Word32
    hashSalt64 :: Hash -> Char -> Hash
hashSalt64 Hash
salt Char
c = Hash -> Word32 -> Hash
forall a. Hashable a => Hash -> a -> Hash
hashSalt64 Hash
salt (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c) :: Word32)

instance Hashable BS.ByteString where
    hashSalt64 :: Hash -> ByteString -> Hash
hashSalt64 Hash
salt ByteString
bs = ByteString -> Hash -> Hash
XXH3.xxh3_64bit_withSeed_bs ByteString
bs Hash
salt

instance Hashable LBS.ByteString where
    hashSalt64 :: Hash -> ByteString -> Hash
hashSalt64 Hash
salt ByteString
lbs =
        Hash -> (forall s. HashState s -> ST s ()) -> Hash
incrementalHash Hash
salt ((forall s. HashState s -> ST s ()) -> Hash)
-> (forall s. HashState s -> ST s ()) -> Hash
forall a b. (a -> b) -> a -> b
$ \HashState s
s ->
        [ByteString] -> (ByteString -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (ByteString -> [ByteString]
LBS.toChunks ByteString
lbs) ((ByteString -> ST s ()) -> ST s ())
-> (ByteString -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \ByteString
bs ->
        HashState s -> ByteString -> ST s ()
forall a s. Incremental a => HashState s -> a -> ST s ()
forall s. HashState s -> ByteString -> ST s ()
update HashState s
s ByteString
bs

instance Hashable P.ByteArray where
    hashSalt64 :: Hash -> ByteArray -> Hash
hashSalt64 Hash
salt ByteArray
ba = ByteArray -> Int -> Int -> Hash -> Hash
XXH3.xxh3_64bit_withSeed_ba ByteArray
ba Int
0 (ByteArray -> Int
P.sizeofByteArray ByteArray
ba) Hash
salt

instance Hashable Word64 where
    hashSalt64 :: Hash -> Hash -> Hash
hashSalt64 Hash
salt Hash
w = Hash -> Hash -> Hash
XXH3.xxh3_64bit_withSeed_w64 Hash
w Hash
salt

instance Hashable Word32 where
    hashSalt64 :: Hash -> Word32 -> Hash
hashSalt64 Hash
salt Word32
w = Word32 -> Hash -> Hash
XXH3.xxh3_64bit_withSeed_w32 Word32
w Hash
salt

instance Hashable Word where
    hashSalt64 :: Hash -> Word -> Hash
hashSalt64 Hash
salt Word
n = Hash -> Hash -> Hash
forall a. Hashable a => Hash -> a -> Hash
hashSalt64 Hash
salt (Word -> Hash
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
n :: Word64)
    --32bit support would need some CPP here to select based on word size

instance Hashable Int where
    hashSalt64 :: Hash -> Int -> Hash
hashSalt64 Hash
salt Int
n = Hash -> Word -> Hash
forall a. Hashable a => Hash -> a -> Hash
hashSalt64 Hash
salt (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n :: Word)

{- Note [Tree hashing]

We recursively hash inductive types (instead e.g. just serially hashing
their fields). Why?

So ("", "x") and ("x", "") or [[],[],[""]], [[],[""],[]] and [[""],[],[]]
have different hash values!

Another approach would be to have injective serialisation,
but then 'Incremental BS.ByteString' instance (e.g.) would need to serialise
the length, so we'd need third class for "pieces", keeping 'Incremental'
just adding bytes to the state (without any extras).

-}

instance Hashable a => Hashable [a] where
    hashSalt64 :: Hash -> [a] -> Hash
hashSalt64 Hash
salt [a]
xs = Hash -> (forall s. HashState s -> ST s ()) -> Hash
incrementalHash Hash
salt ((forall s. HashState s -> ST s ()) -> Hash)
-> (forall s. HashState s -> ST s ()) -> Hash
forall a b. (a -> b) -> a -> b
$ \HashState s
s -> [a] -> (a -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [a]
xs ((a -> ST s ()) -> ST s ()) -> (a -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \a
x ->
        HashState s -> Hash -> ST s ()
forall a s. Incremental a => HashState s -> a -> ST s ()
forall s. HashState s -> Hash -> ST s ()
update HashState s
s (a -> Hash
forall a. Hashable a => a -> Hash
hash64 a
x)

instance (Hashable a, Hashable b) => Hashable (a, b) where
    hashSalt64 :: Hash -> (a, b) -> Hash
hashSalt64 Hash
salt (a
x, b
y) = Hash -> (forall s. HashState s -> ST s ()) -> Hash
incrementalHash Hash
salt ((forall s. HashState s -> ST s ()) -> Hash)
-> (forall s. HashState s -> ST s ()) -> Hash
forall a b. (a -> b) -> a -> b
$ \HashState s
s -> do
        HashState s -> Hash -> ST s ()
forall a s. Incremental a => HashState s -> a -> ST s ()
forall s. HashState s -> Hash -> ST s ()
update HashState s
s (a -> Hash
forall a. Hashable a => a -> Hash
hash64 a
x)
        HashState s -> Hash -> ST s ()
forall a s. Incremental a => HashState s -> a -> ST s ()
forall s. HashState s -> Hash -> ST s ()
update HashState s
s (b -> Hash
forall a. Hashable a => a -> Hash
hash64 b
y)

-- | Hash a (part of) 'P.ByteArray'.
hashByteArray :: P.ByteArray -> Int -> Int -> Salt -> Hash
hashByteArray :: ByteArray -> Int -> Int -> Hash -> Hash
hashByteArray = ByteArray -> Int -> Int -> Hash -> Hash
XXH3.xxh3_64bit_withSeed_ba

-------------------------------------------------------------------------------
-- Incremental hashing
-------------------------------------------------------------------------------

-- | Hash state for incremental hashing
newtype HashState s = HashState (XXH3.XXH3_State s)

-- | The class of types that can be incrementally hashed.
class Incremental a where
    update :: HashState s -> a -> ST s ()

instance Incremental BS.ByteString where
    update :: forall s. HashState s -> ByteString -> ST s ()
update (HashState XXH3_State s
s) = XXH3_State s -> ByteString -> ST s ()
forall s. XXH3_State s -> ByteString -> ST s ()
XXH3.xxh3_64bit_update_bs XXH3_State s
s

instance Incremental Word32 where
    update :: forall s. HashState s -> Word32 -> ST s ()
update (HashState XXH3_State s
s) = XXH3_State s -> Word32 -> ST s ()
forall s. XXH3_State s -> Word32 -> ST s ()
XXH3.xxh3_64bit_update_w32 XXH3_State s
s

instance Incremental Word64 where
    update :: forall s. HashState s -> Hash -> ST s ()
update (HashState XXH3_State s
s) = XXH3_State s -> Hash -> ST s ()
forall s. XXH3_State s -> Hash -> ST s ()
XXH3.xxh3_64bit_update_w64 XXH3_State s
s

instance Incremental Char where
    update :: forall s. HashState s -> Char -> ST s ()
update HashState s
s Char
c = HashState s -> Word32 -> ST s ()
forall a s. Incremental a => HashState s -> a -> ST s ()
forall s. HashState s -> Word32 -> ST s ()
update HashState s
s (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c) :: Word32)

-- | Calculate incrementally constructed hash.
incrementalHash :: Salt -> (forall s. HashState s -> ST s ()) -> Hash
incrementalHash :: Hash -> (forall s. HashState s -> ST s ()) -> Hash
incrementalHash Hash
seed forall s. HashState s -> ST s ()
f = (forall s. ST s Hash) -> Hash
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s Hash) -> Hash) -> (forall s. ST s Hash) -> Hash
forall a b. (a -> b) -> a -> b
$ do
    XXH3_State s
s <- ST s (XXH3_State s)
forall s. ST s (XXH3_State s)
XXH3.xxh3_64bit_createState
    XXH3_State s -> Hash -> ST s ()
forall s. XXH3_State s -> Hash -> ST s ()
XXH3.xxh3_64bit_reset_withSeed XXH3_State s
s Hash
seed
    HashState s -> ST s ()
forall s. HashState s -> ST s ()
f (XXH3_State s -> HashState s
forall s. XXH3_State s -> HashState s
HashState XXH3_State s
s)
    XXH3_State s -> ST s Hash
forall s. XXH3_State s -> ST s Hash
XXH3.xxh3_64bit_digest XXH3_State s
s