{-# LANGUAGE MagicHash     #-}
{-# LANGUAGE UnboxedTuples #-}
-- |
--
-- Fast hashing of Haskell values.
-- The hash used is XXH3 64bit.
--
module Data.BloomFilter.Hash (
    -- * Basic hash functionality
    Hash,
    Hashable(..),
    hash64,
    hashByteArray,
    -- * Incremental hashing
    Incremental (..),
    HashState,
    incrementalHash,
) where

import           Control.Monad (forM_)
import           Control.Monad.ST (ST, runST)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import           Data.Char (ord)
import qualified Data.Primitive.ByteArray as P
import           Data.Word (Word32, Word64)
import qualified XXH3

-- | A hash value is 64 bits wide.
type Hash = Word64

-------------------------------------------------------------------------------
-- One shot hashing
-------------------------------------------------------------------------------

-- | The class of types that can be converted to a hash value.
--
-- The instances are meant to be stable, the hash values can be persisted.
--
class Hashable a where
    -- | Compute a 64-bit hash of a value.
    hashSalt64 ::
           Word64  -- ^ seed
        -> a       -- ^ value to hash
        -> Word64

-- | Compute a 64-bit hash.
hash64 :: Hashable a => a -> Word64
hash64 :: forall a. Hashable a => a -> Word64
hash64 = Word64 -> a -> Word64
forall a. Hashable a => Word64 -> a -> Word64
hashSalt64 Word64
0

instance Hashable () where
    hashSalt64 :: Word64 -> () -> Word64
hashSalt64 Word64
salt ()
_ = Word64
salt

instance Hashable Char where
    -- Char's ordinal value should fit into Word32
    hashSalt64 :: Word64 -> Char -> Word64
hashSalt64 Word64
salt Char
c = Word64 -> Word32 -> Word64
forall a. Hashable a => Word64 -> a -> Word64
hashSalt64 Word64
salt (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c) :: Word32)

instance Hashable BS.ByteString where
    hashSalt64 :: Word64 -> ByteString -> Word64
hashSalt64 Word64
salt ByteString
bs = ByteString -> Word64 -> Word64
XXH3.xxh3_64bit_withSeed_bs ByteString
bs Word64
salt

instance Hashable LBS.ByteString where
    hashSalt64 :: Word64 -> ByteString -> Word64
hashSalt64 Word64
salt ByteString
lbs =
        Word64 -> (forall s. HashState s -> ST s ()) -> Word64
incrementalHash Word64
salt ((forall s. HashState s -> ST s ()) -> Word64)
-> (forall s. HashState s -> ST s ()) -> Word64
forall a b. (a -> b) -> a -> b
$ \HashState s
s ->
        [ByteString] -> (ByteString -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (ByteString -> [ByteString]
LBS.toChunks ByteString
lbs) ((ByteString -> ST s ()) -> ST s ())
-> (ByteString -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \ByteString
bs ->
        HashState s -> ByteString -> ST s ()
forall a s. Incremental a => HashState s -> a -> ST s ()
forall s. HashState s -> ByteString -> ST s ()
update HashState s
s ByteString
bs

instance Hashable P.ByteArray where
    hashSalt64 :: Word64 -> ByteArray -> Word64
hashSalt64 Word64
salt ByteArray
ba = ByteArray -> Int -> Int -> Word64 -> Word64
XXH3.xxh3_64bit_withSeed_ba ByteArray
ba Int
0 (ByteArray -> Int
P.sizeofByteArray ByteArray
ba) Word64
salt

instance Hashable Word64 where
    hashSalt64 :: Word64 -> Word64 -> Word64
hashSalt64 Word64
salt Word64
w = Word64 -> Word64 -> Word64
XXH3.xxh3_64bit_withSeed_w64 Word64
w Word64
salt

instance Hashable Word32 where
    hashSalt64 :: Word64 -> Word32 -> Word64
hashSalt64 Word64
salt Word32
w = Word32 -> Word64 -> Word64
XXH3.xxh3_64bit_withSeed_w32 Word32
w Word64
salt

instance Hashable Word where
    hashSalt64 :: Word64 -> Word -> Word64
hashSalt64 Word64
salt Word
n = Word64 -> Word64 -> Word64
forall a. Hashable a => Word64 -> a -> Word64
hashSalt64 Word64
salt (Word -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
n :: Word64)
    --32bit support would need some CPP here to select based on word size

instance Hashable Int where
    hashSalt64 :: Word64 -> Int -> Word64
hashSalt64 Word64
salt Int
n = Word64 -> Word -> Word64
forall a. Hashable a => Word64 -> a -> Word64
hashSalt64 Word64
salt (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n :: Word)

{- Note [Tree hashing]

We recursively hash inductive types (instead e.g. just serially hashing
their fields). Why?

So ("", "x") and ("x", "") or [[],[],[""]], [[],[""],[]] and [[""],[],[]]
have different hash values!

Another approach would be to have injective serialisation,
but then 'Incremental BS.ByteString' instance (e.g.) would need to serialise
the length, so we'd need third class for "pieces", keeping 'Incremental'
just adding bytes to the state (without any extras).

-}

instance Hashable a => Hashable [a] where
    hashSalt64 :: Word64 -> [a] -> Word64
hashSalt64 Word64
salt [a]
xs = Word64 -> (forall s. HashState s -> ST s ()) -> Word64
incrementalHash Word64
salt ((forall s. HashState s -> ST s ()) -> Word64)
-> (forall s. HashState s -> ST s ()) -> Word64
forall a b. (a -> b) -> a -> b
$ \HashState s
s -> [a] -> (a -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [a]
xs ((a -> ST s ()) -> ST s ()) -> (a -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \a
x ->
        HashState s -> Word64 -> ST s ()
forall a s. Incremental a => HashState s -> a -> ST s ()
forall s. HashState s -> Word64 -> ST s ()
update HashState s
s (a -> Word64
forall a. Hashable a => a -> Word64
hash64 a
x)

instance (Hashable a, Hashable b) => Hashable (a, b) where
    hashSalt64 :: Word64 -> (a, b) -> Word64
hashSalt64 Word64
salt (a
x, b
y) = Word64 -> (forall s. HashState s -> ST s ()) -> Word64
incrementalHash Word64
salt ((forall s. HashState s -> ST s ()) -> Word64)
-> (forall s. HashState s -> ST s ()) -> Word64
forall a b. (a -> b) -> a -> b
$ \HashState s
s -> do
        HashState s -> Word64 -> ST s ()
forall a s. Incremental a => HashState s -> a -> ST s ()
forall s. HashState s -> Word64 -> ST s ()
update HashState s
s (a -> Word64
forall a. Hashable a => a -> Word64
hash64 a
x)
        HashState s -> Word64 -> ST s ()
forall a s. Incremental a => HashState s -> a -> ST s ()
forall s. HashState s -> Word64 -> ST s ()
update HashState s
s (b -> Word64
forall a. Hashable a => a -> Word64
hash64 b
y)

-- | Hash a (part of) 'P.ByteArray'.
hashByteArray :: P.ByteArray -> Int -> Int -> Word64 -> Word64
hashByteArray :: ByteArray -> Int -> Int -> Word64 -> Word64
hashByteArray = ByteArray -> Int -> Int -> Word64 -> Word64
XXH3.xxh3_64bit_withSeed_ba

-------------------------------------------------------------------------------
-- Incremental hashing
-------------------------------------------------------------------------------

-- | Hash state for incremental hashing
newtype HashState s = HashState (XXH3.XXH3_State s)

-- | The class of types that can be incrementally hashed.
class Incremental a where
    update :: HashState s -> a -> ST s ()

instance Incremental BS.ByteString where
    update :: forall s. HashState s -> ByteString -> ST s ()
update (HashState XXH3_State s
s) = XXH3_State s -> ByteString -> ST s ()
forall s. XXH3_State s -> ByteString -> ST s ()
XXH3.xxh3_64bit_update_bs XXH3_State s
s

instance Incremental Word32 where
    update :: forall s. HashState s -> Word32 -> ST s ()
update (HashState XXH3_State s
s) = XXH3_State s -> Word32 -> ST s ()
forall s. XXH3_State s -> Word32 -> ST s ()
XXH3.xxh3_64bit_update_w32 XXH3_State s
s

instance Incremental Word64 where
    update :: forall s. HashState s -> Word64 -> ST s ()
update (HashState XXH3_State s
s) = XXH3_State s -> Word64 -> ST s ()
forall s. XXH3_State s -> Word64 -> ST s ()
XXH3.xxh3_64bit_update_w64 XXH3_State s
s

instance Incremental Char where
    update :: forall s. HashState s -> Char -> ST s ()
update HashState s
s Char
c = HashState s -> Word32 -> ST s ()
forall a s. Incremental a => HashState s -> a -> ST s ()
forall s. HashState s -> Word32 -> ST s ()
update HashState s
s (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c) :: Word32)

-- | Calculate incrementally constructed hash.
incrementalHash :: Word64 -> (forall s. HashState s -> ST s ()) -> Word64
incrementalHash :: Word64 -> (forall s. HashState s -> ST s ()) -> Word64
incrementalHash Word64
seed forall s. HashState s -> ST s ()
f = (forall s. ST s Word64) -> Word64
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s Word64) -> Word64)
-> (forall s. ST s Word64) -> Word64
forall a b. (a -> b) -> a -> b
$ do
    XXH3_State s
s <- ST s (XXH3_State s)
forall s. ST s (XXH3_State s)
XXH3.xxh3_64bit_createState
    XXH3_State s -> Word64 -> ST s ()
forall s. XXH3_State s -> Word64 -> ST s ()
XXH3.xxh3_64bit_reset_withSeed XXH3_State s
s Word64
seed
    HashState s -> ST s ()
forall s. HashState s -> ST s ()
f (XXH3_State s -> HashState s
forall s. XXH3_State s -> HashState s
HashState XXH3_State s
s)
    XXH3_State s -> ST s Word64
forall s. XXH3_State s -> ST s Word64
XXH3.xxh3_64bit_digest XXH3_State s
s