{-# LANGUAGE CPP                #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE MagicHash          #-}
{-# LANGUAGE TypeFamilies       #-}
{-# LANGUAGE UnboxedTuples      #-}
{-# OPTIONS_HADDOCK not-home #-}
{- HLINT ignore "Redundant lambda" -}

#ifndef __HLINT__
-- HLint would fail to find this file and emit a warning
#include <MachDeps.h>
#endif

-- |
--
-- This module is intended to be imported qualified, to avoid name clashes with
-- "Prelude" functions:
--
-- @
--   import Database.LSMTree.Internal.RawBytes (RawBytes (..))
--   import qualified Database.LSMTree.Internal.RawBytes as RB
-- @
module Database.LSMTree.Internal.RawBytes (
    -- See Note: [Export structure]
    -- * Raw bytes
    RawBytes (..)
    -- * Accessors
    -- ** Length information
  , size
    -- ** Extracting subvectors (slicing)
  , take
  , drop
  , topBits64
    -- * Construction
    -- | Use 'Semigroup' and 'Monoid' operations
    -- ** Restricting memory usage
  , copy
  , force
    -- * Conversions
  , fromVector
  , fromByteArray
    -- ** Lists
  , pack
  , unpack
    -- * @bytestring@ utils
  , fromByteString
  , unsafeFromByteString
  , toByteString
  , unsafePinnedToByteString
  , fromShortByteString
  , builder
  ) where

import           Control.DeepSeq (NFData)
import           Control.Exception (assert)
import           Data.BloomFilter.Hash (Hashable (..), hashByteArray)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as BB
import           Data.ByteString.Short (ShortByteString (SBS))
import qualified Data.ByteString.Short as SBS
import           Data.Primitive.ByteArray (ByteArray (..), compareByteArrays)
import qualified Data.Primitive.ByteArray as BA
import qualified Data.Vector.Primitive as VP
import           Database.LSMTree.Internal.ByteString (byteArrayToByteString,
                     shortByteStringFromTo, tryGetByteArray,
                     unsafePinnedByteArrayToByteString)
import           Database.LSMTree.Internal.Vector
import           Prelude hiding (drop, take)

import           GHC.Exts
import           GHC.Stack
import           GHC.Word

{- Note: [Export structure]
   ~~~~~~~~~~~~~~~~~~~~~~~
   Since RawBytes are very similar to Primitive Vectors, the code is sectioned
   and structured much like the "Data.Vector.Primitive" module.
-}

{-------------------------------------------------------------------------------
  Raw bytes
-------------------------------------------------------------------------------}

-- | Raw bytes with no alignment constraint (i.e. byte aligned), and no
-- guarantee of pinned or unpinned memory (i.e. could be either).
newtype RawBytes = RawBytes (VP.Vector Word8)
  deriving newtype (Int -> RawBytes -> ShowS
[RawBytes] -> ShowS
RawBytes -> String
(Int -> RawBytes -> ShowS)
-> (RawBytes -> String) -> ([RawBytes] -> ShowS) -> Show RawBytes
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> RawBytes -> ShowS
showsPrec :: Int -> RawBytes -> ShowS
$cshow :: RawBytes -> String
show :: RawBytes -> String
$cshowList :: [RawBytes] -> ShowS
showList :: [RawBytes] -> ShowS
Show, RawBytes -> ()
(RawBytes -> ()) -> NFData RawBytes
forall a. (a -> ()) -> NFData a
$crnf :: RawBytes -> ()
rnf :: RawBytes -> ()
NFData)

instance Eq RawBytes where
  RawBytes
bs1 == :: RawBytes -> RawBytes -> Bool
== RawBytes
bs2 = RawBytes -> RawBytes -> Ordering
compareBytes RawBytes
bs1 RawBytes
bs2 Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
EQ

-- | Lexicographical 'Ord' instance.
instance Ord RawBytes where
  compare :: RawBytes -> RawBytes -> Ordering
compare = RawBytes -> RawBytes -> Ordering
compareBytes

-- | Based on @Ord 'ShortByteString'@.
compareBytes :: RawBytes -> RawBytes -> Ordering
compareBytes :: RawBytes -> RawBytes -> Ordering
compareBytes rb1 :: RawBytes
rb1@(RawBytes Vector Word8
vec1) rb2 :: RawBytes
rb2@(RawBytes Vector Word8
vec2) =
    let !len1 :: Int
len1 = RawBytes -> Int
size RawBytes
rb1
        !len2 :: Int
len2 = RawBytes -> Int
size RawBytes
rb2
        !len :: Int
len  = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
len1 Int
len2
     in case ByteArray -> Int -> ByteArray -> Int -> Int -> Ordering
compareByteArrays ByteArray
ba1 Int
off1 ByteArray
ba2 Int
off2 Int
len of
          Ordering
EQ | Int
len1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
len2 -> Ordering
LT
             | Int
len1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
len2 -> Ordering
GT
          Ordering
o  -> Ordering
o
  where
    VP.Vector Int
off1 Int
_size1 ByteArray
ba1 = Vector Word8
vec1
    VP.Vector Int
off2 Int
_size2 ByteArray
ba2 = Vector Word8
vec2

instance Hashable RawBytes where
  hashSalt64 :: Word64 -> RawBytes -> Word64
  hashSalt64 :: Word64 -> RawBytes -> Word64
hashSalt64 = Word64 -> RawBytes -> Word64
hash

hash :: Word64 -> RawBytes -> Word64
hash :: Word64 -> RawBytes -> Word64
hash Word64
salt (RawBytes (VP.Vector Int
off Int
len ByteArray
ba)) = ByteArray -> Int -> Int -> Word64 -> Word64
hashByteArray ByteArray
ba Int
off Int
len Word64
salt

instance IsList RawBytes where
  type Item RawBytes = Word8

  fromList :: [Item RawBytes] -> RawBytes
  fromList :: [Item RawBytes] -> RawBytes
fromList = [Word8] -> RawBytes
[Item RawBytes] -> RawBytes
pack

  toList :: RawBytes -> [Item RawBytes]
  toList :: RawBytes -> [Item RawBytes]
toList = RawBytes -> [Word8]
RawBytes -> [Item RawBytes]
unpack

-- | Mostly to make test cases shorter to write.
instance IsString RawBytes where
    fromString :: String -> RawBytes
fromString = [Word8] -> RawBytes
pack ([Word8] -> RawBytes) -> (String -> [Word8]) -> String -> RawBytes
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Word8) -> String -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
forall a. Enum a => a -> Int
fromEnum)

{-------------------------------------------------------------------------------
  Accessors
-------------------------------------------------------------------------------}

-- | \( O(1) \)
size :: RawBytes -> Int
size :: RawBytes -> Int
size = (Vector Word8 -> Int) -> RawBytes -> Int
forall a b. Coercible a b => a -> b
coerce Vector Word8 -> Int
forall a. Prim a => Vector a -> Int
VP.length

-- | \( O(1) \)
take :: Int -> RawBytes -> RawBytes
take :: Int -> RawBytes -> RawBytes
take = (Int -> Vector Word8 -> Vector Word8)
-> Int -> RawBytes -> RawBytes
forall a b. Coercible a b => a -> b
coerce Int -> Vector Word8 -> Vector Word8
forall a. Prim a => Int -> Vector a -> Vector a
VP.take

-- | \( O(1) \)
drop :: Int -> RawBytes -> RawBytes
drop :: Int -> RawBytes -> RawBytes
drop = (Int -> Vector Word8 -> Vector Word8)
-> Int -> RawBytes -> RawBytes
forall a b. Coercible a b => a -> b
coerce Int -> Vector Word8 -> Vector Word8
forall a. Prim a => Int -> Vector a -> Vector a
VP.drop

-- | @'topBits64' rb@ slices the first @64@ bits from the /top/ of the raw bytes
-- @rb@. Returns the string of bits as a 'Word64'.
--
-- The /top/ corresponds to the most significant bit (big-endian).
--
-- PRECONDITION: The byte-size of the raw bytes should be at least 8 bytes.
--
-- TODO: optimisation ideas: use unsafe shift/byteswap primops, look at GHC
-- core, find other opportunities for using primops.
--
topBits64 :: RawBytes -> Word64
topBits64 :: RawBytes -> Word64
topBits64 rb :: RawBytes
rb@(RawBytes (VP.Vector (I# Int#
off#) Int
_size (ByteArray ByteArray#
k#))) =
    Bool -> Word64 -> Word64
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (RawBytes -> Int
size RawBytes
rb Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
8) (Word64 -> Word64) -> Word64 -> Word64
forall a b. (a -> b) -> a -> b
$ Word64# -> Word64
toWord64 (ByteArray# -> Int# -> Word64#
indexWord8ArrayAsWord64# ByteArray#
k# Int#
off#)

#if (MIN_VERSION_GLASGOW_HASKELL(9, 4, 0, 0))
toWord64 :: Word64# -> Word64
#else
toWord64 :: Word# -> Word64
#endif
#if WORDS_BIGENDIAN
toWord64 = W64#
#else
toWord64 :: Word64# -> Word64
toWord64 Word64#
x# = Word64 -> Word64
byteSwap64 (Word64# -> Word64
W64# Word64#
x#)
#endif

{-------------------------------------------------------------------------------
  Construction
-------------------------------------------------------------------------------}

instance Semigroup RawBytes where
    <> :: RawBytes -> RawBytes -> RawBytes
(<>) = (Vector Word8 -> Vector Word8 -> Vector Word8)
-> RawBytes -> RawBytes -> RawBytes
forall a b. Coercible a b => a -> b
coerce Vector Word8 -> Vector Word8 -> Vector Word8
forall a. Prim a => Vector a -> Vector a -> Vector a
(VP.++)

instance Monoid RawBytes where
    mempty :: RawBytes
mempty = Vector Word8 -> RawBytes
forall a b. Coercible a b => a -> b
coerce Vector Word8
forall a. Prim a => Vector a
VP.empty
    mconcat :: [RawBytes] -> RawBytes
mconcat = ([Vector Word8] -> Vector Word8) -> [RawBytes] -> RawBytes
forall a b. Coercible a b => a -> b
coerce [Vector Word8] -> Vector Word8
forall a. Prim a => [Vector a] -> Vector a
VP.concat

-- | O(n) Yield the argument, but force it not to retain any extra memory by
-- copying it.
--
-- Useful when dealing with slices. Also, see
-- "Database.LSMTree.Internal.Unsliced"
copy :: RawBytes -> RawBytes
copy :: RawBytes -> RawBytes
copy (RawBytes Vector Word8
pvec) = Vector Word8 -> RawBytes
RawBytes (Vector Word8 -> Vector Word8
forall a. Prim a => Vector a -> Vector a
VP.force Vector Word8
pvec)

-- | Force 'RawBytes' to not retain any extra memory. This may copy the contents.
force :: RawBytes -> ByteArray
force :: RawBytes -> ByteArray
force (RawBytes (VP.Vector Int
off Int
len ByteArray
ba))
    | Int
off Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
    , ByteArray -> Int
BA.sizeofByteArray ByteArray
ba Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len
    = ByteArray
ba

    | Bool
otherwise
    = (forall s. ST s (MutableByteArray s)) -> ByteArray
BA.runByteArray ((forall s. ST s (MutableByteArray s)) -> ByteArray)
-> (forall s. ST s (MutableByteArray s)) -> ByteArray
forall a b. (a -> b) -> a -> b
$ do
        MutableByteArray s
mba <- Int -> ST s (MutableByteArray (PrimState (ST s)))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
BA.newByteArray Int
len
        MutableByteArray (PrimState (ST s))
-> Int -> ByteArray -> Int -> Int -> ST s ()
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m)
-> Int -> ByteArray -> Int -> Int -> m ()
BA.copyByteArray MutableByteArray s
MutableByteArray (PrimState (ST s))
mba Int
0 ByteArray
ba Int
off Int
len
        MutableByteArray s -> ST s (MutableByteArray s)
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return MutableByteArray s
mba

{-------------------------------------------------------------------------------
  Conversions
-------------------------------------------------------------------------------}

-- | \( O(1) \)
fromVector :: VP.Vector Word8 -> RawBytes
fromVector :: Vector Word8 -> RawBytes
fromVector Vector Word8
v = Vector Word8 -> RawBytes
RawBytes Vector Word8
v

-- | \( O(1) \)
fromByteArray :: Int -> Int -> ByteArray -> RawBytes
fromByteArray :: Int -> Int -> ByteArray -> RawBytes
fromByteArray Int
off Int
len ByteArray
ba = Vector Word8 -> RawBytes
RawBytes (Int -> Int -> ByteArray -> Vector Word8
forall a. Prim a => Int -> Int -> ByteArray -> Vector a
mkPrimVector Int
off Int
len ByteArray
ba)

pack :: [Word8] -> RawBytes
pack :: [Word8] -> RawBytes
pack = ([Word8] -> Vector Word8) -> [Word8] -> RawBytes
forall a b. Coercible a b => a -> b
coerce [Word8] -> Vector Word8
forall a. Prim a => [a] -> Vector a
VP.fromList

unpack :: RawBytes -> [Word8]
unpack :: RawBytes -> [Word8]
unpack = (Vector Word8 -> [Word8]) -> RawBytes -> [Word8]
forall a b. Coercible a b => a -> b
coerce Vector Word8 -> [Word8]
forall a. Prim a => Vector a -> [a]
VP.toList

{-------------------------------------------------------------------------------
  @bytestring@ utils
-------------------------------------------------------------------------------}

-- | \( O(n) \) conversion from a strict bytestring to raw bytes.
fromByteString :: BS.ByteString -> RawBytes
fromByteString :: ByteString -> RawBytes
fromByteString = ShortByteString -> RawBytes
fromShortByteString (ShortByteString -> RawBytes)
-> (ByteString -> ShortByteString) -> ByteString -> RawBytes
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ShortByteString
SBS.toShort

-- | \( O(1) \) conversion from a strict bytestring to raw bytes.
--
-- Strict bytestrings are allocated using 'mallocPlainForeignPtrBytes', so we
-- are expecting a 'PlainPtr' (or 'FinalPtr' with length 0).
-- For other variants, this function will fail.
unsafeFromByteString :: HasCallStack => BS.ByteString -> RawBytes
unsafeFromByteString :: (?callStack::CallStack) => ByteString -> RawBytes
unsafeFromByteString ByteString
bs =
    case ByteString -> Either String (ByteArray, Int)
tryGetByteArray ByteString
bs of
      Right (ByteArray
ba, Int
n) -> Vector Word8 -> RawBytes
RawBytes (Int -> Int -> ByteArray -> Vector Word8
forall a. Prim a => Int -> Int -> ByteArray -> Vector a
mkPrimVector Int
0 Int
n ByteArray
ba)
      Left String
err      -> String -> RawBytes
forall a. (?callStack::CallStack) => String -> a
error (String -> RawBytes) -> String -> RawBytes
forall a b. (a -> b) -> a -> b
$ String
"unsafeFromByteString: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
err

-- | \( O(1) \) conversion from raw bytes to a bytestring if pinned,
-- \( O(n) \) if unpinned.
toByteString :: RawBytes -> BS.ByteString
toByteString :: RawBytes -> ByteString
toByteString (RawBytes (VP.Vector Int
off Int
len ByteArray
ba)) =
    Int -> Int -> ByteArray -> ByteString
byteArrayToByteString Int
off Int
len ByteArray
ba

-- | \( O(1) \) conversion from raw bytes to a bytestring.
-- Fails if the underlying byte array is not pinned.
unsafePinnedToByteString :: HasCallStack => RawBytes -> BS.ByteString
unsafePinnedToByteString :: (?callStack::CallStack) => RawBytes -> ByteString
unsafePinnedToByteString (RawBytes (VP.Vector Int
off Int
len ByteArray
ba)) =
    (?callStack::CallStack) => Int -> Int -> ByteArray -> ByteString
Int -> Int -> ByteArray -> ByteString
unsafePinnedByteArrayToByteString Int
off Int
len ByteArray
ba

-- | \( O(1) \) conversion from a short bytestring to raw bytes.
fromShortByteString :: ShortByteString -> RawBytes
fromShortByteString :: ShortByteString -> RawBytes
fromShortByteString sbs :: ShortByteString
sbs@(SBS ByteArray#
ba#) =
    Vector Word8 -> RawBytes
RawBytes (Int -> Int -> ByteArray -> Vector Word8
forall a. Prim a => Int -> Int -> ByteArray -> Vector a
mkPrimVector Int
0 (ShortByteString -> Int
SBS.length ShortByteString
sbs) (ByteArray# -> ByteArray
ByteArray ByteArray#
ba#))

{-# INLINE builder #-}
builder :: RawBytes -> BB.Builder
builder :: RawBytes -> Builder
builder (RawBytes (VP.Vector Int
off Int
sz (ByteArray ByteArray#
ba#))) =
    Int -> Int -> ShortByteString -> Builder
shortByteStringFromTo Int
off (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
sz) (ByteArray# -> ShortByteString
SBS ByteArray#
ba#)