{-# LANGUAGE ScopedTypeVariables #-}
module Data.BloomFilter.Easy
(
Bloom
, easyList
, B.elem
, B.notElem
, B.length
, safeSuggestSizing
, suggestSizing
) where
import Data.BloomFilter (Bloom)
import Data.BloomFilter.Hash (Hashable, cheapHashes)
import Data.BloomFilter.Util (nextPowerOfTwo)
import qualified Data.ByteString as SB
import qualified Data.ByteString.Lazy as LB
import qualified Data.BloomFilter as B
easyList :: (Hashable a)
=> Double
-> [a]
-> Bloom a
{-# SPECIALIZE easyList :: Double -> [String] -> Bloom String #-}
{-# SPECIALIZE easyList :: Double -> [LB.ByteString] -> Bloom LB.ByteString #-}
{-# SPECIALIZE easyList :: Double -> [SB.ByteString] -> Bloom SB.ByteString #-}
easyList :: forall a. Hashable a => Double -> [a] -> Bloom a
easyList Double
errRate [a]
xs = (a -> [Hash]) -> Int -> [a] -> Bloom a
forall a. (a -> [Hash]) -> Int -> [a] -> Bloom a
B.fromList (Int -> a -> [Hash]
forall a. Hashable a => Int -> a -> [Hash]
cheapHashes Int
numHashes) Int
numBits [a]
xs
where capacity :: Int
capacity = [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs
(Int
numBits, Int
numHashes)
| Int
capacity Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 = Int -> Double -> (Int, Int)
suggestSizing Int
capacity Double
errRate
| Bool
otherwise = (Int
1, Int
1)
safeSuggestSizing
:: Int
-> Double
-> Either String (Int, Int)
safeSuggestSizing :: Int -> Double -> Either String (Int, Int)
safeSuggestSizing Int
capacity Double
errRate
| Int
capacity Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = String -> Either String (Int, Int)
forall a b. a -> Either a b
Left String
"invalid capacity"
| Double
errRate Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
0 Bool -> Bool -> Bool
|| Double
errRate Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
1 = String -> Either String (Int, Int)
forall a b. a -> Either a b
Left String
"invalid error rate"
| Bool
otherwise =
let cap :: Double
cap = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
capacity
(Double
bits :: Double, Double
hashes :: Double) =
[(Double, Double)] -> (Double, Double)
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum [((-Double
k) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
cap Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double -> Double
forall a. Floating a => a -> a
log (Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- (Double
errRate Double -> Double -> Double
forall a. Floating a => a -> a -> a
** (Double
1 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
k))), Double
k)
| Double
k <- [Double
1..Double
100]]
roundedBits :: Int
roundedBits = Int -> Int
nextPowerOfTwo (Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
ceiling Double
bits)
in if Int
roundedBits Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0
then String -> Either String (Int, Int)
forall a b. a -> Either a b
Left String
"capacity too large to represent"
else (Int, Int) -> Either String (Int, Int)
forall a b. b -> Either a b
Right (Int
roundedBits, Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
truncate Double
hashes)
suggestSizing :: Int
-> Double
-> (Int, Int)
suggestSizing :: Int -> Double -> (Int, Int)
suggestSizing Int
cap Double
errs = (String -> (Int, Int))
-> ((Int, Int) -> (Int, Int))
-> Either String (Int, Int)
-> (Int, Int)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> (Int, Int)
forall {c}. String -> c
fatal (Int, Int) -> (Int, Int)
forall a. a -> a
id (Int -> Double -> Either String (Int, Int)
safeSuggestSizing Int
cap Double
errs)
where fatal :: String -> c
fatal = String -> c
forall a. HasCallStack => String -> a
error (String -> c) -> (String -> String) -> String -> c
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"Data.BloomFilter.Util.suggestSizing: " String -> String -> String
forall a. [a] -> [a] -> [a]
++)