Created
November 25, 2014 20:56
-
-
Save kputnam/12c926b6b06f50a09600 to your computer and use it in GitHub Desktop.
Evaluation of Predictive Ranking Algorithms
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Ord | |
import Data.List | |
import Test.QuickCheck | |
-- | Quantify how "unsorted" a given list of elements is, by computing | |
-- at each position in the list: how many later elements are smaller? | |
-- | |
-- e.g. cost "abc" == 0 | |
-- cost "acb" == 1 | |
-- cost "cba" == 2 | |
-- cost "baa" == 2 | |
-- cost "baaa" == 3 | |
-- cost "baaaa" == 4 | |
-- | |
-- The best case is a sorted list, which has cost == 0. The worst | |
-- case is a list sorted in reverse, which has a cost of n*(n-1)/2, | |
-- where n = length xs | |
-- | |
cost :: Ord a => [a] -> Int | |
cost = sum . map (max 0 . uncurry (-)) . zip [0..] . rank | |
where | |
-- This doesn't rank tied elements equally (by design) | |
-- e.g. rank "abcde" == [0,1,2,3,4] | |
-- rank "edcba" == [4,3,2,1,0] | |
-- rank "aaacb" == [0,1,2,4,3] | |
rank :: Ord a => [a] -> [Int] | |
rank = map fst . sortBy (comparing snd) . zip [0..] | |
-- | Count number of pairs (p,n) where n >= p, where p is drawn from | |
-- the first list and n is drawn from the second, given we know that | |
-- all elements in the first list (positives) *should* have higher | |
-- predicted values than elements in the second list (negatives). We | |
-- don't make any comparisons between elements within the same list. | |
-- | |
-- The theoretical best performing algorithm would have zero inversions | |
-- while the worse would have |P|x|N| -- the entire Cartesian product. | |
-- | |
inversions :: Ord a => [a] -> [a] -> Int | |
inversions ps ns | |
= aux (sort ps) (sort ns, length ns) | |
where | |
aux [] _ = 0 | |
aux _ ([], _) = 0 | |
-- | Drop elements in ns until we find the first error/inversion | |
aux (p:ps) (n:ns, nN) | |
= case compare p n of | |
-- Not inverted, drop this n | |
GT -> aux (p:ps) (ns, nN-1) | |
-- This n >= p, and all of remaining ns too | |
_ -> nN + aux ps (n:ns, nN) | |
-- Tests | |
------------------------------------------------------------------------------- | |
-- | Naive implementation | |
inversions' ps ns = length [ (p,n) | p <- ps, n <- ns, n >= p ] | |
-- | Test that the naive inversions' and optimized inversions agree | |
pCompareNaive :: Ord a => [a] -> [a] -> Bool | |
pCompareNaive ps ns = inversions' ps ns == inversions ps ns | |
-- | These examples are P(x) values when y is known to be 1 | |
positives :: [Double] | |
positives = [0.033, 0.034, 0.410, 0.698, 0.712, 0.928] | |
-- | These examples are P(x) values when y is known to be 0 | |
negatives :: [Double] | |
negatives = [0.021, 0.039, 0.041, 0.041, 0.042, 0.187, 0.813] | |
-- Example: | |
-- inversion positives negatives == 15 | |
main :: IO () | |
main = do | |
quickCheck (pCompareNaive :: [Int] -> [Int] -> Bool) | |
quickCheck (pCompareNaive :: [Char] -> [Char] -> Bool) | |
quickCheck (pCompareNaive :: [Float] -> [Float] -> Bool) | |
quickCheck (pCompareNaive :: [Double] -> [Double] -> Bool) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment