kputnam · November 25, 2014 20:56
diff --git a/Evaluation.hs b/Evaluation.hs
 import Data.Ord
 import Data.List
 import Test.QuickCheck

 -- | Measure how far a list is from being sorted.
 --
 --   The list is stably sorted, and every element that ends up at a later
 --   index than the one it started from contributes the distance it
 --   moved; the cost is the sum of those distances. Elements that stay
 --   put or move toward the front contribute nothing.
 --
 --     e.g. cost "abc"   == 0
 --          cost "acb"   == 1
 --          cost "cba"   == 2
 --          cost "baa"   == 2
 --          cost "baaa"  == 3
 --          cost "baaaa" == 4
 --
 --   An already sorted list costs 0. A strictly decreasing list of
 --   length n costs n*n `div` 4 (e.g. 2 for "cba").
 --
 --   NOTE(review): this is not the number of inverted pairs (that would
 --   be 3 for "cba", with a maximum of n*(n-1)/2) -- confirm which
 --   measure is intended.
 --
 cost :: Ord a => [a] -> Int
 cost xs = sum (zipWith shiftRight [0..] (originalIndices xs))
  where
    -- How many places an element moved toward the end; 0 if it did not.
    shiftRight :: Int -> Int -> Int
    shiftRight sortedIx originalIx = max 0 (sortedIx - originalIx)

    -- For each slot of the sorted list, the index its element came from.
    -- The sort is stable, so tied elements keep their relative order:
    --   e.g. originalIndices "abcde" == [0,1,2,3,4]
    --        originalIndices "edcba" == [4,3,2,1,0]
    --        originalIndices "aaacb" == [0,1,2,4,3]
    originalIndices :: Ord b => [b] -> [Int]
    originalIndices ys = [ i | (i, _) <- sortBy (comparing snd) (zip [0..] ys) ]


 -- | Count the pairs (p,n) with n >= p, taking p from the first list
 --   (positives) and n from the second (negatives). Positives are expected
 --   to carry higher predicted values than negatives, so each such pair
 --   is a ranking error. Elements of the same list are never compared
 --   with each other.
 --
 --   A perfect ranking yields zero inversions; the worst possible ranking
 --   yields |P|x|N| -- the size of the whole Cartesian product.
 --
 inversions :: Ord a => [a] -> [a] -> Int
 inversions ps ns = go (sort ps) (sort ns) (length ns)
  where
    -- Walk both sorted lists in step. The third argument is always the
    -- length of the negatives that have not been discarded yet.
    go [] _ _ = 0
    go _ [] _ = 0
    go pos@(p:laterPos) neg@(n:laterNeg) remaining
      -- n is strictly below p, hence below every later positive too: it
      -- can never take part in an inversion, so discard it. The test
      -- goes through 'compare', the same ordering 'sort' relies on.
      | compare p n == GT = go pos laterNeg (remaining - 1)
      -- n >= p, and so is every negative after n: p is inverted against
      -- all remaining negatives. Keep n, since later positives may be
      -- inverted against it as well.
      | otherwise = remaining + go laterPos neg remaining
    
 -- Tests
 -------------------------------------------------------------------------------

 -- | Naive reference implementation: examine every (p,n) combination of
 --   the two lists and count those with n >= p. Quadratic in the input
 --   sizes, which is fine for checking 'inversions' on small test cases.
 inversions' :: Ord a => [a] -> [a] -> Int
 inversions' ps ns = length [ () | p <- ps, n <- ns, n >= p ]

 -- | Property: for any two lists, the optimized 'inversions' returns the
 --   same count as the naive 'inversions''.
 pCompareNaive :: Ord a => [a] -> [a] -> Bool
 pCompareNaive ps ns = expected == actual
  where
    expected = inversions' ps ns
    actual   = inversions ps ns

 -- | Sample predicted values P(x) for examples whose true label y is 1
 positives :: [Double]
 positives =
   [ 0.033
   , 0.034
   , 0.410
   , 0.698
   , 0.712
   , 0.928
   ]

 -- | Sample predicted values P(x) for examples whose true label y is 0
 negatives :: [Double]
 negatives =
   [ 0.021
   , 0.039
   , 0.041
   , 0.041
   , 0.042
   , 0.187
   , 0.813
   ]

 -- Example:
 --   inversions positives negatives == 15

 -- | Run the naive-vs-optimized agreement property once per element type,
 --   in the order listed.
 main :: IO ()
 main = sequence_
   [ quickCheck (pCompareNaive :: [Int]    -> [Int]    -> Bool)
   , quickCheck (pCompareNaive :: [Char]   -> [Char]   -> Bool)
   , quickCheck (pCompareNaive :: [Float]  -> [Float]  -> Bool)
   , quickCheck (pCompareNaive :: [Double] -> [Double] -> Bool)
   ]
	import Data.Ord
	import Data.List
	import Test.QuickCheck

	-- | Measure how far a list is from being sorted.
	--
	--   The list is stably sorted, and every element that ends up at a
	--   later index than the one it started from contributes the distance
	--   it moved; the cost is the sum of those distances. Elements that
	--   stay put or move toward the front contribute nothing.
	--
	--     e.g. cost "abc"   == 0
	--          cost "acb"   == 1
	--          cost "cba"   == 2
	--          cost "baa"   == 2
	--          cost "baaa"  == 3
	--          cost "baaaa" == 4
	--
	--   An already sorted list costs 0. A strictly decreasing list of
	--   length n costs n*n `div` 4 (e.g. 2 for "cba").
	--
	--   NOTE(review): this is not the number of inverted pairs (that would
	--   be 3 for "cba", with a maximum of n*(n-1)/2) -- confirm which
	--   measure is intended.
	--
	cost :: Ord a => [a] -> Int
	cost = sum . map (max 0 . uncurry (-)) . zip [0..] . rank
	  where
	    -- For each slot of the sorted list, the index its element came
	    -- from. Tied elements are not ranked equally (by design): the
	    -- sort is stable, so they keep their original relative order.
	    --   e.g. rank "abcde" == [0,1,2,3,4]
	    --        rank "edcba" == [4,3,2,1,0]
	    --        rank "aaacb" == [0,1,2,4,3]
	    rank :: Ord a => [a] -> [Int]
	    rank = map fst . sortBy (comparing snd) . zip [0..]


	-- | Count number of pairs (p,n) where n >= p, where p is drawn from
	--   the first list and n is drawn from the second, given we know that
	--   all elements in the first list (positives) *should* have higher
	--   predicted values than elements in the second list (negatives). We
	--   don't make any comparisons between elements within the same list.
	--
	--   The theoretical best performing algorithm would have zero
	--   inversions while the worst would have |P|x|N| -- the entire
	--   Cartesian product.
	--
	inversions :: Ord a => [a] -> [a] -> Int
	inversions ps ns
	  = aux (sort ps) (sort ns, length ns)
	  where
	    -- Both lists are sorted ascending; the Int paired with the
	    -- negatives is the length of what is left of that list.
	    aux [] _      = 0
	    aux _ ([], _) = 0

	    -- Drop negatives until the first one that is inverted against p
	    aux (p:ps') (n:ns', nN)
	      = case compare p n of
	          -- Not inverted (n < p, so n is below every later positive
	          -- too): drop this n
	          GT -> aux (p:ps') (ns', nN-1)

	          -- This n >= p, and all of the remaining negatives too;
	          -- keep n for the positives that follow
	          _  -> nN + aux ps' (n:ns', nN)

	-- Tests
	-------------------------------------------------------------------------------

	-- | Naive reference implementation: examine every (p,n) combination
	--   of the two lists and count those with n >= p.
	inversions' :: Ord a => [a] -> [a] -> Int
	inversions' ps ns = length [ (p,n) | p <- ps, n <- ns, n >= p ]

	-- | Property: for any two lists, the optimized 'inversions' returns
	--   the same count as the naive 'inversions''.
	pCompareNaive :: Ord a => [a] -> [a] -> Bool
	pCompareNaive ps ns = expected == actual
	  where
	    expected = inversions' ps ns
	    actual   = inversions ps ns

	-- | Sample predicted values P(x) for examples whose true label y is 1
	positives :: [Double]
	positives =
	  [ 0.033
	  , 0.034
	  , 0.410
	  , 0.698
	  , 0.712
	  , 0.928
	  ]

	-- | Sample predicted values P(x) for examples whose true label y is 0
	negatives :: [Double]
	negatives =
	  [ 0.021
	  , 0.039
	  , 0.041
	  , 0.041
	  , 0.042
	  , 0.187
	  , 0.813
	  ]

	-- Example:
	-- inversions positives negatives == 15

	-- | Run the naive-vs-optimized agreement property once per element
	--   type, in the order listed.
	main :: IO ()
	main = do
	  quickCheck (pCompareNaive :: [Int] -> [Int] -> Bool)
	  quickCheck (pCompareNaive :: [Char] -> [Char] -> Bool)
	  quickCheck (pCompareNaive :: [Float] -> [Float] -> Bool)
	  quickCheck (pCompareNaive :: [Double] -> [Double] -> Bool)