criterion-measurement-0.2.2.0/0000755000000000000000000000000007346545000014363 5ustar0000000000000000
criterion-measurement-0.2.2.0/LICENSE0000644000000000000000000000246107346545000015373 0ustar0000000000000000Copyright (c) 2009, 2010 Bryan O'Sullivan All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
criterion-measurement-0.2.2.0/README.md0000644000000000000000000000052707346545000015646 0ustar0000000000000000# criterion-measurement [![Build Status](https://github.com/haskell/criterion/workflows/Haskell-CI/badge.svg)](https://github.com/haskell/criterion/actions?query=workflow%3AHaskell-CI) Measurement-related functionality extracted from Criterion, with minimal dependencies. The rationale for this is to enable alternative analysis front-ends.
criterion-measurement-0.2.2.0/Setup.hs0000644000000000000000000000005607346545000016020 0ustar0000000000000000import Distribution.Simple main = defaultMain
criterion-measurement-0.2.2.0/cbits/0000755000000000000000000000000007346545000015467 5ustar0000000000000000
criterion-measurement-0.2.2.0/cbits/cycles.c0000644000000000000000000000243707346545000017123 0ustar0000000000000000#include "Rts.h" #if darwin_HOST_OS #include <mach/mach_time.h> StgWord64 criterion_rdtsc(void) { return mach_absolute_time(); } #elif aarch64_HOST_ARCH StgWord64 criterion_rdtsc(void) { StgWord64 ret; __asm__ __volatile__ ("mrs %0, cntvct_el0" : "=r"(ret)); return ret; } #elif x86_64_HOST_ARCH || i386_HOST_ARCH StgWord64 criterion_rdtsc(void) { StgWord32 hi, lo; __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); return ((StgWord64) lo) | (((StgWord64) hi)<<32); } #elif linux_HOST_OS /* * This should work on all Linux. * * Technique by Austin Seipp found here: * * http://neocontra.blogspot.com/2013/05/user-mode-performance-counters-for.html */ #include <linux/perf_event.h> #include <sys/syscall.h> #include <unistd.h> static int fddev = -1; __attribute__((constructor)) static void init(void) { static struct perf_event_attr attr; attr.type = PERF_TYPE_HARDWARE; attr.config = PERF_COUNT_HW_CPU_CYCLES; fddev = syscall (__NR_perf_event_open, &attr, 0, -1, -1, 0); } __attribute__((destructor)) static void fini(void) { close(fddev); } StgWord64 criterion_rdtsc (void) { StgWord64 result = 0; if (read (fddev, &result, sizeof(result)) < sizeof(result)) return 0; return result; } #else #error Unsupported OS/architecture/compiler!
#endif
criterion-measurement-0.2.2.0/cbits/time-osx.c0000644000000000000000000000124707346545000017404 0ustar0000000000000000#include <mach/mach.h> #include <time.h> void criterion_inittime(void) {} double criterion_gettime(void) { return clock_gettime_nsec_np(CLOCK_UPTIME_RAW) / 1e9; } static double to_double(time_value_t time) { return time.seconds + time.microseconds / 1e6; } double criterion_getcputime(void) { struct task_thread_times_info thread_info_data; mach_msg_type_number_t thread_info_count = TASK_THREAD_TIMES_INFO_COUNT; kern_return_t kr = task_info(mach_task_self(), TASK_THREAD_TIMES_INFO, (task_info_t) &thread_info_data, &thread_info_count); return (to_double(thread_info_data.user_time) + to_double(thread_info_data.system_time)); }
criterion-measurement-0.2.2.0/cbits/time-posix.c0000644000000000000000000000054407346545000017734 0ustar0000000000000000#include <time.h> void criterion_inittime(void) { } double criterion_gettime(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_sec + ts.tv_nsec * 1e-9; } double criterion_getcputime(void) { struct timespec ts; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); return ts.tv_sec + ts.tv_nsec * 1e-9; }
criterion-measurement-0.2.2.0/cbits/time-windows.c0000644000000000000000000000327307346545000020266 0ustar0000000000000000/* * Windows has the most amazingly cretinous time measurement APIs you * can possibly imagine. * * Our first possibility is GetSystemTimeAsFileTime, which updates at * roughly 60Hz, and is hence worthless - we'd have to run a * computation for tens or hundreds of seconds to get a trustworthy * number. * * Alternatively, we can use QueryPerformanceCounter, which has * undefined behaviour under almost all interesting circumstances * (e.g. multicore systems, CPU frequency changes). But at least it * increments reasonably often. */ #include <windows.h> #if 0 void criterion_inittime(void) { } double criterion_gettime(void) { FILETIME ft; ULARGE_INTEGER li; GetSystemTimeAsFileTime(&ft); li.LowPart = ft.dwLowDateTime; li.HighPart = ft.dwHighDateTime; return (li.QuadPart - 130000000000000000ull) * 1e-7; } #else static double freq_recip; static LARGE_INTEGER firstClock; void criterion_inittime(void) { LARGE_INTEGER freq; if (freq_recip == 0) { QueryPerformanceFrequency(&freq); QueryPerformanceCounter(&firstClock); freq_recip = 1.0 / freq.QuadPart; } } double criterion_gettime(void) { LARGE_INTEGER li; QueryPerformanceCounter(&li); return ((double) (li.QuadPart - firstClock.QuadPart)) * freq_recip; } #endif static ULONGLONG to_quad_100ns(FILETIME ft) { ULARGE_INTEGER li; li.LowPart = ft.dwLowDateTime; li.HighPart = ft.dwHighDateTime; return li.QuadPart; } double criterion_getcputime(void) { FILETIME creation, exit, kernel, user; ULONGLONG time; GetProcessTimes(GetCurrentProcess(), &creation, &exit, &kernel, &user); time = to_quad_100ns(user) + to_quad_100ns(kernel); return time / 1e7; }
criterion-measurement-0.2.2.0/changelog.md0000644000000000000000000000644307346545000016643 0ustar00000000000000000.2.2.0 * Support building on all AArch64 platforms (not just Linux and macOS). 0.2.1.0 * Make the behavior of the benchmarking functions independent of the `-fspec-constr-count` limit. 0.2.0.0 * Add a `measPeakMbAllocated` field to `Measured` for reporting maximum megabytes allocated. Naturally, this affects the behavior of `Measured`'s `{To,From}JSON` and `Binary` instances. 0.1.4.0 * Fix a bug that occurred with GHC 9.2.4 or later that would cause incorrect measurements.
0.1.3.0 * Change `criterion_rdtsc` to return `mach_absolute_time` on macOS. This is a portable way of returning the number of CPU cycles that works on both Intel- and ARM-based Macs. * Change `criterion_gettime` to use `clock_gettime_nsec_np` instead of `mach_absolute_time` on macOS. While `mach_absolute_time` has nanosecond resolution on Intel-based Macs, this is not the case on ARM-based Macs, so the previous `mach_absolute_time`-based implementation would return incorrect timing results on Apple silicon. There are two minor consequences of this change: * `criterion-measurement` now only supports macOS 10.12 or later, as that is the first version to have `clock_gettime_nsec_np`. As macOS 10.12 was released in 2016, this is unlikely to affect users, but please speak up if this is a problem for you. * As `clock_gettime_nsec_np` does not require any special initialization code, `criterion_inittime` is now a no-op on macOS. If you manually invoke the `getTime` function in your code, however, it is still important that you `initializeTime` beforehand, as this is still required for the Windows implementation to work correctly. 0.1.2.0 * Ensure that `Criterion.Measurement.Types.Internal` is always compiled with optimizations, even if the `criterion-measurement` library itself happens to be built with `-O0` or `-fprof-auto`. This is necessary to ensure that the inner benchmarking loop of criterion always finishes in a timely manner, even if the rest of the library is not fully optimized. 0.1.1.0 * Add `nfAppIO` and `whnfAppIO` functions, which take a function and its argument separately like `nf`/`whnf`, but whose function returns `IO` like `nfIO`/`whnfIO`. This is useful for benchmarking functions in which the bulk of the work is not bound by IO, but by pure computations that might otherwise be optimized away if the argument is known statically. 0.1.0.0 * This is the first release of `criterion-measurement`. The changelog notes below are copied from the notes for the corresponding `criterion` release, `criterion-1.5.0.0`. * Move the measurement functionality of `criterion` into a standalone package, `criterion-measurement`. In particular, `cbits/` and `Criterion.Measurement` are now in `criterion-measurement`, along with the relevant definitions of `Criterion.Types` and `Criterion.Types.Internal` (both of which are now under the `Criterion.Measurement.*` namespace). Consequently, `criterion` now depends on `criterion-measurement`. This lets other libraries (e.g. alternative statistical analysis front-ends) import the measurement functionality alone as a lightweight dependency. * Fix a bug on macOS and Windows where using `runAndAnalyse` and other lower-level benchmarking functions would result in an infinite loop.
criterion-measurement-0.2.2.0/criterion-measurement.cabal0000644000000000000000000000411607346545000021672 0ustar0000000000000000name: criterion-measurement version: 0.2.2.0 synopsis: Criterion measurement functionality and associated types description: Measurement-related functionality extracted from Criterion, with minimal dependencies. The rationale for this is to enable alternative analysis front-ends.
homepage: https://github.com/haskell/criterion license: BSD3 license-file: LICENSE author: Bryan O'Sullivan maintainer: Marco Zocca , Ryan Scott copyright: 2009-2016 Bryan O'Sullivan and others category: Development, Performance, Testing, Benchmarking build-type: Simple extra-source-files: README.md, changelog.md cabal-version: >=1.10 tested-with: GHC==7.4.2, GHC==7.6.3, GHC==7.8.4, GHC==7.10.3, GHC==8.0.2, GHC==8.2.2, GHC==8.4.4, GHC==8.6.5, GHC==8.8.4, GHC==8.10.7, GHC==9.0.2, GHC==9.2.7, GHC==9.4.5, GHC==9.6.1 flag fast description: compile without optimizations default: False manual: True library hs-source-dirs: src exposed-modules: Criterion.Measurement Criterion.Measurement.Types Criterion.Measurement.Types.Internal build-depends: aeson >= 0.8 , base >= 4.5 && < 5 , base-compat >= 0.9 , binary >= 0.5.1.0 , containers , deepseq >= 1.1.0.0 , ghc-prim , vector >= 0.7.1 default-language: Haskell2010 ghc-options: -Wall -funbox-strict-fields if impl(ghc >= 6.8) ghc-options: -fwarn-tabs if flag(fast) ghc-options: -O0 else ghc-options: -O2 c-sources: cbits/cycles.c if os(darwin) c-sources: cbits/time-osx.c else { if os(windows) c-sources: cbits/time-windows.c else c-sources: cbits/time-posix.c } source-repository head type: git location: https://github.com/haskell/criterion subdir: criterion-measurement criterion-measurement-0.2.2.0/src/Criterion/0000755000000000000000000000000007346545000017110 5ustar0000000000000000criterion-measurement-0.2.2.0/src/Criterion/Measurement.hs0000644000000000000000000004141307346545000021734 0ustar0000000000000000{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE Trustworthy #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, ScopedTypeVariables #-} -- | -- Module : Criterion.Measurement -- Copyright : (c) 2009-2014 Bryan O'Sullivan -- -- License : BSD-style -- Maintainer : bos@serpentine.com -- Stability : experimental -- Portability : GHC -- -- Benchmark measurement code. module Criterion.Measurement ( initializeTime , getTime , getCPUTime , getCycles , getGCStatistics , GCStatistics(..) , secs , measure , runBenchmark , runBenchmarkable , runBenchmarkable_ , measured , applyGCStatistics , threshold ) where import Criterion.Measurement.Types (Benchmarkable(..), Measured(..)) import Control.DeepSeq (NFData(rnf)) import Control.Exception (finally,evaluate) import Data.Data (Data, Typeable) import Data.Int (Int64) import Data.List (unfoldr) import Data.Word (Word64) import GHC.Generics (Generic) #if MIN_VERSION_base(4,10,0) import GHC.Stats (RTSStats(..), GCDetails(..)) #else import GHC.Stats (GCStats(..)) #endif import Prelude () import Prelude.Compat #if MIN_VERSION_base(4,7,0) import System.Mem (performGC, performMinorGC) # else import System.Mem (performGC) #endif import Text.Printf (printf) import qualified Control.Exception as Exc import qualified Data.Vector as V import qualified GHC.Stats as Stats #if !(MIN_VERSION_base(4,7,0)) foreign import ccall "performGC" performMinorGC :: IO () #endif -- | Statistics about memory usage and the garbage collector. Apart from -- 'gcStatsCurrentBytesUsed' and 'gcStatsCurrentBytesSlop' all are cumulative values since -- the program started. -- -- 'GCStatistics' is cargo-culted from the @GCStats@ data type that "GHC.Stats" -- used to export. Since @GCStats@ was removed in GHC 8.4, @criterion@ uses -- 'GCStatistics' to provide a backwards-compatible view of GC statistics. 
data GCStatistics = GCStatistics { -- | Total number of bytes allocated gcStatsBytesAllocated :: !Int64 -- | Number of garbage collections performed (any generation, major and -- minor) , gcStatsNumGcs :: !Int64 -- | Maximum number of live bytes seen so far , gcStatsMaxBytesUsed :: !Int64 -- | Number of byte usage samples taken, or equivalently -- the number of major GCs performed. , gcStatsNumByteUsageSamples :: !Int64 -- | Sum of all byte usage samples, can be used with -- 'gcStatsNumByteUsageSamples' to calculate averages with -- arbitrary weighting (if you are sampling this record multiple -- times). , gcStatsCumulativeBytesUsed :: !Int64 -- | Number of bytes copied during GC , gcStatsBytesCopied :: !Int64 -- | Number of live bytes at the end of the last major GC , gcStatsCurrentBytesUsed :: !Int64 -- | Current number of bytes lost to slop , gcStatsCurrentBytesSlop :: !Int64 -- | Maximum number of bytes lost to slop at any one time so far , gcStatsMaxBytesSlop :: !Int64 -- | Maximum number of megabytes allocated , gcStatsPeakMegabytesAllocated :: !Int64 -- | CPU time spent running mutator threads. This does not include -- any profiling overhead or initialization. , gcStatsMutatorCpuSeconds :: !Double -- | Wall clock time spent running mutator threads. This does not -- include initialization. , gcStatsMutatorWallSeconds :: !Double -- | CPU time spent running GC , gcStatsGcCpuSeconds :: !Double -- | Wall clock time spent running GC , gcStatsGcWallSeconds :: !Double -- | Total CPU time elapsed since program start , gcStatsCpuSeconds :: !Double -- | Total wall clock time elapsed since start , gcStatsWallSeconds :: !Double } deriving (Eq, Read, Show, Typeable, Data, Generic) -- | Try to get GC statistics, bearing in mind that the GHC runtime -- will throw an exception if statistics collection was not enabled -- using \"@+RTS -T@\". -- -- If you need guaranteed up-to-date stats, call 'performGC' first. 
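--
-- A minimal usage sketch, added for illustration (@printGCSummary@ is a
-- hypothetical helper, and the program must be run with @+RTS -T@ for the
-- call to return statistics):
--
-- > printGCSummary :: IO ()
-- > printGCSummary = do
-- >   performGC
-- >   mstats <- getGCStatistics
-- >   case mstats of
-- >     Nothing -> putStrLn "GC statistics unavailable; run with +RTS -T"
-- >     Just s  -> do
-- >       print (gcStatsBytesAllocated s)
-- >       print (gcStatsNumGcs s)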
getGCStatistics :: IO (Maybe GCStatistics) #if MIN_VERSION_base(4,10,0) -- Use RTSStats/GCDetails to gather GC stats getGCStatistics = do stats <- Stats.getRTSStats let gcdetails :: Stats.GCDetails gcdetails = gc stats nsToSecs :: Int64 -> Double nsToSecs ns = fromIntegral ns * 1.0E-9 return $ Just GCStatistics { gcStatsBytesAllocated = fromIntegral $ allocated_bytes stats , gcStatsNumGcs = fromIntegral $ gcs stats , gcStatsMaxBytesUsed = fromIntegral $ max_live_bytes stats , gcStatsNumByteUsageSamples = fromIntegral $ major_gcs stats , gcStatsCumulativeBytesUsed = fromIntegral $ cumulative_live_bytes stats , gcStatsBytesCopied = fromIntegral $ copied_bytes stats , gcStatsCurrentBytesUsed = fromIntegral $ gcdetails_live_bytes gcdetails , gcStatsCurrentBytesSlop = fromIntegral $ gcdetails_slop_bytes gcdetails , gcStatsMaxBytesSlop = fromIntegral $ max_slop_bytes stats , gcStatsPeakMegabytesAllocated = fromIntegral (max_mem_in_use_bytes stats) `quot` (1024*1024) , gcStatsMutatorCpuSeconds = nsToSecs $ mutator_cpu_ns stats , gcStatsMutatorWallSeconds = nsToSecs $ mutator_elapsed_ns stats , gcStatsGcCpuSeconds = nsToSecs $ gc_cpu_ns stats , gcStatsGcWallSeconds = nsToSecs $ gc_elapsed_ns stats , gcStatsCpuSeconds = nsToSecs $ cpu_ns stats , gcStatsWallSeconds = nsToSecs $ elapsed_ns stats } `Exc.catch` \(_::Exc.SomeException) -> return Nothing #else -- Use the old GCStats type to gather GC stats getGCStatistics = do stats <- Stats.getGCStats return $ Just GCStatistics { gcStatsBytesAllocated = bytesAllocated stats , gcStatsNumGcs = numGcs stats , gcStatsMaxBytesUsed = maxBytesUsed stats , gcStatsNumByteUsageSamples = numByteUsageSamples stats , gcStatsCumulativeBytesUsed = cumulativeBytesUsed stats , gcStatsBytesCopied = bytesCopied stats , gcStatsCurrentBytesUsed = currentBytesUsed stats , gcStatsCurrentBytesSlop = currentBytesSlop stats , gcStatsMaxBytesSlop = maxBytesSlop stats , gcStatsPeakMegabytesAllocated = peakMegabytesAllocated stats , gcStatsMutatorCpuSeconds = mutatorCpuSeconds stats , gcStatsMutatorWallSeconds = mutatorWallSeconds stats , gcStatsGcCpuSeconds = gcCpuSeconds stats , gcStatsGcWallSeconds = gcWallSeconds stats , gcStatsCpuSeconds = cpuSeconds stats , gcStatsWallSeconds = wallSeconds stats } `Exc.catch` \(_::Exc.SomeException) -> return Nothing #endif -- | Measure the execution of a benchmark a given number of times. -- -- This function initializes the timer before measuring time (refer to the -- documentation for 'initializeTime' for more details). measure :: Benchmarkable -- ^ Operation to benchmark. -> Int64 -- ^ Number of iterations. -> IO (Measured, Double) measure bm iters = runBenchmarkable bm iters combineResults $ \ !n act -> do -- Ensure the stats from getGCStatistics are up-to-date -- by garbage collecting. performMinorGC does /not/ update all stats, but -- it does update the ones we need (see applyGCStatistics for details. -- -- We use performMinorGC instead of performGC to avoid the cost of copying -- the live data in the heap potentially hundreds of times in a -- single benchmark. performMinorGC initializeTime startStats <- getGCStatistics startTime <- getTime startCpuTime <- getCPUTime startCycles <- getCycles act endTime <- getTime endCpuTime <- getCPUTime endCycles <- getCycles -- From these we can derive GC-related deltas. endStatsPreGC <- getGCStatistics performMinorGC -- From these we can derive all other deltas, and performGC guarantees they -- are up-to-date. 
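      -- (See 'applyGCStatistics': allocation, copy, and mutator figures are
      -- taken from this post-GC sample, while GC counts and GC times are
      -- taken from the pre-GC sample above, so the extra collection we
      -- trigger here is not charged to the benchmark itself.)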
endStatsPostGC <- getGCStatistics let !m = applyGCStatistics endStatsPostGC endStatsPreGC startStats $ measured { measTime = max 0 (endTime - startTime) , measCpuTime = max 0 (endCpuTime - startCpuTime) , measCycles = max 0 (fromIntegral (endCycles - startCycles)) , measIters = n } return (m, endTime) where -- When combining runs, the Measured value is accumulated over many runs, -- but the Double value is the most recent absolute measurement of time. combineResults :: (Measured, Double) -> (Measured, Double) -> (Measured, Double) combineResults (!m1, _) (!m2, !d2) = (m3, d2) where combine :: (a -> a -> a) -> (Measured -> a) -> a combine g sel = sel m1 `g` sel m2 add :: Num a => (Measured -> a) -> a add = combine (+) m3 = Measured { measTime = add measTime , measCpuTime = add measCpuTime , measCycles = add measCycles , measIters = add measIters , measAllocated = add measAllocated , measPeakMbAllocated = combine max measPeakMbAllocated , measNumGcs = add measNumGcs , measBytesCopied = add measBytesCopied , measMutatorWallSeconds = add measMutatorWallSeconds , measMutatorCpuSeconds = add measMutatorCpuSeconds , measGcWallSeconds = add measGcWallSeconds , measGcCpuSeconds = add measGcCpuSeconds } {-# INLINE measure #-} -- | The amount of time a benchmark must run for in order for us to -- have some trust in the raw measurement. -- -- We set this threshold so that we can generate enough data to later -- perform meaningful statistical analyses. -- -- The threshold is 30 milliseconds. One use of 'runBenchmark' must -- accumulate more than 300 milliseconds of total measurements above -- this threshold before it will finish. threshold :: Double threshold = 0.03 {-# INLINE threshold #-} runBenchmarkable :: Benchmarkable -> Int64 -> (a -> a -> a) -> (Int64 -> IO () -> IO a) -> IO a runBenchmarkable Benchmarkable{..} i comb f | perRun = work >>= go (i - 1) | otherwise = work where go 0 result = return result go !n !result = work >>= go (n - 1) . comb result count | perRun = 1 | otherwise = i work = do env <- allocEnv count let clean = cleanEnv count env run = runRepeatedly env count clean `seq` run `seq` evaluate $ rnf env f count run `finally` clean {-# INLINE work #-} {-# INLINE runBenchmarkable #-} runBenchmarkable_ :: Benchmarkable -> Int64 -> IO () runBenchmarkable_ bm i = runBenchmarkable bm i (\() () -> ()) (const id) {-# INLINE runBenchmarkable_ #-} -- | Run a single benchmark, and return measurements collected while -- executing it, along with the amount of time the measurement process -- took. -- -- This function initializes the timer before measuring time (refer to the -- documentation for 'initializeTime' for more details). runBenchmark :: Benchmarkable -> Double -- ^ Lower bound on how long the benchmarking process -- should take. In practice, this time limit may be -- exceeded in order to generate enough data to perform -- meaningful statistical analyses. -> IO (V.Vector Measured, Double) runBenchmark bm timeLimit = do initializeTime runBenchmarkable_ bm 1 start <- performGC >> getTime let loop [] !_ !_ _ = error "unpossible!" loop (iters:niters) prev count acc = do (m, endTime) <- measure bm iters let overThresh = max 0 (measTime m - threshold) + prev -- We try to honour the time limit, but we also have more -- important constraints: -- -- We must generate enough data that bootstrapping won't -- simply crash. -- -- We need to generate enough measurements that have long -- spans of execution to outweigh the (rather high) cost of -- measurement. 
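            -- In concrete terms, the test below stops sampling only once the
            -- wall-clock budget has been spent, more than 10x 'threshold'
            -- (i.e. over 300 ms) of above-threshold measurement time has
            -- accumulated, and at least four samples have been collected.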
if endTime - start >= timeLimit && overThresh > threshold * 10 && count >= (4 :: Int) then do let !v = V.reverse (V.fromList acc) return (v, endTime - start) else loop niters overThresh (count+1) (m:acc) loop (squish (unfoldr series 1)) 0 0 [] -- Our series starts its growth very slowly when we begin at 1, so we -- eliminate repeated values. squish :: (Eq a) => [a] -> [a] squish ys = foldr go [] ys where go x xs = x : dropWhile (==x) xs series :: Double -> Maybe (Int64, Double) series k = Just (truncate l, l) where l = k * 1.05 -- | An empty structure. measured :: Measured measured = Measured { measTime = 0 , measCpuTime = 0 , measCycles = 0 , measIters = 0 , measAllocated = minBound , measPeakMbAllocated = minBound , measNumGcs = minBound , measBytesCopied = minBound , measMutatorWallSeconds = bad , measMutatorCpuSeconds = bad , measGcWallSeconds = bad , measGcCpuSeconds = bad } where bad = -1/0 -- | Apply the difference between two sets of GC statistics to a -- measurement. applyGCStatistics :: Maybe GCStatistics -- ^ Statistics gathered at the __end__ of a run, post-GC. -> Maybe GCStatistics -- ^ Statistics gathered at the __end__ of a run, pre-GC. -> Maybe GCStatistics -- ^ Statistics gathered at the __beginning__ of a run. -> Measured -- ^ Value to \"modify\". -> Measured applyGCStatistics (Just endPostGC) (Just endPreGC) (Just start) m = m { -- The choice of endPostGC or endPreGC is important. -- For bytes allocated/copied, and mutator statistics, we use -- endPostGC, because the intermediate performGC ensures they're up-to-date. -- The others (num GCs and GC cpu/wall seconds) must be diffed against -- endPreGC so that the extra performGC does not taint them. measAllocated = diff endPostGC gcStatsBytesAllocated , measPeakMbAllocated = gcStatsPeakMegabytesAllocated endPostGC , measNumGcs = diff endPreGC gcStatsNumGcs , measBytesCopied = diff endPostGC gcStatsBytesCopied , measMutatorWallSeconds = diff endPostGC gcStatsMutatorWallSeconds , measMutatorCpuSeconds = diff endPostGC gcStatsMutatorCpuSeconds , measGcWallSeconds = diff endPreGC gcStatsGcWallSeconds , measGcCpuSeconds = diff endPreGC gcStatsGcCpuSeconds } where diff a f = f a - f start applyGCStatistics _ _ _ m = m -- | Convert a number of seconds to a string. The string will consist -- of four decimal places, followed by a short description of the time -- units. secs :: Double -> String secs k | k < 0 = '-' : secs (-k) | k >= 1 = k `with` "s" | k >= 1e-3 = (k*1e3) `with` "ms" | k >= 1e-6 = (k*1e6) `with` "μs" | k >= 1e-9 = (k*1e9) `with` "ns" | k >= 1e-12 = (k*1e12) `with` "ps" | k >= 1e-15 = (k*1e15) `with` "fs" | k >= 1e-18 = (k*1e18) `with` "as" | otherwise = printf "%g s" k where with (t :: Double) (u :: String) | t >= 1e9 = printf "%.4g %s" t u | t >= 1e3 = printf "%.0f %s" t u | t >= 1e2 = printf "%.1f %s" t u | t >= 1e1 = printf "%.2f %s" t u | otherwise = printf "%.3f %s" t u -- | Set up time measurement. -- -- @criterion@ measures time using OS-specific APIs whenever possible for -- efficiency. On certain operating systems, such as macOS and Windows, one -- must explicitly initialize a timer (which 'initializeTime' accomplishes) -- before one can actually measure the current time (which 'getTime' -- accomplishes). -- -- It is imperative that you call 'initializeTime' before calling 'getTime'. -- (See [this bug report](https://github.com/haskell/criterion/issues/195) for an -- example of what can happen if you do not do so.) 
All of the 'IO'-returning -- functions in "Criterion.Main" make sure that this is done, but other -- functions (such as those in "Criterion.Measurement") do not guarantee this -- unless otherwise stated. foreign import ccall unsafe "criterion_inittime" initializeTime :: IO () -- | Read the CPU cycle counter. foreign import ccall unsafe "criterion_rdtsc" getCycles :: IO Word64 -- | Return the current wallclock time, in seconds since some -- arbitrary time. -- -- You /must/ call 'initializeTime' once before calling this function! -- Refer to the documentation for 'initializeTime' for more details. foreign import ccall unsafe "criterion_gettime" getTime :: IO Double -- | Return the amount of elapsed CPU time, combining user and kernel -- (system) time into a single measure. foreign import ccall unsafe "criterion_getcputime" getCPUTime :: IO Double criterion-measurement-0.2.2.0/src/Criterion/Measurement/0000755000000000000000000000000007346545000021375 5ustar0000000000000000criterion-measurement-0.2.2.0/src/Criterion/Measurement/Types.hs0000644000000000000000000006121507346545000023042 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# LANGUAGE Trustworthy #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE DeriveDataTypeable, DeriveGeneric, GADTs, RecordWildCards #-} {-# OPTIONS_GHC -funbox-strict-fields #-} -- | -- Module : Criterion.Measurement.Types -- Copyright : (c) 2009-2014 Bryan O'Sullivan -- -- License : BSD-style -- Maintainer : bos@serpentine.com -- Stability : experimental -- Portability : GHC -- -- Types for benchmarking. -- -- The core type is 'Benchmarkable', which admits both pure functions -- and 'IO' actions. -- -- For a pure function of type @a -> b@, the benchmarking harness -- calls this function repeatedly, each time with a different 'Int64' -- argument (the number of times to run the function in a loop), and -- reduces the result the function returns to weak head normal form. -- -- For an action of type @IO a@, the benchmarking harness calls the -- action repeatedly, but does not reduce the result. module Criterion.Measurement.Types ( -- * Benchmark descriptions Benchmarkable(..) , Benchmark(..) -- * Measurements , Measured(..) , fromInt , toInt , fromDouble , toDouble , measureAccessors , measureKeys , measure , rescale -- * Benchmark construction , env , envWithCleanup , perBatchEnv , perBatchEnvWithCleanup , perRunEnv , perRunEnvWithCleanup , toBenchmarkable , bench , bgroup , addPrefix , benchNames -- ** Evaluation control , nf , whnf , nfIO , whnfIO , nfAppIO , whnfAppIO ) where import Control.DeepSeq (NFData(rnf)) import Criterion.Measurement.Types.Internal (fakeEnvironment, nf', whnf') import Data.Aeson (FromJSON(..), ToJSON(..)) import Data.Binary (Binary(..)) import Data.Data (Data, Typeable) import Data.Int (Int64) import Data.Map (Map, fromList) import GHC.Generics (Generic) import Prelude () import Prelude.Compat import qualified Data.Vector as V import qualified Data.Vector.Unboxed as U -- | A pure function or impure action that can be benchmarked. The -- 'Int64' parameter indicates the number of times to run the given -- function or action. data Benchmarkable = forall a . NFData a => Benchmarkable { allocEnv :: Int64 -> IO a , cleanEnv :: Int64 -> a -> IO () , runRepeatedly :: a -> Int64 -> IO () , perRun :: Bool } noop :: Monad m => a -> m () noop = const $ return () {-# INLINE noop #-} -- | Construct a 'Benchmarkable' value from an impure action, where the 'Int64' -- parameter indicates the number of times to run the action. 
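--
-- For illustration, a small sketch (@myBenchmarkable@ and @step@ are made-up
-- names, not part of this API): the supplied function is responsible for
-- performing the work the requested number of times.
--
-- > myBenchmarkable :: Benchmarkable
-- > myBenchmarkable = toBenchmarkable $ \n ->
-- >   let go k | k <= 0    = return ()
-- >            | otherwise = step >> go (k - 1)
-- >   in  go n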
toBenchmarkable :: (Int64 -> IO ()) -> Benchmarkable toBenchmarkable f = Benchmarkable noop (const noop) (const f) False {-# INLINE toBenchmarkable #-} -- | A collection of measurements made while benchmarking. -- -- Measurements related to garbage collection are tagged with __GC__. -- They will only be available if a benchmark is run with @\"+RTS -- -T\"@. -- -- __Packed storage.__ When GC statistics cannot be collected, GC -- values will be set to huge negative values. If a field is labeled -- with \"__GC__\" below, use 'fromInt' and 'fromDouble' to safely -- convert to \"real\" values. data Measured = Measured { measTime :: !Double -- ^ Total wall-clock time elapsed, in seconds. , measCpuTime :: !Double -- ^ Total CPU time elapsed, in seconds. Includes both user and -- kernel (system) time. , measCycles :: !Int64 -- ^ Cycles, in unspecified units that may be CPU cycles. (On -- i386 and x86_64, this is measured using the @rdtsc@ -- instruction.) , measIters :: !Int64 -- ^ Number of loop iterations measured. , measAllocated :: !Int64 -- ^ __(GC)__ Number of bytes allocated. Access using 'fromInt'. , measPeakMbAllocated :: !Int64 -- ^ __(GC)__ Max number of megabytes allocated. Access using 'fromInt'. , measNumGcs :: !Int64 -- ^ __(GC)__ Number of garbage collections performed. Access -- using 'fromInt'. , measBytesCopied :: !Int64 -- ^ __(GC)__ Number of bytes copied during garbage collection. -- Access using 'fromInt'. , measMutatorWallSeconds :: !Double -- ^ __(GC)__ Wall-clock time spent doing real work -- (\"mutation\"), as distinct from garbage collection. Access -- using 'fromDouble'. , measMutatorCpuSeconds :: !Double -- ^ __(GC)__ CPU time spent doing real work (\"mutation\"), as -- distinct from garbage collection. Access using 'fromDouble'. , measGcWallSeconds :: !Double -- ^ __(GC)__ Wall-clock time spent doing garbage collection. -- Access using 'fromDouble'. , measGcCpuSeconds :: !Double -- ^ __(GC)__ CPU time spent doing garbage collection. Access -- using 'fromDouble'. } deriving (Eq, Read, Show, Typeable, Data, Generic) instance FromJSON Measured where parseJSON v = do (a,b,c,d,e,f,g,h,i,j,k,l) <- parseJSON v -- The first four fields are not subject to the encoding policy: return $ Measured a b c d (int e) (int f) (int g) (int h) (db i) (db j) (db k) (db l) where int = toInt; db = toDouble -- Here we treat the numeric fields as `Maybe Int64` and `Maybe Double` -- and we use a specific policy for deciding when they should be Nothing, -- which becomes null in JSON. instance ToJSON Measured where toJSON Measured{..} = toJSON (measTime, measCpuTime, measCycles, measIters, i measAllocated, i measPeakMbAllocated, i measNumGcs, i measBytesCopied, d measMutatorWallSeconds, d measMutatorCpuSeconds, d measGcWallSeconds, d measGcCpuSeconds) where i = fromInt; d = fromDouble instance NFData Measured where rnf Measured{} = () -- THIS MUST REFLECT THE ORDER OF FIELDS IN THE DATA TYPE. -- -- The ordering is used by Javascript code to pick out the correct -- index into the vector that represents a Measured value in that -- world. measureAccessors_ :: [(String, (Measured -> Maybe Double, String))] measureAccessors_ = [ ("time", (Just . measTime, "wall-clock time")) , ("cpuTime", (Just . measCpuTime, "CPU time")) , ("cycles", (Just . fromIntegral . measCycles, "CPU cycles")) , ("iters", (Just . fromIntegral . measIters, "loop iterations")) , ("allocated", (fmap fromIntegral . fromInt . measAllocated, "(+RTS -T) bytes allocated")) , ("peakMbAllocated", (fmap fromIntegral . fromInt . 
measPeakMbAllocated, "(+RTS -T) peak megabytes allocated")) , ("numGcs", (fmap fromIntegral . fromInt . measNumGcs, "(+RTS -T) number of garbage collections")) , ("bytesCopied", (fmap fromIntegral . fromInt . measBytesCopied, "(+RTS -T) number of bytes copied during GC")) , ("mutatorWallSeconds", (fromDouble . measMutatorWallSeconds, "(+RTS -T) wall-clock time for mutator threads")) , ("mutatorCpuSeconds", (fromDouble . measMutatorCpuSeconds, "(+RTS -T) CPU time spent running mutator threads")) , ("gcWallSeconds", (fromDouble . measGcWallSeconds, "(+RTS -T) wall-clock time spent doing GC")) , ("gcCpuSeconds", (fromDouble . measGcCpuSeconds, "(+RTS -T) CPU time spent doing GC")) ] -- | Field names in a 'Measured' record, in the order in which they -- appear. measureKeys :: [String] measureKeys = map fst measureAccessors_ -- | Field names and accessors for a 'Measured' record. measureAccessors :: Map String (Measured -> Maybe Double, String) measureAccessors = fromList measureAccessors_ -- | Normalise every measurement as if 'measIters' was 1. -- -- ('measIters' itself is left unaffected.) rescale :: Measured -> Measured rescale m@Measured{..} = m { measTime = d measTime , measCpuTime = d measCpuTime , measCycles = i measCycles -- skip measIters , measNumGcs = i measNumGcs , measBytesCopied = i measBytesCopied , measMutatorWallSeconds = d measMutatorWallSeconds , measMutatorCpuSeconds = d measMutatorCpuSeconds , measGcWallSeconds = d measGcWallSeconds , measGcCpuSeconds = d measGcCpuSeconds } where d k = maybe k (/ iters) (fromDouble k) i k = maybe k (round . (/ iters)) (fromIntegral <$> fromInt k) iters = fromIntegral measIters :: Double -- | Convert a (possibly unavailable) GC measurement to a true value. -- If the measurement is a huge negative number that corresponds to -- \"no data\", this will return 'Nothing'. fromInt :: Int64 -> Maybe Int64 fromInt i | i == minBound = Nothing | otherwise = Just i -- | Convert from a true value back to the packed representation used -- for GC measurements. toInt :: Maybe Int64 -> Int64 toInt Nothing = minBound toInt (Just i) = i -- | Convert a (possibly unavailable) GC measurement to a true value. -- If the measurement is a huge negative number that corresponds to -- \"no data\", this will return 'Nothing'. fromDouble :: Double -> Maybe Double fromDouble d | isInfinite d || isNaN d = Nothing | otherwise = Just d -- | Convert from a true value back to the packed representation used -- for GC measurements. toDouble :: Maybe Double -> Double toDouble Nothing = -1/0 toDouble (Just d) = d instance Binary Measured where put Measured{..} = do put measTime; put measCpuTime; put measCycles; put measIters put measAllocated; put measPeakMbAllocated; put measNumGcs; put measBytesCopied put measMutatorWallSeconds; put measMutatorCpuSeconds put measGcWallSeconds; put measGcCpuSeconds get = Measured <$> get <*> get <*> get <*> get <*> get <*> get <*> get <*> get <*> get <*> get <*> get <*> get -- | Apply an argument to a function, and evaluate the result to -- normal form (NF). nf :: NFData b => (a -> b) -> a -> Benchmarkable nf f x = toBenchmarkable (nf' rnf f x) -- | Apply an argument to a function, and evaluate the result to weak -- head normal form (WHNF). whnf :: (a -> b) -> a -> Benchmarkable whnf f x = toBenchmarkable (whnf' f x) -- | Perform an action, then evaluate its result to normal form (NF). -- This is particularly useful for forcing a lazy 'IO' action to be -- completely performed. 
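--
-- For example (an illustrative sketch; the benchmark name and file path are
-- made up):
--
-- > bench "read and force a file" $ nfIO (readFile "data/input.txt")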
-- -- If the construction of the 'IO a' value is an important factor -- in the benchmark, it is best to use 'nfAppIO' instead. nfIO :: NFData a => IO a -> Benchmarkable nfIO a = toBenchmarkable (nfIO' rnf a) -- | Perform an action, then evaluate its result to weak head normal -- form (WHNF). This is useful for forcing an 'IO' action whose result -- is an expression to be evaluated down to a more useful value. -- -- If the construction of the 'IO a' value is an important factor -- in the benchmark, it is best to use 'whnfAppIO' instead. whnfIO :: IO a -> Benchmarkable whnfIO a = toBenchmarkable (whnfIO' a) -- | Apply an argument to a function which performs an action, then -- evaluate its result to normal form (NF). -- This function constructs the 'IO b' value on each iteration, -- similar to 'nf'. -- This is particularly useful for 'IO' actions where the bulk of the -- work is not bound by IO, but by pure computations that may -- optimize away if the argument is known statically, as in 'nfIO'. -- See issue #189 for more info. nfAppIO :: NFData b => (a -> IO b) -> a -> Benchmarkable nfAppIO f v = toBenchmarkable (nfAppIO' rnf f v) -- | Perform an action, then evaluate its result to weak head normal -- form (WHNF). -- This function constructs the 'IO b' value on each iteration, -- similar to 'whnf'. -- This is particularly useful for 'IO' actions where the bulk of the -- work is not bound by IO, but by pure computations that may -- optimize away if the argument is known statically, as in 'nfIO'. -- See issue #189 for more info. whnfAppIO :: (a -> IO b) -> a -> Benchmarkable whnfAppIO f v = toBenchmarkable (whnfAppIO' f v) -- Along with nf' and whnf', the following two functions are the core -- benchmarking loops. They have been carefully constructed to avoid -- allocation while also evaluating @a@. -- -- These functions must not be inlined. There are two possible issues that -- can arise if they are inlined. First, the work is often floated out of -- the loop, which creates a nonsense benchmark. Second, the benchmark code -- itself could be changed by the user's optimization level. By marking them -- @NOINLINE@, the core benchmark code is always the same. -- -- See #183 and #184 for discussion. -- | Generate a function that will run an action a given number of times, -- reducing it to normal form each time. nfIO' :: (a -> ()) -> IO a -> (Int64 -> IO ()) nfIO' reduce a = go where go n | n <= 0 = return () | otherwise = do x <- a reduce x `seq` go (n-1) {-# NOINLINE nfIO' #-} -- | Generate a function that will run an action a given number of times. whnfIO' :: IO a -> (Int64 -> IO ()) whnfIO' a = go where go n | n <= 0 = return () | otherwise = do x <- a x `seq` go (n-1) {-# NOINLINE whnfIO' #-} -- | Generate a function which applies an argument to a function a given -- number of times, running its action and reducing the result to normal form. nfAppIO' :: (b -> ()) -> (a -> IO b) -> a -> (Int64 -> IO ()) nfAppIO' reduce f v = go where go n | n <= 0 = return () | otherwise = do x <- f v reduce x `seq` go (n-1) {-# NOINLINE nfAppIO' #-} -- | Generate a function which applies an argument to a function a given -- number of times, running its action and reducing the result to -- weak-head normal form. whnfAppIO' :: (a -> IO b) -> a -> (Int64 -> IO ()) whnfAppIO' f v = go where go n | n <= 0 = return () | otherwise = do x <- f v x `seq` go (n-1) {-# NOINLINE whnfAppIO' #-} -- | Specification of a collection of benchmarks and environments. 
A -- benchmark may consist of: -- -- * An environment that creates input data for benchmarks, created -- with 'env'. -- -- * A single 'Benchmarkable' item with a name, created with 'bench'. -- -- * A (possibly nested) group of 'Benchmark's, created with 'bgroup'. data Benchmark where Environment :: NFData env => IO env -> (env -> IO a) -> (env -> Benchmark) -> Benchmark Benchmark :: String -> Benchmarkable -> Benchmark BenchGroup :: String -> [Benchmark] -> Benchmark -- | Run a benchmark (or collection of benchmarks) in the given -- environment. The purpose of an environment is to lazily create -- input data to pass to the functions that will be benchmarked. -- -- A common example of environment data is input that is read from a -- file. Another is a large data structure constructed in-place. -- -- __Motivation.__ In earlier versions of criterion, all benchmark -- inputs were always created when a program started running. By -- deferring the creation of an environment when its associated -- benchmarks need the its, we avoid two problems that this strategy -- caused: -- -- * Memory pressure distorted the results of unrelated benchmarks. -- If one benchmark needed e.g. a gigabyte-sized input, it would -- force the garbage collector to do extra work when running some -- other benchmark that had no use for that input. Since the data -- created by an environment is only available when it is in scope, -- it should be garbage collected before other benchmarks are run. -- -- * The time cost of generating all needed inputs could be -- significant in cases where no inputs (or just a few) were really -- needed. This occurred often, for instance when just one out of a -- large suite of benchmarks was run, or when a user would list the -- collection of benchmarks without running any. -- -- __Creation.__ An environment is created right before its related -- benchmarks are run. The 'IO' action that creates the environment -- is run, then the newly created environment is evaluated to normal -- form (hence the 'NFData' constraint) before being passed to the -- function that receives the environment. -- -- __Complex environments.__ If you need to create an environment that -- contains multiple values, simply pack the values into a tuple. -- -- __Lazy pattern matching.__ In situations where a \"real\" -- environment is not needed, e.g. if a list of benchmark names is -- being generated, a value which throws an exception will be passed -- to the function that receives the environment. This avoids the -- overhead of generating an environment that will not actually be -- used. -- -- The function that receives the environment must use lazy pattern -- matching to deconstruct the tuple (e.g., @~(x, y)@, not @(x, y)@), -- as use of strict pattern matching will cause a crash if an -- exception-throwing value is passed in. -- -- __Example.__ This program runs benchmarks in an environment that -- contains two values. The first value is the contents of a text -- file; the second is a string. Pay attention to the use of a lazy -- pattern to deconstruct the tuple in the function that returns the -- benchmarks to be run. -- -- > setupEnv = do -- > let small = replicate 1000 (1 :: Int) -- > big <- map length . words <$> readFile "/usr/dict/words" -- > return (small, big) -- > -- > main = defaultMain [ -- > -- notice the lazy pattern match here! -- > env setupEnv $ \ ~(small,big) -> bgroup "main" [ -- > bgroup "small" [ -- > bench "length" $ whnf length small -- > , bench "length . filter" $ whnf (length . 
filter (==1)) small -- > ] -- > , bgroup "big" [ -- > bench "length" $ whnf length big -- > , bench "length . filter" $ whnf (length . filter (==1)) big -- > ] -- > ] ] -- -- __Discussion.__ The environment created in the example above is -- intentionally /not/ ideal. As Haskell's scoping rules suggest, the -- variable @big@ is in scope for the benchmarks that use only -- @small@. It would be better to create a separate environment for -- @big@, so that it will not be kept alive while the unrelated -- benchmarks are being run. env :: NFData env => IO env -- ^ Create the environment. The environment will be evaluated to -- normal form before being passed to the benchmark. -> (env -> Benchmark) -- ^ Take the newly created environment and make it available to -- the given benchmarks. -> Benchmark env alloc = Environment alloc noop -- | Same as `env`, but allows for an additional callback -- to clean up the environment. Resource clean up is exception safe, that is, -- it runs even if the 'Benchmark' throws an exception. envWithCleanup :: NFData env => IO env -- ^ Create the environment. The environment will be evaluated to -- normal form before being passed to the benchmark. -> (env -> IO a) -- ^ Clean up the created environment. -> (env -> Benchmark) -- ^ Take the newly created environment and make it available to -- the given benchmarks. -> Benchmark envWithCleanup = Environment -- | Create a Benchmarkable where a fresh environment is allocated for every -- batch of runs of the benchmarkable. -- -- The environment is evaluated to normal form before the benchmark is run. -- -- When using 'whnf', 'whnfIO', etc., Criterion creates a 'Benchmarkable' -- which runs a batch of @N@ repeat runs of that expression. Criterion may -- run any number of these batches to get accurate measurements. Environments -- created by 'env' and 'envWithCleanup' are shared across all these batches -- of runs. -- -- This is fine for simple benchmarks on static input, but when benchmarking -- IO operations where these operations can modify (and especially grow) the -- environment, this means that later batches might have their accuracy affected -- by, for example, longer garbage collection pauses. -- -- An example: suppose we want to benchmark writing to a Chan. If we allocate -- the Chan using the environment and our benchmark consists of @writeChan env ()@, -- the contents and thus size of the Chan will grow with every repeat. If -- Criterion runs 1,000 batches of 1,000 repeats, the result is that the -- channel will have 999,000 items in it by the time the last batch is run. -- Since GHC's GC has to copy the live set on every major GC, this means our last -- set of writes will suffer a lot of noise from the previous repeats. -- -- By allocating a fresh environment for every batch of runs, this function -- should eliminate this effect. perBatchEnv :: (NFData env, NFData b) => (Int64 -> IO env) -- ^ Create an environment for a batch of N runs. The environment will be -- evaluated to normal form before running. -> (env -> IO b) -- ^ Function returning the IO action that should be benchmarked with the -- newly generated environment. -> Benchmarkable perBatchEnv alloc = perBatchEnvWithCleanup alloc (const noop) -- | Same as `perBatchEnv`, but allows for an additional callback -- to clean up the environment. Resource clean up is exception safe, that is, -- it runs even if the 'Benchmark' throws an exception.
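--
-- To make the batch-environment API concrete, here is a small usage sketch
-- (illustrative only; the benchmark name and workload are made up). It
-- type-checks against 'perBatchEnv' as documented above: every batch of @n@
-- iterations receives a freshly built list, so earlier batches cannot grow
-- the heap that later batches see.
--
-- > bench "sum a fresh list per batch" $
-- >   perBatchEnv (\n -> return [1 .. n]) (\xs -> return $! sum xs)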
perBatchEnvWithCleanup :: (NFData env, NFData b) => (Int64 -> IO env) -- ^ Create an environment for a batch of N runs. The environment will be -- evaluated to normal form before running. -> (Int64 -> env -> IO ()) -- ^ Clean up the created environment. -> (env -> IO b) -- ^ Function returning the IO action that should be benchmarked with the -- newly generated environment. -> Benchmarkable perBatchEnvWithCleanup alloc clean work = Benchmarkable alloc clean (nfIO' rnf . work) False -- | Create a Benchmarkable where a fresh environment is allocated for every -- run of the operation to benchmark. This is useful for benchmarking mutable -- operations that need a fresh environment, such as sorting a mutable Vector. -- -- As with 'env' and 'perBatchEnv', the environment is evaluated to normal form -- before the benchmark is run. -- -- This introduces extra noise and results in reduced accuracy compared to other -- Criterion benchmarks, but allows easier benchmarking of mutable operations -- than was previously possible. perRunEnv :: (NFData env, NFData b) => IO env -- ^ Action that creates the environment for a single run. -> (env -> IO b) -- ^ Function returning the IO action that should be benchmarked with the -- newly generated environment. -> Benchmarkable perRunEnv alloc = perRunEnvWithCleanup alloc noop -- | Same as `perRunEnv`, but allows for an additional callback -- to clean up the environment. Resource clean up is exception safe, that is, -- it runs even if the 'Benchmark' throws an exception. perRunEnvWithCleanup :: (NFData env, NFData b) => IO env -- ^ Action that creates the environment for a single run. -> (env -> IO ()) -- ^ Clean up the created environment. -> (env -> IO b) -- ^ Function returning the IO action that should be benchmarked with the -- newly generated environment. -> Benchmarkable perRunEnvWithCleanup alloc clean work = bm { perRun = True } where bm = perBatchEnvWithCleanup (const alloc) (const clean) work -- | Create a single benchmark. bench :: String -- ^ A name to identify the benchmark. -> Benchmarkable -- ^ An activity to be benchmarked. -> Benchmark bench = Benchmark -- | Group several benchmarks together under a common name. bgroup :: String -- ^ A name to identify the group of benchmarks. -> [Benchmark] -- ^ Benchmarks to group under this name. -> Benchmark bgroup = BenchGroup -- | Add the given prefix to a name. If the prefix is empty, the name -- is returned unmodified. Otherwise, the prefix and name are -- separated by a @\'\/\'@ character. addPrefix :: String -- ^ Prefix. -> String -- ^ Name. -> String addPrefix "" desc = desc addPrefix pfx desc = pfx ++ '/' : desc -- | Retrieve the names of all benchmarks. Grouped benchmarks are -- prefixed with the name of the group they're in. benchNames :: Benchmark -> [String] benchNames (Environment _ _ b) = benchNames (b fakeEnvironment) benchNames (Benchmark d _) = [d] benchNames (BenchGroup d bs) = map (addPrefix d) . concatMap benchNames $ bs instance Show Benchmark where show (Environment _ _ b) = "Environment _ _" ++ show (b fakeEnvironment) show (Benchmark d _) = "Benchmark " ++ show d show (BenchGroup d _) = "BenchGroup " ++ show d measure :: (U.Unbox a) => (Measured -> a) -> V.Vector Measured -> U.Vector a measure f v = U.convert .
V.map f $ v criterion-measurement-0.2.2.0/src/Criterion/Measurement/Types/0000755000000000000000000000000007346545000022501 5ustar0000000000000000criterion-measurement-0.2.2.0/src/Criterion/Measurement/Types/Internal.hs0000644000000000000000000000733207346545000024616 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} -- Ensure that nf' and whnf' are always optimized, even if -- criterion-measurement is compiled with -O0 or -fprof-auto (see #184). {-# OPTIONS_GHC -O2 -fno-prof-auto #-} -- Make the function applications in nf' and whnf' strict (avoiding allocation) -- and avoid floating out the computations. {-# OPTIONS_GHC -fno-full-laziness #-} -- | -- Module : Criterion.Measurement.Types.Internal -- Copyright : (c) 2017 Ryan Scott -- -- License : BSD-style -- Maintainer : bos@serpentine.com -- Stability : experimental -- Portability : GHC -- -- Exports 'fakeEnvironment'. module Criterion.Measurement.Types.Internal ( fakeEnvironment , nf' , whnf' , SPEC(..) ) where import Data.Int (Int64) import Control.Exception #if MIN_VERSION_ghc_prim(0,3,1) import GHC.Types (SPEC(..)) #else import GHC.Exts (SpecConstrAnnotation(..)) #endif -- | A dummy environment that is passed to functions that create benchmarks -- from environments when no concrete environment is available. fakeEnvironment :: env fakeEnvironment = error $ unlines [ "Criterion atttempted to retrieve a non-existent environment!" , "\tPerhaps you forgot to use lazy pattern matching in a function which" , "\tconstructs benchmarks from an environment?" , "\t(see the documentation for `env` for details)" ] -- Along with Criterion.Types.nfIO' and Criterion.Types.whnfIO', the following -- two functions are the core benchmarking loops. They have been carefully -- constructed to avoid allocation while also evaluating @f x@. -- -- Because these functions are pure, GHC is particularly smart about optimizing -- them. We must turn off @-ffull-laziness@ to prevent the computation from -- being floated out of the loop. -- -- For a similar reason, these functions must not be inlined. There are two -- possible issues that can arise if they are inlined. First, the work is often -- floated out of the loop, which creates a nonsense benchmark. Second, the -- benchmark code itself could be changed by the user's optimization level. By -- marking them @NOINLINE@, the core benchmark code is always the same. -- -- To ensure that the behavior of these functions remains independent of -- -fspec-constr-count, we force SpecConst optimization by passing SPEC. -- -- Finally, it's important that both branches of the loop depend on the state -- token from the IO action. This is achieved by using `evaluate` rather than `let !y = f x` -- in order to force the value to whnf. `evaluate` is in the IO monad and therefore the state -- token needs to be passed through the loop. -- -- See ghc#21948 where a change in eta-expansion behaviour -- caused the work to be performed in the wrong place because the otherwise branch -- did not depend on the state token at all, and the whole loop could be evaluated to -- a single return function before being run in the IO monad. -- -- See #183, #184 and #264 for discussion. -- | Generate a function which applies an argument to a function a -- given number of times, reducing the result to normal form. 
nf' :: (b -> ()) -> (a -> b) -> a -> (Int64 -> IO ()) nf' reduce f x = go SPEC where go :: SPEC -> Int64 -> IO () go !_ n | n <= 0 = return () | otherwise = do y <- evaluate (f x) reduce y `seq` go SPEC (n-1) {-# NOINLINE nf' #-} -- | Generate a function which applies an argument to a function a -- given number of times. whnf' :: (a -> b) -> a -> (Int64 -> IO ()) whnf' f x = go SPEC where go :: SPEC -> Int64 -> IO () go !_ n | n <= 0 = return () | otherwise = do _ <- evaluate (f x) go SPEC (n-1) {-# NOINLINE whnf' #-} #if !(MIN_VERSION_ghc_prim(0,3,1)) data SPEC = SPEC | SPEC2 {-# ANN type SPEC ForceSpecConstr #-} #endif