pax_global_header00006660000000000000000000000064145667262320014527gustar00rootroot0000000000000052 comment=611ee84b9bc17e7eccc82ec29f461a7e63e68319 kcas-0.7.0/000077500000000000000000000000001456672623200124545ustar00rootroot00000000000000kcas-0.7.0/.dockerignore000066400000000000000000000000071456672623200151250ustar00rootroot00000000000000_build kcas-0.7.0/.gitattributes000066400000000000000000000000551456672623200153470ustar00rootroot00000000000000# To work around MDX issues *.md text eol=lf kcas-0.7.0/.github/000077500000000000000000000000001456672623200140145ustar00rootroot00000000000000kcas-0.7.0/.github/workflows/000077500000000000000000000000001456672623200160515ustar00rootroot00000000000000kcas-0.7.0/.github/workflows/workflow.yml000066400000000000000000000017241456672623200204520ustar00rootroot00000000000000name: build-and-test on: pull_request: push: branches: - main jobs: build-windows: strategy: matrix: ocaml-compiler: - ocaml.5.0.0,ocaml-option-mingw - ocaml.5.1.1,ocaml-option-mingw runs-on: windows-latest env: QCHECK_MSG_INTERVAL: '60' steps: - name: Check out code uses: actions/checkout@v3 - name: Set up OCaml uses: ocaml/setup-ocaml@v2 with: ocaml-compiler: ${{ matrix.ocaml-compiler }} opam-repositories: | dra27: https://github.com/dra27/opam-repository.git#windows-5.0 default: https://github.com/ocaml-opam/opam-repository-mingw.git#sunset standard: https://github.com/ocaml/opam-repository.git - name: Install dependencies run: opam install . --deps-only --with-test - name: Build run: opam exec -- dune build - name: Test run: opam exec -- dune runtest kcas-0.7.0/.gitignore000066400000000000000000000000761456672623200144470ustar00rootroot00000000000000_build tmp *~ \.\#* \#*# *.install *.native *.byte **/.merlin kcas-0.7.0/.ocamlformat000066400000000000000000000000721456672623200147600ustar00rootroot00000000000000profile = default version = 0.26.1 exp-grouping=preserve kcas-0.7.0/.prettierrc000066400000000000000000000002151456672623200146360ustar00rootroot00000000000000{ "arrowParens": "avoid", "bracketSpacing": false, "printWidth": 80, "semi": false, "singleQuote": true, "proseWrap": "always" } kcas-0.7.0/CHANGES.md000066400000000000000000000072611456672623200140540ustar00rootroot00000000000000## 0.7.0 - Numerous minor internal improvements (@polytypic) - Added many benchmarks to allow better understanding of the overheads of composable transactions (@polytypic) - Exposed shape of `_ Loc.t` to avoid float array pessimization (@polytypic) - Made `Accumulator` automatically scaling and removed optional `n_way` arguments (@polytypic) - Use polymorphic variant for `mode` (@polytypic) - Add `?backoff` to `Loc.compare_and_set` (@polytypic) - Remove the Op API (@polytypic, @lyrm) - Fix `Hashtbl.clear` (@polytypic) - Fix single location updates to be linearizable (@polytypic) - Add `Xt.compare_and_set` (@polytypic) - Add `Dllist.create_node value` (@polytypic) - Workarounds for CSE optimization (@polytypic) - Changed to use `(implicit_transitive_deps false)` (@polytypic) - Move `Backoff` module to its own `backoff` package (@lyrm, @polytypic) - Support padding to avoid false sharing (@polytypic) - Pass through `?timeoutf` to blocking operations on data structures (@polytypic) - Ported to OCaml 4.13 (@polytypic) ## 0.6.1 - Ported to OCaml 4.14 (@polytypic) ## 0.6.0 - Add timeout support to potentially blocking operations (@polytypic) - Add explicit `~xt` parameter to `Xt.call` to make it polymorphic (@polytypic) ## 0.5.3 - Fix to also snapshot and rollback post commit actions (@polytypic) - Fix `Loc.compare_and_set` to have strong semantics (@polytypic) - Fix single location no-op updates to be strictly serializable (@polytypic) - Add `Dllist.move_l node list` and `Dllist.move_r node list` (@polytypic) ## 0.5.2 - Improve `Hashtbl` read-write performance and add `swap` (@polytypic) - Avoid some unnecessary verifies of read-only CMP operations (@polytypic) ## 0.5.1 - Add synchronizing variable `Mvar` to `kcas_data` (@polytypic) - Fix to allow retry from within `Xt.update` and `Xt.modify` (@polytypic) ## 0.5.0 - Add nested conditional transaction support (@polytypic) - Add explicit location validation support (@polytypic) ## 0.4.0 - Allocation of location ids in a transaction log friendly order (@polytypic) - Per location operating mode selection (@Dashy-Dolphin, review: @polytypic) - Injectivity `!'a Kcas_data.Dllist.t` annotation (@polytypic) ## 0.3.1 - Added doubly-linked list `Dllist` to `kcas_data` (@polytypic) - Minor optimizations (@polytypic) ## 0.3.0 - Remove the `Tx` API (@polytypic) - Add blocking support to turn kcas into a proper STM (@polytypic, review: @lyrm) - Add periodic validation of transactions (@polytypic) ## 0.2.4 - Introduce `kcas_data` companion package of composable lock-free data structures (@polytypic) - Add `is_in_log` operation to determine whether a location has been accessed by a transaction (@polytypic) - Add `Loc.modify` (@polytypic) - Add transactional `swap` operation to exchange contents of two locations (@polytypic) - Injectivity `!'a Loc.t` and variance `+'a Tx.t` annotations (@polytypic) ## 0.2.3 - Add support for post commit actions to transactions (@polytypic) - Bring `Xt` and `Tx` access combinators to parity and add `compare_and_swap` (@polytypic) ## 0.2.2 - New explicit transaction log passing API based on idea by @gasche (@polytypic, review: @samoht and @lyrm) ## 0.2.1 - New k-CAS-n-CMP algorithm extending the GKMZ algorithm (@polytypic, review: @bartoszmodelski) ## 0.2.0 - Complete redesign adding a new transaction API (@polytypic, review: @bartoszmodelski) ## 0.1.8 - Fix a bug in GKMZ implementation (@polytypic, review: @bartoszmodelski) ## 0.1.7 - Change to use the new GKMZ algorithm (@polytypic, review: @bartoszmodelski) ## 0.1.6 - Add preflights sorting and checks (@bartoszmodelski, review: @polytypic) ## 0.1.5 - Republish in opam (update opam, dune) (@tmcgilchrist, review: @Sudha247) kcas-0.7.0/CODE_OF_CONDUCT.md000066400000000000000000000007031456672623200152530ustar00rootroot00000000000000# Code of Conduct This project has adopted the [OCaml Code of Conduct](https://github.com/ocaml/code-of-conduct/blob/main/CODE_OF_CONDUCT.md). # Enforcement This project follows the OCaml Code of Conduct [enforcement policy](https://github.com/ocaml/code-of-conduct/blob/main/CODE_OF_CONDUCT.md#enforcement). To report any violations, please contact: - Carine Morel - Sudha Parimala kcas-0.7.0/HACKING.md000066400000000000000000000011431456672623200140410ustar00rootroot00000000000000### Formatting This project uses [ocamlformat](https://github.com/ocaml-ppx/ocamlformat) (for OCaml) and [prettier](https://prettier.io/) (for Markdown). ### To make a new release 1. Update [CHANGES.md](CHANGES.md). 2. Run `dune-release tag VERSION` to create a tag for the new `VERSION`. 3. Run `dune-release distrib` to create package locally. 4. Run `dune-release publish distrib` to create release on GitHub. 5. Run `opam publish --tag=VERSION` to create PR to [opam-repository](https://github.com/ocaml/opam-repository). 6. Run `./update-gh-pages-for-tag VERSION` to update the online documentation. kcas-0.7.0/LICENSE.md000066400000000000000000000016111456672623200140570ustar00rootroot00000000000000Copyright (c) 2016, KC Sivaramakrishnan Copyright (c) 2017, Nicolas ASSOUAD Copyright (c) 2018, Sadiq Jaffer Copyright (c) 2023, Vesa Karvonen Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. kcas-0.7.0/Makefile000066400000000000000000000001101456672623200141040ustar00rootroot00000000000000.PHONY: bench bench: @dune exec --release -- bench/main.exe -budget 1 kcas-0.7.0/README.md000066400000000000000000002277651456672623200137560ustar00rootroot00000000000000[API reference](https://ocaml-multicore.github.io/kcas/doc/) · [Benchmarks](https://bench.ci.dev/ocaml-multicore/kcas/branch/main/benchmark/default)
Kcas logo # **Kcas** — Software Transactional Memory for OCaml
[**Kcas**](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/index.html) provides a [software transactional memory](https://en.wikipedia.org/wiki/Software_transactional_memory) (STM) implementation based on an atomic [lock-free](https://en.wikipedia.org/wiki/Non-blocking_algorithm#Lock-freedom) multi-word [compare-and-set](https://en.wikipedia.org/wiki/Compare-and-swap) (MCAS) algorithm [enhanced with read-only compare operations](doc/gkmz-with-read-only-cmp-ops.md) and ability to block awaiting for changes. [**Kcas_data**](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/index.html) provides compositional lock-free data structures and primitives for communication and synchronization implemented using **Kcas**. Features and properties: - **_Efficient_**: In the common uncontended case only **k + 1** single-word CASes are required per k-CAS and, as a special case, 1-CAS requires only a single single-word CAS. - **_Lock-free_**: The underlying algorithm guarantees that at least one operation will be able to make progress. - **_Disjoint-access parallel_**: Unrelated operations progress independently, without interference, even if they occur at the same time. - **_Read-only compares_**: The algorithm supports [obstruction-free](https://en.wikipedia.org/wiki/Non-blocking_algorithm#Obstruction-freedom) read-only compare (CMP) operations that can be performed on overlapping locations in parallel without interference. - **_Blocking await_**: The algorithm supports timeouts and awaiting for changes to any number of shared memory locations. - **_Composable_**: Independently developed transactions can be composed with ease sequentially, conjunctively, conditionally, and disjunctively. In other words, [performance](https://bench.ci.dev/ocaml-multicore/kcas/branch/main/benchmark/default) should be acceptable and scalable for many use cases, the non-blocking properties should allow use in many contexts including those where locks are not acceptable, and the features provided should support most practical needs. **Kcas** is [published on **opam**](https://opam.ocaml.org/packages/kcas/) and is distributed under the [ISC license](LICENSE.md). [![OCaml-CI Build Status](https://img.shields.io/endpoint?url=https%3A%2F%2Fci.ocamllabs.io%2Fbadge%2Focaml-multicore%2Fkcas%2Fmain&logo=ocaml&style=flat-square)](https://ci.ocamllabs.io/github/ocaml-multicore/kcas) [![GitHub release (latest by date)](https://img.shields.io/github/v/release/ocaml-multicore/kcas?style=flat-square&color=09aa89)](https://github.com/ocaml-multicore/kcas/releases/latest) [![docs](https://img.shields.io/badge/doc-online-blue.svg?style=flat-square)](https://ocaml-multicore.github.io/kcas/doc/) ## Contents - [A quick tour](#a-quick-tour) - [Introduction](#introduction) - [Creating and manipulating individual shared memory locations](#creating-and-manipulating-individual-shared-memory-locations) - [Programming with transactions](#programming-with-transactions) - [A transactional lock-free stack](#a-transactional-lock-free-stack) - [A transactional lock-free queue](#a-transactional-lock-free-queue) - [Composing transactions](#composing-transactions) - [Blocking transactions](#blocking-transactions) - [Timeouts](#timeouts) - [A transactional lock-free leftist heap](#a-transactional-lock-free-leftist-heap) - [Programming with transactional data structures](#programming-with-transactional-data-structures) - [The dining philosophers problem](#the-dining-philosophers-problem) - [A transactional LRU cache](#a-transactional-lru-cache) - [Designing lock-free algorithms with k-CAS](#designing-lock-free-algorithms-with-k-cas) - [Understand performance](#understand-performance) - [Minimize accesses](#minimize-accesses) - [Prefer compound accesses](#prefer-compound-accesses) - [Log updates optimistically](#log-updates-optimistically) - [Postcompute](#postcompute) - [Post commit actions](#post-commit-actions) - [A composable Michael-Scott style queue](#a-composable-michael-scott-style-queue) - [Race to cooperate](#race-to-cooperate) - [Understanding transactions](#understanding-transactions) - [A three-stack lock-free queue](#a-three-stack-lock-free-queue) - [A rehashable lock-free hash table](#a-rehashable-lock-free-hash-table) - [Avoid false sharing](#avoid-false-sharing) - [Beware of torn reads](#beware-of-torn-reads) - [Additional resources](#additional-resources) ## A quick tour To use the library ```ocaml # #require "kcas" # open Kcas ``` one first creates shared memory locations: ```ocaml # let a = Loc.make 0 and b = Loc.make 0 and x = Loc.make 0 val a : int Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } val b : int Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } val x : int Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } ``` One can then manipulate the locations individually: ```ocaml # Loc.set a 10 - : unit = () # Loc.get a - : int = 10 # Loc.compare_and_set b 0 52 - : bool = true # Loc.get b - : int = 52 ``` Block waiting for changes to locations: ```ocaml # let a_domain = Domain.spawn @@ fun () -> let x = Loc.get_as (fun x -> Retry.unless (x <> 0); x) x in Printf.sprintf "The answer is %d!" x val a_domain : string Domain.t = ``` Perform transactions over locations: ```ocaml # let tx ~xt = let a = Xt.get ~xt a and b = Xt.get ~xt b in Xt.set ~xt x (b - a) in Xt.commit { tx } - : unit = () ``` And now we have it: ```ocaml # Domain.join a_domain - : string = "The answer is 42!" ``` ## Introduction The API of **Kcas** is divided into submodules. The main modules are - [`Loc`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/index.html), providing an abstraction of _shared memory locations_, and - [`Xt`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html), providing _explicit transaction log passing_ over shared memory locations. The following sections discuss both of the above in turn. ### Creating and manipulating individual shared memory locations The [`Loc`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/index.html) module is essentially compatible with the Stdlib [`Atomic`](https://v2.ocaml.org/api/Atomic.html) module, except that a number of functions take some optional arguments that one usually need not worry about. In other words, an application that uses [`Atomic`](https://v2.ocaml.org/api/Atomic.html), but then needs to perform atomic operations over multiple atomic locations, could theoretically just rebind `module Atomic = Loc` and then use the [`Xt`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html) API to perform operations over multiple locations. This should not be done just-in-case, however, as, even though **Kcas** is efficient, it does naturally have higher overhead than the Stdlib [`Atomic`](https://v2.ocaml.org/api/Atomic.html). ### Programming with transactions The [`Xt`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html) module provides an API that allows _transactions_ over shared memory locations to be implemented as functions that explicitly pass a mutable transaction log, as the labeled argument `~xt`, through the computation to record accesses of shared memory locations. Once the transaction function returns, those accesses can then be attempted to be performed atomically. The [`Xt`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html) API is intended to be suitable for both designing and implementing new lock-free algorithms and as an application level programming interface for compositional use of such algorithms. #### A transactional lock-free stack As our first example of using transactions, let's implement a lock-free stack. A stack can be just a shared memory location that holds a list of elements: ```ocaml type 'a stack = 'a list Loc.t ``` To create a stack we just [`make`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/index.html#val-make) a new location with an empty list: ```ocaml # let stack () : _ stack = Loc.make [] val stack : unit -> 'a stack = ``` To push an element to a stack we [`modify`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-modify) the stack to cons the element onto the list: ```ocaml # let push ~xt stack element = Xt.modify ~xt stack @@ List.cons element val push : xt:'a Xt.t -> 'b list Loc.t -> 'b -> unit = ``` Notice the `~xt` parameter. It refers to the transaction log being passed explicitly. Above we pass it to [`modify`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-modify) to record an operation in the log rather than perform it immediately. Popping an element from a stack is a little more complicated as we need to handle the case of an empty stack. Let's go with a basic approach where we first [`get`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-get) the content of the stack, and [`set`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-set) it if necessary, and return an optional element. ```ocaml # let try_pop ~xt stack = match Xt.get ~xt stack with | [] -> None | element :: rest -> Xt.set ~xt stack rest; Some element val try_pop : xt:'a Xt.t -> 'b list Loc.t -> 'b option = ``` Again, `try_pop` passes the `~xt` parameter explicitly to the [`get`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-get) and [`set`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-set) operations to record them in the log rather than perform them immediately. We could also implement `try_pop` more concisely with the help of a couple of useful list manipulation helper functions ```ocaml # let hd_opt = function | [] -> None | element :: _ -> Some element val hd_opt : 'a list -> 'a option = # let tl_safe = function | [] -> [] | _ :: rest -> rest val tl_safe : 'a list -> 'a list = ``` and [`update`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-update): ```ocaml # let try_pop ~xt stack = Xt.update ~xt stack tl_safe |> hd_opt val try_pop : xt:'a Xt.t -> 'b list Loc.t -> 'b option = ``` If the stack already contained an empty list, `[]`, both of the above variations of `try_pop` generate a read-only CMP operation in the [`obstruction_free`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Mode/index.html#val-obstruction_free) mode. This means that multiple domains may run `try_pop` on an empty stack in parallel without interference. The variation using [`update`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-update) also makes only a single access to the underlying transaction log and is likely to be the faster variation. So, to use a stack, we first need to create it and then we may [`commit`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-commit) transactions to `push` and `try_pop` elements: ```ocaml # let a_stack : int stack = stack () val a_stack : int stack = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } # Xt.commit { tx = push a_stack 101 } - : unit = () # Xt.commit { tx = try_pop a_stack } - : int option = Some 101 # Xt.commit { tx = try_pop a_stack } - : int option = None ``` The [`{ tx = ... }`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#type-tx) wrapper is used to ensure that the transaction function is polymorphic with respect to the log. This way the type system makes it difficult to accidentally leak the log as described in the paper [Lazy Functional State Threads](https://dl.acm.org/doi/10.1145/178243.178246). As an astute reader you may wonder why we wrote `push` and `try_pop` to take a transaction log as a parameter and then separately called [`commit`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-commit) rather than just call [`commit`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-commit) inside the `push` and `try_pop` functions and avoid exposing the `~xt` parameter. We'll get to that soon! #### A transactional lock-free queue Let's then implement a lock-free queue. To keep things simple we just use the traditional two-stack queue data structure: ```ocaml type 'a queue = { front: 'a list Loc.t; back: 'a list Loc.t; } ``` To create a queue we [`make`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/index.html#val-make) the two locations: ```ocaml # let queue () = { front = Loc.make []; back = Loc.make []; } val queue : unit -> 'a queue = ``` To enqueue we just [`modify`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-modify) the back of the queue and `cons` the element to the list: ```ocaml # let enqueue ~xt queue element = Xt.modify ~xt queue.back @@ List.cons element val enqueue : xt:'a Xt.t -> 'b queue -> 'b -> unit = ``` Dequeue is again more complicated. First we examine the front of the queue. If there is an element, we update the front and return the element. If the front is empty, we examine the back of the queue in `rev`erse. If there is an element we clear the back, move the rest of the elements to the front, and return the element. Otherwise we return `None` as the queue was empty. ```ocaml # let try_dequeue ~xt queue = match Xt.update ~xt queue.front tl_safe with | element :: _ -> Some element | [] -> match Xt.exchange ~xt queue.back [] |> List.rev with | [] -> None | element :: rest -> Xt.set ~xt queue.front rest; Some element val try_dequeue : xt:'a Xt.t -> 'b queue -> 'b option = ``` Above, [`update`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-update) and [`exchange`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-exchange) are used as convenient shorthands and to reduce the number of accesses to the transaction log. If both the front and back locations already contained an empty list, `[]`, the above generates read-only CMP operations in the [`obstruction_free`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Mode/index.html#val-obstruction_free) mode allowing multiple domains to run `try_dequeue` on an empty queue in parallel without interference. Additionally, if the back contained only one element, no write to the front is generated. > **_Question_**: _When does a transaction generate a read-only compare against > a particular location?_ > > First of all, the transaction must be attempted in the > [`obstruction_free`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Mode/index.html#val-obstruction_free) > mode, which is the default mode that > [`commit`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-commit) > uses initially. > > Additionally, there must be no operation in the transaction that sets a new > value to the location. > > If an operation sets a location to a new value, the full original state of the > location is forgotten, and the transaction will then later attempt a > compare-and-set operation against that location even if a later operation > inside the transaction sets the location to its original value. > > The intention behind this approach is to strike a balance between adding > overhead and also supporting convenient read-only updates. So, to use a queue, we first need to create it and then we may [`commit`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-commit) transactions to `enqueue` and `try_dequeue` elements: ```ocaml # let a_queue : int queue = queue () val a_queue : int queue = {front = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }; back = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }} # Xt.commit { tx = enqueue a_queue 76 } - : unit = () # Xt.commit { tx = try_dequeue a_queue } - : int option = Some 76 # Xt.commit { tx = try_dequeue a_queue } - : int option = None ``` > **_Beware_**: Using two stacks for a queue is easy to implement and performs > ok in many cases. Unfortunately it has one major weakness. The problem is that > it may take a relatively long time to reverse the back of a queue. This can > cause > [starvation]() as > producers may then be able to always complete their transactions before > consumers and the back of the queue might grow without bound. _Can you see a > way to avoid this problem?_ #### Composing transactions The main feature of the [`Xt`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html) API is that transactions are composable. In fact, we already wrote transactions that recorded multiple primitive shared memory accesses to the explicitly passed transaction log. Nothing prevents us from writing transactions calling other non-primitive transactions. For example, one can write a transaction to push multiple elements to a transactional stack atomically: ```ocaml # let tx ~xt = push ~xt a_stack 3; push ~xt a_stack 1; push ~xt a_stack 4 in Xt.commit { tx } - : unit = () ``` Or transfer elements between different transactional data structures: ```ocaml # let tx ~xt = match try_pop ~xt a_stack with | Some element -> enqueue ~xt a_queue element | None -> () in Xt.commit { tx } - : unit = () ``` The ability to compose transactions allows algorithms and data-structures to be used for a wider variety of purposes. #### Blocking transactions All of the previous operations we have implemented on stacks and queues have been non-blocking. What if we'd like to wait for an element to appear in a stack? One could write a loop that keeps on trying to pop an element ```ocaml # let rec busy_waiting_pop stack = match Xt.commit { tx = try_pop stack } with | None -> busy_waiting_pop stack | Some elem -> elem val busy_waiting_pop : 'a list Loc.t -> 'a = ``` but this sort of [busy-wait](https://en.wikipedia.org/wiki/Busy_waiting) is usually a _bad idea_ and should be avoided. It is usually better to block in such a way that the underlying domain can potentially perform other work. To support blocking **Kcas** provides a [`later`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Retry/index.html#val-later) operation that amounts to raising a [`Later`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Retry/index.html#exception-Later) exception signaling that the operation, whether a single location operation or a multi location transaction, should be retried only after the shared memory locations examined by the operation have been modified outside of the transaction. Using [`later`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Retry/index.html#val-later) we can easily write blocking pop ```ocaml # let pop ~xt stack = match try_pop ~xt stack with | None -> Retry.later () | Some elem -> elem val pop : xt:'a Xt.t -> 'b list Loc.t -> 'b = ``` and dequeue ```ocaml # let dequeue ~xt queue = match try_dequeue ~xt queue with | None -> Retry.later () | Some elem -> elem val dequeue : xt:'a Xt.t -> 'b queue -> 'b = ``` operations. To test them out, let's create a fresh stack and a queue ```ocaml # let a_stack : int stack = stack () val a_stack : int stack = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } # let a_queue : int queue = queue () val a_queue : int queue = {front = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }; back = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }} ``` and then spawn a domain that tries to atomically both pop and dequeue: ```ocaml # let a_domain = Domain.spawn @@ fun () -> let tx ~xt = (pop ~xt a_stack, dequeue ~xt a_queue) in let (popped, dequeued) = Xt.commit { tx } in Printf.sprintf "I popped %d and dequeued %d!" popped dequeued val a_domain : string Domain.t = ``` The domain is now blocked waiting for changes to the stack and the queue. As long as we don't populate both at the same time ```ocaml # Xt.commit { tx = push a_stack 2 }; let x = Xt.commit { tx = pop a_stack } in Xt.commit { tx = enqueue a_queue x } - : unit = () ``` the transaction keeps on being blocked. But if both become populated at the same time ```ocaml # Xt.commit { tx = push a_stack 4 } - : unit = () # Domain.join a_domain - : string = "I popped 4 and dequeued 2!" ``` the transaction can finish. The retry mechanism essentially allows a transaction to wait for an arbitrary condition and can function as a fairly expressive communication and synchronization mechanism. #### Timeouts > If you block, will they come? That is a good question. Blocking indefinitely is often not acceptable. A blocked transaction can be waken up by a write to any shared memory location that was accessed by the transaction. This means that, given a suitable timeout mechanism, one could e.g. setup a timeout that writes to a boolean shared memory location that is accessed by a blocking transaction: ```ocaml # let pop_or_raise_if ~xt timeout stack = (* Check if timeout has expired: *) if Xt.get ~xt timeout then raise Exit; pop stack val pop_or_raise_if : xt:'a Xt.t -> bool Loc.t -> 'b list Loc.t -> xt:'c Xt.t -> 'b = ``` This works, but creating, checking, and canceling timeouts properly can be a lot of work. Therefore **Kcas** also directly supports an optional `timeoutf` argument for potentially blocking operations. For example, to perform a blocking pop with a timeout, one can simply explicitly pass the desired timeout in seconds: ```ocaml # let an_empty_stack = stack () in Xt.commit ~timeoutf:0.1 { tx = pop an_empty_stack } Exception: Failure "Domain_local_timeout.set_timeoutf not implemented". ``` Oops! What happened above is that the [_domain local timeout_](https://github.com/ocaml-multicore/domain-local-timeout) mechanism used by **Kcas** was not implemented on the current domain. The idea is that, in the future, concurrent schedulers provide the mechanism out of the box, but there is also a default implementation using the Stdlib `Thread` and `Unix` modules that works on most platforms. However, to avoid direct dependencies to `Thread` and `Unix`, we need to explicitly tell the library that it can use those modules: ```ocaml # Domain_local_timeout.set_system (module Thread) (module Unix) - : unit = () ``` This initialization, if needed, should be done by application code rather than by libraries. If we now retry the previous example we will get a [`Timeout`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Timeout/index.html#exception-Timeout) exception as expected: ```ocaml # let an_empty_stack = stack () in Xt.commit ~timeoutf:0.1 { tx = pop an_empty_stack } Exception: Kcas.Timeout.Timeout. ``` Besides [`commit`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-commit), potentially blocking single location operations such as [`get_as`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/index.html#val-get_as), [`update`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/index.html#val-update), and [`modify`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/index.html#val-modify) support the optional `timeoutf` argument. #### A transactional lock-free leftist heap Let's implement something a bit more complicated, [a leftist heap](https://en.wikipedia.org/wiki/Leftist_tree), which is a kind of priority queue. > The implementation here is adapted from the book _Data Structures and > Algorithm Analysis in C (2nd ed.)_ by Mark Allen Weiss. First we define a data type to represent the spine of a leftist heap: ```ocaml type 'v leftist = [ `Null | `Node of 'v leftist Loc.t * int Loc.t * 'v * 'v leftist Loc.t ] ``` To create a leftist heap we [`make`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/index.html#val-make) a location with an empty spine: ```ocaml # let leftist () : _ leftist Loc.t = Loc.make `Null val leftist : unit -> 'a leftist Loc.t = ``` We then define an auxiliary function `npl_of` to get the null path length of a leftist heap: ```ocaml # let npl_of ~xt : _ leftist -> int = function | `Null -> 0 | `Node (_, npl, _, _) -> Xt.get ~xt npl val npl_of : xt:'a Xt.t -> 'b leftist -> int = ``` The core operation of leftist heaps is that of merging two leftist heaps: ```ocaml # let rec merge ~xt ~lt h1 h2 = match h1, h2 with | `Null, h2 -> h2 | h1, `Null -> h1 | (`Node (_, _, v1, _) as h1), (`Node (_, _, v2, _) as h2) -> let (`Node (h1l, npl, _, h1r) as h1), h2 = if lt v1 v2 then h1, h2 else h2, h1 in let l = Xt.get ~xt h1l in if l == `Null then Xt.set ~xt h1l h2 else begin let r = merge ~xt ~lt (Xt.get ~xt h1r) h2 in match npl_of ~xt l, npl_of ~xt r with | l_npl, r_npl when l_npl < r_npl -> Xt.set ~xt h1l r; Xt.set ~xt h1r l; Xt.set ~xt npl (l_npl + 1) | _, r_npl -> Xt.set ~xt h1r r; Xt.set ~xt npl (r_npl + 1) end; h1 val merge : xt:'a Xt.t -> lt:('b -> 'b -> bool) -> 'b leftist -> 'b leftist -> 'b leftist = ``` The `merge` operation can be used to implement both insertion to ```ocaml # let insert ~xt ~lt h x = let h1 = `Node ( Loc.make `Null, Loc.make 1, x, Loc.make `Null ) in Xt.set ~xt h (merge ~xt ~lt h1 (Xt.get ~xt h)) val insert : xt:'a Xt.t -> lt:('b -> 'b -> bool) -> 'b leftist Loc.t -> 'b -> unit = ``` and deletion from ```ocaml # let delete_min_opt ~xt ~lt h = match Xt.get ~xt h with | `Null -> None | `Node (h1, _, x, h2) -> Xt.set ~xt h (merge ~xt ~lt (Xt.get ~xt h1) (Xt.get ~xt h2)); Some x val delete_min_opt : xt:'a Xt.t -> lt:('b -> 'b -> bool) -> 'b leftist Loc.t -> 'b option = ``` a leftist heap. Let's then first pick an ordering ```ocaml # let lt = (>) val lt : 'a -> 'a -> bool = ``` and create a leftist heap: ```ocaml # let a_heap : int leftist Loc.t = leftist () val a_heap : int leftist Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } ``` To populate the heap we need to define a transaction passing function and pass it to [`commit`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-commit): ```ocaml # let tx ~xt = List.iter (insert ~xt ~lt a_heap) [3; 1; 4; 1; 5] in Xt.commit { tx } - : unit = () ``` Notice that we could simply use `List.iter` from the Stdlib to iterate over a list of elements. Let's then define a transaction passing function to remove all elements from a heap ```ocaml # let remove_all ~xt ~lt h = let xs = ref [] in while match delete_min_opt ~xt ~lt h with | None -> false | Some x -> xs := x :: !xs; true do () done; List.rev !xs val remove_all : xt:'a Xt.t -> lt:('b -> 'b -> bool) -> 'b leftist Loc.t -> 'b list = ``` and use it ```ocaml # Xt.commit { tx = remove_all ~lt a_heap } - : int list = [5; 4; 3; 1; 1] ``` on the heap we populated earlier. Notice how we were able to use a `while` loop, rather than recursion, in `remove_all`. > This leftist tree implementation is unlikely to be the best performing > lock-free heap implementation, but it was pretty straightforward to implement > using k-CAS based on a textbook imperative implementation. ### Programming with transactional data structures When was the last time you implemented a non-trivial data structure or algorithm from scratch? For most professionals the answer might be along the lines of _"when I took my data structures course at the university"_ or _"when I interviewed for the software engineering position at Big Co"_. **Kcas** aims to be usable both - for experts implementing correct and performant lock-free data structures, and - for everyone gluing together programs using such data structures. Many of the examples in this introduction are data structures of some sort. However, implementing basic data structures from scratch is not something everyone should be doing every time they are writing concurrent programs. Instead programmers should be able to reuse carefully constructed data structures. One source of ready-made data structures is [**Kcas_data**](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/index.html). Let's explore how we can leverage those data structures. Of course, first we need to `#require` the package and we'll also open it for convenience: ```ocaml # #require "kcas_data" # open Kcas_data ``` #### The dining philosophers problem The [dining philosophers problem](https://en.wikipedia.org/wiki/Dining_philosophers_problem) is a well known classic synchronization problem. It is easy to solve with **Kcas**. If you are unfamiliar with the problem, please take a moment to read the description of the problem. A handy concurrent data structure for solving the dining philosophers problem is the [`Mvar`](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/Mvar/index.html) or synchronizing variable. A `'a Mvar.t` is basically like a `'a option Loc.t` with blocking semantics for both [`take`](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/Mvar/index.html#val-take) and [`put`](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/Mvar/index.html#val-put). For the dining philosophers problem, we can use `Mvar`s to store the forks. The problem statement doesn't actually say when to stop. The gist of the problem, of course, is that no philosopher should starve. So, we'll make it so that we keep a record of how many times each philosopher has eaten. We'll then end the experiment as soon as each philosopher has eaten some minimum number of times. Programming a philosopher is now straightforward: ```ocaml # let philosopher ~fork_lhs ~fork_rhs ~eaten ~continue = let eat () = let take_forks ~xt = ( Mvar.Xt.take ~xt fork_lhs, Mvar.Xt.take ~xt fork_rhs ) in let (lhs, rhs) = Xt.commit { tx = take_forks } in Loc.incr eaten; let drop_forks () = Mvar.put fork_lhs lhs; Mvar.put fork_rhs rhs in drop_forks () in while continue () do eat () done val philosopher : fork_lhs:'a Mvar.t -> fork_rhs:'b Mvar.t -> eaten:int Loc.t -> continue:(unit -> bool) -> unit = ``` The dining philosophers main routine then creates the data structures and spawns the philosophers: ```ocaml # let dinining_philosophers ~philosophers ~min_rounds = assert (3 <= philosophers && 0 <= min_rounds); let eaten = Loc.make_array philosophers 0 in let continue () = eaten |> Array.exists @@ fun eaten -> Loc.get eaten < min_rounds in let forks = Array.init philosophers @@ fun i -> Mvar.create (Some i) in Array.iter Domain.join @@ Array.init philosophers @@ fun i -> Domain.spawn @@ fun () -> let fork_lhs = forks.(i) and fork_rhs = forks.((i + 1) mod philosophers) and eaten = eaten.(i) in philosopher ~fork_lhs ~fork_rhs ~eaten ~continue val dinining_philosophers : philosophers:int -> min_rounds:int -> unit = ``` We can now run our solution and confirm that it terminates after each philosopher has eaten at least a given number of times: ```ocaml # dinining_philosophers ~philosophers:5 ~min_rounds:1_000 - : unit = () ``` What makes dining philosophers so easy to solve with transactions is that we can simply compose two `take` operations to take both forks. #### A transactional LRU cache A LRU or least-recently-used cache is essentially a bounded association table. When the capacity of the cache is exceeded, some association is dropped. The LRU or least-recently-used policy is to drop the association that was accessed least recently. A simple way to implement a LRU cache is to use a hash table to store the associations and a doubly-linked list to keep track of the order in which associations have been accessed. Whenever an association is accessed, the corresponding linked list node is added or moved to one end of the list. When the cache overflows, the association whose node is at the other end of the list is removed. **Kcas_data** conveniently provides a [`Hashtbl`](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/Hashtbl/index.html) module providing a hash table implementation that mimics the Stdlib [`Hashtbl`](https://v2.ocaml.org/api/Hashtbl.html) module and a [`Dllist`](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/Dllist/index.html) providing a doubly-linked list implementation. We'll also keep track of the space in the cache using a separate shared memory location so that it is possible to change the capacity of the cache dynamically: ```ocaml type ('k, 'v) cache = { space: int Loc.t; table: ('k, 'k Dllist.node * 'v) Hashtbl.t; order: 'k Dllist.t; } ``` To create a cache we just create the data structures: ```ocaml # let cache ?hashed_type capacity = { space = Loc.make capacity; table = Hashtbl.create ?hashed_type (); order = Dllist.create (); } val cache : ?hashed_type:'a Hashtbl.hashed_type -> int -> ('a, 'b) cache = ``` Note that above we just passed the optional `hashed_type` argument to the hash table constructor. The hash table [`create`](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/Hashtbl/index.html#val-create) function takes some more optional arguments some of which might make sense to pass through. To access an association in the cache we provide a `get_opt` operation ```ocaml # let get_opt ~xt {table; order; _} key = Hashtbl.Xt.find_opt ~xt table key |> Option.map @@ fun (node, value) -> Dllist.Xt.move_l ~xt node order; value val get_opt : xt:'a Xt.t -> ('b, 'c) cache -> 'b -> 'c option = ``` that, as explained previously, moves the node corresponding to the accessed association to the left end of the list. To introduce associations we provide the `set_blocking` operation ```ocaml # let set_blocking ~xt {table; order; space; _} key value = let node = match Hashtbl.Xt.find_opt ~xt table key with | None -> if 0 = Xt.update ~xt space (fun n -> Int.max 0 (n-1)) then Dllist.Xt.take_blocking_r ~xt order |> Hashtbl.Xt.remove ~xt table; Dllist.Xt.add_l ~xt key order | Some (node, _) -> Dllist.Xt.move_l ~xt node order; node in Hashtbl.Xt.replace ~xt table key (node, value) val set_blocking : xt:'a Xt.t -> ('b, 'c) cache -> 'b -> 'c -> unit = ``` that, like `get_opt`, either moves or adds the node of the accessed association to the left end of the list. In case a new association is added, the space is decremented. If there was no space, an association is first removed, which will block in case capacity is 0. As described previously, the association to remove is determined by removing the rightmost element from the list. We can then test that the cache works as expected: ```ocaml # let a_cache : (int, string) cache = cache 2 val a_cache : (int, string) cache = {space = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }; table = ; order = } # Xt.commit { tx = set_blocking a_cache 101 "basics" } - : unit = () # Xt.commit { tx = set_blocking a_cache 42 "answer" } - : unit = () # Xt.commit { tx = get_opt a_cache 101 } - : string option = Some "basics" # Xt.commit { tx = set_blocking a_cache 2023 "year" } - : unit = () # Xt.commit { tx = get_opt a_cache 42 } - : string option = None ``` As an exercise, implement an operation to `remove` associations from a cache and an operation to change the capacity of the cache. ## Designing lock-free algorithms with k-CAS The key benefit of k-CAS, or k-CAS-n-CMP, and transactions in particular, is that it allows developing lock-free algorithms compositionally. In the following sections we discuss a number of basic tips and approaches for making best use of k-CAS. ### Understand performance It is possible to convert imperative sequential data structures to lock-free data structures [almost](#beware-of-torn-reads) just by using [shared memory locations](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/) and wrapping everything inside [transactions](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/), but doing so will likely not lead to good performance. On the other hand, if you have a non-blocking data structure implemented using plain `Atomic`s, then simply replacing `Atomic` with [`Loc`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/) you should get a data structure that works the same and will take somewhat more memory and operates somewhat more slowly. However, adding transactional operations simply by wrapping all accesses of a non-blocking data structure implementation will likely not lead to well performing transactional operations. [Shared memory locations](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/) take more memory than ordinary mutable fields or mutable references and mutating operations on shared memory locations allocate. The [transaction mechanism](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/) also allocates and adds lookup overhead to accesses. Updating multiple locations in a transaction is more expensive than updating individual locations atomically. Contention can cause transactions to retry and perform poorly. With that said, it is possible to create composable and reasonably well performing data structures using **Kcas**. If a **Kcas** based data structure is performing much worse than a similar lock-free or lock-based data structure, then there is likely room to improve. Doing so will require good understanding of and careful attention to algorithmic details, such as which accessed need to be performed transactionally and which do not, operation of the transaction mechanism, and performance of individual low level operations. ### Minimize accesses Accesses of shared memory locations inside transactions consult the transaction log. While the log is optimized, it still adds overhead. For best performance it can be advantageous to minimize the number of accesses. #### Prefer compound accesses For best performance it can be advantageous to use compound accesses such as [`update`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-update), [`exchange`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-exchange), and [`modify`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-modify) instead of [`get`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-get) and [`set`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-set), because the compound accesses only consult the transaction log once. Consider the following example that swaps the values of the shared memory locations `a` and `b`: ```ocaml # let tx ~xt = let a_val = Xt.get ~xt a and b_val = Xt.get ~xt b in Xt.set ~xt a b_val; Xt.set ~xt b a_val in Xt.commit { tx } - : unit = () ``` The above performs four accesses. Using [`exchange`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-exchange) we can reduce that to three: ```ocaml # let tx ~xt = let a_val = Xt.get ~xt a in let b_val = Xt.exchange ~xt b a_val in Xt.set ~xt a b_val in Xt.commit { tx } - : unit = () ``` The above will likely perform slightly better. > **_Question_**: _How does one count the number of accesses to the transaction > log?_ > > It is simple. Basically all of the access operations perform only a single > access to the log. For simplicity, the documentation is written as if > [`get`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-get) > and > [`set`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-set) > were primitive, but all operations are actually implemented in terms of a more > general operation similar to > [`update`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-update), > that only performs a single access to the transaction log. #### Log updates optimistically Transactional write accesses to shared memory locations are only attempted after the transaction log construction finishes successfully. Therefore it is entirely safe to optimistically log updates against shared memory locations, validate them during the log construction, and abort the transaction in case validation fails. Consider the following function to transfer an amount from specified source location to specified target location: ```ocaml # let transfer amount ~source ~target = let tx ~xt = if amount <= Xt.get ~xt source then begin Xt.set ~xt source (Xt.get ~xt source - amount); Xt.set ~xt target (Xt.get ~xt target + amount) end in Xt.commit { tx } val transfer : int -> source:int Loc.t -> target:int Loc.t -> unit = ``` The above first examine the source location and then updates both source and target. In a successful case it makes a total of five accesses. Using compound accesses and optimistic updates we can reduce that to just two accesses: ```ocaml # let transfer amount ~source ~target = let tx ~xt = if Xt.fetch_and_add ~xt source (-amount) < amount then raise Exit; Xt.fetch_and_add ~xt target amount |> ignore in try Xt.commit { tx } with Exit -> () val transfer : int -> source:int Loc.t -> target:int Loc.t -> unit = ``` Note that we raise the Stdlib `Exit` exception to abort the transaction. As we can see ```ocaml # Loc.get a, Loc.get b - : int * int = (10, 52) # transfer 100 ~source:a ~target:b - : unit = () # Loc.get a, Loc.get b - : int * int = (10, 52) # transfer 10 ~source:a ~target:b - : unit = () # Loc.get a, Loc.get b - : int * int = (0, 62) ``` the updates are only done in case of success. A problem with the `transfer` function above is that it is not a composable transaction. The transaction mechanism provided by **Kcas** does not implicitly perform rollbacks of changes made to locations, but it does offer low level support for nested conditional transactions. By explicitly calling [`snapshot`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-snapshot) and [`rollback`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-rollback) one can scope tentative changes and create a composable version of `transfer`: ```ocaml # let transfer ~xt amount ~source ~target = let snap = Xt.snapshot ~xt in if Xt.fetch_and_add ~xt source (-amount) < amount then Retry.later (Xt.rollback ~xt snap); Xt.fetch_and_add ~xt target amount |> ignore val transfer : xt:'a Xt.t -> int -> source:int Loc.t -> target:int Loc.t -> unit = ``` Given a bunch of locations ```ocaml let a = Loc.make 10 and b = Loc.make 20 and c = Loc.make 30 and d = Loc.make 27 ``` we can now attempt `transfer`s and perform the [`first`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-first) of them that succeeds: ```ocaml # Xt.commit { tx = Xt.first [ transfer 15 ~source:a ~target:d; transfer 15 ~source:b ~target:d; transfer 15 ~source:c ~target:d; ] } - : unit = () ``` A look at the locations ```ocaml # List.map Loc.get [a; b; c; d] - : int list = [10; 5; 30; 42] ``` confirms the expected result. ### Postcompute The more time a transaction takes, the more likely it is to suffer from interference or even starvation. For best performance it is important to keep transactions as short as possible. In particular, when possible, perform expensive computations after the transactions. Consider the following example of computing the size of a stack: ```ocaml # let a_stack = Loc.make [2; 3] val a_stack : int list Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } # let n_elems = let tx ~xt = Xt.get ~xt a_stack |> List.length in Xt.commit { tx } val n_elems : int = 2 ``` The problem is that the computation of the list length is potentially expensive and opens a potentially long time window for other domains to interfere. In this case we can trivially move the list length computation outside of the transaction: ```ocaml # let n_elems = Xt.commit { tx = Xt.get a_stack } |> List.length val n_elems : int = 2 ``` As a more general approach, one could e.g. use closures to move compute after transactions: ```ocaml # let n_elems = let tx ~xt = let xs = Xt.get ~xt a_stack in fun () -> List.length xs in Xt.commit { tx } () val n_elems : int = 2 ``` ### Post commit actions Closely related to moving compute outside of transactions, it is also sometimes possible or necessary to perform some side-effects or actions, such as non-transactional IO operations, only after a transaction has been committed successfully. These cases are supported via the ability to register [`post_commit`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-post_commit) actions. As a basic example, one might want to log a message when some transactional operation is performed. Instead of directly logging the message ```ocaml # let enqueue_and_log ~xt queue message = enqueue ~xt queue message; (* BAD: The printf could be executed many times! *) Printf.printf "sent %s" message val enqueue_and_log : xt:'a Xt.t -> string queue -> string -> unit = ``` one should use [`post_commit`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-post_commit) ```ocaml # let enqueue_and_log ~xt queue message = enqueue ~xt queue message; Xt.post_commit ~xt @@ fun () -> Printf.printf "sent %s" message val enqueue_and_log : xt:'a Xt.t -> string queue -> string -> unit = ``` to make sure that the message is only printed once after the transaction has actually completed successfully. #### A composable Michael-Scott style queue One of the most famous lock-free algorithms is [the Michael-Scott queue](https://www.cs.rochester.edu/~scott/papers/1996_PODC_queues.pdf). Perhaps its characteristic feature is that the tail pointer of the queue is allowed to momentarily fall behind and that operations on the queue perform cooperative CASes to update the tail. The tail pointer can be seen as an optimization — whether it points to the true tail or not does not change the logical state of the queue. Let's implement a composable queue that allows the tail to momentarily lag behind. First we define a type for nodes: ```ocaml type 'a node = Nil | Node of 'a * 'a node Loc.t ``` A queue is then a pair of pointers to the head and tail of a queue: ```ocaml type 'a queue = { head : 'a node Loc.t Loc.t; tail : 'a node Loc.t Atomic.t; } ``` Note that we used an `Atomic.t` for the tail. We do not need to operate on the tail transactionally. To create a queue we allocate a shared memory location for the pointer to the first node to be enqueued and make both the head and tail point to the location: ```ocaml # let queue () = let next = Loc.make Nil in { head = Loc.make next; tail = Atomic.make next } val queue : unit -> 'a queue = ``` To dequeue a node, only the head of the queue is examined. If the location pointed to by the head points to a node we update the head to point to the location pointing to the next node: ```ocaml # let try_dequeue ~xt { head; _ } = let old_head = Xt.get ~xt head in match Xt.get ~xt old_head with | Nil -> None | Node (value, next) -> Xt.set ~xt head next; Some value val try_dequeue : xt:'a Xt.t -> 'b queue -> 'b option = ``` To enqueue a value into the queue, only the tail of the queue needs to be examined. We allocate a new location for the new tail and a node. We then need to find the true tail of the queue and update it to point to the new node. The reason we need to find the true tail is that we explicitly allow the tail to momentarily fall behind. We then add a post commit action to the transaction to update the tail after the transaction has been successfully committed: ```ocaml # let enqueue ~xt { tail; _ } value = let new_tail = Loc.make Nil in let new_node = Node (value, new_tail) in let rec find_and_set_tail old_tail = match Xt.compare_and_swap ~xt old_tail Nil new_node with | Nil -> () | Node (_, old_tail) -> find_and_set_tail old_tail in find_and_set_tail (Atomic.get tail); let rec fix_tail () = let old_tail = Atomic.get tail in if Loc.get new_tail == Nil && not (Atomic.compare_and_set tail old_tail new_tail) then fix_tail () in Xt.post_commit ~xt fix_tail val enqueue : xt:'a Xt.t -> 'b queue -> 'b -> unit = ``` The post commit action, registered using [`post_commit`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-post_commit), checks that the tail is still the true tail and then attempts to update the tail. The order of accesses is very subtle as always with non-transactional atomic operations. Can you see why it works? Although we allow the tail to momentarily fall behind, it is important that we do not let the tail fall behind indefinitely, because then we would risk leaking memory — nodes that have been dequeued from the queue would still be pointed to by the tail. Using the Michael-Scott style queue is as easy as any other transactional queue: ```ocaml # let a_queue : int queue = queue () val a_queue : int queue = {head = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }; tail = } # Xt.commit { tx = enqueue a_queue 19 } - : unit = () # Xt.commit { tx = try_dequeue a_queue } - : int option = Some 19 # Xt.commit { tx = try_dequeue a_queue } - : int option = None ``` The queue implementation in this section is an example of using **Kcas** to implement a fine-grained lock-free algorithm. Instead of recording all shared memory accesses and performing them atomically all at once, the implementation updates the tail outside of the transaction. This can potentially improve performance and scalability. This sort of algorithm design requires careful reasoning. Consider the dequeue operation. Instead of recording the `Xt.get ~xt old_head` operation in the transaction log, one could propose to bypass the log as `Loc.get old_head`. That may seem like a valid optimization, because logging the update of the head in the transaction is sufficient to ensure that each transaction dequeues a unique node. Unfortunately that would change the semantics of the operation. Suppose, for example, that you have two queues, _A_ and _B_, and you must maintain the invariant that at least one of the queues is empty. One domain tries to dequeue from _A_ and, if _A_ was empty, enqueue to _B_. Another domain does the opposite, dequeue from _B_ and enqueue to _A_ (when _B_ was empty). When such operations are performed in isolation, the invariant would be maintained. However, if the access of `old_head` is not recorded in the log, it is possible to end up with both _A_ and _B_ non-empty. This kind of [race condition](https://en.wikipedia.org/wiki/Race_condition) is common enough that it has been given a name: _write skew_. As an exercise, write out the sequence of atomic accesses that leads to that result. ### Race to cooperate Sometimes it is necessary to perform slower transactions that access many shared memory locations or need to perform expensive computations during the transaction. As mentioned previously, such transactions are more likely to suffer from interference or even starvation as other transactions race to make conflicting mutations to shared memory locations. To avoid such problems, it is often possible to split the transaction into two: 1. A quick transaction that adversarially races against others. 2. A slower transaction that others will then cooperate to complete. This lock-free algorithm design technique and the examples in the following subsections are more advanced than the basic techniques described previously. To understand and reason about these examples it is necessary to have a good understanding of how transactions work. #### Understanding transactions We have previously casually talked about "transactions". Let's sharpen our understanding of transactions. In **Kcas**, a _transaction_ is essentially a function that can be called to prepare a specification of an operation or operations, in the form of a _transaction log_, that can then be _attempted to be performed atomically_ by the underlying k-CAS-n-CMP algorithm provided by **Kcas**. In other words, and simplifying a bit, when an explicit attempt is made to perform a transaction, it basically proceeds in phases: 1. The first phase records a log of operations to access shared memory locations. 2. The second phase attempts to perform the operations atomically. Either of the phases may fail. The first phase, which is under the control of the transaction function, may raise an exception to abort the attempt. The second phase fails when the accesses recorded in the transaction log are found to be inconsistent with the contents of the shared memory locations. That happens when the shared memory locations are mutated outside of the accesses specified in the transaction log regardless of who made those mutations. A transaction is not itself atomic and the construction of a transaction log, by recording accesses of shared memory locations to the log, does not logically mutate any shared memory locations. When a transaction is (unconditionally) _committed_, rather than merely _attempted_ (once), the commit mechanism keeps on retrying until an attempt succeeds or the transaction function raises an exception (other than [`Later`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Retry/index.html#exception-Later) or [`Interference`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Mode/index.html#exception-Interference)) that the commit mechanism does not handle. Each attempt or retry calls the transaction function again. This means that any _side-effects_ within the transaction function are also performed again. In previous sections we have used transactions as a coarse-grained mechanism to encompass all shared memory accesses of the algorithm being implemented. This makes it easy to reason about the effects of committing a transaction as the accesses are then all performed as a single atomic operation. In the following examples we will use our deeper understanding of transactions to implement more fine-grained algorithms. #### A three-stack lock-free queue Recall the [two-stack queue](#a-transactional-lock-free-queue) discussed earlier. The problem is that the `try_dequeue` operation `rev`erses the `back` of the queue and that can be relatively expensive. One way to avoid that problem is to introduce a third "middle" stack, or shared memory location, to the queue and quickly move the back to the middle stack. First we redefine the `queue` type to include a `middle` location: ```ocaml type 'a queue = { back : 'a list Loc.t; middle : 'a list Loc.t; front : 'a list Loc.t; } ``` And adjust the `queue` constructor function accordingly: ```ocaml # let queue () = let back = Loc.make [] and middle = Loc.make [] and front = Loc.make [] in { back; middle; front } val queue : unit -> 'a queue = ``` The `enqueue` operation remains essentially the same: ```ocaml # let enqueue ~xt queue elem = Xt.modify ~xt queue.back @@ List.cons elem val enqueue : xt:'a Xt.t -> 'b queue -> 'b -> unit = ``` For the quick transaction we introduce a helper function: ```ocaml # let back_to_middle queue = let tx ~xt = match Xt.exchange ~xt queue.back [] with | [] -> raise Exit | xs -> if Xt.exchange ~xt queue.middle xs != [] then raise Exit in try Xt.commit { tx } with Exit -> () val back_to_middle : 'a queue -> unit = ``` Note that the above uses [`exchange`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-exchange) to optimistically record shared memory accesses and then uses the `Exit` exception to abort the transaction in case the optimistic accesses turn out to be unnecessary or incorrect. The `dequeue` operation then runs the quick transaction to move elements from the `back` to the `middle` before examining the `middle`: ```ocaml # let dequeue ~xt queue = match Xt.update ~xt queue.front tl_safe with | x :: _ -> Some x | [] -> if not (Xt.is_in_log ~xt queue.middle || Xt.is_in_log ~xt queue.back) then back_to_middle queue; match Xt.exchange ~xt queue.middle [] |> List.rev with | x :: xs -> Xt.set ~xt queue.front xs; Some x | [] -> match Xt.exchange ~xt queue.back [] |> List.rev with | x :: xs -> Xt.set ~xt queue.front xs; Some x | [] -> None val dequeue : xt:'a Xt.t -> 'b queue -> 'b option = ``` There are a number of subtle implementation details above that deserve attention. First of all, notice that `dequeue` only calls `back_to_middle queue` after making sure that `queue.middle` and `queue.back` have not already been accessed using [`is_in_log`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-is_in_log). If the call `back_to_middle queue` would be made after accessing `queue.middle` or `queue.back`, then those accesses would be recorded in the transaction log `xt` and the log would be inconsistent after `back_to_middle queue` mutates the locations. This would cause the transaction attempt to fail and we want to avoid such doomed attempts. Another subtle, but important, detail is that despite calling `back_to_middle queue` to move `queue.back` to `queue.middle`, it would be incorrect to assume that `queue.back` would be empty or that `queue.middle` would be non-empty. That is because we must assume other domains may be performing operations on the queue simultaneously. Another domain may have pushed new elements to the `queue.back` or emptied `queue.middle`. Therefore we meticulously examine both `queue.middle` and `queue.back`, if necessary. If we don't do that, then it is possible that we incorrectly report the queue as being empty. Also, as should be clear, the side-effect performed by calling `back_to_middle queue` is committed immediately every time it is called regardless of the outcome of the transaction attempt. This is safe, because `back_to_middle queue` does not logically change the state of the queue. It merely performs a helping step, that is invisible to outside observers, towards advancing the internal state of the queue. This is a common pattern in lock-free algorithms. As subtle as these kinds of lock-free algorithms are, this approach avoids the potential starvation problems as now consumers do not attempt a slow transaction to race against producers. Rather, the consumers perform quick adversarial races against producers and then cooperatively race to complete the slow transaction. > The three-stack queue presented in this section seems to perform reasonably > well, should not suffer from most concurrency problems, and can be used > compositionally. #### A rehashable lock-free hash table The previous example of adding a `middle` stack to the queue may seem like a special case. Let's implement a simple lock-free hash table and, along the way, examine a simple general way to replace a slow transaction with a quick adversarial transaction and a slow cooperative transaction. The difficulty with hash tables is rehashing. Let's ignore that for now and implement a hash table without rehashing. For further simplicity, let's just use separate chaining. Here is a type for such a basic hash table: ```ocaml type ('k, 'v) basic_hashtbl = { size: int Loc.t; data: ('k * 'v Loc.t) list Loc.t array Loc.t; } ``` The basic hash table constructor just allocates all the locations: ```ocaml # let basic_hashtbl () = { size = Loc.make 0; data = Loc.make (Loc.make_array 4 []); } val basic_hashtbl : unit -> ('a, 'b) basic_hashtbl = ``` Note that we (intentionally) used a very small capacity for the `data` table. In a real implementation you'd probably want to have a bigger minimum capacity (and might e.g. want to use a prime number). Before tackling the basic operations, let's implement a helper function for accessing the association list location corresponding to specified key: ```ocaml # let access ~xt basic_hashtbl key = let data = Xt.get ~xt basic_hashtbl.data in let n = Array.length data in let i = Stdlib.Hashtbl.hash key mod n in data.(i) val access : xt:'a Xt.t -> ('b, 'c) basic_hashtbl -> 'd -> ('b * 'c Loc.t) list Loc.t = ``` Now, to find an element, we access the association list and try to find the key-value -pair: ```ocaml # let find ~xt hashtbl key = let assoc_loc = access ~xt hashtbl key in Xt.get ~xt (List.assoc key (Xt.get ~xt assoc_loc)) val find : xt:'a Xt.t -> ('b, 'c) basic_hashtbl -> 'b -> 'c = ``` When replacing (or adding) the value corresponding to a key, we need to take care to update the size when necessary: ```ocaml # let replace ~xt hashtbl key value = let assoc_loc = access ~xt hashtbl key in let assoc = Xt.get ~xt assoc_loc in try let value_loc = List.assoc key assoc in Xt.set ~xt value_loc value with Not_found -> Xt.set ~xt assoc_loc ((key, Loc.make value) :: assoc); Xt.incr ~xt hashtbl.size val replace : xt:'a Xt.t -> ('b, 'c) basic_hashtbl -> 'b -> 'c -> unit = ``` Removing an association also involves making sure that the size is updated correctly: ```ocaml # let remove ~xt hashtbl key = let assoc_loc = access ~xt hashtbl key in let rec loop ys = function | ((key', _) as y) :: xs -> if key <> key' then loop (y :: ys) xs else begin Xt.set ~xt assoc_loc (List.rev_append ys xs); Xt.decr ~xt hashtbl.size end | [] -> () in loop [] (Xt.get ~xt assoc_loc) val remove : xt:'a Xt.t -> ('b, 'c) basic_hashtbl -> 'b -> unit = ``` Now, the problem with the above is the lack of rehashing. As more associations are added, performance deteriorates. We could implement a naive rehashing operation: ```ocaml # let rehash ~xt hashtbl new_capacity = let new_data = Loc.make_array new_capacity [] in Xt.exchange ~xt hashtbl.data new_data |> Array.iter @@ fun assoc_loc -> Xt.get ~xt assoc_loc |> List.iter @@ fun ((key, _) as bucket) -> let i = Stdlib.Hashtbl.hash key mod new_capacity in Xt.modify ~xt new_data.(i) (List.cons bucket) val rehash : xt:'a Xt.t -> ('b, 'c) basic_hashtbl -> int -> unit = ``` But that involves reading all the bucket locations. Any mutation that adds or removes an association would cause such a rehash to fail. To avoid taking on such adversarial races, we can use a level of indirection: ```ocaml type ('k, 'v) hashtbl = { pending: [`Nothing | `Rehash of int] Loc.t; basic: ('k, 'v) basic_hashtbl; } ``` The idea is that a hash table is either considered to be normally accessible or in the middle of being rehashed. It is easy to use this approach even when there are many different slow operations. Finding an element does not require mutating any locations, so we might just as well allow those also during rehashes: ```ocaml # let find ~xt hashtbl key = find ~xt hashtbl.basic key val find : xt:'a Xt.t -> ('b, 'c) hashtbl -> 'b -> 'c = ``` Then we use a similar trick as with the three-stack queue. We use a quick adversarial transaction to switch a hash table to the rehashing state in case a rehash seems necessary: ```ocaml # let prepare_rehash ~xt hashtbl delta = let tx ~xt = match Xt.get ~xt hashtbl.pending with | `Rehash _ -> () | `Nothing -> let size = Int.max 1 (Xt.get ~xt hashtbl.basic.size + delta) and capacity = Array.length (Xt.get ~xt hashtbl.basic.data) in if capacity < size * 4 then Xt.set ~xt hashtbl.pending (`Rehash (capacity * 2)) else if size * 8 < capacity then Xt.set ~xt hashtbl.pending (`Rehash (capacity / 2)) else raise Exit in try if Xt.is_in_log ~xt hashtbl.pending then tx ~xt else Xt.commit { tx } with Exit -> () val prepare_rehash : xt:'a Xt.t -> ('b, 'c) hashtbl -> int -> unit = ``` Note again that while the rehash logic allows some slack in the capacity, a real implementation would likely use a bigger minimum capacity and perhaps avoid using powers of two. Also, if we have already modified the hash table, which we know by using [`is_in_log`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-is_in_log) to check whether the `pending` location has been accessed, we must continue within the same transaction. Before we mutate a hash table, we will then call a helper to check whether we need to rehash: ```ocaml # let maybe_rehash ~xt hashtbl delta = prepare_rehash ~xt hashtbl delta; match Xt.get ~xt hashtbl.pending with | `Nothing -> () | `Rehash new_capacity -> Xt.set ~xt hashtbl.pending `Nothing; rehash ~xt hashtbl.basic new_capacity val maybe_rehash : xt:'a Xt.t -> ('b, 'c) hashtbl -> int -> unit = ``` Similarly to the previous example of [a three-stack queue](#a-three-stack-lock-free-queue), a subtle, but important detail is that the call to `prepare_rehash` is made before accessing `hashtbl.pending`. This way the transaction log is not poisoned and there is chance for the operation to succeed on the first attempt. After switching to the rehashing state, all mutators will then cooperatively race to perform the rehash. We can now just implement the replace ```ocaml # let replace ~xt hashtbl key value = maybe_rehash ~xt hashtbl (+1); replace ~xt hashtbl.basic key value val replace : xt:'a Xt.t -> ('b, 'c) hashtbl -> 'b -> 'c -> unit = ``` and remove ```ocaml # let remove ~xt hashtbl key = maybe_rehash ~xt hashtbl (-1); remove ~xt hashtbl.basic key val remove : xt:'a Xt.t -> ('b, 'c) hashtbl -> 'b -> unit = ``` operations with rehashing. After creating a constructor function ```ocaml # let hashtbl () = { pending = Loc.make `Nothing; basic = basic_hashtbl (); } val hashtbl : unit -> ('a, 'b) hashtbl = ``` for hash tables, we are ready to take it out for a spin: ```ocaml # let a_hashtbl : (string, int) hashtbl = hashtbl () val a_hashtbl : (string, int) hashtbl = {pending = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }; basic = {size = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }; data = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }}} # let assoc = [ ("Intro", 101); ("Answer", 42); ("OCaml", 5); ("Year", 2023) ] val assoc : (string * int) list = [("Intro", 101); ("Answer", 42); ("OCaml", 5); ("Year", 2023)] # assoc |> List.iter @@ fun (key, value) -> Xt.commit { tx = replace a_hashtbl key value } - : unit = () # assoc |> List.iter @@ fun (key, _) -> Xt.commit { tx = remove a_hashtbl key } - : unit = () ``` What we have here is a lock-free hash table with rehashing that should not be highly prone to starvation. In other respects this is a fairly naive hash table implementation. You might want to think about various ways to improve upon it. ### Avoid false sharing [False sharing](https://en.wikipedia.org/wiki/False_sharing) is a form of contention that arises when some location, that is being written to by at least a single core, happens to be in memory next to — within the same cache line aligned region of memory — another location that is accessed, read or written, by other cores. Perhaps contrary to how it is often described, false sharing doesn't require the use of atomic variables or atomic instructions. Consider the following example: ```ocaml # type state = { mutable counter : int; mutable finished: bool; } type state = { mutable counter : int; mutable finished : bool; } # let state = { counter = 1_000; finished = false } val state : state = {counter = 1000; finished = false} # let reader = Domain.spawn @@ fun () -> while not state.finished do Domain.cpu_relax () done val reader : unit Domain.t = # while 0 < state.counter do state.counter <- state.counter - 1 done; - : unit = () # state.finished <- true; - : unit = () # Domain.join reader - : unit = () ``` The `state` is a record with two fields, `counter` and `finished`, next to each other, which makes it rather likely for them to happen to reside in the same cache line aligned region of memory. The main domain repeatedly mutates the `counter` field and the other domain repeatedly reads the `finished` field. What this means in practice is that the reads of the `finished` field by the other domain will be very expensive, because the cache is repeatedly invalidated by the `counter` updates done by the main domain. The above example is contrived, of course, but this sort of false sharing can happen very easily. Cache lines are typically relatively large — 8, 16, or even 32 words wide. Typically many, if not most, heap allocated objects in OCaml are smaller than a cache line, which means that false sharing may easily happen even between seemingly unrelated objects. To completely avoid false sharing one would basically need to make sure that mutable locations (atomic or otherwise) are not allocated next to locations that might be accessed from other domains. Unfortunately, that is difficult to achieve without being expensive in itself as it tends to increase memory usage and the amount of initializing stores. The [`Loc.make`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Loc/index.html#val-make) function takes an optional `padded` argument, which can be explicitly specified as `~padded:true` to request the location to be allocated in a way to avoid false sharing. Using `~padded:true` on long lived shared memory locations that are being repeatedly modified can improve performance significantly. Using `~padded:true` on short lived shared memory locations is not recommended. Using `~padded:true` does not eliminate all false sharing, however. Consider the following sketch of a queue data structure: ```ocaml type 'a queue = { head: 'a list Loc.t; tail: 'a list Loc.t; } ``` Even if you allocate the locations with padding ```ocaml # let queue () = { head = Loc.make ~padded:true []; tail = Loc.make ~padded:true []; } val queue : unit -> 'a queue = ``` the queue record will still be vulnerable to the same kind of false sharing as in the earlier example: ```ocaml # let a_queue : int queue = queue () val a_queue : int queue = {head = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }; tail = Kcas.Loc.Loc {Kcas.Loc.state = ; id = }} # let counter = ref 1_000 val counter : int ref = {contents = 1000} ``` Above the reference cell for the `counter` might exhibit false sharing with the queue record (which is read-only) and significantly degrade the performance of the queue for passing messages between domains. To avoid the above kind of problems, a strategic approach is to also allocate the queue record in a way to avoid false sharing. Unfortunately OCaml does not currently provide a standard way to do so. The [multicore-magic](https://github.com/ocaml-multicore/multicore-magic) library provides a function [`copy_as_padded`](https://ocaml-multicore.github.io/multicore-magic/doc/multicore-magic/Multicore_magic/index.html#val-copy_as_padded) for the purpose. Using [`copy_as_padded`](https://ocaml-multicore.github.io/multicore-magic/doc/multicore-magic/Multicore_magic/index.html#val-copy_as_padded) one would write ```ocaml # let queue () = Multicore_magic.copy_as_padded { head = Loc.make ~padded:true []; tail = Loc.make ~padded:true []; } val queue : unit -> 'a queue = ``` to allocate the queue record in a way to avoid false sharing. Note that allocating long lived data structures, like queues, used for inter domain communication in the way as described above to avoid false sharing does not eliminate all false sharing, but it is likely to reduce false sharing significantly with relatively low effort. ### Beware of torn reads The algorithm underlying **Kcas** ensures that it is not possible to read uncommitted changes to shared memory locations and that an operation can only complete successfully if all of the accesses taken together were atomic. These are very strong guarantees and make it much easier to implement correct concurrent algorithms. Unfortunately, the transaction mechanism that **Kcas** provides does not prevent one specific concurrency anomaly. When reading multiple locations, it is possible for a transaction to observe different locations at different times even though it is not possible for the transaction to commit successfully unless all the accesses together were atomic. Let's examine this phenomena. To see the anomaly, we need to have two or more locations. Let's just create two locations `a` and `b`: ```ocaml # let a = Loc.make 0 and b = Loc.make 0 val a : int Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } val b : int Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } ``` And create a helper that spawns a domain that repeatedly increments `a` and decrements `b` in a transaction: ```ocaml # let with_updater fn = let stop = ref false in let domain = Domain.spawn @@ fun () -> while not !stop do let tx ~xt = Xt.incr ~xt a; Xt.decr ~xt b in Xt.commit { tx } done in let finally () = stop := true; Domain.join domain in Fun.protect ~finally fn val with_updater : (unit -> 'a) -> 'a = ``` The sum of the values of `a` and `b` must always be zero. We can verify this using a transaction: ```ocaml # with_updater @@ fun () -> for _ = 1 to 1_000 do let tx ~xt = 0 = Xt.get ~xt a + Xt.get ~xt b in if not (Xt.commit { tx }) then failwith "read skew" done; "no read skew" - : string = "no read skew" ``` Nice! So, it appears everything works as expected. A transaction can only commit after having read a consistent, atomic, snapshot of all the shared memory locations. Unfortunately within a transaction attempt things are not as simple. Let's do an experiment where we abort the transaction in case we observe that the values of `a` and `b` are inconsistent: ```ml # with_updater @@ fun () -> for _ = 1 to 1_000 do let tx ~xt = if 0 <> Xt.get ~xt a + Xt.get ~xt b then failwith "read skew" in Xt.commit { tx } done; "no read skew" Exception: Failure "read skew". ``` Oops! So, within a transaction we may actually observe different locations having values from different committed transactions. This is something that needs to be kept in mind when writing transactions. To mitigate issues due to read skew and to also avoid problems with long running transactions, the **Kcas** transaction mechanism automatically validates the transaction log periodically when an access is made to the transaction log. Therefore an important guideline for writing transactions is that loops inside a transaction should always include an access of some shared memory location through the transaction log or should otherwise be guaranteed to be bounded. In addition to the automatic periodic validation, one can also explicitly [`validate`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html#val-validate), _after_ reading some locations, that the locations have not been modified outside of the transaction: ```ocaml # with_updater @@ fun () -> for _ = 1 to 1_000 do let tx ~xt = let a' = Xt.get ~xt a in let b' = Xt.get ~xt b in Xt.validate ~xt a; if 0 <> a' + b' then failwith "read skew" in Xt.commit { tx } done; "no read skew" - : string = "no read skew" ``` Notice that above we only validated the access of `a`, because we know that `a` and `b` are always updated atomically and we read `b` after reading `a`. In this case that is enough to ensure that read skew is not possible. ## Additional resources - [Kcas: Building a Lock-Free STM for OCaml (1/2)](https://tarides.com/blog/2023-08-07-kcas-building-a-lock-free-stm-for-ocaml-1-2/) [and (2/2)](https://tarides.com/blog/2023-08-10-kcas-building-a-lock-free-stm-for-ocaml-2-2/) - [Building a lock-free STM for OCaml](https://icfp23.sigplan.org/details/ocaml-2023-papers/6/Building-a-lock-free-STM-for-OCaml), see [video](https://www.youtube.com/watch?v=Mt8wPCHU1ZU) and [slides](https://polytypic.github.io/kcas-talk/). kcas-0.7.0/bench.Dockerfile000066400000000000000000000005421456672623200155250ustar00rootroot00000000000000FROM ocaml/opam:debian-ocaml-5.1 RUN sudo ln -sf /usr/bin/opam-2.1 /usr/bin/opam WORKDIR bench-dir COPY *.opam ./ RUN opam remote add origin https://github.com/ocaml/opam-repository.git && \ opam update RUN opam pin -yn --with-version=dev . RUN opam install -y --deps-only --with-test . COPY . ./ RUN opam exec -- dune build --release bench/main.exe kcas-0.7.0/bench/000077500000000000000000000000001456672623200135335ustar00rootroot00000000000000kcas-0.7.0/bench/bench_accumulator.ml000066400000000000000000000017621456672623200175510ustar00rootroot00000000000000open Kcas_data open Multicore_bench let run_one ~budgetf ~n_domains ?(n_ops = 180 * Util.iter_factor) () = let n_ops = n_ops * n_domains in let t = Accumulator.make 0 in let n_ops_todo = Atomic.make n_ops |> Multicore_magic.copy_as_padded in let init _ = () in let work _ () = let rec work () = let n = Util.alloc n_ops_todo in if n <> 0 then let rec loop n = if 0 < n then begin Accumulator.incr t; Accumulator.decr t; loop (n - 2) end else work () in loop n in work () in let after () = Atomic.set n_ops_todo n_ops in let config = Printf.sprintf "%d worker%s, 0%% reads" n_domains (if n_domains = 1 then "" else "s") in Times.record ~budgetf ~n_domains ~init ~work ~after () |> Times.to_thruput_metrics ~n:n_ops ~config ~singular:"operation" let run_suite ~budgetf = [ 1; 2; 4 ] |> List.concat_map @@ fun n_domains -> run_one ~n_domains ~budgetf () kcas-0.7.0/bench/bench_atomic.ml000066400000000000000000000031651456672623200165050ustar00rootroot00000000000000open Multicore_bench module Atomic = struct include Stdlib.Atomic let rec modify ?(backoff = Backoff.default) x f = let before = Atomic.get x in let after = f before in if not (Atomic.compare_and_set x before after) then modify ~backoff:(Backoff.once backoff) x f end type t = | Op : string * int * 'a * ('a Atomic.t -> unit) * ('a Atomic.t -> unit) -> t let run_one ~budgetf ?(n_iter = 500 * Util.iter_factor) (Op (name, extra, value, op1, op2)) = let n_iter = n_iter * extra in let loc = Atomic.make value in let init _ = () in let work _ () = let rec loop i = if i > 0 then begin op1 loc; op2 loc; loop (i - 2) end in loop n_iter in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_iter ~singular:"op" ~config:name let run_suite ~budgetf = [ (let get x = Atomic.get x |> ignore in Op ("get", 10, 42, get, get)); (let incr x = Atomic.incr x in Op ("incr", 1, 0, incr, incr)); (let push x = Atomic.modify x (fun xs -> 101 :: xs) and pop x = Atomic.modify x (function [] -> [] | _ :: xs -> xs) in Op ("push & pop", 2, [], push, pop)); (let cas01 x = Atomic.compare_and_set x 0 1 |> ignore and cas10 x = Atomic.compare_and_set x 1 0 |> ignore in Op ("cas int", 1, 0, cas01, cas10)); (let xchg1 x = Atomic.exchange x 1 |> ignore and xchg0 x = Atomic.exchange x 0 |> ignore in Op ("xchg int", 1, 0, xchg1, xchg0)); (let swap x = Atomic.modify x (fun (x, y) -> (y, x)) in Op ("swap", 2, (4, 2), swap, swap)); ] |> List.concat_map @@ run_one ~budgetf kcas-0.7.0/bench/bench_dllist.ml000066400000000000000000000041711456672623200165220ustar00rootroot00000000000000open Kcas_data open Multicore_bench let run_single ~budgetf ?(n_msgs = 15 * Util.iter_factor) () = let t = Dllist.create () in let op push = if push then Dllist.add_l 101 t |> ignore else Dllist.take_opt_r t |> ignore in let init _ = assert (Dllist.is_empty t); Util.generate_push_and_pop_sequence n_msgs in let work _ bits = Util.Bits.iter op bits in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config:"one domain" let run_one ~budgetf ?(n_adders = 2) ?(n_takers = 2) ?(factor = 1) ?(n_msgs = 20 * factor * Util.iter_factor) () = let n_domains = n_adders + n_takers in let t = Dllist.create () in let n_msgs_to_take = Atomic.make 0 |> Multicore_magic.copy_as_padded in let n_msgs_to_add = Atomic.make 0 |> Multicore_magic.copy_as_padded in let init _ = assert (Dllist.is_empty t); Atomic.set n_msgs_to_take n_msgs; Atomic.set n_msgs_to_add n_msgs in let work i () = if i < n_adders then let rec work () = let n = Util.alloc n_msgs_to_add in if 0 < n then begin for i = 1 to n do Dllist.add_r i t |> ignore done; work () end in work () else let rec work () = let n = Util.alloc n_msgs_to_take in if n <> 0 then begin for _ = 1 to n do while Option.is_none (Dllist.take_opt_l t) do Domain.cpu_relax () done done; work () end in work () in let config = let format role blocking n = Printf.sprintf "%d %s%s%s" n (if blocking then "" else "nb ") role (if n = 1 then "" else "s") in Printf.sprintf "%s, %s" (format "adder" false n_adders) (format "taker" false n_takers) in Times.record ~budgetf ~n_domains ~init ~work () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config let run_suite ~budgetf = run_single ~budgetf () @ (Util.cross [ 1; 2 ] [ 1; 2 ] |> List.concat_map @@ fun (n_adders, n_takers) -> run_one ~budgetf ~n_adders ~n_takers ()) kcas-0.7.0/bench/bench_hashtbl.ml000066400000000000000000000031531456672623200166530ustar00rootroot00000000000000open Kcas_data open Multicore_bench module Int = struct include Int let hash = Fun.id end let run_one ~budgetf ~n_domains ?(n_ops = 40 * Util.iter_factor) ?(n_keys = 1000) ~percent_read () = let t = Hashtbl.create ~hashed_type:(module Int) () in let n_ops = (100 + percent_read) * n_ops / 100 in let n_ops = n_ops * n_domains in for i = 0 to n_keys - 1 do Hashtbl.replace t i i done; let n_ops_todo = Atomic.make 0 |> Multicore_magic.copy_as_padded in let init _ = Atomic.set n_ops_todo n_ops; Random.State.make_self_init () in let work _ state = let rec work () = let n = Util.alloc n_ops_todo in if n <> 0 then let rec loop n = if 0 < n then let value = Random.State.bits state in let op = (value asr 20) mod 100 in let key = value mod n_keys in if op < percent_read then begin Hashtbl.find_opt t key |> ignore; loop (n - 1) end else begin Hashtbl.remove t key; Hashtbl.add t key value; loop (n - 2) end else work () in loop n in work () in let config = Printf.sprintf "%d worker%s, %d%% reads" n_domains (if n_domains = 1 then "" else "s") percent_read in Times.record ~budgetf ~n_domains ~init ~work () |> Times.to_thruput_metrics ~n:n_ops ~singular:"operation" ~config let run_suite ~budgetf = Util.cross [ 90; 50; 10 ] [ 1; 2; 4 ] |> List.concat_map @@ fun (percent_read, n_domains) -> run_one ~budgetf ~n_domains ~percent_read () kcas-0.7.0/bench/bench_loc.ml000066400000000000000000000025071456672623200160050ustar00rootroot00000000000000open Kcas open Multicore_bench type t = Op : string * int * 'a * ('a Loc.t -> unit) * ('a Loc.t -> unit) -> t let run_one ~budgetf ?(n_iter = 250 * Util.iter_factor) (Op (name, extra, value, op1, op2)) = let n_iter = n_iter * extra in let loc = Loc.make value in let init _ = () in let work _ () = let rec loop i = if i > 0 then begin op1 loc; op2 loc; loop (i - 2) end in loop n_iter in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_iter ~singular:"op" ~config:name let run_suite ~budgetf = [ (let get x = Loc.get x |> ignore in Op ("get", 5, 42, get, get)); (let incr x = Loc.incr x in Op ("incr", 1, 0, incr, incr)); (let push x = Loc.modify x (fun xs -> 101 :: xs) and pop x = Loc.modify x (function [] -> [] | _ :: xs -> xs) in Op ("push & pop", 1, [], push, pop)); (let cas01 x = Loc.compare_and_set x 0 1 |> ignore and cas10 x = Loc.compare_and_set x 1 0 |> ignore in Op ("cas int", 2, 0, cas01, cas10)); (let xchg1 x = Loc.exchange x 1 |> ignore and xchg0 x = Loc.exchange x 0 |> ignore in Op ("xchg int", 2, 0, xchg1, xchg0)); (let swap x = Loc.modify x (fun (x, y) -> (y, x)) in Op ("swap", 1, (4, 2), swap, swap)); ] |> List.concat_map @@ run_one ~budgetf kcas-0.7.0/bench/bench_mvar.ml000066400000000000000000000046231456672623200161760ustar00rootroot00000000000000open Kcas_data open Multicore_bench let run_one ~budgetf ?(n_adders = 2) ?(blocking_add = false) ?(n_takers = 2) ?(blocking_take = false) ?(n_msgs = 2 * Util.iter_factor) () = let n_domains = n_adders + n_takers in let t = Mvar.create None in let n_msgs_to_take = Atomic.make n_msgs |> Multicore_magic.copy_as_padded in let n_msgs_to_add = Atomic.make n_msgs |> Multicore_magic.copy_as_padded in let init _ = () in let work i () = if i < n_adders then if blocking_add then let rec work () = let n = Util.alloc n_msgs_to_add in if 0 < n then begin for i = 1 to n do Mvar.put t i done; work () end in work () else let rec work () = let n = Util.alloc n_msgs_to_add in if 0 < n then begin for i = 1 to n do while not (Mvar.try_put t i) do Domain.cpu_relax () done done; work () end in work () else if blocking_take then let rec work () = let n = Util.alloc n_msgs_to_take in if n <> 0 then begin for _ = 1 to n do ignore (Mvar.take t) done; work () end in work () else let rec work () = let n = Util.alloc n_msgs_to_take in if n <> 0 then begin for _ = 1 to n do while Option.is_none (Mvar.take_opt t) do Domain.cpu_relax () done done; work () end in work () in let after () = Atomic.set n_msgs_to_take n_msgs; Atomic.set n_msgs_to_add n_msgs in let config = let format role blocking n = Printf.sprintf "%d %s%s%s" n (if blocking then "" else "nb ") role (if n = 1 then "" else "s") in Printf.sprintf "%s, %s" (format "adder" blocking_add n_adders) (format "taker" blocking_take n_takers) in Times.record ~budgetf ~n_domains ~init ~work ~after () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config let run_suite ~budgetf = Util.cross (Util.cross [ 1; 2 ] [ false; true ]) (Util.cross [ 1; 2 ] [ false; true ]) |> List.concat_map @@ fun ((n_adders, blocking_add), (n_takers, blocking_take)) -> run_one ~budgetf ~n_adders ~blocking_add ~n_takers ~blocking_take () kcas-0.7.0/bench/bench_parallel_cmp.ml000066400000000000000000000023541456672623200176630ustar00rootroot00000000000000open Kcas open Multicore_bench let run_one ~budgetf ~n_domains ?(n_ops = 50 * Util.iter_factor) () = let n_ops = n_ops * n_domains in let a = Loc.make ~padded:true 10 in let b = Loc.make ~padded:true 52 in let xs = Loc.make_array ~padded:true n_domains 0 in let n_ops_todo = Atomic.make n_ops |> Multicore_magic.copy_as_padded in let init i = Array.unsafe_get xs i in let work _ x = let tx1 ~xt = let a = Xt.get ~xt a in let b = Xt.get ~xt b in Xt.set ~xt x (b - a) and tx2 ~xt = let a = Xt.get ~xt a in let b = Xt.get ~xt b in Xt.set ~xt x (a + b) in let rec work () = let n = Util.alloc n_ops_todo in if n <> 0 then begin for _ = 1 to n asr 1 do Xt.commit { tx = tx1 }; Xt.commit { tx = tx2 } done; work () end in work () in let after () = Atomic.set n_ops_todo n_ops in let config = Printf.sprintf "%d worker%s" n_domains (if n_domains = 1 then "" else "s") in Times.record ~budgetf ~n_domains ~init ~work ~after () |> Times.to_thruput_metrics ~n:n_ops ~singular:"transaction" ~config let run_suite ~budgetf = [ 1; 2; 4 ] |> List.concat_map @@ fun n_domains -> run_one ~budgetf ~n_domains () kcas-0.7.0/bench/bench_queue.ml000066400000000000000000000050251456672623200163520ustar00rootroot00000000000000open Kcas_data open Multicore_bench let run_one_domain ~budgetf ?(n_msgs = 50 * Util.iter_factor) () = let t = Queue.create () in let op push = if push then Queue.push 101 t else Queue.take_opt t |> ignore in let init _ = assert (Queue.is_empty t); Util.generate_push_and_pop_sequence n_msgs in let work _ bits = Util.Bits.iter op bits in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config:"one domain" let run_one ~budgetf ?(n_adders = 2) ?(blocking_add = false) ?(n_takers = 2) ?(blocking_take = false) ?(n_msgs = 50 * Util.iter_factor) () = let n_domains = n_adders + n_takers in let t = Queue.create () in let n_msgs_to_take = Atomic.make 0 |> Multicore_magic.copy_as_padded in let n_msgs_to_add = Atomic.make 0 |> Multicore_magic.copy_as_padded in let init _ = assert (Queue.is_empty t); Atomic.set n_msgs_to_take n_msgs; Atomic.set n_msgs_to_add n_msgs in let work i () = if i < n_adders then let rec work () = let n = Util.alloc n_msgs_to_add in if 0 < n then begin for i = 1 to n do Queue.add i t done; work () end in work () else if blocking_take then let rec work () = let n = Util.alloc n_msgs_to_take in if n <> 0 then begin for _ = 1 to n do ignore (Queue.take_blocking t) done; work () end in work () else let rec work () = let n = Util.alloc n_msgs_to_take in if n <> 0 then begin for _ = 1 to n do while Option.is_none (Queue.take_opt t) do Domain.cpu_relax () done done; work () end in work () in let config = let format role blocking n = Printf.sprintf "%d %s%s%s" n (if blocking then "" else "nb ") role (if n = 1 then "" else "s") in Printf.sprintf "%s, %s" (format "adder" blocking_add n_adders) (format "taker" blocking_take n_takers) in Times.record ~budgetf ~n_domains ~init ~work () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config let run_suite ~budgetf = run_one_domain ~budgetf () @ (Util.cross (Util.cross [ 1; 2 ] [ false ]) (Util.cross [ 1; 2 ] [ false; true ]) |> List.concat_map @@ fun ((n_adders, blocking_add), (n_takers, blocking_take)) -> run_one ~budgetf ~n_adders ~blocking_add ~n_takers ~blocking_take ()) kcas-0.7.0/bench/bench_ref.ml000066400000000000000000000036171456672623200160070ustar00rootroot00000000000000open Multicore_bench module Ref = struct type 'a t = 'a ref let make = ref let get = ( ! ) let[@poll error] [@inline never] incr x = x := !x + 1 let[@poll error] [@inline never] compare_and_set x before after = !x == before && begin x := after; true end let[@poll error] [@inline never] exchange x after = let before = !x in x := after; before let rec modify ?(backoff = Backoff.default) x f = let before = get x in let after = f before in if not (compare_and_set x before after) then modify ~backoff:(Backoff.once backoff) x f end type t = Op : string * int * 'a * ('a Ref.t -> unit) * ('a Ref.t -> unit) -> t let run_one ~budgetf ?(n_iter = 500 * Util.iter_factor) (Op (name, extra, value, op1, op2)) = let n_iter = n_iter * extra in let loc = Ref.make value in let init _ = () in let work _ () = let rec loop i = if i > 0 then begin op1 loc; op2 loc; loop (i - 2) end in loop n_iter in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_iter ~singular:"op" ~config:name let run_suite ~budgetf = [ (let get x = Ref.get x |> ignore in Op ("get", 10, 42, get, get)); (let incr x = Ref.incr x in Op ("incr", 1, 0, incr, incr)); (let push x = Ref.modify x (fun xs -> 101 :: xs) and pop x = Ref.modify x (function [] -> [] | _ :: xs -> xs) in Op ("push & pop", 2, [], push, pop)); (let cas01 x = Ref.compare_and_set x 0 1 |> ignore and cas10 x = Ref.compare_and_set x 1 0 |> ignore in Op ("cas int", 1, 0, cas01, cas10)); (let xchg1 x = Ref.exchange x 1 |> ignore and xchg0 x = Ref.exchange x 0 |> ignore in Op ("xchg int", 1, 0, xchg1, xchg0)); (let swap x = Ref.modify x (fun (x, y) -> (y, x)) in Op ("swap", 2, (4, 2), swap, swap)); ] |> List.concat_map @@ run_one ~budgetf kcas-0.7.0/bench/bench_ref_mutex.ml000066400000000000000000000035421456672623200172260ustar00rootroot00000000000000open Multicore_bench module Ref = struct type 'a t = 'a ref let make = ref let[@inline] compare_and_set x before after = !x == before && begin x := after; true end let[@inline] exchange x after = let before = !x in x := after; before end type t = Op : string * 'a * ('a Ref.t -> unit) * ('a Ref.t -> unit) -> t (** For some reason allocating the mutex inside [run_one] tends to cause performance hiccups, i.e. some operations appear to be 10x slower than others, which doesn't make sense. So, we allocate the mutex here. *) let mutex = Mutex.create () let run_one ~budgetf ?(n_iter = 250 * Util.iter_factor) (Op (name, value, op1, op2)) = let loc = Ref.make value in let init _ = () in let work _ () = let rec loop i = if i > 0 then begin Mutex.lock mutex; op1 loc; Mutex.unlock mutex; Mutex.lock mutex; op2 loc; Mutex.unlock mutex; loop (i - 2) end in loop n_iter in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_iter ~singular:"op" ~config:name let run_suite ~budgetf = [ (let get x = !x |> ignore in Op ("get", 42, get, get)); (let incr x = x := !x + 1 in Op ("incr", 0, incr, incr)); (let push x = x := 101 :: !x and pop x = match !x with [] -> () | _ :: xs -> x := xs in Op ("push & pop", [], push, pop)); (let cas01 x = Ref.compare_and_set x 0 1 |> ignore and cas10 x = Ref.compare_and_set x 1 0 |> ignore in Op ("cas int", 0, cas01, cas10)); (let xchg1 x = Ref.exchange x 1 |> ignore and xchg0 x = Ref.exchange x 0 |> ignore in Op ("xchg int", 0, xchg1, xchg0)); (let swap x = let l, r = !x in x := (r, l) in Op ("swap", (4, 2), swap, swap)); ] |> List.concat_map @@ run_one ~budgetf kcas-0.7.0/bench/bench_stack.ml000066400000000000000000000050231456672623200163310ustar00rootroot00000000000000open Kcas_data open Multicore_bench let run_one_domain ~budgetf ?(n_msgs = 50 * Util.iter_factor) () = let t = Stack.create () in let op push = if push then Stack.push 101 t else Stack.pop_opt t |> ignore in let init _ = assert (Stack.is_empty t); Util.generate_push_and_pop_sequence n_msgs in let work _ bits = Util.Bits.iter op bits in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config:"one domain" let run_one ~budgetf ?(n_adders = 2) ?(blocking_add = false) ?(n_takers = 2) ?(blocking_take = false) ?(n_msgs = 50 * Util.iter_factor) () = let n_domains = n_adders + n_takers in let t = Stack.create () in let n_msgs_to_take = Atomic.make 0 |> Multicore_magic.copy_as_padded in let n_msgs_to_add = Atomic.make 0 |> Multicore_magic.copy_as_padded in let init _ = assert (Stack.is_empty t); Atomic.set n_msgs_to_take n_msgs; Atomic.set n_msgs_to_add n_msgs in let work i () = if i < n_adders then let rec work () = let n = Util.alloc n_msgs_to_add in if 0 < n then begin for i = 1 to n do Stack.push i t done; work () end in work () else if blocking_take then let rec work () = let n = Util.alloc n_msgs_to_take in if n <> 0 then begin for _ = 1 to n do ignore (Stack.pop_blocking t) done; work () end in work () else let rec work () = let n = Util.alloc n_msgs_to_take in if n <> 0 then begin for _ = 1 to n do while Option.is_none (Stack.pop_opt t) do Domain.cpu_relax () done done; work () end in work () in let config = let format role blocking n = Printf.sprintf "%d %s%s%s" n (if blocking then "" else "nb ") role (if n = 1 then "" else "s") in Printf.sprintf "%s, %s" (format "adder" blocking_add n_adders) (format "taker" blocking_take n_takers) in Times.record ~budgetf ~n_domains ~init ~work () |> Times.to_thruput_metrics ~n:n_msgs ~singular:"message" ~config let run_suite ~budgetf = run_one_domain ~budgetf () @ (Util.cross (Util.cross [ 1; 2 ] [ false ]) (Util.cross [ 1; 2 ] [ false; true ]) |> List.concat_map @@ fun ((n_adders, blocking_add), (n_takers, blocking_take)) -> run_one ~budgetf ~n_adders ~blocking_add ~n_takers ~blocking_take ()) kcas-0.7.0/bench/bench_xt.ml000066400000000000000000000014711456672623200156620ustar00rootroot00000000000000open Kcas open Multicore_bench let run_one ~budgetf ?(n_locs = 2) ?(n_iter = 15 * (9 - n_locs) * Util.iter_factor) () = let locs = Loc.make_array n_locs 0 in let rec loop ~xt i = Xt.incr ~xt (Array.unsafe_get locs i); let i = i - 1 in if 0 <= i then loop ~xt i in let tx ~xt = let i = n_locs - 1 in if 0 <= i then loop ~xt i in let init _ = () in let work _ () = let rec loop i = if i > 0 then begin Xt.commit { tx }; loop (i - 1) end in loop n_iter in let config = Printf.sprintf "%d loc tx" n_locs in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_iter ~singular:"transaction" ~config let run_suite ~budgetf = [ 0; 1; 2; 4; 8 ] |> List.concat_map @@ fun n_locs -> run_one ~budgetf ~n_locs () kcas-0.7.0/bench/bench_xt_ro.ml000066400000000000000000000015351456672623200163630ustar00rootroot00000000000000open Kcas open Multicore_bench let run_one ~budgetf ?(n_locs = 2) ?(n_iter = 20 * (9 - n_locs) * Util.iter_factor) () = let locs = Loc.make_array n_locs 0 in let rec loop ~xt s i = let s = s + Xt.get ~xt (Array.unsafe_get locs i) in let i = i - 1 in if 0 <= i then loop ~xt s i else s in let tx ~xt = let i = n_locs - 1 in if 0 <= i then loop ~xt 0 i |> ignore in let init _ = () in let work _ () = let rec loop i = if i > 0 then begin Xt.commit { tx }; loop (i - 1) end in loop n_iter in let config = Printf.sprintf "%d loc tx" n_locs in Times.record ~budgetf ~n_domains:1 ~init ~work () |> Times.to_thruput_metrics ~n:n_iter ~singular:"transaction" ~config let run_suite ~budgetf = [ 0; 1; 2; 4; 8 ] |> List.concat_map @@ fun n_locs -> run_one ~budgetf ~n_locs () kcas-0.7.0/bench/dune000066400000000000000000000005331456672623200144120ustar00rootroot00000000000000(* -*- tuareg -*- *) let maybe_domain_shims_and_threads = if Jbuild_plugin.V1.ocaml_version < "5" then "domain_shims threads.posix" else "" let () = Jbuild_plugin.V1.send @@ {| (test (name main) (package kcas_data) (libraries kcas_data multicore-bench backoff multicore-magic |} ^ maybe_domain_shims_and_threads ^ {| )) |} kcas-0.7.0/bench/main.ml000066400000000000000000000013011456672623200150040ustar00rootroot00000000000000let benchmarks = [ ("Ref with [@poll error]", Bench_ref.run_suite); ("Ref with Mutex", Bench_ref_mutex.run_suite); ("Atomic", Bench_atomic.run_suite); ("Kcas Loc", Bench_loc.run_suite); ("Kcas Xt", Bench_xt.run_suite); ("Kcas Xt read-only", Bench_xt_ro.run_suite); ("Kcas parallel CMP", Bench_parallel_cmp.run_suite); ("Kcas_data Accumulator", Bench_accumulator.run_suite); ("Kcas_data Dllist", Bench_dllist.run_suite); ("Kcas_data Hashtbl", Bench_hashtbl.run_suite); ("Kcas_data Mvar", Bench_mvar.run_suite); ("Kcas_data Queue", Bench_queue.run_suite); ("Kcas_data Stack", Bench_stack.run_suite); ] let () = Multicore_bench.Cmd.run ~benchmarks () kcas-0.7.0/doc/000077500000000000000000000000001456672623200132215ustar00rootroot00000000000000kcas-0.7.0/doc/dune000066400000000000000000000002631456672623200141000ustar00rootroot00000000000000(mdx (package kcas_data) (deps (package kcas) (package kcas_data)) (enabled_if (>= %{ocaml_version} 5.0.0)) (files gkmz-with-read-only-cmp-ops.md scheduler-interop.md)) kcas-0.7.0/doc/gkmz-with-read-only-cmp-ops.md000066400000000000000000000342171456672623200207370ustar00rootroot00000000000000# Extending k-CAS with efficient read-only CMP operations > **_NOTE_**: This document was originally written at around the time the kcas > library was extended with a > [`Tx`](https://ocaml-multicore.github.io/kcas/0.2.0/kcas/Kcas/Tx/index.html) > API for monadic transactions. This version of the document has been updated to > use the new > [`Xt`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html) > API. [`Kcas`](https://ocaml-multicore.github.io/kcas/) currently uses the GKMZ algorithm for [Efficient Multi-word Compare and Swap](https://arxiv.org/abs/2008.02527) or MCAS aka k-CAS. This is a nearly optimal algorithm for MCAS as it requires only `k + 1` CAS operations. The new library API also provides a transactional API for using the algorithm. For example, suppose one would create the following shared memory locations: ```ocaml let a = Loc.make 10 let b = Loc.make 52 let x = Loc.make 0 let y = Loc.make 0 ``` Using the [`Xt`](https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/Xt/index.html) API one could define a transaction `x_to_b_sub_a` ```ocaml let x_to_b_sub_a ~xt = let a' = Xt.get ~xt a and b' = Xt.get ~xt b in Xt.set ~xt x (b' - a') ``` to update `x` with the difference of `b` and `a` and commit that transaction: ```ocaml Xt.commit { tx = x_to_b_sub_a } ``` One could similarly define a transaction `y_to_a_add_b` ```ocaml let y_to_a_add_b ~xt = let a' = Xt.get ~xt a and b' = Xt.get ~xt b in Xt.set ~xt y (a' + b') ``` to update `y` with the sum of `a` and `b` and commit that transaction: ```ocaml Xt.commit { tx = y_to_a_add_b } ``` The above committed transactions essentially correspond to MCAS operations as follows: ```ml Xt.commit { tx = x_to_b_sub_a } == [ CAS (a, 10, 10); CAS (b, 52, 52); CAS (x, 0, 42) ] Xt.commit { tx = y_to_a_add_b } == [ CAS (a, 10, 10); CAS (b, 52, 52); CAS (y, 0, 62) ] ``` CAS with equal expected or before and desired or after values essentially expresses an operation that does not change the logical content of the target location, but only "asserts" that it does not change during the operation. Note that the transactions `x_to_b_sub_a` and `y_to_a_add_b`, unlike the MCAS operations they generate, are independent of the exact values of the locations being accessed. It is important to distinguish between them. A transaction is a specification for generating a list of CASes. One might attempt to perform both of the two MCAS operations ```ml [ CAS (a, 10, 10); CAS (b, 52, 52); CAS (x, 0, 42) ] ``` and ```ml [ CAS (a, 10, 10); CAS (b, 52, 52); CAS (y, 0, 62) ] ``` in parallel, but that will not be allowed by the GKMZ algorithm. Every CAS actually updates the targeted memory locations. This means two things: 1. CAS operations targeting the same location can only execute sequentially. 2. CAS operations, even those that do not change the logical content of a location, cause contention as after the operation only the cache of the writer will have a valid copy of the [shared memory](https://en.wikipedia.org/wiki/MSI_protocol) location. Could we extend upon GKMZ and allow read-only CMP operations to be expressed directly and also make it so that read-only CMP operations do not write to memory? Let's first examine, in a bit more detail, how GKMZ, or our OCaml adaptation of it, operates. For this issue, in order to make it a bit easier to follow, I'll simplify the implementation a bit. We'll replace the splay tree representation with a simpler list, assume that locations are always given in some total order, and we also ignore the release of unused values after the operation. Here are the core data structures used by the GKMZ algorithm: ```ocaml type 'a loc = 'a state Atomic.t and 'a state = { before : 'a; after : 'a; casn : casn } and cass = CASS : 'a loc * 'a state -> cass and casn = status Atomic.t and status = Undetermined of cass list | After | Before ``` Take a closer look at the internal `cass` type. Previously we explained transactions and talked about MCAS in terms a different CAS type whose definition could look like this: ```ocaml type cas = | CAS : 'a loc * 'a * 'a -> cas ``` The difference between the above logical `cas` operation and the internal `cass` descriptor is that the logical `cas` deals with plain values of type `'a` while the internal `cass` descriptor uses a state type or the `'a state` type. A location `'a loc` is an atomic location that contains a `'a state`. The core GKMZ algorithm attempts a MCAS by attempting to replace the states of all the locations in a list of `cass`es and then setting status of the `casn` descriptor to either `After` or `Before` depending on whether the whole operation was a success or a failure, respectively. When using GKMZ one first prepares a list of internal `cass` descriptors and a `casn` descriptor that has the `Undetermined` list of those descriptors. The data structure is cyclic: `casn` contains the list of `cass` descriptors which contain `state`s which contain a reference to the `casn` descriptor. This cyclic form allows the whole data structure to be traversed starting from any `state`, which one might find in a location. Here is a the core of the GKMZ algorithm in OCaml: ```ocaml let finish casn desired = match Atomic.get casn with | After -> true | Before -> false | Undetermined _ as current -> Atomic.compare_and_set casn current desired |> ignore; Atomic.get casn == After let rec gkmz casn = function | [] -> finish casn After (* seems like a success *) | (CASS (loc, desired) :: continue) as retry -> let current = Atomic.get loc in if desired == current then gkmz casn continue else let current_value = if is_after current.casn then current.after else current.before in if current_value != desired.before then finish casn Before (* seems like a failure *) else match Atomic.get casn with | Undetermined _ -> (* operation still unfinished *) if Atomic.compare_and_set loc current desired then gkmz casn continue else gkmz casn retry | After -> true (* operation was a success *) | Before -> false (* operation was a failure *) and is_after casn = match Atomic.get casn with | Undetermined cass -> gkmz casn cass | After -> true | Before -> false let get loc = let state = Atomic.get loc in if is_after state.casn then state.after else state.before let atomically logical_cas_list = let casn = Atomic.make After in let cass = logical_cas_list |> List.map @@ function | CAS (loc, before, after) -> CASS (loc, {before; after; casn}) in Atomic.set casn (Undetermined cass); gkmz casn cass ``` Note that every call of `atomically` allocates a fresh location for `casn` descriptor and also fresh `state`s for all the `CASS` descriptors. This is important as it makes sure that [ABA problems](https://en.wikipedia.org/wiki/ABA_problem) are avoided. It is doubly important in the following extended algorithm. Let's then simply extend the algorithm to allow `CMP` operations. First we extend the logical `cas` type with a new logical `CMP` operation: ```diff type cas = | CAS : 'a loc * 'a * 'a -> cas + | CMP : 'a loc * 'a -> cas ``` It turns out that we don't need to change the internal data structures at all. The gist is that an internal read-only `CASS (loc, state)` descriptor refers to the `state` of a location before the operation started. We can distinguish such a `state` simply by comparing the `casn` of the state to the `casn` of the entire operation. Furthermore, because we know that the `state`s and `casn` are always freshly allocated, we know that we can compare them simply by their identities without [ABA problems](https://en.wikipedia.org/wiki/ABA_problem). Then we extend the algorithm to allow read-only `CMP` operations. The idea is simple: instead of attempting to store the `state`s of read-only `CMP` operations to the locations, we simply check that those locations have their original `state`s. Additionally, before we attempt to complete an operation as a success (by writing `After` to the `casn`), we verify that all of the read-only locations still have their original values. ```diff +let is_cmp casn state = + state.casn != casn + let finish casn desired = match Atomic.get casn with | After -> true | Before -> false - | Undetermined _ as current -> + | Undetermined cass as current -> + let desired = + if desired == After + && cass + |> List.exists @@ fun (CASS (loc, state)) -> + is_cmp casn state && Atomic.get loc != state then + Before + else + desired in Atomic.compare_and_set casn current desired |> ignore; Atomic.get casn == After let rec gkmz casn = function | [] -> finish casn After (* seems like a success *) | (CASS (loc, desired) :: continue) as retry -> let current = Atomic.get loc in if desired == current then gkmz casn continue + else if is_cmp casn desired then + finish casn Before (* seems like a failure *) else let current_value = if is_after current.casn then current.after else current.before in if current_value != desired.before then finish casn Before (* seems like a failure *) else match Atomic.get casn with | Undetermined _ -> (* operation still unfinished *) if Atomic.compare_and_set loc current desired then gkmz casn continue else gkmz casn retry | After -> true (* operation was a success *) | Before -> false (* operation was a failure *) and is_after casn = match Atomic.get casn with | Undetermined cass -> gkmz casn cass | After -> true | Before -> false let get loc = let state = Atomic.get loc in if is_after state.casn then state.after else state.before let atomically logical_cas_list = let casn = Atomic.make After in let cass = logical_cas_list |> List.map @@ function | CAS (loc, before, after) -> CASS (loc, {before; after; casn}) + | CMP (loc, expected) -> + let current = Atomic.get loc in + if get loc != expected || Atomic.get loc != current then + raise Exit + else + CASS (loc, current) in Atomic.set casn (Undetermined cass); gkmz casn cass + +let atomically logical_cas_list = + try atomically logical_cas_list with Exit -> false ``` The above implementation is specifically designed to minimize the diffs compared to the original GKMZ algorithm. Minor optimizations are possible that are not shown above. Additionally, it makes sense to distinguish the case when the algorithm specifically fails during the verification step. We'll get back to this shortly. We claim that the above algorithm is [linearizable](https://cs.brown.edu/~mph/HerlihyW90/p463-herlihy.pdf) and [obstruction-free](https://core.ac.uk/download/pdf/9590574.pdf). It should be clear that if one thread runs the above algorithm in isolation, it will be able to finish in a finite number of steps. When not running in isolation, the verification steps (in `finish`) of two operations may indefinitely cause both to fail. To see this, consider the following operations operating in parallel: ```ml [ CMP (a, 0); CAS (b, 0, 1) ] and [ CAS (a, 0, 1); CMP (b, 0) ] ``` Let's assume both operations manage to initially convert the `CMP` operations in `atomically`, check the read-only `CASS` once during `gkmz`, and perform their mutating `CASS` operations. At that point both enter the verification step and both of them will fail. The same could happen on a subsequent retry. To prove linearizability we need to show that, for any set of operations performed in parallel, there is an order in which the operations could have been performed sequentially giving the same state at the end for all locations. First note that the new algorithm operates exactly the same as the original GKMZ algorithm in case only `CAS` operations are performed. All the cases where operations write to overlapping locations are already proven to be linearizable by the basic GKMZ algorithm. Operations that are completely non-overlapping are trivially linearizable. The interesting case to consider is when an operation `R` that only reads a location `x` (and might also write other locations) needs to be linearizable with an operation `W` that writes to said location `x`. We claim any observer of said operations will only be able to read an end state that is consistent with `R` happening before `W`. Let's assume the opposite, that an observer reads the results of `W` and `R` and can determine that the operation `W` happened before `R`. For that to be possible, the `casn` descriptors of both `W` and `R` must be set to the `After` state. The only way for that to be possible is that the `R` operation verified the `x` location before `W` wrote to it and so the result of `R` must be consistent with `R` happening before `W`. This contradicts the assumption and proves the original claim. Previously we mentioned that it makes sense to distinguish the case when the verification step fails. Let's assume we have done so. Consider having a transaction mechanism using the new algorithm. Initially such a mechanism attempts to perform the transaction optimistically using the obstruction-free algorithm for `k-CAS-n-CMP`. If that repeatedly fails during the verification step, then the transaction mechanism can switch to using only `k-CAS` operations and try to complete the operation in lock-free manner. This way the transaction mechanism can guarantee lock-free behavior, which ensures that at least one thread will be able to make progress. Recall the example transactions `x_to_b_sub_a` and `y_to_a_add_b` that we started with. Using the new `k-CAS-n-CMP` algorithm the transactions can generate the following operations: ```ml Xt.commit { tx = x_to_b_sub_a } == [ CMP (a, 10); CMP (b, 52); CAS (x, 0, 42) ] Xt.commit { tx = y_to_a_add_b } == [ CMP (a, 10); CMP (b, 52); CAS (y, 0, 62) ] ``` The new algorithm will then be able to run the two transaction in parallel. kcas-0.7.0/doc/kcas.svg000066400000000000000000000027061456672623200146700ustar00rootroot00000000000000 kcas-0.7.0/doc/scheduler-interop.md000066400000000000000000000124301456672623200171770ustar00rootroot00000000000000# Scheduler interop The blocking mechanism in **Kcas** is based on a [_domain local await_](https://github.com/ocaml-multicore/domain-local-await) mechanism that schedulers can choose to implement to allow libraries like **Kcas** to work with them. Implementing schedulers is not really what casual users of **Kcas** are supposed to do. Below is an example of a _toy_ scheduler whose purpose is only to give a sketch of how a scheduler can provide the domain local await mechanism. Let's also demonstrate the use of the [`Queue`](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/Queue/index.html), [`Stack`](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/Stack/index.html), and [`Promise`](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/Promise/index.html) implementations that are conveniently provided by [**Kcas_data**](https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/index.html). Here is the full toy scheduler module: ```ocaml module Scheduler : sig type t val spawn : unit -> t val join : t -> unit val fiber : t -> (unit -> 'a) -> 'a Promise.t end = struct open Effect.Deep type _ Effect.t += | Suspend : (('a, unit) continuation -> unit) -> 'a Effect.t type t = { queue: (unit -> unit) Queue.t; domain: unit Domain.t } let spawn () = let queue = Queue.create () in let rec scheduler work = let effc (type a) : a Effect.t -> _ = function | Suspend ef -> Some ef | _ -> None in try_with work () { effc }; match Queue.take_opt queue with | Some work -> scheduler work | None -> () in let prepare_for_await _ = let state = Atomic.make `Init in let release () = if Atomic.get state != `Released then match Atomic.exchange state `Released with | `Awaiting k -> Queue.add (continue k) queue | _ -> () in let await () = if Atomic.get state != `Released then Effect.perform @@ Suspend (fun k -> if not (Atomic.compare_and_set state `Init (`Awaiting k)) then continue k ()) in Domain_local_await.{ release; await } in let domain = Domain.spawn @@ fun () -> try while true do let work = Queue.take_blocking queue in Domain_local_await.using ~prepare_for_await ~while_running:(fun () -> scheduler work) done with Exit -> () in { queue; domain } let join t = Queue.add (fun () -> raise Exit) t.queue; Domain.join t.domain let fiber t thunk = let (promise, resolver) = Promise.create () in Queue.add (fun () -> Promise.resolve resolver (thunk ())) t.queue; promise end ``` The idea is that one can spawn a scheduler to run on a new domain. Then one can run fibers on the scheduler. Because the scheduler provides the domain local await mechanism libraries like **Kcas** can use it to block in a scheduler independent and friendly manner. Let's then demonstrate the integration. To start we spawn a scheduler: ```ocaml # let scheduler = Scheduler.spawn () val scheduler : Scheduler.t = ``` The scheduler is now eagerly awaiting for fibers to run. Let's give it a couple of them, but, let's first create a queue and a stack to communicate with the fibers: ```ocaml # let in_queue : int Queue.t = Queue.create () val in_queue : int Kcas_data.Queue.t = # let out_stack : int Stack.t = Stack.create () val out_stack : int Kcas_data.Stack.t = ``` The first fiber we create just copies elements from the `in_queue` to the `out_stack`: ```ocaml # ignore @@ Scheduler.fiber scheduler @@ fun () -> while true do let elem = Queue.take_blocking in_queue in Printf.printf "Giving %d...\n%!" elem; Stack.push elem out_stack done - : unit = () ``` The second fiber awaits to take two elements from the `out_stack`, updates a state in between, and then returns their sum: ```ocaml # let state = Loc.make 0 val state : int Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } # let sync_to target = state |> Loc.get_as @@ fun current -> Retry.unless (target <= current) val sync_to : int -> unit = # let a_promise = Scheduler.fiber scheduler @@ fun () -> let x = Stack.pop_blocking out_stack in Printf.printf "First you gave me %d.\n%!" x; Loc.set state 1; let y = Stack.pop_blocking out_stack in Printf.printf "Then you gave me %d.\n%!" y; Loc.set state 2; x + y val a_promise : int Promise.t = ``` To interact with the fibers, we add some elements to the `in_queue`: ```ocaml # Queue.add 14 in_queue; sync_to 1 Giving 14... First you gave me 14. - : unit = () # Queue.add 28 in_queue; sync_to 2 Giving 28... Then you gave me 28. - : unit = () # Promise.await a_promise - : int = 42 ``` As can be seen above, the scheduler multiplexes the domain among the fibers. Notice that thanks to the domain local await mechanism we could just perform blocking operations without thinking about the schedulers. Communication between the main domain, the scheduler domain, and the fibers _just works_ ™. Time to close the shop. ```ocaml # Scheduler.join scheduler - : unit = () ``` _That's all Folks!_ kcas-0.7.0/dune000066400000000000000000000001671456672623200133360ustar00rootroot00000000000000(mdx (package kcas_data) (deps (package kcas) (package kcas_data)) (libraries domain_shims) (files README.md)) kcas-0.7.0/dune-project000066400000000000000000000040211456672623200147730ustar00rootroot00000000000000(lang dune 3.14) (name kcas) (generate_opam_files true) (implicit_transitive_deps false) (authors "KC Sivaramakrishnan " "Vesa Karvonen ") (maintainers "Vesa Karvonen " "KC Sivaramakrishnan ") (source (github ocaml-multicore/kcas)) (homepage "https://github.com/ocaml-multicore/kcas") (license ISC) (using mdx 0.4) (package (name kcas) (synopsis "Software transactional memory based on lock-free multi-word compare-and-set") (description "A software transactional memory (STM) implementation based on an atomic lock-free multi-word compare-and-set (MCAS) algorithm enhanced with read-only compare operations and ability to block awaiting for changes.") (depends (ocaml (>= 4.13.0)) (backoff (>= 0.1.0)) (domain-local-await (>= 1.0.1)) (domain-local-timeout (>= 1.0.1)) (multicore-magic (>= 2.1.0)) (domain_shims (and (>= 0.1.0) :with-test)) (alcotest (and (>= 1.7.0) :with-test)) (mdx (and (>= 2.3.0) :with-test)) (sherlodoc (and (>= 0.2) :with-doc)) (odoc (and (>= 2.4.1) :with-doc)))) (package (name kcas_data) (synopsis "Compositional lock-free data structures and primitives for communication and synchronization") (description "A library of compositional lock-free data structures and primitives for communication and synchronization implemented using kcas.") (depends (kcas (= :version)) (multicore-magic (>= 2.1.0)) (backoff (and (>= 0.1.0) :with-test)) (domain-local-await (and (>= 1.0.1) :with-test)) (domain_shims (and (>= 0.1.0) :with-test)) (multicore-bench (and (>= 0.1.1) :with-test)) (alcotest (and (>= 1.7.0) :with-test)) (qcheck-core (and (>= 0.21.2) :with-test)) (qcheck-stm (and (>= 0.3) :with-test)) (mdx (and (>= 2.3.0) :with-test)) (sherlodoc (and (>= 0.2) :with-doc)) (odoc (and (>= 2.4.1) :with-doc)))) kcas-0.7.0/kcas.opam000066400000000000000000000025451456672623200142610ustar00rootroot00000000000000# This file is generated by dune, edit dune-project instead opam-version: "2.0" synopsis: "Software transactional memory based on lock-free multi-word compare-and-set" description: "A software transactional memory (STM) implementation based on an atomic lock-free multi-word compare-and-set (MCAS) algorithm enhanced with read-only compare operations and ability to block awaiting for changes." maintainer: [ "Vesa Karvonen " "KC Sivaramakrishnan " ] authors: [ "KC Sivaramakrishnan " "Vesa Karvonen " ] license: "ISC" homepage: "https://github.com/ocaml-multicore/kcas" bug-reports: "https://github.com/ocaml-multicore/kcas/issues" depends: [ "dune" {>= "3.14"} "ocaml" {>= "4.13.0"} "backoff" {>= "0.1.0"} "domain-local-await" {>= "1.0.1"} "domain-local-timeout" {>= "1.0.1"} "multicore-magic" {>= "2.1.0"} "domain_shims" {>= "0.1.0" & with-test} "alcotest" {>= "1.7.0" & with-test} "mdx" {>= "2.3.0" & with-test} "sherlodoc" {>= "0.2" & with-doc} "odoc" {>= "2.4.1" & with-doc} ] build: [ ["dune" "subst"] {dev} [ "dune" "build" "-p" name "-j" jobs "@install" "@runtest" {with-test} "@doc" {with-doc} ] ] dev-repo: "git+https://github.com/ocaml-multicore/kcas.git" doc: "https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/" kcas-0.7.0/kcas.opam.template000066400000000000000000000000751456672623200160670ustar00rootroot00000000000000doc: "https://ocaml-multicore.github.io/kcas/doc/kcas/Kcas/" kcas-0.7.0/kcas_data.opam000066400000000000000000000026321456672623200152470ustar00rootroot00000000000000# This file is generated by dune, edit dune-project instead opam-version: "2.0" synopsis: "Compositional lock-free data structures and primitives for communication and synchronization" description: "A library of compositional lock-free data structures and primitives for communication and synchronization implemented using kcas." maintainer: [ "Vesa Karvonen " "KC Sivaramakrishnan " ] authors: [ "KC Sivaramakrishnan " "Vesa Karvonen " ] license: "ISC" homepage: "https://github.com/ocaml-multicore/kcas" bug-reports: "https://github.com/ocaml-multicore/kcas/issues" depends: [ "dune" {>= "3.14"} "kcas" {= version} "multicore-magic" {>= "2.1.0"} "backoff" {>= "0.1.0" & with-test} "domain-local-await" {>= "1.0.1" & with-test} "domain_shims" {>= "0.1.0" & with-test} "multicore-bench" {>= "0.1.1" & with-test} "alcotest" {>= "1.7.0" & with-test} "qcheck-core" {>= "0.21.2" & with-test} "qcheck-stm" {>= "0.3" & with-test} "mdx" {>= "2.3.0" & with-test} "sherlodoc" {>= "0.2" & with-doc} "odoc" {>= "2.4.1" & with-doc} ] build: [ ["dune" "subst"] {dev} [ "dune" "build" "-p" name "-j" jobs "@install" "@runtest" {with-test} "@doc" {with-doc} ] ] dev-repo: "git+https://github.com/ocaml-multicore/kcas.git" doc: "https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/" kcas-0.7.0/kcas_data.opam.template000066400000000000000000000001071456672623200170540ustar00rootroot00000000000000doc: "https://ocaml-multicore.github.io/kcas/doc/kcas_data/Kcas_data/" kcas-0.7.0/src/000077500000000000000000000000001456672623200132435ustar00rootroot00000000000000kcas-0.7.0/src/kcas/000077500000000000000000000000001456672623200141645ustar00rootroot00000000000000kcas-0.7.0/src/kcas/dune000066400000000000000000000003411456672623200150400ustar00rootroot00000000000000(library (name kcas) (public_name kcas) (libraries domain-local-await domain-local-timeout backoff multicore-magic)) (mdx (package kcas) (deps (package kcas)) (libraries kcas backoff domain_shims) (files kcas.mli)) kcas-0.7.0/src/kcas/kcas.ml000066400000000000000000001106511456672623200154430ustar00rootroot00000000000000(* * Copyright (c) 2017, Nicolas ASSOUAD * Copyright (c) 2023, Vesa Karvonen *) (** Work around CSE bug in OCaml 5-5.1. *) let[@inline] atomic_get x = Atomic.get ((* Prevents CSE *) Sys.opaque_identity x) (* NOTE: You can adjust comment blocks below to select whether or not to use fenceless operations where it is safe to do so. Fenceless operations have been seen to provide significant performance improvements on ARM (Apple M1). *) (**) external fenceless_get : 'a Atomic.t -> 'a = "%field0" let[@inline] fenceless_get x = fenceless_get ((* Prevents CSE *) Sys.opaque_identity x) (**) (* let fenceless_get = atomic_get *) module Timeout = struct exception Timeout let[@inline never] timeout () = raise Timeout type _ t = | Unset : [> `Unset ] t | Elapsed : [> `Elapsed ] t | Call : (unit -> unit) -> [> `Call ] t | Set : { mutable state : [< `Elapsed | `Call ] t } -> [> `Set ] t external as_atomic : [< `Set ] t -> [< `Elapsed | `Call ] t Atomic.t = "%identity" (* Fenceless operations are safe here as the timeout state is not not visible outside of the library and we don't always need the latest value and, when we do, there is a fence after. *) let[@inline] check (t : [< `Set | `Unset ] t) = match t with | Unset -> () | Set set_r -> if fenceless_get (as_atomic (Set set_r)) == Elapsed then timeout () let set seconds (state : [< `Elapsed | `Call ] t Atomic.t) = Domain_local_timeout.set_timeoutf seconds @@ fun () -> match Atomic.exchange state Elapsed with | Call release_or_cancel -> release_or_cancel () | Elapsed -> () let call_id = Call Fun.id let[@inline never] alloc seconds = let (Set set_r as t : [ `Set ] t) = Set { state = call_id } in let cancel = set seconds (as_atomic t) in if not (Atomic.compare_and_set (as_atomic t) call_id (Call cancel)) then timeout (); Set set_r let[@inline] alloc_opt = function | None -> Unset | Some seconds -> alloc seconds let[@inline never] await (state : [< `Elapsed | `Call ] t Atomic.t) release = match fenceless_get state with | Call cancel as alive -> if Atomic.compare_and_set state alive (Call release) then Call cancel else timeout () | Elapsed -> timeout () let[@inline] await (t : [ `Unset | `Set ] t) release = match t with Unset -> Unset | Set r -> await (as_atomic (Set r)) release let[@inline never] unawait (state : [< `Elapsed | `Call ] t Atomic.t) alive = match fenceless_get state with | Call _ as await -> if not (Atomic.compare_and_set state await alive) then timeout () | Elapsed -> timeout () let[@inline] unawait t alive = match (t, alive) with | Set set_r, Call call_r -> unawait (as_atomic (Set set_r)) (Call call_r) | _ -> () let[@inline] cancel_alive (alive : [< `Unset | `Call ] t) = match alive with Unset -> () | Call cancel -> cancel () let[@inline] cancel (t : [< `Set | `Unset ] t) = match t with | Unset -> () | Set set_r -> ( match fenceless_get (as_atomic (Set set_r)) with | Elapsed -> () | Call cancel -> cancel ()) end module Id = struct let neg_id = Atomic.make (-1) let[@inline] neg_ids n = Atomic.fetch_and_add neg_id (-n) let[@inline] neg_id () = neg_ids 1 let nat_id = Atomic.make Int.max_int let[@inline] nat_ids n = Atomic.fetch_and_add nat_id (-n) let[@inline] nat_id () = nat_ids 1 end module Action : sig type t val noop : t val append : (unit -> unit) -> t -> t val run : t -> 'a -> 'a (** Always call this last as user code may raise. *) end = struct type t = unit -> unit let noop = Fun.id let[@inline] append action t = if t == noop then action else fun x -> action (t x) let[@inline] run t x = t (); x end type awaiter = unit -> unit let[@inline] resume_awaiter awaiter = awaiter () let[@inline] resume_awaiters = function | [] -> () | [ awaiter ] -> resume_awaiter awaiter | awaiters -> List.iter resume_awaiter awaiters module Mode = struct type t = [ `Lock_free | `Obstruction_free ] end type 'a state = { mutable before : 'a; (** Keep [before] first (i.e. at index [0]). *) mutable after : 'a; (** Keep [after] second (i.e. at index [1]). *) mutable which : which; awaiters : awaiter list; } (** Tagged GADT for representing both the state of MCAS operations and of the transaction log or splay [tree]. Different subsets of this GADT are used in different contexts. See the [root], [tree], and [which] existentials. *) and _ tdt = | Before : [> `Before ] tdt (** The result has been determined to be the [before] value. Keep [Before] first (i.e. value [0] or [false]). *) | After : [> `After ] tdt (** The result has been determined to be the [after] value. Keep [After] second (i.e. value [1] or [true]). *) | Xt : { mutable rot : rot; (** [rot] is for Root or Tree. This field must be first, see [root_as_atomic] and [tree_as_ref]. *) timeout : [ `Set | `Unset ] Timeout.t; mutable mode : Mode.t; mutable validate_counter : int; mutable post_commit : Action.t; } -> [> `Xt ] tdt (** The result might not yet have been determined. The [root] either says which it is or points to the root of the transaction log or [tree]. Note that if/when local/stack allocation mode becomes available in OCaml, the transaction log should be mostly stack allocated. *) | Leaf : [> `Leaf ] tdt (** Leaf node in the transaction log or [tree]. *) | Node : { loc : 'a loc; state : 'a state; lt : tree; gt : tree; mutable awaiters : awaiter list; } -> [> `Node ] tdt (** Branch node in the transaction log or [tree] that specifies a single [CAS] or [CMP] operation. *) and root = R : [< `Before | `After | `Node ] tdt -> root [@@unboxed] and tree = T : [< `Leaf | `Node ] tdt -> tree [@@unboxed] and rot = U : [< `Before | `After | `Node | `Leaf ] tdt -> rot [@@unboxed] and which = W : [< `Before | `After | `Xt ] tdt -> which [@@unboxed] (* NOTE: You can adjust comment blocks below to select whether or not to use an unsafe cast to avoid a level of indirection due to [Atomic.t] and reduce the size of a location by two words (or more when padded). This has been seen to provide significant performance improvements. *) (**) and 'a loc = { mutable _state : 'a state; id : int } external as_atomic : 'a loc -> 'a state Atomic.t = "%identity" let[@inline] make_loc padded state id = let record = { _state = state; id } in if padded then Multicore_magic.copy_as_padded record else record (**) (* and 'a loc = { state : 'a state Atomic.t; id : int } let[@inline] as_atomic loc = loc.state let[@inline] make_loc padded state id = let atomic = Atomic.make state in let state = if padded then Multicore_magic.copy_as_padded atomic else atomic in let record = { state; id } in if padded then Multicore_magic.copy_as_padded record else record *) external root_as_atomic : [< `Xt ] tdt -> root Atomic.t = "%identity" external tree_as_ref : [< `Xt ] tdt -> tree ref = "%identity" let[@inline] is_node tree = tree != T Leaf let[@inline] is_cmp which state = state.which != W which let[@inline] is_cas which state = state.which == W which let () = assert (Before == Obj.magic false); assert (After == Obj.magic true) let[@inline] is_determined_after (status : [< `Before | `After ] tdt) : bool = (* This is the identity function. For some reason the OCaml 5.0 compiler is not able to optimize {[ match status with | Before -> false | After -> true ]} to the identity function. It should be possible and the compiler can do that in many cases. *) Obj.magic status type not_float = which let[@inline] get (state : 'a state) (index : bool) : 'a = (* Here we treat the [state] record as an array of non-float values. This allows accessing the value (i.e. [before] or [after]) without using branches. *) Obj.magic (Array.unsafe_get (Obj.magic state : not_float array) (Bool.to_int index)) let[@inline] isnt_int x = not (Obj.is_int (Obj.repr x)) let[@inline] clear_other (state : 'a state) status = (* Here we treat the [state] record as an array of non-float values. This allows accessing the value (i.e. [before] or [after]) without using branches. *) let i = 1 - Bool.to_int (is_determined_after status) in let state = (Obj.magic state : not_float array) in if isnt_int (Array.unsafe_get state i) then Array.unsafe_set state i (Obj.magic ()) let[@inline] is_determined = function | (Xt _ as xt : [< `Xt ] tdt) -> begin match fenceless_get (root_as_atomic xt) with | R (Node _) -> false | R After | R Before -> true end let[@inline] rec release_rec which status = function | T Leaf -> is_determined_after status | T (Node node_r) -> release which status (Node node_r) and release which status (Node node_r : [< `Node ] tdt) = release_rec which status node_r.lt |> ignore; let state = node_r.state in if is_cas which state then begin state.which <- W status; clear_other state status; resume_awaiters node_r.awaiters end; release_rec which status node_r.gt let[@inline] rec verify_rec which = function | T Leaf -> After | T (Node node_r) -> verify which (Node node_r) and verify which (Node node_r : [< `Node ] tdt) = let status = verify_rec which node_r.lt in if status == After then (* Fenceless is safe as [finish] has a fence after. *) if is_cmp which node_r.state && fenceless_get (as_atomic node_r.loc) != node_r.state then Before else verify_rec which node_r.gt else status let finish which root status = if Atomic.compare_and_set (root_as_atomic which) (R root) (R status) then release which status root else (* Fenceless is safe as we have a fence above. *) fenceless_get (root_as_atomic which) == R After let a_cmp = 1 let a_cas = 2 let a_cmp_followed_by_a_cas = 4 let[@inline] next_status a_cas_or_a_cmp status = let a_cmp_followed_by_a_cas = a_cas_or_a_cmp * 2 land (status * 4) in status lor a_cas_or_a_cmp lor a_cmp_followed_by_a_cas let[@inline] rec determine_rec which status = function | T Leaf -> status | T (Node node_r) -> determine which status (Node node_r) and determine which status (Node node_r : [< `Node ] tdt) = let status = determine_rec which status node_r.lt in if status < 0 then status else determine_eq Backoff.default which status (Node node_r) and determine_eq backoff which status (Node node_r as eq : [< `Node ] tdt) = let current = atomic_get (as_atomic node_r.loc) in let state = node_r.state in if state == current then begin let a_cas_or_a_cmp = 1 + Bool.to_int (is_cas which state) in if is_determined which then raise_notrace Exit; determine_rec which (next_status a_cas_or_a_cmp status) node_r.gt end else let matches_expected () = let current = match current.which with | W ((Before | After) as which) -> get current (is_determined_after which) | W (Xt _ as xt) -> get current (is_undetermined_after xt) in state.before == current in if is_cas which state && matches_expected () then begin if is_determined which then raise_notrace Exit; (* We now know that the operation wasn't finished when we read [current], but it is possible that the [loc]ation has been updated since then by some other domain helping us (or even by some later operation). If so, then the [compare_and_set] below fails. Copying the awaiters from [current] is safe in either case, because we know that we have the [current] state that our operation is interested in. By doing the copying here, we at most duplicate work already done by some other domain. However, it is necessary to do the copy before the [compare_and_set], because afterwards is too late as some other domain might finish the operation after the [compare_and_set] and miss the awaiters. *) if current.awaiters != [] then node_r.awaiters <- current.awaiters; if Atomic.compare_and_set (as_atomic node_r.loc) current state then determine_rec which (next_status a_cas status) node_r.gt else determine_eq (Backoff.once backoff) which status eq end else -1 and is_undetermined_after = function | (Xt _ as xt : [< `Xt ] tdt) -> begin (* Fenceless at most gives old root and causes extra work. *) match fenceless_get (root_as_atomic xt) with | R (Node node_r) -> begin let root = Node node_r in match determine xt 0 root with | status -> finish xt root (if a_cmp_followed_by_a_cas < status then verify xt root else if 0 <= status then After else Before) | exception Exit -> (* Fenceless is safe as there was a fence before. *) fenceless_get (root_as_atomic xt) == R After end | R Before -> false | R After -> true end let[@inline never] impossible () = failwith "impossible" let[@inline never] invalid_retry () = failwith "kcas: invalid use of retry" let[@inline] make_node loc state lt gt = T (Node { loc; state; lt; gt; awaiters = [] }) let rec splay ~hit_parent x = function | T Leaf -> (T Leaf, T Leaf, T Leaf) | T (Node { loc = a; state = s; lt = l; gt = r; _ }) as t -> if x < a.id && ((not hit_parent) || is_node l) then match l with | T Leaf -> (T Leaf, T Leaf, t) | T (Node { loc = pa; state = ps; lt = ll; gt = lr; _ }) -> if x < pa.id && ((not hit_parent) || is_node ll) then let lll, n, llr = splay ~hit_parent x ll in (lll, n, make_node pa ps llr (make_node a s lr r)) else if pa.id < x && ((not hit_parent) || is_node lr) then let lrl, n, lrr = splay ~hit_parent x lr in (make_node pa ps ll lrl, n, make_node a s lrr r) else (ll, l, make_node a s lr r) else if a.id < x && ((not hit_parent) || is_node r) then match r with | T Leaf -> (t, T Leaf, T Leaf) | T (Node { loc = pa; state = ps; lt = rl; gt = rr; _ }) -> if x < pa.id && ((not hit_parent) || is_node rl) then let rll, n, rlr = splay ~hit_parent x rl in (make_node a s l rll, n, make_node pa ps rlr rr) else if pa.id < x && ((not hit_parent) || is_node rr) then let rrl, n, rrr = splay ~hit_parent x rr in (make_node pa ps (make_node a s l rl) rrl, n, rrr) else (make_node a s l rl, r, rr) else (l, t, r) let[@inline] new_state after = { before = Obj.magic (); after; which = W After; awaiters = [] } let[@inline] eval state = match state.which with | W ((Before | After) as which) -> get state (is_determined_after which) | W (Xt _ as xt) -> get state (is_undetermined_after xt) module Retry = struct exception Later let[@inline never] later () = raise_notrace Later let[@inline] unless condition = if not condition then later () exception Invalid let[@inline never] invalid () = raise_notrace Invalid end let add_awaiter loc before awaiter = (* Fenceless is safe as we have fence after. *) let state_old = fenceless_get (as_atomic loc) in let state_new = let awaiters = awaiter :: state_old.awaiters in { before = Obj.magic (); after = before; which = W After; awaiters } in before == eval state_old && Atomic.compare_and_set (as_atomic loc) state_old state_new let[@tail_mod_cons] rec remove_first x' removed = function | [] -> removed := false; [] | x :: xs -> if x == x' then xs else x :: remove_first x' removed xs let rec remove_awaiter backoff loc before awaiter = (* Fenceless is safe as we have fence after. *) let state_old = fenceless_get (as_atomic loc) in if before == eval state_old then let removed = ref true in let awaiters = remove_first awaiter removed state_old.awaiters in if !removed then let state_new = { before = Obj.magic (); after = before; which = W After; awaiters } in if not (Atomic.compare_and_set (as_atomic loc) state_old state_new) then remove_awaiter (Backoff.once backoff) loc before awaiter let block timeout loc before = let t = Domain_local_await.prepare_for_await () in let alive = Timeout.await timeout t.release in if add_awaiter loc before t.release then begin try t.await () with cancellation_exn -> remove_awaiter Backoff.default loc before t.release; Timeout.cancel_alive alive; raise cancellation_exn end; Timeout.unawait timeout alive let rec update_no_alloc timeout backoff loc state f = (* Fenceless is safe as we have had a fence before if needed and there is a fence after. *) let state_old = fenceless_get (as_atomic loc) in let before = eval state_old in match f before with | after -> if before == after then begin Timeout.cancel timeout; before end else begin state.after <- after; if Atomic.compare_and_set (as_atomic loc) state_old state then begin resume_awaiters state_old.awaiters; Timeout.cancel timeout; before end else update_no_alloc timeout (Backoff.once backoff) loc state f end | exception Retry.Later -> block timeout loc before; update_no_alloc timeout backoff loc state f | exception exn -> Timeout.cancel timeout; raise exn let update_with_state timeout backoff loc f state_old = let before = eval state_old in match f before with | after -> if before == after then begin Timeout.cancel timeout; before end else let state = new_state after in if Atomic.compare_and_set (as_atomic loc) state_old state then begin resume_awaiters state_old.awaiters; Timeout.cancel timeout; before end else update_no_alloc timeout (Backoff.once backoff) loc state f | exception Retry.Later -> let state = new_state before in block timeout loc before; update_no_alloc timeout backoff loc state f | exception exn -> Timeout.cancel timeout; raise exn let rec exchange_no_alloc backoff loc state = let state_old = atomic_get (as_atomic loc) in let before = eval state_old in if before == state.after then before else if Atomic.compare_and_set (as_atomic loc) state_old state then begin resume_awaiters state_old.awaiters; before end else exchange_no_alloc (Backoff.once backoff) loc state let[@inline] rec cas_with_state backoff loc before state state_old = before == eval state_old && (before == state.after || if Atomic.compare_and_set (as_atomic loc) state_old state then begin resume_awaiters state_old.awaiters; true end else (* We must retry, because compare is by value rather than by state. In other words, we should not fail spuriously due to some other thread having installed or removed a waiter. Fenceless is safe as there was a fence before. *) cas_with_state (Backoff.once backoff) loc before state (fenceless_get (as_atomic loc))) let inc x = x + 1 let dec x = x - 1 module Loc = struct type !'a t = private Loc : { state : 'state; id : 'id } -> 'a t external of_loc : 'a loc -> 'a t = "%identity" external to_loc : 'a t -> 'a loc = "%identity" let make ?(padded = false) ?(mode = `Obstruction_free) after = let state = new_state after and id = if mode == `Obstruction_free then Id.nat_id () else Id.neg_id () in make_loc padded state id |> of_loc let make_contended ?mode after = make ~padded:true ?mode after let make_array ?(padded = false) ?(mode = `Obstruction_free) n after = assert (0 <= n); let state = new_state after and id = (if mode == `Obstruction_free then Id.nat_ids n else Id.neg_ids n) - (n - 1) in Array.init n @@ fun i -> make_loc padded state (id + i) |> of_loc let[@inline] get_id loc = (to_loc loc).id let get loc = eval (atomic_get (as_atomic (to_loc loc))) let rec get_as timeout f loc state = let before = eval state in match f before with | value -> Timeout.cancel timeout; value | exception Retry.Later -> block timeout (to_loc loc) before; (* Fenceless is safe as there was already a fence before. *) get_as timeout f loc (fenceless_get (as_atomic (to_loc loc))) | exception exn -> Timeout.cancel timeout; raise exn let[@inline] get_as ?timeoutf f loc = get_as (Timeout.alloc_opt timeoutf) f loc (atomic_get (as_atomic (to_loc loc))) let[@inline] get_mode loc = if (to_loc loc).id < 0 then `Lock_free else `Obstruction_free let compare_and_set ?(backoff = Backoff.default) loc before after = let state = new_state after in let state_old = atomic_get (as_atomic (to_loc loc)) in cas_with_state backoff (to_loc loc) before state state_old let fenceless_update ?timeoutf ?(backoff = Backoff.default) loc f = let timeout = Timeout.alloc_opt timeoutf in update_with_state timeout backoff (to_loc loc) f (fenceless_get (as_atomic (to_loc loc))) let[@inline] fenceless_modify ?timeoutf ?backoff loc f = fenceless_update ?timeoutf ?backoff loc f |> ignore let update ?timeoutf ?(backoff = Backoff.default) loc f = let timeout = Timeout.alloc_opt timeoutf in update_with_state timeout backoff (to_loc loc) f (atomic_get (as_atomic (to_loc loc))) let[@inline] modify ?timeoutf ?backoff loc f = update ?timeoutf ?backoff loc f |> ignore let exchange ?(backoff = Backoff.default) loc value = exchange_no_alloc backoff (to_loc loc) (new_state value) let set ?backoff loc value = exchange ?backoff loc value |> ignore let fetch_and_add ?backoff loc n = if n = 0 then get loc else (* Fenceless is safe as we always update. *) fenceless_update ?backoff loc (( + ) n) let incr ?backoff loc = (* Fenceless is safe as we always update. *) fenceless_update ?backoff loc inc |> ignore let decr ?backoff loc = (* Fenceless is safe as we always update. *) fenceless_update ?backoff loc dec |> ignore let has_awaiters loc = let state = atomic_get (as_atomic (to_loc loc)) in state.awaiters != [] let fenceless_get loc = eval (fenceless_get (as_atomic (to_loc loc))) end module Xt = struct type 'x t = [ `Xt ] tdt let[@inline] validate_one which loc state = let before = if is_cmp which state then eval state else state.before in (* Fenceless is safe inside transactions as each log update has a fence. *) if before != eval (fenceless_get (as_atomic loc)) then Retry.invalid () let[@inline] rec validate_all_rec which = function | T Leaf -> () | T (Node node_r) -> validate_all which (Node node_r) and validate_all which (Node node_r : [< `Node ] tdt) = validate_all_rec which node_r.lt; validate_one which node_r.loc node_r.state; validate_all_rec which node_r.gt let[@inline] is_obstruction_free (Xt xt_r : _ t) loc = (* Fenceless is safe as we are accessing a private location. *) xt_r.mode == `Obstruction_free && 0 <= loc.id type (_, _) up = | Compare_and_swap : ('a * 'a, 'a) up | Fetch_and_add : (int, int) up | Fn : ('a -> 'a, 'a) up | Exchange : ('a, 'a) up | Get : (unit, 'a) up let update_new : type c a. _ -> a loc -> c -> (c, a) up -> _ -> _ -> a = fun xt loc c up lt gt -> let state = fenceless_get (as_atomic loc) in let before = eval state in let after : a = match up with | Compare_and_swap -> if fst c == before then snd c else before | Fetch_and_add -> before + c | Fn -> begin let rot = !(tree_as_ref xt) in match c before with | after -> assert (rot == !(tree_as_ref xt)); after | exception exn -> assert (rot == !(tree_as_ref xt)); tree_as_ref xt := T (Node { loc; state; lt; gt; awaiters = [] }); raise exn end | Exchange -> c | Get -> before in let state = if before == after && is_obstruction_free xt loc then state else { before; after; which = W xt; awaiters = [] } in tree_as_ref xt := T (Node { loc; state; lt; gt; awaiters = [] }); before let update_old : type c a. _ -> a loc -> c -> (c, a) up -> _ -> _ -> _ -> a = fun (Xt xt_r as xt : _ t) loc c up lt gt state' -> let c0 = xt_r.validate_counter in let c1 = c0 + 1 in xt_r.validate_counter <- c1; (* Validate whenever counter reaches next power of 2. The assumption is that potentially infinite loops will repeatedly access the same locations. *) if c0 land c1 = 0 then begin Timeout.check xt_r.timeout; validate_all_rec xt !(tree_as_ref xt) end; let state : a state = Obj.magic state' in if is_cmp xt state then begin let current = eval state in let after : a = match up with | Compare_and_swap -> if fst c == current then snd c else current | Fetch_and_add -> current + c | Fn -> let rot = !(tree_as_ref xt) in let after = c current in assert (rot == !(tree_as_ref xt)); after | Exchange -> c | Get -> current in let state = if current == after then state else { before = current; after; which = W xt; awaiters = [] } in tree_as_ref xt := T (Node { loc; state; lt; gt; awaiters = [] }); current end else let current = state.after in let after : a = match up with | Compare_and_swap -> if fst c == current then snd c else current | Fetch_and_add -> current + c | Fn -> let rot = !(tree_as_ref xt) in let after = c current in assert (rot == !(tree_as_ref xt)); after | Exchange -> c | Get -> current in let state = if current == after then state else { before = state.before; after; which = W xt; awaiters = [] } in tree_as_ref xt := T (Node { loc; state; lt; gt; awaiters = [] }); current let update_as ~xt loc c up = let loc = Loc.to_loc loc in let x = loc.id in match !(tree_as_ref xt) with | T Leaf -> update_new xt loc c up (T Leaf) (T Leaf) | T (Node { loc = a; lt = T Leaf; _ }) as tree when x < a.id -> update_new xt loc c up (T Leaf) tree | T (Node { loc = a; gt = T Leaf; _ }) as tree when a.id < x -> update_new xt loc c up tree (T Leaf) | T (Node { loc = a; state; lt; gt; _ }) when Obj.magic a == loc -> update_old xt loc c up lt gt state | tree -> begin match splay ~hit_parent:false x tree with | l, T Leaf, r -> update_new xt loc c up l r | l, T (Node node_r), r -> update_old xt loc c up l r node_r.state end let get ~xt loc = update_as ~xt loc () Get let set ~xt loc after = update_as ~xt loc after Exchange |> ignore let modify ~xt loc f = update_as ~xt loc f Fn |> ignore let compare_and_swap ~xt loc before after = update_as ~xt loc (before, after) Compare_and_swap let compare_and_set ~xt loc before after = compare_and_swap ~xt loc before after == before let exchange ~xt loc after = update_as ~xt loc after Exchange let fetch_and_add ~xt loc n = update_as ~xt loc n Fetch_and_add let incr ~xt loc = update_as ~xt loc 1 Fetch_and_add |> ignore let decr ~xt loc = update_as ~xt loc (-1) Fetch_and_add |> ignore let update ~xt loc f = update_as ~xt loc f Fn let swap ~xt l1 l2 = set ~xt l1 @@ exchange ~xt l2 @@ get ~xt l1 let[@inline] to_blocking ~xt tx = match tx ~xt with None -> Retry.later () | Some value -> value let[@inline] to_nonblocking ~xt tx = match tx ~xt with value -> Some value | exception Retry.Later -> None let post_commit ~xt:(Xt xt_r : _ t) action = xt_r.post_commit <- Action.append action xt_r.post_commit type _ op = Validate : unit op | Is_in_log : bool op let do_op : type r. xt:'x t -> 'a Loc.t -> r op -> r = fun ~xt loc op -> let loc = Loc.to_loc loc in let x = loc.id in match !(tree_as_ref xt) with | T Leaf -> begin match op with Validate -> () | Is_in_log -> false end | T (Node { loc = a; lt = T Leaf; _ }) when x < a.id -> begin match op with Validate -> () | Is_in_log -> false end | T (Node { loc = a; gt = T Leaf; _ }) when a.id < x -> begin match op with Validate -> () | Is_in_log -> false end | T (Node { loc = a; state; _ }) when Obj.magic a == loc -> begin match op with Validate -> validate_one xt a state | Is_in_log -> true end | tree -> begin match splay ~hit_parent:true x tree with | lt, T (Node node_r), gt -> begin tree_as_ref xt := T (Node { node_r with lt; gt; awaiters = [] }); match op with | Validate -> if Obj.magic node_r.loc == loc then validate_one xt node_r.loc node_r.state | Is_in_log -> Obj.magic node_r.loc == loc end | _, T Leaf, _ -> impossible () end let[@inline] validate ~xt loc = do_op ~xt loc Validate let[@inline] is_in_log ~xt loc = do_op ~xt loc Is_in_log let rec rollback which tree_snap tree = if tree_snap == tree then tree else match tree with | T Leaf -> T Leaf | T (Node node_r) -> begin match splay ~hit_parent:false node_r.loc.id tree_snap with | lt_mark, T Leaf, gt_mark -> let lt = rollback which lt_mark node_r.lt and gt = rollback which gt_mark node_r.gt in let state = let state = node_r.state in if is_cmp which state then state else (* Fenceless is safe inside transactions as each log update has a fence. *) let current = fenceless_get (as_atomic node_r.loc) in if state.before != eval current then Retry.invalid () else current in T (Node { loc = node_r.loc; state; lt; gt; awaiters = [] }) | lt_mark, T (Node inner_node_r), gt_mark -> let lt = rollback which lt_mark node_r.lt and gt = rollback which gt_mark node_r.gt in T (Node { inner_node_r with lt; gt; awaiters = [] }) end type 'x snap = tree * Action.t let snapshot ~xt:(Xt xt_r as xt : _ t) = (!(tree_as_ref xt), xt_r.post_commit) let rollback ~xt:(Xt xt_r as xt : _ t) (snap, post_commit) = tree_as_ref xt := rollback xt snap !(tree_as_ref xt); xt_r.post_commit <- post_commit let rec first ~xt tx = function | [] -> tx ~xt | tx' :: txs -> begin match tx ~xt with | value -> value | exception Retry.Later -> first ~xt tx' txs end let first ~xt = function | [] -> Retry.later () | tx :: txs -> first ~xt tx txs type 'a tx = { tx : 'x. xt:'x t -> 'a } [@@unboxed] let[@inline] call ~xt { tx } = tx ~xt let[@inline] rec add_awaiters_rec awaiter which = function | T Leaf -> T Leaf | T (Node node_r) -> add_awaiters awaiter which (Node node_r) and add_awaiters awaiter which (Node node_r as stop : [< `Node ] tdt) = match add_awaiters_rec awaiter which node_r.lt with | T Leaf -> if add_awaiter node_r.loc (let state = node_r.state in if is_cmp which state then eval state else state.before) awaiter then add_awaiters_rec awaiter which node_r.gt else T stop | T (Node _) as stop -> stop let[@inline] rec remove_awaiters_rec awaiter which stop = function | T Leaf -> T Leaf | T (Node node_r) -> remove_awaiters awaiter which stop (Node node_r) and remove_awaiters awaiter which stop (Node node_r as at : [< `Node ] tdt) = match remove_awaiters_rec awaiter which stop node_r.lt with | T Leaf -> if T at != stop then begin remove_awaiter Backoff.default node_r.loc (let state = node_r.state in if is_cmp which state then eval state else state.before) awaiter; remove_awaiters_rec awaiter which stop node_r.gt end else stop | T (Node _) as stop -> stop let initial_validate_period = 4 let success (Xt xt_r : _ t) result = Timeout.cancel xt_r.timeout; Action.run xt_r.post_commit result let rec commit backoff (Xt xt_r as xt : _ t) tx = match tx ~xt with | result -> begin match !(tree_as_ref xt) with | T Leaf -> success xt result | T (Node { loc; state; lt = T Leaf; gt = T Leaf; _ }) -> if is_cmp xt state then success xt result else begin state.which <- W After; let before = state.before in if isnt_int before then state.before <- Obj.magic (); (* Fenceless is safe inside transactions as each log update has a fence. *) let state_old = fenceless_get (as_atomic loc) in if cas_with_state Backoff.default loc before state state_old then success xt result else commit_once_reuse backoff xt tx end | T (Node node_r) -> begin let root = Node node_r in match determine xt 0 root with | status -> if a_cmp_followed_by_a_cas < status then begin if finish xt root (verify xt root) then success xt result else begin (* We switch to [`Lock_free] as there was interference. *) commit_once_alloc backoff `Lock_free xt tx end end else if a_cmp = status || finish xt root (if 0 <= status then After else Before) then success xt result else commit_once_alloc backoff xt_r.mode xt tx | exception Exit -> (* Fenceless is safe as there was a fence before. *) if fenceless_get (root_as_atomic xt) == R After then success xt result else commit_once_alloc backoff xt_r.mode xt tx end end | exception Retry.Invalid -> commit_once_reuse backoff xt tx | exception Retry.Later -> begin match !(tree_as_ref xt) with | T Leaf -> invalid_retry () | T (Node node_r) -> begin let root = Node node_r in let t = Domain_local_await.prepare_for_await () in let alive = Timeout.await xt_r.timeout t.release in match add_awaiters t.release xt root with | T Leaf -> begin match t.await () with | () -> remove_awaiters t.release xt (T Leaf) root |> ignore; Timeout.unawait xt_r.timeout alive; commit_reset_reuse backoff xt tx | exception cancellation_exn -> remove_awaiters t.release xt (T Leaf) root |> ignore; Timeout.cancel_alive alive; raise cancellation_exn end | T (Node _) as stop -> remove_awaiters t.release xt stop root |> ignore; Timeout.unawait xt_r.timeout alive; commit_once_reuse backoff xt tx end end | exception exn -> Timeout.cancel xt_r.timeout; raise exn and commit_once_reuse backoff xt tx = commit_reuse (Backoff.once backoff) xt tx and commit_reset_reuse backoff xt tx = commit_reuse (Backoff.reset backoff) xt tx and commit_reuse backoff (Xt xt_r as xt : _ t) tx = tree_as_ref xt := T Leaf; xt_r.validate_counter <- initial_validate_period; xt_r.post_commit <- Action.noop; Timeout.check xt_r.timeout; commit backoff xt tx and commit_once_alloc backoff mode (Xt xt_r : _ t) tx = let backoff = Backoff.once backoff in Timeout.check xt_r.timeout; let rot = U Leaf in let validate_counter = initial_validate_period in let post_commit = Action.noop in let xt = Xt { xt_r with rot; mode; validate_counter; post_commit } in commit backoff xt tx let[@inline] commit ?timeoutf ?(backoff = Backoff.default) ?(mode = `Obstruction_free) { tx } = let timeout = Timeout.alloc_opt timeoutf and rot = U Leaf and validate_counter = initial_validate_period and post_commit = Action.noop in let xt = Xt { rot; timeout; mode; validate_counter; post_commit } in commit backoff xt tx end kcas-0.7.0/src/kcas/kcas.mli000066400000000000000000000566061456672623200156250ustar00rootroot00000000000000(** This library provides a software transactional memory (STM) implementation based on an atomic {{:https://en.wikipedia.org/wiki/Non-blocking_algorithm#Lock-freedom} lock-free} multi-word {{:https://en.wikipedia.org/wiki/Compare-and-swap} compare-and-set} (MCAS) algorithm enhanced with read-only compare operations and ability to block awaiting for changes. Features and properties: - {b Efficient}: In the common uncontended case only [k + 1] single-word CASes are required per [k]-CAS and, as a special case, [1]-CAS requires only a single single-word CAS. - {b Lock-free}: The underlying algorithm guarantees that at least one operation will be able to make progress. - {b Disjoint-access parallel}: Unrelated operations progress independently, without interference, even if they occur at the same time. - {b Read-only compares}: The algorithm supports {{:https://en.wikipedia.org/wiki/Non-blocking_algorithm#Obstruction-freedom} obstruction-free} read-only compare (CMP) operations that can be performed on overlapping locations in parallel without interference. - {b Blocking await}: The algorithm supports timeouts and awaiting for changes to any number of shared memory locations. - {b Composable}: Independently developed transactions can be composed with ease sequentially, conjunctively, conditionally, and disjunctively. In other words, performance should be acceptable and scalable for many use cases, the non-blocking properties should allow use in many contexts including those where locks are not acceptable, and the features provided should support most practical needs. {1 A quick tour} Let's first open the library for convenience: {[ open Kcas ]} To use the library one creates shared memory locations: {[ # let a = Loc.make 0 and b = Loc.make 0 and x = Loc.make 0 val a : int Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } val b : int Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } val x : int Loc.t = Kcas.Loc.Loc {Kcas.Loc.state = ; id = } ]} One can then manipulate the locations individually: {[ # Loc.set a 10 - : unit = () # Loc.get a - : int = 10 # Loc.compare_and_set b 0 52 - : bool = true # Loc.get b - : int = 52 ]} Block waiting for changes to locations: {[ # let a_domain = Domain.spawn @@ fun () -> let x = Loc.get_as (fun x -> Retry.unless (x <> 0); x) x in Printf.sprintf "The answer is %d!" x val a_domain : string Domain.t = ]} Perform transactions over locations: {[ # let tx ~xt = let a = Xt.get ~xt a and b = Xt.get ~xt b in Xt.set ~xt x (b - a) in Xt.commit { tx } - : unit = () ]} And now we have it: {[ # Domain.join a_domain - : string = "The answer is 42!" ]} The main repository includes a longer introduction with many examples and discussion of more advanced topics for designing lock-free algorithms. *) (** {1 Auxiliary modules} The modules in this section serve auxiliary purposes. On a first read you can skip over these. The documentation links back to these modules where appropriate. *) (** Timeout support. *) module Timeout : sig exception Timeout (** Exception that may be raised by operations such as {!Loc.get_as}, {!Loc.update}, {!Loc.modify}, or {!Xt.commit} when given a [~timeoutf] in seconds. *) end (** Retry support. *) module Retry : sig exception Later (** Exception that may be raised to signal that the operation, such as {!Loc.get_as}, {!Loc.update}, or {!Xt.commit}, should be retried, at some point in the future, after the examined shared memory location or locations have changed. {b NOTE}: It is important to understand that "{i after}" may effectively mean "{i immediately}", because it may be the case that the examined shared memory locations have already changed. *) val later : unit -> 'a (** [later ()] is equivalent to [raise Later]. *) val unless : bool -> unit (** [unless condition] is equivalent to [if not condition then later ()]. *) exception Invalid (** Exception that may be raised to signal that the transaction log is no longer valid, e.g. because shared memory locations have been changed outside of the transaction, and the transaction should be retried. *) val invalid : unit -> 'a (** [invalid ()] is equivalent to [raise Invalid]. *) end (** Operating modes of the [k-CAS-n-CMP] algorithm. *) module Mode : sig type t = [ `Lock_free (** In [`Lock_free] mode the algorithm makes sure that at least one domain will be able to make progress at the cost of performing read-only operations as read-write operations. *) | `Obstruction_free (** In [`Obstruction_free] mode the algorithm proceeds optimistically and allows read-only operations to fail due to interference from other domains that might have been prevented in the [`Lock_free] mode. *) ] (** Type of an operating mode of the [k-CAS-n-CMP] algorithm. *) end (** {1 Individual locations} Individual shared memory locations can be created and manipulated through the {!Loc} module that is essentially compatible with the [Stdlib.Atomic] module except that some of the operations take additional optional arguments: - [backoff] specifies the configuration for the [Backoff] mechanism. In special cases, having more detailed knowledge of the application, one might adjust the configuration to improve performance. - [timeoutf] specifies a timeout in seconds and, if specified, the {!Timeout.Timeout} exception may be raised by the operation to signal that the timeout expired. *) (** Shared memory locations. This module is essentially compatible with the [Stdlib.Atomic] module, except that a number of functions take some optional arguments that one usually need not worry about. *) module Loc : sig (** Type of shared memory locations. *) type !'a t = private | Loc : { state : 'state; id : 'id } -> 'a t (** The shape is transparent to allow the compiler to perform optimizations on array accesses. User code should treat this type as abstract. *) val make : ?padded:bool -> ?mode:Mode.t -> 'a -> 'a t (** [make initial] creates a new shared memory location with the [initial] value. The optional [padded] argument defaults to [false]. If explicitly specified as [~padded:true] the location will be allocated in a way to avoid false sharing. For relatively long lived shared memory locations this can improve performance and make performance more stable at the cost of using more memory. It is not recommended to use [~padded:true] for short lived shared memory locations. The optional {{!Mode.t} [mode]} argument defaults to [`Obstruction_free]. If explicitly specified as [`Lock_free], the location will always be accessed using the lock-free operating mode. This may improve performance in rare cases where a location is updated frequently and obstruction-free read-only accesses would almost certainly suffer from interference. Locations are allocated such that accessing the locations inside a transaction in allocation order from oldest to youngest is as fast as possible. *) val make_contended : ?mode:Mode.t -> 'a -> 'a t (** [make_contended initial] is equivalent to [make ~padded:true initial]. *) val make_array : ?padded:bool -> ?mode:Mode.t -> int -> 'a -> 'a t array (** [make_array n initial] creates an array of [n] new shared memory locations with the [initial] value. The locations are allocated in such an order that accessing the locations in the array inside a transaction in order from the highest index to the lowest index is as fast as possible. *) val get_mode : 'a t -> Mode.t (** [get_mode r] returns the operating mode of the shared memory location [r]. *) val get_id : 'a t -> int (** [get_id r] returns the unique id of the shared memory location [r]. *) val get : 'a t -> 'a (** [get r] reads the current value of the shared memory location [r]. *) val get_as : ?timeoutf:float -> ('a -> 'b) -> 'a t -> 'b (** [get_as f loc] is equivalent to [f (get loc)]. The given function [f] may raise the {!Retry.Later} exception to signal that the conditional load should be retried only after the location has been modified outside of the conditional load. It is also safe for the given function [f] to raise any other exception to abort the conditional load. *) val compare_and_set : ?backoff:Backoff.t -> 'a t -> 'a -> 'a -> bool (** [compare_and_set r before after] atomically updates the shared memory location [r] to the [after] value if the current value of [r] is the [before] value. *) val update : ?timeoutf:float -> ?backoff:Backoff.t -> 'a t -> ('a -> 'a) -> 'a (** [update r f] repeats [let b = get r in compare_and_set r b (f b)] until it succeeds and then returns the [b] value. The given function [f] may raise the {!Retry.Later} exception to signal that the update should only be retried after the location has been modified outside of the update. It is also safe for the given function [f] to raise any other exception to abort the update. *) val modify : ?timeoutf:float -> ?backoff:Backoff.t -> 'a t -> ('a -> 'a) -> unit (** [modify r f] is equivalent to [update r f |> ignore]. *) val exchange : ?backoff:Backoff.t -> 'a t -> 'a -> 'a (** [exchange r after] atomically updates the shared memory location [r] to the [after] value and returns the current value (before the exchange). *) val set : ?backoff:Backoff.t -> 'a t -> 'a -> unit (** [set r after] atomically updates the shared memory location [r] to the [after] value. *) val fetch_and_add : ?backoff:Backoff.t -> int t -> int -> int (** [fetch_and_add r n] atomically increments the value of [r] by [n], and returns the current value (before the increment). *) val incr : ?backoff:Backoff.t -> int t -> unit (** [incr r] atomically increments [r]. *) val decr : ?backoff:Backoff.t -> int t -> unit (** [decr r] atomically decrements [r]. *) (**/**) val has_awaiters : 'a t -> bool (** [has_awaiters r] determines whether the shared memory location [r] has awaiters. *) val fenceless_get : 'a t -> 'a (** [fenceless_get r] is like [get r] except that [fenceless_get]s may be reordered. *) val fenceless_update : ?timeoutf:float -> ?backoff:Backoff.t -> 'a t -> ('a -> 'a) -> 'a (** [fenceless_update r f] is like [update r f] except that in case [f x == x] the update may be reordered. *) val fenceless_modify : ?timeoutf:float -> ?backoff:Backoff.t -> 'a t -> ('a -> 'a) -> unit (** [fenceless_modify r f] is like [modify r f] except that in case [f x == x] the modify may be reordered. *) end (** {1 Manipulating multiple locations atomically} Multiple shared memory locations can be manipulated atomically using the {!Xt} module to explicitly pass a transaction log to record accesses. Atomic operations over multiple shared memory locations are performed in two or three phases: 1. The first phase essentially records a list or log of operations to access shared memory locations. The first phase involves code you write as a user of the library. Aside from some advanced techniques, shared memory locations are not mutated during this phase. 2. The second phase attempts to perform the operations atomically. This is done internally by the library implementation. Only logically invisible writes to shared memory locations are performed during this phase. 3. In [`Obstruction_free] {{!Mode.t} mode} a third phase verifies all read-only operations. This is also done internally by the library implementation. Each phase may fail. In particular, in the first phase, as no changes to shared memory have yet been attempted, it is safe, for example, to raise exceptions to signal failure. Failure on the third phase is automatically handled by {!Xt.commit}. Only after all phases have completed succesfully, the writes to shared memory locations are atomically marked as having taken effect and subsequent reads of the locations will be able to see the newly written values. *) (** Explicit transaction log passing on shared memory locations. This module provides a way to implement composable transactions over shared memory locations. A transaction is a function written by the library user and can be thought of as a specification of a sequence of {!Xt.get} and {!Xt.set} accesses to shared memory locations. To actually perform the accesses one then {!Xt.commit}s the transaction. Transactions should generally not perform arbitrary side-effects, because when a transaction is committed it may be attempted multiple times meaning that the side-effects are also performed multiple times. {!Xt.post_commit} can be used to perform an action only once after the transaction has been committed succesfully. {b WARNING}: To make it clear, the operations provided by the {!Loc} module for accessing individual shared memory locations do not implicitly go through the transaction mechanism and should generally not be used within transactions. There are advanced algorithms where one might, within a transaction, perform operations that do not get recorded into the transaction log. Using such techniques correctly requires expert knowledge and is not recommended for casual users. As an example, consider an implementation of doubly-linked circular lists. Instead of using a mutable field, [ref], or [Atomic.t], one would use a shared memory location, or {!Loc.t}, for the pointers in the node type: {[ type 'a node = { succ: 'a node Loc.t; pred: 'a node Loc.t; datum: 'a; } ]} To remove a node safely one wants to atomically update the [succ] and [pred] pointers of the predecessor and successor nodes and to also update the [succ] and [pred] pointers of a node to point to the node itself, so that removal becomes an {{:https://en.wikipedia.org/wiki/Idempotence} idempotent} operation. Using explicit transaction log passing one could implement the [remove] operation as follows: {[ let remove ~xt node = (* Read pointers to the predecessor and successor nodes: *) let pred = Xt.get ~xt node.pred in let succ = Xt.get ~xt node.succ in (* Update pointers in this node: *) Xt.set ~xt node.succ node; Xt.set ~xt node.pred node; (* Update pointers to this node: *) Xt.set ~xt pred.succ succ; Xt.set ~xt succ.pred pred ]} The labeled argument, [~xt], refers to the transaction log. Transactional operations like {!Xt.get} and {!Xt.set} are then recorded in that log. To actually remove a node, we need to commit the transaction {@ocaml skip[ Xt.commit { tx = remove node } ]} which repeatedly calls the transaction function, [tx], to record a transaction log and attempts to atomically perform it until it succeeds. Notice that [remove] is not recursive. It doesn't have to account for failure or perform a backoff. It is also not necessary to know or keep track of what the previous values of locations were. All of that is taken care of for us by the transaction log and the {!Xt.commit} function. Furthermore, [remove] can easily be called as a part of a more complex transaction. *) module Xt : sig type 'x t (** Type of an explicit transaction log on shared memory locations. Note that a transaction log itself is not safe against concurrent or parallel use and should generally only be used by a single thread of execution. If a new thread of execution is spawned inside a function recording shared memory accesses to a log and the new thread of execution also records accesses to the log it may become inconsistent. *) (** {1 Recording accesses} Accesses of shared memory locations using an explicit transaction log first ensure that the initial value of the shared memory location is recorded in the log and then act on the current value of the shared memory location as recorded in the log. It is important to understand that it is possible for a transaction to observe the contents of two (or more) different shared memory locations from two (or more) different committed updates. This means that invariants that hold between two (or more) different shared memory locations may be seen as broken inside the transaction function. However, it is not possible for the transaction attempt to succeed after it has seen such an inconsistent view of the shared memory locations. To mitigate potential issues due to this read skew anomaly and due to very long running transactions, all of the access recording operations in this section periodically validate the entire transaction log when a previously accessed location is accessed again. An important guideline for writing transactions is that loops inside a transaction should always include an access of some shared memory location through the transaction log or should otherwise be guaranteed to be bounded. *) val get : xt:'x t -> 'a Loc.t -> 'a (** [get ~xt r] returns the current value of the shared memory location [r] in the explicit transaction log [xt]. *) val set : xt:'x t -> 'a Loc.t -> 'a -> unit (** [set ~xt r v] records the current value of the shared memory location [r] to be the given value [v] in the explicit transaction log [xt]. *) val update : xt:'x t -> 'a Loc.t -> ('a -> 'a) -> 'a (** [update ~xt r f] is equivalent to [let x = get ~xt r in set ~xt r (f x); x] with the limitation that [f] must not and is not allowed to access the transaction log. *) val modify : xt:'x t -> 'a Loc.t -> ('a -> 'a) -> unit (** [modify ~xt r f] is equivalent to [let x = get ~xt r in set ~xt r (f x)] with the limitation that [f] must not and is not allowed to access the transaction log. *) val exchange : xt:'x t -> 'a Loc.t -> 'a -> 'a (** [exchange ~xt r v] is equivalent to [update ~xt r (fun _ -> v)]. *) val swap : xt:'x t -> 'a Loc.t -> 'a Loc.t -> unit (** [swap ~xt l1 l2] is equivalent to [set ~xt l1 @@ exchange ~xt l2 @@ get ~xt l1]. *) val compare_and_set : xt:'x t -> 'a Loc.t -> 'a -> 'a -> bool (** [compare_and_set ~xt r before after] is equivalent to [compare_and_swap ~xt r before after == before]. *) val compare_and_swap : xt:'x t -> 'a Loc.t -> 'a -> 'a -> 'a (** [compare_and_swap ~xt r before after] is equivalent to {@ocaml skip[ update ~xt r @@ fun actual -> if actual == before then after else actual ]} *) val fetch_and_add : xt:'x t -> int Loc.t -> int -> int (** [fetch_and_add ~xt r n] is equivalent to [update ~xt r ((+) n)]. *) val incr : xt:'x t -> int Loc.t -> unit (** [incr ~xt r] is equivalent to [fetch_and_add ~xt r 1 |> ignore]. *) val decr : xt:'x t -> int Loc.t -> unit (** [decr ~xt r] is equivalent to [fetch_and_add ~xt r (-1) |> ignore]. *) (** {1 Blocking} *) val to_blocking : xt:'x t -> (xt:'x t -> 'a option) -> 'a (** [to_blocking ~xt tx] converts the non-blocking transaction [tx] to a blocking transaction by retrying on [None]. *) val to_nonblocking : xt:'x t -> (xt:'x t -> 'a) -> 'a option (** [to_nonblocking ~xt tx] converts the blocking transaction [tx] to a non-blocking transaction by returning [None] on retry. *) (** {1 Nested transactions} The transaction mechanism does not implicitly rollback changes recorded in the transaction log. Using {!snapshot} and {!rollback} it is possible to implement nested conditional transactions that may tentatively record changes in the transaction log and then later discard those changes. *) type 'x snap (** Type of a {!snapshot} of a transaction log. *) val snapshot : xt:'x t -> 'x snap (** [snapshot ~xt] returns a snapshot of the transaction log. Taking a snapshot is a fast constant time [O(1)] operation. *) val rollback : xt:'x t -> 'x snap -> unit (** [rollback ~xt snap] discards any changes of shared memory locations recorded in the transaction log after the [snap] was taken by {!snapshot}. Performing a rollback is potentially as expensive as linear time [O(n)] in the number of locations accessed, but, depending on the exact access patterns, may also be performed more quickly. The implementation is optimized with the assumption that a rollback is performed at most once per snapshot. {b NOTE}: Only changes are discarded. Any location newly accessed after the snapshot was taken will remain recorded in the log as a read-only entry. *) val first : xt:'x t -> (xt:'x t -> 'a) list -> 'a (** [first ~xt txs] calls each transaction in the given list in turn and either returns the value returned by the first transaction in the list or raises {!Retry.Later} in case all of the transactions raised {!Retry.Later}. {b NOTE}: [first] does not automatically rollback changes made by the transactions. *) (** {1 Post commit actions} *) val post_commit : xt:'x t -> (unit -> unit) -> unit (** [post_commit ~xt action] adds the [action] to be performed after the transaction has been committed successfully. *) (** {1 Validation} *) val validate : xt:'x t -> 'a Loc.t -> unit (** [validate ~xt r] determines whether the shared memory location [r] has been modified outside of the transaction and raises {!Retry.Invalid} in case it has. Due to the possibility of read skew, in cases where some important invariant should hold between two or more different shared memory locations, one may explicitly validate the locations, after reading all of them, to ensure that no read skew is possible. *) (** {1 Advanced} *) val is_in_log : xt:'x t -> 'a Loc.t -> bool (** [is_in_log ~xt r] determines whether the shared memory location [r] has been accessed by the transaction. *) (** {1 Performing accesses} *) type 'a tx = { tx : 'x. xt:'x t -> 'a } [@@unboxed] (** Type of a transaction function that is polymorphic with respect to an explicit transaction log. The universal quantification helps to ensure that the transaction log cannot accidentally escape. *) val call : xt:'x t -> 'a tx -> 'a (** [call ~xt tx] is equivalent to [tx.Xt.tx ~xt]. *) val commit : ?timeoutf:float -> ?backoff:Backoff.t -> ?mode:Mode.t -> 'a tx -> 'a (** [commit tx] repeatedly calls [tx] to record a log of shared memory accesses and attempts to perform them atomically until it succeeds and then returns whatever [tx] returned. [tx] may raise {!Retry.Later} or {!Retry.Invalid} to explicitly request a retry or any other exception to abort the transaction. The default {{!Mode.t} [mode]} for [commit] is [`Obstruction_free]. However, after enough attempts have failed during the verification step, [commit] switches to [`Lock_free]. *) end kcas-0.7.0/src/kcas_data/000077500000000000000000000000001456672623200151555ustar00rootroot00000000000000kcas-0.7.0/src/kcas_data/accumulator.ml000066400000000000000000000040731456672623200200320ustar00rootroot00000000000000open Kcas type t = { mutable cache : int Loc.t array; truth : int Loc.t array Loc.t } let make n = let cs = Loc.make_array ~padded:true ~mode:`Lock_free 1 0 in Loc.set (Array.unsafe_get cs 0) n; let truth = Loc.make ~padded:true cs in Multicore_magic.copy_as_padded { cache = cs; truth } let[@inline never] rec get_self a i cs n = let add_cs = Loc.make_array ~padded:true ~mode:`Lock_free (n + 1) 0 in let new_cs = (* The length of [new_cs] will be a power of two minus 1, which means the whole heap block will have a power of two number of words, which may help to keep it cache line aligned. *) Array.init ((n * 2) + 1) @@ fun i -> if i <= n then Array.unsafe_get add_cs i else Array.unsafe_get cs (i - n - 1) in if Loc.compare_and_set a.truth cs new_cs then a.cache <- new_cs; let cs = a.cache in let n = Array.length cs in if i < n then Array.unsafe_get cs i else get_self a i cs n let[@inline] get_self a = let i = Multicore_magic.instantaneous_domain_index () in let cs = a.cache in let n = Array.length cs in if i < n then Array.unsafe_get cs i else get_self a i cs n module Xt = struct let add ~xt a n = if n <> 0 then Xt.fetch_and_add ~xt (get_self a) n |> ignore let incr ~xt a = Xt.incr ~xt (get_self a) let decr ~xt a = Xt.decr ~xt (get_self a) let rec get_rec ~xt cs s i = let s = s + Xt.get ~xt (Array.unsafe_get cs i) in if i = 0 then s else get_rec ~xt cs s (i - 1) let get ~xt a = let cs = Xt.get ~xt a.truth in let cs_old = a.cache in if cs != cs_old then a.cache <- cs; let i = Array.length cs - 1 in let s = Xt.get ~xt (Array.unsafe_get cs i) in if i = 0 then s else get_rec ~xt cs s (i - 1) let set ~xt a n = let delta = n - get ~xt a in if delta <> 0 then Xt.fetch_and_add ~xt (Array.unsafe_get a.cache 0) delta |> ignore end let add a n = if n <> 0 then Loc.fetch_and_add (get_self a) n |> ignore let incr a = Loc.incr (get_self a) let decr a = Loc.decr (get_self a) let get a = Kcas.Xt.commit { tx = Xt.get a } let set a n = Kcas.Xt.commit { tx = Xt.set a n } kcas-0.7.0/src/kcas_data/accumulator.mli000066400000000000000000000012451456672623200202010ustar00rootroot00000000000000open Kcas (** Scalable accumulator. A scalable accumulator can be used to scalably accumulate an integer value in parallel as long as the accumulated value is read infrequently. *) (** {1 Common interface} *) type t (** The type of a scalable accumulator. *) val make : int -> t (** [make n] returns a new accumulator whose initial value is [n]. *) (** {1 Compositional interface} *) module Xt : Accumulator_intf.Ops with type t := t with type ('x, 'fn) fn := xt:'x Xt.t -> 'fn (** Explicit transaction log passing on accumulators. *) (** {1 Non-compositional interface} *) include Accumulator_intf.Ops with type t := t with type ('x, 'fn) fn := 'fn kcas-0.7.0/src/kcas_data/accumulator_intf.ml000066400000000000000000000012421456672623200210450ustar00rootroot00000000000000module type Ops = sig type t type ('x, 'fn) fn val add : ('x, t -> int -> unit) fn (** [add a n] increments the value of the accumulator [a] by [n]. [add] operations can be performed scalably in parallel. *) val incr : ('x, t -> unit) fn (** [incr a] is equivalent to [add a 1]. *) val decr : ('x, t -> unit) fn (** [decr a] is equivalent to [add a (-1)]. *) val get : ('x, t -> int) fn (** [get a] returns the current value of the accumulator. {b CAUTION}: Performing a [get] is expensive and can limit scalability. *) val set : ('x, t -> int -> unit) fn (** [set a n] sets the current value of the accumulator [a] to [n]. *) end kcas-0.7.0/src/kcas_data/bits.ml000066400000000000000000000006741456672623200164570ustar00rootroot00000000000000let is_pow_2 n = n land (n - 1) = 0 let max_0 n = let m = n asr (Sys.int_size - 1) in n land lnot m let ceil_pow_2_minus_1 n = let n = n lor (n lsr 1) in let n = n lor (n lsr 2) in let n = n lor (n lsr 4) in let n = n lor (n lsr 8) in let n = n lor (n lsr 16) in if Sys.int_size > 32 then n lor (n lsr 32) else n let ceil_pow_2 n = if n <= 1 then 1 else let n = n - 1 in let n = ceil_pow_2_minus_1 n in n + 1 kcas-0.7.0/src/kcas_data/bits.mli000066400000000000000000000003731456672623200166240ustar00rootroot00000000000000val max_0 : int -> int (** [max_0 n] is equivalent to [Int.max 0 n]. *) val is_pow_2 : int -> bool (** [is_pow_2 n] determines [n] is zero or of the form [1 lsl i] for some [i]. *) val ceil_pow_2_minus_1 : int -> int val ceil_pow_2 : int -> int kcas-0.7.0/src/kcas_data/dllist.ml000066400000000000000000000224701456672623200170070ustar00rootroot00000000000000open Kcas (** Tagged GADT for representing doubly-linked lists. The [lhs] and [rhs] fields are the first two fields in both a [List] and a [Node] so that it is possible (by using an unsafe cast) to access the fields without knowing whether the target is a [List] or a [Node]. *) type ('a, _) tdt = | List : { lhs : 'a cursor Loc.t; (** [lhs] points to the rightmost node of this list or to the list itself in case the list is empty. *) rhs : 'a cursor Loc.t; (** [rhs] points to the leftmost node of this list or to the list itself in case the list is empty. *) } -> ('a, [> `List ]) tdt | Node : { lhs : 'a cursor Loc.t; (** [lhs] points to the node on the left side of this node, to the list if this node is the leftmost node, or to the node itself in case this node is not in any list. *) rhs : 'a cursor Loc.t; (** [rhs] points to the node on the right side of this node, to the list if this node is the rightmost node, or to the node itself in case this node is not in any list. *) value : 'a; } -> ('a, [> `Node ]) tdt and 'a cursor = At : ('a, [< `List | `Node ]) tdt -> 'a cursor [@@unboxed] type 'a t = ('a, [ `List ]) tdt type 'a node = ('a, [ `Node ]) tdt external as_list : ('a, _) tdt -> 'a t = "%identity" external as_node : ('a, _) tdt -> 'a node = "%identity" let[@inline] get (Node { value; _ } : 'a node) = value let[@inline] lhs_of list_or_node = let (List list_r) = as_list list_or_node in list_r.lhs let[@inline] rhs_of list_or_node = let (List list_r) = as_list list_or_node in list_r.rhs let[@inline] value_of (Node node_r : 'a node) = node_r.value let create () = let lhs = Loc.make ~padded:true (Obj.magic ()) in let rhs = Loc.make ~padded:true (Obj.magic ()) in let list = Multicore_magic.copy_as_padded (List { lhs; rhs }) in Loc.set lhs (At list); Loc.set rhs (At list); list let create_node value = let node = let lhs = Loc.make (Obj.magic ()) in let rhs = Loc.make (Obj.magic ()) in Node { lhs; rhs; value } in Loc.set (lhs_of node) (At node); Loc.set (rhs_of node) (At node); node let create_node_with ~lhs ~rhs value = Node { lhs = Loc.make (At lhs); rhs = Loc.make (At rhs); value } module Xt = struct let remove ~xt node = let (At rhs) = Xt.exchange ~xt (rhs_of node) (At node) in if At rhs != At node then begin let (At lhs) = Xt.exchange ~xt (lhs_of node) (At node) in Xt.set ~xt (lhs_of rhs) (At lhs); Xt.set ~xt (rhs_of lhs) (At rhs) end let is_empty ~xt list = Xt.get ~xt (lhs_of list) == At list let add_node_l ~xt node list = let (At rhs) = Xt.get ~xt (rhs_of list) in assert (Loc.fenceless_get (lhs_of node) == At list); Loc.set (rhs_of node) (At rhs); Xt.set ~xt (rhs_of list) (At node); Xt.set ~xt (lhs_of rhs) (At node); node let add_l ~xt value list = let (At rhs) = Xt.get ~xt (rhs_of list) in let node = create_node_with ~lhs:list ~rhs value in Xt.set ~xt (rhs_of list) (At node); Xt.set ~xt (lhs_of rhs) (At node); node let add_node_r ~xt node list = let (At lhs) = Xt.get ~xt (lhs_of list) in Loc.set (lhs_of node) (At lhs); assert (Loc.fenceless_get (rhs_of node) == At list); Xt.set ~xt (lhs_of list) (At node); Xt.set ~xt (rhs_of lhs) (At node); node let add_r ~xt value list = let (At lhs) = Xt.get ~xt (lhs_of list) in let node = create_node_with ~lhs ~rhs:list value in Xt.set ~xt (lhs_of list) (At node); Xt.set ~xt (rhs_of lhs) (At node); node let move_l ~xt node list = let (At list_rhs) = Xt.exchange ~xt (rhs_of list) (At node) in if At list_rhs != At node then begin let (At node_lhs) = Xt.exchange ~xt (lhs_of node) (At list) in let (At node_rhs) = Xt.exchange ~xt (rhs_of node) (At list_rhs) in if At node_lhs != At node then begin Xt.set ~xt (rhs_of node_lhs) (At node_rhs); Xt.set ~xt (lhs_of node_rhs) (At node_lhs) end; Xt.set ~xt (lhs_of list_rhs) (At node) end let move_r ~xt node list = let (At list_lhs) = Xt.exchange ~xt (lhs_of list) (At node) in if At list_lhs != At node then begin let (At node_rhs) = Xt.exchange ~xt (rhs_of node) (At list) in let (At node_lhs) = Xt.exchange ~xt (lhs_of node) (At list_lhs) in if At node_rhs != At node then begin Xt.set ~xt (rhs_of node_lhs) (At node_rhs); Xt.set ~xt (lhs_of node_rhs) (At node_lhs) end; Xt.set ~xt (rhs_of list_lhs) (At node) end let take_opt_l ~xt list = let (At rhs) = Xt.get ~xt (rhs_of list) in if At rhs == At list then None else let node = as_node rhs in remove ~xt node; Some (value_of node) let take_opt_r ~xt list = let (At lhs) = Xt.get ~xt (lhs_of list) in if At lhs == At list then None else let node = as_node lhs in remove ~xt node; Some (value_of node) let take_blocking_l ~xt list = Xt.to_blocking ~xt (take_opt_l list) let take_blocking_r ~xt list = Xt.to_blocking ~xt (take_opt_r list) let transfer_l ~xt t1 t2 = let (At t1_rhs) = Xt.exchange ~xt (rhs_of t1) (At t1) in if At t1_rhs != At t1 then begin let (At t1_lhs) = Xt.exchange ~xt (lhs_of t1) (At t1) in let (At t2_rhs) = Xt.exchange ~xt (rhs_of t2) (At t1_rhs) in Xt.set ~xt (lhs_of t2_rhs) (At t1_lhs); Xt.set ~xt (lhs_of t1_rhs) (At t2); Xt.set ~xt (rhs_of t1_lhs) (At t2_rhs) end let transfer_r ~xt t1 t2 = let (At t1_rhs) = Xt.exchange ~xt (rhs_of t1) (At t1) in if At t1_rhs != At t1 then begin let (At t1_lhs) = Xt.exchange ~xt (lhs_of t1) (At t1) in let (At t2_lhs) = Xt.exchange ~xt (lhs_of t2) (At t1_lhs) in Xt.set ~xt (rhs_of t2_lhs) (At t1_rhs); Xt.set ~xt (rhs_of t1_lhs) (At t2); Xt.set ~xt (lhs_of t1_rhs) (At t2_lhs) end let swap ~xt t1 t2 = let (At t1_rhs) = Xt.get ~xt (rhs_of t1) in if At t1_rhs == At t1 then transfer_l ~xt t2 t1 else let (At t2_lhs) = Xt.get ~xt (lhs_of t2) in if At t2_lhs == At t2 then transfer_l ~xt t1 t2 else let (At t1_lhs) = Xt.exchange ~xt (lhs_of t1) (At t2_lhs) in let (At t2_rhs) = Xt.exchange ~xt (rhs_of t2) (At t1_rhs) in Xt.set ~xt (lhs_of t2) (At t1_lhs); Xt.set ~xt (rhs_of t1) (At t2_rhs); Xt.set ~xt (lhs_of t2_rhs) (At t1); Xt.set ~xt (rhs_of t2_lhs) (At t1); Xt.set ~xt (lhs_of t1_rhs) (At t2); Xt.set ~xt (rhs_of t1_lhs) (At t2) let[@tail_mod_cons] rec to_list_as_l ~xt f list (At at) = if At at == At list then [] else f (as_node at) :: to_list_as_l ~xt f list (Xt.get ~xt (rhs_of at)) let to_list_as_l ~xt f list = to_list_as_l ~xt f list (Xt.get ~xt (rhs_of list)) let to_list_l ~xt list = to_list_as_l ~xt get list let to_nodes_l ~xt list = to_list_as_l ~xt Fun.id list let[@tail_mod_cons] rec to_list_as_r ~xt f list (At at) = if At at == At list then [] else f (as_node at) :: to_list_as_r ~xt f list (Xt.get ~xt (lhs_of at)) let to_list_as_r ~xt f list = to_list_as_r ~xt f list (Xt.get ~xt (lhs_of list)) let to_list_r ~xt list = to_list_as_r ~xt get list let to_nodes_r ~xt list = to_list_as_r ~xt Fun.id list end let remove node = Kcas.Xt.commit { tx = Xt.remove node } let is_empty list = Loc.get (lhs_of list) == At list let add_l value list = let node = create_node_with ~lhs:list ~rhs:list value in Kcas.Xt.commit { tx = Xt.add_node_l node list } let add_r value list = let node = create_node_with ~lhs:list ~rhs:list value in Kcas.Xt.commit { tx = Xt.add_node_r node list } let move_l node list = Kcas.Xt.commit { tx = Xt.move_l node list } let move_r node list = Kcas.Xt.commit { tx = Xt.move_r node list } let take_opt_l list = Kcas.Xt.commit { tx = Xt.take_opt_l list } let take_opt_r list = Kcas.Xt.commit { tx = Xt.take_opt_r list } let take_blocking_l ?timeoutf list = Kcas.Xt.commit ?timeoutf { tx = Xt.take_blocking_l list } let take_blocking_r ?timeoutf list = Kcas.Xt.commit ?timeoutf { tx = Xt.take_blocking_r list } let swap t1 t2 = Kcas.Xt.commit { tx = Xt.swap t1 t2 } let transfer_l t1 t2 = Kcas.Xt.commit { tx = Xt.transfer_l t1 t2 } let transfer_r t1 t2 = Kcas.Xt.commit { tx = Xt.transfer_r t1 t2 } let to_list_l list = Kcas.Xt.commit { tx = Xt.to_list_l list } let to_list_r list = Kcas.Xt.commit { tx = Xt.to_list_r list } let to_nodes_l list = Kcas.Xt.commit { tx = Xt.to_nodes_l list } let to_nodes_r list = Kcas.Xt.commit { tx = Xt.to_nodes_r list } exception Empty let take_l list = match take_opt_l list with None -> raise Empty | Some v -> v let take_r list = match take_opt_r list with None -> raise Empty | Some v -> v let take_all list = let copy = let lhs = Loc.make ~padded:true (At list) in let rhs = Loc.make ~padded:true (At list) in List { lhs; rhs } |> Multicore_magic.copy_as_padded in let open Kcas in let tx ~xt = let (At lhs) = Xt.exchange ~xt (lhs_of list) (At list) in if At lhs == At list then begin Loc.set (lhs_of copy) (At copy); Loc.set (rhs_of copy) (At copy) end else let (At rhs) = Xt.exchange ~xt (rhs_of list) (At list) in Xt.set ~xt (rhs_of lhs) (At copy); Xt.set ~xt (lhs_of rhs) (At copy); Loc.set (lhs_of copy) (At lhs); Loc.set (rhs_of copy) (At rhs) in Xt.commit { tx }; copy kcas-0.7.0/src/kcas_data/dllist.mli000066400000000000000000000057751456672623200171710ustar00rootroot00000000000000open Kcas (** Doubly-linked list. The interface provides a subset of the operations of the doubly-linked list data structure provided by the {{:https://opam.ocaml.org/packages/lwt-dllist/}lwt-dllist} package with some omissions: - The sequence iterators, e.g. [iter_l], [iter_node_l], [fold_l], [find_node_opt_l], and [find_node_l], are not provided. - The [length] operation is not provided. - The [set] operation is not provided. A non-compositional {!take_all} operation is added for {{: https://en.wikipedia.org/wiki/Privatization_(computer_programming)}privatization} as well as conversions to a list of nodes ({!to_nodes_l} and {!to_nodes_r}) and to a list of values ({!to_list_l} and {!to_list_r}). Probably the main reason to use a doubly-linked list like this rather than e.g. a ['a list Loc.t] is the ability to remove a node without having to potentially iterate through the list: {[ let node_of_x = add_l x list in (* ... and then later somewhere else ... *) remove node_of_x ]} A doubly-linked list can also be used as a deque or double-ended queue, but a deque implementation that doesn't allow individual nodes to be removed is likely to be faster. *) (** {1 Common interface} *) type !'a t (** Type of a doubly-linked list containing {!node}s of type ['a]. *) type !'a node (** Type of a node containing a value of type ['a]. *) val create : unit -> 'a t (** [create ()] creates a new doubly-linked list. *) (** {2 Operations on nodes} *) val create_node : 'a -> 'a node (** [create_node value] creates a new doubly-linked list node that is not in any list. The node can then e.g. be added to a list using {!move_l} or {!move_r}. *) val get : 'a node -> 'a (** [get node] returns the value stored in the {!node}. *) (** {1 Compositional interface} *) module Xt : Dllist_intf.Ops with type 'a t := 'a t with type 'a node := 'a node with type ('x, 'fn) fn := xt:'x Xt.t -> 'fn with type ('x, 'fn) blocking_fn := xt:'x Xt.t -> 'fn (** Explicit transaction log passing on doubly-linked lists. *) (** {1 Non-compositional interface} *) include Dllist_intf.Ops with type 'a t := 'a t with type 'a node := 'a node with type ('x, 'fn) fn := 'fn with type ('x, 'fn) blocking_fn := ?timeoutf:float -> 'fn val take_all : 'a t -> 'a t (** [take_all l] removes all nodes of the doubly-linked list [l] and returns a new doubly-linked list containing the removed nodes. *) exception Empty (** Raised when {!take_l} or {!take_r} is applied to an empty doubly-linked list. *) val take_l : 'a t -> 'a (** [take_l l] removes and returns the value of the leftmost node of the doubly-linked list [l], or raises {!Empty} if the list is empty. @raise Empty if the list is empty. *) val take_r : 'a t -> 'a (** [take_r l] removes and returns the value of the rightmost node of the doubly-linked list [l], or raises {!Empty} if the list is empty. @raise Empty if the list is empty. *) kcas-0.7.0/src/kcas_data/dllist_intf.ml000066400000000000000000000074751456672623200200370ustar00rootroot00000000000000module type Ops = sig type 'a t type 'a node type ('x, 'fn) fn type ('x, 'fn) blocking_fn (** {2 Operations on nodes} *) val remove : ('x, 'a node -> unit) fn (** [remove n] removes the node [n] from the doubly-linked list it is part of. [remove] is idempotent. *) val move_l : ('x, 'a node -> 'a t -> unit) fn (** [move_l n l] removes the node [n] from the doubly-linked list it is part of and then adds it to the left of the list [l]. *) val move_r : ('x, 'a node -> 'a t -> unit) fn (** [move_r n l] removes the node [n] from the doubly-linked list it is part of and then adds it to the right of the list [l]. *) (** {2 Operations on lists} *) val is_empty : ('x, 'a t -> bool) fn (** [is_empty l] determines whether the doubly-linked list [l] is empty or not. *) (** {3 Adding or removing values at the ends of a list} *) val add_l : ('x, 'a -> 'a t -> 'a node) fn (** [add_l v l] creates and returns a new node with the value [v] and adds the node to the left of the doubly-linked list [l]. *) val add_r : ('x, 'a -> 'a t -> 'a node) fn (** [add_r v l] creates and returns a new node with the value [v] and adds the node to the right of the doubly-linked list [l]. *) val take_opt_l : ('x, 'a t -> 'a option) fn (** [take_opt_l l] removes and returns the value of leftmost node of the doubly-linked list [l], or return [None] if the list is empty. *) val take_opt_r : ('x, 'a t -> 'a option) fn (** [take_opt_r l] removes and returns the value of rightmost node of the doubly-linked list [l], or return [None] if the list is empty. *) val take_blocking_l : ('x, 'a t -> 'a) blocking_fn (** [take_blocking_l l] removes and returns the value of leftmost node of the doubly-linked list [l], or blocks waiting for the list to become non-empty. *) val take_blocking_r : ('x, 'a t -> 'a) blocking_fn (** [take_blocking_r l] removes and returns the value of rightmost node of the doubly-linked list [l], or blocks waiting for the list to become non-empty. *) (** {3 Moving all nodes between lists} *) val swap : ('x, 'a t -> 'a t -> unit) fn (** [swap l1 l2] exchanges the nodes of the doubly-linked lists [l1] and [l2]. *) val transfer_l : ('x, 'a t -> 'a t -> unit) fn (** [transfer_l l1 l2] removes all nodes of [l1] and adds them to the left of [l2]. *) val transfer_r : ('x, 'a t -> 'a t -> unit) fn (** [transfer_r l1 l2] removes all nodes of [l1] and adds them to the right of [l2]. *) (** {3 Extracting all values or nodes from a list} *) val to_list_l : ('x, 'a t -> 'a list) fn (** [to_list_l l] collects the values of the nodes of the doubly-linked list [l] to a list in left-to-right order. {b NOTE}: This operation is linear time, [O(n)], and should typically be avoided unless the list is privatized, e.g. by using {!take_all}. *) val to_list_r : ('x, 'a t -> 'a list) fn (** [to_list_r l] collects the values of the nodes of the doubly-linked list [l] to a list in right-to-left order. {b NOTE}: This operation is linear time, [O(n)], and should typically be avoided unless the list is privatized, e.g. by using {!take_all}. *) val to_nodes_l : ('x, 'a t -> 'a node list) fn (** [to_nodes_l l] collects the nodes of the doubly-linked list [l] to a list in left-to-right order. {b NOTE}: This operation is linear time, [O(n)], and should typically be avoided unless the list is privatized, e.g. by using {!take_all}. *) val to_nodes_r : ('x, 'a t -> 'a node list) fn (** [to_nodes_r l] collects the nodes of the doubly-linked list [l] to a list in right-to-left order. {b NOTE}: This operation is linear time, [O(n)], and should typically be avoided unless the list is privatized, e.g. by using {!take_all}. *) end kcas-0.7.0/src/kcas_data/domain.ocaml4.ml000066400000000000000000000000641456672623200201340ustar00rootroot00000000000000let recommended_domain_count () = 1 let self () = 0 kcas-0.7.0/src/kcas_data/dune000066400000000000000000000005771456672623200160440ustar00rootroot00000000000000(library (name kcas_data) (public_name kcas_data) (libraries (re_export kcas) multicore-magic)) (rule (targets domain.ml) (deps domain.ocaml4.ml) (enabled_if (< %{ocaml_version} 5.0.0)) (action (progn (copy domain.ocaml4.ml domain.ml)))) (mdx (package kcas_data) (deps (package kcas) (package kcas_data)) (libraries kcas kcas_data) (files kcas_data.mli)) kcas-0.7.0/src/kcas_data/elems.ml000066400000000000000000000027431456672623200166220ustar00rootroot00000000000000type 'a t = { value : 'a; tl : 'a t; length : int } let rec empty = { value = Obj.magic (); tl = empty; length = 0 } let[@inline] singleton value = { value; tl = empty; length = 1 } let[@inline] tl_safe { tl; _ } = tl let[@inline] tl_or_retry t = if t != empty then t.tl else Kcas.Retry.later () let[@inline] length { length; _ } = length let[@inline] cons value tl = { value; tl; length = 1 + tl.length } let[@inline] hd_opt t = if t != empty then Some t.value else None let[@inline] hd_or_retry t = if t != empty then t.value else Kcas.Retry.later () let[@inline] hd_unsafe t = t.value let rec fold f a t = if t == empty then a else fold f (f a t.value) t.tl let[@inline] iter f t = fold (fun () x -> f x) () t let rec rev_append t tl = if t == empty then tl else rev_append t.tl @@ cons t.value tl let rev t = if t.length <= 1 then t else rev_append t.tl (singleton t.value) let rec prepend_to_seq t tl = if t == empty then tl else fun () -> Seq.Cons (t.value, prepend_to_seq t.tl tl) let to_seq t = prepend_to_seq t Seq.empty let of_seq_rev xs = Seq.fold_left (fun t x -> cons x t) empty xs let rev_prepend_to_seq t tl = if t.length <= 1 then prepend_to_seq t tl else let t = ref (Either.Left t) in fun () -> let t = match !t with | Left t' -> (* This is parallelism safe as the result is always equivalent. *) let t' = rev t' in t := Right t'; t' | Right t' -> t' in prepend_to_seq t tl () kcas-0.7.0/src/kcas_data/elems.mli000066400000000000000000000012161456672623200167650ustar00rootroot00000000000000(** Basically a list where each node includes length, the empty list is a cyclic node, and conversions to sequences are performed lazily. *) type !'a t val empty : 'a t val tl_safe : 'a t -> 'a t val tl_or_retry : 'a t -> 'a t val length : 'a t -> int val cons : 'a -> 'a t -> 'a t val hd_opt : 'a t -> 'a option val hd_or_retry : 'a t -> 'a val hd_unsafe : 'a t -> 'a val iter : ('a -> unit) -> 'a t -> unit val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b val rev : 'a t -> 'a t val prepend_to_seq : 'a t -> 'a Seq.t -> 'a Seq.t val to_seq : 'a t -> 'a Seq.t val of_seq_rev : 'a Seq.t -> 'a t val rev_prepend_to_seq : 'a t -> 'a Seq.t -> 'a Seq.t kcas-0.7.0/src/kcas_data/hashtbl.ml000066400000000000000000000425741456672623200171500ustar00rootroot00000000000000open Kcas (** Optimized operations on internal association lists with custom equality. *) module Assoc = struct type ('k, 'v) t = Nil | Cons of { k : 'k; v : 'v; kvs : ('k, 'v) t } let[@inline] cons k v kvs = Cons { k; v; kvs } let rec fold fn accum = function | Nil -> accum | Cons { k; v; kvs } -> fold fn (fn k v accum) kvs let length kvs = fold (fun _ _ n -> n + 1) 0 kvs let rec rev_append kvs accum = match kvs with | Nil -> accum | Cons { k; v; kvs } -> rev_append kvs (Cons { k; v; kvs = accum }) let rev kvs = rev_append kvs Nil let rec iter fn = function | Nil -> () | Cons { k; v; kvs } -> fn k v; iter fn kvs let iter_rev fn = function | Nil -> () | Cons { k; v; kvs = Nil } -> fn k v | kvs -> kvs |> rev |> iter fn let rec find_opt equal k' = function | Nil -> None | Cons r -> if equal r.k k' then Some r.v else find_opt equal k' r.kvs let[@tail_mod_cons] rec find_all equal k' = function | Nil -> [] | Cons { k; v; kvs } -> if equal k k' then v :: find_all equal k' kvs else find_all equal k' kvs let rec mem equal k' = function | Nil -> false | Cons r -> equal r.k k' || mem equal k' r.kvs exception Not_found let[@tail_mod_cons] rec remove equal k' = function | Nil -> raise_notrace Not_found | Cons r -> if equal r.k k' then r.kvs else Cons { k = r.k; v = r.v; kvs = remove equal k' r.kvs } type change = Nop | Replaced | Added let[@tail_mod_cons] rec replace equal change k' v' = function | Nil -> change := Added; Cons { k = k'; v = v'; kvs = Nil } | Cons r as original -> if equal r.k k' then if r.v == v' then original else begin change := Replaced; Cons { k = r.k; v = v'; kvs = r.kvs } end else Cons { k = r.k; v = r.v; kvs = replace equal change k' v' r.kvs } let[@tail_mod_cons] rec filter_map fn delta = function | Nil -> Nil | Cons { k; v; kvs } -> begin match fn k v with | None -> decr delta; filter_map fn delta kvs | Some v' -> Cons { k; v = v'; kvs = filter_map fn delta kvs } end end type ('k, 'v) pending = | Nothing | Rehash of { state : int Loc.t; new_capacity : int; new_buckets : ('k, 'v) Assoc.t Loc.t array Loc.t; } | Snapshot of { state : int Loc.t; snapshot : ('k, 'v) Assoc.t array Loc.t } | Filter_map of { state : int Loc.t; fn : 'k -> 'v -> 'v option; raised : exn Loc.t; new_buckets : ('k, 'v) Assoc.t Loc.t array Loc.t; } type ('k, 'v) r = { pending : ('k, 'v) pending; length : Accumulator.t; buckets : ('k, 'v) Assoc.t Loc.t array; hash : 'k -> int; equal : 'k -> 'k -> bool; min_buckets : int; max_buckets : int; } type ('k, 'v) t = ('k, 'v) r Loc.t type 'k hashed_type = (module Stdlib.Hashtbl.HashedType with type t = 'k) let lo_buckets = 1 lsl 5 let hi_buckets = (Sys.max_array_length lsr 1) + 1 let () = assert (Bits.is_pow_2 hi_buckets) let min_buckets_default = lo_buckets let max_buckets_default = Int.min hi_buckets (1 lsl 30 (* Limit of [hash] *)) module HashedType = struct let pack (type k) hash equal : k hashed_type = (module struct type t = k let hash = hash and equal = equal end) let unpack (type k) ((module HashedType) : k hashed_type) = (HashedType.hash, HashedType.equal) let is_same_as (type k) hash equal ((module HashedType) : k hashed_type) = hash == HashedType.hash && equal == HashedType.equal end let create ?hashed_type ?min_buckets ?max_buckets () = let min_buckets = match min_buckets with | None -> min_buckets_default | Some c -> Int.max lo_buckets c |> Int.min hi_buckets |> Bits.ceil_pow_2 in let t = Loc.make ~padded:true (Obj.magic ()) in let max_buckets = match max_buckets with | None -> Int.max min_buckets max_buckets_default | Some c -> Int.max min_buckets c |> Int.min hi_buckets |> Bits.ceil_pow_2 and hash, equal = match hashed_type with | None -> (Stdlib.Hashtbl.seeded_hash (Random.bits ()), ( = )) | Some hashed_type -> HashedType.unpack hashed_type and pending = Nothing and buckets = Loc.make_array min_buckets Assoc.Nil and length = Accumulator.make 0 in Loc.set t (Multicore_magic.copy_as_padded { pending; length; buckets; hash; equal; min_buckets; max_buckets }); t let min_buckets_of t = (Loc.get t).min_buckets let max_buckets_of t = (Loc.get t).max_buckets let hashed_type_of t = let r = Loc.get t in HashedType.pack r.hash r.equal let bucket_of hash key buckets = Array.unsafe_get buckets (hash key land (Array.length buckets - 1)) exception Done module Xt = struct let find_opt ~xt t k = let r = Xt.get ~xt t in r.buckets |> bucket_of r.hash k |> Xt.get ~xt |> Assoc.find_opt r.equal k let find_all ~xt t k = let r = Xt.get ~xt t in r.buckets |> bucket_of r.hash k |> Xt.get ~xt |> Assoc.find_all r.equal k let mem ~xt t k = let r = Xt.get ~xt t in r.buckets |> bucket_of r.hash k |> Xt.get ~xt |> Assoc.mem r.equal k let get_or_alloc array_loc make length = let tx ~xt = let array = Xt.get ~xt array_loc in if array != [||] then array else let array = make length Assoc.Nil in Xt.set ~xt array_loc array; array in Xt.commit { tx } (** Pending operations are performed incrementally in small batches. *) let batch_size = 3 let perform_pending ~xt t = (* TODO: Implement pending operations such that multiple domains may be working to complete them in parallel by extending the [state] to an array of multiple partition [states]. *) let must_be_done_in_this_tx = Xt.is_in_log ~xt t in let r = Xt.get ~xt t in match r.pending with | Nothing -> r | Rehash { state; new_capacity; new_buckets } -> begin let new_buckets = get_or_alloc new_buckets Loc.make_array new_capacity in let old_buckets = r.buckets in let r = Multicore_magic.copy_as_padded { r with pending = Nothing; buckets = new_buckets } in Xt.set ~xt t r; let hash = r.hash and mask = new_capacity - 1 in let rehash_a_few_buckets ~xt = (* We process buckets in descending order as that is slightly faster with the transaction log. It also makes sure that we know when the operation has already been performed independently of the buckets array we read above. *) let i = Xt.fetch_and_add ~xt state (-batch_size) in if i <= 0 then raise_notrace Done; for i = i - 1 downto Bits.max_0 (i - batch_size) do Array.unsafe_get old_buckets i |> Xt.get ~xt |> Assoc.iter_rev @@ fun k v -> Xt.modify ~xt (Array.unsafe_get new_buckets (hash k land mask)) (Assoc.cons k v) done in try if must_be_done_in_this_tx then begin (* If the old buckets have already been accessed, we cannot perform rehashing outside of the transaction. In this case rehashing becomes linearithmic, O(n*log(n)), because that is the best that the transaction log promises. However, as we access the bucket locations mostly in order, we often actually get linear time, O(n), performance. *) let initial_state = Array.length old_buckets in while true do (* If state is modified outside our expensive tx would fail. *) if Loc.fenceless_get state != initial_state then Retry.invalid (); rehash_a_few_buckets ~xt done; r end else begin (* When possible, rehashing is performed cooperatively a few buckets at a time. This gives expected linear time, O(n). *) while true do Xt.commit { tx = rehash_a_few_buckets } done; r end with Done -> r end | Snapshot { state; snapshot } -> begin assert (not must_be_done_in_this_tx); let buckets = r.buckets in let r = Multicore_magic.copy_as_padded { r with pending = Nothing } in Xt.set ~xt t r; (* Check state to ensure that buckets have not been updated. *) if Loc.fenceless_get state < 0 then Retry.invalid (); let snapshot = get_or_alloc snapshot Array.make (Array.length buckets) in let snapshot_a_few_buckets ~xt = let i = Xt.fetch_and_add ~xt state (-batch_size) in if i <= 0 then raise_notrace Done; for i = i - 1 downto Bits.max_0 (i - batch_size) do Array.unsafe_get buckets i |> Xt.get ~xt |> Array.unsafe_set snapshot i done in try while true do Xt.commit { tx = snapshot_a_few_buckets } done; r with Done -> r end | Filter_map { state; fn; raised; new_buckets } -> begin assert (not must_be_done_in_this_tx); let old_buckets = r.buckets in (* Check state to ensure that buckets have not been updated. *) if Loc.fenceless_get state < 0 then Retry.invalid (); let new_capacity = Array.length old_buckets in let new_buckets = get_or_alloc new_buckets Loc.make_array new_capacity in let filter_map_a_few_buckets ~xt = let i = Xt.fetch_and_add ~xt state (-batch_size) in if i <= 0 then raise_notrace Done; let a_few_buckets_delta = ref 0 in for i = i - 1 downto Bits.max_0 (i - batch_size) do Xt.get ~xt (Array.unsafe_get old_buckets i) |> Assoc.filter_map fn a_few_buckets_delta |> Xt.set ~xt (Array.unsafe_get new_buckets i) done; !a_few_buckets_delta in let total_delta = ref 0 in try while true do total_delta := !total_delta + Xt.commit { tx = filter_map_a_few_buckets } done; r with | Done -> Accumulator.Xt.add ~xt r.length !total_delta; let r = Multicore_magic.copy_as_padded { r with pending = Nothing; buckets = new_buckets } in Xt.set ~xt t r; r | exn -> Loc.compare_and_set raised Done exn |> ignore; let r = Multicore_magic.copy_as_padded { r with pending = Nothing } in Xt.set ~xt t r; r end let[@inline] make_rehash old_capacity new_capacity = let state = Loc.make old_capacity and new_buckets = Loc.make [||] in Rehash { state; new_capacity; new_buckets } let reset ~xt t = let r = perform_pending ~xt t in Accumulator.Xt.set ~xt r.length 0; let buckets = Loc.make_array r.min_buckets Assoc.Nil in Xt.set ~xt t { r with buckets } let clear ~xt t = reset ~xt t let remove ~xt t k = let r = perform_pending ~xt t in let buckets = r.buckets in let mask = Array.length buckets - 1 in let bucket = Array.unsafe_get buckets (r.hash k land mask) in match Xt.modify ~xt bucket (Assoc.remove r.equal k) with | () -> Accumulator.Xt.decr ~xt r.length; if r.min_buckets <= mask && Random.bits () land mask = 0 then let capacity = mask + 1 in let length = Accumulator.Xt.get ~xt r.length in if length * 4 < capacity then Xt.set ~xt t { r with pending = make_rehash capacity (capacity asr 1) } | exception Assoc.Not_found -> () let add ~xt t k v = let r = perform_pending ~xt t in let buckets = r.buckets in let mask = Array.length buckets - 1 in let bucket = Array.unsafe_get buckets (r.hash k land mask) in Xt.modify ~xt bucket (Assoc.cons k v); Accumulator.Xt.incr ~xt r.length; if mask + 1 < r.max_buckets && Random.bits () land mask = 0 then let capacity = mask + 1 in let length = Accumulator.Xt.get ~xt r.length in if capacity < length then Xt.set ~xt t { r with pending = make_rehash capacity (capacity * 2) } let replace ~xt t k v = let r = perform_pending ~xt t in let buckets = r.buckets in let mask = Array.length buckets - 1 in let bucket = Array.unsafe_get buckets (r.hash k land mask) in let change = ref Assoc.Nop in Xt.modify ~xt bucket (fun kvs -> let kvs' = Assoc.replace r.equal change k v kvs in if !change != Assoc.Nop then kvs' else kvs); if !change == Assoc.Added then begin Accumulator.Xt.incr ~xt r.length; if mask + 1 < r.max_buckets && Random.bits () land mask = 0 then let capacity = mask + 1 in let length = Accumulator.Xt.get ~xt r.length in if capacity < length then Xt.set ~xt t { r with pending = make_rehash capacity (capacity * 2) } end let length ~xt t = Accumulator.Xt.get ~xt (Xt.get ~xt t).length let swap = Xt.swap end let find_opt t k = let t = Loc.get t in (* Fenceless is safe as we have a fence above. *) t.buckets |> bucket_of t.hash k |> Loc.fenceless_get |> Assoc.find_opt t.equal k let find_all t k = let t = Loc.get t in (* Fenceless is safe as we have a fence above. *) t.buckets |> bucket_of t.hash k |> Loc.fenceless_get |> Assoc.find_all t.equal k let find t k = match find_opt t k with None -> raise Not_found | Some v -> v let mem t k = let t = Loc.get t in (* Fenceless is safe as we have a fence above. *) t.buckets |> bucket_of t.hash k |> Loc.fenceless_get |> Assoc.mem t.equal k let clear t = Kcas.Xt.commit { tx = Xt.clear t } let reset t = Kcas.Xt.commit { tx = Xt.reset t } let remove t k = Kcas.Xt.commit { tx = Xt.remove t k } let add t k v = Kcas.Xt.commit { tx = Xt.add t k v } let replace t k v = Kcas.Xt.commit { tx = Xt.replace t k v } let length t = Accumulator.get (Loc.get t).length let swap t1 t2 = Kcas.Xt.commit { tx = Xt.swap t1 t2 } let snapshot ?length ?record t = let state = Loc.make 0 and snapshot = Loc.make [||] in let pending = Snapshot { state; snapshot } in let tx ~xt = let r = Xt.perform_pending ~xt t in length |> Option.iter (fun length -> length := Accumulator.Xt.get ~xt r.length); record |> Option.iter (fun record -> record := r); Loc.set state (Array.length r.buckets); Kcas.Xt.set ~xt t { r with pending } in Kcas.Xt.commit { tx }; Kcas.Xt.commit { tx = Xt.perform_pending t } |> ignore; (* Fenceless is safe as commit above has fences. *) Loc.fenceless_get snapshot let to_seq t = let snapshot = snapshot t in let rec loop i kvs () = match kvs with | Assoc.Nil -> if i = Array.length snapshot then Seq.Nil else loop (i + 1) (Array.unsafe_get snapshot i) () | Cons { k; v; kvs } -> Seq.Cons ((k, v), loop i kvs) in loop 0 Nil let to_seq_keys t = to_seq t |> Seq.map fst let to_seq_values t = to_seq t |> Seq.map snd let of_seq ?hashed_type ?min_buckets ?max_buckets xs = let t = create ?hashed_type ?min_buckets ?max_buckets () in Seq.iter (fun (k, v) -> replace t k v) xs; t let rebuild ?hashed_type ?min_buckets ?max_buckets t = let record = ref (Obj.magic ()) and length = ref 0 in let snapshot = snapshot ~length ~record t in let r = !record in let min_buckets = match min_buckets with | None -> r.min_buckets | Some c -> Int.max lo_buckets c |> Int.min hi_buckets |> Bits.ceil_pow_2 in let max_buckets = match max_buckets with | None -> Int.max min_buckets r.max_buckets | Some c -> Int.max min_buckets c |> Int.min hi_buckets |> Bits.ceil_pow_2 in let is_same_hashed_type = match hashed_type with | None -> true | Some hashed_type -> HashedType.is_same_as r.hash r.equal hashed_type and length = !length in if is_same_hashed_type && min_buckets <= length && length <= max_buckets then begin let t = Loc.make ~padded:true (Obj.magic ()) in let pending = Nothing and buckets = Array.map Loc.make snapshot and length = Accumulator.make length in Loc.set t @@ Multicore_magic.copy_as_padded { r with pending; length; buckets; min_buckets; max_buckets }; t end else let t = create ?hashed_type ~min_buckets ~max_buckets () in snapshot |> Array.iter (Assoc.iter_rev (add t)); t let copy t = rebuild t let fold fn t a = Array.fold_left (Assoc.fold fn) a (snapshot t) let iter f t = fold (fun k v () -> f k v) t () let filter_map_inplace fn t = let state = Loc.make 0 and raised = Loc.make Done and new_buckets = Loc.make [||] in let pending = Filter_map { state; fn; raised; new_buckets } in let tx ~xt = let r = Xt.perform_pending ~xt t in Loc.set state (Array.length r.buckets); Kcas.Xt.set ~xt t { r with pending } in Kcas.Xt.commit { tx }; Kcas.Xt.commit { tx = Xt.perform_pending t } |> ignore; (* Fenceless is safe as commit above has fences. *) match Loc.fenceless_get raised with Done -> () | exn -> raise exn let stats t = let length = ref 0 in let snapshot = snapshot ~length t in let num_bindings = !length in let num_buckets = Array.length snapshot in let bucket_lengths = Array.map Assoc.length snapshot in let max_bucket_length = Array.fold_left Int.max 0 bucket_lengths in let bucket_histogram = Array.make (max_bucket_length + 1) 0 in bucket_lengths |> Array.iter (fun i -> bucket_histogram.(i) <- 1 + bucket_histogram.(i)); Stdlib.Hashtbl. { num_bindings; num_buckets; max_bucket_length; bucket_histogram } kcas-0.7.0/src/kcas_data/hashtbl.mli000066400000000000000000000143201456672623200173050ustar00rootroot00000000000000open Kcas (** Hash table. The interface provides a subset of the OCaml [Stdlib.Hashtbl] module with some changes: - The functorial interface of the [Stdlib.Hashtbl] is not provided. Instead the constructor functions, {!create}, {!of_seq}, and {!rebuild}, take an optional [HashedType] module as an argument. By default {!create} returns a randomized hash table. - The [add_seq] and [replace_seq] operations are not provided at all. - Non-instance specific operations related to randomization (e.g. [randomize], [is_randomized]) are not provided. - Non-instance specific operations related to hashing (e.g. [hash], [seeded_hash], [hash_param], [seeded_hash_param]) are not provided. Compositional versions of {!find}, {!to_seq}, {!to_seq_keys}, {!to_seq_values}, {!rebuild}, {!copy}, {!iter}, {!filter_map_inplace}, {!fold}, and {!stats} are not provided. Please note that the design is intentionally based on [Stdlib.Hashtbl] and copies its semantics as accurately as possible. Some of the operations come with warnings. The hash table implementation is designed to avoid starvation. Read-only accesses can generally proceed in parallel without interference. Write accesses that do not change the number of bindings can proceed in parallel as long as they hit different internal buckets. Write accesses that change the number of bindings use a scalable {!Accumulator} and only make infrequent random checks to determine whether the hash table should be resized. *) (** {1 Common interface} *) type (!'k, !'v) t (** The type of hash tables from type ['k] to type ['v]. *) type 'k hashed_type = (module Stdlib.Hashtbl.HashedType with type t = 'k) (** First-class [HashedType] module type abbreviation. *) val create : ?hashed_type:'k hashed_type -> ?min_buckets:int -> ?max_buckets:int -> unit -> ('k, 'v) t (** [create ()] returns a new empty hash table. - The default [hash] is computed as [Stdlib.Hashtbl.hash (Random.bits ())]. - The default [equal] is [(=)]. - The default [min_buckets] is unspecified and a given [min_buckets] may be adjusted by the implementation. - The default [max_buckets] is the minimum of [1 lsl 30] and suitably adjusted [Sys.max_array_length] and a given [max_buckets] may be adjusted by the implementation. Hash tables are automatically internally resized. *) val hashed_type_of : ('k, 'v) t -> 'k hashed_type (** [hashed_type_of t] returns a copy of the hashed type used when the hash table [t] was {!create}d. *) val min_buckets_of : ('k, 'v) t -> int (** [min_buckets_of t] returns the minimum number of buckets of the hash table [t]. {b NOTE}: The returned value may not be the same as given to {!create}. *) val max_buckets_of : ('k, 'v) t -> int (** [max_buckets_of t] returns the maximum number of buckets of the hash table [t]. {b NOTE}: The returned value may not be the same as given to {!create}. *) val of_seq : ?hashed_type:'k hashed_type -> ?min_buckets:int -> ?max_buckets:int -> ('k * 'v) Seq.t -> ('k, 'v) t (** [of_seq assoc] creates a new hash table from the given association sequence [assoc]. The associations are added in the same order as they appear in the sequence, using {!replace}, which means that if two pairs have the same key, only the latest one will appear in the table. See {!create} for the optional arguments. ⚠️ [of_seq (to_seq t)] does not necessarily copy the bindings of a hash table correctly. *) (** {1 Compositional interface} *) module Xt : Hashtbl_intf.Ops with type ('k, 'v) t := ('k, 'v) t with type ('x, 'fn) fn := xt:'x Xt.t -> 'fn (** Explicit transaction log passing on hash tables. *) (** {1 Non-compositional interface} *) include Hashtbl_intf.Ops with type ('k, 'v) t := ('k, 'v) t with type ('x, 'fn) fn := 'fn val find : ('k, 'v) t -> 'k -> 'v (** [find t k] returns the current binding of [k] in hash table [t], or raises [Not_found] if no such binding exists. *) val to_seq : ('k, 'v) t -> ('k * 'v) Seq.t (** [to_seq t] takes a snapshot of the keys and values in the hash table and returns them as an association sequence. Bindings of each individual key appear in the sequence in reverse order of their introduction. ⚠️ [of_seq (to_seq t)] does not necessarily copy the bindings of a hash table correctly. *) val to_seq_keys : ('k, 'v) t -> 'k Seq.t (** [to_seq_keys t] is equivalent to [to_seq t |> Seq.map fst]. ⚠️ The sequence may include duplicates. *) val to_seq_values : ('k, 'v) t -> 'v Seq.t (** [to_seq_values t] is equivalent to [to_seq t |> Seq.map snd]. ⚠️ The sequence may include values of bindings that are hidden. *) val rebuild : ?hashed_type:'k hashed_type -> ?min_buckets:int -> ?max_buckets:int -> ('k, 'v) t -> ('k, 'v) t (** [rebuild t] returns a copy of the given hash table [t] optionally rehashing all of the bindings. See {!create} for descriptions of the optional arguments. Unlike {!create}, [rebuild] uses the given hash table [t] as a template to get defaults for the optional arguments. *) val copy : ('k, 'v) t -> ('k, 'v) t (** [copy t] is equivalent to [rebuild t]. In other words, the returned hash table uses the same {!hashed_type} (and other parameters) as the given hash table [t]. *) val iter : ('a -> 'b -> unit) -> ('a, 'b) t -> unit (** [iter f t] is equivalent to [Seq.iter (fun (k, v) -> f k v) (to_seq t)]. *) val filter_map_inplace : ('k -> 'v -> 'v option) -> ('k, 'v) t -> unit (** [filter_map_inplace f t] applies [f] to all bindings in the hash table [t] and updates each binding depending on the result of [f]. If [f] returns [None], the binding is discarded. If [f] returns [Some new_value], the binding is updated to associate the key to the [new_value]. ⚠️ The given [f] may be called multiple times for the same bindings from multiple domains in parallel. *) val fold : ('a -> 'b -> 'c -> 'c) -> ('a, 'b) t -> 'c -> 'c (** [fold f t a] is equivalent to [Seq.fold_left (fun a (k, v) -> f k v a) a (to_seq t)]. *) val stats : ('a, 'b) t -> Stdlib.Hashtbl.statistics (** [stats t] returns statistics about the hash table [t]. *) kcas-0.7.0/src/kcas_data/hashtbl_intf.ml000066400000000000000000000035061456672623200201600ustar00rootroot00000000000000module type Ops = sig type ('k, 'v) t type ('x, 'fn) fn val length : ('x, ('k, 'v) t -> int) fn (** [length t] returns the number of {i bindings} in the hash table [t]. ⚠️ The returned value may be greater than the number of {i distinct keys} in the hash table. *) val reset : ('x, ('k, 'v) t -> unit) fn (** [reset t] remove all bindings from the hash table [t] and shrinks the capacity of the table back to the minimum. *) val clear : ('x, ('k, 'v) t -> unit) fn (** [clear] is a synonym for {!reset}. *) val swap : ('x, ('k, 'v) t -> ('k, 'v) t -> unit) fn (** [swap t1 t2] exchanges the contents of the hash tables [t1] and [t2]. *) val remove : ('x, ('k, 'v) t -> 'k -> unit) fn (** [remove t k] removes the most recent existing binding of key [k], if any, from the hash table [t] thereby revealing the earlier binding of [k], if any. *) val add : ('x, ('k, 'v) t -> 'k -> 'v -> unit) fn (** [add t k v] adds a binding of key [k] to value [v] to the hash table shadowing the previous binding of the key [k], if any. ⚠️ Consider using {!replace} instead of [add]. *) val replace : ('x, ('k, 'v) t -> 'k -> 'v -> unit) fn (** [replace t k v] adds a binding of key [k] to value [v] or replaces the most recent existing binding of key [k] in the hash table [t]. *) val mem : ('x, ('k, 'v) t -> 'k -> bool) fn (** [mem t k] is equivalent to [Option.is_some (find_opt t k)]. *) val find_opt : ('x, ('k, 'v) t -> 'k -> 'v option) fn (** [find_opt t k] returns the current binding of key [k] in the hash table [t], or [None] if no such binding exists. *) val find_all : ('x, ('k, 'v) t -> 'k -> 'v list) fn (** [find_all t k] returns a list of all the bindings of the key [k] in the hash table in reverse order of their introduction. *) end kcas-0.7.0/src/kcas_data/kcas_data.ml000066400000000000000000000002471456672623200174240ustar00rootroot00000000000000module Hashtbl = Hashtbl module Queue = Queue module Stack = Stack module Mvar = Mvar module Promise = Promise module Dllist = Dllist module Accumulator = Accumulator kcas-0.7.0/src/kcas_data/kcas_data.mli000066400000000000000000000127701456672623200176010ustar00rootroot00000000000000(** This is a library of compositional lock-free data structures and primitives for communication and synchronization implemented using {!Kcas}. All data structure implementations in this library are concurrency and parallelism safe and should strive to provide the following guarantees: - Provided operations are {i strictly serializable} (i.e. both {{:https://en.wikipedia.org/wiki/Linearizability}linerizable} and {{:https://en.wikipedia.org/wiki/Serializability}serializable}). - Provided operations are efficient, either ({{:https://en.wikipedia.org/wiki/Amortized_analysis}amortized}) constant time, [O(1)], or logarithmic time, [O(log(n))]. - Provided operations are {{:https://en.wikipedia.org/wiki/Non-blocking_algorithm#Lock-freedom}lock-free} and designed to avoid {{:https://en.wikipedia.org/wiki/Starvation_(computer_science)}starvation} under moderate contention. - Provided read-only operations scale perfectly when only read-only operations are performed in parallel. Unobvious exceptions to the above guarantees should be clearly and explicitly documented. The main feature of these data structure implementations is their compositionality. If your application does not need compositionality, then other concurrency and parallelism safe data structure libraries may potentially offer better performance. But why should you care about composability? As an example, consider the implementation of a least-recently-used (LRU) cache or a bounded associative map, but first, let's open the libraries for convenience: {[ open Kcas open Kcas_data ]} A simple sequential approach to implement a LRU cache is to use a hash table and a doubly-linked list and keep track of the amount of space in the cache: {[ type ('k, 'v) cache = { space: int Loc.t; table: ('k, 'k Dllist.node * 'v) Hashtbl.t; order: 'k Dllist.t } ]} On a cache lookup the doubly-linked list node corresponding to the accessed key is moved to the left end of the list: {[ let get_opt {table; order; _} key = Hashtbl.find_opt table key |> Option.map @@ fun (node, datum) -> Dllist.move_l node order; datum ]} On a cache update, in case of overflow, the association corresponding to the node on the right end of the list is dropped: {[ let set {table; order; space; _} key datum = let node = match Hashtbl.find_opt table key with | None -> if 0 = Loc.update space (fun n -> max 0 (n-1)) then Dllist.take_opt_r order |> Option.iter (Hashtbl.remove table); Dllist.add_l key order | Some (node, _) -> Dllist.move_l node order; node in Hashtbl.replace table key (node, datum) ]} Sequential algorithms such as the above are so common that one does not even think about them. Unfortunately, in a concurrent setting the above doesn't work even if the individual operations on lists and hash tables were atomic as they are in this library. But how would one make the operations on a cache atomic as a whole? As explained by Maurice Herlihy in one of his talks on {{:https://youtu.be/ZkUrl8BZHjk?t=1503} Transactional Memory} adding locks to protect the atomicity of the operation is far from trivial. Fortunately, rather than having to e.g. wrap the cache implementation behind a {{:https://en.wikipedia.org/wiki/Lock_(computer_science)} mutex} and make another individually atomic yet uncomposable data structure, or having to learn a completely different programming model and rewrite the cache implementation, we can use the transactional programming model provided by the {!Kcas} library and the transactional data structures provided by this library to trivially convert the previous implementation to a lock-free composable transactional data structure. To make it so, we simply use transactional versions, [*.Xt.*], of operations on the data structures and explicitly pass a transaction log, [~xt], to the operations. For the [get_opt] operation we end up with {[ let get_opt ~xt {table; order; _} key = Hashtbl.Xt.find_opt ~xt table key |> Option.map @@ fun (node, datum) -> Dllist.Xt.move_l ~xt node order; datum ]} and the [set] operation is just as easy to convert to a transactional version. One way to think about transactions is that they give us back the ability to compose programs such as the above. *) (** {1 [Stdlib] style data structures} The data structures in this section are designed to closely mimic the corresponding unsynchronized data structures in the OCaml [Stdlib]. Each of these provide a non-compositional, but concurrency and parallelism safe, interface that is close to the [Stdlib] equivalent. Additionally, compositional transactional interfaces are provided for some operations. These implementations will use more space than the corresponding [Stdlib] data structures. Performance, when accessed concurrently, should be competitive or superior compared to naïve locking. *) module Hashtbl = Hashtbl module Queue = Queue module Stack = Stack (** {1 Communication and synchronization primitives} *) module Mvar = Mvar module Promise = Promise (** {1 Linked data structures} *) module Dllist = Dllist (** {1 Utilities} *) module Accumulator = Accumulator kcas-0.7.0/src/kcas_data/magic_option.ml000066400000000000000000000011061456672623200201550ustar00rootroot00000000000000open Kcas type 'a t = 'a let none = ref () let none = Obj.magic none external some : 'a -> 'a t = "%identity" let[@inline] is_none x = x == none let[@inline] is_some x = x != none let[@inline] get_or_retry x = if is_none x then Retry.later () else x let[@inline] put_or_retry v x = if is_none x then some v else Retry.later () let[@inline] take_or_retry x = if is_none x then Retry.later () else none external get_unsafe : 'a t -> 'a = "%identity" let[@inline] to_option x = if is_none x then None else Some x let[@inline] of_option = function None -> none | Some x -> some x kcas-0.7.0/src/kcas_data/magic_option.mli000066400000000000000000000005561456672623200203360ustar00rootroot00000000000000(** Unboxed option using a unique block to identify {!none}. *) type !'a t val none : 'a t val some : 'a -> 'a t val is_none : 'a t -> bool val is_some : 'a t -> bool val get_or_retry : 'a t -> 'a val put_or_retry : 'a -> 'a t -> 'a t val take_or_retry : 'a t -> 'a t val get_unsafe : 'a t -> 'a val to_option : 'a t -> 'a option val of_option : 'a option -> 'a t kcas-0.7.0/src/kcas_data/mvar.ml000066400000000000000000000025601456672623200164570ustar00rootroot00000000000000open Kcas type 'a t = 'a Magic_option.t Loc.t let create x_opt = Loc.make ~padded:true (Magic_option.of_option x_opt) module Xt = struct let is_empty ~xt mv = Magic_option.is_none (Xt.get ~xt mv) let try_put ~xt mv value = Magic_option.is_none (Xt.compare_and_swap ~xt mv Magic_option.none (Magic_option.some value)) let put ~xt mv value = Xt.modify ~xt mv (Magic_option.put_or_retry value) let take_opt ~xt mv = Magic_option.to_option (Xt.exchange ~xt mv Magic_option.none) let take ~xt mv = Magic_option.get_unsafe (Xt.update ~xt mv Magic_option.take_or_retry) let peek ~xt mv = Magic_option.get_or_retry (Xt.get ~xt mv) let peek_opt ~xt mv = Magic_option.to_option (Xt.get ~xt mv) end let is_empty mv = Magic_option.is_none (Loc.get mv) let put ?timeoutf mv value = (* Fenceless is safe as we always update. *) Loc.fenceless_modify ?timeoutf mv (Magic_option.put_or_retry value) let try_put mv value = Loc.compare_and_set mv Magic_option.none (Magic_option.some value) let take ?timeoutf mv = (* Fenceless is safe as we always update. *) Magic_option.get_unsafe (Loc.fenceless_update ?timeoutf mv Magic_option.take_or_retry) let take_opt mv = Magic_option.to_option (Loc.exchange mv Magic_option.none) let peek ?timeoutf mv = Loc.get_as ?timeoutf Magic_option.get_or_retry mv let peek_opt mv = Magic_option.to_option (Loc.get mv) kcas-0.7.0/src/kcas_data/mvar.mli000066400000000000000000000023061456672623200166260ustar00rootroot00000000000000open Kcas (** Synchronizing variable. A synchronizing variable is essentially equivalent to a ['a option Loc.t] with blocking semantics on both {!take} and {!put}. {b NOTE}: The current implementation is not guaranteed to be fair or scalable. In other words, when multiple producers block on {!put} or multiple consumers block on {!take} the operations are not queued and it is possible for a particular producer or consumer to starve. *) (** {1 Common interface} *) type !'a t (** The type of a synchronizing variable that may contain a value of type ['a]. *) val create : 'a option -> 'a t (** [create x_opt] returns a new synchronizing variable that will either be empty when [x_opt] is [None] or full when [x_opt] is [Some x]. *) (** {1 Compositional interface} *) module Xt : Mvar_intf.Ops with type 'a t := 'a t with type ('x, 'fn) fn := xt:'x Xt.t -> 'fn with type ('x, 'fn) blocking_fn := xt:'x Xt.t -> 'fn (** Explicit transaction passing on synchronizing variables. *) (** {1 Non-compositional interface} *) include Mvar_intf.Ops with type 'a t := 'a t with type ('x, 'fn) fn := 'fn with type ('x, 'fn) blocking_fn := ?timeoutf:float -> 'fn kcas-0.7.0/src/kcas_data/mvar_intf.ml000066400000000000000000000025341456672623200175000ustar00rootroot00000000000000module type Ops = sig type 'a t type ('x, 'fn) fn type ('x, 'fn) blocking_fn val is_empty : ('x, 'a t -> bool) fn (** [is_empty mv] determines whether the synchronizing variable [mv] contains a value or not. *) val put : ('x, 'a t -> 'a -> unit) blocking_fn (** [put mv x] fills the synchronizing variable [mv] with the value [v] or blocks until the variable becomes empty. *) val try_put : ('x, 'a t -> 'a -> bool) fn (** [try_put mv x] tries to fill the synchronizing variable [mv] with the value [v] and returns [true] on success or [false] in case the variable is full. *) val take : ('x, 'a t -> 'a) blocking_fn (** [take mv] removes and returns the current value of the synchronizing variable [mv] or blocks waiting until the variable is filled. *) val take_opt : ('x, 'a t -> 'a option) fn (** [take_opt mv] removes and returns the current value of the synchronizing variable [mv] or returns [None] in case the variable is empty. *) val peek : ('x, 'a t -> 'a) blocking_fn (** [peek mv] returns the current value of the synchronizing variable [mv] or blocks waiting until the variable is filled. *) val peek_opt : ('x, 'a t -> 'a option) fn (** [peek_opt mv] returns the current value of the synchronizing variable [mv] or returns [None] in case the variable is empty. *) end kcas-0.7.0/src/kcas_data/promise.ml000066400000000000000000000033741456672623200171740ustar00rootroot00000000000000open Kcas type 'a internal = 'a Magic_option.t Loc.t type !+'a t type !-'a u type 'a or_exn = ('a, exn) Stdlib.result t external to_promise : 'a internal -> 'a t = "%identity" external to_resolver : 'a internal -> 'a u = "%identity" external of_promise : 'a t -> 'a internal = "%identity" external of_resolver : 'a u -> 'a internal = "%identity" let create () = let p = Loc.make Magic_option.none in (to_promise p, to_resolver p) let create_resolved v = to_promise (Loc.make (Magic_option.some v)) let[@inline never] already_resolved () = invalid_arg "Can't resolve already-resolved promise" module Xt = struct let resolve ~xt u v = if Magic_option.is_some (Xt.compare_and_swap ~xt (of_resolver u) Magic_option.none (Magic_option.some v)) then already_resolved () let await ~xt t = Magic_option.get_or_retry (Xt.get ~xt (of_promise t)) let peek ~xt t = Magic_option.to_option (Xt.get ~xt (of_promise t)) let is_resolved ~xt t = Magic_option.is_some (Xt.get ~xt (of_promise t)) let await_exn ~xt t = match await ~xt t with Ok value -> value | Error exn -> raise exn let resolve_ok ~xt u v = resolve ~xt u (Ok v) let resolve_error ~xt u e = resolve ~xt u (Error e) end let await ?timeoutf t = Loc.get_as ?timeoutf Magic_option.get_or_retry (of_promise t) let resolve u v = if not (Loc.compare_and_set (of_resolver u) Magic_option.none (Magic_option.some v)) then already_resolved () let peek t = Magic_option.to_option (Loc.get (of_promise t)) let is_resolved t = Magic_option.is_some (Loc.get (of_promise t)) let await_exn ?timeoutf t = match await ?timeoutf t with Ok value -> value | Error exn -> raise exn let resolve_ok u v = resolve u (Ok v) let resolve_error u e = resolve u (Error e) kcas-0.7.0/src/kcas_data/promise.mli000066400000000000000000000027271456672623200173460ustar00rootroot00000000000000open Kcas (** A promise of a value to be resolved at some point in the future. Example: {[ # let promise, resolver = Promise.create () in let domain = Domain.spawn @@ fun () -> Printf.printf "Got %d\n%!" (Promise.await promise) in Promise.resolve resolver 42; Domain.join domain Got 42 - : unit = () ]} *) (** {1 Common interface} *) type !+'a t (** The type of a promise of a value of type ['a]. *) type !-'a u (** The type of a resolver of a value of type ['a]. *) type 'a or_exn = ('a, exn) Stdlib.result t (** The type of a promise of a result of type [('a, exn) result]. *) val create : unit -> 'a t * 'a u (** [create ()] returns a new unresolved pair of a promise and a resolver for the promise. *) val create_resolved : 'a -> 'a t (** [create_resolved x] returns a promise that is already resolved to the given value [x]. *) (** {1 Compositional interface} *) module Xt : Promise_intf.Ops with type 'a t := 'a t with type 'a or_exn := 'a or_exn with type 'a u := 'a u with type ('x, 'fn) fn := xt:'x Xt.t -> 'fn with type ('x, 'fn) blocking_fn := xt:'x Xt.t -> 'fn (** Explicit transaction log passing on promises. *) (** {1 Non-compositional interface} *) include Promise_intf.Ops with type 'a t := 'a t with type 'a or_exn := 'a or_exn with type 'a u := 'a u with type ('x, 'fn) fn := 'fn with type ('x, 'fn) blocking_fn := ?timeoutf:float -> 'fn kcas-0.7.0/src/kcas_data/promise_intf.ml000066400000000000000000000023671456672623200202150ustar00rootroot00000000000000module type Ops = sig type !+'a t type !-'a u type 'a or_exn type ('x, 'fn) fn type ('x, 'fn) blocking_fn val resolve : ('x, 'a u -> 'a -> unit) fn (** [resolve u v] resolves the promise corresponding to the resolver [u] to the value [v]. Any awaiters of the corresponding promise are then unblocked. *) val await : ('x, 'a t -> 'a) blocking_fn (** [await t] either immediately returns the resolved value of the promise [t] or blocks until the promise [t] is resolved. *) val peek : ('x, 'a t -> 'a option) fn (** [peek t] immediately returns either the resolved value of the promise [t] or [None] in case the promise hasn't yet been resolved. *) val is_resolved : ('x, 'a t -> bool) fn (** [is_resolved t] determines whether the promise [t] has already been resolved. *) (** {2 Result promises} *) val await_exn : ('x, 'a or_exn -> 'a) blocking_fn (** [await_exn t] is equivalent to [match await t with v -> v | exception e -> raise e]. *) val resolve_ok : ('x, ('a, 'b) result u -> 'a -> unit) fn (** [resolve_ok u v] is equivalent to [resolve u (Ok v)]. *) val resolve_error : ('x, ('a, 'b) result u -> 'b -> unit) fn (** [resolve_error u e] is equivalent to [resolve u (Error e)]. *) end kcas-0.7.0/src/kcas_data/queue.ml000066400000000000000000000130211456672623200166300ustar00rootroot00000000000000open Kcas type 'a t = { front : 'a Elems.t Loc.t; middle : 'a Elems.t Loc.t; back : 'a Elems.t Loc.t; } let alloc ~front ~middle ~back = (* We allocate locations in specific order to make most efficient use of the splay-tree based transaction log. *) let front = Loc.make ~padded:true front and middle = Loc.make ~padded:true middle and back = Loc.make ~padded:true back in Multicore_magic.copy_as_padded { back; middle; front } let create () = alloc ~front:Elems.empty ~middle:Elems.empty ~back:Elems.empty let copy q = let tx ~xt = (Xt.get ~xt q.front, Xt.get ~xt q.middle, Xt.get ~xt q.back) in let front, middle, back = Xt.commit { tx } in alloc ~front ~middle ~back module Xt = struct let is_empty ~xt t = (* We access locations in order of allocation to make most efficient use of the splay-tree based transaction log. *) Xt.get ~xt t.front == Elems.empty && Xt.get ~xt t.middle == Elems.empty && Xt.get ~xt t.back == Elems.empty let length ~xt { back; middle; front } = Elems.length (Xt.get ~xt front) + Elems.length (Xt.get ~xt middle) + Elems.length (Xt.get ~xt back) let add ~xt x q = Xt.modify ~xt q.back @@ Elems.cons x let push = add (** Cooperative helper to move elems from back to middle. *) let back_to_middle ~middle ~back = let tx ~xt = let xs = Xt.exchange ~xt back Elems.empty in if xs == Elems.empty || Xt.exchange ~xt middle xs != Elems.empty then raise_notrace Exit in try Xt.commit { tx } with Exit -> () let take_opt_finish ~xt front elems = let elems = Elems.rev elems in Xt.set ~xt front (Elems.tl_safe elems); Elems.hd_opt elems let take_opt ~xt t = let front = t.front in let elems = Xt.update ~xt front Elems.tl_safe in if elems != Elems.empty then Elems.hd_opt elems else let middle = t.middle and back = t.back in if not (Xt.is_in_log ~xt middle || Xt.is_in_log ~xt back) then back_to_middle ~middle ~back; let elems = Xt.exchange ~xt middle Elems.empty in if elems != Elems.empty then take_opt_finish ~xt front elems else let elems = Xt.exchange ~xt back Elems.empty in if elems != Elems.empty then take_opt_finish ~xt front elems else None let take_blocking ~xt q = Xt.to_blocking ~xt (take_opt q) let peek_opt_finish ~xt front elems = let elems = Elems.rev elems in Xt.set ~xt front elems; Elems.hd_opt elems let peek_opt ~xt t = let front = t.front in let elems = Xt.get ~xt front in if elems != Elems.empty then Elems.hd_opt elems else let middle = t.middle and back = t.back in if not (Xt.is_in_log ~xt middle || Xt.is_in_log ~xt back) then back_to_middle ~middle ~back; let elems = Xt.exchange ~xt middle Elems.empty in if elems != Elems.empty then peek_opt_finish ~xt front elems else let elems = Xt.exchange ~xt back Elems.empty in if elems != Elems.empty then peek_opt_finish ~xt front elems else None let peek_blocking ~xt q = Xt.to_blocking ~xt (peek_opt q) let clear ~xt t = Xt.set ~xt t.front Elems.empty; Xt.set ~xt t.middle Elems.empty; Xt.set ~xt t.back Elems.empty let swap ~xt q1 q2 = let front = Xt.get ~xt q1.front and middle = Xt.get ~xt q1.middle and back = Xt.get ~xt q1.back in let front = Xt.exchange ~xt q2.front front and middle = Xt.exchange ~xt q2.middle middle and back = Xt.exchange ~xt q2.back back in Xt.set ~xt q1.front front; Xt.set ~xt q1.middle middle; Xt.set ~xt q1.back back let seq_of ~front ~middle ~back = (* Sequence construction is lazy, so this function is O(1). *) Seq.empty |> Elems.rev_prepend_to_seq back |> Elems.rev_prepend_to_seq middle |> Elems.prepend_to_seq front let to_seq ~xt t = let front = Xt.get ~xt t.front and middle = Xt.get ~xt t.middle and back = Xt.get ~xt t.back in seq_of ~front ~middle ~back let take_all ~xt t = let front = Xt.exchange ~xt t.front Elems.empty and middle = Xt.exchange ~xt t.middle Elems.empty and back = Xt.exchange ~xt t.back Elems.empty in seq_of ~front ~middle ~back end let is_empty q = Kcas.Xt.commit { tx = Xt.is_empty q } let length q = Kcas.Xt.commit { tx = Xt.length q } let add x q = (* Fenceless is safe as we always update. *) Loc.fenceless_modify q.back @@ Elems.cons x let push = add let take_opt q = (* Fenceless is safe as we revert to a transaction in case we didn't update. *) match Loc.fenceless_update q.front Elems.tl_safe |> Elems.hd_opt with | None -> Kcas.Xt.commit { tx = Xt.take_opt q } | some -> some let take_blocking ?timeoutf q = (* Fenceless is safe as we revert to a transaction in case we didn't update. *) match Loc.fenceless_update q.front Elems.tl_safe |> Elems.hd_opt with | None -> Kcas.Xt.commit ?timeoutf { tx = Xt.take_blocking q } | Some elem -> elem let take_all q = Kcas.Xt.commit { tx = Xt.take_all q } let peek_opt q = match Loc.get q.front |> Elems.hd_opt with | None -> Kcas.Xt.commit { tx = Xt.peek_opt q } | some -> some let peek_blocking ?timeoutf q = Kcas.Xt.commit ?timeoutf { tx = Xt.peek_blocking q } let clear q = Kcas.Xt.commit { tx = Xt.clear q } let swap q1 q2 = Kcas.Xt.commit { tx = Xt.swap q1 q2 } let to_seq q = Kcas.Xt.commit { tx = Xt.to_seq q } let iter f q = Seq.iter f @@ to_seq q let fold f a q = Seq.fold_left f a @@ to_seq q exception Empty let[@inline] of_option = function None -> raise Empty | Some value -> value let peek s = peek_opt s |> of_option let top = peek let take s = take_opt s |> of_option kcas-0.7.0/src/kcas_data/queue.mli000066400000000000000000000035331456672623200170100ustar00rootroot00000000000000open Kcas (** First-In First-Out (FIFO) queue. The interface provides a subset of the OCaml [Stdlib.Queue] module. [transfer] and [add_seq] are not provided at all. Compositional versions of {!iter}, {!fold}, {!peek}, {!top}, and {!take} are not provided. The queue implementation is designed to avoid contention between a producer and a consumer operating concurrently. The implementation is also designed to avoid starvation. Performance in most concurrent use cases should be superior to what can be achieved with one or two locks. *) (** {1 Common interface} *) type !'a t (** The type of queues containing elements of type ['a]. *) exception Empty (** Raised when {!take} or {!peek} is applied to an empty queue. *) val create : unit -> 'a t (** [create ()] returns a new empty queue. *) val copy : 'a t -> 'a t (** [copy q] returns a copy of the queue [q]. *) (** {1 Compositional interface} *) module Xt : Queue_intf.Ops with type 'a t := 'a t with type ('x, 'fn) fn := xt:'x Xt.t -> 'fn with type ('x, 'fn) blocking_fn := xt:'x Xt.t -> 'fn (** Explicit transaction log passing on queues. *) (** {1 Non-compositional interface} *) include Queue_intf.Ops with type 'a t := 'a t with type ('x, 'fn) fn := 'fn with type ('x, 'fn) blocking_fn := ?timeoutf:float -> 'fn val peek : 'a t -> 'a (** [peek q] returns the first element in queue [s], or raises {!Empty} if the queue is empty. *) val top : 'a t -> 'a (** [top] is a synonym for {!peek}. *) val take : 'a t -> 'a (** [take s] removes and returns the first element in queue [q], or raises {!Empty} if the queue is empty. *) val iter : ('a -> unit) -> 'a t -> unit (** [iter f s] is equivalent to [Seq.iter f (to_seq s)]. *) val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b (** [fold f s] is equivalent to [Seq.fold_left f a (to_seq s)]. *) kcas-0.7.0/src/kcas_data/queue_intf.ml000066400000000000000000000037241456672623200176610ustar00rootroot00000000000000module type Ops = sig type 'a t type ('x, 'fn) fn type ('x, 'fn) blocking_fn val is_empty : ('x, 'a t -> bool) fn (** [is_empty s] determines whether the queue [q] is empty. *) val length : ('x, 'a t -> int) fn (** [length q] returns the length of the queue [q]. *) val clear : ('x, 'a t -> unit) fn (** [clear q] removes all elements from the queue [q]. *) val swap : ('x, 'a t -> 'a t -> unit) fn (** [swap q1 q2] exchanges the contents of the queues [q1] and [q2]. *) val to_seq : ('x, 'a t -> 'a Seq.t) fn (** [to_seq s] returns a concurrency and parallelism safe sequence for iterating through the elements of the queue front to back. The sequence is based on a constant time, [O(1)], snapshot of the queue and modifications of the queue have no effect on the sequence. *) val add : ('x, 'a -> 'a t -> unit) fn (** [add x q] adds the element [x] at the end of the queue [q]. *) val push : ('x, 'a -> 'a t -> unit) fn (** [push] is a synonym for {!add}. *) val peek_opt : ('x, 'a t -> 'a option) fn (** [peek_opt q] returns the first element in queue [q], without removing it from the queue, or returns [None] if the queue is empty. *) val peek_blocking : ('x, 'a t -> 'a) blocking_fn (** [peek_blocking q] returns the first element in queue [q], without removing it from the queue, or blocks waiting for the queue to become non-empty. *) val take_blocking : ('x, 'a t -> 'a) blocking_fn (** [take_blocking q] removes and returns the first element in queue [q], or blocks waiting for the queue to become non-empty. *) val take_opt : ('x, 'a t -> 'a option) fn (** [take_opt q] removes and returns the first element in queue [q], or returns [None] if the queue is empty. *) val take_all : ('x, 'a t -> 'a Seq.t) fn (** [take_all q] removes and returns a concurrency and parallelism safe sequence for iterating through all the elements that were in the queue front to back. *) end kcas-0.7.0/src/kcas_data/stack.ml000066400000000000000000000034071456672623200166200ustar00rootroot00000000000000open Kcas type 'a t = 'a Elems.t Loc.t let create () = Loc.make ~padded:true Elems.empty let copy s = Loc.make ~padded:true @@ Loc.get s let of_seq xs = Loc.make ~padded:true (Elems.of_seq_rev xs) module Xt = struct let length ~xt s = Xt.get ~xt s |> Elems.length let is_empty ~xt s = Xt.get ~xt s == Elems.empty let push ~xt x s = Xt.modify ~xt s @@ Elems.cons x let pop_opt ~xt s = Xt.update ~xt s Elems.tl_safe |> Elems.hd_opt let pop_all ~xt s = Elems.to_seq @@ Xt.exchange ~xt s Elems.empty let pop_blocking ~xt s = Xt.update ~xt s Elems.tl_safe |> Elems.hd_or_retry let top_opt ~xt s = Xt.get ~xt s |> Elems.hd_opt let top_blocking ~xt s = Xt.get ~xt s |> Elems.hd_or_retry let clear ~xt s = Xt.set ~xt s Elems.empty let swap ~xt s1 s2 = Xt.swap ~xt s1 s2 let to_seq ~xt s = Elems.to_seq @@ Xt.get ~xt s end let length s = Loc.get s |> Elems.length let is_empty s = Loc.get s == Elems.empty let push x s = (* Fenceless is safe as we always update. *) Loc.fenceless_modify s @@ Elems.cons x let pop_opt s = Loc.update s Elems.tl_safe |> Elems.hd_opt let pop_all s = Loc.exchange s Elems.empty |> Elems.to_seq let pop_blocking ?timeoutf s = (* Fenceless is safe as we always update. *) Loc.fenceless_update ?timeoutf s Elems.tl_or_retry |> Elems.hd_unsafe let top_opt s = Loc.get s |> Elems.hd_opt let top_blocking ?timeoutf s = Loc.get_as ?timeoutf Elems.hd_or_retry s let clear s = Loc.set s Elems.empty let swap s1 s2 = Kcas.Xt.commit { tx = Kcas.Xt.swap s1 s2 } let to_seq s = Elems.to_seq @@ Loc.get s let iter f s = Elems.iter f @@ Loc.get s let fold f a s = Elems.fold f a @@ Loc.get s exception Empty let[@inline] of_option = function None -> raise Empty | Some value -> value let top s = top_opt s |> of_option let pop s = pop_opt s |> of_option kcas-0.7.0/src/kcas_data/stack.mli000066400000000000000000000033551456672623200167730ustar00rootroot00000000000000open Kcas (** Last-In First-Out (LIFO) stack. The interface provides a subset of the OCaml [Stdlib.Stack] module. [add_seq] is not provided at all. Compositional versions of {!iter}, {!fold}, {!pop}, and {!top} are not provided. The implementation is essentially a {{:https://en.wikipedia.org/wiki/Treiber_stack}Treiber stack} with randomized exponential backoff and support for constant time {!length}. *) (** {1 Common interface} *) type !'a t (** The type of stacks containing elements of type ['a]. *) exception Empty (** Raised when {!pop} or {!top} is applied to an empty stack. *) val create : unit -> 'a t (** [create ()] returns a new empty stack. *) val copy : 'a t -> 'a t (** [copy s] returns a copy of the stack [s]. *) val of_seq : 'a Seq.t -> 'a t (** [of_seq xs] creates a stack from the sequence [xs]. *) (** {1 Compositional interface} *) module Xt : Stack_intf.Ops with type 'a t := 'a t with type ('x, 'fn) fn := xt:'x Xt.t -> 'fn with type ('x, 'fn) blocking_fn := xt:'x Xt.t -> 'fn (** Explicit transaction log passing on stacks. *) (** {1 Non-compositional interface} *) include Stack_intf.Ops with type 'a t := 'a t with type ('x, 'fn) fn := 'fn with type ('x, 'fn) blocking_fn := ?timeoutf:float -> 'fn val pop : 'a t -> 'a (** [pop s] removes and returns the topmost element in stack [s], or raises {!Empty} if the stack is empty. *) val top : 'a t -> 'a (** [top s] returns the topmost element in stack [s], or raises {!Empty} if the stack is empty. *) val iter : ('a -> unit) -> 'a t -> unit (** [iter f s] is equivalent to [Seq.iter f (to_seq s)]. *) val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b (** [fold f s] is equivalent to [Seq.fold_left f a (to_seq s)]. *) kcas-0.7.0/src/kcas_data/stack_intf.ml000066400000000000000000000034611456672623200176400ustar00rootroot00000000000000module type Ops = sig type 'a t type ('x, 'fn) fn type ('x, 'fn) blocking_fn val is_empty : ('x, 'a t -> bool) fn (** [is_empty s] determines whether the stack [s] is empty. *) val length : ('x, 'a t -> int) fn (** [length s] returns the length of the stack [s]. *) val clear : ('x, 'a t -> unit) fn (** [clear s] removes all elements from the stack [s]. *) val swap : ('x, 'a t -> 'a t -> unit) fn (** [swap s1 s2] exchanges the contents of the stacks [s1] and [s2]. *) val to_seq : ('x, 'a t -> 'a Seq.t) fn (** [to_seq s] returns a concurrency and parallelism safe sequence for iterating through the elements of the stack top to bottom. The sequence is based on a constant time, [O(1)], snapshot of the stack and modifications of the stack have no effect on the sequence. *) val push : ('x, 'a -> 'a t -> unit) fn (** [push x s] adds the element [x] to the top of the stack [s]. *) val pop_opt : ('x, 'a t -> 'a option) fn (** [pop_opt s] removes and returns the topmost element of the stack [s], or [None] if the stack is empty. *) val pop_all : ('x, 'a t -> 'a Seq.t) fn (** [pop_all s] removes and returns a concurrency and parallelism safe sequence for iterating through all the elements that were in the stack top to bottom. *) val pop_blocking : ('x, 'a t -> 'a) blocking_fn (** [pop_blocking s] removes and returns the topmost element of the stack [s], or blocks waiting for the queue to become non-empty. *) val top_opt : ('x, 'a t -> 'a option) fn (** [top_opt s] returns the topmost element in stack [s], or [None] if the stack is empty. *) val top_blocking : ('x, 'a t -> 'a) blocking_fn (** [top_blocking s] returns the topmost element in stack [s], or blocks waiting for the queue to become non-empty. *) end kcas-0.7.0/test/000077500000000000000000000000001456672623200134335ustar00rootroot00000000000000kcas-0.7.0/test/kcas/000077500000000000000000000000001456672623200143545ustar00rootroot00000000000000kcas-0.7.0/test/kcas/barrier.ml000066400000000000000000000003421456672623200163330ustar00rootroot00000000000000type t = { counter : int Atomic.t; total : int } let make total = { counter = Atomic.make 0; total } let await { counter; total } = Atomic.incr counter; while Atomic.get counter < total do Domain.cpu_relax () done kcas-0.7.0/test/kcas/barrier.mli000066400000000000000000000000621456672623200165030ustar00rootroot00000000000000type t val make : int -> t val await : t -> unit kcas-0.7.0/test/kcas/dune000066400000000000000000000002421456672623200152300ustar00rootroot00000000000000(tests (names test ms_queue_test threads loc_modes) (libraries alcotest kcas domain-local-timeout threads.posix unix domain_shims) (package kcas)) kcas-0.7.0/test/kcas/loc_modes.ml000066400000000000000000000045411456672623200166560ustar00rootroot00000000000000open Kcas let loop_count = try int_of_string Sys.argv.(1) with _ -> Util.iter_factor let mode = Some (try if Sys.argv.(2) = "obstruction-free" then `Obstruction_free else `Lock_free with _ -> `Lock_free) (* Number of shared counters being used to try to cause interference *) (* Number of private accumulators used for extra work *) let n_counters = try int_of_string Sys.argv.(3) with _ -> 2 let n_accumulators = try int_of_string Sys.argv.(4) with _ -> 2 let sleep_time = try int_of_string Sys.argv.(5) with _ -> 85 (* Set to true when the accumulator work is done and counter threads may exit. This way we ensure that the counter threads are causing interference for the whole duration of the test. *) let exit = ref false (* Counters are first initialized with a dummy location *) let counters = let dummy_location_to_be_replaced = Loc.make 0 in Array.make n_counters dummy_location_to_be_replaced (* Barrier used to synchronize counter threads and the accumulator thread *) let barrier = Barrier.make (n_counters + 1) let counter_thread i () = (* We allocate actual counter locations within the domain to avoid false sharing *) let counter = Loc.make ?mode 0 in counters.(i) <- counter; let tx ~xt = Xt.incr ~xt counter in Barrier.await barrier; while not !exit do (* Increment the accumulator to cause interference *) Xt.commit { tx }; (* Delay for a bit. If we don't delay enough, we can starve the accumulator. *) for _ = 1 to sleep_time do Domain.cpu_relax () done done let accumulator_thread () = (* Accumulators allocated in the domain to avoid false sharing *) let accumulators = Array.init n_accumulators (fun _ -> Loc.make 0) in let tx ~xt = (* Compute sum of counters - these accesses can be interfered with *) let sum_of_counters = Array.fold_left (fun sum counter -> sum + Xt.get ~xt counter) 0 counters in (* And do some other work (updating accumulators) *) Array.iter (fun accumulator -> Xt.fetch_and_add ~xt accumulator sum_of_counters |> ignore) accumulators in Barrier.await barrier; for _ = 1 to loop_count do Xt.commit { tx } done; exit := true let () = accumulator_thread :: List.init n_counters counter_thread |> List.map Domain.spawn |> List.iter Domain.join; Printf.printf "Loc modes OK!\n%!" kcas-0.7.0/test/kcas/ms_queue_test.ml000066400000000000000000000066411456672623200175770ustar00rootroot00000000000000open Kcas module Q = struct type 'a node = Nil | Node of 'a * 'a node Loc.t type 'a queue = { head : 'a node Loc.t Loc.t; tail : 'a node Loc.t Atomic.t } let queue () = let next = Loc.make Nil in { head = Loc.make next; tail = Atomic.make next } let try_dequeue ~xt { head; _ } = let old_head = Xt.get ~xt head in match Xt.get ~xt old_head with | Nil -> None | Node (value, next) -> Xt.set ~xt head next; Some value let enqueue ~xt { tail; _ } value = let new_tail = Loc.make Nil in let new_node = Node (value, new_tail) in let rec find_and_set_tail old_tail = match Xt.compare_and_swap ~xt old_tail Nil new_node with | Nil -> () | Node (_, old_tail) -> find_and_set_tail old_tail in find_and_set_tail (Atomic.get tail); let rec fix_tail () = let old_tail = Atomic.get tail in if Loc.get new_tail == Nil && not (Atomic.compare_and_set tail old_tail new_tail) then fix_tail () in Xt.post_commit ~xt fix_tail let check_tail { tail; _ } = Loc.get (Atomic.get tail) == Nil end let failure exit msg = Atomic.set exit true; Printf.printf "%s\n%!" msg; failwith msg let write_skew_test n = let q1 = Q.queue () and q2 = Q.queue () in let push_to_q2 ~xt = match Q.try_dequeue ~xt q1 with None -> Q.enqueue ~xt q2 42 | Some _ -> () and push_to_q1 ~xt = match Q.try_dequeue ~xt q2 with None -> Q.enqueue ~xt q1 24 | Some _ -> () and clear ~xt = (Q.try_dequeue ~xt q1, Q.try_dequeue ~xt q2) in let barrier = Atomic.make 3 in let sync () = Atomic.decr barrier; while Atomic.get barrier != 0 do Domain.cpu_relax () done in let exit = Atomic.make false in let domains = [ Domain.spawn (fun () -> sync (); while not (Atomic.get exit) do Xt.commit { tx = push_to_q1 } done); Domain.spawn (fun () -> sync (); while not (Atomic.get exit) do Xt.commit { tx = push_to_q2 } done); ] in sync (); for _ = 1 to n do match Xt.commit { tx = clear } with | Some _, Some _ -> failure exit "write skew!" | _ -> () done; Atomic.set exit true; List.iter Domain.join domains let tail_leak_test n = let q = Q.queue () in let m = 2 in let exit = Atomic.make false and rounds = Array.init m @@ fun _ -> Atomic.make (n * 2) in let finished () = Array.exists (fun round -> Atomic.get round <= 0) rounds || Atomic.get exit and sync i = let n = Atomic.fetch_and_add rounds.(i) (-1) - 1 in while rounds |> Array.exists @@ fun round -> n < Atomic.get round do if Atomic.get exit then failwith "exit" done in let domain i () = try while not (finished ()) do sync i; Xt.commit { tx = Q.enqueue q 42 }; if None == Xt.commit { tx = Q.try_dequeue q } then failure exit "impossible!"; sync i; if not (Q.check_tail q) then failure exit "tail leak!" done with e -> Atomic.set exit true; raise e in List.init m domain |> List.map Domain.spawn |> List.iter Domain.join let () = let n = try int_of_string Sys.argv.(1) with _ -> 1 * Util.iter_factor in Alcotest.run "MS queue" [ ( "write skew", [ Alcotest.test_case "" `Quick (fun () -> write_skew_test n) ] ); ( "tail leak", [ Alcotest.test_case "" `Quick (fun () -> tail_leak_test n) ] ); ] kcas-0.7.0/test/kcas/test.ml000066400000000000000000000441601456672623200156720ustar00rootroot00000000000000(*--------------------------------------------------------------------------- Copyright (c) 2016 KC Sivaramakrishnan. All rights reserved. Distributed under the ISC license, see terms at the end of the file. %%NAME%% %%VERSION%% ---------------------------------------------------------------------------*) (* ######## Copyright (c) 2017, Nicolas ASSOUAD ######## *) (*--------------------------------------------------------------------------- Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ---------------------------------------------------------------------------*) let is_single = Domain.recommended_domain_count () = 1 open Kcas let nb_iter = 100 * Util.iter_factor let assert_kcas loc expected_v = let present_v = Loc.get loc in assert (present_v == expected_v) let run_domains = function | [] -> () | main :: others -> let others = List.map Domain.spawn others in main (); List.iter Domain.join others (* *) let test_non_linearizable_xt () = [ `Obstruction_free; `Lock_free ] |> List.iter @@ fun mode -> let barrier = Barrier.make 2 and n_iter = 100 * Util.iter_factor and test_finished = ref false in let a = Loc.make ~mode 0 and b = Loc.make ~mode 0 in let cass1a ~xt = (Xt.get ~xt b == 0 && Xt.compare_and_set ~xt a 0 1) || Retry.invalid () and cass1b ~xt = (Xt.get ~xt b == 0 && Xt.compare_and_set ~xt a 1 0) || Retry.invalid () and cass2a ~xt = (Xt.compare_and_set ~xt b 0 1 && Xt.get ~xt a == 0) || Retry.invalid () and cass2b ~xt = (Xt.compare_and_set ~xt b 1 0 && Xt.get ~xt a == 0) || Retry.invalid () in let atomically tx = if Random.bool () then Xt.commit ~mode:`Obstruction_free tx else Xt.commit tx in let thread1 () = Barrier.await barrier; while not !test_finished do if atomically { tx = cass1a } then while not (atomically { tx = cass1b }) do if is_single then Domain.cpu_relax (); assert (Loc.get a == 1 && Loc.get b == 0) done else if is_single then Domain.cpu_relax () done and thread2 () = Barrier.await barrier; for _ = 1 to n_iter do if atomically { tx = cass2a } then while not (atomically { tx = cass2b }) do if is_single then Domain.cpu_relax (); assert (Loc.get a == 0 && Loc.get b == 1) done else if is_single then Domain.cpu_relax () done; test_finished := true in run_domains [ thread2; thread1 ] (* *) let test_set () = let a = Loc.make 0 in assert_kcas a 0; Loc.set a 1; assert_kcas a 1 (* *) let test_no_skew_xt () = [ `Obstruction_free; `Lock_free ] |> List.iter @@ fun mode -> let barrier = Barrier.make 3 in let test_finished = Atomic.make false in let a1 = Loc.make ~mode 0 in let a2 = Loc.make ~mode 0 in let thread1 () = let c1 ~xt = Xt.compare_and_set ~xt a1 0 1 && Xt.compare_and_set ~xt a2 0 1 in let c2 ~xt = Xt.compare_and_set ~xt a1 1 0 && Xt.compare_and_set ~xt a2 1 0 in Barrier.await barrier; for _ = 1 to nb_iter do assert_kcas a1 0; assert_kcas a2 0; let out1 = Xt.commit { tx = c1 } in assert out1; assert_kcas a1 1; assert_kcas a2 1; let out2 = Xt.commit { tx = c2 } in assert out2 done; Atomic.set test_finished true and thread2 () = let c1 ~xt = Xt.get ~xt a1 == 0 && Xt.get ~xt a2 == 1 in let c2 ~xt = Xt.get ~xt a2 == 1 && Xt.get ~xt a2 == 0 in Barrier.await barrier; while not (Atomic.get test_finished) do let out1 = Xt.commit { tx = c1 } in let out2 = Xt.commit { tx = c2 } in assert (not out1); assert (not out2); if is_single then Domain.cpu_relax () done and thread3 () = let c1 ~xt = Xt.get ~xt a1 == 1 && Xt.get ~xt a2 == 0 in let c2 ~xt = Xt.get ~xt a2 == 0 && Xt.get ~xt a2 == 1 in Barrier.await barrier; while not (Atomic.get test_finished) do let out1 = Xt.commit { tx = c1 } in let out2 = Xt.commit { tx = c2 } in assert (not out1); assert (not out2); if is_single then Domain.cpu_relax () done in run_domains [ thread1; thread2; thread3 ] (* *) let test_get_seq_xt () = [ `Obstruction_free; `Lock_free ] |> List.iter @@ fun mode -> let barrier = Barrier.make 4 in let test_finished = Atomic.make false in let a1 = Loc.make ~mode 0 in let a2 = Loc.make ~mode 0 in let mutator () = Barrier.await barrier; for _ = 0 to nb_iter do let tx ~xt = Xt.incr ~xt a1; Xt.incr ~xt a2 in Xt.commit { tx } done; Atomic.set test_finished true and getter () = Barrier.await barrier; while not (Atomic.get test_finished) do let a = Loc.get a1 in let b = Loc.get a2 in assert (a <= b); if is_single then Domain.cpu_relax () done and getaser () = Barrier.await barrier; while not (Atomic.get test_finished) do let a = Loc.get_as Fun.id a1 in let b = Loc.get_as Fun.id a2 in assert (a <= b); if is_single then Domain.cpu_relax () done and committer () = Barrier.await barrier; while not (Atomic.get test_finished) do let a = Xt.commit { tx = Xt.get a1 } in let b = Xt.commit { tx = Xt.get a2 } in assert (a <= b); if is_single then Domain.cpu_relax () done and updater () = Barrier.await barrier; while not (Atomic.get test_finished) do let a = Loc.update a1 Fun.id in let b = Loc.update a2 Fun.id in assert (a <= b); if is_single then Domain.cpu_relax () done in run_domains [ mutator; getter; getaser; committer; updater ] (* *) let test_stress_xt n nb_loop = [ `Obstruction_free; `Lock_free ] |> List.iter @@ fun mode -> let make_loc n = let rec loop n out = if n > 0 then loop (n - 1) (Loc.make ~mode 0 :: out) else out in loop n [] and make_kcas0 ~xt r_l = let rec loop ~xt r_l = match r_l with | h :: t -> Xt.compare_and_set ~xt h 0 1 && loop ~xt t | [] -> true in loop ~xt r_l and make_kcas1 ~xt r_l = let rec loop ~xt r_l = match r_l with | h :: t -> Xt.compare_and_set ~xt h 1 0 && loop ~xt t | [] -> true in loop ~xt r_l in let r_l = make_loc n in let kcas0 ~xt = make_kcas0 ~xt r_l in let kcas1 ~xt = make_kcas1 ~xt r_l in for _ = 1 to nb_loop do assert (Xt.commit { tx = kcas0 }); assert (Xt.commit { tx = kcas1 }) done (* *) (** Various tests make accesses in random order to exercise the internal splay tree based transaction log handling. *) let in_place_shuffle array = let n = Array.length array in for i = 0 to n - 2 do let j = Random.int (n - i) + i in let t = array.(i) in array.(i) <- array.(j); array.(j) <- t done (* *) let test_presort_and_is_in_log_xt () = let n_incs = 10 * Util.iter_factor and n_domains = 3 and n_locs = 12 in let n_locs_half = n_locs asr 1 in let barrier = Barrier.make n_domains in let locs = Array.init n_locs (fun _ -> Loc.make 0) in let thread () = let locs = Array.copy locs in Random.self_init (); Barrier.await barrier; for _ = 1 to n_incs do in_place_shuffle locs; let tx ~xt = for i = 0 to n_locs_half - 1 do Xt.incr ~xt locs.(i) done; assert (Xt.is_in_log ~xt locs.(Random.int n_locs_half)); assert (not (Xt.is_in_log ~xt locs.(n_locs_half))) in Xt.commit { tx } done in run_domains (List.init n_domains (Fun.const thread)); let sum = locs |> Array.map Loc.get |> Array.fold_left ( + ) 0 in assert (sum = n_incs * n_locs_half * n_domains) (* *) let test_updates () = let x = Loc.make 0 in assert (Loc.fetch_and_add x 1 = 0); assert (Loc.get x = 1); Loc.incr x; assert (Loc.get x = 2); Loc.set x 1; assert (Loc.get x = 1); Loc.decr x; assert (Loc.get x = 0); assert (Loc.exchange x 1 = 0) (* *) let test_post_commit () = let attempt_with_post_commit ~expect { Xt.tx } = let count = ref 0 in let tx ~xt = tx ~xt; Xt.post_commit ~xt @@ fun () -> incr count in begin try count := 0; Xt.commit ~mode:`Obstruction_free { tx } with Exit -> () end; assert (!count = expect) in let tx ~xt:_ = raise Exit in attempt_with_post_commit ~expect:0 { tx }; let tx ~xt:_ = () in attempt_with_post_commit ~expect:1 { tx }; let a = Loc.make 0 and b = Loc.make 0 in attempt_with_post_commit ~expect:1 { tx = Xt.modify a Fun.id }; attempt_with_post_commit ~expect:1 { tx = Xt.incr a }; let tx ~xt = Xt.set ~xt a (Xt.exchange ~xt b 0) in attempt_with_post_commit ~expect:1 { tx } (* *) let test_blocking () = let state = Loc.make `Spawned in let await state' = (* Intentionally test that [Xt.modify] allows retry. *) let tx ~xt = Xt.modify ~xt state @@ fun state -> Retry.unless (state == state'); state in Xt.commit { tx } in let a = Loc.make 0 and bs = Array.init 10 @@ fun _ -> Loc.make 0 in let n = 10 * Util.iter_factor in let num_attempts = ref 0 in let other_domain = Domain.spawn @@ fun () -> Loc.set state `Get_a_non_zero; Loc.get_as (fun a -> incr num_attempts; Retry.unless (a != 0)) a; Loc.set state `Update_a_zero; Loc.modify a (fun a -> incr num_attempts; Retry.unless (a = 0); a + 1); Loc.set state `Set_1_b_to_0; let bs = Array.copy bs in for _ = 1 to n do (* We access in random order to exercise tx log waiters handling. *) in_place_shuffle bs; let tx ~xt = incr num_attempts; match bs |> Array.find_map @@ fun b -> if Xt.get ~xt b = 1 then Some b else if Loc.has_awaiters b (* There must be no leaked waiters... *) then begin (* ...except if main domain just set the loc *) assert (Loc.get b = 1); Retry.later () end else None with | None -> Retry.later () | Some b -> Xt.set ~xt b 0 in Xt.commit { tx } done in await `Get_a_non_zero; Loc.set a 1; assert (!num_attempts <= 2 + 1 (* Need to account for race to next. *)); await `Update_a_zero; Loc.set a 0; assert (!num_attempts <= 4 + 1 (* Need to account for race to next. *)); await `Set_1_b_to_0; for _ = 1 to n do let i = Random.int (Array.length bs) in Loc.set bs.(i) 1; Loc.get_as (fun b -> Retry.unless (b = 0)) bs.(i) done; Domain.join other_domain; assert (!num_attempts <= 4 + (n * 2)); for i = 0 to Array.length bs - 1 do assert (not (Loc.has_awaiters bs.(i))) done let test_no_unnecessary_wakeups () = let continue = Loc.make false and tries = Atomic.make 0 in let other_domain = Domain.spawn @@ fun () -> continue |> Loc.get_as @@ fun s -> Atomic.incr tries; Retry.unless s in while not (Loc.has_awaiters continue) do Domain.cpu_relax () done; assert (Loc.compare_and_set continue false false); assert (not (Loc.update continue Fun.id)); Loc.set continue false; Unix.sleepf 0.01; assert (Loc.has_awaiters continue && Atomic.get tries = 1); Loc.set continue true; Domain.join other_domain; assert ((not (Loc.has_awaiters continue)) && Atomic.get tries = 2) (* *) let test_periodic_validation () = let a = Loc.make 0 and b = Loc.make 0 and looping = ref false in let non_zero_difference_domain = Domain.spawn @@ fun () -> let rec tx ~xt = let d = Xt.get ~xt a - Xt.get ~xt b in if d <> 0 then d else begin (* We explicitly want this tx to go into infinite loop! Without validation this would never finish. *) looping := true; tx ~xt end in Xt.commit { tx } in while not !looping do Domain.cpu_relax () done; Loc.set a 1; assert (1 = Domain.join non_zero_difference_domain) (* *) let test_explicit_validation () = let a = Loc.make 0 and b = Loc.make 0 in let exit = ref false and mutator_running = ref false in let mutator_domain = Domain.spawn @@ fun () -> mutator_running := true; while not !exit do let tx ~xt = Xt.decr ~xt a; Xt.incr ~xt b in Xt.commit { tx }; Domain.cpu_relax () done in let n = 100 in while not !mutator_running do Domain.cpu_relax () done; for _ = 1 to n do let tx ~xt = let a' = Xt.get ~xt a and b' = Xt.get ~xt b in Xt.validate ~xt a; assert (a' + b' = 0) in Xt.commit { tx } done; exit := true; Domain.join mutator_domain (* *) let test_rollback () = let n_iter = 10 * Util.iter_factor in let n_locs = 20 in let locs = Loc.make_array n_locs 0 in let accum = ref 0 in for _ = 1 to n_iter do let n_permanent = Random.int n_locs in let n_rollbacks = Random.int n_locs in let expected = ref false in let unexpected = ref false in let tx ~xt = in_place_shuffle locs; for i = 0 to n_permanent - 1 do Xt.incr ~xt locs.(i) done; Xt.post_commit ~xt (fun () -> expected := true); let snap = Xt.snapshot ~xt in in_place_shuffle locs; for i = 0 to n_rollbacks - 1 do Xt.incr ~xt locs.(i) done; Xt.post_commit ~xt (fun () -> unexpected := true); Xt.rollback ~xt snap in Xt.commit { tx }; assert !expected; assert (not !unexpected); accum := n_permanent + !accum done; let sum = Array.map Loc.get locs |> Array.fold_left ( + ) 0 in assert (!accum = sum) (* *) let test_call () = let never = Xt.{ tx = (fun ~xt:_ -> Retry.later ()) } in let result = Xt.commit { tx = Xt.first [ Xt.call never; (fun ~xt:_ -> 101) ] } in assert (result = 101) (* *) (** This is a non-deterministic test that might fail occasionally. *) let test_timeout () = Domain_local_timeout.set_system (module Thread) (module Unix); let check (op : ?timeoutf:float -> bool Loc.t -> unit) () = let rec loop n = let x = Loc.make false in let (_ : unit -> unit) = Domain_local_timeout.set_timeoutf 0.6 @@ fun () -> Loc.set x true in match op ~timeoutf:0.02 x with | () -> if 0 < n then loop (n - 1) else assert false | exception Timeout.Timeout -> op ~timeoutf:2.0 x in loop 10 in run_domains [ check (fun ?timeoutf x -> Loc.get_as ?timeoutf (fun x -> if not x then Retry.later ()) x); check (fun ?timeoutf x -> Loc.update ?timeoutf x (fun x -> x || Retry.later ()) |> ignore); check (fun ?timeoutf x -> Loc.modify ?timeoutf x (fun x -> x || Retry.later ())); check (fun ?timeoutf x -> let y = Loc.make false in let tx ~xt = if not (Xt.get ~xt x) then Retry.later (); Xt.swap ~xt x y in Xt.commit ?timeoutf { tx }); check (fun ?timeoutf x -> let y = Loc.make false in let tx ~xt = if not (Xt.get ~xt x) then Retry.invalid (); Xt.swap ~xt x y in Xt.commit ?timeoutf { tx }); ] (* *) let test_mode () = assert (Loc.get_mode (Loc.make ~mode:`Lock_free 0) == `Lock_free); assert (Loc.get_mode (Loc.make ~mode:`Obstruction_free 0) == `Obstruction_free); assert (Loc.get_mode (Loc.make 0) == `Obstruction_free) (* *) type _ _loc_is_injective = | Int : int _loc_is_injective | Loc : 'a _loc_is_injective -> 'a Loc.t _loc_is_injective (* *) let test_xt () = let rx = Loc.make 0 in let ry = Loc.make 1 in let tx ~xt = let y = Xt.get ~xt ry in Xt.set ~xt rx y; let x' = Xt.get ~xt rx in assert (x' = y) in Xt.commit { tx }; assert (Loc.get rx = Loc.get ry) let () = Alcotest.run "Kcas" [ ( "non linearizable xt", [ Alcotest.test_case "" `Quick test_non_linearizable_xt ] ); ("set", [ Alcotest.test_case "" `Quick test_set ]); ("no skew xt", [ Alcotest.test_case "" `Quick test_no_skew_xt ]); ("get seq xt", [ Alcotest.test_case "" `Quick test_get_seq_xt ]); ( "stress xt", [ Alcotest.test_case "" `Quick (fun () -> test_stress_xt (10 * Util.iter_factor) 1_0); ] ); ( "is_in_log", [ Alcotest.test_case "" `Quick test_presort_and_is_in_log_xt ] ); ("updates", [ Alcotest.test_case "" `Quick test_updates ]); ("post commit", [ Alcotest.test_case "" `Quick test_post_commit ]); ("blocking", [ Alcotest.test_case "" `Quick test_blocking ]); ( "no unnecessary wakeups", [ Alcotest.test_case "" `Quick test_no_unnecessary_wakeups ] ); ( "pediodic validation", [ Alcotest.test_case "" `Quick test_periodic_validation ] ); ( "explicit validation", [ Alcotest.test_case "" `Quick test_explicit_validation ] ); ("rollback", [ Alcotest.test_case "" `Quick test_rollback ]); ("call", [ Alcotest.test_case "" `Quick test_call ]); ("mode", [ Alcotest.test_case "" `Quick test_mode ]); ("xt", [ Alcotest.test_case "" `Quick test_xt ]); ( "timeout (non-deterministic)", [ Alcotest.test_case "" `Quick test_timeout ] ); ] kcas-0.7.0/test/kcas/threads.ml000066400000000000000000000007701456672623200163440ustar00rootroot00000000000000open Kcas let await_between_threads () = let x = Loc.make 0 in let y = Loc.make 0 in let a_thread = () |> Thread.create @@ fun () -> Loc.get_as (fun x -> Retry.unless (x <> 0)) x; Loc.set y 22 in Loc.set x 20; Loc.get_as (fun y -> Retry.unless (y <> 0)) y; Thread.join a_thread; assert (Loc.get x + Loc.get y = 42) let () = Alcotest.run "Threads" [ ( "await between threads", [ Alcotest.test_case "" `Quick await_between_threads ] ); ] kcas-0.7.0/test/kcas/util.ml000066400000000000000000000002641456672623200156650ustar00rootroot00000000000000let iter_factor = let factor b = if b then 10 else 1 in factor (64 <= Sys.word_size) * factor (Sys.backend_type = Native) * factor (1 < Domain.recommended_domain_count ()) kcas-0.7.0/test/kcas_data/000077500000000000000000000000001456672623200153455ustar00rootroot00000000000000kcas-0.7.0/test/kcas_data/accumulator_test_stm.ml000066400000000000000000000022771456672623200221500ustar00rootroot00000000000000open QCheck open STM open Kcas_data module Spec = struct type cmd = Incr | Decr | Get | Set of int let show_cmd = function | Incr -> "Incr" | Decr -> "Decr" | Get -> "Get" | Set v -> "Set " ^ string_of_int v type state = int type sut = Accumulator.t let arb_cmd _s = [ Gen.return Incr; Gen.return Decr; Gen.return Get; Gen.map (fun i -> Set i) Gen.nat; ] |> Gen.oneof |> make ~print:show_cmd let init_state = 0 let init_sut () = Accumulator.make 0 let cleanup _ = () let next_state c s = match c with Incr -> s + 1 | Decr -> s - 1 | Get -> s | Set v -> v let precond _ _ = true let run c d = match c with | Incr -> Res (unit, Accumulator.incr d) | Decr -> Res (unit, Accumulator.decr d) | Get -> Res (int, Accumulator.get d) | Set v -> Res (unit, Accumulator.set d v) let postcond c (s : state) res = match (c, res) with | Incr, Res ((Unit, _), ()) -> true | Decr, Res ((Unit, _), ()) -> true | Set _, Res ((Unit, _), ()) -> true | Get, Res ((Int, _), res) -> res = s | _, _ -> false end let () = Stm_run.run ~count:1000 ~verbose:true ~name:"Accumulator" (module Spec) |> exit kcas-0.7.0/test/kcas_data/dllist_test.ml000066400000000000000000000042251456672623200202340ustar00rootroot00000000000000open Kcas_data let[@tail_mod_cons] rec take_as_list take l = match take l with None -> [] | Some x -> x :: take_as_list take l let basics () = let t1 = Dllist.create () in let t1' = Dllist.take_all t1 in assert (Dllist.to_list_r t1 = [] && Dllist.to_list_l t1' = []); Dllist.transfer_r t1' t1'; Dllist.add_r 2 t1' |> ignore; Dllist.move_r (Dllist.create_node 3) t1'; Dllist.swap t1' t1'; Dllist.add_l 1 t1' |> ignore; Dllist.transfer_r t1' t1'; let t1 = Dllist.take_all t1' in assert (Dllist.to_list_l t1' = [] && Dllist.to_list_r t1 = [ 3; 2; 1 ]); let t2 = Dllist.create () in Dllist.transfer_r t2 t1; Dllist.transfer_l t2 t1; Dllist.swap t1 t2; Dllist.swap t1 t2; Dllist.transfer_l t2 t2; Dllist.add_r 4 t2 |> ignore; Dllist.swap t1 t2; Dllist.swap t1 t2; Dllist.transfer_l t2 t2; Dllist.transfer_l t1 t2; Dllist.transfer_l t1 t2; Dllist.swap t1 t2; assert (Dllist.take_opt_l t2 = None); assert (Dllist.take_opt_l t2 = None); assert (take_as_list Dllist.take_opt_r t1 = [ 4; 3; 2; 1 ]) let add () = let l = Dllist.create () in Dllist.add_l 1 l |> ignore; Dllist.add_l 3 l |> ignore; Dllist.add_r 4 l |> ignore; assert (take_as_list Dllist.take_opt_l l = [ 3; 1; 4 ]) let move () = let t1 = Dllist.create () in let n1 = Dllist.add_l 5.3 t1 in Dllist.move_l n1 t1; assert (Dllist.to_list_l t1 = [ 5.3 ]); Dllist.move_r n1 t1; assert (Dllist.to_list_l t1 = [ 5.3 ]); let n2 = Dllist.add_l 5.2 t1 in assert (Dllist.to_list_l t1 = [ 5.2; 5.3 ]); Dllist.move_r n2 t1; assert (Dllist.to_list_l t1 = [ 5.3; 5.2 ]); Dllist.move_l n2 t1; assert (Dllist.to_list_l t1 = [ 5.2; 5.3 ]); let t2 = Dllist.create () in Dllist.move_l n1 t2; assert (Dllist.to_list_l t1 = [ 5.2 ]); assert (Dllist.to_list_l t2 = [ 5.3 ]); Dllist.move_r n2 t2; assert (Dllist.to_list_l t2 = [ 5.3; 5.2 ]); Dllist.move_l n1 t1; assert (Dllist.to_list_l t2 = [ 5.2 ]); assert (Dllist.to_list_l t1 = [ 5.3 ]) let () = Alcotest.run "Dllist" [ ("basics", [ Alcotest.test_case "" `Quick basics ]); ("add", [ Alcotest.test_case "" `Quick add ]); ("move", [ Alcotest.test_case "" `Quick move ]); ] kcas-0.7.0/test/kcas_data/dllist_test_stm.ml000066400000000000000000000044071456672623200211210ustar00rootroot00000000000000open QCheck open STM open Kcas_data module Spec = struct type cmd = Add_l of int | Take_opt_l | Add_r of int | Take_opt_r let show_cmd = function | Add_l x -> "Add_l " ^ string_of_int x | Take_opt_l -> "Take_opt_l" | Add_r x -> "Add_r " ^ string_of_int x | Take_opt_r -> "Take_opt_r" module State = struct type t = int list * int list let push_l x (l, r) = (x :: l, r) let push_r x (l, r) = (l, x :: r) let drop_l (l, r) = match l with | _ :: l -> (l, r) | [] -> begin match List.rev r with [] -> ([], []) | _ :: l -> (l, []) end let drop_r (l, r) = match r with | _ :: r -> (l, r) | [] -> begin match List.rev l with [] -> ([], []) | _ :: r -> ([], r) end let peek_opt_l (l, r) = match l with | x :: _ -> Some x | [] -> begin match List.rev r with x :: _ -> Some x | [] -> None end let peek_opt_r (l, r) = match r with | x :: _ -> Some x | [] -> begin match List.rev l with x :: _ -> Some x | [] -> None end end type state = State.t type sut = int Dllist.t let arb_cmd _s = [ Gen.int |> Gen.map (fun x -> Add_l x); Gen.return Take_opt_l; Gen.int |> Gen.map (fun x -> Add_r x); Gen.return Take_opt_r; ] |> Gen.oneof |> make ~print:show_cmd let init_state = ([], []) let init_sut () = Dllist.create () let cleanup _ = () let next_state c s = match c with | Add_l x -> State.push_l x s | Take_opt_l -> State.drop_l s | Add_r x -> State.push_r x s | Take_opt_r -> State.drop_r s let precond _ _ = true let run c d = match c with | Add_l x -> Res (unit, Dllist.add_l x d |> ignore) | Take_opt_l -> Res (option int, Dllist.take_opt_l d) | Add_r x -> Res (unit, Dllist.add_r x d |> ignore) | Take_opt_r -> Res (option int, Dllist.take_opt_r d) let postcond c (s : state) res = match (c, res) with | Add_l _x, Res ((Unit, _), ()) -> true | Take_opt_l, Res ((Option Int, _), res) -> res = State.peek_opt_l s | Add_r _x, Res ((Unit, _), ()) -> true | Take_opt_r, Res ((Option Int, _), res) -> res = State.peek_opt_r s | _, _ -> false end let () = Stm_run.run ~count:1000 ~verbose:true ~name:"Dllist" (module Spec) |> exit kcas-0.7.0/test/kcas_data/dune000066400000000000000000000013321456672623200162220ustar00rootroot00000000000000(rule (enabled_if %{lib-available:qcheck-stm.domain}) (action (copy stm_run.ocaml5.ml stm_run.ml))) (rule (enabled_if (not %{lib-available:qcheck-stm.domain})) (action (copy stm_run.ocaml4.ml stm_run.ml))) (tests (names accumulator_test_stm dllist_test dllist_test_stm hashtbl_test hashtbl_test_stm linearizable_chaining_example lru_cache_example mvar_test queue_test queue_test_stm stack_test stack_test_stm xt_test) (libraries alcotest kcas kcas_data domain_shims qcheck-core qcheck-core.runner qcheck-stm.stm qcheck-stm.sequential qcheck-stm.thread (select empty.ml from (qcheck-stm.domain -> empty.ocaml5.ml) (-> empty.ocaml4.ml))) (package kcas_data)) kcas-0.7.0/test/kcas_data/empty.ocaml4.ml000066400000000000000000000000001456672623200202010ustar00rootroot00000000000000kcas-0.7.0/test/kcas_data/empty.ocaml5.ml000066400000000000000000000000001456672623200202020ustar00rootroot00000000000000kcas-0.7.0/test/kcas_data/hashtbl_test.ml000066400000000000000000000061361456672623200203710ustar00rootroot00000000000000open Kcas open Kcas_data let replace_and_remove () = let t = Hashtbl.create () in let n = try int_of_string Sys.argv.(1) with _ -> 10 * Util.iter_factor in for i = 1 to n do Hashtbl.replace t i i done; assert (Hashtbl.length t = n); assert (Seq.fold_left (fun n _ -> n + 1) 0 (Hashtbl.to_seq t) = n); for i = 1 to n do assert (Hashtbl.find t i = i) done; for i = 1 to n do Hashtbl.remove t i done; assert (Hashtbl.length t = 0) let large_tx () = let t = Hashtbl.create () in let n = 1_000 in let tx ~xt = for i = 1 to n do Hashtbl.Xt.replace ~xt t i i done in Xt.commit { tx }; assert (Seq.fold_left (fun n _ -> n + 1) 0 (Hashtbl.to_seq t) = n); let tx ~xt = for i = 1 to n do assert (Hashtbl.Xt.find_opt ~xt t i = Some i) done in Xt.commit { tx } let large_ops () = let t = Hashtbl.create () in Hashtbl.add t "key" 1; Hashtbl.add t "key" 2; Hashtbl.add t "key" 3; assert ( Hashtbl.fold (fun k v kvs -> (k, v) :: kvs) t [] = [ ("key", 1); ("key", 2); ("key", 3) ]); let stats = Hashtbl.stats t in assert (stats.num_bindings = 3); assert (stats.num_buckets > 0); assert (stats.max_bucket_length = 3); assert (stats.bucket_histogram.(3) = 1); assert (Hashtbl.find_all t "key" = [ 3; 2; 1 ]); let t' = Hashtbl.copy t in assert (Hashtbl.find_all t' "key" = [ 3; 2; 1 ]); let t' = Hashtbl.rebuild ~hashed_type:(Hashtbl.hashed_type_of t) t in assert (Hashtbl.find_all t' "key" = [ 3; 2; 1 ]); assert ( Hashtbl.to_seq t |> List.of_seq = [ ("key", 3); ("key", 2); ("key", 1) ]); let u = Hashtbl.to_seq t |> Hashtbl.of_seq in Hashtbl.swap t u; assert (Hashtbl.find t "key" = 1); assert (Hashtbl.find u "key" = 3); Hashtbl.filter_map_inplace (fun _ v -> if v = 1 then None else Some (-v)) u; assert (Hashtbl.find_all u "key" = [ -3; -2 ]); Hashtbl.swap u t; assert (Hashtbl.length t = 2); (match Hashtbl.filter_map_inplace (fun _ v -> if v = -2 then raise Exit else None) t with | _ -> assert false | exception Exit -> ()); assert (Hashtbl.find_all t "key" = [ -3; -2 ]); assert (Hashtbl.length t = 2) let basics () = let t = Hashtbl.create () in assert (Hashtbl.length t = 0); Hashtbl.replace t "foo" 101; Hashtbl.remove t "bar"; assert (Hashtbl.length t = 1); Hashtbl.replace t "bar" 19; assert (Hashtbl.mem t "foo"); assert (not (Hashtbl.mem t "bal")); Hashtbl.replace t "foo" 76; assert (Hashtbl.length t = 2); assert (Hashtbl.find_opt t "lol" = None); assert ( Hashtbl.to_seq t |> List.of_seq |> List.sort compare = [ ("bar", 19); ("foo", 76) ]); Hashtbl.remove t "foo"; assert (Hashtbl.length t = 1); assert (Hashtbl.to_seq t |> List.of_seq |> List.sort compare = [ ("bar", 19) ]); Hashtbl.reset t; assert (not (Hashtbl.mem t "nope")) let () = Alcotest.run "Hashtbl" [ ("replace and remove", [ Alcotest.test_case "" `Quick replace_and_remove ]); ("large tx", [ Alcotest.test_case "" `Quick large_tx ]); ("large ops", [ Alcotest.test_case "" `Quick large_ops ]); ("basics", [ Alcotest.test_case "" `Quick basics ]); ] kcas-0.7.0/test/kcas_data/hashtbl_test_stm.ml000066400000000000000000000033511456672623200212500ustar00rootroot00000000000000open QCheck open STM open Kcas_data module Spec = struct type cmd = Add of int | Mem of int | Remove of int | Clear | Length let show_cmd = function | Add x -> "Add " ^ string_of_int x | Mem x -> "Mem " ^ string_of_int x | Remove x -> "Remove " ^ string_of_int x | Clear -> "Clear" | Length -> "Length" type state = int list type sut = (int, unit) Hashtbl.t let arb_cmd _s = [ Gen.int_bound 10 |> Gen.map (fun x -> Add x); Gen.int_bound 10 |> Gen.map (fun x -> Mem x); Gen.int_bound 10 |> Gen.map (fun x -> Remove x); Gen.return Clear; Gen.return Length; ] |> Gen.oneof |> make ~print:show_cmd let init_state = [] let init_sut () = Hashtbl.create () let cleanup _ = () let next_state c s = match c with | Add x -> x :: s | Mem _ -> s | Remove x -> let[@tail_mod_cons] rec drop_first = function | [] -> [] | x' :: xs -> if x = x' then xs else x' :: drop_first xs in drop_first s | Clear -> [] | Length -> s let precond _ _ = true let run c d = match c with | Add x -> Res (unit, Hashtbl.add d x ()) | Mem x -> Res (bool, Hashtbl.mem d x) | Remove x -> Res (unit, Hashtbl.remove d x) | Clear -> Res (unit, Hashtbl.clear d) | Length -> Res (int, Hashtbl.length d) let postcond c (s : state) res = match (c, res) with | Add _x, Res ((Unit, _), ()) -> true | Mem x, Res ((Bool, _), res) -> res = List.exists (( = ) x) s | Remove _x, Res ((Unit, _), ()) -> true | Clear, Res ((Unit, _), ()) -> true | Length, Res ((Int, _), res) -> res = List.length s | _, _ -> false end let () = Stm_run.run ~count:1000 ~verbose:true ~name:"Hashtbl" (module Spec) |> exit kcas-0.7.0/test/kcas_data/linearizable_chaining_example.ml000066400000000000000000000145761456672623200237300ustar00rootroot00000000000000(** This demonstrates an approach to composing non-blocking linearizable data structures inspired by the paper Concurrent Size by Gal Sela and Erez Petrank https://arxiv.org/pdf/2209.07100.pdf First a [Hashtbl] is implemented that allows [idempotent_add] and [idempotent_remove] operations to be specified. The hash table makes sure that any operations that might witness the addition or the removal of a key will perform those operations before returning. Then a [Hashtbl_with_order] is implemented on top of the [Hashtbl] by specifying the [idempotent_add] and [idempotent_remove] operation such that they update a lock-free doubly-linked list to maintain a list of the keys in the hash table in insertion [order]. In other words, we composed a hash table with a doubly-linked list, both lock-free and linearizable, resulting in a lock-free linearizable hash table that maintains the insertion order. Finally a STM tests is used test against linearizability violations. Note that this technique does not require the use of Kcas or software transactional memory, but Kcas makes it easy to demonstrate the technique, because it makes it easy to implement idempotent non-blocking operations based on existing non-blocking data structures, such as the doubly-linked list used in this example. *) open Kcas open Kcas_data module type Hashtbl_base = sig type (!'k, !'v) t val find_opt : ('k, 'v) t -> 'k -> 'v option val add : ('k, 'v) t -> 'k -> 'v -> bool val remove : ('k, 'v) t -> 'k -> bool end module Hashtbl : sig include Hashtbl_base val create : ?idempotent_add:('k -> 'v -> ('k, 'v) t -> unit) -> ?idempotent_remove:('k -> 'v -> ('k, 'v) t -> unit) -> unit -> ('k, 'v) t end = struct type ('k, 'v) t = { idempotent_add : 'k -> 'v -> ('k, 'v) t -> unit; idempotent_remove : 'k -> 'v -> ('k, 'v) t -> unit; hashtbl : ('k, ('k, 'v) value) Hashtbl.t; } and ('k, 'v) value = | Add of { event : ('k, 'v) t -> unit; value : 'v } | Remove of { event : ('k, 'v) t -> unit } let create ?(idempotent_add = fun _ _ _ -> ()) ?(idempotent_remove = fun _ _ _ -> ()) () = let hashtbl = Hashtbl.create () in { idempotent_add; idempotent_remove; hashtbl } let find_opt t key = match Hashtbl.find_opt t.hashtbl key with | None -> None | Some (Add r) -> r.event t; Some r.value | Some (Remove r) -> r.event t; None let add t key value = let event = t.idempotent_add key value in let value = Add { event; value } in let tx ~xt = begin match Hashtbl.Xt.find_opt ~xt t.hashtbl key with | None -> true | Some (Add r) -> r.event t; false | Some (Remove r) -> r.event t; true end && begin Hashtbl.Xt.replace ~xt t.hashtbl key value; true end in Xt.commit { tx } && begin event t; true end let remove t key = let tx ~xt = begin match Hashtbl.Xt.find_opt ~xt t.hashtbl key with | None -> false | Some (Add r) -> r.event t; let event = t.idempotent_remove key r.value in let value = Remove { event } in Hashtbl.Xt.replace ~xt t.hashtbl key value; true | Some (Remove r) -> r.event t; false end in Xt.commit { tx } && let tx ~xt = match Hashtbl.Xt.find_opt ~xt t.hashtbl key with | None -> () | Some (Add _) -> () | Some (Remove r) -> r.event t; Hashtbl.Xt.remove ~xt t.hashtbl key in Xt.commit { tx }; true end module Hashtbl_with_order : sig include Hashtbl_base val create : unit -> ('k, 'v) t val order : ('k, 'v) t -> 'k list end = struct type ('k, 'v) t = { table : ('k, 'k Dllist.node * 'v) Hashtbl.t; order : 'k Dllist.t; } let create () = let order = Dllist.create () in let idempotent_add _key (node, _value) = let node = Loc.make (Some node) in let tx ~xt = match Xt.exchange ~xt node None with | None -> () | Some node -> Dllist.Xt.move_l ~xt node order in fun _table -> Xt.commit { tx } in let idempotent_remove _key (node, _value) = let node = Loc.make (Some node) in let tx ~xt = match Xt.exchange ~xt node None with | None -> () | Some node -> Dllist.Xt.remove ~xt node in fun _table -> Xt.commit { tx } in let table = Hashtbl.create ~idempotent_add ~idempotent_remove () in { table; order } let find_opt t key = Hashtbl.find_opt t.table key |> Option.map (fun (_, v) -> v) let add t key value = Hashtbl.add t.table key (Dllist.create_node key, value) let remove t key = Hashtbl.remove t.table key let order t = Dllist.to_list_l t.order end module Spec = struct type cmd = Add of int | Remove of int | Order let show_cmd = function | Add key -> "Add " ^ string_of_int key | Remove key -> "Remove " ^ string_of_int key | Order -> "Order" type state = int list type sut = (int, unit) Hashtbl_with_order.t let arb_cmd _s = QCheck.( [ (* Generate keys in small range so that [remove] hits. *) Gen.int_range 1 5 |> Gen.map (fun key -> Add key); Gen.int_range 1 5 |> Gen.map (fun key -> Remove key); Gen.return Order; ] |> Gen.oneof |> make ~print:show_cmd) let init_state = [] let init_sut () = Hashtbl_with_order.create () let cleanup _ = () let next_state c s = match c with | Add key -> if List.for_all (( != ) key) s then key :: s else s | Remove key -> List.filter (( != ) key) s | Order -> s let precond _ _ = true let run c d = let open STM in match c with | Add key -> Res (bool, Hashtbl_with_order.add d key ()) | Remove key -> Res (bool, Hashtbl_with_order.remove d key) | Order -> Res (list int, Hashtbl_with_order.order d) let postcond c (s : state) res = let open STM in match (c, res) with | Add key, Res ((Bool, _), res) -> res = List.for_all (( != ) key) s | Remove key, Res ((Bool, _), res) -> res = List.exists (( == ) key) s | Order, Res ((List Int, _), res) -> res = s | _, _ -> false end let () = Stm_run.run ~count:1000 ~verbose:true ~name:"Hashtbl_with_order" (module Spec) |> exit kcas-0.7.0/test/kcas_data/lru_cache.ml000066400000000000000000000041351456672623200176270ustar00rootroot00000000000000open Kcas open Kcas_data type ('k, 'v) t = { space : int Loc.t; table : ('k, 'k Dllist.node * 'v) Hashtbl.t; order : 'k Dllist.t; } let check_capacity capacity = if capacity < 0 then invalid_arg "Lru_cache: capacity must be non-negative" let create ?hashed_type capacity = check_capacity capacity; { space = Loc.make capacity; table = Hashtbl.create ?hashed_type (); order = Dllist.create (); } module Xt = struct let capacity_of ~xt c = Xt.get ~xt c.space + Hashtbl.Xt.length ~xt c.table let set_capacity ~xt c new_capacity = check_capacity new_capacity; let old_length = Hashtbl.Xt.length ~xt c.table in let old_space = Xt.get ~xt c.space in let old_capacity = old_space + old_length in for _ = 1 to old_length - new_capacity do Dllist.Xt.take_blocking_r ~xt c.order |> Hashtbl.Xt.remove ~xt c.table done; Xt.set ~xt c.space (Int.max 0 (old_space + new_capacity - old_capacity)) let get_opt ~xt c key = Hashtbl.Xt.find_opt ~xt c.table key |> Option.map @@ fun (node, datum) -> Dllist.Xt.move_l ~xt node c.order; datum let set_blocking ~xt c key datum = let node = match Hashtbl.Xt.find_opt ~xt c.table key with | None -> if 0 = Xt.update ~xt c.space (fun n -> Int.max 0 (n - 1)) then Dllist.Xt.take_blocking_r ~xt c.order |> Hashtbl.Xt.remove ~xt c.table; Dllist.Xt.add_l ~xt key c.order | Some (node, _) -> Dllist.Xt.move_l ~xt node c.order; node in Hashtbl.Xt.replace ~xt c.table key (node, datum) let remove ~xt c key = Hashtbl.Xt.find_opt ~xt c.table key |> Option.iter @@ fun (node, _) -> Hashtbl.Xt.remove ~xt c.table key; Dllist.Xt.remove ~xt node; Xt.incr ~xt c.space end let capacity_of c = Kcas.Xt.commit { tx = Xt.capacity_of c } let set_capacity c n = Kcas.Xt.commit { tx = Xt.set_capacity c n } let get_opt c k = Kcas.Xt.commit { tx = Xt.get_opt c k } let set_blocking ?timeoutf c k v = Kcas.Xt.commit ?timeoutf { tx = Xt.set_blocking c k v } let remove c k = Kcas.Xt.commit { tx = Xt.remove c k } kcas-0.7.0/test/kcas_data/lru_cache.mli000066400000000000000000000007101456672623200177730ustar00rootroot00000000000000open Kcas open Kcas_data type ('k, 'v) t val create : ?hashed_type:'k Hashtbl.hashed_type -> int -> ('k, 'v) t module Xt : Lru_cache_intf.Ops with type ('k, 'v) t := ('k, 'v) t with type ('x, 'fn) fn := xt:'x Xt.t -> 'fn with type ('x, 'fn) blocking_fn := xt:'x Xt.t -> 'fn include Lru_cache_intf.Ops with type ('k, 'v) t := ('k, 'v) t with type ('x, 'fn) fn := 'fn with type ('x, 'fn) blocking_fn := ?timeoutf:float -> 'fn kcas-0.7.0/test/kcas_data/lru_cache_example.ml000066400000000000000000000031021456672623200213330ustar00rootroot00000000000000open Kcas module Lru_cache = struct include Lru_cache module Xt = struct include Xt let get ~xt c key = Kcas.Xt.to_blocking ~xt (get_opt c key) let get_if ~xt c key predicate = let snap = Kcas.Xt.snapshot ~xt in let datum = get ~xt c key in if predicate datum then datum else Retry.later (Kcas.Xt.rollback ~xt snap) let try_set ~xt c key datum = match set_blocking ~xt c key datum with | () -> true | exception Retry.Later -> false end let get ?timeoutf c k = Kcas.Xt.commit ?timeoutf { tx = Xt.get c k } let get_if ?timeoutf c k p = Kcas.Xt.commit ?timeoutf { tx = Xt.get_if c k p } let try_set c k d = Kcas.Xt.commit { tx = Xt.try_set c k d } end let () = let c = Lru_cache.create 10 in let domain = Domain.spawn @@ fun () -> let tx ~xt = Lru_cache.Xt.get ~xt c "a" + Lru_cache.Xt.get ~xt c "b" in Xt.commit { tx } in Lru_cache.set_blocking c "b" 30; Lru_cache.set_blocking c "a" 12; assert (Domain.join domain = 42); () let () = let c = Lru_cache.create 10 in assert (Lru_cache.try_set c "a" 1); Lru_cache.set_blocking c "c" 2; assert (Lru_cache.capacity_of c = 10); assert (Lru_cache.get_opt c "b" = None); assert (Lru_cache.get c "a" = 1); Lru_cache.set_capacity c 3; assert (Lru_cache.get c "c" = 2); Lru_cache.set_capacity c 1; assert (Lru_cache.capacity_of c = 1); assert (Lru_cache.get_opt c "a" = None); assert (Lru_cache.get_if c "c" (( <> ) 0) = 2); Lru_cache.remove c "c"; assert (Lru_cache.get_opt c "c" = None); () let () = Printf.printf "LRU Cache OK!\n%!" kcas-0.7.0/test/kcas_data/lru_cache_intf.ml000066400000000000000000000005631456672623200206500ustar00rootroot00000000000000module type Ops = sig type ('k, 'v) t type ('x, 'fn) fn type ('x, 'fn) blocking_fn val capacity_of : ('x, ('k, 'v) t -> int) fn val set_capacity : ('x, ('k, 'v) t -> int -> unit) fn val get_opt : ('x, ('k, 'v) t -> 'k -> 'v option) fn val set_blocking : ('x, ('k, 'v) t -> 'k -> 'v -> unit) blocking_fn val remove : ('x, ('k, 'v) t -> 'k -> unit) fn end kcas-0.7.0/test/kcas_data/mvar_test.ml000066400000000000000000000011241456672623200177010ustar00rootroot00000000000000open Kcas open Kcas_data let basics () = let mv = Mvar.create (Some 101) in assert (not (Mvar.is_empty mv)); assert (Mvar.take mv = 101); assert (Mvar.is_empty mv); assert (Mvar.take_opt mv = None); Mvar.put mv 42; let running = Mvar.create None in let d = Domain.spawn @@ fun () -> Mvar.put running (); Xt.commit { tx = Mvar.Xt.put mv 76 } in assert (Mvar.take running = ()); assert (Xt.commit { tx = Mvar.Xt.take mv } = 42); Domain.join d; assert (Mvar.take mv = 76) let () = Alcotest.run "Mvar" [ ("basics", [ Alcotest.test_case "" `Quick basics ]) ] kcas-0.7.0/test/kcas_data/queue_test.ml000066400000000000000000000017341456672623200200670ustar00rootroot00000000000000open Kcas open Kcas_data let basics () = let q = Queue.create () in Queue.add 101 q; let tx ~xt = Queue.Xt.add ~xt 42 q; assert (Queue.Xt.take_opt ~xt q = Some 101); assert (Queue.Xt.length ~xt q = 1); assert (Queue.Xt.take_opt ~xt q = Some 42); assert (Queue.Xt.take_opt ~xt q = None) in Xt.commit { tx }; let q = Queue.create () in assert (Queue.length q = 0); assert (Queue.is_empty q); Queue.add 101 q; assert (Queue.length q = 1); assert (not (Queue.is_empty q)); let r = Queue.copy q in assert (Queue.peek_opt q = Some 101); Queue.add 42 q; assert (List.of_seq (Queue.to_seq q) = [ 101; 42 ]); Queue.swap q r; assert (Queue.peek_opt q = Some 101); assert (Queue.take_opt q = Some 101); assert (Queue.take_opt q = None); assert (Queue.take_opt r = Some 101); assert (Queue.take_opt r = Some 42); assert (Queue.take_opt r = None) let () = Alcotest.run "Queue" [ ("basics", [ Alcotest.test_case "" `Quick basics ]) ] kcas-0.7.0/test/kcas_data/queue_test_stm.ml000066400000000000000000000033401456672623200207450ustar00rootroot00000000000000open QCheck open STM open Kcas_data module Spec = struct type cmd = Push of int | Take_opt | Peek_opt | Length let show_cmd = function | Push x -> "Push " ^ string_of_int x | Take_opt -> "Take_opt" | Peek_opt -> "Peek_opt" | Length -> "Length" module State = struct type t = int list * int list let push x (h, t) = if h == [] then ([ x ], []) else (h, x :: t) let peek_opt (h, _) = match h with x :: _ -> Some x | [] -> None let drop ((h, t) as s) = match h with [] -> s | [ _ ] -> (List.rev t, []) | _ :: h -> (h, t) let length (h, t) = List.length h + List.length t end type state = State.t type sut = int Queue.t let arb_cmd _s = [ Gen.int |> Gen.map (fun x -> Push x); Gen.return Take_opt; Gen.return Peek_opt; Gen.return Length; ] |> Gen.oneof |> make ~print:show_cmd let init_state = ([], []) let init_sut () = Queue.create () let cleanup _ = () let next_state c s = match c with | Push x -> State.push x s | Take_opt -> State.drop s | Peek_opt -> s | Length -> s let precond _ _ = true let run c d = match c with | Push x -> Res (unit, Queue.push x d) | Take_opt -> Res (option int, Queue.take_opt d) | Peek_opt -> Res (option int, Queue.peek_opt d) | Length -> Res (int, Queue.length d) let postcond c (s : state) res = match (c, res) with | Push _x, Res ((Unit, _), ()) -> true | Take_opt, Res ((Option Int, _), res) -> res = State.peek_opt s | Peek_opt, Res ((Option Int, _), res) -> res = State.peek_opt s | Length, Res ((Int, _), res) -> res = State.length s | _, _ -> false end let () = Stm_run.run ~count:1000 ~verbose:true ~name:"Queue" (module Spec) |> exit kcas-0.7.0/test/kcas_data/stack_test.ml000066400000000000000000000013031456672623200200400ustar00rootroot00000000000000open Kcas_data let basics () = let s = Stack.create () in assert (Stack.length s = 0); assert (Stack.is_empty s); Stack.push 101 s; assert (not (Stack.is_empty s)); assert (Stack.top_opt s = Some 101); assert (Stack.length s = 1); let t = Stack.copy s in assert (Stack.pop_opt t = Some 101); Stack.push 42 s; Stack.swap s t; assert (Stack.pop_opt s = None); assert (List.of_seq (Stack.to_seq t) = [ 42; 101 ]); assert (Stack.top_opt t = Some 42); assert (Stack.length t = 2); assert (Stack.pop_opt t = Some 42); assert (Stack.pop_opt t = Some 101); assert (Stack.pop_opt t = None) let () = Alcotest.run "Stack" [ ("basics", [ Alcotest.test_case "" `Quick basics ]) ] kcas-0.7.0/test/kcas_data/stack_test_stm.ml000066400000000000000000000026741456672623200207370ustar00rootroot00000000000000open QCheck open STM open Kcas_data module Spec = struct type cmd = Push of int | Pop_opt | Top_opt | Length let show_cmd = function | Push x -> "Push " ^ string_of_int x | Pop_opt -> "Pop_opt" | Top_opt -> "Top_opt" | Length -> "Length" type state = int list type sut = int Stack.t let arb_cmd _s = [ Gen.int |> Gen.map (fun x -> Push x); Gen.return Pop_opt; Gen.return Top_opt; Gen.return Length; ] |> Gen.oneof |> make ~print:show_cmd let init_state = [] let init_sut () = Stack.create () let cleanup _ = () let next_state c s = match c with | Push x -> x :: s | Pop_opt -> ( match s with [] -> [] | _ :: s -> s) | Top_opt -> s | Length -> s let precond _ _ = true let run c d = match c with | Push x -> Res (unit, Stack.push x d) | Pop_opt -> Res (option int, Stack.pop_opt d) | Top_opt -> Res (option int, Stack.top_opt d) | Length -> Res (int, Stack.length d) let postcond c (s : state) res = match (c, res) with | Push _x, Res ((Unit, _), ()) -> true | Pop_opt, Res ((Option Int, _), res) -> ( res = match s with [] -> None | x :: _ -> Some x) | Top_opt, Res ((Option Int, _), res) -> ( res = match s with [] -> None | x :: _ -> Some x) | Length, Res ((Int, _), res) -> res = List.length s | _, _ -> false end let () = Stm_run.run ~count:1000 ~verbose:true ~name:"Stack" (module Spec) |> exit kcas-0.7.0/test/kcas_data/stm_run.ocaml4.ml000066400000000000000000000005351456672623200205470ustar00rootroot00000000000000let run ~verbose ~count ~name (module Spec : STM.Spec) = let module Seq = STM_sequential.Make (Spec) in let module Con = STM_thread.Make (Spec) [@alert "-experimental"] in QCheck_base_runner.run_tests ~verbose [ Seq.agree_test ~count ~name:(name ^ " sequential"); Con.agree_test_conc ~count ~name:(name ^ " concurrent"); ] kcas-0.7.0/test/kcas_data/stm_run.ocaml5.ml000066400000000000000000000005011456672623200205410ustar00rootroot00000000000000let run ~verbose ~count ~name (module Spec : STM.Spec) = let module Seq = STM_sequential.Make (Spec) in let module Dom = STM_domain.Make (Spec) in QCheck_base_runner.run_tests ~verbose [ Seq.agree_test ~count ~name:(name ^ " sequential"); Dom.agree_test_par ~count ~name:(name ^ " parallel"); ] kcas-0.7.0/test/kcas_data/util.ml000066400000000000000000000002641456672623200166560ustar00rootroot00000000000000let iter_factor = let factor b = if b then 10 else 1 in factor (64 <= Sys.word_size) * factor (Sys.backend_type = Native) * factor (1 < Domain.recommended_domain_count ()) kcas-0.7.0/test/kcas_data/xt_linked_queue.ml000066400000000000000000000017411456672623200210670ustar00rootroot00000000000000open Kcas type 'a t = { front : 'a node Loc.t; back : 'a node Loc.t } and 'a node = Nil | Node of 'a node Loc.t * 'a let create () = { front = Loc.make Nil; back = Loc.make Nil } let is_empty ~xt queue = Xt.get ~xt queue.front == Nil let push_front ~xt queue value = let next = Loc.make Nil in let node = Node (next, value) in match Xt.exchange ~xt queue.front node with | Nil -> Xt.set ~xt queue.back node | succ -> Xt.set ~xt next succ let push_back ~xt queue value = let node = Node (Loc.make Nil, value) in match Xt.exchange ~xt queue.back node with | Nil -> Xt.set ~xt queue.front node | Node (next, _) -> Xt.set ~xt next node let pop_front ~xt queue = match Xt.get ~xt queue.front with | Nil -> None | Node (next, value) -> begin match Xt.get ~xt next with | Nil -> Xt.set ~xt queue.front Nil; Xt.set ~xt queue.back Nil; Some value | node -> Xt.set ~xt queue.front node; Some value end kcas-0.7.0/test/kcas_data/xt_linked_queue.mli000066400000000000000000000003551456672623200212400ustar00rootroot00000000000000open Kcas type 'a t val create : unit -> 'a t val is_empty : xt:'x Xt.t -> 'a t -> bool val push_front : xt:'x Xt.t -> 'a t -> 'a -> unit val push_back : xt:'x Xt.t -> 'a t -> 'a -> unit val pop_front : xt:'x Xt.t -> 'a t -> 'a option kcas-0.7.0/test/kcas_data/xt_stack.ml000066400000000000000000000004271456672623200175220ustar00rootroot00000000000000open Kcas type 'a t = 'a list Loc.t let create () = Loc.make [] let is_empty ~xt s = Xt.get ~xt s == [] let push ~xt s x = Xt.modify ~xt s @@ List.cons x let pop_opt ~xt s = match Xt.update ~xt s @@ function [] -> [] | _ :: xs -> xs with | x :: _ -> Some x | [] -> None kcas-0.7.0/test/kcas_data/xt_stack.mli000066400000000000000000000002641456672623200176720ustar00rootroot00000000000000open Kcas type 'a t val create : unit -> 'a t val is_empty : xt:'x Xt.t -> 'a t -> bool val push : xt:'x Xt.t -> 'a t -> 'a -> unit val pop_opt : xt:'x Xt.t -> 'a t -> 'a option kcas-0.7.0/test/kcas_data/xt_test.ml000066400000000000000000000022001456672623200173630ustar00rootroot00000000000000open Kcas module Q = Xt_linked_queue module P = Kcas_data.Queue module S = Xt_stack let basics () = let p = P.create () and q = Q.create () and s = S.create () in (* Populate [p] with two items atomically *) let tx ~xt = P.Xt.add ~xt 4 p; P.Xt.add ~xt 1 p in Xt.commit { tx }; Xt.commit { tx = P.Xt.add 3 p }; assert (not (Xt.commit { tx = P.Xt.is_empty p })); (* Transfer item from [p] queue to [q] queue atomically *) let tx ~xt = P.Xt.take_opt ~xt p |> Option.iter @@ Q.push_back ~xt q in Xt.commit { tx }; assert (Xt.commit { tx = Q.pop_front q } = Some 4); assert (Xt.commit { tx = Q.is_empty q }); (* Transfer item from queue [p] to stack [s] atomically *) let tx ~xt = P.Xt.take_opt ~xt p |> Option.iter @@ fun x -> S.push ~xt s x in Xt.commit { tx }; assert (Xt.commit { tx = S.pop_opt s } = Some 1); assert (Xt.commit { tx = P.Xt.take_opt p } = Some 3); assert (Xt.commit { tx = P.Xt.is_empty p }); Xt.commit { tx = Q.push_front q 101 }; assert (not (Xt.commit { tx = Q.is_empty q })) let () = Alcotest.run "Transactions" [ ("basics", [ Alcotest.test_case "" `Quick basics ]) ] kcas-0.7.0/update-gh-pages-for-tag000077500000000000000000000025651456672623200167220ustar00rootroot00000000000000#!/bin/bash set -xeuo pipefail TMP=tmp NAME=kcas MAIN=doc GIT="git@github.com:ocaml-multicore/$NAME.git" DOC="_build/default/_doc/_html" GH_PAGES=gh-pages TAG="$1" if ! [ -e $NAME.opam ] || [ $# -ne 1 ] || \ { [ "$TAG" != main ] && ! [ "$(git tag -l "$TAG")" ]; }; then CMD="${0##*/}" cat << EOF Usage: $CMD tag-name-or-main This script - clones the repository into a temporary directory ($TMP/$NAME), - builds the documentation for the specified tag or main, - updates $GH_PAGES branch with the documentation in directory for the tag, - prompts whether to also update the main documentation in $MAIN directory, and - prompts whether to push changes to $GH_PAGES. EOF exit 1 fi mkdir $TMP cd $TMP git clone $GIT cd $NAME git checkout "$TAG" dune build @doc --root=. git checkout $GH_PAGES if [ "$TAG" != main ]; then echo "Updating the $TAG doc." if [ -e "$TAG" ]; then git rm -rf "$TAG" fi cp -r $DOC "$TAG" git add "$TAG" fi read -p "Update the main doc? (y/N) " -n 1 -r echo if [[ $REPLY =~ ^[Yy]$ ]]; then if [ -e $MAIN ]; then git rm -rf $MAIN fi cp -r $DOC $MAIN git add $MAIN else echo "Skipped main doc update." fi git commit -m "Update $NAME doc for $TAG" read -p "Push changes to $GH_PAGES? (y/N) " -n 1 -r echo if ! [[ $REPLY =~ ^[Yy]$ ]]; then echo "Leaving $TMP for you to examine." exit 1 fi git push cd .. cd .. rm -rf $TMP