pax_global_header00006660000000000000000000000064147655430210014521gustar00rootroot0000000000000052 comment=ea4d5192dd1e81aa473fce8a6747c753d3e97654 conky-1.22.1/000077500000000000000000000000001476554302100127275ustar00rootroot00000000000000conky-1.22.1/.clang-format000066400000000000000000000076401476554302100153110ustar00rootroot00000000000000--- Language: Cpp # BasedOnStyle: Google AccessModifierOffset: -1 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: true AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: true AllowShortLoopsOnASingleLine: true AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: Yes BinPackArguments: true BinPackParameters: true BraceWrapping: AfterClass: false AfterControlStatement: false AfterEnum: false AfterFunction: false AfterNamespace: false AfterObjCDeclaration: false AfterStruct: false AfterUnion: false AfterExternBlock: false BeforeCatch: false BeforeElse: false IndentBraces: false SplitEmptyFunction: true SplitEmptyRecord: true SplitEmptyNamespace: true BreakBeforeBinaryOperators: None BreakBeforeBraces: Attach BreakBeforeInheritanceComma: false BreakInheritanceList: BeforeColon BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BreakConstructorInitializers: BeforeColon BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true ColumnLimit: 80 CommentPragmas: '^ IWYU pragma:' CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: true DisableFormat: false ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true ForEachMacros: - foreach - Q_FOREACH - BOOST_FOREACH IncludeBlocks: Preserve IncludeCategories: - Regex: '^' Priority: 2 - Regex: '^<.*\.h>' Priority: 1 - Regex: '^<.*' Priority: 2 - Regex: '.*' Priority: 3 IncludeIsMainRegex: '([-_](test|unittest))?$' IndentCaseLabels: true IndentPPDirectives: None IndentWidth: 2 IndentWrappedFunctionNames: false JavaScriptQuotes: Leave JavaScriptWrapImports: true KeepEmptyLinesAtTheStartOfBlocks: false MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBinPackProtocolList: Never ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: false PenaltyBreakAssignment: 2 PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyBreakTemplateDeclaration: 10 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 PointerAlignment: Left RawStringFormats: - Language: Cpp Delimiters: - cc - CC - cpp - Cpp - CPP - 'c++' - 'C++' CanonicalDelimiter: '' BasedOnStyle: google - Language: TextProto Delimiters: - pb - PB - proto - PROTO EnclosingFunctions: - EqualsProto - EquivToProto - PARSE_PARTIAL_TEXT_PROTO - PARSE_TEST_PROTO - PARSE_TEXT_PROTO - ParseTextOrDie - ParseTextProtoOrDie CanonicalDelimiter: '' BasedOnStyle: google ReflowComments: true SortIncludes: true SortUsingDeclarations: true SpaceAfterCStyleCast: false SpaceAfterTemplateKeyword: true SpaceBeforeAssignmentOperators: true SpaceBeforeCpp11BracedList: false 
SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements SpaceBeforeRangeBasedForLoopColon: true SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 2 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false Standard: Auto TabWidth: 8 UseTab: Never ... conky-1.22.1/.clang-tidy000066400000000000000000000042371476554302100147710ustar00rootroot00000000000000--- Checks: 'clang-diagnostic-*,clang-analyzer-*,google-*' WarningsAsErrors: '' HeaderFilterRegex: '' AnalyzeTemporaryDtors: false FormatStyle: none User: brenden CheckOptions: - key: google-build-namespaces.HeaderFileExtensions value: ',h,hh,hpp,hxx' - key: google-global-names-in-headers.HeaderFileExtensions value: ',h,hh,hpp,hxx' - key: google-readability-braces-around-statements.ShortStatementLines value: '1' - key: google-readability-function-size.BranchThreshold value: '4294967295' - key: google-readability-function-size.LineThreshold value: '4294967295' - key: google-readability-function-size.NestingThreshold value: '4294967295' - key: google-readability-function-size.ParameterThreshold value: '4294967295' - key: google-readability-function-size.StatementThreshold value: '800' - key: google-readability-function-size.VariableThreshold value: '4294967295' - key: google-readability-namespace-comments.ShortNamespaceLines value: '10' - key: google-readability-namespace-comments.SpacesBeforeComments value: '2' - key: google-runtime-int.SignedTypePrefix value: int - key: google-runtime-int.TypeSuffix value: '' - key: google-runtime-int.UnsignedTypePrefix value: uint - key: google-runtime-references.WhiteListTypes value: '' - key: modernize-loop-convert.MaxCopySize value: '16' - key: modernize-loop-convert.MinConfidence value: reasonable - key: modernize-loop-convert.NamingStyle value: CamelCase - key: modernize-pass-by-value.IncludeStyle value: llvm - key: modernize-replace-auto-ptr.IncludeStyle value: llvm - key: modernize-use-nullptr.NullMacros value: 'NULL' ... 
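The two files above configure clang-format (Google-based style, 2-space indent, 80-column limit) and clang-tidy (clang-diagnostic, clang-analyzer, and google checks) for the conky sources. A minimal sketch of how such configs are typically exercised from the repository root follows; the exact tooling targets conky's own build system may provide are not shown in this section, so treat the commands and the example file path as assumptions:

```sh
# Hedged sketch: assumes LLVM's clang-format/clang-tidy are installed and that
# a compile_commands.json has been generated in ./build.
clang-format -i src/conky.cc        # applies the repo-root .clang-format in place
clang-tidy -p build src/conky.cc    # uses .clang-tidy; -p points at the build dir
```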
conky-1.22.1/.dockerignore000066400000000000000000000005701476554302100154050ustar00rootroot00000000000000Dockerfile .dockerignore # Ignore SCM/CI related stuff .github/ .git/ .gitlab-ci.yml .DS_Store *~ .*.swp Doxyfile patches/ doc/conky.1 README build*/ doc/*.html Makefile CMakeCache.txt CMakeFiles data/convertconf.h data/defconfig.h lua/libcairo-orig.c lua/libcairo.c lua/libimlib2.c *.so *.a /config.h # Ignore vscode stuff .vscode *.code-workspace # Ignore tests tests/ conky-1.22.1/.editorconfig000066400000000000000000000001431476554302100154020ustar00rootroot00000000000000root = true [*] end_of_line = lf insert_final_newline = true indent_style = space indent_size = 2 conky-1.22.1/.envrc000066400000000000000000000000121476554302100140360ustar00rootroot00000000000000use flake conky-1.22.1/.git-blame-ignore-revs000066400000000000000000000045131476554302100170320ustar00rootroot00000000000000# Run this command to always ignore formatting commits in `git blame` # git config blame.ignoreRevsFile .git-blame-ignore-revs # Following commits only reformat/cleanup code and can be ignored when tracking # down regressions and introduced bugs: ### Clarifying licensing (SVN@904) # Updates license headers 27b4c8550cfae4fd0f2169962e33f4907a8e7d71 ### Reformatted all code (SVN@1007) 3d26a4880e92df2c6004d85c1be458e35c6bfc3a ### outsource the whole template object machinery # Moves template variables code from core.c into template.c ff199355f66216600c4bdc6bec4743afe5b61470 ### Add formatting/static analysis (#486) # Large cleanup of codebase that fixed indentation in most files eebc8c653b34eb946d23ceb913ed5d69cc22f10e ### Build improvements: clang-tidy, sonar, Dockerfile. (#488) # Touches most files with minor changes (NULL -> nullptr) 4b92556fca9cbede3cbac4112f0a24b554390099 ### Use clang-format # Applies clang-format formatting 033508a93e6b0440ddbd2376c1e97b69e3308687 ### Move X11 stuff from conky.cc to display-x11 281097a2a562ef58e5604a3519f739c715ba5410 ### Get rid of silly `if (not ...` expressions (#713) 3a3092902ee8a5fda71996d264f981b98375c6a3 ### Fix docbook2x handling. # Large diff, affects only docbook2x which is no longer used c6ad28074af3ec1bb4b3cc052df58062ce2a7c9b ### Fix MAINTAINER_MODE (-Wall -Werror -Wpedantic) (#714) # Large diff, minor refactoring changes 9af6fa7903c5cb6f05614deac23373d4d0cf5959 ### Refactor docs, make a new website # Large diff, no changes affecting code 47ad3f9982baea4069a5c37ffdb2e1523e504f18 ### Fix issues building without BUILD_ARGB flag # Changes a lot of indentations in x11.cc f6d42c5a69fed134a8b4ed0602c892f6f7b6e242 ### Fix DependentOptions splitting arguments on spaces # Again, indentation changes in x11.cc cbebe447078e28c50957d568303f42d6b8aae126 ### Cleanup build flags, global namespace and includes (#1841) # Large refactor of global namespace # Includes changes to functionality so it can't be ignored: # color parsing, text alignment, output registration # 6adf6b9dd4d368640bf7ef57f3e6199d83154d78 ### Move sources into appropriate subdirectories (#2119) # Performed no changes to the source files, only moved them. Child commit # addressed the bugs caused by the move, to separate renames from changes. 
342f07b8ca101e0c4ef5c75346033c4df241d16d ae8f1fa8472d6c3a31b4a09be4d19568a3f3f23e conky-1.22.1/.github/000077500000000000000000000000001476554302100142675ustar00rootroot00000000000000conky-1.22.1/.github/FUNDING.yml000066400000000000000000000001031476554302100160760ustar00rootroot00000000000000# These are supported funding model platforms github: brndnmtthws conky-1.22.1/.github/ISSUE_TEMPLATE/000077500000000000000000000000001476554302100164525ustar00rootroot00000000000000conky-1.22.1/.github/ISSUE_TEMPLATE/bug_report.yml000066400000000000000000000057731476554302100213610ustar00rootroot00000000000000name: 🐞 Bug Report description: 'File a bug report: crashes, unexpected behaviour, and other errors go here.' title: '[Bug]: ' labels: ['bug', 'triage'] body: - type: markdown attributes: value: | If you haven't done so already, please take a moment to search the [existing issues](https://github.com/brndnmtthws/conky/issues) to see if this has been previously reported. If not, carry on. Thanks for taking the time to fill out this bug report! - type: textarea id: what-happened attributes: label: What happened? description: > Describe the problem, what you expected to happen, and any other relevant info. You can attach logs, stack traces, and config below. If required, include screenshots here which demonstrate the problem. placeholder: Here's what happened validations: required: true - type: input id: version attributes: label: Version description: What version of Conky are you running? validations: required: true - type: dropdown id: os validations: required: true attributes: label: Which OS/distro are you seeing the problem on? options: - Ubuntu - Fedora - Debian - Arch Linux - Gentoo - Linux (other) - macOS - type: textarea id: config attributes: label: Conky config description: Paste your Conky config here if it's relevant. render: Lua - type: markdown attributes: value: | ## Getting a stack trace If you are reporting a crash, please attach a stack trace below. To create one with `gdb`, try the following steps: ```shell # Start 'conky' $ gdb conky # Run 'conky' with a config. (gdb) run -c ~/.your_conky.conf # Wait for a crash to occur, then run this. (gdb) bt full # ... stack trace output here ... ``` Here are some additional resources on obtaining stack traces: - [Arch Linux Wiki](https://wiki.archlinux.org/index.php/Debug_-_Getting_Traces) - [Ubuntu Wiki](https://wiki.ubuntu.com/DebuggingProgramCrash) - [AskUbuntu - Where do I find core dump files, and how do I view and analyze the backtrace (stack trace) in one?](https://askubuntu.com/a/1349048) - [Julia Evans - How to get a core dump for a segfault on Linux](https://jvns.ca/blog/2018/04/28/debugging-a-segfault-on-linux/) - [GDB Documentation](https://sourceware.org/gdb/current/onlinedocs/gdb/) - [lldb Documentation](https://lldb.llvm.org/use/tutorial.html) - type: textarea id: stacktrace attributes: label: Stack trace description: Paste your stacktrace here if you have one. render: GDB - type: textarea id: logs attributes: label: Relevant log output description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 
render: Shell conky-1.22.1/.github/ISSUE_TEMPLATE/config.yml000066400000000000000000000006451476554302100204470ustar00rootroot00000000000000blank_issues_enabled: true contact_links: - name: 🙏 General questions & help url: https://github.com/brndnmtthws/conky/discussions/categories/q-a about: Please ask and answer general questions using GitHub Discussions - name: 💡 New ideas and feature requests url: https://github.com/brndnmtthws/conky/discussions/categories/ideas about: Request and discuss enhancements using GitHub Discussions conky-1.22.1/.github/dependabot.yml000066400000000000000000000010141476554302100171130ustar00rootroot00000000000000# Set update schedule for GitHub Actions version: 2 updates: # Maintain dependencies for actions - package-ecosystem: 'github-actions' directory: '/' schedule: interval: 'weekly' groups: actions: patterns: - '*' # Maintain dependencies for web docs/npm - package-ecosystem: 'npm' directory: '/web' schedule: interval: 'weekly' groups: web-deps: patterns: # Group all the web deps together to keep the PR spam down - '*' conky-1.22.1/.github/labeler.yml000066400000000000000000000126301476554302100164220ustar00rootroot00000000000000documentation: - changed-files: - any-glob-to-any-file: - doc/**/* - data/conky*.conf extras: - changed-files: - any-glob-to-any-file: - extras/**/* sources: - changed-files: - any-glob-to-any-file: - src/**/* tests: - changed-files: - any-glob-to-any-file: - tests/**/* web: - changed-files: - any-glob-to-any-file: - web/**/* javascript: - changed-files: - any-glob-to-any-file: - web/**/*.tsx - web/**/*.jsx - web/**/*.ts - web/**/*.js appimage: - changed-files: - any-glob-to-any-file: - appimage/* - appimage/**/* 3rdparty: - changed-files: - any-glob-to-any-file: - 3rdparty/**/* gh-actions: - changed-files: - any-glob-to-any-file: - .github/workflows/* - .github/workflows/**/* - .github/labeler.yml - .github/pull_request_template.md dependencies: - changed-files: - any-glob-to-any-file: - web/package-lock.json - cmake/ConkyPlatformChecks.cmake audio: - changed-files: - any-glob-to-any-file: - src/data/audio/**/* power: - changed-files: - any-glob-to-any-file: - src/data/hardware/apcupsd.cc - src/data/hardware/apcupsd.h - src/data/hardware/bsdapm.cc - src/data/hardware/bsdapm.h - src/data/hardware/smapi.cc - src/data/hardware/smapi.h 'display: console': - changed-files: - any-glob-to-any-file: - src/output/display-console.cc - src/output/display-console.hh 'display: file': - changed-files: - any-glob-to-any-file: - src/output/display-file.cc - src/output/display-file.hh 'display: http': - changed-files: - any-glob-to-any-file: - src/output/display-http.cc - src/output/display-http.hh 'display: ncurses': - changed-files: - any-glob-to-any-file: - src/output/nc.cc - src/output/nc.h - src/output/display-ncurses.cc - src/output/display-ncurses.hh 'display: wayland': - changed-files: - any-glob-to-any-file: - src/wl_protocols/**/* - src/output/wl.cc - src/output/wl.h - src/output/display-wayland.cc - src/output/display-wayland.hh 'display: x11': - changed-files: - any-glob-to-any-file: - src/output/*x11*.cc - src/output/*x11*.h - src/output/*x11*.hh 'build system': - changed-files: - any-glob-to-any-file: - 'CMakeLists.txt' - 'cmake/**/*' cairo: - changed-files: - any-glob-to-any-file: - 'lua/*cairo*' 'disk io': - changed-files: - any-glob-to-any-file: - src/data/hardware/diskio.cc - src/data/hardware/diskio.h 'sensors': - changed-files: - any-glob-to-any-file: - src/data/hardware/hddtemp.cc - src/data/hardware/hddtemp.h - 
src/data/hardware/i8k.cc - src/data/hardware/i8k.h - src/data/hardware/sony.cc - src/data/hardware/sony.h cpu: - changed-files: - any-glob-to-any-file: - src/data/hardware/cpu.cc - src/data/hardware/cpu.h - src/data/proc.cc - src/data/proc.h - src/data/top.cc - src/data/top.h lua: - changed-files: - any-glob-to-any-file: - lua/**/* - src/lua/**/* 'os: linux': - changed-files: - any-glob-to-any-file: - src/data/os/linux.cc - src/data/os/linux.h 'os: dragonfly': - changed-files: - any-glob-to-any-file: - src/data/os/dragonfly.cc - src/data/os/dragonfly.h 'os: freebsd': - changed-files: - any-glob-to-any-file: - src/data/os/freebsd.cc - src/data/os/freebsd.h 'os: haiku': - changed-files: - any-glob-to-any-file: - src/data/os/haiku.cc - src/data/os/haiku.h 'os: netbsd': - changed-files: - any-glob-to-any-file: - src/data/os/netbsd.cc - src/data/os/netbsd.h 'os: openbsd': - changed-files: - any-glob-to-any-file: - src/data/os/openbsd.cc - src/data/os/openbsd.h 'os: solaris': - changed-files: - any-glob-to-any-file: - src/data/os/solaris.cc - src/data/os/solaris.h 'os: macos': - changed-files: - any-glob-to-any-file: - src/data/os/darwin_sip.h - src/data/os/darwin.h - src/data/os/darwin.mm 'mouse events': - changed-files: - any-glob-to-any-file: - src/mouse-events.cc - src/mouse-events.h networking: - changed-files: - any-glob-to-any-file: - src/data/network/**/* mail: - changed-files: - any-glob-to-any-file: - src/data/network/mail.cc - src/data/network/mail.h calendar: - changed-files: - any-glob-to-any-file: - src/data/network/ical.cc - src/data/network/ical.h nvidia: - changed-files: - any-glob-to-any-file: - src/data/hardware/nvidia.cc - src/data/hardware/nvidia.h rendering: - changed-files: - any-glob-to-any-file: - src/output/gui.cc - src/output/gui.h - src/specials.cc - src/specials.h text: - changed-files: - any-glob-to-any-file: - src/content/**/* conky-1.22.1/.github/pull_request_template.md000066400000000000000000000007471476554302100212400ustar00rootroot00000000000000# Checklist - [ ] I have described the changes - [ ] I have linked to any relevant GitHub issues, if applicable - [ ] Documentation in `doc/` has been updated - [ ] All new code is licensed under GPLv3 ## Description * Describe the changes, why they were necessary, etc * Describe how the changes will affect existing behaviour. * Describe how you tested and validated your changes. * Include any relevant screenshots/evidence demonstrating that the changes work and have been tested. conky-1.22.1/.github/release.yml000066400000000000000000000006271476554302100164370ustar00rootroot00000000000000changelog: categories: - title: 🏕 Features labels: - feature - title: 🐞 Bug fixes labels: - bug - regression - title: 🎛️ Miscellaneous labels: - '*' exclude: labels: - dependencies - bug - regression - feature - title: 👒 Dependencies labels: - dependencies conky-1.22.1/.github/scripts/000077500000000000000000000000001476554302100157565ustar00rootroot00000000000000conky-1.22.1/.github/scripts/docker-build.bash000077500000000000000000000025671476554302100211760ustar00rootroot00000000000000#!/usr/bin/env bash set -e DH_USERNAME="${DOCKERHUB_USERNAME:-conky}" DOCKERHUB_IMAGE_ID=$DH_USERNAME/$IMAGE_NAME # Change all uppercase to lowercase DOCKERHUB_IMAGE_ID=$(echo $DOCKERHUB_IMAGE_ID | tr '[A-Z]' '[a-z]') # Only build amd64 on PRs, build all platforms on main. The arm builds # take far too long. 
image_platforms="--platform linux/amd64" push_image="" cache_tag="pr-cache" image_tags=() # Strip git ref prefix from version VERSION_TAG=$(echo $GITHUB_REF | sed -e 's,.*/\(.*\),\1,') # Strip "v" prefix from tag name if [[ "$GITHUB_REF" == refs/tags/* ]]; then VERSION_TAG=$(echo $VERSION_TAG | sed -e 's/^v//') fi image_tags+=("--tag" "$DOCKERHUB_IMAGE_ID:$VERSION_TAG") # tag as latest on releases if [[ "$RELEASE" == ON ]]; then image_tags+=("--tag" "$DOCKERHUB_IMAGE_ID:latest") fi # Only push on main if [[ "$GITHUB_REF" == refs/heads/main ]]; then push_image="--push" image_platforms="--platform linux/arm/v7,linux/arm64/v8,linux/amd64" cache_tag="main-cache" fi # Only write to cache if credentials are available if [[ -z "$DOCKERHUB_TOKEN" ]]; then write_cache="" else write_cache="--cache-to=type=registry,ref=$DOCKERHUB_IMAGE_ID:$cache_tag,mode=max" fi docker buildx build \ ${push_image} \ ${image_platforms} \ --cache-from=type=registry,ref=$DOCKERHUB_IMAGE_ID:$cache_tag \ ${write_cache} \ "${image_tags[@]}" \ . conky-1.22.1/.github/scripts/setup-sccache.sh000077500000000000000000000040661476554302100210520ustar00rootroot00000000000000#!/bin/sh export SCCACHE_VERSION="${SCCACHE_VERSION:=0.9.1}" export sccache_arch="x86_64" if [ "$RUNNER_ARCH" = "X86" ]; then export sccache_arch="i686" elif [ "$RUNNER_ARCH" = "X64" ]; then export sccache_arch="x86_64" elif [ "$RUNNER_ARCH" = "ARM" ]; then export sccache_arch="armv7" elif [ "$RUNNER_ARCH" = "ARM64" ]; then export sccache_arch="aarch64" fi install_sccache() { export sccache_archive="sccache-v$SCCACHE_VERSION-$sccache_arch-$sccache_os" export sccache_url="https://github.com/mozilla/sccache/releases/download/v$SCCACHE_VERSION/$sccache_archive.tar.gz" echo "Downloading $sccache_url..." if ! wget -q "$sccache_url"; then echo "Can't download $sccache_url." >2 exit 1 fi echo "Extracting $sccache_archive.tar.gz..." if ! tar -xzf "$sccache_archive.tar.gz" >/dev/null; then echo "Can't extract $sccache_archive.tar.gz" >2 exit 1 fi chmod +x "$sccache_archive/sccache" sudo cp "$sccache_archive/sccache" "/usr/local/bin/sccache" rm -rf "$sccache_archive.tar.gz" rm -rf "$sccache_archive" } export sccache_os="unknown-linux-musl" if [ "$RUNNER_OS" = "Linux" ]; then export sccache_os="unknown-linux-musl" if [ "$RUNNER_ARCH" = "ARM" ]; then export sccache_os="unknown-linux-musleabi" fi if ! install_sccache; then echo "Unable to install sccache!" >2 exit 1 fi elif [ "$RUNNER_OS" = "macOS" ]; then export sccache_os="apple-darwin" if ! install_sccache; then echo "Unable to install sccache!" >2 exit 1 fi elif [ "$RUNNER_OS" = "Windows" ]; then export sccache_os="pc-windows-msvc" if ! install_sccache; then echo "Unable to install sccache!" >2 exit 1 fi fi echo "sccache installed." # Configure mkdir $HOME/.sccache echo "SCCACHE_DIR=$HOME/.sccache" >>$GITHUB_ENV if [ "$RUNNER_DEBUG" = "1" ]; then echo "Running with debug output; cached binary artifacts will be ignored to produce a cleaner build" echo "SCCACHE_RECACHE=true" >>$GITHUB_ENV fi echo "sccache configured." 
conky-1.22.1/.github/workflows/000077500000000000000000000000001476554302100163245ustar00rootroot00000000000000conky-1.22.1/.github/workflows/build-and-test-linux.yaml000066400000000000000000000113331476554302100231620ustar00rootroot00000000000000name: Build and test on Linux on: push: branches: - main paths-ignore: - web/** - doc/** pull_request: branches: - main paths-ignore: - web/** - doc/** concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true env: SCCACHE_VERSION: '0.9.1' jobs: build: strategy: matrix: os: - ubuntu-22.04 - ubuntu-24.04 x11: [ON, OFF] wayland: [ON, OFF] compiler: - clang - gcc runs-on: ${{ matrix.os }} steps: - run: sudo apt-get -qq update - name: Install dependencies run: | sudo apt-get install -yqq --no-install-recommends \ audacious-dev \ cmake \ gawk \ gperf \ lcov \ libaudclient-dev \ libcairo2-dev \ libpango1.0-dev \ libcurl4-gnutls-dev \ libdbus-glib-1-dev \ libglib2.0-dev \ libical-dev \ libimlib2-dev \ libircclient-dev \ libiw-dev \ liblua5.3-dev \ libmicrohttpd-dev \ libmysqlclient-dev \ libpulse-dev \ librsvg2-dev \ libsystemd-dev \ libwayland-bin \ libwayland-dev \ libx11-dev \ libxdamage-dev \ libxext-dev \ libxft-dev \ libxinerama-dev \ libxml2-dev \ libxmmsclient-dev \ libxnvctrl-dev \ ncurses-dev \ ninja-build \ wayland-protocols - name: Install clang and libc++ if: matrix.compiler == 'clang' run: | sudo apt-get install -yqq --no-install-recommends \ clang \ libc++-dev \ libc++abi-dev - name: Install gcc if: matrix.compiler == 'gcc' run: | sudo apt-get install -yqq --no-install-recommends \ gcc \ g++ - name: Checkout uses: actions/checkout@v4 - name: Install sccache run: .github/scripts/setup-sccache.sh - name: Load cached compilation artifacts id: compiler-cache uses: actions/cache@v4 with: path: '${{ env.SCCACHE_DIR }}' key: sccache-${{ matrix.os }}-${{ matrix.x11 }}-${{ matrix.wayland }}-${{ matrix.compiler }} restore-keys: | sccache-${{ matrix.os }}-${{ matrix.x11 }}-${{ matrix.wayland }}-${{ matrix.compiler }} sccache-${{ matrix.os }}-${{ matrix.x11 }}-${{ matrix.wayland }} sccache-${{ matrix.os }}-${{ matrix.x11 }} sccache-${{ matrix.os }} - name: Configure with CMake run: | set -x # show the commands we're running [[ "${{ matrix.compiler }}" == "clang"* ]] && CC=clang [[ "${{ matrix.compiler }}" == "clang"* ]] && CXX=clang++ [[ "${{ matrix.compiler }}" == "gcc"* ]] && CC=gcc [[ "${{ matrix.compiler }}" == "gcc"* ]] && CXX=g++ # Enable librsvg by default, disable for older versions of Ubuntu # because we need librsvg>=2.52 RSVG_ENABLED=ON [[ "${{ matrix.os }}" == "ubuntu-20.04"* ]] && RSVG_ENABLED=OFF # Reset sccache statistics sccache --zero-stats cmake . 
-B build -G Ninja \ -DBUILD_AUDACIOUS=ON \ -DBUILD_HTTP=ON \ -DBUILD_ICAL=ON \ -DBUILD_ICONV=ON \ -DBUILD_IRC=ON \ -DBUILD_IRC=ON \ -DBUILD_JOURNAL=ON \ -DBUILD_LUA_CAIRO=ON \ -DBUILD_LUA_CAIRO_XLIB=ON \ -DBUILD_LUA_IMLIB2=ON \ -DBUILD_LUA_RSVG=${RSVG_ENABLED} \ -DBUILD_MYSQL=ON \ -DBUILD_NVIDIA=ON \ -DBUILD_PULSEAUDIO=ON \ -DBUILD_CURL=ON \ -DBUILD_RSS=ON \ -DBUILD_WLAN=ON \ -DBUILD_WAYLAND=${{ matrix.wayland }}\ -DBUILD_X11=${{ matrix.x11 }} \ -DBUILD_XMMS2=ON \ -DCMAKE_C_COMPILER=$CC \ -DCMAKE_CXX_COMPILER=$CXX \ -DMAINTAINER_MODE=ON - name: Compile run: cmake --build build - name: Show sccache stats run: sccache --show-stats - name: Test working-directory: build run: ctest --output-on-failure conky-1.22.1/.github/workflows/build-and-test-macos.yaml000066400000000000000000000042121476554302100231230ustar00rootroot00000000000000name: Build and test on macOS on: push: branches: - main paths-ignore: - web/** - doc/** pull_request: branches: - main paths-ignore: - web/** - doc/** concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true env: SCCACHE_VERSION: '0.9.1' jobs: build: env: HOMEBREW_NO_INSTALL_UPGRADE: 1 HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: 1 strategy: matrix: os: - macos-13 - macos-14 runs-on: ${{ matrix.os }} steps: - name: Install dependencies run: | brew install --force \ cmake \ curl \ freetype \ gettext \ gperf \ imlib2 \ lcov \ librsvg \ libxfixes \ libxft \ libxi \ libxinerama \ libxml2 \ lua \ ninja \ pkg-config \ || true # Ignore errors - name: Checkout uses: actions/checkout@v4 - name: Configure sccache run: .github/scripts/setup-sccache.sh - name: Load cached compilation artifacts id: compiler-cache uses: actions/cache@v4 with: path: '${{ env.SCCACHE_DIR }}' key: sccache-${{ matrix.os }} restore-keys: | sccache-${{ matrix.os }} sccache- - name: Configure with CMake run: | # Reset sccache statistics sccache --zero-stats cmake . 
-B build -G Ninja \ -DMAINTAINER_MODE=ON \ -DBUILD_WAYLAND=OFF \ -DBUILD_RSS=ON \ -DBUILD_CURL=ON - name: Compile run: cmake --build build - name: Show sccache stats run: sccache --show-stats - name: Test working-directory: build run: ctest --output-on-failure conky-1.22.1/.github/workflows/codeql.yml000066400000000000000000000057451476554302100203310ustar00rootroot00000000000000name: 'CodeQL' on: schedule: - cron: '20 2 * * 0' jobs: analyze: name: Analyze runs-on: ubuntu-latest permissions: actions: read contents: read security-events: write strategy: fail-fast: false matrix: language: ['cpp', 'javascript', 'python'] # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] # Use only 'java' to analyze code written in Java, Kotlin or both # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support steps: - run: sudo apt-get -qq update - name: Install dependencies if: matrix.language == 'cpp' run: | sudo apt-get install -yqq --no-install-recommends \ audacious-dev \ cmake \ gawk \ gperf \ lcov \ libaudclient-dev \ libc++-14-dev \ libc++abi-14-dev \ libcairo2-dev \ libcurl4-gnutls-dev \ libdbus-glib-1-dev \ libglib2.0-dev \ libical-dev \ libimlib2-dev \ libircclient-dev \ libiw-dev \ liblua5.3-dev \ libmicrohttpd-dev \ libmysqlclient-dev \ libpulse-dev \ librsvg2-dev \ libsystemd-dev \ libwayland-bin \ libwayland-dev \ libx11-dev \ libxdamage-dev \ libxext-dev \ libxft-dev \ libxinerama-dev \ libxml2-dev \ libxmmsclient-dev \ libxnvctrl-dev \ ncurses-dev \ ninja-build \ wayland-protocols - name: Checkout repository uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs # queries: security-extended,security-and-quality # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild uses: github/codeql-action/autobuild@v3 - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 with: category: '/language:${{matrix.language}}' conky-1.22.1/.github/workflows/docker.yaml000066400000000000000000000023251476554302100204610ustar00rootroot00000000000000name: Docker on: push: branches: - main paths-ignore: - web/** - doc/** pull_request: branches: - main paths-ignore: - web/** - doc/** # Publish `v1.2.3` tags as releases. 
tags: - v* concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: docker-buildx: env: IMAGE_NAME: conky DOCKER_BUILDKIT: 1 DOCKERHUB_USERNAME: brndnmtthws DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx id: buildx uses: docker/setup-buildx-action@v3 - name: Login to Docker Hub uses: docker/login-action@v3 if: ${{ env.DOCKERHUB_TOKEN != '' }} with: username: ${{ env.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push Docker image env: RELEASE: "${{ startsWith(github.ref, 'refs/tags/') && 'ON' || 'OFF' }}" GITHUB_REF: ${{ github.ref }} run: .github/scripts/docker-build.bash conky-1.22.1/.github/workflows/labeler.yml000066400000000000000000000006031476554302100204540ustar00rootroot00000000000000name: 'Pull Request Labeler' on: - pull_request_target jobs: triage: permissions: contents: read pull-requests: write runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4 - name: Run labeler uses: actions/labeler@v5 with: sync-labels: true repo-token: '${{ secrets.GITHUB_TOKEN }}' conky-1.22.1/.github/workflows/nix.yaml000066400000000000000000000016351476554302100200130ustar00rootroot00000000000000name: 'Nix build' on: push: branches: - main paths-ignore: - web/** - doc/** pull_request: branches: - main concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: build-and-check: strategy: matrix: os: [ubuntu-latest, macos-latest] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - uses: nixbuild/nix-quick-install-action@v29 - name: Restore and cache Nix store uses: nix-community/cache-nix-action@v5 with: primary-key: nix-${{ runner.os }}-${{ hashFiles('**/*.nix') }} restore-prefixes-first-match: nix-${{ runner.os }}- gc-max-store-size: 1073741824 purge: true purge-prefixes: cache-${{ runner.os }}- purge-created: 0 purge-primary-key: never - run: nix build - run: nix flake check conky-1.22.1/.github/workflows/publish-appimage.yml000066400000000000000000000133441476554302100223030ustar00rootroot00000000000000name: Build AppImage on: push: branches: - main tags: - v* paths-ignore: - web/** - doc/** pull_request: branches: - main concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true env: SCCACHE_VERSION: '0.9.1' jobs: build: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: - ubuntu-22.04 - ubuntu-24.04 arch: - x86_64 permissions: contents: write discussions: write steps: - run: sudo apt-get -qq update - name: Install dependencies run: | sudo apt-get install -yqq --no-install-recommends \ audacious-dev \ cmake \ gawk \ gperf \ lcov \ libaudclient-dev \ libcairo2-dev \ libcurl4-gnutls-dev \ libdbus-glib-1-dev \ libfuse2 \ libglib2.0-dev \ libical-dev \ libimlib2-dev \ libircclient-dev \ libiw-dev \ liblua5.3-dev \ libmicrohttpd-dev \ libmysqlclient-dev \ libpulse-dev \ librsvg2-dev \ libsystemd-dev \ libwayland-bin \ libwayland-dev \ libx11-dev \ libxdamage-dev \ libxext-dev \ libxft-dev \ libxinerama-dev \ libxml2-dev \ libxmmsclient-dev \ libxnvctrl-dev \ ncurses-dev \ ninja-build \ pandoc \ wayland-protocols - name: Checkout uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/setup-python@v5 with: python-version: '3.12' cache: 'pip' # caching pip dependencies cache-dependency-path: 'requirements-dev.txt' - run: pip install -r requirements-dev.txt - name: Import 
GPG Deploy Key # only run on main branch if: github.ref == 'refs/heads/main' run: | echo "${{ secrets.GPG_DEPLOY_KEY }}" > appimage/secret.gpg gpg --import appimage/secret.gpg - name: Set clang version to 12 for ubuntu-20.04 if: matrix.os == 'ubuntu-20.04' run: | echo "CLANG_VERSION=12" | tee -a $GITHUB_ENV - name: Set clang version to 15 for ubuntu-22.04 if: matrix.os == 'ubuntu-22.04' run: | echo "CLANG_VERSION=15" | tee -a $GITHUB_ENV - name: Set clang version to 18 for ubuntu-24.04 if: matrix.os == 'ubuntu-24.04' run: | echo "CLANG_VERSION=18" | tee -a $GITHUB_ENV - name: Install libc++, set CC and CXX env vars run: | sudo apt-get install -yqq --no-install-recommends \ libc++-${CLANG_VERSION}-dev \ libc++abi-${CLANG_VERSION}-dev echo "CC=clang-${CLANG_VERSION}" | tee -a $GITHUB_ENV echo "CXX=clang++-${CLANG_VERSION}" | tee -a $GITHUB_ENV - name: Install sccache if: startsWith(github.ref, 'refs/tags/') != true run: .github/scripts/setup-sccache.sh - name: Load cached compilation artifacts if: startsWith(github.ref, 'refs/tags/') != true id: compiler-cache uses: actions/cache@v4 with: path: '${{ env.SCCACHE_DIR }}' key: sccache-${{ matrix.os }}-${{ matrix.arch }} restore-keys: | sccache-${{ matrix.os }}-${{ matrix.arch }} sccache-${{ matrix.os }} sccache- - name: Build AppImage run: ./appimage/build.sh env: RELEASE: "${{ startsWith(github.ref, 'refs/tags/') && 'ON' || 'OFF' }}" - name: Show sccache stats if: startsWith(github.ref, 'refs/tags/') != true run: sccache --show-stats - run: ./conky-x86_64.AppImage --version # print version - name: Set CONKY_VERSION run: echo "CONKY_VERSION=$(./conky-x86_64.AppImage --short-version)" | tee -a $GITHUB_ENV - run: mv conky-x86_64.AppImage conky-${{ matrix.os }}-${{ matrix.arch }}-v${{ env.CONKY_VERSION }}.AppImage - run: mv conky-x86_64.AppImage.sha256 conky-${{ matrix.os }}-${{ matrix.arch }}-v${{ env.CONKY_VERSION }}.AppImage.sha256 - name: Upload AppImage artifact uses: actions/upload-artifact@v4 with: path: 'conky-${{ matrix.os }}-${{ matrix.arch }}-v${{ env.CONKY_VERSION }}.AppImage' name: 'conky-${{ matrix.os }}-${{ matrix.arch }}-v${{ env.CONKY_VERSION }}.AppImage' if-no-files-found: error - name: Upload AppImage checksum artifact uses: actions/upload-artifact@v4 with: path: 'conky-${{ matrix.os }}-${{ matrix.arch }}-v${{ env.CONKY_VERSION }}.AppImage.sha256' name: 'conky-${{ matrix.os }}-${{ matrix.arch }}-v${{ env.CONKY_VERSION }}.AppImage.sha256' if-no-files-found: error - name: Upload man page artifact uses: actions/upload-artifact@v4 with: name: conky.1.gz path: conky.1.gz # conky.1.gz is created by all jobs! overwrite: true release: runs-on: ubuntu-latest needs: build steps: - name: Download all artifacts uses: actions/download-artifact@v4 with: path: ${{ github.workspace }}/artifacts - name: Create Conky Release id: create_release uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') with: draft: false prerelease: false discussion_category_name: General generate_release_notes: true fail_on_unmatched_files: true files: | ${{ github.workspace }}/artifacts/**/* conky-1.22.1/.github/workflows/stale.yml000066400000000000000000000020071476554302100201560ustar00rootroot00000000000000name: "Close stale issues and PRs" on: schedule: - cron: "30 1 * * *" jobs: stale: permissions: contents: write # for delete-branch option issues: write pull-requests: write runs-on: ubuntu-latest steps: - uses: actions/stale@v9 with: stale-issue-message: "This issue is stale because it has been open 365 days with no activity. 
Remove stale label or comment, or this issue will be closed in 30 days." stale-pr-message: "This PR is stale because it has been open 365 days with no activity. Remove stale label or comment, or this PR will be closed in 30 days." close-issue-message: "This issue was closed because it has been stalled for 30 days with no activity." close-pr-message: "This PR was closed because it has been stalled for 30 days with no activity." days-before-issue-stale: 365 days-before-pr-stale: 365 days-before-issue-close: 30 days-before-pr-close: 30 delete-branch: true conky-1.22.1/.github/workflows/web.yml000066400000000000000000000014041476554302100176230ustar00rootroot00000000000000name: Web CI on: push: branches: [main] paths: - web/** - doc/** pull_request: branches: [main] paths: - web/** - doc/** concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Use Node.js 18 uses: actions/setup-node@v4 with: node-version: 18 cache: npm cache-dependency-path: '**/package-lock.json' - run: npm ci working-directory: web - run: npm run lint working-directory: web - run: npm run build --if-present working-directory: web - run: npm run e2e:headless working-directory: web conky-1.22.1/.gitignore000066400000000000000000000007701476554302100147230ustar00rootroot00000000000000.DS_Store *~ .*.swp Doxyfile patches/ doc/conky.1 README build*/ doc/*.html Makefile CMakeCache.txt CMakeFiles cmake_install.cmake cmake_uninstall.cmake CPackConfig.cmake CPackSourceConfig.cmake # Generated files data/convertconf.h data/defconfig.h *.so *.a /config.h /build.h src/colour-names.hh # Compiler cache .cache # Ignore (most) vscode stuff .vscode/* *.code-workspace # Allow vscode recommended extensions !.vscode/extensions.json .idea/ # Ignore nix stuff /result # Ignore direnv .direnv conky-1.22.1/.prettierrc.cjs000066400000000000000000000001541476554302100156710ustar00rootroot00000000000000module.exports = { printWidth: 80, semi: false, singleQuote: true, tabWidth: 2, useTabs: false, } conky-1.22.1/.vscode/000077500000000000000000000000001476554302100142705ustar00rootroot00000000000000conky-1.22.1/.vscode/extensions.json000066400000000000000000000002051476554302100173570ustar00rootroot00000000000000{ "recommendations": [ "kamadorueda.alejandra", "ms-vscode.cmake-tools", "mkhl.direnv", "xaver.clang-format" ] } conky-1.22.1/3rdparty/000077500000000000000000000000001476554302100144775ustar00rootroot00000000000000conky-1.22.1/3rdparty/CMakeLists.txt000066400000000000000000000001031476554302100172310ustar00rootroot00000000000000# Vc version 1.4.4 add_subdirectory(Vc) add_subdirectory(toluapp) conky-1.22.1/3rdparty/Vc/000077500000000000000000000000001476554302100150475ustar00rootroot00000000000000conky-1.22.1/3rdparty/Vc/.clang-format000066400000000000000000000143241476554302100174260ustar00rootroot00000000000000BasedOnStyle: Google # The extra indent or outdent of access modifiers, e.g. public:. AccessModifierOffset: -4 # If true, aligns escaped newlines as far left as possible. Otherwise puts them into the right-most column. AlignEscapedNewlinesLeft: false # If true, aligns trailing comments. AlignTrailingComments: true # Allow putting all parameters of a function declaration onto the next line even if BinPackParameters is false. AllowAllParametersOfDeclarationOnNextLine: false # If true, if (a) return; can be put on a single line. 
AllowShortIfStatementsOnASingleLine: false # If true, while (true) continue; can be put on a single line. AllowShortLoopsOnASingleLine: false AllowShortFunctionsOnASingleLine: true # If true, always break before multiline string literals. AlwaysBreakBeforeMultilineStrings: false # If true, always break after the template<...> of a template declaration. AlwaysBreakTemplateDeclarations: false # If false, a function call’s or function definition’s parameters will either all be on the same line or will have one line each. BinPackParameters: true # If true, binary operators will be placed after line breaks. BreakBeforeBinaryOperators: false # The brace breaking style to use. # Possible values: # BS_Attach (in configuration: Attach) Always attach braces to surrounding context. # BS_Linux (in configuration: Linux) Like Attach, but break before braces on function, namespace and class definitions. # BS_Stroustrup (in configuration: Stroustrup) Like Attach, but break before function definitions. # BS_Allman (in configuration: Allman) Always break before braces. BreakBeforeBraces: Linux # Always break constructor initializers before commas and align the commas with the colon. BreakConstructorInitializersBeforeComma: true # The column limit. # A column limit of 0 means that there is no column limit. In this case, clang-format will respect the input’s line breaking decisions within statements. ColumnLimit: 90 # If the constructor initializers don’t fit on a line, put each initializer on its own line. #ConstructorInitializerAllOnOneLineOrOnePerLine (bool) # The number of characters to use for indentation of constructor initializer lists. #ConstructorInitializerIndentWidth (unsigned) # If true, format braced lists as best suited for C++11 braced lists. # Important differences: - No spaces inside the braced list. - No line break before the closing brace. - Indentation with the continuation indent, not with the block indent. # Fundamentally, C++11 braced lists are formatted exactly like function calls would be formatted in their place. If the braced list follows a name (e.g. a type or variable name), clang-format formats as if the {} were the parentheses of a function call with that name. If there is no name, a zero-length name is assumed. Cpp11BracedListStyle: true # If true, analyze the formatted file for the most common binding. #DerivePointerBinding (bool) # If true, clang-format detects whether function calls and definitions are formatted with one parameter per line. # Each call can be bin-packed, one-per-line or inconclusive. If it is inconclusive, e.g. completely on one line, but a decision needs to be made, clang-format analyzes whether there are other bin-packed cases in the input file and act accordingly. # NOTE: This is an experimental flag, that might go away or be renamed. Do not use this in config files, etc. Use at your own risk. #ExperimentalAutoDetectBinPacking (bool) # Indent case labels one level from the switch statement. # When false, use the same indentation level as for the switch statement. Switch statement body is always indented one level more than case labels. IndentCaseLabels: false # If true, indent when breaking function declarations which are not also definitions after the type. #IndentFunctionDeclarationAfterType (bool) # The number of characters to use for indentation. IndentWidth: 4 # The maximum number of consecutive empty lines to keep. MaxEmptyLinesToKeep: 1 # The indentation used for namespaces. 
# Possible values: # NI_None (in configuration: None) Don’t indent in namespaces. # NI_Inner (in configuration: Inner) Indent only in inner namespaces (nested in other namespaces). # NI_All (in configuration: All) Indent in all namespaces. NamespaceIndentation: None # Add a space in front of an Objective-C protocol list, i.e. use Foo instead of Foo. #ObjCSpaceBeforeProtocolList (bool) # The penalty for each line break introduced inside a comment. #PenaltyBreakComment (unsigned) # The penalty for breaking before the first <<. #PenaltyBreakFirstLessLess (unsigned) # The penalty for each line break introduced inside a string literal. #PenaltyBreakString (unsigned) # The penalty for each character outside of the column limit. #PenaltyExcessCharacter (unsigned) # Penalty for putting the return type of a function onto its own line. #PenaltyReturnTypeOnItsOwnLine (unsigned) # Set whether & and * bind to the type as opposed to the variable. #PointerBindsToType: false # If true, spaces will be inserted between ‘for’/’if’/’while’/... and ‘(‘. #SpaceAfterControlStatementKeyword: true # If false, spaces will be removed before ‘=’, ‘+=’, etc. #SpaceBeforeAssignmentOperators: true # If false, spaces may be inserted into ‘()’. #SpaceInEmptyParentheses: false # The number of spaces to before trailing line comments. #SpacesBeforeTrailingComments (unsigned) # If false, spaces may be inserted into C style casts. #SpacesInCStyleCastParentheses (bool) # If true, spaces will be inserted after every ‘(‘ and before every ‘)’. SpacesInParentheses: false # Format compatible with this standard, e.g. use A > instead of A> for LS_Cpp03. # Possible values: # LS_Cpp03 (in configuration: Cpp03) Use C++03-compatible syntax. # LS_Cpp11 (in configuration: Cpp11) Use features of C++11 (e.g. A> instead of A >). # LS_Auto (in configuration: Auto) Automatic detection based on the input. Standard: Cpp11 # If true, IndentWidth consecutive spaces will be replaced with tab characters. UseTab: false # vim: ft=yaml conky-1.22.1/3rdparty/Vc/.gitignore000066400000000000000000000001521476554302100170350ustar00rootroot00000000000000doc/html doc/latex doc/man vc-benchmarks *.swp *~ .makeApidox.stamp .makeApidox.stamp.new build-* .vs out conky-1.22.1/3rdparty/Vc/CMakeLists.txt000066400000000000000000000251531476554302100176150ustar00rootroot00000000000000# cmake_minimum_required(VERSION 3.0) cmake_policy(SET CMP0028 NEW) # Double colon in target name means ALIAS or IMPORTED target. cmake_policy(SET CMP0048 NEW) # The ``project()`` command manages VERSION variables. if(PROJECT_SOURCE_DIR STREQUAL PROJECT_BINARY_DIR) message(FATAL_ERROR "You don't want to configure in the source directory!") endif() if(NOT DEFINED CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebug RelWithDebInfo MinSizeRel." 
FORCE) endif() # read version parts from version.h file(STRINGS ${CMAKE_CURRENT_SOURCE_DIR}/Vc/version.h _version_lines REGEX "^#define Vc_VERSION_STRING ") string(REGEX MATCH "([0-9]+)\\.([0-9]+)\\.([0-9]+)" _version_matches "${_version_lines}") project(Vc VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}" LANGUAGES C CXX) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") set(disabled_targets) include (VcMacros) include (AddTargetProperty) include (OptimizeForArchitecture) vc_determine_compiler() if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(i686|x86|AMD64|amd64)") set(Vc_X86 TRUE) elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(arm|aarch32|aarch64)") message(WARNING "No optimized implementation of the Vc types available for ${CMAKE_SYSTEM_PROCESSOR}") set(Vc_ARM TRUE) else() message(WARNING "No optimized implementation of the Vc types available for ${CMAKE_SYSTEM_PROCESSOR}") endif() if(NOT Vc_COMPILER_IS_MSVC) AddCompilerFlag("-std=c++14" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS) if(NOT _ok) AddCompilerFlag("-std=c++1y" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS) if(NOT _ok) AddCompilerFlag("-std=c++11" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS) if(NOT _ok) AddCompilerFlag("-std=c++0x" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS) if(NOT _ok) message(FATAL_ERROR "Vc 1.x requires C++11, better even C++14. It seems this is not available. If this was incorrectly determined please notify vc-devel@compeng.uni-frankfurt.de") endif() endif() endif() endif() # elseif(MSVC_VERSION LESS 1920) # message(FATAL_ERROR "Vc 1.x requires at least Visual Studio 2019.") # AddCompilerFlag("/std:c++14" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS) endif() # if(MSVC AND (NOT DEFINED Vc_USE_MSVC_SSA_OPTIMIZER_DESPITE_BUGGY_EXP OR NOT Vc_USE_MSVC_SSA_OPTIMIZER_DESPITE_BUGGY_EXP)) # # bug report: https://developercommunity.visualstudio.com/t/AVX-codegen-bug-on-Vc-with-MSVC-2019/1470844#T-N1521672 # message(STATUS "WARNING! MSVC starting with 19.20 uses a new optimizer that has a bug causing Vc::exp() to return slighly wrong results.\ # You can set Vc_USE_MSVC_SSA_OPTIMIZER_DESPITE_BUGGY_EXP=ON to still use the new optimizer on the affected MSVC versions.") # AddCompilerFlag("/d2SSAOptimizer-" CXX_RESULT _ok CXX_FLAGS CMAKE_CXX_FLAGS) # endif() if(Vc_COMPILER_IS_GCC) if(Vc_GCC_VERSION VERSION_GREATER "5.0.0" AND Vc_GCC_VERSION VERSION_LESS "6.0.0") UserWarning("GCC 5 goes into an endless loop comiling example_scaling_scalar. Therefore, this target is disabled.") list(APPEND disabled_targets example_scaling_scalar ) endif() # elseif(Vc_COMPILER_IS_MSVC) # # Disable warning "C++ exception specification ignored except to indicate a function is not __declspec(nothrow)" # # MSVC emits the warning for the _UnitTest_Compare desctructor which needs the throw declaration so that it doesn't std::terminate # AddCompilerFlag("/wd4290") endif() vc_set_preferred_compiler_flags(WARNING_FLAGS BUILDTYPE_FLAGS) add_definitions(${Vc_DEFINITIONS}) add_compile_options(${Vc_COMPILE_FLAGS}) if(Vc_COMPILER_IS_INTEL) # per default icc is not IEEE compliant, but we need that for verification AddCompilerFlag("-fp-model source") endif() if(CMAKE_BUILD_TYPE STREQUAL "" AND NOT CMAKE_CXX_FLAGS MATCHES "-O[123]") message(STATUS "WARNING! It seems you are compiling without optimization. 
Please set CMAKE_BUILD_TYPE.") endif(CMAKE_BUILD_TYPE STREQUAL "" AND NOT CMAKE_CXX_FLAGS MATCHES "-O[123]") include_directories(${CMAKE_CURRENT_SOURCE_DIR}) # ${CMAKE_CURRENT_SOURCE_DIR}/include) add_custom_target(other VERBATIM) add_custom_target(Scalar COMMENT "build Scalar code" VERBATIM) add_custom_target(SSE COMMENT "build SSE code" VERBATIM) add_custom_target(AVX COMMENT "build AVX code" VERBATIM) add_custom_target(AVX2 COMMENT "build AVX2 code" VERBATIM) AddCompilerFlag(-ftemplate-depth=128 CXX_FLAGS CMAKE_CXX_FLAGS) set(libvc_compile_flags "-DVc_COMPILE_LIB") AddCompilerFlag("-fPIC" CXX_FLAGS libvc_compile_flags) # -fstack-protector is the default of GCC, but at least Ubuntu changes the default to -fstack-protector-strong, which is crazy AddCompilerFlag("-fstack-protector" CXX_FLAGS libvc_compile_flags) set(_srcs src/const.cpp) if(Vc_X86) list(APPEND _srcs src/cpuid.cpp src/support_x86.cpp) vc_compile_for_all_implementations(_srcs src/trigonometric.cpp ONLY SSE2 SSE3 SSSE3 SSE4_1 AVX AVX+FMA AVX2+FMA+BMI2) if(NOT Vc_XOP_INTRINSICS_BROKEN) vc_compile_for_all_implementations(_srcs src/trigonometric.cpp ONLY AVX+XOP+FMA) if(NOT Vc_FMA4_INTRINSICS_BROKEN) vc_compile_for_all_implementations(_srcs src/trigonometric.cpp ONLY SSE+XOP+FMA4 AVX+XOP+FMA4) endif() endif() vc_compile_for_all_implementations(_srcs src/sse_sorthelper.cpp ONLY SSE2 SSE4_1 AVX AVX2+FMA+BMI2) vc_compile_for_all_implementations(_srcs src/avx_sorthelper.cpp ONLY AVX AVX2+FMA+BMI2) elseif(Vc_ARM) list(APPEND _srcs src/support_dummy.cpp) else() list(APPEND _srcs src/support_dummy.cpp) endif() add_library(Vc STATIC ${_srcs}) set_property(TARGET Vc APPEND PROPERTY COMPILE_OPTIONS ${libvc_compile_flags}) add_target_property(Vc LABELS "other") if(XCODE) # TODO: document what this does and why it has no counterpart in the non-XCODE logic set_target_properties(Vc PROPERTIES XCODE_ATTRIBUTE_GCC_INLINES_ARE_PRIVATE_EXTERN "NO") set_target_properties(Vc PROPERTIES XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES") set_target_properties(Vc PROPERTIES XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++0x") set_target_properties(Vc PROPERTIES XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") elseif(UNIX AND Vc_COMPILER_IS_CLANG) # On UNIX (Linux) the standard library used by default typically is libstdc++ (GCC). # To get the full clang deal we rather want to build against libc++. This requires # additionally the libc++abi and libsupc++ libraries in all linker invokations. option(USE_LIBC++ "Use libc++ instead of the system default C++ standard library." OFF) if(USE_LIBC++) AddCompilerFlag(-stdlib=libc++ CXX_FLAGS CMAKE_CXX_FLAGS CXX_RESULT _use_libcxx) if(_use_libcxx) find_library(LIBC++ABI c++abi) mark_as_advanced(LIBC++ABI) if(LIBC++ABI) set(CMAKE_REQUIRED_LIBRARIES "${LIBC++ABI};supc++") CHECK_CXX_SOURCE_COMPILES("#include #include void foo() { std::cout << 'h' << std::flush << std::endl; throw std::exception(); } int main() { try { foo(); } catch (int) { return 0; } return 1; }" libcxx_compiles) unset(CMAKE_REQUIRED_LIBRARIES) if(libcxx_compiles) link_libraries(${LIBC++ABI} supc++) endif() endif() endif() else() CHECK_CXX_SOURCE_COMPILES("#include std::tuple f() { std::tuple r; return r; } int main() { return 0; } " tuple_sanity) if (NOT tuple_sanity) message(FATAL_ERROR "Clang and std::tuple brokenness detected. Please update your compiler.") endif() endif() endif() add_dependencies(other Vc) target_include_directories(Vc PUBLIC $ $ ) # option(Vc_ENABLE_INSTALL "Whether to install the library." 
OFF) # if (Vc_ENABLE_INSTALL) # install(TARGETS Vc EXPORT VcTargets DESTINATION lib${LIB_SUFFIX}) # install(DIRECTORY Vc/ DESTINATION include/Vc FILES_MATCHING REGEX "/*.(h|tcc|def)$") # install(FILES # Vc/Allocator # Vc/IO # Vc/Memory # Vc/SimdArray # Vc/Utils # Vc/Vc # Vc/algorithm # Vc/array # Vc/iterators # Vc/limits # Vc/simdize # Vc/span # Vc/type_traits # Vc/vector # DESTINATION include/Vc) # # Generate and install CMake package and modules # include(CMakePackageConfigHelpers) # set(PACKAGE_INSTALL_DESTINATION # lib${LIB_SUFFIX}/cmake/${PROJECT_NAME} # ) # install(EXPORT ${PROJECT_NAME}Targets # NAMESPACE ${PROJECT_NAME}:: # DESTINATION ${PACKAGE_INSTALL_DESTINATION} # EXPORT_LINK_INTERFACE_LIBRARIES # ) # write_basic_package_version_file( # ${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake # VERSION ${PROJECT_VERSION} # COMPATIBILITY AnyNewerVersion # ) # configure_package_config_file( # ${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in # ${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake # INSTALL_DESTINATION ${PACKAGE_INSTALL_DESTINATION} # PATH_VARS CMAKE_INSTALL_PREFIX # ) # install(FILES # cmake/UserWarning.cmake # cmake/VcMacros.cmake # cmake/AddCompilerFlag.cmake # cmake/CheckCCompilerFlag.cmake # cmake/CheckCXXCompilerFlag.cmake # cmake/OptimizeForArchitecture.cmake # cmake/FindVc.cmake # ${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}Config.cmake # ${CMAKE_CURRENT_BINARY_DIR}/cmake/${PROJECT_NAME}ConfigVersion.cmake # DESTINATION ${PACKAGE_INSTALL_DESTINATION} # ) # endif() # option(BUILD_TESTING "Build the testing tree." OFF) # include (CTest) # configure_file(${PROJECT_SOURCE_DIR}/CTestCustom.cmake ${PROJECT_BINARY_DIR}/CTestCustom.cmake COPYONLY) # if(BUILD_TESTING) # add_custom_target(build_tests ALL VERBATIM) # add_subdirectory(tests) # endif() # set(BUILD_EXAMPLES FALSE CACHE BOOL "Build examples.") # if(BUILD_EXAMPLES) # add_subdirectory(examples) # endif(BUILD_EXAMPLES) # Hide Vc_IMPL as it is only meant for users of Vc mark_as_advanced(Vc_IMPL) # find_program(BIN_CAT cat) # mark_as_advanced(BIN_CAT) # if(BIN_CAT) # file(REMOVE ${PROJECT_BINARY_DIR}/help.txt) # add_custom_command(OUTPUT ${PROJECT_BINARY_DIR}/help.txt # COMMAND ${CMAKE_MAKE_PROGRAM} help > ${PROJECT_BINARY_DIR}/help.txt # VERBATIM # ) # add_custom_target(cached_help # ${BIN_CAT} ${PROJECT_BINARY_DIR}/help.txt # DEPENDS ${PROJECT_BINARY_DIR}/help.txt # VERBATIM # ) # endif() conky-1.22.1/3rdparty/Vc/LICENSE000066400000000000000000000027461476554302100160650ustar00rootroot00000000000000Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. conky-1.22.1/3rdparty/Vc/README.md000066400000000000000000000157561476554302100163440ustar00rootroot00000000000000**Vc is now in maintenance mode and no longer actively developed. However, we continue to review pull requests with bugfixes from the community.** **You may be interested in switching to [std-simd](https://github.com/VcDevel/std-simd).** GCC 11 includes an experimental version of `std::simd` as part of libstdc++, which also works with clang. Features present in Vc 1.4 and not present in *std-simd* will eventually turn into Vc 2.0,which then depends on *std-simd*. # Vc: portable, zero-overhead C++ types for explicitly data-parallel programming Recent generations of CPUs, and GPUs in particular, require data-parallel codes for full efficiency. Data parallelism requires that the same sequence of operations is applied to different input data. CPUs and GPUs can thus reduce the necessary hardware for instruction decoding and scheduling in favor of more arithmetic and logic units, which execute the same instructions synchronously. On CPU architectures this is implemented via SIMD registers and instructions. A single SIMD register can store N values and a single SIMD instruction can execute N operations on those values. On GPU architectures N threads run in perfect sync, fed by a single instruction decoder/scheduler. Each thread has local memory and a given index to calculate the offsets in memory for loads and stores. Current C++ compilers can do automatic transformation of scalar codes to SIMD instructions (auto-vectorization). However, the compiler must reconstruct an intrinsic property of the algorithm that was lost when the developer wrote a purely scalar implementation in C++. Consequently, C++ compilers cannot vectorize any given code to its most efficient data-parallel variant. Especially larger data-parallel loops, spanning over multiple functions or even translation units, will often not be transformed into efficient SIMD code. The Vc library provides the missing link. Its types enable explicitly stating data-parallel operations on multiple values. The parallelism is therefore added via the type system. Competing approaches state the parallelism via new control structures and consequently new semantics inside the body of these control structures. Vc is a free software library to ease explicit vectorization of C++ code. It has an intuitive API and provides portability between different compilers and compiler versions as well as portability between different vector instruction sets. Thus an application written with Vc can be compiled for: * AVX and AVX2 * SSE2 up to SSE4.2 or SSE4a * Scalar * ~~AVX-512 (Vc 2 development)~~ * ~~NEON (in development)~~ * ~~NVIDIA GPUs / CUDA (research)~~ After Intel dropped MIC support with ICC 18, Vc 1.4 also removed support for it. 
## Examples

### Usage on Compiler Explorer

* [Simdize Example](https://godbolt.org/z/JVEM2j)
* [Total momentum and time stepping of `std::vector`](https://godbolt.org/z/JNdkL9)
* [Matrix Example](https://godbolt.org/z/fFEkuX): This uses vertical vectorization, which does not scale to different vector sizes. However, the example is instructive for comparison with similar solutions in other languages or libraries.
* [N-vortex solver](https://godbolt.org/z/4o1cg_) showing `simdize`d iteration over many `std::vector`. Note how [important the `-march` flag is, compared to plain `-mavx2 -mfma`](https://godbolt.org/z/hKiOjr).

### Scalar Product

Let's start from the code for calculating a 3D scalar product using builtin floats:

```cpp
using Vec3D = std::array<float, 3>;
float scalar_product(Vec3D a, Vec3D b) {
  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}
```

Using Vc, we can easily vectorize the code using the `float_v` type:

```cpp
using Vc::float_v;
using Vec3D = std::array<float_v, 3>;
float_v scalar_product(Vec3D a, Vec3D b) {
  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}
```

The above will scale to 1, 4, 8, 16, etc. scalar products calculated in parallel, depending on the target hardware's capabilities.

For comparison, the same vectorization using Intel SSE intrinsics is more verbose and uses prefix notation (i.e. function calls):

```cpp
using Vec3D = std::array<__m128, 3>;
__m128 scalar_product(Vec3D a, Vec3D b) {
  return _mm_add_ps(_mm_add_ps(_mm_mul_ps(a[0], b[0]), _mm_mul_ps(a[1], b[1])),
                    _mm_mul_ps(a[2], b[2]));
}
```

The above will neither scale to AVX, AVX-512, etc. nor is it portable to other SIMD ISAs.

## Build Requirements

* cmake >= 3.0
* C++11 compiler:
  * GCC >= 4.8.1
  * clang >= 3.4
  * ICC >= 18.0.5
  * Visual Studio 2019 (64-bit target)

## Building and Installing Vc

* Clone Vc and initialize Vc's git submodules:

```sh
git clone https://github.com/VcDevel/Vc.git
cd Vc
git submodule update --init
```

* Create a build directory:

```sh
$ mkdir build
$ cd build
```

* Configure with cmake and add relevant options:

```sh
$ cmake ..
```

Optionally, specify an installation directory:

```sh
$ cmake -DCMAKE_INSTALL_PREFIX=/opt/Vc ..
```

Optionally, include building the unit tests:

```sh
$ cmake -DBUILD_TESTING=ON ..
```

On Windows, if you have multiple versions of Visual Studio installed, you can select one:

```sh
$ cmake -G "Visual Studio 16 2019" ..
```

See `cmake --help` for a list of possible generators.

* Build and install:

```sh
$ cmake --build . -j 16
$ cmake --install . # may require permissions
```

On Windows, you can also open `Vc.sln` in Visual Studio and build/install from the IDE.

## Documentation

The documentation is generated via [doxygen](http://doxygen.org). You can build the documentation by running `doxygen` in the `doc` subdirectory. Alternatively, you can find nightly builds of the documentation at:

* [1.4 branch](https://vcdevel.github.io/Vc-1.4/)
* [1.4.4 release](https://vcdevel.github.io/Vc-1.4.4/)
* [1.4.3 release](https://vcdevel.github.io/Vc-1.4.3/)
* [1.4.2 release](https://vcdevel.github.io/Vc-1.4.2/)
* [1.4.1 release](https://vcdevel.github.io/Vc-1.4.1/)
* [1.4.0 release](https://vcdevel.github.io/Vc-1.4.0/)
* [1.3 branch](https://vcdevel.github.io/Vc-1.3/)
* [1.3.0 release](https://vcdevel.github.io/Vc-1.3.0/)
* [1.2.0 release](https://vcdevel.github.io/Vc-1.2.0/)
* [1.1.0 release](https://vcdevel.github.io/Vc-1.1.0/)
* [0.7 branch](https://vcdevel.github.io/Vc-0.7/)

## Publications

* [M.
Kretz, "Extending C++ for Explicit Data-Parallel Programming via SIMD Vector Types", Goethe University Frankfurt, Dissertation, 2015.](http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/38415) * [M. Kretz and V. Lindenstruth, "Vc: A C++ library for explicit vectorization", Software: Practice and Experience, 2011.](http://dx.doi.org/10.1002/spe.1149) * [M. Kretz, "Efficient Use of Multi- and Many-Core Systems with Vectorization and Multithreading", University of Heidelberg, 2009.](http://code.compeng.uni-frankfurt.de/attachments/13/Diplomarbeit.pdf) [Work on integrating the functionality of Vc in the C++ standard library.]( https://github.com/VcDevel/Vc/wiki/ISO-Standardization-of-the-Vector-classes) ## License Vc is released under the terms of the [3-clause BSD license](http://opensource.org/licenses/BSD-3-Clause). conky-1.22.1/3rdparty/Vc/Vc/000077500000000000000000000000001476554302100154175ustar00rootroot00000000000000conky-1.22.1/3rdparty/Vc/Vc/Allocator000066400000000000000000000274351476554302100172750ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_ALLOCATOR_H_ #define VC_ALLOCATOR_H_ #include #include #include #include #include "global.h" #include "common/macros.h" /** * \ingroup Utilities * * Convenience macro to set the default allocator for a given \p Type to * Vc::Allocator. * * \param Type Your type that you want to use with STL containers. * * \note You have to use this macro in the global namespace. 
*/ #ifdef Vc_MSVC #define Vc_DECLARE_ALLOCATOR(Type) \ namespace std \ { \ template <> class allocator : public ::Vc::Allocator \ { \ public: \ template struct rebind { \ typedef ::std::allocator other; \ }; \ /* MSVC brokenness: the following function is optional - just doesn't compile \ * without it */ \ const allocator &select_on_container_copy_construction() const { return *this; } \ }; \ } #else #define Vc_DECLARE_ALLOCATOR(Type) \ namespace std \ { \ template <> class allocator : public ::Vc::Allocator \ { \ public: \ template struct rebind { \ typedef ::std::allocator other; \ }; \ }; \ } #endif namespace Vc_VERSIONED_NAMESPACE { using std::size_t; using std::ptrdiff_t; /** * \headerfile Allocator * An allocator that uses global new and supports over-aligned types, as per [C++11 20.6.9]. * * Meant as a simple replacement for the allocator defined in the C++ Standard. * Allocation is done using the global new/delete operators. But if the alignment property of \p * T is larger than the size of a pointer, the allocate function allocates slightly more memory * to adjust the pointer for correct alignment. * * If the \p T does not require over-alignment no additional memory will be allocated. * * \tparam T The type of objects to allocate. * * Example: * \code * struct Data { * Vc::float_v x, y, z; * }; * * void fun() * { * std::vector dat0; // this will use std::allocator, which probably ignores the * // alignment requirements for Data. Thus any access to dat0 may * // crash your program. * * std::vector > dat1; // now std::vector will get correctly aligned * // memory. Accesses to dat1 are safe. * ... * \endcode * * %Vc ships a macro to conveniently tell STL to use Vc::Allocator per default for a given type: * \code * struct Data { * Vc::float_v x, y, z; * }; * Vc_DECLARE_ALLOCATOR(Data) * * void fun() * { * std::vector dat0; // good now * ... * \endcode * * \ingroup Utilities */ template class Allocator { private: enum Constants { #ifdef Vc_HAVE_STD_MAX_ALIGN_T NaturalAlignment = alignof(std::max_align_t), #elif defined(Vc_HAVE_MAX_ALIGN_T) NaturalAlignment = alignof(::max_align_t), #else NaturalAlignment = sizeof(void *) > alignof(long double) ? sizeof(void *) : (alignof(long double) > alignof(long long) ? alignof(long double) : alignof(long long)), #endif #if defined Vc_IMPL_AVX SimdAlignment = 32, #elif defined Vc_IMPL_SSE SimdAlignment = 16, #else SimdAlignment = 1, #endif Alignment = alignof(T) > SimdAlignment ? alignof(T) : SimdAlignment, /* The number of extra bytes allocated must be large enough to put a pointer right * before the adjusted address. This pointer stores the original address, which is * required to call ::operator delete in deallocate. * * The address we get from ::operator new is a multiple of NaturalAlignment: * p = N * NaturalAlignment * * Since all alignments are powers of two, Alignment is a multiple of NaturalAlignment: * Alignment = k * NaturalAlignment * * two cases: * 1. If p is already aligned to Alignment then allocate will return p + Alignment. In * this case there are Alignment Bytes available to store a pointer. * 2. If p is not aligned then p + (k - (N modulo k)) * NaturalAlignment will be * returned. Since NaturalAlignment >= sizeof(void*) the pointer fits. */ ExtraBytes = Alignment > NaturalAlignment ? 
Alignment : 0, AlignmentMask = Alignment - 1 }; public: typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef T value_type; template struct rebind { typedef Allocator other; }; Allocator() throw() { } Allocator(const Allocator&) throw() { } template Allocator(const Allocator&) throw() { } pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } pointer allocate(size_type n, const void* = 0) { if (n > this->max_size()) { throw std::bad_alloc(); } char *p = static_cast(::operator new(n * sizeof(T) + ExtraBytes)); if (ExtraBytes > 0) { char *const pp = p; p += ExtraBytes; const char *null = 0; p -= ((p - null) & AlignmentMask); // equivalent to p &= ~AlignmentMask; reinterpret_cast(p)[-1] = pp; } return reinterpret_cast(p); } void deallocate(pointer p, size_type) { if (ExtraBytes > 0) { p = reinterpret_cast(p)[-1]; } ::operator delete(p); } size_type max_size() const throw() { return size_t(-1) / sizeof(T); } #ifdef Vc_MSVC // MSVC brokenness: the following function is optional - just doesn't compile without it const Allocator &select_on_container_copy_construction() const { return *this; } // MSVC also requires a function that neither C++98 nor C++11 mention // but it doesn't support variadic templates... otherwise the Vc_CXX11 clause would be nice void construct(pointer p) { ::new(p) T(); } // we still need the C++98 version: void construct(pointer p, const T& val) { ::new(p) T(val); } void destroy(pointer p) { p->~T(); } #else template void construct(U* p, Args&&... args) { ::new(p) U(std::forward(args)...); } template void destroy(U* p) { p->~U(); } #endif }; template inline bool operator==(const Allocator&, const Allocator&) { return true; } template inline bool operator!=(const Allocator&, const Allocator&) { return false; } } #include "vector.h" namespace std { template class allocator > : public ::Vc::Allocator > { public: template struct rebind { typedef ::std::allocator other; }; #ifdef Vc_MSVC // MSVC brokenness: the following function is optional - just doesn't compile without it const allocator &select_on_container_copy_construction() const { return *this; } #endif }; template class allocator> : public ::Vc::Allocator> { public: template struct rebind { typedef ::std::allocator other; }; #ifdef Vc_MSVC // MSVC brokenness: the following function is optional - just doesn't compile without it const allocator &select_on_container_copy_construction() const { return *this; } #endif }; template class allocator> : public ::Vc::Allocator> { public: template struct rebind { typedef ::std::allocator other; }; #ifdef Vc_MSVC // MSVC brokenness: the following function is optional - just doesn't compile without it const allocator &select_on_container_copy_construction() const { return *this; } #endif }; template class allocator> : public ::Vc::Allocator> { public: template struct rebind { typedef ::std::allocator other; }; #ifdef Vc_MSVC // MSVC brokenness: the following function is optional - just doesn't compile without it const allocator &select_on_container_copy_construction() const { return *this; } #endif }; } #endif // VC_ALLOCATOR_H_ // vim: ft=cpp et sw=4 sts=4 conky-1.22.1/3rdparty/Vc/Vc/IO000066400000000000000000000201471476554302100156550ustar00rootroot00000000000000/* This file is part of the Vc library. 
{{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_IO_ #define VC_IO_ #include "common/types.h" #include "common/simdarrayfwd.h" #include "common/memoryfwd.h" #include #if defined(__GNUC__) && !defined(_WIN32) && defined(_GLIBCXX_OSTREAM) #define Vc_HACK_OSTREAM_FOR_TTY 1 #endif #ifdef Vc_HACK_OSTREAM_FOR_TTY #include #include #endif namespace Vc_VERSIONED_NAMESPACE { namespace { #ifdef Vc_HACK_OSTREAM_FOR_TTY class hacked_ostream : public std::ostream { public: using std::ostream::_M_streambuf; }; bool mayUseColor(const std::ostream &os) __attribute__((__const__)); bool mayUseColor(const std::ostream &os) { std::basic_streambuf *hack1 = const_cast *>(os.*(&hacked_ostream::_M_streambuf)); __gnu_cxx::stdio_sync_filebuf *hack = dynamic_cast<__gnu_cxx::stdio_sync_filebuf *>(hack1); if (!hack) { return false; } FILE *file = hack->file(); return 1 == isatty(fileno(file)); } #else bool mayUseColor(const std::ostream &) { return false; } #endif } // anonymous namespace namespace AnsiColor { struct Type { const char *data; }; static const Type green = {"\033[1;40;32m"}; static const Type yellow = {"\033[1;40;33m"}; static const Type blue = {"\033[1;40;34m"}; static const Type normal = {"\033[0m"}; inline std::ostream &operator<<(std::ostream &out, const Type &c) { if (mayUseColor(out)) { out << c.data; } return out; } } // namespace AnsiColor /** * \ingroup Vectors * \headerfile IO * * Prints the contents of a vector into a stream object. * * \code * const Vc::int_v v(Vc::IndexesFromZero); * std::cout << v << std::endl; * \endcode * will output (with SSE): \verbatim [0, 1, 2, 3] \endverbatim * * \param out Any standard C++ ostream object. For example std::cout or a * std::stringstream object. * \param v Any Vc::Vector object. * \return The ostream object: to chain multiple stream operations. * * \note With the GNU standard library this function will check whether the * output stream is a tty in which case it colorizes the output. 
*/ template inline std::ostream &operator<<(std::ostream &out, const Vc::Vector &v) { using TT = typename std::conditional::value || std::is_same::value || std::is_same::value, int, T>::type; out << AnsiColor::green << '['; out << TT(v[0]); for (size_t i = 1; i < v.Size; ++i) { out << ", " << TT(v[i]); } out << ']' << AnsiColor::normal; return out; } /** * \ingroup Masks * \headerfile IO * * Prints the contents of a mask into a stream object. * * \code * const Vc::short_m m = Vc::short_v::IndexesFromZero() < 3; * std::cout << m << std::endl; * \endcode * will output (with SSE): \verbatim m[1110 0000] \endverbatim * * \param out Any standard C++ ostream object. For example std::cout or a * std::stringstream object. * \param m Any Vc::Mask object. * \return The ostream object: to chain multiple stream operations. * * \note With the GNU standard library this function will check whether the * output stream is a tty in which case it colorizes the output. */ template inline std::ostream &operator<<(std::ostream &out, const Vc::Mask &m) { out << AnsiColor::blue << "m["; for (unsigned int i = 0; i < m.Size; ++i) { if (i > 0 && (i % 4) == 0) { out << ' '; } if (m[i]) { out << AnsiColor::yellow << '1'; } else { out << AnsiColor::blue << '0'; } } out << AnsiColor::blue << ']' << AnsiColor::normal; return out; } namespace Common { #ifdef DOXYGEN /** * \ingroup Utilities * \headerfile dox.h * * Prints the contents of a Memory object into a stream object. * * \code * Vc::Memory m; * for (int i = 0; i < m.entriesCount(); ++i) { * m[i] = i; * } * std::cout << m << std::endl; * \endcode * will output (with SSE): \verbatim {[0, 1, 2, 3] [4, 5, 6, 7] [8, 9, 0, 0]} \endverbatim * * \param s Any standard C++ ostream object. For example std::cout or a std::stringstream object. * \param m Any Vc::Memory object. * \return The ostream object: to chain multiple stream operations. * * \note With the GNU standard library this function will check whether the * output stream is a tty in which case it colorizes the output. * * \warning Please do not forget that printing a large memory object can take a long time. 
*/ template inline std::ostream &operator<<(std::ostream &s, const Vc::MemoryBase &m); #endif template inline std::ostream &operator<<(std::ostream &out, const MemoryBase &m ) { out << AnsiColor::blue << '{' << AnsiColor::normal; for (unsigned int i = 0; i < m.vectorsCount(); ++i) { out << V(m.vector(i)); } out << AnsiColor::blue << '}' << AnsiColor::normal; return out; } template inline std::ostream &operator<<(std::ostream &out, const MemoryBase &m ) { out << AnsiColor::blue << '{' << AnsiColor::normal; for (size_t i = 0; i < m.rowsCount(); ++i) { if (i > 0) { out << "\n "; } const size_t vcount = m[i].vectorsCount(); for (size_t j = 0; j < vcount; ++j) { out << V(m[i].vector(j)); } } out << AnsiColor::blue << '}' << AnsiColor::normal; return out; } } // namespace Common template inline std::ostream &operator<<(std::ostream &out, const SimdArray &v) { out << AnsiColor::green << '<' << v[0]; for (size_t i = 1; i < N; ++i) { if (i % 4 == 0) out << " |"; out << ' ' << v[i]; } return out << '>' << AnsiColor::normal; } template inline std::ostream &operator<<(std::ostream &out, const SimdMaskArray &m) { out << AnsiColor::blue << "«"; for (size_t i = 0; i < N; ++i) { if (i > 0 && (i % 4) == 0) { out << ' '; } if ( m[i] ) { out << AnsiColor::yellow << '1'; } else { out << AnsiColor::blue << '0'; } } return out << AnsiColor::blue << "»" << AnsiColor::normal; } } #endif // VC_IO_ // vim: ft=cpp foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/Memory000066400000000000000000000034601476554302100166150ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_MEMORY_ #define VC_MEMORY_ #include "vector.h" #include "common/memory.h" #include "common/interleavedmemory.h" #include "common/make_unique.h" namespace Vc_VERSIONED_NAMESPACE { using Common::make_unique; } #endif // VC_MEMORY_ // vim: ft=cpp foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/SimdArray000066400000000000000000000032411476554302100172350ustar00rootroot00000000000000/* This file is part of the Vc library. 
{{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_SIMDARRAY_ #define VC_SIMDARRAY_ #include "common/simdarray.h" #endif // VC_SIMDARRAY_ // vim: ft=cpp foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/Utils000066400000000000000000000034621476554302100164470ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2010-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_UTILS_ #define VC_UTILS_ #include "global.h" #ifdef Vc_IMPL_Scalar # define VECTOR_NAMESPACE Scalar #else # define VECTOR_NAMESPACE SSE #endif #include "common/deinterleave.h" #include "common/makeContainer.h" #endif // VC_UTILS_ // vim: ft=cpp foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/Vc000066400000000000000000000034661476554302100157230ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_VC_ #define VC_VC_ #include "vector.h" #include "IO" #include "Memory" #include "Utils" #include "Allocator" #include "algorithm" #include "iterators" #include "simdize" #include "array" #include "span" #include "vector" #endif // VC_VC_ // vim: ft=cpp foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/algorithm000066400000000000000000000000371476554302100173300ustar00rootroot00000000000000#include "common/algorithms.h" conky-1.22.1/3rdparty/Vc/Vc/array000066400000000000000000000250651476554302100164700ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ //===---------------------------- array -----------------------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #ifndef VC_INCLUDE_VC_ARRAY_ #define VC_INCLUDE_VC_ARRAY_ #include #include #include #include #include #include "common/subscript.h" namespace Vc_VERSIONED_NAMESPACE { /** * \ingroup Containers * This is `std::array` with additional subscript operators supporting gather and scatter operations. * * The [std::array](https://en.cppreference.com/w/cpp/container/array) documentation applies. * * Gathers from structured data (AoS: arrays of struct) are possible via a special * subscript operator. * Example: * \code * Vc::array data; * std::iota(data.begin(), data.end(), 0.f); // fill with values 0, 1, 2, ... * auto indexes = float_v::IndexType::IndexesFromZero(); * float_v gathered = data[indexes]; // gathered == [0, 1, 2, ...] * \endcode * * This also works for gathers into arrays of structures: * \code * struct Point { float x, y, z; }; * Vc::array points; * // fill points ... * auto indexes = float_v::IndexType::IndexesFromZero(); * float_v xs = data[indexes][&Point::x]; // [points[0].x, points[1].x, points[2].x, ...] * float_v ys = data[indexes][&Point::y]; // [points[0].y, points[1].y, points[2].y, ...] * float_v zs = data[indexes][&Point::z]; // [points[0].z, points[1].z, points[2].z, ...] * \endcode * * Arrays may also be nested: * \code: * Vc::array, 100> points; * // fill points ... * auto indexes = float_v::IndexType::IndexesFromZero(); * float_v xs = data[indexes][0]; // [points[0][0], points[1][0], points[2][0], ...] * float_v ys = data[indexes][1]; // [points[0][1], points[1][1], points[2][1], ...] * float_v zs = data[indexes][2]; // [points[0][2], points[1][2], points[2][2], ...] * \endcode */ template struct array { // types: typedef array self_; typedef T value_type; typedef value_type& reference; typedef const value_type& const_reference; typedef value_type* iterator; typedef const value_type* const_iterator; typedef value_type* pointer; typedef const value_type* const_pointer; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef std::reverse_iterator reverse_iterator; typedef std::reverse_iterator const_reverse_iterator; value_type elems_[Size > 0 ? 
Size : 1]; // No explicit construct/copy/destroy for aggregate type void fill(const value_type& u_) { std::fill_n(elems_, Size, u_); } void swap(array& a_) noexcept(std::swap(std::declval(), std::declval())) { std::swap_ranges(elems_, elems_ + Size, a_.elems_); } // iterators: iterator begin() noexcept { return iterator(elems_); } const_iterator begin() const noexcept { return const_iterator(elems_); } iterator end() noexcept { return iterator(elems_ + Size); } const_iterator end() const noexcept { return const_iterator(elems_ + Size); } reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } reverse_iterator rend() noexcept { return reverse_iterator(begin()); } const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } const_iterator cbegin() const noexcept { return begin(); } const_iterator cend() const noexcept { return end(); } const_reverse_iterator crbegin() const noexcept { return rbegin(); } const_reverse_iterator crend() const noexcept { return rend(); } // capacity: constexpr size_type size() const noexcept { return Size; } constexpr size_type max_size() const noexcept { return Size; } constexpr bool empty() const noexcept { return Size == 0; } // element access: reference operator[](size_type n_) { return elems_[n_]; } constexpr const_reference operator[](size_type n_) const { return elems_[n_]; } /** * \name Data-Parallel Subscripting for Gather & Scatter */ ///@{ template Vc_ALWAYS_INLINE auto operator[](I&& arg_) -> decltype(subscript_operator(*this, std::forward(arg_))) { return subscript_operator(*this, std::forward(arg_)); } template Vc_ALWAYS_INLINE auto operator[](I&& arg_) const -> decltype(subscript_operator(*this, std::forward(arg_))) { return subscript_operator(*this, std::forward(arg_)); } ///@} reference at(size_type n_); constexpr const_reference at(size_type n_) const; reference front() { return elems_[0]; } constexpr const_reference front() const { return elems_[0]; } reference back() { return elems_[Size > 0 ? Size - 1 : 0]; } constexpr const_reference back() const { return elems_[Size > 0 ? Size - 1 : 0]; } value_type* data() noexcept { return elems_; } const value_type* data() const noexcept { return elems_; } }; template typename array::reference array::at(size_type n_) { if (n_ >= Size) { throw std::out_of_range("array::at"); } return elems_[n_]; } template constexpr typename array::const_reference array::at(size_type n_) const { return n_ >= Size ? (throw std::out_of_range("array::at"), elems_[0]) : elems_[n_]; } template inline bool operator==(const array& x_, const array& y_) { return std::equal(x_.elems_, x_.elems_ + Size, y_.elems_); } template inline bool operator!=(const array& x_, const array& y_) { return !(x_ == y_); } template inline bool operator<(const array& x_, const array& y_) { return std::lexicographical_compare(x_.elems_, x_.elems_ + Size, y_.elems_, y_.elems_ + Size); } template inline bool operator>(const array& x_, const array& y_) { return y_ < x_; } template inline bool operator<=(const array& x_, const array& y_) { return !(y_ < x_); } template inline bool operator>=(const array& x_, const array& y_) { return !(x_ < y_); } /**\name non-member begin & end * Implement the non-member begin & end functions in the %Vc namespace so that ADL works * and `begin(some_vc_array)` always works. 
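 *
 * A tiny illustration (the variable name is arbitrary): because these
 * overloads live in the %Vc namespace, argument-dependent lookup finds them
 * without any qualification.
 * \code
 * Vc::array<float, 8> data = {};
 * auto it = begin(data);  // found via ADL
 * auto e  = end(data);
 * \endcode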
*/ ///@{ template inline auto begin(array& arr) -> decltype(arr.begin()) { return arr.begin(); } template inline auto begin(const array& arr) -> decltype(arr.begin()) { return arr.begin(); } template inline auto end(array& arr) -> decltype(arr.end()) { return arr.end(); } template inline auto end(const array& arr) -> decltype(arr.end()) { return arr.end(); } ///@} namespace Traits { template struct has_no_allocated_data_impl> : public std::true_type { }; template struct has_contiguous_storage_impl> : public std::true_type { }; } // namespace Traits } // namespace Vc namespace std { template inline #ifdef Vc_MSVC // MSVC fails to do SFINAE correctly and gets totally confused: // error C2433: 'type': 'inline' not permitted on data declarations // error C4430: missing type specifier - int assumed. Note: C++ does not support default-int // error C2061: syntax error: identifier 'swap' void #else typename enable_if(), declval()))>::value, void>::type #endif swap(const Vc::array& x_, const Vc::array& y_) noexcept(swap(declval(), declval())) { x_.swap(y_); } template class tuple_size> : public integral_constant { }; template class tuple_element> { public: typedef T type; }; template inline constexpr typename std::enable_if<(I < Size), T&>::type get( Vc::array& a_) noexcept { return a_.elems_[I]; } template inline constexpr typename std::enable_if<(I < Size), const T&>::type get( const Vc::array& a_) noexcept { return a_.elems_[I]; } template inline constexpr typename std::enable_if<(I < Size), T&&>::type get( Vc::array&& a_) noexcept { return std::move(a_.elems_[I]); } } // namespace std #endif // VC_INCLUDE_VC_ARRAY_ // vim: ft=cpp foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/avx/000077500000000000000000000000001476554302100162155ustar00rootroot00000000000000conky-1.22.1/3rdparty/Vc/Vc/avx/casts.h000066400000000000000000000416731476554302100175160ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_AVX_CASTS_H_ #define VC_AVX_CASTS_H_ #include "intrinsics.h" #include "types.h" #include "../sse/casts.h" #include "shuffle.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace AVX { namespace Casts { template Vc_INTRINSIC_L T avx_cast(__m128 v) Vc_INTRINSIC_R; template Vc_INTRINSIC_L T avx_cast(__m128i v) Vc_INTRINSIC_R; template Vc_INTRINSIC_L T avx_cast(__m128d v) Vc_INTRINSIC_R; template Vc_INTRINSIC_L T avx_cast(__m256 v) Vc_INTRINSIC_R; template Vc_INTRINSIC_L T avx_cast(__m256i v) Vc_INTRINSIC_R; template Vc_INTRINSIC_L T avx_cast(__m256d v) Vc_INTRINSIC_R; // 128 -> 128 template<> Vc_INTRINSIC __m128 avx_cast(__m128 v) { return v; } template<> Vc_INTRINSIC __m128 avx_cast(__m128i v) { return _mm_castsi128_ps(v); } template<> Vc_INTRINSIC __m128 avx_cast(__m128d v) { return _mm_castpd_ps(v); } template<> Vc_INTRINSIC __m128i avx_cast(__m128 v) { return _mm_castps_si128(v); } template<> Vc_INTRINSIC __m128i avx_cast(__m128i v) { return v; } template<> Vc_INTRINSIC __m128i avx_cast(__m128d v) { return _mm_castpd_si128(v); } template<> Vc_INTRINSIC __m128d avx_cast(__m128 v) { return _mm_castps_pd(v); } template<> Vc_INTRINSIC __m128d avx_cast(__m128i v) { return _mm_castsi128_pd(v); } template<> Vc_INTRINSIC __m128d avx_cast(__m128d v) { return v; } // 128 -> 256 // FIXME: the following casts leave the upper 128bits undefined. With GCC and ICC I've never // seen the cast not do what I want though: after a VEX-coded SSE instruction the register's // upper 128bits are zero. Thus using the same register as AVX register will have the upper // 128bits zeroed. MSVC, though, implements _mm256_castxx128_xx256 with a 128bit move to memory // + 256bit load. Thus the upper 128bits are really undefined. But there is no intrinsic to do // what I want (i.e. alias the register, disallowing the move to memory in-between). I'm stuck, // do we really want to rely on specific compiler behavior here? 
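// Illustration of the practical difference discussed above (a sketch only,
// not used by the library itself): avx_cast leaves the upper half of the
// destination register unspecified, whereas zeroExtend (defined below)
// guarantees zeroed upper 128 bits.
//
//   __m128 lo = _mm_set1_ps(1.f);
//   __m256 a  = avx_cast<__m256>(lo);  // upper 128 bits formally undefined
//   __m256 b  = zeroExtend(lo);        // upper 128 bits guaranteed zero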
template<> Vc_INTRINSIC __m256 avx_cast(__m128 v) { return _mm256_castps128_ps256(v); } template<> Vc_INTRINSIC __m256 avx_cast(__m128i v) { return _mm256_castps128_ps256(_mm_castsi128_ps(v)); } template<> Vc_INTRINSIC __m256 avx_cast(__m128d v) { return _mm256_castps128_ps256(_mm_castpd_ps(v)); } template<> Vc_INTRINSIC __m256i avx_cast(__m128 v) { return _mm256_castsi128_si256(_mm_castps_si128(v)); } template<> Vc_INTRINSIC __m256i avx_cast(__m128i v) { return _mm256_castsi128_si256(v); } template<> Vc_INTRINSIC __m256i avx_cast(__m128d v) { return _mm256_castsi128_si256(_mm_castpd_si128(v)); } template<> Vc_INTRINSIC __m256d avx_cast(__m128 v) { return _mm256_castpd128_pd256(_mm_castps_pd(v)); } template<> Vc_INTRINSIC __m256d avx_cast(__m128i v) { return _mm256_castpd128_pd256(_mm_castsi128_pd(v)); } template<> Vc_INTRINSIC __m256d avx_cast(__m128d v) { return _mm256_castpd128_pd256(v); } #if defined Vc_MSVC || defined Vc_CLANG || defined Vc_APPLECLANG static Vc_INTRINSIC Vc_CONST __m256 zeroExtend(__m128 v) { return _mm256_permute2f128_ps (_mm256_castps128_ps256(v), _mm256_castps128_ps256(v), 0x80); } static Vc_INTRINSIC Vc_CONST __m256i zeroExtend(__m128i v) { return _mm256_permute2f128_si256(_mm256_castsi128_si256(v), _mm256_castsi128_si256(v), 0x80); } static Vc_INTRINSIC Vc_CONST __m256d zeroExtend(__m128d v) { return _mm256_permute2f128_pd (_mm256_castpd128_pd256(v), _mm256_castpd128_pd256(v), 0x80); } #else static Vc_INTRINSIC Vc_CONST __m256 zeroExtend(__m128 v) { return _mm256_castps128_ps256(v); } static Vc_INTRINSIC Vc_CONST __m256i zeroExtend(__m128i v) { return _mm256_castsi128_si256(v); } static Vc_INTRINSIC Vc_CONST __m256d zeroExtend(__m128d v) { return _mm256_castpd128_pd256(v); } #endif // 256 -> 128 template<> Vc_INTRINSIC __m128 avx_cast(__m256 v) { return _mm256_castps256_ps128(v); } template<> Vc_INTRINSIC __m128 avx_cast(__m256i v) { return _mm256_castps256_ps128(_mm256_castsi256_ps(v)); } template<> Vc_INTRINSIC __m128 avx_cast(__m256d v) { return _mm256_castps256_ps128(_mm256_castpd_ps(v)); } template<> Vc_INTRINSIC __m128i avx_cast(__m256 v) { return _mm256_castsi256_si128(_mm256_castps_si256(v)); } template<> Vc_INTRINSIC __m128i avx_cast(__m256i v) { return _mm256_castsi256_si128(v); } template<> Vc_INTRINSIC __m128i avx_cast(__m256d v) { return _mm256_castsi256_si128(_mm256_castpd_si256(v)); } template<> Vc_INTRINSIC __m128d avx_cast(__m256 v) { return _mm256_castpd256_pd128(_mm256_castps_pd(v)); } template<> Vc_INTRINSIC __m128d avx_cast(__m256i v) { return _mm256_castpd256_pd128(_mm256_castsi256_pd(v)); } template<> Vc_INTRINSIC __m128d avx_cast(__m256d v) { return _mm256_castpd256_pd128(v); } // 256 -> 256 template<> Vc_INTRINSIC __m256 avx_cast(__m256 v) { return v; } template<> Vc_INTRINSIC __m256 avx_cast(__m256i v) { return _mm256_castsi256_ps(v); } template<> Vc_INTRINSIC __m256 avx_cast(__m256d v) { return _mm256_castpd_ps(v); } template<> Vc_INTRINSIC __m256i avx_cast(__m256 v) { return _mm256_castps_si256(v); } template<> Vc_INTRINSIC __m256i avx_cast(__m256i v) { return v; } template<> Vc_INTRINSIC __m256i avx_cast(__m256d v) { return _mm256_castpd_si256(v); } template<> Vc_INTRINSIC __m256d avx_cast(__m256 v) { return _mm256_castps_pd(v); } template<> Vc_INTRINSIC __m256d avx_cast(__m256i v) { return _mm256_castsi256_pd(v); } template<> Vc_INTRINSIC __m256d avx_cast(__m256d v) { return v; } // simplify splitting 256-bit registers in 128-bit registers Vc_INTRINSIC Vc_CONST __m128 lo128(__m256 v) { return avx_cast<__m128>(v); } Vc_INTRINSIC 
Vc_CONST __m128d lo128(__m256d v) { return avx_cast<__m128d>(v); } Vc_INTRINSIC Vc_CONST __m128i lo128(__m256i v) { return avx_cast<__m128i>(v); } Vc_INTRINSIC Vc_CONST __m128 hi128(__m256 v) { return extract128<1>(v); } Vc_INTRINSIC Vc_CONST __m128d hi128(__m256d v) { return extract128<1>(v); } Vc_INTRINSIC Vc_CONST __m128i hi128(__m256i v) { return extract128<1>(v); } // simplify combining 128-bit registers in 256-bit registers Vc_INTRINSIC Vc_CONST __m256 concat(__m128 a, __m128 b) { return insert128<1>(avx_cast<__m256 >(a), b); } Vc_INTRINSIC Vc_CONST __m256d concat(__m128d a, __m128d b) { return insert128<1>(avx_cast<__m256d>(a), b); } Vc_INTRINSIC Vc_CONST __m256i concat(__m128i a, __m128i b) { return insert128<1>(avx_cast<__m256i>(a), b); } } // namespace Casts using namespace Casts; } // namespace AVX namespace AVX2 { using namespace AVX::Casts; } // namespace AVX2 namespace AVX { template struct ConvertTag {}; Vc_INTRINSIC __m256i convert(__m256 v, ConvertTag) { return _mm256_cvttps_epi32(v); } Vc_INTRINSIC __m128i convert(__m256d v, ConvertTag) { return _mm256_cvttpd_epi32(v); } Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag) { return v; } Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag) { return v; } Vc_INTRINSIC __m256i convert(__m128i v, ConvertTag) { #ifdef Vc_IMPL_AVX2 return _mm256_cvtepi16_epi32(v); #else return AVX::srai_epi32<16>( concat(_mm_unpacklo_epi16(v, v), _mm_unpackhi_epi16(v, v))); #endif } Vc_INTRINSIC __m256i convert(__m128i v, ConvertTag) { #ifdef Vc_IMPL_AVX2 return _mm256_cvtepu16_epi32(v); #else return AVX::srli_epi32<16>( concat(_mm_unpacklo_epi16(v, v), _mm_unpackhi_epi16(v, v))); #endif } Vc_INTRINSIC __m256i convert(__m256 v, ConvertTag) { using namespace AVX; return _mm256_castps_si256(_mm256_blendv_ps( _mm256_castsi256_ps(_mm256_cvttps_epi32(v)), _mm256_castsi256_ps(add_epi32(_mm256_cvttps_epi32(_mm256_sub_ps(v, set2power31_ps())), set2power31_epu32())), cmpge_ps(v, set2power31_ps()))); } Vc_INTRINSIC __m128i convert(__m256d v, ConvertTag) { using namespace AVX; return _mm_xor_si128( _mm256_cvttpd_epi32(_mm256_sub_pd(_mm256_floor_pd(v), set1_pd(0x80000000u))), _mm_set2power31_epu32()); } Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag) { return v; } Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag) { return v; } Vc_INTRINSIC __m256i convert(__m128i v, ConvertTag) { #ifdef Vc_IMPL_AVX2 return _mm256_cvtepi16_epi32(v); #else return AVX::srai_epi32<16>( concat(_mm_unpacklo_epi16(v, v), _mm_unpackhi_epi16(v, v))); #endif } Vc_INTRINSIC __m256i convert(__m128i v, ConvertTag) { #ifdef Vc_IMPL_AVX2 return _mm256_cvtepu16_epi32(v); #else return AVX::srli_epi32<16>( concat(_mm_unpacklo_epi16(v, v), _mm_unpackhi_epi16(v, v))); #endif } Vc_INTRINSIC __m256 convert(__m256 v, ConvertTag) { return v; } Vc_INTRINSIC __m128 convert(__m256d v, ConvertTag) { return _mm256_cvtpd_ps(v); } Vc_INTRINSIC __m256 convert(__m256i v, ConvertTag) { return _mm256_cvtepi32_ps(v); } Vc_INTRINSIC __m256 convert(__m256i v, ConvertTag) { // this is complicated because cvtepi32_ps only supports signed input. Thus, all // input values with the MSB set would produce a negative result. We can reuse the // cvtepi32_ps instruction if we unset the MSB. But then the rounding results can be // different. Since float uses 24 bits for the mantissa (effectively), the 9-bit LSB // determines the rounding direction. (Consider the bits ...8'7654'3210. The bits [0:7] // need to be dropped and if > 0x80 round up, if < 0x80 round down. 
If [0:7] == 0x80 // then the rounding direction is determined by bit [8] for round to even. That's why // the 9th bit is relevant for the rounding decision.) // If the MSB of the input is set to 0, the cvtepi32_ps instruction makes its rounding // decision on the lowest 8 bits instead. A second rounding decision is made when // float(0x8000'0000) is added. This will rarely fix the rounding issue. // // Here's what the standard rounding mode expects: // 0xc0000080 should cvt to 0xc0000000 // 0xc0000081 should cvt to 0xc0000100 // -- should cvt to 0xc0000100 // 0xc000017f should cvt to 0xc0000100 // 0xc0000180 should cvt to 0xc0000200 // // However: using float(input ^ 0x8000'0000) + float(0x8000'0000) we get: // 0xc0000081 would cvt to 0xc0000000 // 0xc00000c0 would cvt to 0xc0000000 // 0xc00000c1 would cvt to 0xc0000100 // 0xc000013f would cvt to 0xc0000100 // 0xc0000140 would cvt to 0xc0000200 // // Solution: float(input & 0x7fff'fe00) + (float(0x8000'0000) + float(input & 0x1ff)) // This ensures the rounding decision is made on the 9-bit LSB when 0x8000'0000 is // added to the float value of the low 8 bits of the input. using namespace AVX; return _mm256_blendv_ps( _mm256_cvtepi32_ps(v), _mm256_add_ps(_mm256_cvtepi32_ps(and_si256(v, set1_epi32(0x7ffffe00))), _mm256_add_ps(set2power31_ps(), _mm256_cvtepi32_ps(and_si256( v, set1_epi32(0x000001ff))))), _mm256_castsi256_ps(cmplt_epi32(v, _mm256_setzero_si256()))); } Vc_INTRINSIC __m256 convert(__m128i v, ConvertTag) { return _mm256_cvtepi32_ps(convert(v, ConvertTag< short, int>())); } Vc_INTRINSIC __m256 convert(__m128i v, ConvertTag) { return _mm256_cvtepi32_ps(convert(v, ConvertTag())); } Vc_INTRINSIC __m256d convert(__m128 v, ConvertTag) { return _mm256_cvtps_pd(v); } Vc_INTRINSIC __m256d convert(__m256d v, ConvertTag) { return v; } Vc_INTRINSIC __m256d convert(__m128i v, ConvertTag) { return _mm256_cvtepi32_pd(v); } Vc_INTRINSIC __m256d convert(__m128i v, ConvertTag) { using namespace AVX; return _mm256_add_pd( _mm256_cvtepi32_pd(_mm_xor_si128(v, _mm_setmin_epi32())), set1_pd(1u << 31)); } Vc_INTRINSIC __m256d convert(__m128i v, ConvertTag) { return convert(convert(v, SSE::ConvertTag< short, int>()), ConvertTag()); } Vc_INTRINSIC __m256d convert(__m128i v, ConvertTag) { return convert(convert(v, SSE::ConvertTag()), ConvertTag()); } Vc_INTRINSIC __m128i convert(__m256i v, ConvertTag) { #ifdef Vc_IMPL_AVX2 auto a = _mm256_shuffle_epi8( v, _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, 0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80)); return lo128(_mm256_permute4x64_epi64(a, 0xf8)); // a[0] a[2] | a[3] a[3] #else const auto tmp0 = _mm_unpacklo_epi16(lo128(v), hi128(v)); const auto tmp1 = _mm_unpackhi_epi16(lo128(v), hi128(v)); const auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1); const auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1); return _mm_unpacklo_epi16(tmp2, tmp3); #endif } Vc_INTRINSIC __m128i convert(__m256i v, ConvertTag) { return convert(v, ConvertTag()); } Vc_INTRINSIC __m128i convert(__m256 v, ConvertTag) { return convert(convert(v, ConvertTag()), ConvertTag()); } Vc_INTRINSIC __m128i convert(__m256d v, ConvertTag) { return convert(convert(v, ConvertTag()), SSE::ConvertTag()); } Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag) { return v; } Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag) { return v; } Vc_INTRINSIC __m128i convert(__m256i v, ConvertTag) { auto tmp0 = _mm_unpacklo_epi16(lo128(v), hi128(v)); auto tmp1 = _mm_unpackhi_epi16(lo128(v), 
hi128(v)); auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1); auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1); return _mm_unpacklo_epi16(tmp2, tmp3); } Vc_INTRINSIC __m128i convert(__m256i v, ConvertTag) { auto tmp0 = _mm_unpacklo_epi16(lo128(v), hi128(v)); auto tmp1 = _mm_unpackhi_epi16(lo128(v), hi128(v)); auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1); auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1); return _mm_unpacklo_epi16(tmp2, tmp3); } Vc_INTRINSIC __m128i convert(__m256 v, ConvertTag) { return convert(convert(v, ConvertTag()), ConvertTag()); } Vc_INTRINSIC __m128i convert(__m256d v, ConvertTag) { return convert(convert(v, ConvertTag()), SSE::ConvertTag()); } Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag) { return v; } Vc_INTRINSIC __m256i convert(__m256i v, ConvertTag) { return v; } template Vc_INTRINSIC auto convert( typename std::conditional<(sizeof(From) < sizeof(To)), typename SSE::VectorTraits::VectorType, typename AVX::VectorTypeHelper::Type>::type v) -> decltype(convert(v, ConvertTag())) { return convert(v, ConvertTag()); } template > Vc_INTRINSIC auto convert(typename AVX::VectorTypeHelper::Type v) -> decltype(convert(lo128(v), ConvertTag())) { return convert(lo128(v), ConvertTag()); } } // namespace AVX } // namespace Vc #endif // VC_AVX_CASTS_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/const.h000066400000000000000000000173711476554302100175250ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_AVX_CONST_H_ #define VC_AVX_CONST_H_ #include #include "types.h" #include "const_data.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace AVX { template struct IndexesFromZeroData; template<> struct IndexesFromZeroData { static Vc_ALWAYS_INLINE Vc_CONST const int *address() { return reinterpret_cast(&_IndexesFromZero32[0]); } }; template<> struct IndexesFromZeroData { static Vc_ALWAYS_INLINE Vc_CONST const unsigned int *address() { return &_IndexesFromZero32[0]; } }; template<> struct IndexesFromZeroData { static Vc_ALWAYS_INLINE Vc_CONST const short *address() { return reinterpret_cast(&_IndexesFromZero16[0]); } }; template<> struct IndexesFromZeroData { static Vc_ALWAYS_INLINE Vc_CONST const unsigned short *address() { return &_IndexesFromZero16[0]; } }; template<> struct IndexesFromZeroData { static Vc_ALWAYS_INLINE Vc_CONST const signed char *address() { return reinterpret_cast(&_IndexesFromZero8[0]); } }; template<> struct IndexesFromZeroData { static Vc_ALWAYS_INLINE Vc_CONST const char *address() { return reinterpret_cast(&_IndexesFromZero8[0]); } }; template<> struct IndexesFromZeroData { static Vc_ALWAYS_INLINE Vc_CONST const unsigned char *address() { return &_IndexesFromZero8[0]; } }; template struct Const { typedef Vector<_T> V; typedef typename V::EntryType T; typedef typename V::Mask M; static Vc_ALWAYS_INLINE Vc_CONST V _pi_4() { return V(c_trig::data[0]); } static Vc_ALWAYS_INLINE Vc_CONST V _pi_4_hi() { return V(c_trig::data[1]); } static Vc_ALWAYS_INLINE Vc_CONST V _pi_4_rem1() { return V(c_trig::data[2]); } static Vc_ALWAYS_INLINE Vc_CONST V _pi_4_rem2() { return V(c_trig::data[3]); } static Vc_ALWAYS_INLINE Vc_CONST V _1_16() { return V(c_trig::data[4]); } static Vc_ALWAYS_INLINE Vc_CONST V _16() { return V(c_trig::data[5]); } static Vc_ALWAYS_INLINE Vc_CONST V atanP(int i) { return V(c_trig::data[(12 + i)]); } static Vc_ALWAYS_INLINE Vc_CONST V atanQ(int i) { return V(c_trig::data[(17 + i)]); } static Vc_ALWAYS_INLINE Vc_CONST V atanThrsHi() { return V(c_trig::data[22]); } static Vc_ALWAYS_INLINE Vc_CONST V atanThrsLo() { return V(c_trig::data[23]); } static Vc_ALWAYS_INLINE Vc_CONST V _pi_2_rem() { return V(c_trig::data[24]); } static Vc_ALWAYS_INLINE Vc_CONST V lossThreshold() { return V(c_trig::data[8]); } static Vc_ALWAYS_INLINE Vc_CONST V _4_pi() { return V(c_trig::data[9]); } static Vc_ALWAYS_INLINE Vc_CONST V _pi_2() { return V(c_trig::data[10]); } static Vc_ALWAYS_INLINE Vc_CONST V _pi() { return V(c_trig::data[11]); } static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff0(int i) { return V(c_trig::data[(28 + i)]); } static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff1(int i) { return V(c_trig::data[(33 + i)]); } static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff2(int i) { return V(c_trig::data[(37 + i)]); } static Vc_ALWAYS_INLINE Vc_CONST V asinCoeff3(int i) { return V(c_trig::data[(43 + i)]); } static Vc_ALWAYS_INLINE Vc_CONST V smallAsinInput() { return V(c_trig::data[25]); } static Vc_ALWAYS_INLINE Vc_CONST V largeAsinInput() { return V(c_trig::data[26]); } static Vc_ALWAYS_INLINE Vc_CONST M exponentMask() { return M(V(c_log::d(1)).data()); } static Vc_ALWAYS_INLINE Vc_CONST V _1_2() { return V(c_log::d(18)); } static Vc_ALWAYS_INLINE Vc_CONST V _1_sqrt2() { return V(c_log::d(15)); } static Vc_ALWAYS_INLINE Vc_CONST V P(int i) { return V(c_log::d(2 + i)); } static Vc_ALWAYS_INLINE Vc_CONST V Q(int i) { return V(c_log::d(8 + i)); } static Vc_ALWAYS_INLINE Vc_CONST V min() { return V(c_log::d(14)); } static Vc_ALWAYS_INLINE Vc_CONST V ln2_small() 
{ return V(c_log::d(17)); } static Vc_ALWAYS_INLINE Vc_CONST V ln2_large() { return V(c_log::d(16)); } static Vc_ALWAYS_INLINE Vc_CONST V neginf() { return V(c_log::d(13)); } static Vc_ALWAYS_INLINE Vc_CONST V log10_e() { return V(c_log::d(19)); } static Vc_ALWAYS_INLINE Vc_CONST V log2_e() { return V(c_log::d(20)); } static Vc_ALWAYS_INLINE_L Vc_CONST_L V highMask() Vc_ALWAYS_INLINE_R Vc_CONST_R; static Vc_ALWAYS_INLINE_L Vc_CONST_L V highMask(int bits) Vc_ALWAYS_INLINE_R Vc_CONST_R; }; template <> Vc_ALWAYS_INLINE Vc_CONST Vector Const::highMask() { return _mm256_broadcast_ss( reinterpret_cast(&c_general::highMaskFloat)); } template <> Vc_ALWAYS_INLINE Vc_CONST Vector Const::highMask() { return _mm256_broadcast_sd( reinterpret_cast(&c_general::highMaskDouble)); } template <> Vc_ALWAYS_INLINE Vc_CONST Vector Const::highMask(int bits) { #ifdef Vc_IMPL_AVX2 #if defined Vc_ICC || defined Vc_MSVC __m256i allone = _mm256_set1_epi64x(~0); #else auto allone = ~__m256i(); #endif return _mm256_castsi256_ps(_mm256_slli_epi32(allone, bits)); #else __m128 tmp = _mm_castsi128_ps(_mm_slli_epi32(_mm_setallone_si128(), bits)); return concat(tmp, tmp); #endif } template <> Vc_ALWAYS_INLINE Vc_CONST Vector Const::highMask(int bits) { #ifdef Vc_IMPL_AVX2 #if defined Vc_ICC || defined Vc_MSVC __m256i allone = _mm256_set1_epi64x(~0); #else auto allone = ~__m256i(); #endif return _mm256_castsi256_pd(_mm256_slli_epi64(allone, bits)); #else __m128d tmp = _mm_castsi128_pd(_mm_slli_epi64(_mm_setallone_si128(), bits)); return concat(tmp, tmp); #endif } } // namespace AVX namespace AVX2 { using AVX::IndexesFromZeroData; using AVX::Const; } // namespace AVX2 } // namespace Vc #endif // VC_AVX_CONST_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/const_data.h000066400000000000000000000067561476554302100205230ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2012-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_AVX_CONST_DATA_H_ #define VC_AVX_CONST_DATA_H_ #include "../common/data.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace AVX { alignas(64) extern const unsigned int _IndexesFromZero32[ 8]; alignas(16) extern const unsigned short _IndexesFromZero16[16]; alignas(16) extern const unsigned char _IndexesFromZero8 [32]; struct alignas(64) c_general { static const float oneFloat; static const unsigned int absMaskFloat[2]; static const unsigned int signMaskFloat[2]; static const unsigned int highMaskFloat; static const unsigned short minShort[2]; static const unsigned short one16[2]; static const float _2power31; static const double oneDouble; static const unsigned long long frexpMask; static const unsigned long long highMaskDouble; }; template struct c_trig { alignas(64) static const T data[]; }; #ifndef Vc_MSVC template <> alignas(64) const float c_trig::data[]; template <> alignas(64) const double c_trig::data[]; #endif template struct c_log { typedef float floatAlias Vc_MAY_ALIAS; static Vc_ALWAYS_INLINE float d(int i) { return *reinterpret_cast(&data[i]); } alignas(64) static const unsigned int data[21]; }; #ifndef Vc_MSVC template<> alignas(64) const unsigned int c_log::data[21]; #endif template<> struct c_log { enum VectorSize { Size = 16 / sizeof(double) }; typedef double doubleAlias Vc_MAY_ALIAS; static Vc_ALWAYS_INLINE double d(int i) { return *reinterpret_cast(&data[i]); } alignas(64) static const unsigned long long data[21]; }; } // namespace AVX } // namespace Vc namespace Vc_VERSIONED_NAMESPACE { namespace AVX2 { using AVX::_IndexesFromZero8; using AVX::_IndexesFromZero16; using AVX::_IndexesFromZero32; using AVX::c_general; using AVX::c_trig; using AVX::c_log; } // namespace AVX2 } // namespace Vc #endif // VC_AVX_CONST_DATA_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/debug.h000066400000000000000000000100771476554302100174610ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2011-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_AVX_DEBUG_H_ #define VC_AVX_DEBUG_H_ #ifndef NDEBUG #include "vector.h" #include #include #endif namespace Vc_VERSIONED_NAMESPACE { namespace AVX { template struct AddType { const U &d; }; template AddType addType(const U &x) { return {x}; } #ifdef NDEBUG class DebugStream { public: DebugStream(const char *, const char *, int) {} template inline DebugStream &operator<<(const T &) { return *this; } }; #else class DebugStream { private: template static void printVector(V _x) { enum { Size = sizeof(V) / sizeof(T) }; union { V v; T m[Size]; } x = { _x }; std::cerr << '[' << std::setprecision(24) << x.m[0]; for (int i = 1; i < Size; ++i) { std::cerr << ", " << std::setprecision(24) << x.m[i]; } std::cerr << ']'; } public: DebugStream(const char *func, const char *file, int line) { std::cerr << "\033[1;40;33mDEBUG: " << file << ':' << line << ' ' << func << ' '; } template DebugStream &operator<<(const T &x) { std::cerr << x; return *this; } template DebugStream &operator<<(AddType &&x) { printVector(x.d); return *this; } DebugStream &operator<<(__m128 x) { printVector(x); return *this; } DebugStream &operator<<(__m256 x) { printVector(x); return *this; } DebugStream &operator<<(__m128d x) { printVector(x); return *this; } DebugStream &operator<<(__m256d x) { printVector(x); return *this; } DebugStream &operator<<(__m128i x) { printVector(x); return *this; } DebugStream &operator<<(__m256i x) { printVector(x); return *this; } ~DebugStream() { std::cerr << "\033[0m" << std::endl; } }; #endif #ifdef Vc_DEBUG #undef Vc_DEBUG #endif #ifdef Vc_MSVC #define Vc_DEBUG Vc::AVX::DebugStream(__FUNCSIG__, __FILE__, __LINE__) #else #define Vc_DEBUG Vc::AVX::DebugStream(__PRETTY_FUNCTION__, __FILE__, __LINE__) #endif } // namespace AVX } // namespace Vc #endif // VC_AVX_DEBUG_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/deinterleave.tcc000066400000000000000000000302621476554302100213620ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2010-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ namespace Vc_VERSIONED_NAMESPACE { namespace AVX2 { inline void deinterleave(double_v &Vc_RESTRICT a, double_v &Vc_RESTRICT b, double_v &Vc_RESTRICT c) { // estimated latency (AVX): 4.5 cycles const m256d tmp0 = Mem::shuffle128(a.data(), b.data()); const m256d tmp1 = Mem::shuffle128(a.data(), c.data()); const m256d tmp2 = Mem::shuffle128(b.data(), c.data()); a.data() = Mem::shuffle(tmp0, tmp1); b.data() = Mem::shuffle(tmp0, tmp2); c.data() = Mem::shuffle(tmp1, tmp2); } inline void deinterleave(float_v &Vc_RESTRICT a, float_v &Vc_RESTRICT b, float_v &Vc_RESTRICT c) { // abc abc abc // a = [a0 b0 c0 a1 b1 c1 a2 b2] 332 = 211+121 // b = [c2 a3 b3 c3 a4 b4 c4 a5] 323 = 112+211 // c = [b5 c5 a6 b6 c6 a7 b7 c7] 233 = 121+112 const m256 ac0 = Mem::shuffle128(a.data(), c.data()); // a0 b0 c0 a1 b5 c5 a6 b6 const m256 ac1 = Mem::shuffle128(a.data(), c.data()); // b1 c1 a2 b2 c6 a7 b7 c7 m256 tmp0 = Mem::blend( ac0, b.data()); tmp0 = Mem::blend(tmp0, ac1); // a0 a3 a2 a1 a4 a7 a6 a5 m256 tmp1 = Mem::blend( ac0, b.data()); tmp1 = Mem::blend(tmp1, ac1); // b1 b0 b3 b2 b5 b4 b7 b6 m256 tmp2 = Mem::blend( ac0, b.data()); tmp2 = Mem::blend(tmp2, ac1); // c2 c1 c0 c3 c6 c5 c4 c7 a.data() = Mem::permute(tmp0); b.data() = Mem::permute(tmp1); c.data() = Mem::permute(tmp2); } inline void deinterleave(int_v &Vc_RESTRICT a, int_v &Vc_RESTRICT b, int_v &Vc_RESTRICT c) { deinterleave(reinterpret_cast(a), reinterpret_cast(b), reinterpret_cast(c)); } inline void deinterleave(uint_v &Vc_RESTRICT a, uint_v &Vc_RESTRICT b, uint_v &Vc_RESTRICT c) { deinterleave(reinterpret_cast(a), reinterpret_cast(b), reinterpret_cast(c)); } inline void deinterleave(Vector &Vc_RESTRICT , Vector &Vc_RESTRICT , Vector &Vc_RESTRICT ) { return; /* TODO: // abc abc abc // a = [a0 b0 c0 a1 b1 c1 a2 b2] 332 = 211+121 // b = [c2 a3 b3 c3 a4 b4 c4 a5] 323 = 112+211 // c = [b5 c5 a6 b6 c6 a7 b7 c7] 233 = 121+112 m128i ac0 = _mm_unpacklo_epi64(a.data(), c.data()); // a0 b0 c0 a1 b5 c5 a6 b6 m128i ac1 = _mm_unpackhi_epi64(a.data(), c.data()); // b1 c1 a2 b2 c6 a7 b7 c7 m128i tmp0 = Mem::blend( ac0, b.data()); tmp0 = Mem::blend(tmp0, ac1); // a0 a3 a2 a1 a4 a7 a6 a5 m128i tmp1 = Mem::blend( ac0, b.data()); tmp1 = Mem::blend(tmp1, ac1); // b1 b0 b3 b2 b5 b4 b7 b6 m128i tmp2 = Mem::blend( ac0, b.data()); tmp2 = Mem::blend(tmp2, ac1); // c2 c1 c0 c3 c6 c5 c4 c7 a.data() = Mem::permuteHi(Mem::permuteLo(tmp0)); b.data() = Mem::permuteHi(Mem::permuteLo(tmp1)); c.data() = Mem::permuteHi(Mem::permuteLo(tmp2)); */ } inline void deinterleave(Vector &Vc_RESTRICT a, Vector &Vc_RESTRICT b, Vector &Vc_RESTRICT c) { deinterleave(reinterpret_cast &>(a), reinterpret_cast &>(b), reinterpret_cast &>(c)); } inline void deinterleave(Vector &a, Vector &b) { // a7 a6 a5 a4 a3 a2 a1 a0 // b7 b6 b5 b4 b3 b2 b1 b0 const m256 tmp0 = Reg::permute128(a.data(), b.data()); // b3 b2 b1 b0 a3 a2 a1 a0 const m256 tmp1 = Reg::permute128(a.data(), b.data()); // b7 b6 b5 b4 a7 a6 a5 a4 const m256 tmp2 = _mm256_unpacklo_ps(tmp0, tmp1); // b5 b1 b4 b0 a5 a1 a4 a0 const m256 tmp3 = _mm256_unpackhi_ps(tmp0, tmp1); // b7 b3 b6 b2 a7 a3 a6 a2 a.data() = _mm256_unpacklo_ps(tmp2, tmp3); // b6 b4 b2 b0 a6 a4 a2 a0 b.data() = _mm256_unpackhi_ps(tmp2, tmp3); // b7 b5 b3 b1 a7 a5 a3 a1 } inline void deinterleave(Vector &a, // a0 b0 a1 b1 a2 b2 a3 b3 | a4 b4 a5 ... Vector &b) // a8 b8 a9 ... { auto v0 = Mem::shuffle128(a.data(), b.data()); auto v1 = Mem::shuffle128(a.data(), b.data()); auto v2 = AVX::unpacklo_epi16(v0, v1); // a0 a4 ... auto v3 = AVX::unpackhi_epi16(v0, v1); // a2 a6 ... 
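/* Editorial note, not part of the original Vc sources: the shuffle128 plus the
   unpacklo/unpackhi cascade around this point performs a 2-way deinterleave of
   16-bit lanes.  A scalar sketch of the intended result, using hypothetical
   buffers purely for illustration:

     // scalar reference: split 2*N interleaved values into two outputs
     void deinterleave2(const short *m, short *a, short *b, int N) {
       for (int i = 0; i < N; ++i) {
         a[i] = m[2 * i];      // even positions -> first output
         b[i] = m[2 * i + 1];  // odd positions  -> second output
       }
     }

   Each unpack round moves elements belonging to the same output vector closer
   together (see the lane comments), until the final round fills a and b. */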
v0 = AVX::unpacklo_epi16(v2, v3); // a0 a2 ... v1 = AVX::unpackhi_epi16(v2, v3); // a1 a3 ... a.data() = AVX::unpacklo_epi16(v0, v1); // a0 a1 ... b.data() = AVX::unpackhi_epi16(v0, v1); // b0 b1 ... } inline void deinterleave(Vector &a, Vector &b) { auto v0 = Mem::shuffle128(a.data(), b.data()); auto v1 = Mem::shuffle128(a.data(), b.data()); auto v2 = AVX::unpacklo_epi16(v0, v1); // a0 a4 ... auto v3 = AVX::unpackhi_epi16(v0, v1); // a2 a6 ... v0 = AVX::unpacklo_epi16(v2, v3); // a0 a2 ... v1 = AVX::unpackhi_epi16(v2, v3); // a1 a3 ... a.data() = AVX::unpacklo_epi16(v0, v1); // a0 a1 ... b.data() = AVX::unpackhi_epi16(v0, v1); // b0 b1 ... } } // namespace AVX2 namespace Detail { template inline void deinterleave(AVX2::float_v &a, AVX2::float_v &b, const float *m, Flags align) { a.load(m, align); b.load(m + AVX2::float_v::Size, align); Vc::AVX2::deinterleave(a, b); } template inline void deinterleave(AVX2::float_v &a, AVX2::float_v &b, const short *m, Flags f) { using namespace Vc::AVX2; const auto tmp = Detail::load32(m, f); a.data() = _mm256_cvtepi32_ps(concat(_mm_srai_epi32(_mm_slli_epi32(lo128(tmp), 16), 16), _mm_srai_epi32(_mm_slli_epi32(hi128(tmp), 16), 16))); b.data() = _mm256_cvtepi32_ps( concat(_mm_srai_epi32(lo128(tmp), 16), _mm_srai_epi32(hi128(tmp), 16))); } template inline void deinterleave(AVX2::float_v &a, AVX2::float_v &b, const unsigned short *m, Flags f) { using namespace Vc::AVX2; const auto tmp = Detail::load32(m, f); a.data() = _mm256_cvtepi32_ps( concat(_mm_blend_epi16(lo128(tmp), _mm_setzero_si128(), 0xaa), _mm_blend_epi16(hi128(tmp), _mm_setzero_si128(), 0xaa))); b.data() = _mm256_cvtepi32_ps( concat(_mm_srli_epi32(lo128(tmp), 16), _mm_srli_epi32(hi128(tmp), 16))); } template inline void deinterleave(AVX2::double_v &a, AVX2::double_v &b, const double *m, Flags align) { using namespace Vc::AVX2; a.load(m, align); b.load(m + AVX2::double_v::Size, align); m256d tmp0 = Mem::shuffle128(a.data(), b.data()); // b1 b0 a1 a0 m256d tmp1 = Mem::shuffle128(a.data(), b.data()); // b3 b2 a3 a2 a.data() = _mm256_unpacklo_pd(tmp0, tmp1); // b2 b0 a2 a0 b.data() = _mm256_unpackhi_pd(tmp0, tmp1); // b3 b1 a3 a1 } template inline void deinterleave(AVX2::int_v &a, AVX2::int_v &b, const int *m, Flags align) { using namespace AVX; a.load(m, align); b.load(m + AVX2::int_v::Size, align); const m256 tmp0 = avx_cast(Mem::shuffle128(a.data(), b.data())); const m256 tmp1 = avx_cast(Mem::shuffle128(a.data(), b.data())); const m256 tmp2 = _mm256_unpacklo_ps(tmp0, tmp1); // b5 b1 b4 b0 a5 a1 a4 a0 const m256 tmp3 = _mm256_unpackhi_ps(tmp0, tmp1); // b7 b3 b6 b2 a7 a3 a6 a2 a.data() = avx_cast(_mm256_unpacklo_ps(tmp2, tmp3)); // b6 b4 b2 b0 a6 a4 a2 a0 b.data() = avx_cast(_mm256_unpackhi_ps(tmp2, tmp3)); // b7 b5 b3 b1 a7 a5 a3 a1 } template inline void deinterleave(AVX2::int_v &a, AVX2::int_v &b, const short *m, Flags f) { using namespace Vc::AVX; const AVX2::short_v tmp0(m, f); const m256i tmp = tmp0.data(); a.data() = concat( _mm_srai_epi32(_mm_slli_epi32(lo128(tmp), 16), 16), _mm_srai_epi32(_mm_slli_epi32(hi128(tmp), 16), 16)); b.data() = concat( _mm_srai_epi32(lo128(tmp), 16), _mm_srai_epi32(hi128(tmp), 16)); } template inline void deinterleave(AVX2::uint_v &a, AVX2::uint_v &b, const unsigned int *m, Flags align) { using namespace AVX; a.load(m, align); b.load(m + AVX2::uint_v::Size, align); const m256 tmp0 = avx_cast(Mem::shuffle128(a.data(), b.data())); const m256 tmp1 = avx_cast(Mem::shuffle128(a.data(), b.data())); const m256 tmp2 = _mm256_unpacklo_ps(tmp0, tmp1); // b5 b1 b4 b0 a5 a1 a4 
a0 const m256 tmp3 = _mm256_unpackhi_ps(tmp0, tmp1); // b7 b3 b6 b2 a7 a3 a6 a2 a.data() = avx_cast(_mm256_unpacklo_ps(tmp2, tmp3)); // b6 b4 b2 b0 a6 a4 a2 a0 b.data() = avx_cast(_mm256_unpackhi_ps(tmp2, tmp3)); // b7 b5 b3 b1 a7 a5 a3 a1 } template inline void deinterleave(AVX2::uint_v &a, AVX2::uint_v &b, const unsigned short *m, Flags f) { using namespace Vc::AVX; const AVX2::ushort_v tmp0(m, f); const m256i tmp = tmp0.data(); a.data() = concat( _mm_srai_epi32(_mm_slli_epi32(lo128(tmp), 16), 16), _mm_srai_epi32(_mm_slli_epi32(hi128(tmp), 16), 16)); b.data() = concat( _mm_srai_epi32(lo128(tmp), 16), _mm_srai_epi32(hi128(tmp), 16)); } template inline void deinterleave(AVX2::short_v &a, AVX2::short_v &b, const short *m, Flags align) { a.load(m, align); b.load(m + AVX2::short_v::Size, align); Vc::AVX2::deinterleave(a, b); } template inline void deinterleave(AVX2::ushort_v &a, AVX2::ushort_v &b, const unsigned short *m, Flags align) { a.load(m, align); b.load(m + AVX2::ushort_v::Size, align); Vc::AVX2::deinterleave(a, b); } // only support M == V::EntryType -> no specialization template Vc_ALWAYS_INLINE void deinterleave(AVX2::Vector &Vc_RESTRICT a, AVX2::Vector &Vc_RESTRICT b, AVX2::Vector &Vc_RESTRICT c, const M *Vc_RESTRICT memory, Flags align) { using V = AVX2::Vector; a.load(&memory[0 * V::Size], align); b.load(&memory[1 * V::Size], align); c.load(&memory[2 * V::Size], align); Vc::AVX2::deinterleave(a, b, c); } } // namespace Detail } // namespace Vc conky-1.22.1/3rdparty/Vc/Vc/avx/detail.h000066400000000000000000003466451476554302100176520ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_AVX_DETAIL_H_ #define VC_AVX_DETAIL_H_ #include "../sse/detail.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Detail { // (converting) load functions {{{1 template Vc_INTRINSIC Vc_PURE __m256 load(const float *x, Flags, LoadTag<__m256, float>, typename Flags::EnableIfAligned = nullptr) { return _mm256_load_ps(x); } template Vc_INTRINSIC Vc_PURE __m256 load(const float *x, Flags, LoadTag<__m256, float>, typename Flags::EnableIfUnaligned = nullptr) { return _mm256_loadu_ps(x); } template Vc_INTRINSIC Vc_PURE __m256 load(const float *x, Flags, LoadTag<__m256, float>, typename Flags::EnableIfStreaming = nullptr) { return AvxIntrinsics::stream_load<__m256>(x); } template Vc_INTRINSIC Vc_PURE __m256d load(const double *x, Flags, LoadTag<__m256d, double>, typename Flags::EnableIfAligned = nullptr) { return _mm256_load_pd(x); } template Vc_INTRINSIC Vc_PURE __m256d load(const double *x, Flags, LoadTag<__m256d, double>, typename Flags::EnableIfUnaligned = nullptr) { return _mm256_loadu_pd(x); } template Vc_INTRINSIC Vc_PURE __m256d load(const double *x, Flags, LoadTag<__m256d, double>, typename Flags::EnableIfStreaming = nullptr) { return AvxIntrinsics::stream_load<__m256d>(x); } template ::value>> Vc_INTRINSIC Vc_PURE __m256i load(const T *x, Flags, LoadTag<__m256i, T>, typename Flags::EnableIfAligned = nullptr) { return _mm256_load_si256(reinterpret_cast(x)); } template ::value>> Vc_INTRINSIC Vc_PURE __m256i load(const T *x, Flags, LoadTag<__m256i, T>, typename Flags::EnableIfUnaligned = nullptr) { return _mm256_loadu_si256(reinterpret_cast(x)); } template ::value>> Vc_INTRINSIC Vc_PURE __m256i load(const T *x, Flags, LoadTag<__m256i, T>, typename Flags::EnableIfStreaming = nullptr) { return AvxIntrinsics::stream_load<__m256i>(x); } // load32{{{2 Vc_INTRINSIC __m256 load32(const float *mem, when_aligned) { return _mm256_load_ps(mem); } Vc_INTRINSIC __m256 load32(const float *mem, when_unaligned) { return _mm256_loadu_ps(mem); } Vc_INTRINSIC __m256 load32(const float *mem, when_streaming) { return AvxIntrinsics::stream_load<__m256>(mem); } Vc_INTRINSIC __m256d load32(const double *mem, when_aligned) { return _mm256_load_pd(mem); } Vc_INTRINSIC __m256d load32(const double *mem, when_unaligned) { return _mm256_loadu_pd(mem); } Vc_INTRINSIC __m256d load32(const double *mem, when_streaming) { return AvxIntrinsics::stream_load<__m256d>(mem); } template Vc_INTRINSIC __m256i load32(const T *mem, when_aligned) { static_assert(std::is_integral::value, "load32 is only intended for integral T"); return _mm256_load_si256(reinterpret_cast(mem)); } template Vc_INTRINSIC __m256i load32(const T *mem, when_unaligned) { static_assert(std::is_integral::value, "load32 is only intended for integral T"); return _mm256_loadu_si256(reinterpret_cast(mem)); } template Vc_INTRINSIC __m256i load32(const T *mem, when_streaming) { static_assert(std::is_integral::value, "load32 is only intended for integral T"); return AvxIntrinsics::stream_load<__m256i>(mem); } // MSVC workarounds{{{2 #ifdef Vc_MSVC // work around: "fatal error C1001: An internal error has occurred in the compiler." 
Vc_INTRINSIC __m256i load(const uint *mem, when_aligned, LoadTag<__m256i, int>) { return _mm256_load_si256(reinterpret_cast(mem)); } Vc_INTRINSIC __m256d load(const double *mem, when_unaligned, LoadTag<__m256d, double>) { return _mm256_loadu_pd(mem); } template Vc_INTRINSIC __m256 load(const float *mem, when_aligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_load_ps(mem); } template Vc_INTRINSIC __m256 load(const float *mem, when_unaligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_loadu_ps(mem); } template Vc_INTRINSIC __m256 load(const float *mem, when_streaming, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return AvxIntrinsics::stream_load<__m256>(mem); } template Vc_INTRINSIC __m256d load(const double *mem, when_aligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_load_pd(mem); } template Vc_INTRINSIC __m256d load(const double *mem, when_unaligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_loadu_pd(mem); } template Vc_INTRINSIC __m256d load(const double *mem, when_streaming, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return AvxIntrinsics::stream_load<__m256d>(mem); } template Vc_INTRINSIC __m256i load(const uint *mem, when_aligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_load_si256(reinterpret_cast(mem)); } template Vc_INTRINSIC __m256i load(const uint *mem, when_unaligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_loadu_si256(reinterpret_cast(mem)); } template Vc_INTRINSIC __m256i load(const uint *mem, when_streaming, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return AvxIntrinsics::stream_load<__m256i>(mem); } template Vc_INTRINSIC __m256i load(const int *mem, when_unaligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_loadu_si256(reinterpret_cast(mem)); } template Vc_INTRINSIC __m256i load(const int *mem, when_aligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_load_si256(reinterpret_cast(mem)); } template Vc_INTRINSIC __m256i load(const int *mem, when_streaming, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return AvxIntrinsics::stream_load<__m256i>(mem); } template Vc_INTRINSIC __m256i load(const short *mem, when_unaligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_loadu_si256(reinterpret_cast(mem)); } template Vc_INTRINSIC __m256i load(const short *mem, when_aligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_load_si256(reinterpret_cast(mem)); } template Vc_INTRINSIC __m256i load(const short *mem, when_streaming, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return AvxIntrinsics::stream_load<__m256i>(mem); } template Vc_INTRINSIC __m256i load(const ushort *mem, when_unaligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_loadu_si256(reinterpret_cast(mem)); } template Vc_INTRINSIC __m256i load(const ushort *mem, when_aligned, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return _mm256_load_si256(reinterpret_cast(mem)); } template Vc_INTRINSIC __m256i load(const ushort *mem, when_streaming, enable_if<(std::is_same::value && std::is_same::value)> = nullarg) { return 
AvxIntrinsics::stream_load<__m256i>(mem); } #endif // Vc_MSVC // short {{{2 template Vc_INTRINSIC __m256i load(const ushort *mem, Flags f, LoadTag<__m256i, short>) { return load32(mem, f); } template Vc_INTRINSIC __m256i load(const uchar *mem, Flags f, LoadTag<__m256i, short>) { return AVX::cvtepu8_epi16(load16(mem, f)); } template Vc_INTRINSIC __m256i load(const schar *mem, Flags f, LoadTag<__m256i, short>) { return AVX::cvtepi8_epi16(load16(mem, f)); } // ushort {{{2 template Vc_INTRINSIC __m256i load(const uchar *mem, Flags f, LoadTag<__m256i, ushort>) { return AVX::cvtepu8_epi16(load16(mem, f)); } // int {{{2 template Vc_INTRINSIC __m256i load(const uint *mem, Flags f, LoadTag<__m256i, int>) { return load32(mem, f); } template Vc_INTRINSIC __m256i load(const ushort *mem, Flags f, LoadTag<__m256i, int>) { return AVX::cvtepu16_epi32(load16(mem, f)); } template Vc_INTRINSIC __m256i load(const short *mem, Flags f, LoadTag<__m256i, int>) { return AVX::cvtepi16_epi32(load16(mem, f)); } template Vc_INTRINSIC __m256i load(const uchar *mem, Flags, LoadTag<__m256i, int>) { return AVX::cvtepu8_epi32(_mm_loadl_epi64(reinterpret_cast(mem))); } template Vc_INTRINSIC __m256i load(const schar *mem, Flags, LoadTag<__m256i, int>) { return AVX::cvtepi8_epi32(_mm_loadl_epi64(reinterpret_cast(mem))); } // uint {{{2 template Vc_INTRINSIC __m256i load(const ushort *mem, Flags f, LoadTag<__m256i, uint>) { return AVX::cvtepu16_epi32(load16(mem, f)); } template Vc_INTRINSIC __m256i load(const uchar *mem, Flags, LoadTag<__m256i, uint>) { return AVX::cvtepu8_epi32(_mm_loadl_epi64(reinterpret_cast(mem))); } // double {{{2 template Vc_INTRINSIC __m256d load(const float *mem, Flags f, LoadTag<__m256d, double>) { return AVX::convert(load16(mem, f)); } template Vc_INTRINSIC __m256d load(const uint *mem, Flags f, LoadTag<__m256d, double>) { return AVX::convert(load16(mem, f)); } template Vc_INTRINSIC __m256d load(const int *mem, Flags f, LoadTag<__m256d, double>) { return AVX::convert(load16(mem, f)); } template Vc_INTRINSIC __m256d load(const ushort *mem, Flags f, LoadTag<__m256d, double>) { return AVX::convert(load16(mem, f)); } template Vc_INTRINSIC __m256d load(const short *mem, Flags f, LoadTag<__m256d, double>) { return AVX::convert(load16(mem, f)); } template Vc_INTRINSIC __m256d load(const uchar *mem, Flags f, LoadTag<__m256d, double>) { return AVX::convert(load16(mem, f)); } template Vc_INTRINSIC __m256d load(const schar *mem, Flags f, LoadTag<__m256d, double>) { return AVX::convert(load16(mem, f)); } // float {{{2 template Vc_INTRINSIC __m256 load(const double *mem, Flags f, LoadTag<__m256, float>) { return AVX::concat(_mm256_cvtpd_ps(load32(&mem[0], f)), _mm256_cvtpd_ps(load32(&mem[4], f))); } template Vc_INTRINSIC __m256 load(const uint *mem, Flags f, LoadTag<__m256, float>) { const auto v = load32(mem, f); return _mm256_blendv_ps( _mm256_cvtepi32_ps(v), _mm256_add_ps(_mm256_cvtepi32_ps(AVX::sub_epi32(v, AVX::set2power31_epu32())), AVX::set2power31_ps()), _mm256_castsi256_ps(AVX::cmplt_epi32(v, _mm256_setzero_si256()))); } template Vc_INTRINSIC __m256 load(const int *mem, Flags f, LoadTag<__m256, float>) { return AVX::convert(load32(mem, f)); } template ::value>> Vc_INTRINSIC __m256 load(const T *mem, Flags f, LoadTag<__m256, float>) { return _mm256_cvtepi32_ps(load<__m256i, int>(mem, f)); } template Vc_INTRINSIC __m256 load(const ushort *mem, Flags f, LoadTag<__m256, float>) { return AVX::convert(load16(mem, f)); } template Vc_INTRINSIC __m256 load(const short *mem, Flags f, LoadTag<__m256, float>) { 
return AVX::convert(load16(mem, f)); } /* template struct LoadHelper { static __m256 load(const unsigned char *mem, Flags) { return _mm256_cvtepi32_ps( cvtepu8_epi32(_mm_loadl_epi64(reinterpret_cast(mem)))); } }; template struct LoadHelper { static __m256 load(const signed char *mem, Flags) { return _mm256_cvtepi32_ps( cvtepi8_epi32(_mm_loadl_epi64(reinterpret_cast(mem)))); } }; */ // shifted{{{1 template Vc_INTRINSIC Vc_CONST enable_if<(sizeof(T) == 32 && amount >= 16), T> shifted(T k) { return AVX::avx_cast(AVX::zeroExtend( _mm_srli_si128(AVX::hi128(AVX::avx_cast<__m256i>(k)), amount - 16))); } template Vc_INTRINSIC Vc_CONST enable_if<(sizeof(T) == 32 && amount > 0 && amount < 16), T> shifted(T k) { return AVX::avx_cast( AVX::alignr(Mem::permute128(AVX::avx_cast<__m256i>(k)), AVX::avx_cast<__m256i>(k))); } template Vc_INTRINSIC Vc_CONST enable_if<(sizeof(T) == 32 && amount <= -16), T> shifted(T k) { return AVX::avx_cast(Mem::permute128(AVX::avx_cast<__m256i>( _mm_slli_si128(AVX::lo128(AVX::avx_cast<__m256i>(k)), -16 - amount)))); } template Vc_INTRINSIC Vc_CONST enable_if<(sizeof(T) == 32 && amount > -16 && amount < 0), T> shifted(T k) { return AVX::avx_cast( AVX::alignr<16 + amount>(AVX::avx_cast<__m256i>(k), Mem::permute128(AVX::avx_cast<__m256i>(k)))); } // mask_cast{{{1 template Vc_INTRINSIC Vc_CONST R mask_cast(__m256i k) { static_assert(From == To, "Incorrect mask cast."); static_assert(std::is_same::value, "Incorrect mask cast."); return AVX::avx_cast<__m256>(k); } // 4 -> 4 template <> Vc_INTRINSIC Vc_CONST __m128 mask_cast<4, 4, __m128>(__m256i k) { return AVX::avx_cast<__m128>(_mm_packs_epi32(AVX::lo128(k), AVX::hi128(k))); } template <> Vc_INTRINSIC Vc_CONST __m256 mask_cast<4, 4, __m256>(__m128i k) { const auto kk = _mm_castsi128_ps(k); return AVX::concat(_mm_unpacklo_ps(kk, kk), _mm_unpackhi_ps(kk, kk)); } // 4 -> 8 template<> Vc_INTRINSIC Vc_CONST __m256 mask_cast<4, 8, __m256>(__m256i k) { // aabb ccdd -> abcd 0000 return AVX::avx_cast<__m256>(AVX::concat(_mm_packs_epi32(AVX::lo128(k), AVX::hi128(k)), _mm_setzero_si128())); } template<> Vc_INTRINSIC Vc_CONST __m128 mask_cast<4, 8, __m128>(__m256i k) { // aaaa bbbb cccc dddd -> abcd 0000 return AVX::avx_cast<__m128>(_mm_packs_epi16(_mm_packs_epi32(AVX::lo128(k), AVX::hi128(k)), _mm_setzero_si128())); } template <> Vc_INTRINSIC Vc_CONST __m256 mask_cast<4, 8, __m256>(__m128i k) { return AVX::zeroExtend(AVX::avx_cast<__m128>(k)); } // 4 -> 16 template<> Vc_INTRINSIC Vc_CONST __m256 mask_cast<4, 16, __m256>(__m256i k) { // aaaa bbbb cccc dddd -> abcd 0000 0000 0000 return AVX::zeroExtend(mask_cast<4, 8, __m128>(k)); } // 8 -> 4 template<> Vc_INTRINSIC Vc_CONST __m256 mask_cast<8, 4, __m256>(__m256i k) { // aabb ccdd eeff gghh -> aaaa bbbb cccc dddd const auto lo = AVX::lo128(AVX::avx_cast<__m256>(k)); return AVX::concat(_mm_unpacklo_ps(lo, lo), _mm_unpackhi_ps(lo, lo)); } template<> Vc_INTRINSIC Vc_CONST __m128 mask_cast<8, 4, __m128>(__m256i k) { return AVX::avx_cast<__m128>(AVX::lo128(k)); } template<> Vc_INTRINSIC Vc_CONST __m256 mask_cast<8, 4, __m256>(__m128i k) { // abcd efgh -> aaaa bbbb cccc dddd const auto tmp = _mm_unpacklo_epi16(k, k); // aa bb cc dd return AVX::avx_cast<__m256>(AVX::concat(_mm_unpacklo_epi32(tmp, tmp), // aaaa bbbb _mm_unpackhi_epi32(tmp, tmp))); // cccc dddd } // 8 -> 8 template<> Vc_INTRINSIC Vc_CONST __m128 mask_cast<8, 8, __m128>(__m256i k) { // aabb ccdd eeff gghh -> abcd efgh return AVX::avx_cast<__m128>(_mm_packs_epi16(AVX::lo128(k), AVX::hi128(k))); } template<> Vc_INTRINSIC Vc_CONST __m256 
mask_cast<8, 8, __m256>(__m128i k) { return AVX::avx_cast<__m256>(AVX::concat(_mm_unpacklo_epi16(k, k), _mm_unpackhi_epi16(k, k))); } // 8 -> 16 template<> Vc_INTRINSIC Vc_CONST __m256 mask_cast<8, 16, __m256>(__m256i k) { // aabb ccdd eeff gghh -> abcd efgh 0000 0000 return AVX::zeroExtend(mask_cast<8, 8, __m128>(k)); } // 16 -> 8 #ifdef Vc_IMPL_AVX2 template<> Vc_INTRINSIC Vc_CONST __m256 mask_cast<16, 8, __m256>(__m256i k) { // abcd efgh ijkl mnop -> aabb ccdd eeff gghh const auto flipped = Mem::permute4x64(k); return _mm256_castsi256_ps(AVX::unpacklo_epi16(flipped, flipped)); } #endif // 16 -> 4 template<> Vc_INTRINSIC Vc_CONST __m256 mask_cast<16, 4, __m256>(__m256i k) { // abcd efgh ijkl mnop -> aaaa bbbb cccc dddd const auto tmp = _mm_unpacklo_epi16(AVX::lo128(k), AVX::lo128(k)); // aabb ccdd return _mm256_castsi256_ps(AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp))); } // allone{{{1 template<> Vc_INTRINSIC Vc_CONST __m256 allone<__m256 >() { return AVX::setallone_ps(); } template<> Vc_INTRINSIC Vc_CONST __m256i allone<__m256i>() { return AVX::setallone_si256(); } template<> Vc_INTRINSIC Vc_CONST __m256d allone<__m256d>() { return AVX::setallone_pd(); } // zero{{{1 template<> Vc_INTRINSIC Vc_CONST __m256 zero<__m256 >() { return _mm256_setzero_ps(); } template<> Vc_INTRINSIC Vc_CONST __m256i zero<__m256i>() { return _mm256_setzero_si256(); } template<> Vc_INTRINSIC Vc_CONST __m256d zero<__m256d>() { return _mm256_setzero_pd(); } // one{{{1 Vc_INTRINSIC Vc_CONST __m256 one( float) { return AVX::setone_ps (); } Vc_INTRINSIC Vc_CONST __m256d one(double) { return AVX::setone_pd (); } Vc_INTRINSIC Vc_CONST __m256i one( int) { return AVX::setone_epi32(); } Vc_INTRINSIC Vc_CONST __m256i one( uint) { return AVX::setone_epu32(); } Vc_INTRINSIC Vc_CONST __m256i one( short) { return AVX::setone_epi16(); } Vc_INTRINSIC Vc_CONST __m256i one(ushort) { return AVX::setone_epu16(); } Vc_INTRINSIC Vc_CONST __m256i one( schar) { return AVX::setone_epi8 (); } Vc_INTRINSIC Vc_CONST __m256i one( uchar) { return AVX::setone_epu8 (); } // negate{{{1 Vc_ALWAYS_INLINE Vc_CONST __m256 negate(__m256 v, std::integral_constant) { return _mm256_xor_ps(v, AVX::setsignmask_ps()); } Vc_ALWAYS_INLINE Vc_CONST __m256d negate(__m256d v, std::integral_constant) { return _mm256_xor_pd(v, AVX::setsignmask_pd()); } Vc_ALWAYS_INLINE Vc_CONST __m256i negate(__m256i v, std::integral_constant) { return AVX::sign_epi32(v, Detail::allone<__m256i>()); } Vc_ALWAYS_INLINE Vc_CONST __m256i negate(__m256i v, std::integral_constant) { return AVX::sign_epi16(v, Detail::allone<__m256i>()); } // xor_{{{1 Vc_INTRINSIC __m256 xor_(__m256 a, __m256 b) { return _mm256_xor_ps(a, b); } Vc_INTRINSIC __m256d xor_(__m256d a, __m256d b) { return _mm256_xor_pd(a, b); } Vc_INTRINSIC __m256i xor_(__m256i a, __m256i b) { #ifdef Vc_IMPL_AVX2 return _mm256_xor_si256(a, b); #else return _mm256_castps_si256( _mm256_xor_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); #endif } // or_{{{1 Vc_INTRINSIC __m256 or_(__m256 a, __m256 b) { return _mm256_or_ps(a, b); } Vc_INTRINSIC __m256d or_(__m256d a, __m256d b) { return _mm256_or_pd(a, b); } Vc_INTRINSIC __m256i or_(__m256i a, __m256i b) { #ifdef Vc_IMPL_AVX2 return _mm256_or_si256(a, b); #else return _mm256_castps_si256( _mm256_or_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); #endif } // and_{{{1 Vc_INTRINSIC __m256 and_(__m256 a, __m256 b) { return _mm256_and_ps(a, b); } Vc_INTRINSIC __m256d and_(__m256d a, __m256d b) { return _mm256_and_pd(a, b); } Vc_INTRINSIC __m256i 
and_(__m256i a, __m256i b) { #ifdef Vc_IMPL_AVX2 return _mm256_and_si256(a, b); #else return _mm256_castps_si256( _mm256_and_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); #endif } // andnot_{{{1 Vc_INTRINSIC __m256 andnot_(__m256 a, __m256 b) { return _mm256_andnot_ps(a, b); } Vc_INTRINSIC __m256d andnot_(__m256d a, __m256d b) { return _mm256_andnot_pd(a, b); } Vc_INTRINSIC __m256i andnot_(__m256i a, __m256i b) { #ifdef Vc_IMPL_AVX2 return _mm256_andnot_si256(a, b); #else return _mm256_castps_si256( _mm256_andnot_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b))); #endif } // not_{{{1 Vc_INTRINSIC __m256 not_(__m256 a) { return andnot_(a, allone<__m256 >()); } Vc_INTRINSIC __m256d not_(__m256d a) { return andnot_(a, allone<__m256d>()); } Vc_INTRINSIC __m256i not_(__m256i a) { return andnot_(a, allone<__m256i>()); } // blend{{{1 Vc_INTRINSIC __m256 blend(__m256 a, __m256 b, __m256 c) { return _mm256_blendv_ps(a, b, c); } Vc_INTRINSIC __m256d blend(__m256d a, __m256d b, __m256d c) { return _mm256_blendv_pd(a, b, c); } Vc_INTRINSIC __m256i blend(__m256i a, __m256i b, __m256i c) { return AVX::blendv_epi8(a, b, c); } // abs{{{1 Vc_INTRINSIC __m256 abs(__m256 a, float) { return and_(a, AVX::setabsmask_ps()); } Vc_INTRINSIC __m256d abs(__m256d a, double) { return and_(a, AVX::setabsmask_pd()); } Vc_INTRINSIC __m256i abs(__m256i a, int) { return AVX::abs_epi32(a); } Vc_INTRINSIC __m256i abs(__m256i a, uint) { return a; } Vc_INTRINSIC __m256i abs(__m256i a, short) { return AVX::abs_epi16(a); } Vc_INTRINSIC __m256i abs(__m256i a, ushort) { return a; } Vc_INTRINSIC __m256i abs(__m256i a, schar) { return AVX::abs_epi8 (a); } Vc_INTRINSIC __m256i abs(__m256i a, uchar) { return a; } // add{{{1 Vc_INTRINSIC __m256 add(__m256 a, __m256 b, float) { return _mm256_add_ps(a, b); } Vc_INTRINSIC __m256d add(__m256d a, __m256d b, double) { return _mm256_add_pd(a, b); } Vc_INTRINSIC __m256i add(__m256i a, __m256i b, int) { return AVX::add_epi32(a, b); } Vc_INTRINSIC __m256i add(__m256i a, __m256i b, uint) { return AVX::add_epi32(a, b); } Vc_INTRINSIC __m256i add(__m256i a, __m256i b, short) { return AVX::add_epi16(a, b); } Vc_INTRINSIC __m256i add(__m256i a, __m256i b, ushort) { return AVX::add_epi16(a, b); } // sub{{{1 Vc_INTRINSIC __m256 sub(__m256 a, __m256 b, float) { return _mm256_sub_ps(a, b); } Vc_INTRINSIC __m256d sub(__m256d a, __m256d b, double) { return _mm256_sub_pd(a, b); } Vc_INTRINSIC __m256i sub(__m256i a, __m256i b, int) { return AVX::sub_epi32(a, b); } Vc_INTRINSIC __m256i sub(__m256i a, __m256i b, uint) { return AVX::sub_epi32(a, b); } Vc_INTRINSIC __m256i sub(__m256i a, __m256i b, short) { return AVX::sub_epi16(a, b); } Vc_INTRINSIC __m256i sub(__m256i a, __m256i b, ushort) { return AVX::sub_epi16(a, b); } // mul{{{1 Vc_INTRINSIC __m256 mul(__m256 a, __m256 b, float) { return _mm256_mul_ps(a, b); } Vc_INTRINSIC __m256d mul(__m256d a, __m256d b, double) { return _mm256_mul_pd(a, b); } Vc_INTRINSIC __m256i mul(__m256i a, __m256i b, int) { return AVX::mullo_epi32(a, b); } Vc_INTRINSIC __m256i mul(__m256i a, __m256i b, uint) { return AVX::mullo_epi32(a, b); } Vc_INTRINSIC __m256i mul(__m256i a, __m256i b, short) { return AVX::mullo_epi16(a, b); } Vc_INTRINSIC __m256i mul(__m256i a, __m256i b, ushort) { return AVX::mullo_epi16(a, b); } // mul{{{1 Vc_INTRINSIC __m256 div(__m256 a, __m256 b, float) { return _mm256_div_ps(a, b); } Vc_INTRINSIC __m256d div(__m256d a, __m256d b, double) { return _mm256_div_pd(a, b); } Vc_INTRINSIC __m256i div(__m256i a, __m256i b, int) { using namespace AVX; 
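/* Editorial note, not part of the original Vc sources: AVX/AVX2 has no packed
   32-bit integer division, so each 128-bit half is widened to double, divided,
   and truncated back (see the conversions below).  A double's 53-bit mantissa
   represents every int32 exactly, so the truncated quotient matches C++
   integer division.  Per-lane scalar sketch (hypothetical helper):

     int div_lane(int a, int b) {
       return static_cast<int>(static_cast<double>(a) / static_cast<double>(b));
     }
*/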
const __m256d lo1 = _mm256_cvtepi32_pd(lo128(a)); const __m256d lo2 = _mm256_cvtepi32_pd(lo128(b)); const __m256d hi1 = _mm256_cvtepi32_pd(hi128(a)); const __m256d hi2 = _mm256_cvtepi32_pd(hi128(b)); return concat(_mm256_cvttpd_epi32(_mm256_div_pd(lo1, lo2)), _mm256_cvttpd_epi32(_mm256_div_pd(hi1, hi2))); } Vc_INTRINSIC __m256i div(__m256i a, __m256i b, uint) { // SSE/AVX only has signed int conversion to doubles. Therefore we first adjust the input before // conversion and take the adjustment back after the conversion. // It could be argued that for b this is not really important because division by a b >= 2^31 is // useless. But for full correctness it cannot be ignored. using namespace AVX; const __m256i aa = add_epi32(a, set1_epi32(-2147483648)); const __m256i bb = add_epi32(b, set1_epi32(-2147483648)); const __m256d loa = _mm256_add_pd(_mm256_cvtepi32_pd(lo128(aa)), set1_pd(2147483648.)); const __m256d hia = _mm256_add_pd(_mm256_cvtepi32_pd(hi128(aa)), set1_pd(2147483648.)); const __m256d lob = _mm256_add_pd(_mm256_cvtepi32_pd(lo128(bb)), set1_pd(2147483648.)); const __m256d hib = _mm256_add_pd(_mm256_cvtepi32_pd(hi128(bb)), set1_pd(2147483648.)); // there is one remaining problem: a >= 2^31 and b == 1 // in that case the return value would be 2^31 return avx_cast<__m256i>(_mm256_blendv_ps( avx_cast<__m256>(concat(_mm256_cvttpd_epi32(_mm256_div_pd(loa, lob)), _mm256_cvttpd_epi32(_mm256_div_pd(hia, hib)))), avx_cast<__m256>(a), avx_cast<__m256>(cmpeq_epi32(b, setone_epi32())))); } Vc_INTRINSIC __m256i div(__m256i a, __m256i b, short) { using namespace AVX; const __m256 lo = _mm256_div_ps(convert(lo128(a)), convert(lo128(b))); const __m256 hi = _mm256_div_ps(convert(hi128(a)), convert(hi128(b))); return concat(convert(lo), convert(hi)); } // horizontal add{{{1 template Vc_INTRINSIC T add(Common::IntrinsicType a, T) { return {add(add(AVX::lo128(a), AVX::hi128(a), T()), T())}; } // horizontal mul{{{1 template Vc_INTRINSIC T mul(Common::IntrinsicType a, T) { return {mul(mul(AVX::lo128(a), AVX::hi128(a), T()), T())}; } // horizontal min{{{1 template Vc_INTRINSIC T min(Common::IntrinsicType a, T) { return {min(min(AVX::lo128(a), AVX::hi128(a), T()), T())}; } // horizontal max{{{1 template Vc_INTRINSIC T max(Common::IntrinsicType a, T) { return {max(max(AVX::lo128(a), AVX::hi128(a), T()), T())}; } // cmpeq{{{1 Vc_INTRINSIC __m256 cmpeq(__m256 a, __m256 b, float) { return AvxIntrinsics::cmpeq_ps(a, b); } Vc_INTRINSIC __m256d cmpeq(__m256d a, __m256d b, double) { return AvxIntrinsics::cmpeq_pd(a, b); } Vc_INTRINSIC __m256i cmpeq(__m256i a, __m256i b, int) { return AvxIntrinsics::cmpeq_epi32(a, b); } Vc_INTRINSIC __m256i cmpeq(__m256i a, __m256i b, uint) { return AvxIntrinsics::cmpeq_epi32(a, b); } Vc_INTRINSIC __m256i cmpeq(__m256i a, __m256i b, short) { return AvxIntrinsics::cmpeq_epi16(a, b); } Vc_INTRINSIC __m256i cmpeq(__m256i a, __m256i b, ushort) { return AvxIntrinsics::cmpeq_epi16(a, b); } // cmpneq{{{1 Vc_INTRINSIC __m256 cmpneq(__m256 a, __m256 b, float) { return AvxIntrinsics::cmpneq_ps(a, b); } Vc_INTRINSIC __m256d cmpneq(__m256d a, __m256d b, double) { return AvxIntrinsics::cmpneq_pd(a, b); } Vc_INTRINSIC __m256i cmpneq(__m256i a, __m256i b, int) { return not_(AvxIntrinsics::cmpeq_epi32(a, b)); } Vc_INTRINSIC __m256i cmpneq(__m256i a, __m256i b, uint) { return not_(AvxIntrinsics::cmpeq_epi32(a, b)); } Vc_INTRINSIC __m256i cmpneq(__m256i a, __m256i b, short) { return not_(AvxIntrinsics::cmpeq_epi16(a, b)); } Vc_INTRINSIC __m256i cmpneq(__m256i a, __m256i b, ushort) { return 
not_(AvxIntrinsics::cmpeq_epi16(a, b)); } Vc_INTRINSIC __m256i cmpneq(__m256i a, __m256i b, schar) { return not_(AvxIntrinsics::cmpeq_epi8 (a, b)); } Vc_INTRINSIC __m256i cmpneq(__m256i a, __m256i b, uchar) { return not_(AvxIntrinsics::cmpeq_epi8 (a, b)); } // cmpgt{{{1 Vc_INTRINSIC __m256 cmpgt(__m256 a, __m256 b, float) { return AVX::cmpgt_ps(a, b); } Vc_INTRINSIC __m256d cmpgt(__m256d a, __m256d b, double) { return AVX::cmpgt_pd(a, b); } Vc_INTRINSIC __m256i cmpgt(__m256i a, __m256i b, int) { return AVX::cmpgt_epi32(a, b); } Vc_INTRINSIC __m256i cmpgt(__m256i a, __m256i b, uint) { return AVX::cmpgt_epu32(a, b); } Vc_INTRINSIC __m256i cmpgt(__m256i a, __m256i b, short) { return AVX::cmpgt_epi16(a, b); } Vc_INTRINSIC __m256i cmpgt(__m256i a, __m256i b, ushort) { return AVX::cmpgt_epu16(a, b); } Vc_INTRINSIC __m256i cmpgt(__m256i a, __m256i b, schar) { return AVX::cmpgt_epi8 (a, b); } Vc_INTRINSIC __m256i cmpgt(__m256i a, __m256i b, uchar) { return AVX::cmpgt_epu8 (a, b); } // cmpge{{{1 Vc_INTRINSIC __m256 cmpge(__m256 a, __m256 b, float) { return AVX::cmpge_ps(a, b); } Vc_INTRINSIC __m256d cmpge(__m256d a, __m256d b, double) { return AVX::cmpge_pd(a, b); } Vc_INTRINSIC __m256i cmpge(__m256i a, __m256i b, int) { return not_(AVX::cmpgt_epi32(b, a)); } Vc_INTRINSIC __m256i cmpge(__m256i a, __m256i b, uint) { return not_(AVX::cmpgt_epu32(b, a)); } Vc_INTRINSIC __m256i cmpge(__m256i a, __m256i b, short) { return not_(AVX::cmpgt_epi16(b, a)); } Vc_INTRINSIC __m256i cmpge(__m256i a, __m256i b, ushort) { return not_(AVX::cmpgt_epu16(b, a)); } Vc_INTRINSIC __m256i cmpge(__m256i a, __m256i b, schar) { return not_(AVX::cmpgt_epi8 (b, a)); } Vc_INTRINSIC __m256i cmpge(__m256i a, __m256i b, uchar) { return not_(AVX::cmpgt_epu8 (b, a)); } // cmple{{{1 Vc_INTRINSIC __m256 cmple(__m256 a, __m256 b, float) { return AVX::cmple_ps(a, b); } Vc_INTRINSIC __m256d cmple(__m256d a, __m256d b, double) { return AVX::cmple_pd(a, b); } Vc_INTRINSIC __m256i cmple(__m256i a, __m256i b, int) { return not_(AVX::cmpgt_epi32(a, b)); } Vc_INTRINSIC __m256i cmple(__m256i a, __m256i b, uint) { return not_(AVX::cmpgt_epu32(a, b)); } Vc_INTRINSIC __m256i cmple(__m256i a, __m256i b, short) { return not_(AVX::cmpgt_epi16(a, b)); } Vc_INTRINSIC __m256i cmple(__m256i a, __m256i b, ushort) { return not_(AVX::cmpgt_epu16(a, b)); } Vc_INTRINSIC __m256i cmple(__m256i a, __m256i b, schar) { return not_(AVX::cmpgt_epi8 (a, b)); } Vc_INTRINSIC __m256i cmple(__m256i a, __m256i b, uchar) { return not_(AVX::cmpgt_epu8 (a, b)); } // cmplt{{{1 Vc_INTRINSIC __m256 cmplt(__m256 a, __m256 b, float) { return AVX::cmplt_ps(a, b); } Vc_INTRINSIC __m256d cmplt(__m256d a, __m256d b, double) { return AVX::cmplt_pd(a, b); } Vc_INTRINSIC __m256i cmplt(__m256i a, __m256i b, int) { return AVX::cmpgt_epi32(b, a); } Vc_INTRINSIC __m256i cmplt(__m256i a, __m256i b, uint) { return AVX::cmpgt_epu32(b, a); } Vc_INTRINSIC __m256i cmplt(__m256i a, __m256i b, short) { return AVX::cmpgt_epi16(b, a); } Vc_INTRINSIC __m256i cmplt(__m256i a, __m256i b, ushort) { return AVX::cmpgt_epu16(b, a); } Vc_INTRINSIC __m256i cmplt(__m256i a, __m256i b, schar) { return AVX::cmpgt_epi8 (b, a); } Vc_INTRINSIC __m256i cmplt(__m256i a, __m256i b, uchar) { return AVX::cmpgt_epu8 (b, a); } // fma{{{1 Vc_INTRINSIC __m256 fma(__m256 a, __m256 b, __m256 c, float) { #ifdef Vc_IMPL_FMA4 return _mm256_macc_ps(a, b, c); #elif defined Vc_IMPL_FMA return _mm256_fmadd_ps(a, b, c); #else using namespace AVX; __m256d v1_0 = _mm256_cvtps_pd(lo128(a)); __m256d v1_1 = _mm256_cvtps_pd(hi128(a)); 
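/* Editorial note, not part of the original Vc sources: without FMA hardware,
   this float variant widens a, b and c to double (the conversions around this
   comment), multiplies and adds in double, and converts back.  The product of
   two 24-bit float mantissas fits exactly in double's 53-bit mantissa, so the
   intermediate a*b is exact and the result is much closer to a true fused
   multiply-add than a plain float a*b + c.  Per-lane sketch (hypothetical):

     float fma_lane(float a, float b, float c) {
       return static_cast<float>(double(a) * double(b) + double(c));
     }
*/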
__m256d v2_0 = _mm256_cvtps_pd(lo128(b)); __m256d v2_1 = _mm256_cvtps_pd(hi128(b)); __m256d v3_0 = _mm256_cvtps_pd(lo128(c)); __m256d v3_1 = _mm256_cvtps_pd(hi128(c)); return concat(_mm256_cvtpd_ps(_mm256_add_pd(_mm256_mul_pd(v1_0, v2_0), v3_0)), _mm256_cvtpd_ps(_mm256_add_pd(_mm256_mul_pd(v1_1, v2_1), v3_1))); #endif } Vc_INTRINSIC __m256d fma(__m256d a, __m256d b, __m256d c, double) { #ifdef Vc_IMPL_FMA4 return _mm256_macc_pd(a, b, c); #elif defined Vc_IMPL_FMA return _mm256_fmadd_pd(a, b, c); #else using namespace AVX; __m256d h1 = and_(a, _mm256_broadcast_sd(reinterpret_cast( &c_general::highMaskDouble))); __m256d h2 = and_(b, _mm256_broadcast_sd(reinterpret_cast( &c_general::highMaskDouble))); const __m256d l1 = _mm256_sub_pd(a, h1); const __m256d l2 = _mm256_sub_pd(b, h2); const __m256d ll = mul(l1, l2, double()); const __m256d lh = add(mul(l1, h2, double()), mul(h1, l2, double()), double()); const __m256d hh = mul(h1, h2, double()); // ll < lh < hh for all entries is certain const __m256d lh_lt_v3 = cmplt(abs(lh, double()), abs(c, double()), double()); // |lh| < |c| const __m256d x = _mm256_blendv_pd(c, lh, lh_lt_v3); const __m256d y = _mm256_blendv_pd(lh, c, lh_lt_v3); return add(add(ll, x, double()), add(y, hh, double()), double()); #endif } template Vc_INTRINSIC __m256i fma(__m256i a, __m256i b, __m256i c, T) { return add(mul(a, b, T()), c, T()); } // shiftRight{{{1 template Vc_INTRINSIC __m256i shiftRight(__m256i a, int) { return AVX::srai_epi32(a); } template Vc_INTRINSIC __m256i shiftRight(__m256i a, uint) { return AVX::srli_epi32(a); } template Vc_INTRINSIC __m256i shiftRight(__m256i a, short) { return AVX::srai_epi16(a); } template Vc_INTRINSIC __m256i shiftRight(__m256i a, ushort) { return AVX::srli_epi16(a); } //template Vc_INTRINSIC __m256i shiftRight(__m256i a, schar) { return AVX::srai_epi8 (a); } //template Vc_INTRINSIC __m256i shiftRight(__m256i a, uchar) { return AVX::srli_epi8 (a); } Vc_INTRINSIC __m256i shiftRight(__m256i a, int shift, int) { return AVX::sra_epi32(a, _mm_cvtsi32_si128(shift)); } Vc_INTRINSIC __m256i shiftRight(__m256i a, int shift, uint) { return AVX::srl_epi32(a, _mm_cvtsi32_si128(shift)); } Vc_INTRINSIC __m256i shiftRight(__m256i a, int shift, short) { return AVX::sra_epi16(a, _mm_cvtsi32_si128(shift)); } Vc_INTRINSIC __m256i shiftRight(__m256i a, int shift, ushort) { return AVX::srl_epi16(a, _mm_cvtsi32_si128(shift)); } //Vc_INTRINSIC __m256i shiftRight(__m256i a, int shift, schar) { return AVX::sra_epi8 (a, _mm_cvtsi32_si128(shift)); } //Vc_INTRINSIC __m256i shiftRight(__m256i a, int shift, uchar) { return AVX::srl_epi8 (a, _mm_cvtsi32_si128(shift)); } // shiftLeft{{{1 template Vc_INTRINSIC __m256i shiftLeft(__m256i a, int) { return AVX::slli_epi32(a); } template Vc_INTRINSIC __m256i shiftLeft(__m256i a, uint) { return AVX::slli_epi32(a); } template Vc_INTRINSIC __m256i shiftLeft(__m256i a, short) { return AVX::slli_epi16(a); } template Vc_INTRINSIC __m256i shiftLeft(__m256i a, ushort) { return AVX::slli_epi16(a); } //template Vc_INTRINSIC __m256i shiftLeft(__m256i a, schar) { return AVX::slli_epi8 (a); } //template Vc_INTRINSIC __m256i shiftLeft(__m256i a, uchar) { return AVX::slli_epi8 (a); } Vc_INTRINSIC __m256i shiftLeft(__m256i a, int shift, int) { return AVX::sll_epi32(a, _mm_cvtsi32_si128(shift)); } Vc_INTRINSIC __m256i shiftLeft(__m256i a, int shift, uint) { return AVX::sll_epi32(a, _mm_cvtsi32_si128(shift)); } Vc_INTRINSIC __m256i shiftLeft(__m256i a, int shift, short) { return AVX::sll_epi16(a, _mm_cvtsi32_si128(shift)); } 
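/* Editorial note, not part of the original Vc sources: the trailing int/uint/
   short/ushort value parameter of shiftRight/shiftLeft is only a dispatch tag;
   signed tags select the arithmetic (sign-extending) sra* intrinsics, unsigned
   tags the logical srl* ones.  Hedged sketch of a caller (hypothetical name,
   not the actual Vc call site):

     template <typename T> __m256i shift_right(__m256i v, int n) {
       return shiftRight(v, n, T());  // T() carries no value, only its type
     }

   e.g. shift_right<short>(v, 3) resolves to AVX::sra_epi16, while
   shift_right<ushort>(v, 3) resolves to AVX::srl_epi16. */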
Vc_INTRINSIC __m256i shiftLeft(__m256i a, int shift, ushort) { return AVX::sll_epi16(a, _mm_cvtsi32_si128(shift)); } //Vc_INTRINSIC __m256i shiftLeft(__m256i a, int shift, schar) { return AVX::sll_epi8 (a, _mm_cvtsi32_si128(shift)); } //Vc_INTRINSIC __m256i shiftLeft(__m256i a, int shift, uchar) { return AVX::sll_epi8 (a, _mm_cvtsi32_si128(shift)); } // zeroExtendIfNeeded{{{1 Vc_INTRINSIC __m256 zeroExtendIfNeeded(__m256 x) { return x; } Vc_INTRINSIC __m256d zeroExtendIfNeeded(__m256d x) { return x; } Vc_INTRINSIC __m256i zeroExtendIfNeeded(__m256i x) { return x; } Vc_INTRINSIC __m256 zeroExtendIfNeeded(__m128 x) { return AVX::zeroExtend(x); } Vc_INTRINSIC __m256d zeroExtendIfNeeded(__m128d x) { return AVX::zeroExtend(x); } Vc_INTRINSIC __m256i zeroExtendIfNeeded(__m128i x) { return AVX::zeroExtend(x); } // broadcast{{{1 Vc_INTRINSIC __m256 avx_broadcast( float x) { return _mm256_set1_ps(x); } Vc_INTRINSIC __m256d avx_broadcast(double x) { return _mm256_set1_pd(x); } Vc_INTRINSIC __m256i avx_broadcast( int x) { return _mm256_set1_epi32(x); } Vc_INTRINSIC __m256i avx_broadcast( uint x) { return _mm256_set1_epi32(x); } Vc_INTRINSIC __m256i avx_broadcast( short x) { return _mm256_set1_epi16(x); } Vc_INTRINSIC __m256i avx_broadcast(ushort x) { return _mm256_set1_epi16(x); } Vc_INTRINSIC __m256i avx_broadcast( char x) { return _mm256_set1_epi8(x); } Vc_INTRINSIC __m256i avx_broadcast( schar x) { return _mm256_set1_epi8(x); } Vc_INTRINSIC __m256i avx_broadcast( uchar x) { return _mm256_set1_epi8(x); } // sorted{{{1 template = AVXImpl && Impl <= AVX2Impl)>> Vc_CONST_L AVX2::Vector Vc_VDECL sorted(AVX2::Vector x) Vc_CONST_R; template Vc_INTRINSIC Vc_CONST AVX2::Vector sorted(AVX2::Vector x) { return sorted(x); } // shifted{{{1 template static Vc_INTRINSIC Vc_CONST enable_if<(sizeof(V) == 32), V> shifted(V v, int amount) { using namespace AVX; constexpr int S = sizeof(T); switch (amount) { case 0: return v; case 1: return shifted( 1 * S)>(v); case 2: return shifted( 2 * S)>(v); case 3: return shifted( 3 * S)>(v); case -1: return shifted(-1 * S)>(v); case -2: return shifted(-2 * S)>(v); case -3: return shifted(-3 * S)>(v); } if (sizeof(T) <= 4) { switch (amount) { case 4: return shifted( 4 * S)>(v); case 5: return shifted( 5 * S)>(v); case 6: return shifted( 6 * S)>(v); case 7: return shifted( 7 * S)>(v); case -4: return shifted(-4 * S)>(v); case -5: return shifted(-5 * S)>(v); case -6: return shifted(-6 * S)>(v); case -7: return shifted(-7 * S)>(v); } if (sizeof(T) <= 2) { switch (amount) { case 8: return shifted( 8 * S)>(v); case 9: return shifted( 9 * S)>(v); case 10: return shifted( 10 * S)>(v); case 11: return shifted( 11 * S)>(v); case 12: return shifted( 12 * S)>(v); case 13: return shifted( 13 * S)>(v); case 14: return shifted( 14 * S)>(v); case 15: return shifted( 15 * S)>(v); case -8: return shifted(- 8 * S)>(v); case -9: return shifted(- 9 * S)>(v); case -10: return shifted(-10 * S)>(v); case -11: return shifted(-11 * S)>(v); case -12: return shifted(-12 * S)>(v); case -13: return shifted(-13 * S)>(v); case -14: return shifted(-14 * S)>(v); case -15: return shifted(-15 * S)>(v); } if (sizeof(T) == 1) { switch (amount) { case 16: return shifted( 16)>(v); case 17: return shifted( 17)>(v); case 18: return shifted( 18)>(v); case 19: return shifted( 19)>(v); case 20: return shifted( 20)>(v); case 21: return shifted( 21)>(v); case 22: return shifted( 22)>(v); case 23: return shifted( 23)>(v); case 24: return shifted( 24)>(v); case 25: return shifted( 25)>(v); case 26: return shifted( 26)>(v); 
case 27: return shifted( 27)>(v); case 28: return shifted( 28)>(v); case 29: return shifted( 29)>(v); case 30: return shifted( 30)>(v); case 31: return shifted( 31)>(v); case -16: return shifted(-16)>(v); case -17: return shifted(-17)>(v); case -18: return shifted(-18)>(v); case -19: return shifted(-19)>(v); case -20: return shifted(-20)>(v); case -21: return shifted(-21)>(v); case -22: return shifted(-22)>(v); case -23: return shifted(-23)>(v); case -24: return shifted(-24)>(v); case -25: return shifted(-25)>(v); case -26: return shifted(-26)>(v); case -27: return shifted(-27)>(v); case -28: return shifted(-28)>(v); case -29: return shifted(-29)>(v); case -30: return shifted(-30)>(v); case -31: return shifted(-31)>(v); } } } } return avx_cast(_mm256_setzero_ps()); } template static Vc_INTRINSIC Vc_CONST enable_if<(sizeof(V) == 16), V> shifted(V v, int amount) { using namespace AVX; switch (amount) { case 0: return v; case 1: return avx_cast(_mm_srli_si128(avx_cast<__m128i>(v), sanitize(1 * sizeof(T)))); case 2: return avx_cast(_mm_srli_si128(avx_cast<__m128i>(v), sanitize(2 * sizeof(T)))); case 3: return avx_cast(_mm_srli_si128(avx_cast<__m128i>(v), sanitize(3 * sizeof(T)))); case -1: return avx_cast(_mm_slli_si128(avx_cast<__m128i>(v), sanitize(1 * sizeof(T)))); case -2: return avx_cast(_mm_slli_si128(avx_cast<__m128i>(v), sanitize(2 * sizeof(T)))); case -3: return avx_cast(_mm_slli_si128(avx_cast<__m128i>(v), sanitize(3 * sizeof(T)))); } if (sizeof(T) <= 2) { switch (amount) { case 4: return avx_cast(_mm_srli_si128(avx_cast<__m128i>(v), sanitize(4 * sizeof(T)))); case 5: return avx_cast(_mm_srli_si128(avx_cast<__m128i>(v), sanitize(5 * sizeof(T)))); case 6: return avx_cast(_mm_srli_si128(avx_cast<__m128i>(v), sanitize(6 * sizeof(T)))); case 7: return avx_cast(_mm_srli_si128(avx_cast<__m128i>(v), sanitize(7 * sizeof(T)))); case -4: return avx_cast(_mm_slli_si128(avx_cast<__m128i>(v), sanitize(4 * sizeof(T)))); case -5: return avx_cast(_mm_slli_si128(avx_cast<__m128i>(v), sanitize(5 * sizeof(T)))); case -6: return avx_cast(_mm_slli_si128(avx_cast<__m128i>(v), sanitize(6 * sizeof(T)))); case -7: return avx_cast(_mm_slli_si128(avx_cast<__m128i>(v), sanitize(7 * sizeof(T)))); } } return avx_cast(_mm_setzero_ps()); } // rotated{{{1 template static Vc_INTRINSIC Vc_CONST enable_if<(sizeof(V) == 32 && N == 4), V> rotated(V v, int amount) { using namespace AVX; const __m128i vLo = avx_cast<__m128i>(lo128(v)); const __m128i vHi = avx_cast<__m128i>(hi128(v)); switch (static_cast(amount) % N) { case 0: return v; case 1: return avx_cast(concat(SSE::alignr_epi8(vHi, vLo), SSE::alignr_epi8(vLo, vHi))); case 2: return Mem::permute128(v); case 3: return avx_cast(concat(SSE::alignr_epi8(vLo, vHi), SSE::alignr_epi8(vHi, vLo))); } return avx_cast(_mm256_setzero_ps()); } template static Vc_INTRINSIC Vc_CONST enable_if<(sizeof(V) == 32 && N == 8), V> rotated(V v, int amount) { using namespace AVX; const __m128i vLo = avx_cast<__m128i>(lo128(v)); const __m128i vHi = avx_cast<__m128i>(hi128(v)); switch (static_cast(amount) % N) { case 0: return v; case 1: return avx_cast(concat(SSE::alignr_epi8<1 * sizeof(T)>(vHi, vLo), SSE::alignr_epi8<1 * sizeof(T)>(vLo, vHi))); case 2: return avx_cast(concat(SSE::alignr_epi8<2 * sizeof(T)>(vHi, vLo), SSE::alignr_epi8<2 * sizeof(T)>(vLo, vHi))); case 3: return avx_cast(concat(SSE::alignr_epi8<3 * sizeof(T)>(vHi, vLo), SSE::alignr_epi8<3 * sizeof(T)>(vLo, vHi))); case 4: return Mem::permute128(v); case 5: return avx_cast(concat(SSE::alignr_epi8<1 * sizeof(T)>(vLo, vHi), 
SSE::alignr_epi8<1 * sizeof(T)>(vHi, vLo))); case 6: return avx_cast(concat(SSE::alignr_epi8<2 * sizeof(T)>(vLo, vHi), SSE::alignr_epi8<2 * sizeof(T)>(vHi, vLo))); case 7: return avx_cast(concat(SSE::alignr_epi8<3 * sizeof(T)>(vLo, vHi), SSE::alignr_epi8<3 * sizeof(T)>(vHi, vLo))); } return avx_cast(_mm256_setzero_ps()); } #ifdef Vc_IMPL_AVX2 template static Vc_INTRINSIC Vc_CONST enable_if<(sizeof(V) == 32 && N == 16), V> rotated( V v, int amount) { using namespace AVX; const __m128i vLo = avx_cast<__m128i>(lo128(v)); const __m128i vHi = avx_cast<__m128i>(hi128(v)); switch (static_cast(amount) % N) { case 0: return v; case 1: return avx_cast(concat(SSE::alignr_epi8<1 * sizeof(T)>(vHi, vLo), SSE::alignr_epi8<1 * sizeof(T)>(vLo, vHi))); case 2: return avx_cast(concat(SSE::alignr_epi8<2 * sizeof(T)>(vHi, vLo), SSE::alignr_epi8<2 * sizeof(T)>(vLo, vHi))); case 3: return avx_cast(concat(SSE::alignr_epi8<3 * sizeof(T)>(vHi, vLo), SSE::alignr_epi8<3 * sizeof(T)>(vLo, vHi))); case 4: return Mem::permute4x64(v); case 5: return avx_cast(concat(SSE::alignr_epi8<5 * sizeof(T)>(vHi, vLo), SSE::alignr_epi8<5 * sizeof(T)>(vLo, vHi))); case 6: return avx_cast(concat(SSE::alignr_epi8<6 * sizeof(T)>(vHi, vLo), SSE::alignr_epi8<6 * sizeof(T)>(vLo, vHi))); case 7: return avx_cast(concat(SSE::alignr_epi8<7 * sizeof(T)>(vHi, vLo), SSE::alignr_epi8<7 * sizeof(T)>(vLo, vHi))); case 8: return Mem::permute128(v); case 9: return avx_cast(concat(SSE::alignr_epi8<1 * sizeof(T)>(vLo, vHi), SSE::alignr_epi8<1 * sizeof(T)>(vHi, vLo))); case 10: return avx_cast(concat(SSE::alignr_epi8<2 * sizeof(T)>(vLo, vHi), SSE::alignr_epi8<2 * sizeof(T)>(vHi, vLo))); case 11: return avx_cast(concat(SSE::alignr_epi8<3 * sizeof(T)>(vLo, vHi), SSE::alignr_epi8<3 * sizeof(T)>(vHi, vLo))); case 12: return Mem::permute4x64(v); case 13: return avx_cast(concat(SSE::alignr_epi8<5 * sizeof(T)>(vLo, vHi), SSE::alignr_epi8<5 * sizeof(T)>(vHi, vLo))); case 14: return avx_cast(concat(SSE::alignr_epi8<6 * sizeof(T)>(vLo, vHi), SSE::alignr_epi8<6 * sizeof(T)>(vHi, vLo))); case 15: return avx_cast(concat(SSE::alignr_epi8<7 * sizeof(T)>(vLo, vHi), SSE::alignr_epi8<7 * sizeof(T)>(vHi, vLo))); } return avx_cast(_mm256_setzero_ps()); } #endif // Vc_IMPL_AVX2 // testc{{{1 Vc_INTRINSIC Vc_CONST int testc(__m128 a, __m128 b) { return _mm_testc_si128(_mm_castps_si128(a), _mm_castps_si128(b)); } Vc_INTRINSIC Vc_CONST int testc(__m256 a, __m256 b) { return _mm256_testc_ps(a, b); } Vc_INTRINSIC Vc_CONST int testc(__m256d a, __m256d b) { return _mm256_testc_pd(a, b); } Vc_INTRINSIC Vc_CONST int testc(__m256i a, __m256i b) { return _mm256_testc_si256(a, b); } // testz{{{1 Vc_INTRINSIC Vc_CONST int testz(__m128 a, __m128 b) { return _mm_testz_si128(_mm_castps_si128(a), _mm_castps_si128(b)); } Vc_INTRINSIC Vc_CONST int testz(__m256 a, __m256 b) { return _mm256_testz_ps(a, b); } Vc_INTRINSIC Vc_CONST int testz(__m256d a, __m256d b) { return _mm256_testz_pd(a, b); } Vc_INTRINSIC Vc_CONST int testz(__m256i a, __m256i b) { return _mm256_testz_si256(a, b); } // testnzc{{{1 Vc_INTRINSIC Vc_CONST int testnzc(__m128 a, __m128 b) { return _mm_testnzc_si128(_mm_castps_si128(a), _mm_castps_si128(b)); } Vc_INTRINSIC Vc_CONST int testnzc(__m256 a, __m256 b) { return _mm256_testnzc_ps(a, b); } Vc_INTRINSIC Vc_CONST int testnzc(__m256d a, __m256d b) { return _mm256_testnzc_pd(a, b); } Vc_INTRINSIC Vc_CONST int testnzc(__m256i a, __m256i b) { return _mm256_testnzc_si256(a, b); } // movemask{{{1 Vc_INTRINSIC Vc_CONST int movemask(__m256i a) { return AVX::movemask_epi8(a); } 
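// Note: testc/testz/testnzc above wrap the VPTEST/VTESTPS/VTESTPD flag tests (carry and
// zero flags computed over a bitwise AND / ANDN of two masks), which the mask classes
// typically use for full/empty/mixed queries.  The movemask overloads collapse each
// element's sign bit into an ordinary integer bitmask; mask_to_int() and the
// mask_store()/mask_load() helpers further below build on them to convert between vector
// masks and packed bool/int representations.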
Vc_INTRINSIC Vc_CONST int movemask(__m128i a) { return _mm_movemask_epi8(a); } Vc_INTRINSIC Vc_CONST int movemask(__m256d a) { return _mm256_movemask_pd(a); } Vc_INTRINSIC Vc_CONST int movemask(__m128d a) { return _mm_movemask_pd(a); } Vc_INTRINSIC Vc_CONST int movemask(__m256 a) { return _mm256_movemask_ps(a); } Vc_INTRINSIC Vc_CONST int movemask(__m128 a) { return _mm_movemask_ps(a); } // mask_store{{{1 template Vc_INTRINSIC void mask_store(__m256i k, bool *mem, Flags) { static_assert( N == 4 || N == 8 || N == 16, "mask_store(__m256i, bool *) is only implemented for 4, 8, and 16 entries"); switch (N) { case 4: *aliasing_cast(mem) = (_mm_movemask_epi8(AVX::lo128(k)) | (_mm_movemask_epi8(AVX::hi128(k)) << 16)) & 0x01010101; break; case 8: { const auto k2 = _mm_srli_epi16(_mm_packs_epi16(AVX::lo128(k), AVX::hi128(k)), 15); const auto k3 = _mm_packs_epi16(k2, _mm_setzero_si128()); #ifdef __x86_64__ *aliasing_cast(mem) = _mm_cvtsi128_si64(k3); #else *aliasing_cast(mem) = _mm_cvtsi128_si32(k3); *aliasing_cast(mem + 4) = _mm_extract_epi32(k3, 1); #endif } break; case 16: { const auto bools = Detail::and_(_mm_set1_epi8(1), _mm_packs_epi16(AVX::lo128(k), AVX::hi128(k))); if (Flags::IsAligned) { _mm_store_si128(reinterpret_cast<__m128i *>(mem), bools); } else { _mm_storeu_si128(reinterpret_cast<__m128i *>(mem), bools); } } break; default: Vc_UNREACHABLE(); } } // mask_load{{{1 template Vc_INTRINSIC R mask_load(const bool *mem, Flags, enable_if::value> = nullarg) { static_assert(N == 4 || N == 8, "mask_load<__m128>(const bool *) is only implemented for 4, 8 entries"); switch (N) { case 4: { __m128i k = _mm_cvtsi32_si128(*aliasing_cast(mem)); k = _mm_unpacklo_epi8(k, k); k = _mm_unpacklo_epi16(k, k); k = _mm_cmpgt_epi32(k, _mm_setzero_si128()); return AVX::avx_cast<__m128>(k); } case 8: { #ifdef __x86_64__ __m128i k = _mm_cvtsi64_si128(*aliasing_cast(mem)); #else __m128i k = _mm_castpd_si128(_mm_load_sd(aliasing_cast(mem))); #endif return AVX::avx_cast<__m128>( _mm_cmpgt_epi16(_mm_unpacklo_epi8(k, k), _mm_setzero_si128())); } default: Vc_UNREACHABLE(); } } template Vc_INTRINSIC R mask_load(const bool *mem, Flags, enable_if::value> = nullarg) { static_assert( N == 4 || N == 8 || N == 16, "mask_load<__m256>(const bool *) is only implemented for 4, 8, and 16 entries"); switch (N) { case 4: { __m128i k = AVX::avx_cast<__m128i>(_mm_and_ps( _mm_set1_ps(*aliasing_cast(mem)), AVX::avx_cast<__m128>(_mm_setr_epi32(0x1, 0x100, 0x10000, 0x1000000)))); k = _mm_cmpgt_epi32(k, _mm_setzero_si128()); return AVX::avx_cast<__m256>( AVX::concat(_mm_unpacklo_epi32(k, k), _mm_unpackhi_epi32(k, k))); } case 8: { #ifdef __x86_64__ __m128i k = _mm_cvtsi64_si128(*aliasing_cast(mem)); #else __m128i k = _mm_castpd_si128(_mm_load_sd(aliasing_cast(mem))); #endif k = _mm_cmpgt_epi16(_mm_unpacklo_epi8(k, k), _mm_setzero_si128()); return AVX::avx_cast<__m256>( AVX::concat(_mm_unpacklo_epi16(k, k), _mm_unpackhi_epi16(k, k))); } case 16: { const auto k128 = _mm_cmpgt_epi8( Flags::IsAligned ? 
_mm_load_si128(reinterpret_cast(mem)) : _mm_loadu_si128(reinterpret_cast(mem)), _mm_setzero_si128()); return AVX::avx_cast<__m256>( AVX::concat(_mm_unpacklo_epi8(k128, k128), _mm_unpackhi_epi8(k128, k128))); } default: Vc_UNREACHABLE(); return R(); } } // mask_to_int{{{1 template Vc_INTRINSIC_L Vc_CONST_L int mask_to_int(__m256i x) Vc_INTRINSIC_R Vc_CONST_R; template <> Vc_INTRINSIC Vc_CONST int mask_to_int<4>(__m256i k) { return movemask(AVX::avx_cast<__m256d>(k)); } template <> Vc_INTRINSIC Vc_CONST int mask_to_int<8>(__m256i k) { return movemask(AVX::avx_cast<__m256>(k)); } #ifdef Vc_IMPL_BMI2 template <> Vc_INTRINSIC Vc_CONST int mask_to_int<16>(__m256i k) { return _pext_u32(movemask(k), 0x55555555u); } #endif template <> Vc_INTRINSIC Vc_CONST int mask_to_int<32>(__m256i k) { return movemask(k); } //InterleaveImpl{{{1 template struct InterleaveImpl { template static inline void interleave(typename V::EntryType *const data, const I &i,/*{{{*/ const typename V::AsArg v0, // a0 a1 a2 a3 a4 a5 a6 a7 | a8 a9 ... const typename V::AsArg v1) // b0 b1 b2 b3 b4 b5 b6 b7 | b8 b9 ... { const __m256i tmp0 = AVX::unpacklo_epi16(v0.data(), v1.data()); // a0 b0 a1 b1 a2 b2 a3 b3 | a8 b8 a9 ... const __m256i tmp1 = AVX::unpackhi_epi16(v0.data(), v1.data()); // a4 b4 a5 ... using namespace AVX; *aliasing_cast(&data[i[ 0]]) = _mm_cvtsi128_si32(lo128(tmp0)); *aliasing_cast(&data[i[ 1]]) = _mm_extract_epi32(lo128(tmp0), 1); *aliasing_cast(&data[i[ 2]]) = _mm_extract_epi32(lo128(tmp0), 2); *aliasing_cast(&data[i[ 3]]) = _mm_extract_epi32(lo128(tmp0), 3); *aliasing_cast(&data[i[ 4]]) = _mm_cvtsi128_si32(lo128(tmp1)); *aliasing_cast(&data[i[ 5]]) = _mm_extract_epi32(lo128(tmp1), 1); *aliasing_cast(&data[i[ 6]]) = _mm_extract_epi32(lo128(tmp1), 2); *aliasing_cast(&data[i[ 7]]) = _mm_extract_epi32(lo128(tmp1), 3); *aliasing_cast(&data[i[ 8]]) = _mm_cvtsi128_si32(hi128(tmp0)); *aliasing_cast(&data[i[ 9]]) = _mm_extract_epi32(hi128(tmp0), 1); *aliasing_cast(&data[i[10]]) = _mm_extract_epi32(hi128(tmp0), 2); *aliasing_cast(&data[i[11]]) = _mm_extract_epi32(hi128(tmp0), 3); *aliasing_cast(&data[i[12]]) = _mm_cvtsi128_si32(hi128(tmp1)); *aliasing_cast(&data[i[13]]) = _mm_extract_epi32(hi128(tmp1), 1); *aliasing_cast(&data[i[14]]) = _mm_extract_epi32(hi128(tmp1), 2); *aliasing_cast(&data[i[15]]) = _mm_extract_epi32(hi128(tmp1), 3); }/*}}}*/ static inline void interleave(typename V::EntryType *const data, const Common::SuccessiveEntries<2> &i,/*{{{*/ const typename V::AsArg v0, const typename V::AsArg v1) { const __m256i tmp0 = AVX::unpacklo_epi16(v0.data(), v1.data()); // a0 b0 a1 b1 a2 b2 a3 b3 | a8 b8 a9 ... const __m256i tmp1 = AVX::unpackhi_epi16(v0.data(), v1.data()); // a4 b4 a5 ... V(Mem::shuffle128(tmp0, tmp1)).store(&data[i[0]], Vc::Unaligned); V(Mem::shuffle128(tmp0, tmp1)).store(&data[i[8]], Vc::Unaligned); }/*}}}*/ template static inline void interleave(typename V::EntryType *const data, const I &i,/*{{{*/ const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2) { interleave(data, i, v0, v1); v2.scatter(data + 2, i); }/*}}}*/ template static inline void interleave(typename V::EntryType *const data, const I &i,/*{{{*/ const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3) { const __m256i tmp0 = AVX::unpacklo_epi16(v0.data(), v2.data()); // a0 c0 a1 c1 a2 c2 a3 c3 | a8 c8 a9 c9 ... const __m256i tmp1 = AVX::unpackhi_epi16(v0.data(), v2.data()); // a4 c4 a5 c5 a6 c6 a7 c7 | a12 c12 ... 
const __m256i tmp2 = AVX::unpacklo_epi16(v1.data(), v3.data()); // b0 d0 b1 d1 b2 d2 b3 d3 | b8 d8 b9 d9 ... const __m256i tmp3 = AVX::unpackhi_epi16(v1.data(), v3.data()); // b4 d4 b5 ... const __m256i tmp4 = AVX::unpacklo_epi16(tmp0, tmp2); // a0 b0 c0 d0 a1 b1 c1 d1 | a8 b8 c8 d8 a9 b9 ... const __m256i tmp5 = AVX::unpackhi_epi16(tmp0, tmp2); // [abcd]2 [abcd]3 | [abcd]10 [abcd]11 const __m256i tmp6 = AVX::unpacklo_epi16(tmp1, tmp3); // [abcd]4 [abcd]5 | [abcd]12 [abcd]13 const __m256i tmp7 = AVX::unpackhi_epi16(tmp1, tmp3); // [abcd]6 [abcd]7 | [abcd]14 [abcd]15 using namespace AVX; auto &&store = [&](__m256i x, int offset) { _mm_storel_epi64(reinterpret_cast<__m128i *>(&data[i[offset + 0]]), lo128(x)); _mm_storel_epi64(reinterpret_cast<__m128i *>(&data[i[offset + 8]]), hi128(x)); _mm_storeh_pi(reinterpret_cast<__m64 *>(&data[i[offset + 1]]), avx_cast<__m128>(x)); _mm_storeh_pi(reinterpret_cast<__m64 *>(&data[i[offset + 9]]), avx_cast<__m128>(hi128(x))); }; store(tmp4, 0); store(tmp5, 2); store(tmp6, 4); store(tmp7, 6); }/*}}}*/ static inline void interleave(typename V::EntryType *const data, const Common::SuccessiveEntries<4> &i,/*{{{*/ const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3) { const __m256i tmp0 = AVX::unpacklo_epi16(v0.data(), v2.data()); // a0 c0 a1 c1 a2 c2 a3 c3 | a8 c8 a9 c9 ... const __m256i tmp1 = AVX::unpackhi_epi16(v0.data(), v2.data()); // a4 c4 a5 c5 a6 c6 a7 c7 | a12 c12 ... const __m256i tmp2 = AVX::unpacklo_epi16(v1.data(), v3.data()); // b0 d0 b1 d1 b2 d2 b3 d3 | b8 d8 b9 d9 ... const __m256i tmp3 = AVX::unpackhi_epi16(v1.data(), v3.data()); // b4 d4 b5 ... const __m256i tmp4 = AVX::unpacklo_epi16(tmp0, tmp2); // a0 b0 c0 d0 a1 b1 c1 d1 | a8 b8 c8 d8 a9 b9 ... 
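// Together with tmp5..tmp7 below, this second unpack round completes a 4-way 16-bit
// transpose: each 64-bit quarter of tmp4..tmp7 then holds one full (a,b,c,d) quadruple,
// so every index can be written with a single 64-bit store (storel/storeh on the low and
// high 128-bit halves).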
const __m256i tmp5 = AVX::unpackhi_epi16(tmp0, tmp2); // [abcd]2 [abcd]3 | [abcd]10 [abcd]11 const __m256i tmp6 = AVX::unpacklo_epi16(tmp1, tmp3); // [abcd]4 [abcd]5 | [abcd]12 [abcd]13 const __m256i tmp7 = AVX::unpackhi_epi16(tmp1, tmp3); // [abcd]6 [abcd]7 | [abcd]14 [abcd]15 V(Mem::shuffle128(tmp4, tmp5)).store(&data[i[0]], ::Vc::Unaligned); V(Mem::shuffle128(tmp6, tmp7)).store(&data[i[4]], ::Vc::Unaligned); V(Mem::shuffle128(tmp4, tmp5)).store(&data[i[8]], ::Vc::Unaligned); V(Mem::shuffle128(tmp6, tmp7)).store(&data[i[12]], ::Vc::Unaligned); }/*}}}*/ template // interleave 5 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4) { interleave(data, i, v0, v1, v2, v3); v4.scatter(data + 4, i); } template // interleave 6 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4, const typename V::AsArg v5) { interleave(data, i, v0, v1, v2, v3); interleave(data + 4, i, v4, v5); } template // interleave 7 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4, const typename V::AsArg v5, const typename V::AsArg v6) { interleave(data, i, v0, v1, v2, v3); interleave(data + 4, i, v4, v5, v6); } template // interleave 8 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4, const typename V::AsArg v5, const typename V::AsArg v6, const typename V::AsArg v7) { interleave(data, i, v0, v1, v2, v3); interleave(data + 4, i, v4, v5, v6, v7); } //}}}2 template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1) { const __m256i tmp4 = // a0 b0 a1 b1 a2 b2 a3 b3 | a8 b8 a9 b9 a10 b10 a11 b11 _mm256_setr_epi32( *aliasing_cast(&data[i[0]]), *aliasing_cast(&data[i[1]]), *aliasing_cast(&data[i[2]]), *aliasing_cast(&data[i[3]]), *aliasing_cast(&data[i[8]]), *aliasing_cast(&data[i[9]]), *aliasing_cast(&data[i[10]]), *aliasing_cast(&data[i[11]])); const __m256i tmp5 = // a4 b4 a5 b5 a6 b6 a7 b7 | a12 b12 a13 b13 a14 b14 a15 b15 _mm256_setr_epi32( *aliasing_cast(&data[i[4]]), *aliasing_cast(&data[i[5]]), *aliasing_cast(&data[i[6]]), *aliasing_cast(&data[i[7]]), *aliasing_cast(&data[i[12]]), *aliasing_cast(&data[i[13]]), *aliasing_cast(&data[i[14]]), *aliasing_cast(&data[i[15]])); const __m256i tmp2 = AVX::unpacklo_epi16(tmp4, tmp5); // a0 a4 b0 b4 a1 a5 b1 b5 | a8 a12 b8 b12 a9 a13 b9 b13 const __m256i tmp3 = AVX::unpackhi_epi16(tmp4, tmp5); // a2 a6 b2 b6 a3 a7 b3 b7 | a10 a14 b10 b14 a11 a15 b11 b15 const __m256i tmp0 = AVX::unpacklo_epi16(tmp2, tmp3); // a0 a2 a4 a6 b0 b2 b4 b6 | a8 a10 a12 a14 b8 ... const __m256i tmp1 = AVX::unpackhi_epi16(tmp2, tmp3); // a1 a3 a5 a7 b1 b3 b5 b7 | a9 a11 a13 a15 b9 ... v0.data() = AVX::unpacklo_epi16(tmp0, tmp1); // a0 a1 a2 a3 a4 a5 a6 a7 | a8 a9 ... v1.data() = AVX::unpackhi_epi16(tmp0, tmp1); // b0 b1 b2 b3 b4 b5 b6 b7 | b8 b9 ... 
}/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2) { using namespace AVX; const __m256i tmp0 = avx_cast<__m256i>(_mm256_setr_pd( *aliasing_cast(&data[i[0]]), *aliasing_cast(&data[i[1]]), *aliasing_cast(&data[i[8]]), *aliasing_cast(&data[i[9]]))); const __m256i tmp1 = avx_cast<__m256i>(_mm256_setr_pd( *aliasing_cast(&data[i[2]]), *aliasing_cast(&data[i[3]]), *aliasing_cast(&data[i[10]]), *aliasing_cast(&data[i[11]]))); const __m256i tmp2 = avx_cast<__m256i>(_mm256_setr_pd( *aliasing_cast(&data[i[4]]), *aliasing_cast(&data[i[5]]), *aliasing_cast(&data[i[12]]), *aliasing_cast(&data[i[13]]))); const __m256i tmp3 = avx_cast<__m256i>(_mm256_setr_pd( *aliasing_cast(&data[i[6]]), *aliasing_cast(&data[i[7]]), *aliasing_cast(&data[i[14]]), *aliasing_cast(&data[i[15]]))); const __m256i tmp4 = AVX::unpacklo_epi16(tmp0, tmp2); // a0 a4 b0 b4 c0 c4 XX XX | a8 a12 b8 ... const __m256i tmp5 = AVX::unpackhi_epi16(tmp0, tmp2); // a1 a5 ... const __m256i tmp6 = AVX::unpacklo_epi16(tmp1, tmp3); // a2 a6 ... const __m256i tmp7 = AVX::unpackhi_epi16(tmp1, tmp3); // a3 a7 ... const __m256i tmp8 = AVX::unpacklo_epi16(tmp4, tmp6); // a0 a2 a4 a6 b0 ... const __m256i tmp9 = AVX::unpackhi_epi16(tmp4, tmp6); // c0 c2 c4 c6 XX ... const __m256i tmp10 = AVX::unpacklo_epi16(tmp5, tmp7); // a1 a3 a5 a7 b1 ... const __m256i tmp11 = AVX::unpackhi_epi16(tmp5, tmp7); // c1 c3 c5 c7 XX ... v0.data() = AVX::unpacklo_epi16(tmp8, tmp10); // a0 a1 a2 a3 a4 a5 a6 a7 | a8 ... v1.data() = AVX::unpackhi_epi16(tmp8, tmp10); v2.data() = AVX::unpacklo_epi16(tmp9, tmp11); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3) { using namespace AVX; const __m256i tmp0 = avx_cast<__m256i>(_mm256_setr_pd( *aliasing_cast(&data[i[0]]), *aliasing_cast(&data[i[1]]), *aliasing_cast(&data[i[8]]), *aliasing_cast(&data[i[9]]))); const __m256i tmp1 = avx_cast<__m256i>(_mm256_setr_pd( *aliasing_cast(&data[i[2]]), *aliasing_cast(&data[i[3]]), *aliasing_cast(&data[i[10]]), *aliasing_cast(&data[i[11]]))); const __m256i tmp2 = avx_cast<__m256i>(_mm256_setr_pd( *aliasing_cast(&data[i[4]]), *aliasing_cast(&data[i[5]]), *aliasing_cast(&data[i[12]]), *aliasing_cast(&data[i[13]]))); const __m256i tmp3 = avx_cast<__m256i>(_mm256_setr_pd( *aliasing_cast(&data[i[6]]), *aliasing_cast(&data[i[7]]), *aliasing_cast(&data[i[14]]), *aliasing_cast(&data[i[15]]))); const __m256i tmp4 = AVX::unpacklo_epi16(tmp0, tmp2); // a0 a4 b0 b4 c0 c4 d0 d4 | a8 a12 b8 ... const __m256i tmp5 = AVX::unpackhi_epi16(tmp0, tmp2); // a1 a5 ... const __m256i tmp6 = AVX::unpacklo_epi16(tmp1, tmp3); // a2 a6 ... const __m256i tmp7 = AVX::unpackhi_epi16(tmp1, tmp3); // a3 a7 ... const __m256i tmp8 = AVX::unpacklo_epi16(tmp4, tmp6); // a0 a2 a4 a6 b0 ... const __m256i tmp9 = AVX::unpackhi_epi16(tmp4, tmp6); // c0 c2 c4 c6 d0 ... const __m256i tmp10 = AVX::unpacklo_epi16(tmp5, tmp7); // a1 a3 a5 a7 b1 ... const __m256i tmp11 = AVX::unpackhi_epi16(tmp5, tmp7); // c1 c3 c5 c7 d1 ... v0.data() = AVX::unpacklo_epi16(tmp8, tmp10); // a0 a1 a2 a3 a4 a5 a6 a7 | a8 ... 
v1.data() = AVX::unpackhi_epi16(tmp8, tmp10); v2.data() = AVX::unpacklo_epi16(tmp9, tmp11); v3.data() = AVX::unpackhi_epi16(tmp9, tmp11); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4) { using namespace AVX; const __m256i a = concat(_mm_loadu_si128(reinterpret_cast(&data[i[0]])), _mm_loadu_si128(reinterpret_cast(&data[i[8]]))); const __m256i b = concat(_mm_loadu_si128(reinterpret_cast(&data[i[1]])), _mm_loadu_si128(reinterpret_cast(&data[i[9]]))); const __m256i c = concat(_mm_loadu_si128(reinterpret_cast(&data[i[2]])), _mm_loadu_si128(reinterpret_cast(&data[i[10]]))); const __m256i d = concat(_mm_loadu_si128(reinterpret_cast(&data[i[3]])), _mm_loadu_si128(reinterpret_cast(&data[i[11]]))); const __m256i e = concat(_mm_loadu_si128(reinterpret_cast(&data[i[4]])), _mm_loadu_si128(reinterpret_cast(&data[i[12]]))); const __m256i f = concat(_mm_loadu_si128(reinterpret_cast(&data[i[5]])), _mm_loadu_si128(reinterpret_cast(&data[i[13]]))); const __m256i g = concat(_mm_loadu_si128(reinterpret_cast(&data[i[6]])), _mm_loadu_si128(reinterpret_cast(&data[i[14]]))); const __m256i h = concat(_mm_loadu_si128(reinterpret_cast(&data[i[7]])), _mm_loadu_si128(reinterpret_cast(&data[i[15]]))); const __m256i tmp2 = AVX::unpacklo_epi16(a, e); // a0 a4 b0 b4 c0 c4 d0 d4 | a8 ... const __m256i tmp4 = AVX::unpacklo_epi16(b, f); // a1 a5 b1 b5 c1 c5 d1 d5 const __m256i tmp3 = AVX::unpacklo_epi16(c, g); // a2 a6 b2 b6 c2 c6 d2 d6 const __m256i tmp5 = AVX::unpacklo_epi16(d, h); // a3 a7 b3 b7 c3 c7 d3 d7 const __m256i tmp10 = AVX::unpackhi_epi16(a, e); // e0 e4 f0 f4 g0 g4 h0 h4 const __m256i tmp11 = AVX::unpackhi_epi16(c, g); // e1 e5 f1 f5 g1 g5 h1 h5 const __m256i tmp12 = AVX::unpackhi_epi16(b, f); // e2 e6 f2 f6 g2 g6 h2 h6 const __m256i tmp13 = AVX::unpackhi_epi16(d, h); // e3 e7 f3 f7 g3 g7 h3 h7 const __m256i tmp0 = AVX::unpacklo_epi16(tmp2, tmp3); // a0 a2 a4 a6 b0 b2 b4 b6 | a8 ... 
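// a..h above each gather the (up to) eight 16-bit members of two records (indices i[k]
// and i[k+8]) with one unaligned 128-bit load; the unpack cascade then performs an 8x8
// 16-bit transpose so that every output register holds one member for all 16 indices.
// The 6-, 7- and 8-argument overloads below follow the same scheme.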
const __m256i tmp1 = AVX::unpacklo_epi16(tmp4, tmp5); // a1 a3 a5 a7 b1 b3 b5 b7 const __m256i tmp6 = AVX::unpackhi_epi16(tmp2, tmp3); // c0 c2 c4 c6 d0 d2 d4 d6 const __m256i tmp7 = AVX::unpackhi_epi16(tmp4, tmp5); // c1 c3 c5 c7 d1 d3 d5 d7 const __m256i tmp8 = AVX::unpacklo_epi16(tmp10, tmp11); // e0 e2 e4 e6 f0 f2 f4 f6 const __m256i tmp9 = AVX::unpacklo_epi16(tmp12, tmp13); // e1 e3 e5 e7 f1 f3 f5 f7 v0.data() = AVX::unpacklo_epi16(tmp0, tmp1); v1.data() = AVX::unpackhi_epi16(tmp0, tmp1); v2.data() = AVX::unpacklo_epi16(tmp6, tmp7); v3.data() = AVX::unpackhi_epi16(tmp6, tmp7); v4.data() = AVX::unpacklo_epi16(tmp8, tmp9); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4, V &v5) { using namespace AVX; const __m256i a = concat(_mm_loadu_si128(reinterpret_cast(&data[i[0]])), _mm_loadu_si128(reinterpret_cast(&data[i[8]]))); const __m256i b = concat(_mm_loadu_si128(reinterpret_cast(&data[i[1]])), _mm_loadu_si128(reinterpret_cast(&data[i[9]]))); const __m256i c = concat(_mm_loadu_si128(reinterpret_cast(&data[i[2]])), _mm_loadu_si128(reinterpret_cast(&data[i[10]]))); const __m256i d = concat(_mm_loadu_si128(reinterpret_cast(&data[i[3]])), _mm_loadu_si128(reinterpret_cast(&data[i[11]]))); const __m256i e = concat(_mm_loadu_si128(reinterpret_cast(&data[i[4]])), _mm_loadu_si128(reinterpret_cast(&data[i[12]]))); const __m256i f = concat(_mm_loadu_si128(reinterpret_cast(&data[i[5]])), _mm_loadu_si128(reinterpret_cast(&data[i[13]]))); const __m256i g = concat(_mm_loadu_si128(reinterpret_cast(&data[i[6]])), _mm_loadu_si128(reinterpret_cast(&data[i[14]]))); const __m256i h = concat(_mm_loadu_si128(reinterpret_cast(&data[i[7]])), _mm_loadu_si128(reinterpret_cast(&data[i[15]]))); const __m256i tmp2 = AVX::unpacklo_epi16(a, e); // a0 a4 b0 b4 c0 c4 d0 d4 | a8 ... const __m256i tmp4 = AVX::unpacklo_epi16(b, f); // a1 a5 b1 b5 c1 c5 d1 d5 const __m256i tmp3 = AVX::unpacklo_epi16(c, g); // a2 a6 b2 b6 c2 c6 d2 d6 const __m256i tmp5 = AVX::unpacklo_epi16(d, h); // a3 a7 b3 b7 c3 c7 d3 d7 const __m256i tmp10 = AVX::unpackhi_epi16(a, e); // e0 e4 f0 f4 g0 g4 h0 h4 const __m256i tmp11 = AVX::unpackhi_epi16(c, g); // e1 e5 f1 f5 g1 g5 h1 h5 const __m256i tmp12 = AVX::unpackhi_epi16(b, f); // e2 e6 f2 f6 g2 g6 h2 h6 const __m256i tmp13 = AVX::unpackhi_epi16(d, h); // e3 e7 f3 f7 g3 g7 h3 h7 const __m256i tmp0 = AVX::unpacklo_epi16(tmp2, tmp3); // a0 a2 a4 a6 b0 b2 b4 b6 | a8 ... 
const __m256i tmp1 = AVX::unpacklo_epi16(tmp4, tmp5); // a1 a3 a5 a7 b1 b3 b5 b7 const __m256i tmp6 = AVX::unpackhi_epi16(tmp2, tmp3); // c0 c2 c4 c6 d0 d2 d4 d6 const __m256i tmp7 = AVX::unpackhi_epi16(tmp4, tmp5); // c1 c3 c5 c7 d1 d3 d5 d7 const __m256i tmp8 = AVX::unpacklo_epi16(tmp10, tmp11); // e0 e2 e4 e6 f0 f2 f4 f6 const __m256i tmp9 = AVX::unpacklo_epi16(tmp12, tmp13); // e1 e3 e5 e7 f1 f3 f5 f7 v0.data() = AVX::unpacklo_epi16(tmp0, tmp1); v1.data() = AVX::unpackhi_epi16(tmp0, tmp1); v2.data() = AVX::unpacklo_epi16(tmp6, tmp7); v3.data() = AVX::unpackhi_epi16(tmp6, tmp7); v4.data() = AVX::unpacklo_epi16(tmp8, tmp9); v5.data() = AVX::unpackhi_epi16(tmp8, tmp9); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6) { using namespace AVX; const __m256i a = concat(_mm_loadu_si128(reinterpret_cast(&data[i[0]])), _mm_loadu_si128(reinterpret_cast(&data[i[8]]))); const __m256i b = concat(_mm_loadu_si128(reinterpret_cast(&data[i[1]])), _mm_loadu_si128(reinterpret_cast(&data[i[9]]))); const __m256i c = concat(_mm_loadu_si128(reinterpret_cast(&data[i[2]])), _mm_loadu_si128(reinterpret_cast(&data[i[10]]))); const __m256i d = concat(_mm_loadu_si128(reinterpret_cast(&data[i[3]])), _mm_loadu_si128(reinterpret_cast(&data[i[11]]))); const __m256i e = concat(_mm_loadu_si128(reinterpret_cast(&data[i[4]])), _mm_loadu_si128(reinterpret_cast(&data[i[12]]))); const __m256i f = concat(_mm_loadu_si128(reinterpret_cast(&data[i[5]])), _mm_loadu_si128(reinterpret_cast(&data[i[13]]))); const __m256i g = concat(_mm_loadu_si128(reinterpret_cast(&data[i[6]])), _mm_loadu_si128(reinterpret_cast(&data[i[14]]))); const __m256i h = concat(_mm_loadu_si128(reinterpret_cast(&data[i[7]])), _mm_loadu_si128(reinterpret_cast(&data[i[15]]))); const __m256i tmp2 = AVX::unpacklo_epi16(a, e); // a0 a4 b0 b4 c0 c4 d0 d4 | a8 ... const __m256i tmp4 = AVX::unpacklo_epi16(b, f); // a1 a5 b1 b5 c1 c5 d1 d5 const __m256i tmp3 = AVX::unpacklo_epi16(c, g); // a2 a6 b2 b6 c2 c6 d2 d6 const __m256i tmp5 = AVX::unpacklo_epi16(d, h); // a3 a7 b3 b7 c3 c7 d3 d7 const __m256i tmp10 = AVX::unpackhi_epi16(a, e); // e0 e4 f0 f4 g0 g4 h0 h4 const __m256i tmp11 = AVX::unpackhi_epi16(c, g); // e1 e5 f1 f5 g1 g5 h1 h5 const __m256i tmp12 = AVX::unpackhi_epi16(b, f); // e2 e6 f2 f6 g2 g6 h2 h6 const __m256i tmp13 = AVX::unpackhi_epi16(d, h); // e3 e7 f3 f7 g3 g7 h3 h7 const __m256i tmp0 = AVX::unpacklo_epi16(tmp2, tmp3); // a0 a2 a4 a6 b0 b2 b4 b6 | a8 ... 
const __m256i tmp1 = AVX::unpacklo_epi16(tmp4, tmp5); // a1 a3 a5 a7 b1 b3 b5 b7 const __m256i tmp6 = AVX::unpackhi_epi16(tmp2, tmp3); // c0 c2 c4 c6 d0 d2 d4 d6 const __m256i tmp7 = AVX::unpackhi_epi16(tmp4, tmp5); // c1 c3 c5 c7 d1 d3 d5 d7 const __m256i tmp8 = AVX::unpacklo_epi16(tmp10, tmp11); // e0 e2 e4 e6 f0 f2 f4 f6 const __m256i tmp9 = AVX::unpacklo_epi16(tmp12, tmp13); // e1 e3 e5 e7 f1 f3 f5 f7 const __m256i tmp14 = AVX::unpackhi_epi16(tmp10, tmp11); // g0 g2 g4 g6 h0 h2 h4 h6 const __m256i tmp15 = AVX::unpackhi_epi16(tmp12, tmp13); // g1 g3 g5 g7 h1 h3 h5 h7 v0.data() = AVX::unpacklo_epi16(tmp0, tmp1); v1.data() = AVX::unpackhi_epi16(tmp0, tmp1); v2.data() = AVX::unpacklo_epi16(tmp6, tmp7); v3.data() = AVX::unpackhi_epi16(tmp6, tmp7); v4.data() = AVX::unpacklo_epi16(tmp8, tmp9); v5.data() = AVX::unpackhi_epi16(tmp8, tmp9); v6.data() = AVX::unpacklo_epi16(tmp14, tmp15); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6, V &v7) { using namespace AVX; const __m256i a = concat(_mm_loadu_si128(reinterpret_cast(&data[i[0]])), _mm_loadu_si128(reinterpret_cast(&data[i[8]]))); const __m256i b = concat(_mm_loadu_si128(reinterpret_cast(&data[i[1]])), _mm_loadu_si128(reinterpret_cast(&data[i[9]]))); const __m256i c = concat(_mm_loadu_si128(reinterpret_cast(&data[i[2]])), _mm_loadu_si128(reinterpret_cast(&data[i[10]]))); const __m256i d = concat(_mm_loadu_si128(reinterpret_cast(&data[i[3]])), _mm_loadu_si128(reinterpret_cast(&data[i[11]]))); const __m256i e = concat(_mm_loadu_si128(reinterpret_cast(&data[i[4]])), _mm_loadu_si128(reinterpret_cast(&data[i[12]]))); const __m256i f = concat(_mm_loadu_si128(reinterpret_cast(&data[i[5]])), _mm_loadu_si128(reinterpret_cast(&data[i[13]]))); const __m256i g = concat(_mm_loadu_si128(reinterpret_cast(&data[i[6]])), _mm_loadu_si128(reinterpret_cast(&data[i[14]]))); const __m256i h = concat(_mm_loadu_si128(reinterpret_cast(&data[i[7]])), _mm_loadu_si128(reinterpret_cast(&data[i[15]]))); const __m256i tmp2 = AVX::unpacklo_epi16(a, e); // a0 a4 b0 b4 c0 c4 d0 d4 | a8 ... const __m256i tmp4 = AVX::unpacklo_epi16(b, f); // a1 a5 b1 b5 c1 c5 d1 d5 const __m256i tmp3 = AVX::unpacklo_epi16(c, g); // a2 a6 b2 b6 c2 c6 d2 d6 const __m256i tmp5 = AVX::unpacklo_epi16(d, h); // a3 a7 b3 b7 c3 c7 d3 d7 const __m256i tmp10 = AVX::unpackhi_epi16(a, e); // e0 e4 f0 f4 g0 g4 h0 h4 const __m256i tmp11 = AVX::unpackhi_epi16(c, g); // e1 e5 f1 f5 g1 g5 h1 h5 const __m256i tmp12 = AVX::unpackhi_epi16(b, f); // e2 e6 f2 f6 g2 g6 h2 h6 const __m256i tmp13 = AVX::unpackhi_epi16(d, h); // e3 e7 f3 f7 g3 g7 h3 h7 const __m256i tmp0 = AVX::unpacklo_epi16(tmp2, tmp3); // a0 a2 a4 a6 b0 b2 b4 b6 | a8 ... 
const __m256i tmp1 = AVX::unpacklo_epi16(tmp4, tmp5); // a1 a3 a5 a7 b1 b3 b5 b7 const __m256i tmp6 = AVX::unpackhi_epi16(tmp2, tmp3); // c0 c2 c4 c6 d0 d2 d4 d6 const __m256i tmp7 = AVX::unpackhi_epi16(tmp4, tmp5); // c1 c3 c5 c7 d1 d3 d5 d7 const __m256i tmp8 = AVX::unpacklo_epi16(tmp10, tmp11); // e0 e2 e4 e6 f0 f2 f4 f6 const __m256i tmp9 = AVX::unpacklo_epi16(tmp12, tmp13); // e1 e3 e5 e7 f1 f3 f5 f7 const __m256i tmp14 = AVX::unpackhi_epi16(tmp10, tmp11); // g0 g2 g4 g6 h0 h2 h4 h6 const __m256i tmp15 = AVX::unpackhi_epi16(tmp12, tmp13); // g1 g3 g5 g7 h1 h3 h5 h7 v0.data() = AVX::unpacklo_epi16(tmp0, tmp1); v1.data() = AVX::unpackhi_epi16(tmp0, tmp1); v2.data() = AVX::unpacklo_epi16(tmp6, tmp7); v3.data() = AVX::unpackhi_epi16(tmp6, tmp7); v4.data() = AVX::unpacklo_epi16(tmp8, tmp9); v5.data() = AVX::unpackhi_epi16(tmp8, tmp9); v6.data() = AVX::unpacklo_epi16(tmp14, tmp15); v7.data() = AVX::unpackhi_epi16(tmp14, tmp15); }/*}}}*/ }; template struct InterleaveImpl { static_assert(sizeof(typename V::value_type) == 4, ""); template static inline void interleave(typename V::EntryType *const data, const I &i,/*{{{*/ const typename V::AsArg v0, const typename V::AsArg v1) { using namespace AVX; // [0a 1a 0b 1b 0e 1e 0f 1f]: const m256 tmp0 = _mm256_unpacklo_ps(avx_cast(v0.data()), avx_cast(v1.data())); // [0c 1c 0d 1d 0g 1g 0h 1h]: const m256 tmp1 = _mm256_unpackhi_ps(avx_cast(v0.data()), avx_cast(v1.data())); _mm_storel_pi(reinterpret_cast<__m64 *>(&data[i[0]]), lo128(tmp0)); _mm_storeh_pi(reinterpret_cast<__m64 *>(&data[i[1]]), lo128(tmp0)); _mm_storel_pi(reinterpret_cast<__m64 *>(&data[i[2]]), lo128(tmp1)); _mm_storeh_pi(reinterpret_cast<__m64 *>(&data[i[3]]), lo128(tmp1)); _mm_storel_pi(reinterpret_cast<__m64 *>(&data[i[4]]), hi128(tmp0)); _mm_storeh_pi(reinterpret_cast<__m64 *>(&data[i[5]]), hi128(tmp0)); _mm_storel_pi(reinterpret_cast<__m64 *>(&data[i[6]]), hi128(tmp1)); _mm_storeh_pi(reinterpret_cast<__m64 *>(&data[i[7]]), hi128(tmp1)); }/*}}}*/ static inline void interleave(typename V::EntryType *const data, const Common::SuccessiveEntries<2> &i,/*{{{*/ const typename V::AsArg v0, const typename V::AsArg v1) { using namespace AVX; // [0a 1a 0b 1b 0e 1e 0f 1f]: const m256 tmp0 = _mm256_unpacklo_ps(avx_cast(v0.data()), avx_cast(v1.data())); // [0c 1c 0d 1d 0g 1g 0h 1h]: const m256 tmp1 = _mm256_unpackhi_ps(avx_cast(v0.data()), avx_cast(v1.data())); _mm_storeu_ps(aliasing_cast(&data[i[0]]), lo128(tmp0)); _mm_storeu_ps(aliasing_cast(&data[i[2]]), lo128(tmp1)); _mm_storeu_ps(aliasing_cast(&data[i[4]]), hi128(tmp0)); _mm_storeu_ps(aliasing_cast(&data[i[6]]), hi128(tmp1)); }/*}}}*/ // interleave scatter 3 {{{ template static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2) { using namespace AVX; #ifdef Vc_USE_MASKMOV_SCATTER // [0a 2a 0b 2b 0e 2e 0f 2f]: const m256 tmp0 = _mm256_unpacklo_ps(avx_cast(v0.data()), avx_cast(v2.data())); // [0c 2c 0d 2d 0g 2g 0h 2h]: const m256 tmp1 = _mm256_unpackhi_ps(avx_cast(v0.data()), avx_cast(v2.data())); // [1a __ 1b __ 1e __ 1f __]: const m256 tmp2 = _mm256_unpacklo_ps(avx_cast(v1.data()), avx_cast(v1.data())); // [1c __ 1d __ 1g __ 1h __]: const m256 tmp3 = _mm256_unpackhi_ps(avx_cast(v1.data()), avx_cast(v1.data())); const m256 tmp4 = _mm256_unpacklo_ps(tmp0, tmp2); const m256 tmp5 = _mm256_unpackhi_ps(tmp0, tmp2); const m256 tmp6 = _mm256_unpacklo_ps(tmp1, tmp3); const m256 tmp7 = _mm256_unpackhi_ps(tmp1, tmp3); const m128i mask = 
_mm_set_epi32(0, -1, -1, -1); _mm_maskstore_ps(aliasing_cast(&data[i[0]]), mask, lo128(tmp4)); _mm_maskstore_ps(aliasing_cast(&data[i[1]]), mask, lo128(tmp5)); _mm_maskstore_ps(aliasing_cast(&data[i[2]]), mask, lo128(tmp6)); _mm_maskstore_ps(aliasing_cast(&data[i[3]]), mask, lo128(tmp7)); _mm_maskstore_ps(aliasing_cast(&data[i[4]]), mask, hi128(tmp4)); _mm_maskstore_ps(aliasing_cast(&data[i[5]]), mask, hi128(tmp5)); _mm_maskstore_ps(aliasing_cast(&data[i[6]]), mask, hi128(tmp6)); _mm_maskstore_ps(aliasing_cast(&data[i[7]]), mask, hi128(tmp7)); #else interleave(data, i, v0, v1); v2.scatter(data + 2, i); #endif } // }}} // interleave successive 3 {{{ static inline void interleave(typename V::EntryType *const data, const Common::SuccessiveEntries<3> &i, const typename V::AsArg v0_, const typename V::AsArg v1_, const typename V::AsArg v2_) { __m256 v0 = AVX::avx_cast<__m256>(v0_.data()); // a0 a1 a2 a3|a4 a5 a6 a7 __m256 v1 = AVX::avx_cast<__m256>(v1_.data()); // b0 b1 b2 b3|b4 b5 b6 b7 __m256 v2 = AVX::avx_cast<__m256>(v2_.data()); // c0 c1 c2 c3|c4 c5 c6 c7 v0 = _mm256_shuffle_ps(v0, v0, 0x6c); // a0 a3 a2 a1|a4 a7 a6 a5 v1 = _mm256_shuffle_ps(v1, v1, 0xb1); // b1 b0 b3 b2|b5 b4 b7 b6 v2 = _mm256_shuffle_ps(v2, v2, 0xc6); // c2 c1 c0 c3|c6 c5 c4 c7 // a0 b0 c0 a1|c6 a7 b7 c7: __m256 w0 = Mem::blend( Mem::blend(v0, v1), v2); // b1 c1 a2 b2|b5 c5 a6 b6: __m256 w1 = Mem::blend( Mem::blend(v0, v1), v2); // c2 a3 b3 c3|a4 b4 c4 a5: __m256 w2 = Mem::blend( Mem::blend(v0, v1), v2); // a0 b0 c0 a1|b1 c1 a2 b2: _mm256_storeu_ps(aliasing_cast(&data[i[0]]), _mm256_permute2f128_ps(w0, w1, 0x20)); // c2 a3 b3 c3|a4 b4 c4 a5: w2 _mm256_storeu_ps(aliasing_cast(&data[i[0]] + 8), w2); // b5 c5 a6 b6|c6 a7 b7 c7: _mm256_storeu_ps(aliasing_cast(&data[i[0]] + 16), _mm256_permute2f128_ps(w1, w0, 0x31)); } //}}} // interleave scatter 4 {{{ template static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3) { using namespace AVX; const __m256 tmp0 = _mm256_unpacklo_ps(avx_cast(v0.data()), avx_cast(v2.data())); const __m256 tmp1 = _mm256_unpackhi_ps(avx_cast(v0.data()), avx_cast(v2.data())); const __m256 tmp2 = _mm256_unpacklo_ps(avx_cast(v1.data()), avx_cast(v3.data())); const __m256 tmp3 = _mm256_unpackhi_ps(avx_cast(v1.data()), avx_cast(v3.data())); const __m256 _04 = _mm256_unpacklo_ps(tmp0, tmp2); const __m256 _15 = _mm256_unpackhi_ps(tmp0, tmp2); const __m256 _26 = _mm256_unpacklo_ps(tmp1, tmp3); const __m256 _37 = _mm256_unpackhi_ps(tmp1, tmp3); _mm_storeu_ps(aliasing_cast(&data[i[0]]), lo128(_04)); _mm_storeu_ps(aliasing_cast(&data[i[1]]), lo128(_15)); _mm_storeu_ps(aliasing_cast(&data[i[2]]), lo128(_26)); _mm_storeu_ps(aliasing_cast(&data[i[3]]), lo128(_37)); _mm_storeu_ps(aliasing_cast(&data[i[4]]), hi128(_04)); _mm_storeu_ps(aliasing_cast(&data[i[5]]), hi128(_15)); _mm_storeu_ps(aliasing_cast(&data[i[6]]), hi128(_26)); _mm_storeu_ps(aliasing_cast(&data[i[7]]), hi128(_37)); } // }}} // interleave successive 4 {{{ // same as above except fot the stores, that can be combined to 256-bit stores static inline void interleave(typename V::EntryType *const data, const Common::SuccessiveEntries<4> &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3) { using namespace AVX; const __m256 tmp0 = _mm256_unpacklo_ps(avx_cast(v0.data()), avx_cast(v2.data())); const __m256 tmp1 = _mm256_unpackhi_ps(avx_cast(v0.data()), 
avx_cast(v2.data())); const __m256 tmp2 = _mm256_unpacklo_ps(avx_cast(v1.data()), avx_cast(v3.data())); const __m256 tmp3 = _mm256_unpackhi_ps(avx_cast(v1.data()), avx_cast(v3.data())); const __m256 _04 = _mm256_unpacklo_ps(tmp0, tmp2); const __m256 _15 = _mm256_unpackhi_ps(tmp0, tmp2); const __m256 _26 = _mm256_unpacklo_ps(tmp1, tmp3); const __m256 _37 = _mm256_unpackhi_ps(tmp1, tmp3); _mm256_storeu_ps(aliasing_cast(&data[i[0]]), _mm256_permute2f128_ps(_04, _15, 0x20)); _mm256_storeu_ps(aliasing_cast(&data[i[0]] + 8), _mm256_permute2f128_ps(_26, _37, 0x20)); _mm256_storeu_ps(aliasing_cast(&data[i[0]] + 16), _mm256_permute2f128_ps(_04, _15, 0x31)); _mm256_storeu_ps(aliasing_cast(&data[i[0]] + 24), _mm256_permute2f128_ps(_26, _37, 0x31)); } // }}} template // interleave 5 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4) { interleave(data, i, v0, v1, v2, v3); v4.scatter(data + 4, i); } template // interleave 6 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4, const typename V::AsArg v5) { interleave(data, i, v0, v1, v2, v3); interleave(data + 4, i, v4, v5); } template // interleave 7 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4, const typename V::AsArg v5, const typename V::AsArg v6) { interleave(data, i, v0, v1, v2, v3); interleave(data + 4, i, v4, v5, v6); } template // interleave 8 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4, const typename V::AsArg v5, const typename V::AsArg v6, const typename V::AsArg v7) { interleave(data, i, v0, v1, v2, v3); interleave(data + 4, i, v4, v5, v6, v7); } //}}}2 // deinterleave scatter 2 {{{ template static inline void deinterleave(typename V::EntryType const *const data, const I &i, V &v0, V &v1) { using namespace AVX; const m128 il0 = _mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64 const *>(&data[i[0]])); // a0 b0 const m128 il2 = _mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64 const *>(&data[i[2]])); // a2 b2 const m128 il4 = _mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64 const *>(&data[i[4]])); // a4 b4 const m128 il6 = _mm_loadl_pi(_mm_setzero_ps(), reinterpret_cast<__m64 const *>(&data[i[6]])); // a6 b6 const m128 il01 = _mm_loadh_pi( il0, reinterpret_cast<__m64 const *>(&data[i[1]])); // a0 b0 a1 b1 const m128 il23 = _mm_loadh_pi( il2, reinterpret_cast<__m64 const *>(&data[i[3]])); // a2 b2 a3 b3 const m128 il45 = _mm_loadh_pi( il4, reinterpret_cast<__m64 const *>(&data[i[5]])); // a4 b4 a5 b5 const m128 il67 = _mm_loadh_pi( il6, reinterpret_cast<__m64 const *>(&data[i[7]])); // a6 b6 a7 b7 const m256 tmp2 = concat(il01, il45); const m256 tmp3 = concat(il23, il67); const m256 tmp0 = _mm256_unpacklo_ps(tmp2, tmp3); const m256 tmp1 = _mm256_unpackhi_ps(tmp2, tmp3); v0.data() = avx_cast(_mm256_unpacklo_ps(tmp0, tmp1)); v1.data() = avx_cast(_mm256_unpackhi_ps(tmp0, tmp1)); } // }}} // deinterleave successive 2 {{{ static inline 
void deinterleave(typename V::EntryType const *const data, const Common::SuccessiveEntries<2> &i, V &v0, V &v1) { using namespace AVX; const m256 il0123 = _mm256_loadu_ps(aliasing_cast(&data[i[0]])); // a0 b0 a1 b1 a2 b2 a3 b3 const m256 il4567 = _mm256_loadu_ps(aliasing_cast(&data[i[4]])); // a4 b4 a5 b5 a6 b6 a7 b7 const m256 tmp2 = Mem::shuffle128(il0123, il4567); const m256 tmp3 = Mem::shuffle128(il0123, il4567); const m256 tmp0 = _mm256_unpacklo_ps(tmp2, tmp3); const m256 tmp1 = _mm256_unpackhi_ps(tmp2, tmp3); v0.data() = avx_cast(_mm256_unpacklo_ps(tmp0, tmp1)); v1.data() = avx_cast(_mm256_unpackhi_ps(tmp0, tmp1)); } // }}} // deinterleave scatter 3 {{{ template static inline void deinterleave(typename V::EntryType const *const data, const I &i, V &v0, V &v1, V &v2) { using namespace AVX; const m128 il0 = _mm_loadu_ps(aliasing_cast(&data[i[0]])); // a0 b0 c0 d0 const m128 il1 = _mm_loadu_ps(aliasing_cast(&data[i[1]])); // a1 b1 c1 d1 const m128 il2 = _mm_loadu_ps(aliasing_cast(&data[i[2]])); // a2 b2 c2 d2 const m128 il3 = _mm_loadu_ps(aliasing_cast(&data[i[3]])); // a3 b3 c3 d3 const m128 il4 = _mm_loadu_ps(aliasing_cast(&data[i[4]])); // a4 b4 c4 d4 const m128 il5 = _mm_loadu_ps(aliasing_cast(&data[i[5]])); // a5 b5 c5 d5 const m128 il6 = _mm_loadu_ps(aliasing_cast(&data[i[6]])); // a6 b6 c6 d6 const m128 il7 = _mm_loadu_ps(aliasing_cast(&data[i[7]])); // a7 b7 c7 d7 const m256 il04 = concat(il0, il4); const m256 il15 = concat(il1, il5); const m256 il26 = concat(il2, il6); const m256 il37 = concat(il3, il7); const m256 ab0246 = _mm256_unpacklo_ps(il04, il26); const m256 ab1357 = _mm256_unpacklo_ps(il15, il37); const m256 cd0246 = _mm256_unpackhi_ps(il04, il26); const m256 cd1357 = _mm256_unpackhi_ps(il15, il37); v0.data() = avx_cast(_mm256_unpacklo_ps(ab0246, ab1357)); v1.data() = avx_cast(_mm256_unpackhi_ps(ab0246, ab1357)); v2.data() = avx_cast(_mm256_unpacklo_ps(cd0246, cd1357)); } // }}} // deinterleave successive 3 {{{ static inline void deinterleave(typename V::EntryType const *const data, const Common::SuccessiveEntries<3> &i, V &v0, V &v1, V &v2) { // 0a 1a 2a 0b 1b 2b 0c 1c __m256 in0 = _mm256_loadu_ps(aliasing_cast(&data[i[0]] + 0)); // 2c 0d 1d 2d 0e 1e 2e 0f __m256 in1 = _mm256_loadu_ps(aliasing_cast(&data[i[0]] + 8)); // 1f 2f 0g 1g 2g 0h 1h 2h __m256 in2 = _mm256_loadu_ps(aliasing_cast(&data[i[0]] + 16)); // swap(v0.hi, v2.lo): // [0a 1a 2a 0b 1f 2f 0g 1g] // [2c 0d 1d 2d 0e 1e 2e 0f] // [1b 2b 0c 1c 2g 0h 1h 2h] const __m256 aaabffgg = _mm256_permute2f128_ps(in0, in2, 0x20); const __m256 cdddeeef = in1; const __m256 bbccghhh = _mm256_permute2f128_ps(in0, in2, 0x31); // blend: // 0: [a d c b e h g f] // 1: [b a d c f e h g] // 2: [c b a d g f e h] const __m256 x0 = _mm256_blend_ps( _mm256_blend_ps(aaabffgg, cdddeeef, 0 + 2 + 0 + 0 + 0x10 + 0 + 0 + 0x80), bbccghhh, 0 + 0 + 4 + 0 + 0 + 0x20 + 0 + 0); const __m256 x1 = _mm256_blend_ps( _mm256_blend_ps(aaabffgg, cdddeeef, 0 + 0 + 4 + 0 + 0 + 0x20 + 0 + 0), bbccghhh, 1 + 0 + 0 + 8 + 0 + 0 + 0x40 + 0); const __m256 x2 = _mm256_blend_ps( _mm256_blend_ps(aaabffgg, cdddeeef, 1 + 0 + 0 + 8 + 0 + 0 + 0x40 + 0), bbccghhh, 0 + 2 + 0 + 0 + 0x10 + 0 + 0 + 0x80); // 0: [a d c b e h g f] >-perm(0, 3, 2, 1)-> [a b c d e f g h] // 1: [b a d c f e h g] >-perm(1, 0, 3, 2)-> [a b c d e f g h] // 2: [c b a d g f e h] >-perm(2, 1, 0, 3)-> [a b c d e f g h] v0 = AVX::avx_cast(_mm256_shuffle_ps(x0, x0, 0x6c)); v1 = AVX::avx_cast(_mm256_shuffle_ps(x1, x1, 0xb1)); v2 = AVX::avx_cast(_mm256_shuffle_ps(x2, x2, 0xc6)); } // }}} // deinterleave 
scatter 4 {{{ template static inline void deinterleave(typename V::EntryType const *const data, const I &i, V &v0, V &v1, V &v2, V &v3) { using namespace AVX; const m128 il0 = _mm_loadu_ps(aliasing_cast(&data[i[0]])); // a0 b0 c0 d0 const m128 il1 = _mm_loadu_ps(aliasing_cast(&data[i[1]])); // a1 b1 c1 d1 const m128 il2 = _mm_loadu_ps(aliasing_cast(&data[i[2]])); // a2 b2 c2 d2 const m128 il3 = _mm_loadu_ps(aliasing_cast(&data[i[3]])); // a3 b3 c3 d3 const m128 il4 = _mm_loadu_ps(aliasing_cast(&data[i[4]])); // a4 b4 c4 d4 const m128 il5 = _mm_loadu_ps(aliasing_cast(&data[i[5]])); // a5 b5 c5 d5 const m128 il6 = _mm_loadu_ps(aliasing_cast(&data[i[6]])); // a6 b6 c6 d6 const m128 il7 = _mm_loadu_ps(aliasing_cast(&data[i[7]])); // a7 b7 c7 d7 const m256 il04 = concat(il0, il4); const m256 il15 = concat(il1, il5); const m256 il26 = concat(il2, il6); const m256 il37 = concat(il3, il7); const m256 ab0246 = _mm256_unpacklo_ps(il04, il26); const m256 ab1357 = _mm256_unpacklo_ps(il15, il37); const m256 cd0246 = _mm256_unpackhi_ps(il04, il26); const m256 cd1357 = _mm256_unpackhi_ps(il15, il37); v0.data() = avx_cast(_mm256_unpacklo_ps(ab0246, ab1357)); v1.data() = avx_cast(_mm256_unpackhi_ps(ab0246, ab1357)); v2.data() = avx_cast(_mm256_unpacklo_ps(cd0246, cd1357)); v3.data() = avx_cast(_mm256_unpackhi_ps(cd0246, cd1357)); } // }}} // deinterleave successive 4 {{{ static inline void deinterleave(typename V::EntryType const *const data, const Common::SuccessiveEntries<4> &i, V &v0, V &v1, V &v2, V &v3) { using namespace AVX; const __m256 il01 = _mm256_loadu_ps( aliasing_cast(&data[i[0]])); // a0 b0 c0 d0 | a1 b1 c1 d1 const __m256 il23 = _mm256_loadu_ps( aliasing_cast(&data[i[2]])); // a2 b2 c2 d2 | a3 b3 c3 d3 const __m256 il45 = _mm256_loadu_ps( aliasing_cast(&data[i[4]])); // a4 b4 c4 d4 | a5 b5 c5 d5 const __m256 il67 = _mm256_loadu_ps( aliasing_cast(&data[i[6]])); // a6 b6 c6 d6 | a7 b7 c7 d7 const __m256 il04 = _mm256_permute2f128_ps(il01, il45, 0x20); const __m256 il15 = _mm256_permute2f128_ps(il01, il45, 0x31); const __m256 il26 = _mm256_permute2f128_ps(il23, il67, 0x20); const __m256 il37 = _mm256_permute2f128_ps(il23, il67, 0x31); const __m256 ab0246 = _mm256_unpacklo_ps(il04, il26); const __m256 ab1357 = _mm256_unpacklo_ps(il15, il37); const __m256 cd0246 = _mm256_unpackhi_ps(il04, il26); const __m256 cd1357 = _mm256_unpackhi_ps(il15, il37); v0.data() = avx_cast(_mm256_unpacklo_ps(ab0246, ab1357)); v1.data() = avx_cast(_mm256_unpackhi_ps(ab0246, ab1357)); v2.data() = avx_cast(_mm256_unpacklo_ps(cd0246, cd1357)); v3.data() = avx_cast(_mm256_unpackhi_ps(cd0246, cd1357)); } // }}} template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4) { v4.gather(data + 4, i); deinterleave(data, i, v0, v1, v2, v3); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4, V &v5) { deinterleave(data, i, v0, v1, v2, v3); deinterleave(data + 4, i, v4, v5); }/*}}}*/ static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const Common::SuccessiveEntries<6> &i, V &v0, V &v1, V &v2, V &v3, V &v4, V &v5) { using namespace AVX; const m256 a = _mm256_loadu_ps(aliasing_cast(&data[i[0]])); const m256 b = _mm256_loadu_ps(aliasing_cast(&data[i[0] + 1 * V::Size])); const m256 c = _mm256_loadu_ps(aliasing_cast(&data[i[0] + 2 * V::Size])); const m256 d = _mm256_loadu_ps(aliasing_cast(&data[i[0] + 3 * V::Size])); const m256 e = 
_mm256_loadu_ps(aliasing_cast(&data[i[0] + 4 * V::Size])); const m256 f = _mm256_loadu_ps(aliasing_cast(&data[i[0] + 5 * V::Size])); const __m256 tmp2 = Mem::shuffle128(a, d); const __m256 tmp3 = Mem::shuffle128(b, e); const __m256 tmp4 = Mem::shuffle128(a, d); const __m256 tmp5 = Mem::shuffle128(c, f); const __m256 tmp8 = Mem::shuffle128(b, e); const __m256 tmp9 = Mem::shuffle128(c, f); const __m256 tmp0 = _mm256_unpacklo_ps(tmp2, tmp3); const __m256 tmp1 = _mm256_unpackhi_ps(tmp4, tmp5); const __m256 tmp6 = _mm256_unpackhi_ps(tmp2, tmp3); const __m256 tmp7 = _mm256_unpacklo_ps(tmp8, tmp9); const __m256 tmp10 = _mm256_unpacklo_ps(tmp4, tmp5); const __m256 tmp11 = _mm256_unpackhi_ps(tmp8, tmp9); v0.data() = avx_cast(_mm256_unpacklo_ps(tmp0, tmp1)); v1.data() = avx_cast(_mm256_unpackhi_ps(tmp0, tmp1)); v2.data() = avx_cast(_mm256_unpacklo_ps(tmp6, tmp7)); v3.data() = avx_cast(_mm256_unpackhi_ps(tmp6, tmp7)); v4.data() = avx_cast(_mm256_unpacklo_ps(tmp10, tmp11)); v5.data() = avx_cast(_mm256_unpackhi_ps(tmp10, tmp11)); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6) { deinterleave(data, i, v0, v1, v2, v3); deinterleave(data + 4, i, v4, v5, v6); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6, V &v7) { deinterleave(data, i, v0, v1, v2, v3); deinterleave(data + 4, i, v4, v5, v6, v7); }/*}}}*/ }; template struct InterleaveImpl { template // interleave 2 args{{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1) { using namespace AVX; const m256d tmp0 = _mm256_unpacklo_pd(v0.data(), v1.data()); const m256d tmp1 = _mm256_unpackhi_pd(v0.data(), v1.data()); _mm_storeu_pd(&data[i[0]], lo128(tmp0)); _mm_storeu_pd(&data[i[1]], lo128(tmp1)); _mm_storeu_pd(&data[i[2]], hi128(tmp0)); _mm_storeu_pd(&data[i[3]], hi128(tmp1)); } template // interleave 3 args{{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2) { using namespace AVX; #ifdef Vc_USE_MASKMOV_SCATTER const m256d tmp0 = _mm256_unpacklo_pd(v0.data(), v1.data()); const m256d tmp1 = _mm256_unpackhi_pd(v0.data(), v1.data()); const m256d tmp2 = _mm256_unpacklo_pd(v2.data(), v2.data()); const m256d tmp3 = _mm256_unpackhi_pd(v2.data(), v2.data()); #if defined(Vc_MSVC) && (Vc_MSVC < 170000000 || !defined(_WIN64)) // MSVC needs to be at Version 2012 before _mm256_set_epi64x works const m256i mask = concat(_mm_setallone_si128(), _mm_set_epi32(0, 0, -1, -1)); #else const m256i mask = _mm256_set_epi64x(0, -1, -1, -1); #endif _mm256_maskstore_pd(&data[i[0]], mask, Mem::shuffle128(tmp0, tmp2)); _mm256_maskstore_pd(&data[i[1]], mask, Mem::shuffle128(tmp1, tmp3)); _mm256_maskstore_pd(&data[i[2]], mask, Mem::shuffle128(tmp0, tmp2)); _mm256_maskstore_pd(&data[i[3]], mask, Mem::shuffle128(tmp1, tmp3)); #else interleave(data, i, v0, v1); v2.scatter(data + 2, i); #endif } template // interleave 4 args{{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3) { using namespace AVX; // 0a 1a 0c 1c: const m256d tmp0 = _mm256_unpacklo_pd(v0.data(), v1.data()); // 0b 1b 0b 1b: const m256d tmp1 = _mm256_unpackhi_pd(v0.data(), 
v1.data()); // 2a 3a 2c 3c: const m256d tmp2 = _mm256_unpacklo_pd(v2.data(), v3.data()); // 2b 3b 2b 3b: const m256d tmp3 = _mm256_unpackhi_pd(v2.data(), v3.data()); /* The following might be more efficient once 256-bit stores are not split internally into 2 * 128-bit stores. _mm256_storeu_pd(&data[i[0]], Mem::shuffle128(tmp0, tmp2)); _mm256_storeu_pd(&data[i[1]], Mem::shuffle128(tmp1, tmp3)); _mm256_storeu_pd(&data[i[2]], Mem::shuffle128(tmp0, tmp2)); _mm256_storeu_pd(&data[i[3]], Mem::shuffle128(tmp1, tmp3)); */ _mm_storeu_pd(&data[i[0] ], lo128(tmp0)); _mm_storeu_pd(&data[i[0]+2], lo128(tmp2)); _mm_storeu_pd(&data[i[1] ], lo128(tmp1)); _mm_storeu_pd(&data[i[1]+2], lo128(tmp3)); _mm_storeu_pd(&data[i[2] ], hi128(tmp0)); _mm_storeu_pd(&data[i[2]+2], hi128(tmp2)); _mm_storeu_pd(&data[i[3] ], hi128(tmp1)); _mm_storeu_pd(&data[i[3]+2], hi128(tmp3)); } template // interleave 5 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4) { interleave(data, i, v0, v1, v2, v3); v4.scatter(data + 4, i); } template // interleave 6 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4, const typename V::AsArg v5) { interleave(data, i, v0, v1, v2, v3); interleave(data + 4, i, v4, v5); } template // interleave 7 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4, const typename V::AsArg v5, const typename V::AsArg v6) { interleave(data, i, v0, v1, v2, v3); interleave(data + 4, i, v4, v5, v6); } template // interleave 8 args {{{2 static inline void interleave(typename V::EntryType *const data, const I &i, const typename V::AsArg v0, const typename V::AsArg v1, const typename V::AsArg v2, const typename V::AsArg v3, const typename V::AsArg v4, const typename V::AsArg v5, const typename V::AsArg v6, const typename V::AsArg v7) { interleave(data, i, v0, v1, v2, v3); interleave(data + 4, i, v4, v5, v6, v7); } //}}}2 template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1) { using namespace Vc::AVX; const m256d ab02 = concat(_mm_loadu_pd(&data[i[0]]), _mm_loadu_pd(&data[i[2]])); const m256d ab13 = concat(_mm_loadu_pd(&data[i[1]]), _mm_loadu_pd(&data[i[3]])); v0.data() = _mm256_unpacklo_pd(ab02, ab13); v1.data() = _mm256_unpackhi_pd(ab02, ab13); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2) { v2.gather(data + 2, i); deinterleave(data, i, v0, v1); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3) { deinterleave(data, i, v0, v1); deinterleave(data + 2, i, v2, v3); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4) { v4.gather(data + 4, i); deinterleave(data, i, v0, v1); deinterleave(data + 2, i, v2, v3); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4, V &v5) { deinterleave(data, i, v0, v1); 
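// This and the remaining double-precision overloads are composed from the 2-argument
// kernel above: the base pointer is advanced by the member offset while the same index
// vector is reused, and odd trailing members fall back to plain gather/scatter.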
deinterleave(data + 2, i, v2, v3); deinterleave(data + 4, i, v4, v5); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6) { v6.gather(data + 6, i); deinterleave(data, i, v0, v1); deinterleave(data + 2, i, v2, v3); deinterleave(data + 4, i, v4, v5); }/*}}}*/ template static inline void deinterleave(typename V::EntryType const *const data,/*{{{*/ const I &i, V &v0, V &v1, V &v2, V &v3, V &v4, V &v5, V &v6, V &v7) { deinterleave(data, i, v0, v1); deinterleave(data + 2, i, v2, v3); deinterleave(data + 4, i, v4, v5); deinterleave(data + 6, i, v6, v7); }/*}}}*/ }; //}}}1 } // namespace Detail } // namespace Vc #endif // VC_AVX_DETAIL_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/avx/helperimpl.h000066400000000000000000000126571476554302100205420ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2011-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_AVX_HELPERIMPL_H_ #define VC_AVX_HELPERIMPL_H_ #include "../sse/helperimpl.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Detail { template inline void deinterleave(AVX2::float_v &, AVX2::float_v &, const float *, A); template inline void deinterleave(AVX2::float_v &, AVX2::float_v &, const short *, A); template inline void deinterleave(AVX2::float_v &, AVX2::float_v &, const ushort *, A); template inline void deinterleave(AVX2::double_v &, AVX2::double_v &, const double *, A); template inline void deinterleave(AVX2::int_v &, AVX2::int_v &, const int *, A); template inline void deinterleave(AVX2::int_v &, AVX2::int_v &, const short *, A); template inline void deinterleave(AVX2::uint_v &, AVX2::uint_v &, const uint *, A); template inline void deinterleave(AVX2::uint_v &, AVX2::uint_v &, const ushort *, A); template inline void deinterleave(AVX2::short_v &, AVX2::short_v &, const short *, A); template inline void deinterleave(AVX2::ushort_v &, AVX2::ushort_v &, const ushort *, A); template Vc_ALWAYS_INLINE_L void deinterleave(AVX2::Vector &Vc_RESTRICT a, AVX2::Vector &Vc_RESTRICT b, AVX2::Vector &Vc_RESTRICT c, const M *Vc_RESTRICT memory, A align) Vc_ALWAYS_INLINE_R; template Vc_ALWAYS_INLINE_L void deinterleave(AVX2::Vector &Vc_RESTRICT a, AVX2::Vector &Vc_RESTRICT b, AVX2::Vector &Vc_RESTRICT c, AVX2::Vector &Vc_RESTRICT d, const M *Vc_RESTRICT memory, A align) Vc_ALWAYS_INLINE_R; template Vc_ALWAYS_INLINE_L void deinterleave(AVX2::Vector &Vc_RESTRICT a, AVX2::Vector &Vc_RESTRICT b, AVX2::Vector &Vc_RESTRICT c, AVX2::Vector &Vc_RESTRICT d, AVX2::Vector &Vc_RESTRICT e, const M *Vc_RESTRICT memory, A align) Vc_ALWAYS_INLINE_R; template Vc_ALWAYS_INLINE_L void deinterleave( AVX2::Vector &Vc_RESTRICT a, AVX2::Vector &Vc_RESTRICT b, AVX2::Vector &Vc_RESTRICT c, AVX2::Vector &Vc_RESTRICT d, AVX2::Vector &Vc_RESTRICT e, AVX2::Vector &Vc_RESTRICT f, const M *Vc_RESTRICT memory, A align) Vc_ALWAYS_INLINE_R; template Vc_ALWAYS_INLINE_L void deinterleave( AVX2::Vector &Vc_RESTRICT a, AVX2::Vector &Vc_RESTRICT b, AVX2::Vector &Vc_RESTRICT c, AVX2::Vector &Vc_RESTRICT d, AVX2::Vector &Vc_RESTRICT e, AVX2::Vector &Vc_RESTRICT f, AVX2::Vector &Vc_RESTRICT g, AVX2::Vector &Vc_RESTRICT h, const M *Vc_RESTRICT memory, A align) Vc_ALWAYS_INLINE_R; Vc_ALWAYS_INLINE void prefetchForOneRead(const void *addr, VectorAbi::Avx) { prefetchForOneRead(addr, VectorAbi::Sse()); } Vc_ALWAYS_INLINE void prefetchForModify(const void *addr, VectorAbi::Avx) { prefetchForModify(addr, VectorAbi::Sse()); } Vc_ALWAYS_INLINE void prefetchClose(const void *addr, VectorAbi::Avx) { prefetchClose(addr, VectorAbi::Sse()); } Vc_ALWAYS_INLINE void prefetchMid(const void *addr, VectorAbi::Avx) { prefetchMid(addr, VectorAbi::Sse()); } Vc_ALWAYS_INLINE void prefetchFar(const void *addr, VectorAbi::Avx) { prefetchFar(addr, VectorAbi::Sse()); } } // namespace Detail } // namespace Vc #include "deinterleave.tcc" #endif // VC_AVX_HELPERIMPL_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/intrinsics.h000066400000000000000000001035041476554302100205560ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_AVX_INTRINSICS_H_ #define VC_AVX_INTRINSICS_H_ #include "../global.h" #include "../traits/type_traits.h" // see comment in sse/intrinsics.h extern "C" { // AVX #include #if (defined(Vc_IMPL_XOP) || defined(Vc_IMPL_FMA4)) && !defined(Vc_MSVC) #include #endif } #include "../common/fix_clang_emmintrin.h" #include "const_data.h" #include "../common/types.h" #include "macros.h" #include #if (defined Vc_CLANG && Vc_CLANG >= 0x30900 && Vc_CLANG < 0x70000) #ifdef _mm256_permute2f128_si256 #undef _mm256_permute2f128_si256 #define _mm256_permute2f128_si256(V1, V2, M) __extension__ ({ \ (__m256i)__builtin_ia32_vperm2f128_si256((__v8si)(__m256i)(V1), \ (__v8si)(__m256i)(V2), (char)(M)); }) #endif #ifdef _mm256_permute2f128_ps #undef _mm256_permute2f128_ps #define _mm256_permute2f128_ps(V1, V2, M) __extension__ ({ \ (__m256)__builtin_ia32_vperm2f128_ps256((__v8sf)(__m256)(V1), \ (__v8sf)(__m256)(V2), (char)(M)); }) #endif #ifdef _mm256_permute2x128_si256 #undef _mm256_permute2x128_si256 #define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \ (__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (char)(M)); }) #endif #endif namespace Vc_VERSIONED_NAMESPACE { namespace AvxIntrinsics { using AVX::c_general; using AVX::_IndexesFromZero32; using AVX::_IndexesFromZero16; using AVX::_IndexesFromZero8; typedef __m128 m128 ; typedef __m128d m128d; typedef __m128i m128i; typedef __m256 m256 ; typedef __m256d m256d; typedef __m256i m256i; #ifdef Vc_GCC // Redefine the mul/add/sub intrinsics to use GCC-specific operators instead of builtin // functions. This way the fp-contraction optimization step kicks in and creates FMAs! 
:) static Vc_INTRINSIC Vc_CONST m256d _mm256_mul_pd(m256d a, m256d b) { return static_cast(static_cast<__v4df>(a) * static_cast<__v4df>(b)); } static Vc_INTRINSIC Vc_CONST m256d _mm256_add_pd(m256d a, m256d b) { return static_cast(static_cast<__v4df>(a) + static_cast<__v4df>(b)); } static Vc_INTRINSIC Vc_CONST m256d _mm256_sub_pd(m256d a, m256d b) { return static_cast(static_cast<__v4df>(a) - static_cast<__v4df>(b)); } static Vc_INTRINSIC Vc_CONST m256 _mm256_mul_ps(m256 a, m256 b) { return static_cast(static_cast<__v8sf>(a) * static_cast<__v8sf>(b)); } static Vc_INTRINSIC Vc_CONST m256 _mm256_add_ps(m256 a, m256 b) { return static_cast(static_cast<__v8sf>(a) + static_cast<__v8sf>(b)); } static Vc_INTRINSIC Vc_CONST m256 _mm256_sub_ps(m256 a, m256 b) { return static_cast(static_cast<__v8sf>(a) - static_cast<__v8sf>(b)); } #endif static Vc_INTRINSIC m256d Vc_CONST set1_pd (double a) { return _mm256_set1_pd (a); } static Vc_INTRINSIC m256i Vc_CONST set1_epi32(int a) { return _mm256_set1_epi32(a); } static Vc_INTRINSIC Vc_CONST m128i _mm_setallone_si128() { return _mm_load_si128(reinterpret_cast(Common::AllBitsSet)); } static Vc_INTRINSIC Vc_CONST m128 _mm_setallone_ps() { return _mm_load_ps(reinterpret_cast(Common::AllBitsSet)); } static Vc_INTRINSIC Vc_CONST m128d _mm_setallone_pd() { return _mm_load_pd(reinterpret_cast(Common::AllBitsSet)); } static Vc_INTRINSIC Vc_CONST m256i setallone_si256() { return _mm256_castps_si256(_mm256_load_ps(reinterpret_cast(Common::AllBitsSet))); } static Vc_INTRINSIC Vc_CONST m256d setallone_pd() { return _mm256_load_pd(reinterpret_cast(Common::AllBitsSet)); } static Vc_INTRINSIC Vc_CONST m256 setallone_ps() { return _mm256_load_ps(reinterpret_cast(Common::AllBitsSet)); } static Vc_INTRINSIC m256i Vc_CONST setone_epi8 () { return _mm256_set1_epi8(1); } static Vc_INTRINSIC m256i Vc_CONST setone_epu8 () { return setone_epi8(); } static Vc_INTRINSIC m256i Vc_CONST setone_epi16() { return _mm256_castps_si256(_mm256_broadcast_ss(reinterpret_cast(c_general::one16))); } static Vc_INTRINSIC m256i Vc_CONST setone_epu16() { return setone_epi16(); } static Vc_INTRINSIC m256i Vc_CONST setone_epi32() { return _mm256_castps_si256(_mm256_broadcast_ss(reinterpret_cast(&_IndexesFromZero32[1]))); } static Vc_INTRINSIC m256i Vc_CONST setone_epu32() { return setone_epi32(); } static Vc_INTRINSIC m256 Vc_CONST setone_ps() { return _mm256_broadcast_ss(&c_general::oneFloat); } static Vc_INTRINSIC m256d Vc_CONST setone_pd() { return _mm256_broadcast_sd(&c_general::oneDouble); } static Vc_INTRINSIC m256d Vc_CONST setabsmask_pd() { return _mm256_broadcast_sd(reinterpret_cast(&c_general::absMaskFloat[0])); } static Vc_INTRINSIC m256 Vc_CONST setabsmask_ps() { return _mm256_broadcast_ss(reinterpret_cast(&c_general::absMaskFloat[1])); } static Vc_INTRINSIC m256d Vc_CONST setsignmask_pd(){ return _mm256_broadcast_sd(reinterpret_cast(&c_general::signMaskFloat[0])); } static Vc_INTRINSIC m256 Vc_CONST setsignmask_ps(){ return _mm256_broadcast_ss(reinterpret_cast(&c_general::signMaskFloat[1])); } static Vc_INTRINSIC m256 Vc_CONST set2power31_ps() { return _mm256_broadcast_ss(&c_general::_2power31); } static Vc_INTRINSIC m128 Vc_CONST _mm_set2power31_ps() { return _mm_broadcast_ss(&c_general::_2power31); } static Vc_INTRINSIC m256i Vc_CONST set2power31_epu32() { return _mm256_castps_si256(_mm256_broadcast_ss(reinterpret_cast(&c_general::signMaskFloat[1]))); } static Vc_INTRINSIC m128i Vc_CONST _mm_set2power31_epu32() { return 
_mm_castps_si128(_mm_broadcast_ss(reinterpret_cast(&c_general::signMaskFloat[1]))); } static Vc_INTRINSIC m256i Vc_CONST setmin_epi8 () { return _mm256_set1_epi8(-0x80); } static Vc_INTRINSIC m128i Vc_CONST _mm_setmin_epi16() { return _mm_castps_si128(_mm_broadcast_ss(reinterpret_cast(c_general::minShort))); } static Vc_INTRINSIC m128i Vc_CONST _mm_setmin_epi32() { return _mm_castps_si128(_mm_broadcast_ss(reinterpret_cast(&c_general::signMaskFloat[1]))); } static Vc_INTRINSIC m256i Vc_CONST setmin_epi16() { return _mm256_castps_si256(_mm256_broadcast_ss(reinterpret_cast(c_general::minShort))); } static Vc_INTRINSIC m256i Vc_CONST setmin_epi32() { return _mm256_castps_si256(_mm256_broadcast_ss(reinterpret_cast(&c_general::signMaskFloat[1]))); } template static Vc_INTRINSIC Vc_CONST unsigned int extract_epu32(__m128i x) { return _mm_extract_epi32(x, i); } template Vc_INTRINSIC __m256 insert128(__m256 a, __m128 b) { return _mm256_insertf128_ps(a, b, offset); } template Vc_INTRINSIC __m256d insert128(__m256d a, __m128d b) { return _mm256_insertf128_pd(a, b, offset); } template Vc_INTRINSIC __m256i insert128(__m256i a, __m128i b) { #ifdef Vc_IMPL_AVX2 return _mm256_inserti128_si256(a, b, offset); #else return _mm256_insertf128_si256(a, b, offset); #endif } template Vc_INTRINSIC __m128 extract128(__m256 a) { return _mm256_extractf128_ps(a, offset); } template Vc_INTRINSIC __m128d extract128(__m256d a) { return _mm256_extractf128_pd(a, offset); } template Vc_INTRINSIC __m128i extract128(__m256i a) { #ifdef Vc_IMPL_AVX2 return _mm256_extracti128_si256(a, offset); #else return _mm256_extractf128_si256(a, offset); #endif } /////////////////////// COMPARE OPS /////////////////////// #ifdef Vc_GCC // GCC needs builtin compare operators to enable constant folding Vc_INTRINSIC __m256d cmpeq_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a == b); } Vc_INTRINSIC __m256d cmpneq_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a != b); } Vc_INTRINSIC __m256d cmplt_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a < b); } Vc_INTRINSIC __m256d cmpge_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a >= b); } Vc_INTRINSIC __m256d cmple_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a <= b); } Vc_INTRINSIC __m256d cmpgt_pd (__m256d a, __m256d b) { return reinterpret_cast<__m256d>(a > b); } Vc_INTRINSIC __m256 cmpeq_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a == b); } Vc_INTRINSIC __m256 cmpneq_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a != b); } Vc_INTRINSIC __m256 cmplt_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a < b); } Vc_INTRINSIC __m256 cmpge_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a >= b); } Vc_INTRINSIC __m256 cmple_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a <= b); } Vc_INTRINSIC __m256 cmpgt_ps (__m256 a, __m256 b) { return reinterpret_cast<__m256 >(a > b); } #else Vc_INTRINSIC __m256d cmpeq_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_EQ_OQ); } Vc_INTRINSIC __m256d cmpneq_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_NEQ_UQ); } Vc_INTRINSIC __m256d cmplt_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_LT_OS); } Vc_INTRINSIC __m256d cmpge_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_NLT_US); } Vc_INTRINSIC __m256d cmple_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_LE_OS); } Vc_INTRINSIC __m256d cmpgt_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_NLE_US); } Vc_INTRINSIC 
__m256 cmpeq_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_EQ_OQ); } Vc_INTRINSIC __m256 cmpneq_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_NEQ_UQ); } Vc_INTRINSIC __m256 cmplt_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_LT_OS); } Vc_INTRINSIC __m256 cmpge_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_NLT_US); } Vc_INTRINSIC __m256 cmple_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_LE_OS); } Vc_INTRINSIC __m256 cmpgt_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_NLE_US); } #endif Vc_INTRINSIC __m256d cmpnlt_pd (__m256d a, __m256d b) { return cmpge_pd(a, b); } Vc_INTRINSIC __m256d cmpnle_pd (__m256d a, __m256d b) { return cmpgt_pd(a, b); } Vc_INTRINSIC __m256 cmpnlt_ps (__m256 a, __m256 b) { return cmpge_ps(a, b); } Vc_INTRINSIC __m256 cmpnle_ps (__m256 a, __m256 b) { return cmpgt_ps(a, b); } Vc_INTRINSIC __m256d cmpord_pd (__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_ORD_Q); } Vc_INTRINSIC __m256d cmpunord_pd(__m256d a, __m256d b) { return _mm256_cmp_pd(a, b, _CMP_UNORD_Q); } Vc_INTRINSIC __m256 cmpord_ps (__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_ORD_Q); } Vc_INTRINSIC __m256 cmpunord_ps(__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_UNORD_Q); } #if defined(Vc_IMPL_XOP) static Vc_INTRINSIC m128i cmplt_epu16(__m128i a, __m128i b) { return _mm_comlt_epu16(a, b); } static Vc_INTRINSIC m128i cmpgt_epu16(__m128i a, __m128i b) { return _mm_comgt_epu16(a, b); } #else static Vc_INTRINSIC m128i cmplt_epu16(__m128i a, __m128i b) { return _mm_cmplt_epi16(_mm_xor_si128(a, _mm_setmin_epi16()), _mm_xor_si128(b, _mm_setmin_epi16())); } static Vc_INTRINSIC m128i cmpgt_epu16(__m128i a, __m128i b) { return _mm_cmpgt_epi16(_mm_xor_si128(a, _mm_setmin_epi16()), _mm_xor_si128(b, _mm_setmin_epi16())); } #endif #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST m256i alignr(__m256i s1, __m256i s2) { return _mm256_alignr_epi8(s1, s2, shift); } #else template Vc_INTRINSIC Vc_CONST m256i alignr(__m256i s1, __m256i s2) { return insert128<1>( _mm256_castsi128_si256(_mm_alignr_epi8(_mm256_castsi256_si128(s1), _mm256_castsi256_si128(s2), shift)), _mm_alignr_epi8(extract128<1>(s1), extract128<1>(s2), shift)); } #endif #ifdef Vc_IMPL_AVX2 #define Vc_AVX_TO_SSE_2_NEW(name) \ Vc_INTRINSIC Vc_CONST m256i name(__m256i a0, __m256i b0) \ { \ return _mm256_##name(a0, b0); \ } #define Vc_AVX_TO_SSE_256_128(name) \ Vc_INTRINSIC Vc_CONST m256i name(__m256i a0, __m128i b0) \ { \ return _mm256_##name(a0, b0); \ } #define Vc_AVX_TO_SSE_1i(name) \ template Vc_INTRINSIC Vc_CONST m256i name(__m256i a0) \ { \ return _mm256_##name(a0, i); \ } #define Vc_AVX_TO_SSE_1(name) \ Vc_INTRINSIC Vc_CONST __m256i name(__m256i a0) { return _mm256_##name(a0); } #define Vc_AVX_TO_SSE_1_128(name, shift__) \ Vc_INTRINSIC Vc_CONST __m256i name(__m128i a0) { return _mm256_##name(a0); } #else /**\internal * Defines the function \p name, which takes two __m256i arguments and calls `_mm_##name` on the low * and high 128 bit halves of the arguments. * * In case the AVX2 intrinsics are enabled, the arguments are directly passed to a single * `_mm256_##name` call.
*/ #define Vc_AVX_TO_SSE_1(name) \ Vc_INTRINSIC Vc_CONST __m256i name(__m256i a0) \ { \ __m128i a1 = extract128<1>(a0); \ __m128i r0 = _mm_##name(_mm256_castsi256_si128(a0)); \ __m128i r1 = _mm_##name(a1); \ return insert128<1>(_mm256_castsi128_si256(r0), r1); \ } #define Vc_AVX_TO_SSE_1_128(name, shift__) \ Vc_INTRINSIC Vc_CONST __m256i name(__m128i a0) \ { \ __m128i r0 = _mm_##name(a0); \ __m128i r1 = _mm_##name(_mm_srli_si128(a0, shift__)); \ return insert128<1>(_mm256_castsi128_si256(r0), r1); \ } #define Vc_AVX_TO_SSE_2_NEW(name) \ Vc_INTRINSIC Vc_CONST m256i name(__m256i a0, __m256i b0) \ { \ m128i a1 = extract128<1>(a0); \ m128i b1 = extract128<1>(b0); \ m128i r0 = _mm_##name(_mm256_castsi256_si128(a0), _mm256_castsi256_si128(b0)); \ m128i r1 = _mm_##name(a1, b1); \ return insert128<1>(_mm256_castsi128_si256(r0), r1); \ } #define Vc_AVX_TO_SSE_256_128(name) \ Vc_INTRINSIC Vc_CONST m256i name(__m256i a0, __m128i b0) \ { \ m128i a1 = extract128<1>(a0); \ m128i r0 = _mm_##name(_mm256_castsi256_si128(a0), b0); \ m128i r1 = _mm_##name(a1, b0); \ return insert128<1>(_mm256_castsi128_si256(r0), r1); \ } #define Vc_AVX_TO_SSE_1i(name) \ template Vc_INTRINSIC Vc_CONST m256i name(__m256i a0) \ { \ m128i a1 = extract128<1>(a0); \ m128i r0 = _mm_##name(_mm256_castsi256_si128(a0), i); \ m128i r1 = _mm_##name(a1, i); \ return insert128<1>(_mm256_castsi128_si256(r0), r1); \ } #endif Vc_INTRINSIC Vc_CONST __m128i sll_epi16(__m128i a, __m128i b) { return _mm_sll_epi16(a, b); } Vc_INTRINSIC Vc_CONST __m128i sll_epi32(__m128i a, __m128i b) { return _mm_sll_epi32(a, b); } Vc_INTRINSIC Vc_CONST __m128i sll_epi64(__m128i a, __m128i b) { return _mm_sll_epi64(a, b); } Vc_INTRINSIC Vc_CONST __m128i srl_epi16(__m128i a, __m128i b) { return _mm_srl_epi16(a, b); } Vc_INTRINSIC Vc_CONST __m128i srl_epi32(__m128i a, __m128i b) { return _mm_srl_epi32(a, b); } Vc_INTRINSIC Vc_CONST __m128i srl_epi64(__m128i a, __m128i b) { return _mm_srl_epi64(a, b); } Vc_INTRINSIC Vc_CONST __m128i sra_epi16(__m128i a, __m128i b) { return _mm_sra_epi16(a, b); } Vc_INTRINSIC Vc_CONST __m128i sra_epi32(__m128i a, __m128i b) { return _mm_sra_epi32(a, b); } Vc_AVX_TO_SSE_1i(slli_epi16) Vc_AVX_TO_SSE_1i(slli_epi32) Vc_AVX_TO_SSE_1i(slli_epi64) Vc_AVX_TO_SSE_1i(srai_epi16) Vc_AVX_TO_SSE_1i(srai_epi32) Vc_AVX_TO_SSE_1i(srli_epi16) Vc_AVX_TO_SSE_1i(srli_epi32) Vc_AVX_TO_SSE_1i(srli_epi64) Vc_AVX_TO_SSE_256_128(sll_epi16) Vc_AVX_TO_SSE_256_128(sll_epi32) Vc_AVX_TO_SSE_256_128(sll_epi64) Vc_AVX_TO_SSE_256_128(srl_epi16) Vc_AVX_TO_SSE_256_128(srl_epi32) Vc_AVX_TO_SSE_256_128(srl_epi64) Vc_AVX_TO_SSE_256_128(sra_epi16) Vc_AVX_TO_SSE_256_128(sra_epi32) Vc_AVX_TO_SSE_2_NEW(cmpeq_epi8) Vc_AVX_TO_SSE_2_NEW(cmpeq_epi16) Vc_AVX_TO_SSE_2_NEW(cmpeq_epi32) Vc_AVX_TO_SSE_2_NEW(cmpeq_epi64) Vc_AVX_TO_SSE_2_NEW(cmpgt_epi8) Vc_AVX_TO_SSE_2_NEW(cmpgt_epi16) Vc_AVX_TO_SSE_2_NEW(cmpgt_epi32) Vc_AVX_TO_SSE_2_NEW(cmpgt_epi64) Vc_AVX_TO_SSE_2_NEW(unpackhi_epi16) Vc_AVX_TO_SSE_2_NEW(unpacklo_epi16) Vc_AVX_TO_SSE_2_NEW(add_epi16) Vc_AVX_TO_SSE_2_NEW(add_epi32) Vc_AVX_TO_SSE_2_NEW(add_epi64) Vc_AVX_TO_SSE_2_NEW(sub_epi16) Vc_AVX_TO_SSE_2_NEW(sub_epi32) Vc_AVX_TO_SSE_2_NEW(mullo_epi16) Vc_AVX_TO_SSE_2_NEW(sign_epi16) Vc_AVX_TO_SSE_2_NEW(sign_epi32) Vc_AVX_TO_SSE_2_NEW(min_epi8) Vc_AVX_TO_SSE_2_NEW(max_epi8) Vc_AVX_TO_SSE_2_NEW(min_epu16) Vc_AVX_TO_SSE_2_NEW(max_epu16) Vc_AVX_TO_SSE_2_NEW(min_epi32) Vc_AVX_TO_SSE_2_NEW(max_epi32) Vc_AVX_TO_SSE_2_NEW(min_epu32) Vc_AVX_TO_SSE_2_NEW(max_epu32) Vc_AVX_TO_SSE_2_NEW(mullo_epi32) Vc_AVX_TO_SSE_1(abs_epi8) 
Vc_AVX_TO_SSE_1(abs_epi16) Vc_AVX_TO_SSE_1(abs_epi32) Vc_AVX_TO_SSE_1_128(cvtepi8_epi16, 8) Vc_AVX_TO_SSE_1_128(cvtepi8_epi32, 4) Vc_AVX_TO_SSE_1_128(cvtepi8_epi64, 2) Vc_AVX_TO_SSE_1_128(cvtepi16_epi32, 8) Vc_AVX_TO_SSE_1_128(cvtepi16_epi64, 4) Vc_AVX_TO_SSE_1_128(cvtepi32_epi64, 8) Vc_AVX_TO_SSE_1_128(cvtepu8_epi16, 8) Vc_AVX_TO_SSE_1_128(cvtepu8_epi32, 4) Vc_AVX_TO_SSE_1_128(cvtepu8_epi64, 2) Vc_AVX_TO_SSE_1_128(cvtepu16_epi32, 8) Vc_AVX_TO_SSE_1_128(cvtepu16_epi64, 4) Vc_AVX_TO_SSE_1_128(cvtepu32_epi64, 8) #ifndef Vc_IMPL_AVX2 ///////////////////////////////////////////////////////////////////////// // implementation of the intrinsics missing in AVX ///////////////////////////////////////////////////////////////////////// static Vc_INTRINSIC m256i Vc_CONST and_si256(__m256i x, __m256i y) { return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y))); } static Vc_INTRINSIC m256i Vc_CONST andnot_si256(__m256i x, __m256i y) { return _mm256_castps_si256(_mm256_andnot_ps(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y))); } static Vc_INTRINSIC m256i Vc_CONST or_si256(__m256i x, __m256i y) { return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y))); } static Vc_INTRINSIC m256i Vc_CONST xor_si256(__m256i x, __m256i y) { return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y))); } Vc_INTRINSIC Vc_CONST int movemask_epi8(__m256i a0) { m128i a1 = extract128<1>(a0); return (_mm_movemask_epi8(a1) << 16) | _mm_movemask_epi8(_mm256_castsi256_si128(a0)); } template Vc_INTRINSIC Vc_CONST m256i blend_epi16(__m256i a0, __m256i b0) { m128i a1 = extract128<1>(a0); m128i b1 = extract128<1>(b0); m128i r0 = _mm_blend_epi16(_mm256_castsi256_si128(a0), _mm256_castsi256_si128(b0), m & 0xff); m128i r1 = _mm_blend_epi16(a1, b1, m >> 8); return insert128<1>(_mm256_castsi128_si256(r0), r1); } Vc_INTRINSIC Vc_CONST m256i blendv_epi8(__m256i a0, __m256i b0, __m256i m0) { m128i a1 = extract128<1>(a0); m128i b1 = extract128<1>(b0); m128i m1 = extract128<1>(m0); m128i r0 = _mm_blendv_epi8(_mm256_castsi256_si128(a0), _mm256_castsi256_si128(b0), _mm256_castsi256_si128(m0)); m128i r1 = _mm_blendv_epi8(a1, b1, m1); return insert128<1>(_mm256_castsi128_si256(r0), r1); } // mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M) #else // Vc_IMPL_AVX2 static Vc_INTRINSIC Vc_CONST m256i xor_si256(__m256i x, __m256i y) { return _mm256_xor_si256(x, y); } static Vc_INTRINSIC Vc_CONST m256i or_si256(__m256i x, __m256i y) { return _mm256_or_si256(x, y); } static Vc_INTRINSIC Vc_CONST m256i and_si256(__m256i x, __m256i y) { return _mm256_and_si256(x, y); } static Vc_INTRINSIC Vc_CONST m256i andnot_si256(__m256i x, __m256i y) { return _mm256_andnot_si256(x, y); } ///////////////////////////////////////////////////////////////////////// // implementation of the intrinsics missing in AVX2 ///////////////////////////////////////////////////////////////////////// Vc_INTRINSIC Vc_CONST m256i blendv_epi8(__m256i a0, __m256i b0, __m256i m0) { return _mm256_blendv_epi8(a0, b0, m0); } Vc_INTRINSIC Vc_CONST int movemask_epi8(__m256i a0) { return _mm256_movemask_epi8(a0); } #endif // Vc_IMPL_AVX2 ///////////////////////////////////////////////////////////////////////// // implementation of intrinsics missing in AVX and AVX2 ///////////////////////////////////////////////////////////////////////// static Vc_INTRINSIC m256i cmplt_epi64(__m256i a, __m256i b) { return cmpgt_epi64(b, a); } static Vc_INTRINSIC m256i cmplt_epi32(__m256i a, __m256i b) { return 
cmpgt_epi32(b, a); } static Vc_INTRINSIC m256i cmplt_epi16(__m256i a, __m256i b) { return cmpgt_epi16(b, a); } static Vc_INTRINSIC m256i cmplt_epi8(__m256i a, __m256i b) { return cmpgt_epi8(b, a); } static Vc_INTRINSIC m256i cmpgt_epu8(__m256i a, __m256i b) { return cmpgt_epi8(xor_si256(a, setmin_epi8()), xor_si256(b, setmin_epi8())); } #if defined(Vc_IMPL_XOP) Vc_AVX_TO_SSE_2_NEW(comlt_epu32) Vc_AVX_TO_SSE_2_NEW(comgt_epu32) Vc_AVX_TO_SSE_2_NEW(comlt_epu16) Vc_AVX_TO_SSE_2_NEW(comgt_epu16) static Vc_INTRINSIC m256i Vc_CONST cmplt_epu32(__m256i a, __m256i b) { return comlt_epu32(a, b); } static Vc_INTRINSIC m256i Vc_CONST cmpgt_epu32(__m256i a, __m256i b) { return comgt_epu32(a, b); } static Vc_INTRINSIC m256i Vc_CONST cmplt_epu16(__m256i a, __m256i b) { return comlt_epu16(a, b); } static Vc_INTRINSIC m256i Vc_CONST cmpgt_epu16(__m256i a, __m256i b) { return comgt_epu16(a, b); } #else static Vc_INTRINSIC m256i Vc_CONST cmplt_epu32(__m256i _a, __m256i _b) { m256i a = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_a), _mm256_castsi256_ps(setmin_epi32()))); m256i b = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_b), _mm256_castsi256_ps(setmin_epi32()))); return cmplt_epi32(a, b); } static Vc_INTRINSIC m256i Vc_CONST cmpgt_epu32(__m256i _a, __m256i _b) { m256i a = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_a), _mm256_castsi256_ps(setmin_epi32()))); m256i b = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_b), _mm256_castsi256_ps(setmin_epi32()))); return cmpgt_epi32(a, b); } static Vc_INTRINSIC m256i Vc_CONST cmplt_epu16(__m256i _a, __m256i _b) { m256i a = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_a), _mm256_castsi256_ps(setmin_epi16()))); m256i b = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_b), _mm256_castsi256_ps(setmin_epi16()))); return cmplt_epi16(a, b); } static Vc_INTRINSIC m256i Vc_CONST cmpgt_epu16(__m256i _a, __m256i _b) { m256i a = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_a), _mm256_castsi256_ps(setmin_epi16()))); m256i b = _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(_b), _mm256_castsi256_ps(setmin_epi16()))); return cmpgt_epi16(a, b); } #endif static Vc_INTRINSIC void _mm256_maskstore(float *mem, const __m256 mask, const __m256 v) { _mm256_maskstore_ps(mem, _mm256_castps_si256(mask), v); } static Vc_INTRINSIC void _mm256_maskstore(double *mem, const __m256d mask, const __m256d v) { _mm256_maskstore_pd(mem, _mm256_castpd_si256(mask), v); } static Vc_INTRINSIC void _mm256_maskstore(int *mem, const __m256i mask, const __m256i v) { #ifdef Vc_IMPL_AVX2 _mm256_maskstore_epi32(mem, mask, v); #else _mm256_maskstore_ps(reinterpret_cast(mem), mask, _mm256_castsi256_ps(v)); #endif } static Vc_INTRINSIC void _mm256_maskstore(unsigned int *mem, const __m256i mask, const __m256i v) { _mm256_maskstore(reinterpret_cast(mem), mask, v); } static Vc_INTRINSIC void _mm256_maskstore(short *mem, const __m256i mask, const __m256i v) { using namespace AVX; _mm_maskmoveu_si128(_mm256_castsi256_si128(v), _mm256_castsi256_si128(mask), reinterpret_cast(&mem[0])); _mm_maskmoveu_si128(extract128<1>(v), extract128<1>(mask), reinterpret_cast(&mem[8])); } static Vc_INTRINSIC void _mm256_maskstore(unsigned short *mem, const __m256i mask, const __m256i v) { _mm256_maskstore(reinterpret_cast(mem), mask, v); } #undef Vc_AVX_TO_SSE_1 #undef Vc_AVX_TO_SSE_1_128 #undef Vc_AVX_TO_SSE_2_NEW #undef Vc_AVX_TO_SSE_256_128 #undef Vc_AVX_TO_SSE_1i template Vc_INTRINSIC_L R stream_load(const float *mem) Vc_INTRINSIC_R; template<> Vc_INTRINSIC 
m128 stream_load(const float *mem) { return _mm_castsi128_ps(_mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast(mem)))); } template<> Vc_INTRINSIC m256 stream_load(const float *mem) { return insert128<1>(_mm256_castps128_ps256(stream_load(mem)), stream_load(mem + 4)); } template Vc_INTRINSIC_L R stream_load(const double *mem) Vc_INTRINSIC_R; template<> Vc_INTRINSIC m128d stream_load(const double *mem) { return _mm_castsi128_pd(_mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast(mem)))); } template<> Vc_INTRINSIC m256d stream_load(const double *mem) { return insert128<1>(_mm256_castpd128_pd256(stream_load(mem)), stream_load(mem + 2)); } template Vc_INTRINSIC_L R stream_load(const void *mem) Vc_INTRINSIC_R; template<> Vc_INTRINSIC m128i stream_load(const void *mem) { return _mm_stream_load_si128(reinterpret_cast<__m128i *>(const_cast(mem))); } template<> Vc_INTRINSIC m256i stream_load(const void *mem) { return insert128<1>(_mm256_castsi128_si256(stream_load(mem)), stream_load(static_cast(mem) + 1)); } Vc_INTRINSIC void stream_store(float *mem, __m128 value, __m128 mask) { _mm_maskmoveu_si128(_mm_castps_si128(value), _mm_castps_si128(mask), reinterpret_cast(mem)); } Vc_INTRINSIC void stream_store(float *mem, __m256 value, __m256 mask) { stream_store(mem, _mm256_castps256_ps128(value), _mm256_castps256_ps128(mask)); stream_store(mem + 4, extract128<1>(value), extract128<1>(mask)); } Vc_INTRINSIC void stream_store(double *mem, __m128d value, __m128d mask) { _mm_maskmoveu_si128(_mm_castpd_si128(value), _mm_castpd_si128(mask), reinterpret_cast(mem)); } Vc_INTRINSIC void stream_store(double *mem, __m256d value, __m256d mask) { stream_store(mem, _mm256_castpd256_pd128(value), _mm256_castpd256_pd128(mask)); stream_store(mem + 2, extract128<1>(value), extract128<1>(mask)); } Vc_INTRINSIC void stream_store(void *mem, __m128i value, __m128i mask) { _mm_maskmoveu_si128(value, mask, reinterpret_cast(mem)); } Vc_INTRINSIC void stream_store(void *mem, __m256i value, __m256i mask) { stream_store(mem, _mm256_castsi256_si128(value), _mm256_castsi256_si128(mask)); stream_store(static_cast<__m128i *>(mem) + 1, extract128<1>(value), extract128<1>(mask)); } #ifndef __x86_64__ Vc_INTRINSIC Vc_PURE __m128i _mm_cvtsi64_si128(int64_t x) { return _mm_castpd_si128(_mm_load_sd(reinterpret_cast(&x))); } #endif #ifdef Vc_IMPL_AVX2 template __m256 gather(const float *addr, __m256i idx) { return _mm256_i32gather_ps(addr, idx, Scale); } template __m256d gather(const double *addr, __m128i idx) { return _mm256_i32gather_pd(addr, idx, Scale); } template __m256i gather(const int *addr, __m256i idx) { return _mm256_i32gather_epi32(addr, idx, Scale); } template __m256i gather(const unsigned *addr, __m256i idx) { return _mm256_i32gather_epi32(aliasing_cast(addr), idx, Scale); } template __m256 gather(__m256 src, __m256 k, const float *addr, __m256i idx) { return _mm256_mask_i32gather_ps(src, addr, idx, k, Scale); } template __m256d gather(__m256d src, __m256d k, const double *addr, __m128i idx) { return _mm256_mask_i32gather_pd(src, addr, idx, k, Scale); } template __m256i gather(__m256i src, __m256i k, const int *addr, __m256i idx) { return _mm256_mask_i32gather_epi32(src, addr, idx, k, Scale); } template __m256i gather(__m256i src, __m256i k, const unsigned *addr, __m256i idx) { return _mm256_mask_i32gather_epi32(src, aliasing_cast(addr), idx, k, Scale); } #endif } // namespace AvxIntrinsics } // namespace Vc namespace Vc_VERSIONED_NAMESPACE { namespace AVX { using namespace AvxIntrinsics; } // namespace AVX 
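// Illustrative usage sketch (not part of the upstream Vc sources; the table,
// index values, and variable names below are assumptions chosen only to show
// how the gather<Scale>() wrappers defined above in AvxIntrinsics are meant to
// be used -- they are available only when Vc_IMPL_AVX2 is defined):
//
//   float table[64] = { /* ... */ };
//   const __m256i idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
//   // Scale is the byte stride applied to each index; 4 == sizeof(float),
//   // so lane i of the result holds table[idx[i]] (a single vgatherdps).
//   const __m256 v = Vc::AVX2::gather<4>(table, idx);
//
// The masked overloads additionally take a source vector and a mask vector:
// lanes whose mask sign bit is clear keep the corresponding element of the
// source vector, matching the semantics of _mm256_mask_i32gather_ps.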
namespace AVX2 { using namespace AvxIntrinsics; } // namespace AVX2 namespace AVX { template struct VectorTypeHelper; template<> struct VectorTypeHelper< char > { typedef __m256i Type; }; template<> struct VectorTypeHelper< signed char > { typedef __m256i Type; }; template<> struct VectorTypeHelper { typedef __m256i Type; }; template<> struct VectorTypeHelper< short> { typedef __m256i Type; }; template<> struct VectorTypeHelper { typedef __m256i Type; }; template<> struct VectorTypeHelper< int > { typedef __m256i Type; }; template<> struct VectorTypeHelper { typedef __m256i Type; }; template<> struct VectorTypeHelper< long > { typedef __m256i Type; }; template<> struct VectorTypeHelper { typedef __m256i Type; }; template<> struct VectorTypeHelper< long long> { typedef __m256i Type; }; template<> struct VectorTypeHelper { typedef __m256i Type; }; template<> struct VectorTypeHelper< float> { typedef __m256 Type; }; template<> struct VectorTypeHelper< double> { typedef __m256d Type; }; template using IntegerVectorType = typename std::conditional::type; template using DoubleVectorType = typename std::conditional::type; template using FloatVectorType = typename std::conditional::type; template struct VectorHelper {}; template struct VectorHelperSize; } // namespace AVX } // namespace Vc #endif // VC_AVX_INTRINSICS_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/limits.h000066400000000000000000000130101476554302100176620ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_AVX_LIMITS_H_ #define VC_AVX_LIMITS_H_ #include "intrinsics.h" #include "types.h" #include "macros.h" namespace std { #define Vc_NUM_LIM(T, _max, _min) \ template <> struct numeric_limits> : public numeric_limits { \ static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector max() Vc_NOEXCEPT \ { \ return _max; \ } \ static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector min() Vc_NOEXCEPT \ { \ return _min; \ } \ static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector lowest() Vc_NOEXCEPT \ { \ return min(); \ } \ static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector epsilon() Vc_NOEXCEPT \ { \ return Vc::AVX2::Vector::Zero(); \ } \ static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector round_error() Vc_NOEXCEPT \ { \ return Vc::AVX2::Vector::Zero(); \ } \ static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector infinity() Vc_NOEXCEPT \ { \ return Vc::AVX2::Vector::Zero(); \ } \ static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector quiet_NaN() Vc_NOEXCEPT \ { \ return Vc::AVX2::Vector::Zero(); \ } \ static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector signaling_NaN() Vc_NOEXCEPT \ { \ return Vc::AVX2::Vector::Zero(); \ } \ static Vc_INTRINSIC Vc_CONST Vc::AVX2::Vector denorm_min() Vc_NOEXCEPT \ { \ return Vc::AVX2::Vector::Zero(); \ } \ } #ifdef Vc_IMPL_AVX2 Vc_NUM_LIM(unsigned short, Vc::Detail::allone<__m256i>(), Vc::Detail::zero<__m256i>()); Vc_NUM_LIM( short, _mm256_srli_epi16(Vc::Detail::allone<__m256i>(), 1), Vc::AVX::setmin_epi16()); Vc_NUM_LIM( unsigned int, Vc::Detail::allone<__m256i>(), Vc::Detail::zero<__m256i>()); Vc_NUM_LIM( int, _mm256_srli_epi32(Vc::Detail::allone<__m256i>(), 1), Vc::AVX::setmin_epi32()); #endif #undef Vc_NUM_LIM } // namespace std #endif // VC_AVX_LIMITS_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/macros.h000066400000000000000000000032101476554302100176460ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #include "../common/macros.h" #ifndef VC_AVX_MACROS_H_ #define VC_AVX_MACROS_H_ #endif // VC_AVX_MACROS_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/mask.h000066400000000000000000000226241476554302100173270ustar00rootroot00000000000000/* This file is part of the Vc library. 
{{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_AVX_MASK_H_ #define VC_AVX_MASK_H_ #include #include "intrinsics.h" #include "../common/storage.h" #include "../common/bitscanintrinsics.h" #include "../common/maskbool.h" #include "detail.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { template class Mask { public: using abi = VectorAbi::Avx; /** * The \c EntryType of masks is always bool, independent of \c T. */ typedef bool EntryType; using value_type = EntryType; using MaskBool = Common::MaskBool; /** * The \c VectorEntryType, in contrast to \c EntryType, reveals information about the SIMD * implementation. This type is useful for the \c sizeof operator in generic functions. */ using VectorEntryType = MaskBool; /** * The associated Vector type. */ using Vector = AVX2::Vector; ///\internal using VectorTypeF = AVX::FloatVectorType::Type>; ///\internal using VectorTypeD = AVX::DoubleVectorType; ///\internal using VectorTypeI = AVX::IntegerVectorType; private: typedef const VectorTypeF VArg; typedef const VectorTypeD VdArg; typedef const VectorTypeI ViArg; public: static constexpr size_t Size = sizeof(VectorTypeF) / sizeof(T); static constexpr size_t MemoryAlignment = Size; static constexpr std::size_t size() { return Size; } Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(VectorType)); private: typedef Common::Storage Storage; public: /** * The \c VectorType reveals the implementation-specific internal type used for the * SIMD type. 
*/ using VectorType = typename Storage::VectorType; using EntryReference = Vc::Detail::ElementReference; using reference = EntryReference; // abstracts the way Masks are passed to functions, it can easily be changed to const ref here #if defined Vc_MSVC && defined _WIN32 typedef const Mask &AsArg; #else typedef const Mask AsArg; #endif Vc_INTRINSIC Mask() {} Vc_INTRINSIC Mask(VArg x) : d(AVX::avx_cast(x)) {} Vc_INTRINSIC Mask(VdArg x) : d(AVX::avx_cast(x)) {} Vc_INTRINSIC Mask(ViArg x) : d(AVX::avx_cast(x)) {} Vc_INTRINSIC explicit Mask(VectorSpecialInitializerZero) : d(Detail::zero()) {} Vc_INTRINSIC explicit Mask(VectorSpecialInitializerOne) : d(Detail::allone()) {} Vc_INTRINSIC explicit Mask(bool b) : d(b ? Detail::allone() : Detail::zero()) { } Vc_INTRINSIC static Mask Zero() { return Mask{Vc::Zero}; } Vc_INTRINSIC static Mask One() { return Mask{Vc::One}; } // implicit cast template Vc_INTRINSIC Mask( U &&rhs, Common::enable_if_mask_converts_implicitly = nullarg) : d(AVX::avx_cast( Detail::mask_cast::Size, Size, VectorTypeF>( rhs.dataI()))) { } #if Vc_IS_VERSION_1 // explicit cast, implemented via simd_cast (in avx/simd_cast_caller.h) template Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between " "mask types") Vc_INTRINSIC explicit Mask(U &&rhs, Common::enable_if_mask_converts_explicitly = nullarg); #endif template Vc_INTRINSIC explicit Mask(const bool *mem, Flags f = Flags()) { load(mem, f); } template Vc_INTRINSIC void load(const bool *mem, Flags = Flags()); template Vc_INTRINSIC void store(bool *mem, Flags = Flags()) const; Vc_INTRINSIC Mask &operator=(const Mask &) = default; Vc_INTRINSIC_L Mask &operator=(const std::array &values) Vc_INTRINSIC_R; Vc_INTRINSIC_L operator std::array() const Vc_INTRINSIC_R; // specializations in mask.tcc Vc_INTRINSIC Vc_PURE bool operator==(const Mask &rhs) const { return Detail::movemask(d.v()) == Detail::movemask(rhs.d.v()); } Vc_INTRINSIC Vc_PURE bool operator!=(const Mask &rhs) const { return !operator==(rhs); } Vc_INTRINSIC Mask operator!() const { #ifdef Vc_GCC return ~dataI(); #else return Detail::andnot_(dataF(), Detail::allone()); #endif } Vc_INTRINSIC Mask &operator&=(const Mask &rhs) { d.v() = AVX::avx_cast(Detail::and_(data(), rhs.data())); return *this; } Vc_INTRINSIC Mask &operator|=(const Mask &rhs) { d.v() = AVX::avx_cast(Detail::or_ (data(), rhs.data())); return *this; } Vc_INTRINSIC Mask &operator^=(const Mask &rhs) { d.v() = AVX::avx_cast(Detail::xor_(data(), rhs.data())); return *this; } Vc_INTRINSIC Vc_PURE Mask operator&(const Mask &rhs) const { return Detail::and_(data(), rhs.data()); } Vc_INTRINSIC Vc_PURE Mask operator|(const Mask &rhs) const { return Detail::or_(data(), rhs.data()); } Vc_INTRINSIC Vc_PURE Mask operator^(const Mask &rhs) const { return Detail::xor_(data(), rhs.data()); } Vc_INTRINSIC Vc_PURE Mask operator&&(const Mask &rhs) const { return Detail::and_(data(), rhs.data()); } Vc_INTRINSIC Vc_PURE Mask operator||(const Mask &rhs) const { return Detail::or_(data(), rhs.data()); } // no need for expression template optimizations because cmp(n)eq for floats are not bitwise // compares Vc_INTRINSIC_L bool isNotEmpty() const Vc_INTRINSIC_R; Vc_INTRINSIC_L bool isEmpty() const Vc_INTRINSIC_R; Vc_INTRINSIC_L bool isFull() const Vc_INTRINSIC_R; Vc_INTRINSIC_L bool isMix() const Vc_INTRINSIC_R; Vc_INTRINSIC Vc_PURE int shiftMask() const { return Detail::movemask(dataI()); } Vc_INTRINSIC Vc_PURE int toInt() const { return Detail::mask_to_int(dataI()); } Vc_INTRINSIC VectorType data () const { 
return d.v(); } Vc_INTRINSIC VectorTypeF dataF() const { return AVX::avx_cast(d.v()); } Vc_INTRINSIC VectorTypeI dataI() const { return AVX::avx_cast(d.v()); } Vc_INTRINSIC VectorTypeD dataD() const { return AVX::avx_cast(d.v()); } private: friend reference; static Vc_INTRINSIC Vc_PURE value_type get(const Mask &m, int i) noexcept { return m.toInt() & (1 << i); } template static Vc_INTRINSIC void set(Mask &m, int i, U &&v) noexcept(noexcept(MaskBool(std::declval()))) { m.d.set(i, MaskBool(std::forward(v))); } public: /** * \note the returned object models the concept of a reference and * as such it can exist longer than the data it is referencing. * \note to avoid lifetime issues, we strongly advice not to store * any reference objects. */ Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept { return {*this, int(index)}; } Vc_ALWAYS_INLINE Vc_PURE value_type operator[](size_t index) const noexcept { return get(*this, index); } Vc_INTRINSIC Vc_PURE int count() const { return Detail::popcnt16(toInt()); } Vc_INTRINSIC Vc_PURE int firstOne() const { return _bit_scan_forward(toInt()); } template static Vc_INTRINSIC_L Mask generate(G &&gen) Vc_INTRINSIC_R; Vc_INTRINSIC_L Vc_PURE_L Mask shifted(int amount) const Vc_INTRINSIC_R Vc_PURE_R; private: #ifdef Vc_COMPILE_BENCHMARKS public: #endif Storage d; }; template constexpr size_t Mask::Size; template constexpr size_t Mask::MemoryAlignment; } // namespace Vc #include "mask.tcc" #endif // VC_AVX_MASK_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/mask.tcc000066400000000000000000000320321476554302100176430ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2011-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ namespace Vc_VERSIONED_NAMESPACE { // store {{{1 template template Vc_INTRINSIC void Mask::store(bool *mem, Flags f) const { Detail::mask_store(dataI(), mem, f); } // load {{{1 template template Vc_INTRINSIC void Mask::load(const bool *mem, Flags f) { d.v() = AVX::avx_cast(Detail::mask_load(mem, f)); } // operator[] {{{1 #ifdef Vc_IMPL_AVX2 template <> Vc_INTRINSIC Vc_PURE bool AVX2::Mask::get(const AVX2::Mask &m, int index) noexcept { return m.shiftMask() & (1 << 2 * index); } template <> Vc_INTRINSIC Vc_PURE bool AVX2::Mask::get(const AVX2::Mask &m, int index) noexcept { return m.shiftMask() & (1 << 2 * index); } #endif // operator== {{{1 template <> Vc_INTRINSIC Vc_PURE bool AVX2::double_m::operator==(const AVX2::double_m &rhs) const { return Detail::movemask(dataD()) == Detail::movemask(rhs.dataD()); } #ifdef Vc_IMPL_AVX2 template <> Vc_INTRINSIC Vc_PURE bool AVX2::short_m::operator==(const AVX2::short_m &rhs) const { return Detail::movemask(dataI()) == Detail::movemask(rhs.dataI()); } template <> Vc_INTRINSIC Vc_PURE bool AVX2::ushort_m::operator==(const AVX2::ushort_m &rhs) const { return Detail::movemask(dataI()) == Detail::movemask(rhs.dataI()); } #endif // isFull, isNotEmpty, isEmpty, isMix specializations{{{1 template Vc_INTRINSIC bool Mask::isFull() const { if (sizeof(T) == 8) { return 0 != Detail::testc(dataD(), Detail::allone()); } else if (sizeof(T) == 4) { return 0 != Detail::testc(dataF(), Detail::allone()); } else { return 0 != Detail::testc(dataI(), Detail::allone()); } } template Vc_INTRINSIC bool Mask::isNotEmpty() const { if (sizeof(T) == 8) { return 0 == Detail::testz(dataD(), dataD()); } else if (sizeof(T) == 4) { return 0 == Detail::testz(dataF(), dataF()); } else { return 0 == Detail::testz(dataI(), dataI()); } } template Vc_INTRINSIC bool Mask::isEmpty() const { if (sizeof(T) == 8) { return 0 != Detail::testz(dataD(), dataD()); } else if (sizeof(T) == 4) { return 0 != Detail::testz(dataF(), dataF()); } else { return 0 != Detail::testz(dataI(), dataI()); } } template Vc_INTRINSIC bool Mask::isMix() const { if (sizeof(T) == 8) { return 0 != Detail::testnzc(dataD(), Detail::allone()); } else if (sizeof(T) == 4) { return 0 != Detail::testnzc(dataF(), Detail::allone()); } else { return 0 != Detail::testnzc(dataI(), Detail::allone()); } } // generate {{{1 template Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant) { return _mm256_setr_epi64x( gen(0) ? 0xffffffffffffffffull : 0, gen(1) ? 0xffffffffffffffffull : 0, gen(2) ? 0xffffffffffffffffull : 0, gen(3) ? 0xffffffffffffffffull : 0); } template Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant) { return _mm256_setr_epi32(gen(0) ? 0xfffffffful : 0, gen(1) ? 0xfffffffful : 0, gen(2) ? 0xfffffffful : 0, gen(3) ? 0xfffffffful : 0, gen(4) ? 0xfffffffful : 0, gen(5) ? 0xfffffffful : 0, gen(6) ? 0xfffffffful : 0, gen(7) ? 0xfffffffful : 0); } template Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant) { return _mm256_setr_epi16(gen(0) ? 0xfffful : 0, gen(1) ? 0xfffful : 0, gen(2) ? 0xfffful : 0, gen(3) ? 0xfffful : 0, gen(4) ? 0xfffful : 0, gen(5) ? 0xfffful : 0, gen(6) ? 0xfffful : 0, gen(7) ? 0xfffful : 0, gen(8) ? 0xfffful : 0, gen(9) ? 0xfffful : 0, gen(10) ? 0xfffful : 0, gen(11) ? 0xfffful : 0, gen(12) ? 0xfffful : 0, gen(13) ? 0xfffful : 0, gen(14) ? 0xfffful : 0, gen(15) ? 
0xfffful : 0); } template template Vc_INTRINSIC AVX2::Mask Mask::generate(G &&gen) { return generate_impl>(std::forward(gen), std::integral_constant()); } // shifted {{{1 template Vc_INTRINSIC Vc_PURE AVX2::Mask Mask::shifted(int amount) const { switch (amount * int(sizeof(VectorEntryType))) { case 0: return *this; case 1: return Detail::shifted< 1>(dataI()); case 2: return Detail::shifted< 2>(dataI()); case 3: return Detail::shifted< 3>(dataI()); case 4: return Detail::shifted< 4>(dataI()); case 5: return Detail::shifted< 5>(dataI()); case 6: return Detail::shifted< 6>(dataI()); case 7: return Detail::shifted< 7>(dataI()); case 8: return Detail::shifted< 8>(dataI()); case 9: return Detail::shifted< 9>(dataI()); case 10: return Detail::shifted< 10>(dataI()); case 11: return Detail::shifted< 11>(dataI()); case 12: return Detail::shifted< 12>(dataI()); case 13: return Detail::shifted< 13>(dataI()); case 14: return Detail::shifted< 14>(dataI()); case 15: return Detail::shifted< 15>(dataI()); case 16: return Detail::shifted< 16>(dataI()); case 17: return Detail::shifted< 17>(dataI()); case 18: return Detail::shifted< 18>(dataI()); case 19: return Detail::shifted< 19>(dataI()); case 20: return Detail::shifted< 20>(dataI()); case 21: return Detail::shifted< 21>(dataI()); case 22: return Detail::shifted< 22>(dataI()); case 23: return Detail::shifted< 23>(dataI()); case 24: return Detail::shifted< 24>(dataI()); case 25: return Detail::shifted< 25>(dataI()); case 26: return Detail::shifted< 26>(dataI()); case 27: return Detail::shifted< 27>(dataI()); case 28: return Detail::shifted< 28>(dataI()); case 29: return Detail::shifted< 29>(dataI()); case 30: return Detail::shifted< 30>(dataI()); case 31: return Detail::shifted< 31>(dataI()); case -1: return Detail::shifted< -1>(dataI()); case -2: return Detail::shifted< -2>(dataI()); case -3: return Detail::shifted< -3>(dataI()); case -4: return Detail::shifted< -4>(dataI()); case -5: return Detail::shifted< -5>(dataI()); case -6: return Detail::shifted< -6>(dataI()); case -7: return Detail::shifted< -7>(dataI()); case -8: return Detail::shifted< -8>(dataI()); case -9: return Detail::shifted< -9>(dataI()); case -10: return Detail::shifted<-10>(dataI()); case -11: return Detail::shifted<-11>(dataI()); case -12: return Detail::shifted<-12>(dataI()); case -13: return Detail::shifted<-13>(dataI()); case -14: return Detail::shifted<-14>(dataI()); case -15: return Detail::shifted<-15>(dataI()); case -16: return Detail::shifted<-16>(dataI()); case -17: return Detail::shifted<-17>(dataI()); case -18: return Detail::shifted<-18>(dataI()); case -19: return Detail::shifted<-19>(dataI()); case -20: return Detail::shifted<-20>(dataI()); case -21: return Detail::shifted<-21>(dataI()); case -22: return Detail::shifted<-22>(dataI()); case -23: return Detail::shifted<-23>(dataI()); case -24: return Detail::shifted<-24>(dataI()); case -25: return Detail::shifted<-25>(dataI()); case -26: return Detail::shifted<-26>(dataI()); case -27: return Detail::shifted<-27>(dataI()); case -28: return Detail::shifted<-28>(dataI()); case -29: return Detail::shifted<-29>(dataI()); case -30: return Detail::shifted<-30>(dataI()); case -31: return Detail::shifted<-31>(dataI()); } return Zero(); } // }}}1 /* template<> Vc_INTRINSIC AVX2::Mask< 4, 32> &AVX2::Mask< 4, 32>::operator=(const std::array &values) { static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte"); unsigned int x = *reinterpret_cast(values.data()); x *= 0xffu; __m128i y = _mm_cvtsi32_si128(x); // 4 Bytes y 
= _mm_unpacklo_epi8(y, y); // 8 Bytes y = _mm_unpacklo_epi16(y, y); // 16 Bytes d.v() = AVX::avx_cast<__m256>(AVX::concat(_mm_unpacklo_epi32(y, y), _mm_unpackhi_epi32(y, y))); return *this; } template<> Vc_INTRINSIC AVX2::Mask< 8, 32> &AVX2::Mask< 8, 32>::operator=(const std::array &values) { static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte"); unsigned long long x = *reinterpret_cast(values.data()); x *= 0xffull; __m128i y = _mm_cvtsi64_si128(x); // 8 Bytes y = _mm_unpacklo_epi8(y, y); // 16 Bytes d.v() = AVX::avx_cast<__m256>(AVX::concat(_mm_unpacklo_epi16(y, y), _mm_unpackhi_epi16(y, y))); return *this; } template<> Vc_INTRINSIC AVX2::Mask< 8, 16> &AVX2::Mask< 8, 16>::operator=(const std::array &values) { static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte"); unsigned long long x = *reinterpret_cast(values.data()); x *= 0xffull; __m128i y = _mm_cvtsi64_si128(x); // 8 Bytes d.v() = AVX::avx_cast<__m128>(_mm_unpacklo_epi8(y, y)); return *this; } template<> Vc_INTRINSIC AVX2::Mask<16, 16> &AVX2::Mask<16, 16>::operator=(const std::array &values) { static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte"); __m128i x = _mm_loadu_si128(reinterpret_cast(values.data())); d.v() = _mm_andnot_ps(AVX::_mm_setallone_ps(), AVX::avx_cast<__m128>(_mm_sub_epi8(x, _mm_set1_epi8(1)))); return *this; } template<> Vc_INTRINSIC AVX2::Mask< 4, 32>::operator std::array() const { static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte"); __m128i x = _mm_packs_epi32(AVX::lo128(dataI()), AVX::hi128(dataI())); // 64bit -> 32bit x = _mm_packs_epi32(x, x); // 32bit -> 16bit x = _mm_srli_epi16(x, 15); x = _mm_packs_epi16(x, x); // 16bit -> 8bit std::array r; asm volatile("vmovd %1,%0" : "=m"(*r.data()) : "x"(x)); return r; } template<> Vc_INTRINSIC AVX2::Mask< 8, 32>::operator std::array() const { static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte"); __m128i x = _mm_packs_epi32(AVX::lo128(dataI()), AVX::hi128(dataI())); // 32bit -> 16bit x = _mm_srli_epi16(x, 15); x = _mm_packs_epi16(x, x); // 16bit -> 8bit std::array r; asm volatile("vmovq %1,%0" : "=m"(*r.data()) : "x"(x)); return r; } template<> Vc_INTRINSIC AVX2::Mask< 8, 16>::operator std::array() const { static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte"); __m128i x = _mm_srli_epi16(dataI(), 15); x = _mm_packs_epi16(x, x); // 16bit -> 8bit std::array r; asm volatile("vmovq %1,%0" : "=m"(*r.data()) : "x"(x)); return r; } template<> Vc_INTRINSIC AVX2::Mask<16, 16>::operator std::array() const { static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte"); __m128 x = _mm_and_ps(d.v(), AVX::avx_cast<__m128>(_mm_set1_epi32(0x01010101))); std::array r; asm volatile("vmovups %1,%0" : "=m"(*r.data()) : "x"(x)); return r; } */ } // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/avx/math.h000066400000000000000000000314011476554302100173160ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_AVX_MATH_H_ #define VC_AVX_MATH_H_ #include "const.h" #include "limits.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { // min & max {{{1 #ifdef Vc_IMPL_AVX2 Vc_ALWAYS_INLINE AVX2::int_v min(const AVX2::int_v &x, const AVX2::int_v &y) { return _mm256_min_epi32(x.data(), y.data()); } Vc_ALWAYS_INLINE AVX2::uint_v min(const AVX2::uint_v &x, const AVX2::uint_v &y) { return _mm256_min_epu32(x.data(), y.data()); } Vc_ALWAYS_INLINE AVX2::short_v min(const AVX2::short_v &x, const AVX2::short_v &y) { return _mm256_min_epi16(x.data(), y.data()); } Vc_ALWAYS_INLINE AVX2::ushort_v min(const AVX2::ushort_v &x, const AVX2::ushort_v &y) { return _mm256_min_epu16(x.data(), y.data()); } Vc_ALWAYS_INLINE AVX2::int_v max(const AVX2::int_v &x, const AVX2::int_v &y) { return _mm256_max_epi32(x.data(), y.data()); } Vc_ALWAYS_INLINE AVX2::uint_v max(const AVX2::uint_v &x, const AVX2::uint_v &y) { return _mm256_max_epu32(x.data(), y.data()); } Vc_ALWAYS_INLINE AVX2::short_v max(const AVX2::short_v &x, const AVX2::short_v &y) { return _mm256_max_epi16(x.data(), y.data()); } Vc_ALWAYS_INLINE AVX2::ushort_v max(const AVX2::ushort_v &x, const AVX2::ushort_v &y) { return _mm256_max_epu16(x.data(), y.data()); } #endif Vc_ALWAYS_INLINE AVX2::float_v min(const AVX2::float_v &x, const AVX2::float_v &y) { return _mm256_min_ps(x.data(), y.data()); } Vc_ALWAYS_INLINE AVX2::double_v min(const AVX2::double_v &x, const AVX2::double_v &y) { return _mm256_min_pd(x.data(), y.data()); } Vc_ALWAYS_INLINE AVX2::float_v max(const AVX2::float_v &x, const AVX2::float_v &y) { return _mm256_max_ps(x.data(), y.data()); } Vc_ALWAYS_INLINE AVX2::double_v max(const AVX2::double_v &x, const AVX2::double_v &y) { return _mm256_max_pd(x.data(), y.data()); } // sqrt {{{1 template Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector sqrt(const AVX2::Vector &x) { return AVX::VectorHelper::sqrt(x.data()); } // rsqrt {{{1 template Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector rsqrt(const AVX2::Vector &x) { return AVX::VectorHelper::rsqrt(x.data()); } // reciprocal {{{1 template Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector reciprocal(const AVX2::Vector &x) { return AVX::VectorHelper::reciprocal(x.data()); } // round {{{1 template Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector round(const AVX2::Vector &x) { return AVX::VectorHelper::round(x.data()); } // abs {{{1 Vc_INTRINSIC Vc_CONST AVX2::double_v abs(AVX2::double_v x) { return Detail::and_(x.data(), AVX::setabsmask_pd()); } Vc_INTRINSIC Vc_CONST AVX2::float_v abs(AVX2::float_v x) { return Detail::and_(x.data(), AVX::setabsmask_ps()); } #ifdef Vc_IMPL_AVX2 Vc_INTRINSIC Vc_CONST AVX2::int_v 
abs(AVX2::int_v x) { return _mm256_abs_epi32(x.data()); } Vc_INTRINSIC Vc_CONST AVX2::short_v abs(AVX2::short_v x) { return _mm256_abs_epi16(x.data()); } #endif // isfinite {{{1 Vc_ALWAYS_INLINE Vc_PURE AVX2::double_m isfinite(const AVX2::double_v &x) { return AVX::cmpord_pd(x.data(), _mm256_mul_pd(Detail::zero<__m256d>(), x.data())); } Vc_ALWAYS_INLINE Vc_PURE AVX2::float_m isfinite(const AVX2::float_v &x) { return AVX::cmpord_ps(x.data(), _mm256_mul_ps(Detail::zero<__m256>(), x.data())); } // isinf {{{1 Vc_ALWAYS_INLINE Vc_PURE AVX2::double_m isinf(const AVX2::double_v &x) { return _mm256_castsi256_pd(AVX::cmpeq_epi64( _mm256_castpd_si256(abs(x).data()), _mm256_castpd_si256(Detail::avx_broadcast(AVX::c_log::d(1))))); } Vc_ALWAYS_INLINE Vc_PURE AVX2::float_m isinf(const AVX2::float_v &x) { return _mm256_castsi256_ps( AVX::cmpeq_epi32(_mm256_castps_si256(abs(x).data()), _mm256_castps_si256(Detail::avx_broadcast(AVX::c_log::d(1))))); } // isnan {{{1 Vc_ALWAYS_INLINE Vc_PURE AVX2::double_m isnan(const AVX2::double_v &x) { return AVX::cmpunord_pd(x.data(), x.data()); } Vc_ALWAYS_INLINE Vc_PURE AVX2::float_m isnan(const AVX2::float_v &x) { return AVX::cmpunord_ps(x.data(), x.data()); } // copysign {{{1 Vc_INTRINSIC Vc_CONST AVX2::float_v copysign(AVX2::float_v mag, AVX2::float_v sign) { return _mm256_or_ps(_mm256_and_ps(sign.data(), AVX::setsignmask_ps()), _mm256_and_ps(mag.data(), AVX::setabsmask_ps())); } Vc_INTRINSIC Vc_CONST AVX2::double_v copysign(AVX2::double_v::AsArg mag, AVX2::double_v::AsArg sign) { return _mm256_or_pd(_mm256_and_pd(sign.data(), AVX::setsignmask_pd()), _mm256_and_pd(mag.data(), AVX::setabsmask_pd())); } //}}}1 // frexp {{{1 /** * splits \p v into exponent and mantissa, the sign is kept with the mantissa * * The return value will be in the range [0.5, 1.0[ * The \p e value will be an integer defining the power-of-two exponent */ inline AVX2::double_v frexp(AVX2::double_v::AsArg v, SimdArray *e) { const __m256d exponentBits = AVX::Const::exponentMask().dataD(); const __m256d exponentPart = _mm256_and_pd(v.data(), exponentBits); auto lo = AVX::avx_cast<__m128i>(AVX::lo128(exponentPart)); auto hi = AVX::avx_cast<__m128i>(AVX::hi128(exponentPart)); lo = _mm_sub_epi32(_mm_srli_epi64(lo, 52), _mm_set1_epi64x(0x3fe)); hi = _mm_sub_epi32(_mm_srli_epi64(hi, 52), _mm_set1_epi64x(0x3fe)); SSE::int_v exponent = Mem::shuffle(lo, hi); const __m256d exponentMaximized = _mm256_or_pd(v.data(), exponentBits); AVX2::double_v ret = _mm256_and_pd(exponentMaximized, _mm256_broadcast_sd(reinterpret_cast(&AVX::c_general::frexpMask))); const double_m zeroMask = v == AVX2::double_v::Zero(); ret(isnan(v) || !isfinite(v) || zeroMask) = v; exponent.setZero(simd_cast(zeroMask)); internal_data(*e) = exponent; return ret; } #ifdef Vc_IMPL_AVX2 inline SimdArray frexp(const SimdArray &v, SimdArray *e) { const __m256d exponentBits = AVX::Const::exponentMask().dataD(); const __m256d w[2] = {internal_data(internal_data0(v)).data(), internal_data(internal_data1(v)).data()}; const __m256i exponentPart[2] = { _mm256_castpd_si256(_mm256_and_pd(w[0], exponentBits)), _mm256_castpd_si256(_mm256_and_pd(w[1], exponentBits))}; const __m256i lo = _mm256_sub_epi32(_mm256_srli_epi64(exponentPart[0], 52), _mm256_set1_epi32(0x3fe)); // 0.1. 2.3. const __m256i hi = _mm256_sub_epi32(_mm256_srli_epi64(exponentPart[1], 52), _mm256_set1_epi32(0x3fe)); // 4.5. 6.7. const __m256i a = _mm256_unpacklo_epi32(lo, hi); // 04.. 26.. const __m256i b = _mm256_unpackhi_epi32(lo, hi); // 15.. 37.. 
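    // Added note (not part of the original Vc sources): the digit comments track which
    // element's frexp exponent occupies each 32-bit slot ('.' marks a don't-care slot).
    // `lo` carries elements 0-3 and `hi` elements 4-7, valid only in the low 32-bit
    // slot of each 64-bit lane after the shift by 52; the 32-bit unpacks above and the
    // 64-bit unpack/concat below gather them into the natural 0..7 order.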
const __m256i tmp = _mm256_unpacklo_epi32(a, b); // 0145 2367 const __m256i exponent = AVX::concat(_mm_unpacklo_epi64(AVX::lo128(tmp), AVX::hi128(tmp)), _mm_unpackhi_epi64(AVX::lo128(tmp), AVX::hi128(tmp))); // 0123 4567 const __m256d exponentMaximized[2] = {_mm256_or_pd(w[0], exponentBits), _mm256_or_pd(w[1], exponentBits)}; const auto frexpMask = _mm256_broadcast_sd(reinterpret_cast(&AVX::c_general::frexpMask)); fixed_size_simd ret = { fixed_size_simd( AVX::double_v(_mm256_and_pd(exponentMaximized[0], frexpMask))), fixed_size_simd( AVX::double_v(_mm256_and_pd(exponentMaximized[1], frexpMask)))}; const auto zeroMask = v == v.Zero(); ret(isnan(v) || !isfinite(v) || zeroMask) = v; internal_data(*e) = Detail::andnot_(simd_cast(zeroMask).dataI(), exponent); return ret; } #endif // Vc_IMPL_AVX2 namespace Detail { Vc_INTRINSIC AVX2::float_v::IndexType extractExponent(__m256 e) { SimdArray exponentPart; const auto ee = AVX::avx_cast<__m256i>(e); #ifdef Vc_IMPL_AVX2 exponentPart = AVX2::uint_v(ee); #else internal_data(internal_data0(exponentPart)) = AVX::lo128(ee); internal_data(internal_data1(exponentPart)) = AVX::hi128(ee); #endif return (exponentPart >> 23) - 0x7e; } } // namespace Detail inline AVX2::float_v frexp(AVX2::float_v::AsArg v, SimdArray *e) { using namespace Detail; using namespace AVX2; const __m256 exponentBits = Const::exponentMask().data(); *e = extractExponent(and_(v.data(), exponentBits)); const __m256 exponentMaximized = or_(v.data(), exponentBits); AVX2::float_v ret = _mm256_and_ps(exponentMaximized, avx_cast<__m256>(set1_epi32(0xbf7fffffu))); ret(isnan(v) || !isfinite(v) || v == AVX2::float_v::Zero()) = v; e->setZero(simd_cast(v == AVX2::float_v::Zero())); return ret; } // ldexp {{{1 /* -> x * 2^e * x == NaN -> NaN * x == (-)inf -> (-)inf */ inline AVX2::double_v ldexp(AVX2::double_v::AsArg v, const SimdArray &_e) { SSE::int_v e = internal_data(_e); e.setZero(simd_cast(v == AVX2::double_v::Zero())); const __m256i exponentBits = AVX::concat(_mm_slli_epi64(_mm_unpacklo_epi32(e.data(), e.data()), 52), _mm_slli_epi64(_mm_unpackhi_epi32(e.data(), e.data()), 52)); return AVX::avx_cast<__m256d>( AVX::add_epi64(AVX::avx_cast<__m256i>(v.data()), exponentBits)); } inline AVX2::float_v ldexp(AVX2::float_v::AsArg v, SimdArray e) { e.setZero(simd_cast(v == AVX2::float_v::Zero())); e <<= 23; #ifdef Vc_IMPL_AVX2 return {AVX::avx_cast<__m256>( AVX::concat(_mm_add_epi32(AVX::avx_cast<__m128i>(AVX::lo128(v.data())), AVX::lo128(internal_data(e).data())), _mm_add_epi32(AVX::avx_cast<__m128i>(AVX::hi128(v.data())), AVX::hi128(internal_data(e).data()))))}; #else return {AVX::avx_cast<__m256>( AVX::concat(_mm_add_epi32(AVX::avx_cast<__m128i>(AVX::lo128(v.data())), internal_data(internal_data0(e)).data()), _mm_add_epi32(AVX::avx_cast<__m128i>(AVX::hi128(v.data())), internal_data(internal_data1(e)).data())))}; #endif } // trunc {{{1 Vc_ALWAYS_INLINE AVX2::float_v trunc(AVX2::float_v::AsArg v) { return _mm256_round_ps(v.data(), 0x3); } Vc_ALWAYS_INLINE AVX2::double_v trunc(AVX2::double_v::AsArg v) { return _mm256_round_pd(v.data(), 0x3); } // floor {{{1 Vc_ALWAYS_INLINE AVX2::float_v floor(AVX2::float_v::AsArg v) { return _mm256_floor_ps(v.data()); } Vc_ALWAYS_INLINE AVX2::double_v floor(AVX2::double_v::AsArg v) { return _mm256_floor_pd(v.data()); } // ceil {{{1 Vc_ALWAYS_INLINE AVX2::float_v ceil(AVX2::float_v::AsArg v) { return _mm256_ceil_ps(v.data()); } Vc_ALWAYS_INLINE AVX2::double_v ceil(AVX2::double_v::AsArg v) { return _mm256_ceil_pd(v.data()); } // fma {{{1 template Vc_ALWAYS_INLINE Vector 
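// Added usage sketch (not part of the original Vc sources) for the frexp/ldexp
// overloads above, assuming their exponent-vector parameter is
// SimdArray<int, AVX2::double_v::size()> as suggested by the surrounding code:
//   SimdArray<int, AVX2::double_v::size()> e;
//   AVX2::double_v m = frexp(v, &e);  // |m| in [0.5, 1.0[ and v == m * 2^e
//   AVX2::double_v w = ldexp(m, e);   // reconstructs v; NaN and +-inf pass through
// The fma() overload defined next computes a * b + c per element via Detail::fma.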
fma(Vector a, Vector b, Vector c) { return Detail::fma(a.data(), b.data(), c.data(), T()); } // }}}1 } // namespace Vc #endif // VC_AVX_MATH_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/avx/shuffle.h000066400000000000000000000457061476554302100200360ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2011-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_AVX_SHUFFLE_H_ #define VC_AVX_SHUFFLE_H_ #include "../sse/shuffle.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Detail { template struct Permutation {}; template struct Mask {}; #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST __m256i blend(__m256i a, __m256i b, Mask) { static_assert((Sel0 == 0 || Sel0 == 1) && (Sel1 == 0 || Sel1 == 1) && (Sel2 == 0 || Sel2 == 1) && (Sel3 == 0 || Sel3 == 1) && (Sel4 == 0 || Sel4 == 1) && (Sel5 == 0 || Sel5 == 1) && (Sel6 == 0 || Sel6 == 1) && (Sel7 == 0 || Sel7 == 1) && (Sel8 == 0 || Sel8 == 1) && (Sel9 == 0 || Sel9 == 1) && (Sel10 == 0 || Sel10 == 1) && (Sel11 == 0 || Sel11 == 1) && (Sel12 == 0 || Sel12 == 1) && (Sel13 == 0 || Sel13 == 1) && (Sel14 == 0 || Sel14 == 1) && (Sel15 == 0 || Sel15 == 1), "Selectors must be 0 or 1 to select the value from a or b"); constexpr uint8_t mask = static_cast( (Sel0 << 0 ) | (Sel1 << 1 ) | (Sel2 << 2 ) | (Sel3 << 3 ) | (Sel4 << 4 ) | (Sel5 << 5 ) | (Sel6 << 6 ) | (Sel7 << 7 ) | (Sel8 << 8 ) | (Sel9 << 9 ) | (Sel10 << 10) | (Sel11 << 11) | (Sel12 << 12) | (Sel13 << 13) | (Sel14 << 14) | (Sel15 << 15)); return _mm256_blend_epi16(a, b, mask); } #endif // Vc_IMPL_AVX2 } // namespace Detail namespace Mem { #ifdef Vc_IMPL_AVX2 template static Vc_ALWAYS_INLINE __m256i Vc_CONST permuteLo(__m256i x) { static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range"); static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range"); return _mm256_shufflelo_epi16(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64); } template static Vc_ALWAYS_INLINE __m256i Vc_CONST permuteHi(__m256i x) { static_assert(Dst0 >= X4 && Dst1 >= X4 && Dst2 >= X4 && Dst3 >= X4, "Incorrect_Range"); static_assert(Dst0 <= X7 && Dst1 <= X7 && Dst2 <= X7 && Dst3 <= X7, "Incorrect_Range"); return _mm256_shufflehi_epi16(x, (Dst0 - X4) + (Dst1 - X4) * 4 + (Dst2 - X4) * 16 + (Dst3 - X4) * 64); } #endif // Vc_IMPL_AVX2 template static Vc_ALWAYS_INLINE __m256 Vc_CONST permute128(__m256 x) { static_assert((L >= X0 && L <= X1) || L == Const0, "Incorrect_Range"); static_assert((H >= X0 && H <= X1) || H == Const0, "Incorrect_Range"); return _mm256_permute2f128_ps( x, x, (L == Const0 ? 0x8 : L) + (H == Const0 ? 0x80 : H * (1 << 4))); } template static Vc_ALWAYS_INLINE __m256d Vc_CONST permute128(__m256d x) { static_assert((L >= X0 && L <= X1) || L == Const0, "Incorrect_Range"); static_assert((H >= X0 && H <= X1) || H == Const0, "Incorrect_Range"); return _mm256_permute2f128_pd( x, x, (L == Const0 ? 0x8 : L) + (H == Const0 ? 0x80 : H * (1 << 4))); } template static Vc_ALWAYS_INLINE __m256i Vc_CONST permute128(__m256i x) { static_assert((L >= X0 && L <= X1) || L == Const0, "Incorrect_Range"); static_assert((H >= X0 && H <= X1) || H == Const0, "Incorrect_Range"); #ifdef Vc_IMPL_AVX2 return _mm256_permute2x128_si256( x, x, (L == Const0 ? 0x8 : L) + (H == Const0 ? 0x80 : H * (1 << 4))); #else return _mm256_permute2f128_si256( x, x, (L == Const0 ? 0x8 : L) + (H == Const0 ? 0x80 : H * (1 << 4))); #endif } template static Vc_ALWAYS_INLINE __m256 Vc_CONST shuffle128(__m256 x, __m256 y) { static_assert(L >= X0 && H >= X0, "Incorrect_Range"); static_assert(L <= Y1 && H <= Y1, "Incorrect_Range"); return _mm256_permute2f128_ps(x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? 
H : H - Y0 + 2) * (1 << 4)); } template static Vc_ALWAYS_INLINE __m256i Vc_CONST shuffle128(__m256i x, __m256i y) { static_assert(L >= X0 && H >= X0, "Incorrect_Range"); static_assert(L <= Y1 && H <= Y1, "Incorrect_Range"); #ifdef Vc_IMPL_AVX2 return _mm256_permute2x128_si256( x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4)); #else return _mm256_permute2f128_si256( x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4)); #endif } template static Vc_ALWAYS_INLINE __m256d Vc_CONST shuffle128(__m256d x, __m256d y) { static_assert(L >= X0 && H >= X0, "Incorrect_Range"); static_assert(L <= Y1 && H <= Y1, "Incorrect_Range"); return _mm256_permute2f128_pd(x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4)); } template static Vc_ALWAYS_INLINE __m256d Vc_CONST permute(__m256d x) { static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X2 && Dst3 >= X2, "Incorrect_Range"); static_assert(Dst0 <= X1 && Dst1 <= X1 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range"); return _mm256_permute_pd(x, Dst0 + Dst1 * 2 + (Dst2 - X2) * 4 + (Dst3 - X2) * 8); } template static Vc_ALWAYS_INLINE __m256 Vc_CONST permute(__m256 x) { static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range"); static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range"); return _mm256_permute_ps(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64); } template static Vc_ALWAYS_INLINE __m256i Vc_CONST permute(__m256i x) { return _mm256_castps_si256(permute(_mm256_castsi256_ps(x))); } #ifdef Vc_IMPL_AVX2 template static Vc_ALWAYS_INLINE __m256i Vc_CONST permute4x64(__m256i x) { static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range"); static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range"); return _mm256_permute4x64_epi64(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64); } #endif // Vc_IMPL_AVX2 template static Vc_ALWAYS_INLINE __m256d Vc_CONST shuffle(__m256d x, __m256d y) { static_assert(Dst0 >= X0 && Dst1 >= Y0 && Dst2 >= X2 && Dst3 >= Y2, "Incorrect_Range"); static_assert(Dst0 <= X1 && Dst1 <= Y1 && Dst2 <= X3 && Dst3 <= Y3, "Incorrect_Range"); return _mm256_shuffle_pd(x, y, Dst0 + (Dst1 - Y0) * 2 + (Dst2 - X2) * 4 + (Dst3 - Y2) * 8); } template static Vc_ALWAYS_INLINE __m256 Vc_CONST shuffle(__m256 x, __m256 y) { static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= Y0 && Dst3 >= Y0, "Incorrect_Range"); static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= Y3 && Dst3 <= Y3, "Incorrect_Range"); return _mm256_shuffle_ps(x, y, Dst0 + Dst1 * 4 + (Dst2 - Y0) * 16 + (Dst3 - Y0) * 64); } template static Vc_ALWAYS_INLINE __m256 Vc_CONST blend(__m256 x, __m256 y) { static_assert(Dst0 == X0 || Dst0 == Y0, "Incorrect_Range"); static_assert(Dst1 == X1 || Dst1 == Y1, "Incorrect_Range"); static_assert(Dst2 == X2 || Dst2 == Y2, "Incorrect_Range"); static_assert(Dst3 == X3 || Dst3 == Y3, "Incorrect_Range"); static_assert(Dst4 == X4 || Dst4 == Y4, "Incorrect_Range"); static_assert(Dst5 == X5 || Dst5 == Y5, "Incorrect_Range"); static_assert(Dst6 == X6 || Dst6 == Y6, "Incorrect_Range"); static_assert(Dst7 == X7 || Dst7 == Y7, "Incorrect_Range"); return _mm256_blend_ps(x, y, (Dst0 / Y0) * 1 + (Dst1 / Y1) * 2 + (Dst2 / Y2) * 4 + (Dst3 / Y3) * 8 + (Dst4 / Y4) * 16 + (Dst5 / Y5) * 32 + (Dst6 / Y6) * 64 + (Dst7 / Y7) *128 ); } template static Vc_ALWAYS_INLINE __m256i Vc_CONST blend(__m256i x, __m256i y) { return _mm256_castps_si256(blend(_mm256_castsi256_ps(x), _mm256_castsi256_ps(y))); } template struct ScaleForBlend 
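// Added note (not part of the original Vc sources): ScaleForBlend remaps a selector
// from the upper half of x (X4..X7) onto the matching y-side index (Y0..Y3). The
// generic 8-element permute below uses it to fall back to a single lane-wise blend
// of the low and high 128-bit halves whenever every requested index keeps its lane
// position (Dst % X4 == lane).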
{ enum { Value = Dst >= X4 ? Dst - X4 + Y0 : Dst }; }; template static Vc_ALWAYS_INLINE __m256 Vc_CONST permute(__m256 x) { static_assert(Dst0 >= X0 && Dst0 <= X7, "Incorrect_Range"); static_assert(Dst1 >= X0 && Dst1 <= X7, "Incorrect_Range"); static_assert(Dst2 >= X0 && Dst2 <= X7, "Incorrect_Range"); static_assert(Dst3 >= X0 && Dst3 <= X7, "Incorrect_Range"); static_assert(Dst4 >= X0 && Dst4 <= X7, "Incorrect_Range"); static_assert(Dst5 >= X0 && Dst5 <= X7, "Incorrect_Range"); static_assert(Dst6 >= X0 && Dst6 <= X7, "Incorrect_Range"); static_assert(Dst7 >= X0 && Dst7 <= X7, "Incorrect_Range"); if (Dst0 + X4 == Dst4 && Dst1 + X4 == Dst5 && Dst2 + X4 == Dst6 && Dst3 + X4 == Dst7) { return permute(x); } const __m128 loIn = _mm256_castps256_ps128(x); const __m128 hiIn = _mm256_extractf128_ps(x, 1); __m128 lo, hi; if (Dst0 < X4 && Dst1 < X4 && Dst2 < X4 && Dst3 < X4) { lo = _mm_permute_ps(loIn, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64); } else if (Dst0 >= X4 && Dst1 >= X4 && Dst2 >= X4 && Dst3 >= X4) { lo = _mm_permute_ps(hiIn, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64); } else if (Dst0 < X4 && Dst1 < X4 && Dst2 >= X4 && Dst3 >= X4) { lo = shuffle(loIn, hiIn); } else if (Dst0 >= X4 && Dst1 >= X4 && Dst2 < X4 && Dst3 < X4) { lo = shuffle(hiIn, loIn); } else if (Dst0 == X0 && Dst1 == X4 && Dst2 == X1 && Dst3 == X5) { lo = _mm_unpacklo_ps(loIn, hiIn); } else if (Dst0 == X4 && Dst1 == X0 && Dst2 == X5 && Dst3 == X1) { lo = _mm_unpacklo_ps(hiIn, loIn); } else if (Dst0 == X2 && Dst1 == X6 && Dst2 == X3 && Dst3 == X7) { lo = _mm_unpackhi_ps(loIn, hiIn); } else if (Dst0 == X6 && Dst1 == X2 && Dst2 == X7 && Dst3 == X3) { lo = _mm_unpackhi_ps(hiIn, loIn); } else if (Dst0 % X4 == 0 && Dst1 % X4 == 1 && Dst2 % X4 == 2 && Dst3 % X4 == 3) { lo = blend::Value, ScaleForBlend::Value, ScaleForBlend::Value, ScaleForBlend::Value>(loIn, hiIn); } if (Dst4 >= X4 && Dst5 >= X4 && Dst6 >= X4 && Dst7 >= X4) { hi = _mm_permute_ps(hiIn, (Dst4 - X4) + (Dst5 - X4) * 4 + (Dst6 - X4) * 16 + (Dst7 - X4) * 64); } else if (Dst4 < X4 && Dst5 < X4 && Dst6 < X4 && Dst7 < X4) { hi = _mm_permute_ps(loIn, (Dst4 - X4) + (Dst5 - X4) * 4 + (Dst6 - X4) * 16 + (Dst7 - X4) * 64); } else if (Dst4 < X4 && Dst5 < X4 && Dst6 >= X4 && Dst7 >= X4) { hi = shuffle(loIn, hiIn); } else if (Dst4 >= X4 && Dst5 >= X4 && Dst6 < X4 && Dst7 < X4) { hi = shuffle(hiIn, loIn); } else if (Dst4 == X0 && Dst5 == X4 && Dst6 == X1 && Dst7 == X5) { hi = _mm_unpacklo_ps(loIn, hiIn); } else if (Dst4 == X4 && Dst5 == X0 && Dst6 == X5 && Dst7 == X1) { hi = _mm_unpacklo_ps(hiIn, loIn); } else if (Dst4 == X2 && Dst5 == X6 && Dst6 == X3 && Dst7 == X7) { hi = _mm_unpackhi_ps(loIn, hiIn); } else if (Dst4 == X6 && Dst5 == X2 && Dst6 == X7 && Dst7 == X3) { hi = _mm_unpackhi_ps(hiIn, loIn); } else if (Dst4 % X4 == 0 && Dst5 % X4 == 1 && Dst6 % X4 == 2 && Dst7 % X4 == 3) { hi = blend::Value, ScaleForBlend::Value, ScaleForBlend::Value, ScaleForBlend::Value>(loIn, hiIn); } return _mm256_insertf128_ps(_mm256_castps128_ps256(lo), hi, 1); } } // namespace Mem } // namespace Vc // little endian has the lo bits on the right and high bits on the left // with vectors this becomes greatly confusing: // Mem: abcd // Reg: dcba // // The shuffles and permutes above use memory ordering. 
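// (Added illustration, not part of the original Vc sources: for a __m256 holding
//  floats {a,b,c,d,e,f,g,h} in memory order, Mem::permute128<X1, X0>(v) swaps the
//  two 128-bit halves and returns {e,f,g,h,a,b,c,d} -- the template indices name
//  element positions as laid out in memory, not as printed from the register.)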
The ones below use register ordering: namespace Vc_VERSIONED_NAMESPACE { namespace Reg { template static Vc_ALWAYS_INLINE __m256 Vc_CONST permute128(__m256 x, __m256 y) { static_assert(L >= X0 && H >= X0, "Incorrect_Range"); static_assert(L <= Y1 && H <= Y1, "Incorrect_Range"); return _mm256_permute2f128_ps(x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4)); } template static Vc_ALWAYS_INLINE __m256i Vc_CONST permute128(__m256i x, __m256i y) { static_assert(L >= X0 && H >= X0, "Incorrect_Range"); static_assert(L <= Y1 && H <= Y1, "Incorrect_Range"); #ifdef Vc_IMPL_AVX2 return _mm256_permute2x128_si256( x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4)); #else return _mm256_permute2f128_si256( x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4)); #endif } template static Vc_ALWAYS_INLINE __m256d Vc_CONST permute128(__m256d x, __m256d y) { static_assert(L >= X0 && H >= X0, "Incorrect_Range"); static_assert(L <= Y1 && H <= Y1, "Incorrect_Range"); return _mm256_permute2f128_pd(x, y, (L < Y0 ? L : L - Y0 + 2) + (H < Y0 ? H : H - Y0 + 2) * (1 << 4)); } template static Vc_ALWAYS_INLINE __m256d Vc_CONST permute(__m256d x) { static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X2 && Dst3 >= X2, "Incorrect_Range"); static_assert(Dst0 <= X1 && Dst1 <= X1 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range"); return _mm256_permute_pd(x, Dst0 + Dst1 * 2 + (Dst2 - X2) * 4 + (Dst3 - X2) * 8); } template static Vc_ALWAYS_INLINE __m256 Vc_CONST permute(__m256 x) { static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range"); static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range"); return _mm256_permute_ps(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64); } template static Vc_ALWAYS_INLINE __m128d Vc_CONST permute(__m128d x) { static_assert(Dst0 >= X0 && Dst1 >= X0, "Incorrect_Range"); static_assert(Dst0 <= X1 && Dst1 <= X1, "Incorrect_Range"); return _mm_permute_pd(x, Dst0 + Dst1 * 2); } template static Vc_ALWAYS_INLINE __m128 Vc_CONST permute(__m128 x) { static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range"); static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range"); return _mm_permute_ps(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64); } template static Vc_ALWAYS_INLINE __m256d Vc_CONST shuffle(__m256d x, __m256d y) { static_assert(Dst0 >= X0 && Dst1 >= Y0 && Dst2 >= X2 && Dst3 >= Y2, "Incorrect_Range"); static_assert(Dst0 <= X1 && Dst1 <= Y1 && Dst2 <= X3 && Dst3 <= Y3, "Incorrect_Range"); return _mm256_shuffle_pd(x, y, Dst0 + (Dst1 - Y0) * 2 + (Dst2 - X2) * 4 + (Dst3 - Y2) * 8); } template static Vc_ALWAYS_INLINE __m256 Vc_CONST shuffle(__m256 x, __m256 y) { static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= Y0 && Dst3 >= Y0, "Incorrect_Range"); static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= Y3 && Dst3 <= Y3, "Incorrect_Range"); return _mm256_shuffle_ps(x, y, Dst0 + Dst1 * 4 + (Dst2 - Y0) * 16 + (Dst3 - Y0) * 64); } } // namespace Reg } // namespace Vc #endif // VC_AVX_SHUFFLE_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/simd_cast.h000066400000000000000000004131241476554302100203410ustar00rootroot00000000000000/* This file is part of the Vc library. 
{{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_AVX_SIMD_CAST_H_ #define VC_AVX_SIMD_CAST_H_ #ifndef VC_AVX_VECTOR_H_ #error "Vc/avx/vector.h needs to be included before Vc/avx/simd_cast.h" #endif #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { // Declarations: helper macros Vc_SIMD_CAST_AVX_[124] & Vc_SIMD_CAST_[124] {{{1 #define Vc_SIMD_CAST_AVX_1(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ AVX2::from_ x, enable_if::value> = nullarg) #define Vc_SIMD_CAST_AVX_2(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ AVX2::from_ x0, AVX2::from_ x1, \ enable_if::value> = nullarg) #define Vc_SIMD_CAST_AVX_3(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2, \ enable_if::value> = nullarg) #define Vc_SIMD_CAST_AVX_4(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2, AVX2::from_ x3, \ enable_if::value> = nullarg) #define Vc_SIMD_CAST_1(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ from_ x, enable_if::value> = nullarg) #define Vc_SIMD_CAST_2(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ from_ x0, from_ x1, enable_if::value> = nullarg) #define Vc_SIMD_CAST_3(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ from_ x0, from_ x1, from_ x2, enable_if::value> = nullarg) #define Vc_SIMD_CAST_4(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ from_ x0, from_ x1, from_ x2, from_ x3, \ enable_if::value> = nullarg) #define Vc_SIMD_CAST_5(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \ enable_if::value> = nullarg) #define Vc_SIMD_CAST_6(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, from_ x5, \ enable_if::value> = nullarg) #define Vc_SIMD_CAST_7(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, from_ x5, from_ x6, \ enable_if::value> = nullarg) #define Vc_SIMD_CAST_8(from_, to_) \ template \ Vc_INTRINSIC 
Vc_CONST To simd_cast( \ from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, from_ x5, from_ x6, from_ x7, \ enable_if::value> = nullarg) #define Vc_SIMD_CAST_OFFSET(from_, to_, offset_) \ static_assert(from_::size() >= to_::size() * (offset_ + 1), \ "this offset cannot exist for this type combination"); \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ from_ x, \ enable_if<(offset == offset_ && std::is_same::value)> = nullarg) // Declaration: SSE -> AVX where the AVX Vector is integral and thus of equal size() {{{1 // as the equivalent SSE Vector template Vc_INTRINSIC Vc_CONST To simd_cast(From x, enable_if<(AVX2::is_vector::value && SSE::is_vector::value && SSE::Vector::Size == To::Size)> = nullarg); template Vc_INTRINSIC Vc_CONST To simd_cast( From x0, From x1, enable_if<(AVX2::is_vector::value && SSE::is_vector::value && SSE::Vector::Size == To::Size)> = nullarg); template Vc_INTRINSIC Vc_CONST To simd_cast( From x0, From x1, From x2, enable_if<(AVX2::is_vector::value && SSE::is_vector::value && SSE::Vector::Size == To::Size)> = nullarg); template Vc_INTRINSIC Vc_CONST To simd_cast( From x0, From x1, From x2, From x3, enable_if<(AVX2::is_vector::value && SSE::is_vector::value && SSE::Vector::Size == To::Size)> = nullarg); template Vc_INTRINSIC Vc_CONST To simd_cast( From x0, From x1, From x2, From x3, From x4, From x5, From x6, From x7, enable_if<(AVX2::is_vector::value && SSE::is_vector::value && SSE::Vector::Size == To::Size)> = nullarg); // Declarations: Vector casts without offset {{{1 // AVX2::Vector {{{2 Vc_SIMD_CAST_AVX_1( float_v, double_v); Vc_SIMD_CAST_AVX_1(double_v, float_v); Vc_SIMD_CAST_AVX_2(double_v, float_v); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_1( int_v, double_v); Vc_SIMD_CAST_AVX_1( uint_v, double_v); Vc_SIMD_CAST_AVX_1( short_v, double_v); Vc_SIMD_CAST_AVX_1(ushort_v, double_v); Vc_SIMD_CAST_AVX_1( int_v, float_v); Vc_SIMD_CAST_AVX_1( uint_v, float_v); Vc_SIMD_CAST_AVX_1( short_v, float_v); Vc_SIMD_CAST_AVX_1(ushort_v, float_v); Vc_SIMD_CAST_AVX_1(double_v, int_v); Vc_SIMD_CAST_AVX_1( float_v, int_v); Vc_SIMD_CAST_AVX_1( uint_v, int_v); Vc_SIMD_CAST_AVX_1( short_v, int_v); Vc_SIMD_CAST_AVX_1(ushort_v, int_v); Vc_SIMD_CAST_AVX_2(double_v, int_v); Vc_SIMD_CAST_AVX_1(double_v, uint_v); Vc_SIMD_CAST_AVX_1( float_v, uint_v); Vc_SIMD_CAST_AVX_1( int_v, uint_v); Vc_SIMD_CAST_AVX_1( short_v, uint_v); Vc_SIMD_CAST_AVX_1(ushort_v, uint_v); Vc_SIMD_CAST_AVX_2(double_v, uint_v); Vc_SIMD_CAST_AVX_1(double_v, short_v); Vc_SIMD_CAST_AVX_1( float_v, short_v); Vc_SIMD_CAST_AVX_1( int_v, short_v); Vc_SIMD_CAST_AVX_1( uint_v, short_v); Vc_SIMD_CAST_AVX_1(ushort_v, short_v); Vc_SIMD_CAST_AVX_2(double_v, short_v); Vc_SIMD_CAST_AVX_2( float_v, short_v); Vc_SIMD_CAST_AVX_2( int_v, short_v); Vc_SIMD_CAST_AVX_2( uint_v, short_v); Vc_SIMD_CAST_AVX_3(double_v, short_v); Vc_SIMD_CAST_AVX_4(double_v, short_v); Vc_SIMD_CAST_AVX_1(double_v, ushort_v); Vc_SIMD_CAST_AVX_1( float_v, ushort_v); Vc_SIMD_CAST_AVX_1( int_v, ushort_v); Vc_SIMD_CAST_AVX_1( uint_v, ushort_v); Vc_SIMD_CAST_AVX_1( short_v, ushort_v); Vc_SIMD_CAST_AVX_2(double_v, ushort_v); Vc_SIMD_CAST_AVX_2( float_v, ushort_v); Vc_SIMD_CAST_AVX_2( int_v, ushort_v); Vc_SIMD_CAST_AVX_2( uint_v, ushort_v); Vc_SIMD_CAST_AVX_3(double_v, ushort_v); Vc_SIMD_CAST_AVX_4(double_v, ushort_v); #endif // 1 SSE::Vector to 1 AVX2::Vector {{{2 Vc_SIMD_CAST_1(SSE::double_v, AVX2::double_v); Vc_SIMD_CAST_1(SSE:: float_v, AVX2::double_v); Vc_SIMD_CAST_1(SSE:: int_v, AVX2::double_v); Vc_SIMD_CAST_1(SSE:: uint_v, AVX2::double_v); Vc_SIMD_CAST_1(SSE:: short_v, 
AVX2::double_v); Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::double_v); Vc_SIMD_CAST_1(SSE::double_v, AVX2:: float_v); Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: float_v); Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: float_v); Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: float_v); Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: float_v); Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: float_v); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: int_v); Vc_SIMD_CAST_1(SSE::double_v, AVX2:: uint_v); Vc_SIMD_CAST_1(SSE::double_v, AVX2:: short_v); Vc_SIMD_CAST_1(SSE::double_v, AVX2::ushort_v); Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: int_v); Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: uint_v); Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: short_v); Vc_SIMD_CAST_1(SSE:: float_v, AVX2::ushort_v); Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: int_v); Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: int_v); Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: int_v); Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: int_v); Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: uint_v); Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: uint_v); Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: uint_v); Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: uint_v); Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: short_v); Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: short_v); Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: short_v); Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: short_v); Vc_SIMD_CAST_1(SSE:: int_v, AVX2::ushort_v); Vc_SIMD_CAST_1(SSE:: uint_v, AVX2::ushort_v); Vc_SIMD_CAST_1(SSE:: short_v, AVX2::ushort_v); Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::ushort_v); #endif // 2 SSE::Vector to 1 AVX2::Vector {{{2 Vc_SIMD_CAST_2(SSE::double_v, AVX2::double_v); Vc_SIMD_CAST_2(SSE::double_v, AVX2:: float_v); Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: float_v); Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: float_v); Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: float_v); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: int_v); Vc_SIMD_CAST_2(SSE::double_v, AVX2:: uint_v); Vc_SIMD_CAST_2(SSE::double_v, AVX2:: short_v); Vc_SIMD_CAST_2(SSE::double_v, AVX2::ushort_v); Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: int_v); Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: uint_v); Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: short_v); Vc_SIMD_CAST_2(SSE:: float_v, AVX2::ushort_v); Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: int_v); Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: int_v); Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: uint_v); Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: uint_v); Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: short_v); Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: short_v); Vc_SIMD_CAST_2(SSE:: short_v, AVX2:: short_v); Vc_SIMD_CAST_2(SSE::ushort_v, AVX2:: short_v); Vc_SIMD_CAST_2(SSE:: int_v, AVX2::ushort_v); Vc_SIMD_CAST_2(SSE:: uint_v, AVX2::ushort_v); Vc_SIMD_CAST_2(SSE:: short_v, AVX2::ushort_v); Vc_SIMD_CAST_2(SSE::ushort_v, AVX2::ushort_v); #endif // 3 SSE::Vector to 1 AVX2::Vector {{{2 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: float_v); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: int_v); Vc_SIMD_CAST_3(SSE::double_v, AVX2:: uint_v); Vc_SIMD_CAST_3(SSE::double_v, AVX2:: short_v); Vc_SIMD_CAST_3(SSE::double_v, AVX2::ushort_v); Vc_SIMD_CAST_3(SSE:: float_v, AVX2:: short_v); Vc_SIMD_CAST_3(SSE:: float_v, AVX2::ushort_v); Vc_SIMD_CAST_3(SSE:: int_v, AVX2:: short_v); Vc_SIMD_CAST_3(SSE:: uint_v, AVX2:: short_v); Vc_SIMD_CAST_3(SSE:: int_v, AVX2::ushort_v); Vc_SIMD_CAST_3(SSE:: uint_v, AVX2::ushort_v); #endif // 4 SSE::Vector to 1 AVX2::Vector {{{2 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: float_v); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: int_v); Vc_SIMD_CAST_4(SSE::double_v, AVX2:: uint_v); Vc_SIMD_CAST_4(SSE::double_v, AVX2:: short_v); Vc_SIMD_CAST_4(SSE::double_v, AVX2::ushort_v); 
Vc_SIMD_CAST_4(SSE:: float_v, AVX2:: short_v); Vc_SIMD_CAST_4(SSE:: float_v, AVX2::ushort_v); Vc_SIMD_CAST_4(SSE:: int_v, AVX2:: short_v); Vc_SIMD_CAST_4(SSE:: uint_v, AVX2:: short_v); Vc_SIMD_CAST_4(SSE:: int_v, AVX2::ushort_v); Vc_SIMD_CAST_4(SSE:: uint_v, AVX2::ushort_v); #endif // 5 SSE::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_5(SSE::double_v, AVX2:: short_v); Vc_SIMD_CAST_5(SSE::double_v, AVX2::ushort_v); #endif // 6 SSE::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_6(SSE::double_v, AVX2:: short_v); Vc_SIMD_CAST_6(SSE::double_v, AVX2::ushort_v); #endif // 7 SSE::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_7(SSE::double_v, AVX2:: short_v); Vc_SIMD_CAST_7(SSE::double_v, AVX2::ushort_v); #endif // 8 SSE::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_8(SSE::double_v, AVX2:: short_v); Vc_SIMD_CAST_8(SSE::double_v, AVX2::ushort_v); #endif // 1 AVX2::Vector to 1 SSE::Vector {{{2 Vc_SIMD_CAST_1(AVX2::double_v, SSE::double_v); Vc_SIMD_CAST_1(AVX2::double_v, SSE:: float_v); Vc_SIMD_CAST_1(AVX2::double_v, SSE:: int_v); Vc_SIMD_CAST_1(AVX2::double_v, SSE:: uint_v); Vc_SIMD_CAST_1(AVX2::double_v, SSE:: short_v); Vc_SIMD_CAST_1(AVX2::double_v, SSE::ushort_v); Vc_SIMD_CAST_1(AVX2:: float_v, SSE::double_v); Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: float_v); Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: int_v); Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: uint_v); Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: short_v); Vc_SIMD_CAST_1(AVX2:: float_v, SSE::ushort_v); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(AVX2:: int_v, SSE::double_v); Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: float_v); Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: uint_v); Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: int_v); Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: short_v); Vc_SIMD_CAST_1(AVX2:: int_v, SSE::ushort_v); Vc_SIMD_CAST_1(AVX2:: uint_v, SSE::double_v); Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: float_v); Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: int_v); Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: uint_v); Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: short_v); Vc_SIMD_CAST_1(AVX2:: uint_v, SSE::ushort_v); Vc_SIMD_CAST_1(AVX2:: short_v, SSE::double_v); Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: float_v); Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: int_v); Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: uint_v); Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: short_v); Vc_SIMD_CAST_1(AVX2:: short_v, SSE::ushort_v); Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::double_v); Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: float_v); Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: int_v); Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: uint_v); Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: short_v); Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::ushort_v); #endif // 2 AVX2::Vector to 1 SSE::Vector {{{2 Vc_SIMD_CAST_2(AVX2::double_v, SSE:: short_v); Vc_SIMD_CAST_2(AVX2::double_v, SSE::ushort_v); // 1 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value> = nullarg); #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value> = nullarg); #endif // 2 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector 
x1, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value> = nullarg); #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value> = nullarg); #endif // 3 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value> = nullarg); #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value> = nullarg); #endif // 4 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value> = nullarg); #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value> = nullarg); #endif // 5 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, enable_if::value> = nullarg); #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, enable_if::value> = nullarg); #endif // 6 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return 
simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, enable_if::value> = nullarg); #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, enable_if::value> = nullarg); #endif // 7 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, enable_if::value> = nullarg); #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, enable_if::value> = nullarg); #endif // 8 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, enable_if::value> = nullarg); #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, enable_if::value> = nullarg); #endif // 9 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector 
x7, Scalar::Vector x8, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, enable_if::value> = nullarg); #endif // 10 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, enable_if::value> = nullarg); #endif // 11 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, enable_if::value> = nullarg); #endif // 12 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, enable_if::value> = nullarg); #endif // 13 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, enable_if::value> = nullarg); #endif // 14 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, 
Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, enable_if::value> = nullarg); #endif // 15 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, Scalar::Vector x14, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, Scalar::Vector x14, enable_if::value> = nullarg); #endif // 16 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, Scalar::Vector x14, Scalar::Vector x15, enable_if::value> = nullarg); template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, Scalar::Vector x14, Scalar::Vector x15, enable_if::value> = nullarg); #endif // 1 AVX2::Vector to 1 Scalar::Vector {{{2 template Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::Vector x, enable_if::value> = nullarg); // Declarations: Mask casts without offset {{{1 // 1 AVX2::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(const AVX2::Mask &k, enable_if::value> = nullarg); // 2 AVX2::Mask to 1 AVX2::Mask {{{2 Vc_SIMD_CAST_AVX_2(double_m, float_m); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_2(double_m, int_m); Vc_SIMD_CAST_AVX_2(double_m, uint_m); Vc_SIMD_CAST_AVX_2(double_m, short_m); Vc_SIMD_CAST_AVX_2(double_m, ushort_m); Vc_SIMD_CAST_AVX_2( float_m, short_m); Vc_SIMD_CAST_AVX_2( float_m, ushort_m); Vc_SIMD_CAST_AVX_2( int_m, short_m); Vc_SIMD_CAST_AVX_2( int_m, ushort_m); Vc_SIMD_CAST_AVX_2( uint_m, short_m); Vc_SIMD_CAST_AVX_2( uint_m, ushort_m); #endif // 4 AVX2::Mask to 1 AVX2::Mask {{{2 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_4(double_m, short_m); Vc_SIMD_CAST_AVX_4(double_m, ushort_m); #endif // 1 SSE::Mask to 1 AVX2::Mask {{{2 Vc_SIMD_CAST_1(SSE::double_m, AVX2::double_m); Vc_SIMD_CAST_1(SSE::double_m, AVX2:: float_m); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(SSE::double_m, AVX2:: int_m); Vc_SIMD_CAST_1(SSE::double_m, AVX2:: uint_m); Vc_SIMD_CAST_1(SSE::double_m, AVX2:: short_m); Vc_SIMD_CAST_1(SSE::double_m, AVX2::ushort_m); #endif Vc_SIMD_CAST_1(SSE:: float_m, AVX2::double_m); Vc_SIMD_CAST_1(SSE:: int_m, AVX2::double_m); Vc_SIMD_CAST_1(SSE:: uint_m, AVX2::double_m); Vc_SIMD_CAST_1(SSE:: short_m, AVX2::double_m); Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::double_m); Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: float_m); Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: float_m); Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: float_m); Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: float_m); 
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: float_m); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: int_m); Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: uint_m); Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: int_m); Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: uint_m); Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: int_m); Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: uint_m); Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: short_m); Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: short_m); Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: short_m); Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: short_m); Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: short_m); Vc_SIMD_CAST_1(SSE:: float_m, AVX2::ushort_m); Vc_SIMD_CAST_1(SSE:: int_m, AVX2::ushort_m); Vc_SIMD_CAST_1(SSE:: uint_m, AVX2::ushort_m); Vc_SIMD_CAST_1(SSE:: short_m, AVX2::ushort_m); Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::ushort_m); Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: int_m); Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: uint_m); Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: int_m); Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: uint_m); #endif // 2 SSE::Mask to 1 AVX2::Mask {{{2 Vc_SIMD_CAST_2(SSE::double_m, AVX2::double_m); Vc_SIMD_CAST_2(SSE::double_m, AVX2:: float_m); Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: float_m); Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: float_m); Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: float_m); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_2(SSE::double_m, AVX2:: int_m); Vc_SIMD_CAST_2(SSE::double_m, AVX2:: uint_m); Vc_SIMD_CAST_2(SSE::double_m, AVX2:: short_m); Vc_SIMD_CAST_2(SSE::double_m, AVX2::ushort_m); Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: int_m); Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: uint_m); Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: short_m); Vc_SIMD_CAST_2(SSE:: float_m, AVX2::ushort_m); Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: int_m); Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: uint_m); Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: short_m); Vc_SIMD_CAST_2(SSE:: int_m, AVX2::ushort_m); Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: int_m); Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: uint_m); Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: short_m); Vc_SIMD_CAST_2(SSE:: uint_m, AVX2::ushort_m); Vc_SIMD_CAST_2(SSE:: short_m, AVX2:: short_m); Vc_SIMD_CAST_2(SSE:: short_m, AVX2::ushort_m); Vc_SIMD_CAST_2(SSE::ushort_m, AVX2:: short_m); Vc_SIMD_CAST_2(SSE::ushort_m, AVX2::ushort_m); #endif // 4 SSE::Mask to 1 AVX2::Mask {{{2 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: float_m); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: int_m); Vc_SIMD_CAST_4(SSE::double_m, AVX2:: uint_m); Vc_SIMD_CAST_4(SSE::double_m, AVX2:: short_m); Vc_SIMD_CAST_4(SSE::double_m, AVX2::ushort_m); Vc_SIMD_CAST_4(SSE:: float_m, AVX2:: short_m); Vc_SIMD_CAST_4(SSE:: float_m, AVX2::ushort_m); Vc_SIMD_CAST_4(SSE:: int_m, AVX2:: short_m); Vc_SIMD_CAST_4(SSE:: int_m, AVX2::ushort_m); Vc_SIMD_CAST_4(SSE:: uint_m, AVX2:: short_m); Vc_SIMD_CAST_4(SSE:: uint_m, AVX2::ushort_m); #endif // 1 Scalar::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask k, enable_if::value> = nullarg); // 2 Scalar::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask k0, Scalar::Mask k1, enable_if::value> = nullarg); // 4 Scalar::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast( Scalar::Mask k0, Scalar::Mask k1, Scalar::Mask k2, Scalar::Mask k3, enable_if<(AVX2::is_mask::value && Return::Size >= 4)> = nullarg); // 8 Scalar::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast( Scalar::Mask k0, Scalar::Mask k1, Scalar::Mask k2, Scalar::Mask k3, Scalar::Mask k4, Scalar::Mask k5, Scalar::Mask k6, Scalar::Mask k7, enable_if<(AVX2::is_mask::value && Return::Size >= 8)> 
= nullarg); // 16 Scalar::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask k0, Scalar::Mask k1, Scalar::Mask k2, Scalar::Mask k3, Scalar::Mask k4, Scalar::Mask k5, Scalar::Mask k6, Scalar::Mask k7, Scalar::Mask k8, Scalar::Mask k9, Scalar::Mask k10, Scalar::Mask k11, Scalar::Mask k12, Scalar::Mask k13, Scalar::Mask k14, Scalar::Mask k15, enable_if<(AVX2::is_mask::value && Return::Size >= 16)> = nullarg); // 1 AVX2::Mask to 1 SSE::Mask {{{2 Vc_SIMD_CAST_1(AVX2::double_m, SSE::double_m); Vc_SIMD_CAST_1(AVX2::double_m, SSE:: float_m); Vc_SIMD_CAST_1(AVX2::double_m, SSE:: int_m); Vc_SIMD_CAST_1(AVX2::double_m, SSE:: uint_m); Vc_SIMD_CAST_1(AVX2::double_m, SSE:: short_m); Vc_SIMD_CAST_1(AVX2::double_m, SSE::ushort_m); Vc_SIMD_CAST_1(AVX2:: float_m, SSE::double_m); Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: float_m); Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: int_m); Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: uint_m); Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: short_m); Vc_SIMD_CAST_1(AVX2:: float_m, SSE::ushort_m); #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(AVX2:: int_m, SSE::double_m); Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: float_m); Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: int_m); Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: uint_m); Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: short_m); Vc_SIMD_CAST_1(AVX2:: int_m, SSE::ushort_m); Vc_SIMD_CAST_1(AVX2:: uint_m, SSE::double_m); Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: float_m); Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: int_m); Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: uint_m); Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: short_m); Vc_SIMD_CAST_1(AVX2:: uint_m, SSE::ushort_m); Vc_SIMD_CAST_1(AVX2:: short_m, SSE::double_m); Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: float_m); Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: int_m); Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: uint_m); Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: short_m); Vc_SIMD_CAST_1(AVX2:: short_m, SSE::ushort_m); Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::double_m); Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: float_m); Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: int_m); Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: uint_m); Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: short_m); Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::ushort_m); #endif // 2 AVX2::Mask to 1 SSE::Mask {{{2 Vc_SIMD_CAST_2(AVX2::double_m, SSE:: short_m); Vc_SIMD_CAST_2(AVX2::double_m, SSE::ushort_m); // 1 AVX2::Mask to 1 Scalar::Mask {{{2 template Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::Mask x, enable_if::value> = nullarg); // Declaration: offset == 0 | convert from AVX2::Mask/Vector {{{1 template Vc_INTRINSIC Vc_CONST enable_if< (offset == 0 && ((AVX2::is_vector::value && !Scalar::is_vector::value && Traits::is_simd_vector::value && !Traits::isSimdArray::value) || (AVX2::is_mask::value && !Scalar::is_mask::value && Traits::is_simd_mask::value && !Traits::isSimdMaskArray::value))), Return> simd_cast(const From &x); // Declaration: offset == 0 | convert from SSE::Mask/Vector to AVX2::Mask/Vector {{{1 template Vc_INTRINSIC Vc_CONST Return simd_cast( const From &x, enable_if::value && AVX2::is_vector::value) || (SSE::is_mask::value && AVX2::is_mask::value))> = nullarg); // Declarations: Vector casts with offset {{{1 // AVX2 to AVX2 {{{2 template Vc_INTRINSIC Vc_CONST enable_if<(AVX2::is_vector::value && offset != 0), Return> simd_cast(AVX2::Vector x); // AVX2 to SSE (Vector) {{{2 template Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector::value && sizeof(AVX2::Vector) == 32), Return> simd_cast(AVX2::Vector x); template Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector::value && sizeof(AVX2::Vector) == 16), 
Return> simd_cast(AVX2::Vector x); // SSE to AVX2 {{{2 Vc_SIMD_CAST_OFFSET(SSE:: short_v, AVX2::double_v, 1); Vc_SIMD_CAST_OFFSET(SSE::ushort_v, AVX2::double_v, 1); // Declarations: Mask casts with offset {{{1 // 1 AVX2::Mask to N AVX2::Mask {{{2 /* This declaration confuses GCC (4.9.2). If the declarations are there the definitions * are ignored by the compiler. ;-( template Vc_INTRINSIC_L Vc_CONST_L Return simd_cast(const AVX2::Mask &k, enable_if::value> = nullarg) Vc_INTRINSIC_R Vc_CONST_R; template Vc_INTRINSIC_L Vc_CONST_L Return simd_cast(const AVX2::Mask &k, enable_if::value> = nullarg) Vc_INTRINSIC_R Vc_CONST_R; template Vc_INTRINSIC_L Vc_CONST_L Return simd_cast(const AVX2::Mask &k, enable_if::value> = nullarg) Vc_INTRINSIC_R Vc_CONST_R; template Vc_INTRINSIC_L Vc_CONST_L Return simd_cast(const AVX2::Mask &k, enable_if::value> = nullarg) Vc_INTRINSIC_R Vc_CONST_R; */ // 1 SSE::Mask to N AVX2(2)::Mask {{{2 Vc_SIMD_CAST_OFFSET(SSE:: short_m, AVX2::double_m, 1); Vc_SIMD_CAST_OFFSET(SSE::ushort_m, AVX2::double_m, 1); // AVX2 to SSE (Mask) {{{2 template Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask::value && sizeof(AVX2::Mask) == 32), Return> simd_cast(AVX2::Mask x); template Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask::value && sizeof(AVX2::Mask) == 16), Return> simd_cast(AVX2::Mask x); // helper macros Vc_SIMD_CAST_AVX_[124] & Vc_SIMD_CAST_[124] {{{1 #undef Vc_SIMD_CAST_AVX_1 #define Vc_SIMD_CAST_AVX_1(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x, \ enable_if::value>) #undef Vc_SIMD_CAST_AVX_2 #define Vc_SIMD_CAST_AVX_2(from_, to_) \ static_assert(AVX2::from_::size() * 2 <= AVX2::to_::size(), \ "this type combination is wrong"); \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x0, AVX2::from_ x1, \ enable_if::value>) #undef Vc_SIMD_CAST_AVX_3 #define Vc_SIMD_CAST_AVX_3(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2, \ enable_if::value>) #undef Vc_SIMD_CAST_AVX_4 #define Vc_SIMD_CAST_AVX_4(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2, \ AVX2::from_ x3, \ enable_if::value>) #undef Vc_SIMD_CAST_1 #define Vc_SIMD_CAST_1(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(from_ x, enable_if::value>) #undef Vc_SIMD_CAST_2 #define Vc_SIMD_CAST_2(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, \ enable_if::value>) #undef Vc_SIMD_CAST_3 #define Vc_SIMD_CAST_3(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, \ enable_if::value>) #undef Vc_SIMD_CAST_4 #define Vc_SIMD_CAST_4(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, \ enable_if::value>) #undef Vc_SIMD_CAST_5 #define Vc_SIMD_CAST_5(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \ enable_if::value>) #undef Vc_SIMD_CAST_6 #define Vc_SIMD_CAST_6(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \ from_ x5, \ enable_if::value>) #undef Vc_SIMD_CAST_7 #define Vc_SIMD_CAST_7(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \ from_ x5, from_ x6, \ enable_if::value>) #undef Vc_SIMD_CAST_8 #define Vc_SIMD_CAST_8(from_, to_) \ template \ Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \ from_ x5, from_ x6, from_ 
x7, \ enable_if::value>) #undef Vc_SIMD_CAST_OFFSET #define Vc_SIMD_CAST_OFFSET(from_, to_, offset_) \ static_assert(from_::size() >= to_::size() * (offset_ + 1), \ "this offset cannot exist for this type combination"); \ template \ Vc_INTRINSIC Vc_CONST To simd_cast( \ from_ x, enable_if<(offset == offset_ && std::is_same::value)>) // SSE -> AVX2 where the AVX2 Vector is integral and thus of equal size() as the {{{1 // equivalent SSE Vector template Vc_INTRINSIC Vc_CONST To simd_cast(From x, enable_if<(AVX2::is_vector::value && SSE::is_vector::value && SSE::Vector::Size == To::Size)>) { return simd_cast>(x).data(); } template Vc_INTRINSIC Vc_CONST To simd_cast(From x0, From x1, enable_if<(AVX2::is_vector::value && SSE::is_vector::value && SSE::Vector::Size == To::Size)>) { return simd_cast>(x0, x1).data(); } template Vc_INTRINSIC Vc_CONST To simd_cast(From x0, From x1, From x2, enable_if<(AVX2::is_vector::value && SSE::is_vector::value && SSE::Vector::Size == To::Size)>) { return simd_cast>(x0, x1, x2).data(); } template Vc_INTRINSIC Vc_CONST To simd_cast(From x0, From x1, From x2, From x3, enable_if<(AVX2::is_vector::value && SSE::is_vector::value && SSE::Vector::Size == To::Size)>) { return simd_cast>(x0, x1, x2, x3).data(); } template Vc_INTRINSIC Vc_CONST To simd_cast(From x0, From x1, From x2, From x3, From x4, From x5, From x6, From x7, enable_if<(AVX2::is_vector::value && SSE::is_vector::value && SSE::Vector::Size == To::Size)>) { return simd_cast>(x0, x1, x2, x3, x4, x5, x6, x7) .data(); } // Vector casts without offset {{{1 // AVX2::Vector {{{2 // 1: to double_v {{{3 Vc_SIMD_CAST_AVX_1( float_v, double_v) { return _mm256_cvtps_pd(AVX::lo128(x.data())); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_1( int_v, double_v) { return AVX::convert< int, double>(AVX::lo128(x.data())); } Vc_SIMD_CAST_AVX_1( uint_v, double_v) { return AVX::convert< uint, double>(AVX::lo128(x.data())); } Vc_SIMD_CAST_AVX_1( short_v, double_v) { return AVX::convert< short, double>(AVX::lo128(x.data())); } Vc_SIMD_CAST_AVX_1(ushort_v, double_v) { return AVX::convert(AVX::lo128(x.data())); } #endif // 1: to float_v {{{3 Vc_SIMD_CAST_AVX_1(double_v, float_v) { return AVX::zeroExtend(_mm256_cvtpd_ps(x.data())); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_1( int_v, float_v) { return AVX::convert< int, float>(x.data()); } Vc_SIMD_CAST_AVX_1( uint_v, float_v) { return AVX::convert< uint, float>(x.data()); } Vc_SIMD_CAST_AVX_1( short_v, float_v) { return AVX::convert< short, float>(AVX::lo128(x.data())); } Vc_SIMD_CAST_AVX_1(ushort_v, float_v) { return AVX::convert(AVX::lo128(x.data())); } #endif // 2: to float_v {{{3 Vc_SIMD_CAST_AVX_2(double_v, float_v) { return AVX::concat(_mm256_cvtpd_ps(x0.data()), _mm256_cvtpd_ps(x1.data())); } // 1: to int_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_1(double_v, int_v) { return AVX::zeroExtend(_mm256_cvttpd_epi32(x.data())); } Vc_SIMD_CAST_AVX_1( float_v, int_v) { return _mm256_cvttps_epi32(x.data()); } Vc_SIMD_CAST_AVX_1( uint_v, int_v) { return x.data(); } Vc_SIMD_CAST_AVX_1( short_v, int_v) { return _mm256_cvtepi16_epi32(AVX::lo128(x.data())); } Vc_SIMD_CAST_AVX_1(ushort_v, int_v) { return _mm256_cvtepu16_epi32(AVX::lo128(x.data())); } #endif // 2: to int_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_2(double_v, int_v) { return AVX::concat(_mm256_cvttpd_epi32(x0.data()), _mm256_cvttpd_epi32(x1.data())); } #endif // 1: to uint_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_1(double_v, uint_v) { return AVX::zeroExtend(AVX::convert(x.data())); } Vc_SIMD_CAST_AVX_1( float_v, uint_v) { return 
_mm256_blendv_epi8( _mm256_cvttps_epi32(x.data()), _mm256_add_epi32( _mm256_cvttps_epi32(_mm256_sub_ps(x.data(), AVX::set2power31_ps())), AVX::set2power31_epu32()), _mm256_castps_si256(AVX::cmpge_ps(x.data(), AVX::set2power31_ps()))); } Vc_SIMD_CAST_AVX_1( int_v, uint_v) { return x.data(); } Vc_SIMD_CAST_AVX_1( short_v, uint_v) { return _mm256_cvtepi16_epi32(AVX::lo128(x.data())); } Vc_SIMD_CAST_AVX_1(ushort_v, uint_v) { return _mm256_cvtepu16_epi32(AVX::lo128(x.data())); } #endif // 2: to uint_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_2(double_v, uint_v) { return AVX::concat(AVX::convert(x0.data()), AVX::convert(x1.data())); } #endif // 1: to short_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_1(double_v, short_v) { return AVX::zeroExtend(_mm_packs_epi32(_mm256_cvttpd_epi32(x.data()), _mm_setzero_si128())); } Vc_SIMD_CAST_AVX_1( float_v, short_v) { const auto tmp = _mm256_cvttps_epi32(x.data()); return AVX::zeroExtend(_mm_packs_epi32(AVX::lo128(tmp), AVX::hi128(tmp))); } Vc_SIMD_CAST_AVX_1( int_v, short_v) { return AVX::zeroExtend(AVX::convert< int, short>(x.data())); } Vc_SIMD_CAST_AVX_1( uint_v, short_v) { return AVX::zeroExtend(AVX::convert(x.data())); } Vc_SIMD_CAST_AVX_1(ushort_v, short_v) { return x.data(); } #endif // 2: to short_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_2(double_v, short_v) { const auto tmp0 = _mm256_cvttpd_epi32(x0.data()); const auto tmp1 = _mm256_cvttpd_epi32(x1.data()); return AVX::zeroExtend(_mm_packs_epi32(tmp0, tmp1)); } Vc_SIMD_CAST_AVX_2( float_v, short_v) { using AVX2::short_v; using AVX2::int_v; return simd_cast(simd_cast(x0), simd_cast(x1)); } Vc_SIMD_CAST_AVX_2( int_v, short_v) { const auto shuf = _mm256_setr_epi8( 0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, 0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80); auto a = _mm256_shuffle_epi8(x0.data(), shuf); auto b = _mm256_shuffle_epi8(x1.data(), shuf); return Mem::permute4x64(_mm256_unpacklo_epi64(a, b)); } Vc_SIMD_CAST_AVX_2( uint_v, short_v) { const auto shuf = _mm256_setr_epi8( 0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, 0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80); auto a = _mm256_shuffle_epi8(x0.data(), shuf); auto b = _mm256_shuffle_epi8(x1.data(), shuf); return Mem::permute4x64(_mm256_unpacklo_epi64(a, b)); } #endif // 3: to short_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_3(double_v, short_v) { const auto tmp0 = _mm256_cvttpd_epi32(x0.data()); const auto tmp1 = _mm256_cvttpd_epi32(x1.data()); const auto tmp2 = _mm256_cvttpd_epi32(x2.data()); return AVX::concat(_mm_packs_epi32(tmp0, tmp1), _mm_packs_epi32(tmp2, _mm_setzero_si128())); } #endif // 4: to short_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_4(double_v, short_v) { const auto tmp0 = _mm256_cvttpd_epi32(x0.data()); const auto tmp1 = _mm256_cvttpd_epi32(x1.data()); const auto tmp2 = _mm256_cvttpd_epi32(x2.data()); const auto tmp3 = _mm256_cvttpd_epi32(x3.data()); return AVX::concat(_mm_packs_epi32(tmp0, tmp1), _mm_packs_epi32(tmp2, tmp3)); } #endif // 1: to ushort_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_1(double_v, ushort_v) { const auto tmp = _mm256_cvttpd_epi32(x.data()); return AVX::zeroExtend(_mm_packus_epi32(tmp, _mm_setzero_si128())); } Vc_SIMD_CAST_AVX_1( float_v, ushort_v) { const auto tmp = _mm256_cvttps_epi32(x.data()); return AVX::zeroExtend(_mm_packus_epi32(AVX::lo128(tmp), AVX::hi128(tmp))); } Vc_SIMD_CAST_AVX_1( int_v, ushort_v) { return AVX::zeroExtend(AVX::convert< int, 
ushort>(x.data())); } Vc_SIMD_CAST_AVX_1( uint_v, ushort_v) { return AVX::zeroExtend(AVX::convert(x.data())); } Vc_SIMD_CAST_AVX_1( short_v, ushort_v) { return x.data(); } #endif // 2: to ushort_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_2(double_v, ushort_v) { const auto tmp0 = _mm256_cvttpd_epi32(x0.data()); const auto tmp1 = _mm256_cvttpd_epi32(x1.data()); return AVX::zeroExtend(_mm_packus_epi32(tmp0, tmp1)); } Vc_SIMD_CAST_AVX_2( float_v, ushort_v) { using AVX2::ushort_v; using AVX2::int_v; return simd_cast(simd_cast(x0), simd_cast(x1)); } Vc_SIMD_CAST_AVX_2( int_v, ushort_v) { auto tmp0 = _mm256_unpacklo_epi16(x0.data(), x1.data()); auto tmp1 = _mm256_unpackhi_epi16(x0.data(), x1.data()); auto tmp2 = _mm256_unpacklo_epi16(tmp0, tmp1); auto tmp3 = _mm256_unpackhi_epi16(tmp0, tmp1); return Mem::permute4x64(_mm256_unpacklo_epi16(tmp2, tmp3)); } Vc_SIMD_CAST_AVX_2( uint_v, ushort_v) { auto tmp0 = _mm256_unpacklo_epi16(x0.data(), x1.data()); auto tmp1 = _mm256_unpackhi_epi16(x0.data(), x1.data()); auto tmp2 = _mm256_unpacklo_epi16(tmp0, tmp1); auto tmp3 = _mm256_unpackhi_epi16(tmp0, tmp1); return Mem::permute4x64(_mm256_unpacklo_epi16(tmp2, tmp3)); } #endif // 3: to ushort_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_3(double_v, ushort_v) { const auto tmp0 = _mm256_cvttpd_epi32(x0.data()); const auto tmp1 = _mm256_cvttpd_epi32(x1.data()); const auto tmp2 = _mm256_cvttpd_epi32(x2.data()); return AVX::concat(_mm_packus_epi32(tmp0, tmp1), _mm_packus_epi32(tmp2, _mm_setzero_si128())); } #endif // 4: to ushort_v {{{3 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_4(double_v, ushort_v) { const auto tmp0 = _mm256_cvttpd_epi32(x0.data()); const auto tmp1 = _mm256_cvttpd_epi32(x1.data()); const auto tmp2 = _mm256_cvttpd_epi32(x2.data()); const auto tmp3 = _mm256_cvttpd_epi32(x3.data()); return AVX::concat(_mm_packus_epi32(tmp0, tmp1), _mm_packus_epi32(tmp2, tmp3)); } #endif // 1 SSE::Vector to 1 AVX2::Vector {{{2 Vc_SIMD_CAST_1(SSE::double_v, AVX2::double_v) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: float_v, AVX2::double_v) { return _mm256_cvtps_pd(x.data()); } Vc_SIMD_CAST_1(SSE:: int_v, AVX2::double_v) { return _mm256_cvtepi32_pd(x.data()); } Vc_SIMD_CAST_1(SSE:: uint_v, AVX2::double_v) { using namespace AvxIntrinsics; return _mm256_add_pd(_mm256_cvtepi32_pd(_mm_sub_epi32(x.data(), _mm_setmin_epi32())), set1_pd(1u << 31)); } Vc_SIMD_CAST_1(SSE:: short_v, AVX2::double_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::double_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(SSE::double_v, AVX2:: float_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: float_v) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: float_v) { return AVX::zeroExtend(_mm_cvtepi32_ps(x.data())); } Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: float_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: float_v) { return AVX::convert< short, float>(x.data()); } Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: float_v) { return AVX::convert(x.data()); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: int_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE::double_v, AVX2:: uint_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE::double_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE::double_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: int_v) { return 
AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: uint_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: float_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: int_v) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: int_v) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: int_v) { return AVX::convert< short, int>(x.data()); } Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: int_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: uint_v) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: uint_v) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: uint_v) { return AVX::convert< short, uint>(x.data()); } Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: uint_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: short_v) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: short_v) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: int_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: uint_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: short_v, AVX2::ushort_v) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::ushort_v) { return AVX::zeroExtend(x.data()); } #endif // 2 SSE::Vector to 1 AVX2::Vector {{{2 Vc_SIMD_CAST_2(SSE::double_v, AVX2::double_v) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE::double_v, AVX2:: float_v) { return AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: float_v) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: float_v) { return AVX::convert< int, float>(AVX::concat(x0.data(), x1.data())); } Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: float_v) { return AVX::convert(AVX::concat(x0.data(), x1.data())); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: int_v) { return AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE::double_v, AVX2:: uint_v) { return AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE::double_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE::double_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: int_v) { return simd_cast(simd_cast(x0, x1)); } Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: uint_v) { return simd_cast(simd_cast(x0, x1)); } Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE:: float_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: int_v) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: int_v) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: uint_v) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: uint_v) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: short_v) { return 
AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE:: short_v, AVX2:: short_v) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE::ushort_v, AVX2:: short_v) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: int_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE:: uint_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast(x0, x1).data()); } Vc_SIMD_CAST_2(SSE:: short_v, AVX2::ushort_v) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE::ushort_v, AVX2::ushort_v) { return AVX::concat(x0.data(), x1.data()); } #endif // 3 SSE::Vector to 1 AVX2::Vector {{{2 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: float_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2)); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: int_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2)); } Vc_SIMD_CAST_3(SSE::double_v, AVX2:: uint_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2)); } Vc_SIMD_CAST_3(SSE::double_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast(x0, x1, x2).data()); } Vc_SIMD_CAST_3(SSE::double_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast(x0, x1, x2).data()); } Vc_SIMD_CAST_3(SSE:: float_v, AVX2:: short_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2)); } Vc_SIMD_CAST_3(SSE:: float_v, AVX2::ushort_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2)); } Vc_SIMD_CAST_3(SSE:: int_v, AVX2:: short_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2)); } Vc_SIMD_CAST_3(SSE:: uint_v, AVX2:: short_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2)); } Vc_SIMD_CAST_3(SSE:: int_v, AVX2::ushort_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2)); } Vc_SIMD_CAST_3(SSE:: uint_v, AVX2::ushort_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2)); } #endif // 4 SSE::Vector to 1 AVX2::Vector {{{2 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: float_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3)); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: int_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3)); } Vc_SIMD_CAST_4(SSE::double_v, AVX2:: uint_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3)); } Vc_SIMD_CAST_4(SSE::double_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast(x0, x1, x2, x3).data()); } Vc_SIMD_CAST_4(SSE::double_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast(x0, x1, x2, x3).data()); } Vc_SIMD_CAST_4(SSE:: float_v, AVX2:: short_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3)); } Vc_SIMD_CAST_4(SSE:: float_v, AVX2::ushort_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3)); } Vc_SIMD_CAST_4(SSE:: int_v, AVX2:: short_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3)); } Vc_SIMD_CAST_4(SSE:: uint_v, AVX2:: short_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3)); } Vc_SIMD_CAST_4(SSE:: int_v, AVX2::ushort_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3)); } Vc_SIMD_CAST_4(SSE:: uint_v, AVX2::ushort_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3)); } #endif // 5 SSE::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_5(SSE::double_v, AVX2:: short_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3), simd_cast(x4)); } Vc_SIMD_CAST_5(SSE::double_v, AVX2::ushort_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3), simd_cast(x4)); } #endif // 6 SSE::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_6(SSE::double_v, AVX2:: short_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3), simd_cast(x4, x5)); } Vc_SIMD_CAST_6(SSE::double_v, 
AVX2::ushort_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3), simd_cast(x4, x5)); } #endif // 7 SSE::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_7(SSE::double_v, AVX2:: short_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3), simd_cast(x4, x5), simd_cast(x6)); } Vc_SIMD_CAST_7(SSE::double_v, AVX2::ushort_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3), simd_cast(x4, x5), simd_cast(x6)); } #endif // 8 SSE::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_8(SSE::double_v, AVX2:: short_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3), simd_cast(x4, x5), simd_cast(x6, x7)); } Vc_SIMD_CAST_8(SSE::double_v, AVX2::ushort_v) { return simd_cast(simd_cast(x0, x1), simd_cast(x2, x3), simd_cast(x4, x5), simd_cast(x6, x7)); } #endif // 1 AVX2::Vector to 1 SSE::Vector {{{2 Vc_SIMD_CAST_1(AVX2::double_v, SSE::double_v) { return AVX::lo128(x.data()); } Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: float_v) { return AVX::lo128(x.data()); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: int_v) { return AVX::lo128(x.data()); } Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: uint_v) { return AVX::lo128(x.data()); } Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: short_v) { return AVX::lo128(x.data()); } Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::ushort_v) { return AVX::lo128(x.data()); } #endif Vc_SIMD_CAST_1(AVX2::double_v, SSE:: float_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2::double_v, SSE:: int_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(AVX2::double_v, SSE:: uint_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(AVX2::double_v, SSE:: short_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(AVX2::double_v, SSE::ushort_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(AVX2:: float_v, SSE::double_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: int_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: uint_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: short_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(AVX2:: float_v, SSE::ushort_v) { return AVX::convert(x.data()); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(AVX2:: int_v, SSE::double_v) { return SSE::convert(AVX::lo128(x.data())); } Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: float_v) { return SSE::convert(AVX::lo128(x.data())); } Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: uint_v) { return AVX::lo128(x.data()); } Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: short_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(AVX2:: int_v, SSE::ushort_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(AVX2:: uint_v, SSE::double_v) { return SSE::convert(AVX::lo128(x.data())); } Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: float_v) { return SSE::convert(AVX::lo128(x.data())); } Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: int_v) { return AVX::lo128(x.data()); } Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: short_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(AVX2:: uint_v, SSE::ushort_v) { return AVX::convert(x.data()); } Vc_SIMD_CAST_1(AVX2:: short_v, SSE::double_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: float_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: int_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: uint_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2:: short_v, SSE::ushort_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::double_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: float_v) { return 
simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: int_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: uint_v) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: short_v) { return simd_cast(simd_cast(x)); } #endif // 2 AVX2::Vector to 1 SSE::Vector {{{2 Vc_SIMD_CAST_2(AVX2::double_v, SSE:: short_v) { const auto tmp0 = _mm256_cvttpd_epi32(x0.data()); const auto tmp1 = _mm256_cvttpd_epi32(x1.data()); return _mm_packs_epi32(tmp0, tmp1); } Vc_SIMD_CAST_2(AVX2::double_v, SSE::ushort_v) { const auto tmp0 = _mm256_cvttpd_epi32(x0.data()); const auto tmp1 = _mm256_cvttpd_epi32(x1.data()); return _mm_packus_epi32(tmp0, tmp1); } // 1 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value>) { return AVX::zeroExtend(_mm_setr_pd(x.data(), 0.)); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value>) { return AVX::zeroExtend(_mm_setr_ps(x.data(), 0.f, 0.f, 0.f)); } #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value>) { return _mm256_setr_epi32(x.data(), 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value>) { return _mm256_setr_epi32(uint(x.data()), 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value>) { return _mm256_setr_epi16(x.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x, enable_if::value>) { return _mm256_setr_epi16(x.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } #endif // 2 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value>) { return AVX::zeroExtend(_mm_setr_pd(x0.data(), x1.data())); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value>) { return AVX::zeroExtend(_mm_setr_ps(x0.data(), x1.data(), 0.f, 0.f)); } #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value>) { return _mm256_setr_epi32(x0.data(), x1.data(), 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value>) { return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } #endif // 3 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value>) { return _mm256_setr_pd(x0.data(), x1.data(), x2.data(), 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value>) { return AVX::zeroExtend(_mm_setr_ps(x0.data(), x1.data(), x2.data(), 0)); } #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value>) { return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST 
Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value>) { return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()), 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } #endif // 4 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value>) { return _mm256_setr_pd(x0.data(), x1.data(), x2.data(), x3.data()); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value>) { return AVX::zeroExtend(_mm_setr_ps(x0.data(), x1.data(), x2.data(), x3.data())); } #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value>) { return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value>) { return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()), uint(x3.data()), 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } #endif // 5 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, enable_if::value>) { return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0); } #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, enable_if::value>) { return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, enable_if::value>) { return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()), uint(x3.data()), uint(x4.data()), 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0); } #endif // 6 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, enable_if::value>) { return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), 0, 0); } #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, enable_if::value>) { return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, enable_if::value>) { return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()), uint(x3.data()), uint(x4.data()), uint(x5.data()), 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } #endif // 7 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, enable_if::value>) { return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), 0); } #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, enable_if::value>) { return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, enable_if::value>) { return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()), uint(x3.data()), uint(x4.data()), uint(x5.data()), uint(x6.data()), 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0); } #endif // 8 Scalar::Vector to 1 AVX2::Vector {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, enable_if::value>) 
{ return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data()); } #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, enable_if::value>) { return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data()); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, enable_if::value>) { return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()), uint(x3.data()), uint(x4.data()), uint(x5.data()), uint(x6.data()), uint(x7.data())); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), 0, 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), 0, 0, 0, 0, 0, 0, 0, 0); } #endif // 9 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), 0, 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), 0, 0, 0, 0, 0, 0, 0); } #endif // 10 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), 0, 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), 0, 0, 0, 0, 0, 0); } #endif // 11 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector 
x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), 0, 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), 0, 0, 0, 0, 0); } #endif // 12 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), x11.data(), 0, 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), x11.data(), 0, 0, 0, 0); } #endif // 13 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), x11.data(), x12.data(), 0, 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), x11.data(), x12.data(), 0, 0, 0); } #endif // 14 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), x11.data(), x12.data(), x13.data(), 0, 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, 
Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), x11.data(), x12.data(), x13.data(), 0, 0); } #endif // 15 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, Scalar::Vector x14, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), x11.data(), x12.data(), x13.data(), x14.data(), 0); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, Scalar::Vector x14, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), x11.data(), x12.data(), x13.data(), x14.data(), 0); } #endif // 16 Scalar::Vector to 1 AVX2::Vector {{{2 #ifdef Vc_IMPL_AVX2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, Scalar::Vector x14, Scalar::Vector x15, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), x11.data(), x12.data(), x13.data(), x14.data(), x15.data()); } template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Vector x0, Scalar::Vector x1, Scalar::Vector x2, Scalar::Vector x3, Scalar::Vector x4, Scalar::Vector x5, Scalar::Vector x6, Scalar::Vector x7, Scalar::Vector x8, Scalar::Vector x9, Scalar::Vector x10, Scalar::Vector x11, Scalar::Vector x12, Scalar::Vector x13, Scalar::Vector x14, Scalar::Vector x15, enable_if::value>) { return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), x10.data(), x11.data(), x12.data(), x13.data(), x14.data(), x15.data()); } #endif // 1 AVX2::Vector to 1 Scalar::Vector {{{2 template Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::Vector x, enable_if::value>) { return static_cast(x[0]); } // Mask casts without offset {{{1 // 1 AVX2::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(const AVX2::Mask &k, enable_if::value>) { return {Detail::mask_cast::Size, Return::Size, typename Return::VectorTypeF>(k.dataI())}; } // 2 AVX2::Mask to 1 AVX2::Mask {{{2 Vc_SIMD_CAST_AVX_2(double_m, float_m) { return AVX::concat(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI()))); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_2(double_m, int_m) { return 
Mem::permute4x64(_mm256_packs_epi32(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_AVX_2(double_m, uint_m) { return Mem::permute4x64(_mm256_packs_epi32(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_AVX_2(double_m, short_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI())))); } Vc_SIMD_CAST_AVX_2(double_m, ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI())))); } Vc_SIMD_CAST_AVX_2( float_m, short_m) { return Mem::permute4x64(_mm256_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_AVX_2( float_m, ushort_m) { return Mem::permute4x64(_mm256_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_AVX_2( int_m, short_m) { return Mem::permute4x64(_mm256_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_AVX_2( int_m, ushort_m) { return Mem::permute4x64(_mm256_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_AVX_2( uint_m, short_m) { return Mem::permute4x64(_mm256_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_AVX_2( uint_m, ushort_m) { return Mem::permute4x64(_mm256_packs_epi16(x0.dataI(), x1.dataI())); } #endif // 4 AVX2::Mask to 1 AVX2::Mask {{{2 #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_AVX_4(double_m, short_m) { using namespace AVX; const auto tmp = _mm256_packs_epi32( _mm256_packs_epi32(x0.dataI(), x1.dataI()) // a0 a1 b0 b1 a2 a3 b2 b3 , _mm256_packs_epi32(x2.dataI(), x3.dataI()) // c0 c1 d0 d1 c2 c3 d2 d3 ); // a0 a1 b0 b1 c0 c1 d0 d1 a2 a3 b2 b3 c2 c3 d2 d3 return concat(_mm_unpacklo_epi32(lo128(tmp), hi128(tmp)), // a0 a1 a2 a3 b0 b1 b2 b3 _mm_unpackhi_epi32(lo128(tmp), hi128(tmp))); // c0 c1 c2 c3 d0 d1 d2 d3 } Vc_SIMD_CAST_AVX_4(double_m, ushort_m) { return simd_cast(x0, x1, x2, x3).data(); } #endif // 1 SSE::Mask to 1 AVX2::Mask {{{2 Vc_SIMD_CAST_1(SSE::double_m, AVX2::double_m) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE::double_m, AVX2:: float_m) { return AVX::zeroExtend(simd_cast(x).data()); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(SSE::double_m, AVX2:: int_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE::double_m, AVX2:: uint_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE::double_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE::double_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast(x).data()); } #endif Vc_SIMD_CAST_1(SSE:: float_m, AVX2::double_m) { return AVX::concat(_mm_unpacklo_ps(x.dataF(), x.dataF()), _mm_unpackhi_ps(x.dataF(), x.dataF())); } Vc_SIMD_CAST_1(SSE:: int_m, AVX2::double_m) { return AVX::concat(_mm_unpacklo_ps(x.dataF(), x.dataF()), _mm_unpackhi_ps(x.dataF(), x.dataF())); } Vc_SIMD_CAST_1(SSE:: uint_m, AVX2::double_m) { return AVX::concat(_mm_unpacklo_ps(x.dataF(), x.dataF()), _mm_unpackhi_ps(x.dataF(), x.dataF())); } Vc_SIMD_CAST_1(SSE:: short_m, AVX2::double_m) { auto tmp = _mm_unpacklo_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); } Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::double_m) { auto tmp = _mm_unpacklo_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); } Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: float_m) { return AVX::zeroExtend(x.dataF()); } Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: float_m) { return AVX::zeroExtend(x.dataF()); } Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: float_m) { return AVX::zeroExtend(x.dataF()); } 
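// [Editorial sketch, not part of Vc] The 2-to-1 mask narrowings above (for
// example double_m -> int_m, float_m -> short_m, int_m -> short_m) pack with
// _mm256_packs_epi32 / _mm256_packs_epi16, which interleave per 128-bit lane:
// the packed result holds the 64-bit blocks (a.lo, b.lo, a.hi, b.hi) and has
// to be put back into element order with a cross-lane permute, which is what
// the Mem::permute4x64 calls do. A minimal stand-alone illustration of that
// fix-up follows; the function name is made up for the example, and it assumes
// an AVX2 target with the usual intrinsics headers already included (as they
// are, transitively, in this file).
#ifdef Vc_IMPL_AVX2
static inline __m256i illustrate_pack_then_permute(__m256i a, __m256i b)
{
  // per-lane saturating pack of 32-bit to 16-bit elements; the 64-bit blocks
  // come out as a0..a3 | b0..b3 | a4..a7 | b4..b7
  const __m256i packed = _mm256_packs_epi32(a, b);
  // reorder the blocks as 0,2,1,3 (imm8 0xD8) to restore a0..a7 | b0..b7
  return _mm256_permute4x64_epi64(packed, 0xD8);
}
#endif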
Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: float_m) { return AVX::concat(_mm_unpacklo_epi16(x.dataI(), x.dataI()), _mm_unpackhi_epi16(x.dataI(), x.dataI())); } Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: float_m) { return AVX::concat(_mm_unpacklo_epi16(x.dataI(), x.dataI()), _mm_unpackhi_epi16(x.dataI(), x.dataI())); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: int_m) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: uint_m) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: int_m) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: uint_m) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: int_m) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: uint_m) { return AVX::zeroExtend(x.data()); } Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: float_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: int_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: uint_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: short_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast(x).data()); } Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: int_m) { const auto v = Mem::permute4x64(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); } Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: uint_m) { const auto v = Mem::permute4x64(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); } Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: int_m) { const auto v = Mem::permute4x64(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); } Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: uint_m) { const auto v = Mem::permute4x64(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); } #endif // 2 SSE::Mask to 1 AVX2::Mask {{{2 Vc_SIMD_CAST_2(SSE::double_m, AVX2::double_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE::double_m, AVX2:: float_m) { return AVX::zeroExtend(_mm_packs_epi32(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: float_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: float_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: float_m) { return AVX::concat(x0.data(), x1.data()); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_2(SSE::double_m, AVX2:: int_m) { return AVX::zeroExtend(_mm_packs_epi32(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_2(SSE::double_m, AVX2:: uint_m) { return AVX::zeroExtend(_mm_packs_epi32(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_2(SSE::double_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_setzero_si128())); } Vc_SIMD_CAST_2(SSE::double_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_setzero_si128())); } Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: int_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: 
float_m, AVX2:: uint_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_2(SSE:: float_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: int_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: uint_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_2(SSE:: int_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: int_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: uint_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_2(SSE:: uint_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); } Vc_SIMD_CAST_2(SSE:: short_m, AVX2:: short_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE:: short_m, AVX2::ushort_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE::ushort_m, AVX2:: short_m) { return AVX::concat(x0.data(), x1.data()); } Vc_SIMD_CAST_2(SSE::ushort_m, AVX2::ushort_m) { return AVX::concat(x0.data(), x1.data()); } #endif // 4 SSE::Mask to 1 AVX2::Mask {{{2 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: float_m) { return AVX::concat(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI())); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: int_m) { return AVX::concat(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI())); } Vc_SIMD_CAST_4(SSE::double_m, AVX2:: uint_m) { return AVX::concat(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI())); } Vc_SIMD_CAST_4(SSE::double_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI()))); } Vc_SIMD_CAST_4(SSE::double_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI()))); } Vc_SIMD_CAST_4(SSE:: float_m, AVX2:: short_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); } Vc_SIMD_CAST_4(SSE:: float_m, AVX2::ushort_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); } Vc_SIMD_CAST_4(SSE:: int_m, AVX2:: short_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); } Vc_SIMD_CAST_4(SSE:: int_m, AVX2::ushort_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); } Vc_SIMD_CAST_4(SSE:: uint_m, AVX2:: short_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); } Vc_SIMD_CAST_4(SSE:: uint_m, AVX2::ushort_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); } #endif // 1 Scalar::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask k, enable_if::value>) { Return r{false}; r[0] = k.data(); return r; } // 2 Scalar::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask k0, Scalar::Mask k1, 
enable_if::value>) { Return r{false}; r[0] = k0.data(); r[1] = k1.data(); return r; } // 4 Scalar::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask k0, Scalar::Mask k1, Scalar::Mask k2, Scalar::Mask k3, enable_if<(AVX2::is_mask::value && Return::Size >= 4)>) { Return r{false}; r[0] = k0.data(); r[1] = k1.data(); r[2] = k2.data(); r[3] = k3.data(); return r; } // 8 Scalar::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask k0, Scalar::Mask k1, Scalar::Mask k2, Scalar::Mask k3, Scalar::Mask k4, Scalar::Mask k5, Scalar::Mask k6, Scalar::Mask k7, enable_if<(AVX2::is_mask::value && Return::Size >= 8)>) { Return r{false}; r[0] = k0.data(); r[1] = k1.data(); r[2] = k2.data(); r[3] = k3.data(); r[4] = k4.data(); r[5] = k5.data(); r[6] = k6.data(); r[7] = k7.data(); return r; } // 16 Scalar::Mask to 1 AVX2::Mask {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask k0, Scalar::Mask k1, Scalar::Mask k2, Scalar::Mask k3, Scalar::Mask k4, Scalar::Mask k5, Scalar::Mask k6, Scalar::Mask k7, Scalar::Mask k8, Scalar::Mask k9, Scalar::Mask k10, Scalar::Mask k11, Scalar::Mask k12, Scalar::Mask k13, Scalar::Mask k14, Scalar::Mask k15, enable_if<(AVX2::is_mask::value && Return::Size >= 16)>) { Return r{false}; r[0] = k0.data(); r[1] = k1.data(); r[2] = k2.data(); r[3] = k3.data(); r[4] = k4.data(); r[5] = k5.data(); r[6] = k6.data(); r[7] = k7.data(); r[8] = k8.data(); r[9] = k9.data(); r[10] = k10.data(); r[11] = k11.data(); r[12] = k12.data(); r[13] = k13.data(); r[14] = k14.data(); r[15] = k15.data(); return r; } // 1 AVX2::Mask to 1 SSE::Mask {{{2 Vc_SIMD_CAST_1(AVX2::double_m, SSE::double_m) { return AVX::lo128(x.data()); } Vc_SIMD_CAST_1(AVX2::double_m, SSE:: float_m) { return _mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); } Vc_SIMD_CAST_1(AVX2::double_m, SSE:: int_m) { return _mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); } Vc_SIMD_CAST_1(AVX2::double_m, SSE:: uint_m) { return _mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); } Vc_SIMD_CAST_1(AVX2::double_m, SSE:: short_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())), _mm_setzero_si128()); } Vc_SIMD_CAST_1(AVX2::double_m, SSE::ushort_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())), _mm_setzero_si128()); } Vc_SIMD_CAST_1(AVX2:: float_m, SSE::double_m) { return _mm_unpacklo_ps(AVX::lo128(x.data()), AVX::lo128(x.data())); } Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: float_m) { return AVX::lo128(x.data()); } Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: int_m) { return AVX::lo128(x.data()); } Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: uint_m) { return AVX::lo128(x.data()); } Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: short_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); } Vc_SIMD_CAST_1(AVX2:: float_m, SSE::ushort_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); } #ifdef Vc_IMPL_AVX2 Vc_SIMD_CAST_1(AVX2:: int_m, SSE::double_m) { return _mm_unpacklo_epi32(AVX::lo128(x.dataI()), AVX::lo128(x.dataI())); } Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: float_m) { return AVX::lo128(x.dataI()); } Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: int_m) { return AVX::lo128(x.dataI()); } Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: uint_m) { return AVX::lo128(x.dataI()); } Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: short_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); } Vc_SIMD_CAST_1(AVX2:: int_m, SSE::ushort_m) { return 
_mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); } Vc_SIMD_CAST_1(AVX2:: uint_m, SSE::double_m) { return _mm_unpacklo_epi32(AVX::lo128(x.dataI()), AVX::lo128(x.dataI())); } Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: float_m) { return AVX::lo128(x.dataI()); } Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: int_m) { return AVX::lo128(x.dataI()); } Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: uint_m) { return AVX::lo128(x.dataI()); } Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: short_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); } Vc_SIMD_CAST_1(AVX2:: uint_m, SSE::ushort_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); } Vc_SIMD_CAST_1(AVX2:: short_m, SSE::double_m) { return simd_cast(SSE::short_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: float_m) { return simd_cast(SSE::short_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: int_m) { return simd_cast(SSE::short_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: uint_m) { return simd_cast(SSE::short_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: short_m) { return simd_cast(SSE::short_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2:: short_m, SSE::ushort_m) { return simd_cast(SSE::short_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::double_m) { return simd_cast(SSE::ushort_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: float_m) { return simd_cast(SSE::ushort_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: int_m) { return simd_cast(SSE::ushort_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: uint_m) { return simd_cast(SSE::ushort_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: short_m) { return simd_cast(SSE::ushort_m(AVX::lo128(x.data()))); } Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::ushort_m) { return simd_cast(SSE::ushort_m(AVX::lo128(x.data()))); } #endif // 2 AVX2::Mask to 1 SSE::Mask {{{2 Vc_SIMD_CAST_2(AVX2::double_m, SSE:: short_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI()))); } Vc_SIMD_CAST_2(AVX2::double_m, SSE::ushort_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI()))); } // 1 AVX2::Mask to 1 Scalar::Mask {{{2 template Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::Mask x, enable_if::value>) { return static_cast(x[0]); } // offset == 0 | convert from AVX2::Mask/Vector {{{1 template Vc_INTRINSIC Vc_CONST enable_if< (offset == 0 && ((AVX2::is_vector::value && !Scalar::is_vector::value && Traits::is_simd_vector::value && !Traits::isSimdArray::value) || (AVX2::is_mask::value && !Scalar::is_mask::value && Traits::is_simd_mask::value && !Traits::isSimdMaskArray::value))), Return> simd_cast(const From &x) { return simd_cast(x); } // offset == 0 | convert from SSE::Mask/Vector to AVX2::Mask/Vector {{{1 template Vc_INTRINSIC Vc_CONST Return simd_cast(const From &x, enable_if::value && AVX2::is_vector::value) || (SSE::is_mask::value && AVX2::is_mask::value))>) { return simd_cast(x); } // Vector casts with offset {{{1 // AVX2 to AVX2 {{{2 template Vc_INTRINSIC Vc_CONST enable_if<(AVX2::is_vector::value && offset != 0), Return> simd_cast(AVX2::Vector x) { // TODO: there certainly is potential for leaving out the shift/permute // instruction at the cost of a lot more specializations using V = AVX2::Vector; constexpr int shift = sizeof(T) * offset * Return::Size; 
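// --------------------------------------------------------------------------
// [Added illustrative sketch, not part of the original Vc sources.]  The
// offset-taking simd_cast overloads that start here slice a wide AVX vector
// into narrower pieces: offset 0 is the low part, offset 1 the next part,
// and so on.  A sketch, assuming <Vc/Vc> and an AVX build:
//
//   #include <Vc/Vc>
//
//   void split(Vc::AVX2::float_v w) {                       // entries 0..7
//     auto lo = Vc::simd_cast<Vc::SSE::float_v, 0>(w);      // entries 0..3
//     auto hi = Vc::simd_cast<Vc::SSE::float_v, 1>(w);      // entries 4..7
//     (void)lo; (void)hi;
//   }
// --------------------------------------------------------------------------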
static_assert(shift > 0 && shift < sizeof(x), ""); if (shift < 16) { return simd_cast(V{AVX::avx_cast( _mm_srli_si128(AVX::avx_cast<__m128i>(AVX::lo128(x.data())), shift))}); } else if (shift == 16) { return simd_cast(V{Mem::permute128(x.data())}); } else { #ifdef Vc_MSVC #pragma warning(push) #pragma warning(disable : 4556) // value of intrinsic immediate argument '-8' is out of // range '0 - 255' #endif return simd_cast(V{AVX::avx_cast( _mm_srli_si128(AVX::avx_cast<__m128i>(AVX::hi128(x.data())), shift - 16))}); #ifdef Vc_MSVC #pragma warning(pop) #endif } } // AVX2 to SSE (Vector) {{{2 template Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector::value && sizeof(AVX2::Vector) == 32), Return> simd_cast(AVX2::Vector x) { using V = AVX2::Vector; constexpr int shift = sizeof(V) / V::Size * offset * Return::Size; static_assert(shift > 0, ""); static_assert(shift < sizeof(V), ""); using SseVector = SSE::Vector; if (shift == 16) { return simd_cast(SseVector{AVX::hi128(x.data())}); } using Intrin = typename SseVector::VectorType; return simd_cast(SseVector{AVX::avx_cast( _mm_alignr_epi8(AVX::avx_cast<__m128i>(AVX::hi128(x.data())), AVX::avx_cast<__m128i>(AVX::lo128(x.data())), shift))}); } template Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector::value && sizeof(AVX2::Vector) == 16), Return> simd_cast(AVX2::Vector x) { using V = AVX2::Vector; constexpr int shift = sizeof(V) / V::Size * offset * Return::Size; static_assert(shift > 0, ""); static_assert(shift < sizeof(V), ""); using SseVector = SSE::Vector; return simd_cast(SseVector{_mm_srli_si128(x.data(), shift)}); } // SSE to AVX2 {{{2 Vc_SIMD_CAST_OFFSET(SSE:: short_v, AVX2::double_v, 1) { return simd_cast(simd_cast(x)); } Vc_SIMD_CAST_OFFSET(SSE::ushort_v, AVX2::double_v, 1) { return simd_cast(simd_cast(x)); } // Mask casts with offset {{{1 // 1 AVX2::Mask to N AVX2::Mask {{{2 // float_v and (u)int_v have size 8, double_v has size 4, and (u)short_v have size 16. Consequently, // offset can 0, 1, 2, or 3. // - offset == 0 is already done. 
// - offset == 1 can be 16 -> 8, 16 -> 4, 8 -> 4, and 16 -> 4 // - offset == 2 && offset == 3 can only be 16 -> 4 template Vc_INTRINSIC Vc_CONST Return simd_cast(const AVX2::Mask &k, enable_if<(AVX2::is_mask::value && offset == 1 && AVX2::Mask::Size == Return::Size * 2)> = nullarg) { const auto tmp = AVX::hi128(k.dataI()); return AVX::concat(_mm_unpacklo_epi8(tmp, tmp), _mm_unpackhi_epi8(tmp, tmp)); } template Vc_INTRINSIC Vc_CONST Return simd_cast(const AVX2::Mask &k, enable_if<(AVX2::is_mask::value && offset == 1 && AVX2::Mask::Size == Return::Size * 4)> = nullarg) { auto tmp = AVX::lo128(k.dataI()); tmp = _mm_unpackhi_epi8(tmp, tmp); return AVX::concat(_mm_unpacklo_epi16(tmp, tmp), _mm_unpackhi_epi16(tmp, tmp)); } template Vc_INTRINSIC Vc_CONST Return simd_cast(const AVX2::Mask &k, enable_if<(AVX2::is_mask::value && offset == 2 && AVX2::Mask::Size == Return::Size * 4)> = nullarg) { auto tmp = AVX::hi128(k.dataI()); tmp = _mm_unpacklo_epi8(tmp, tmp); return AVX::concat(_mm_unpacklo_epi16(tmp, tmp), _mm_unpackhi_epi16(tmp, tmp)); } template Vc_INTRINSIC Vc_CONST Return simd_cast(const AVX2::Mask &k, enable_if<(AVX2::is_mask::value && offset == 3 && AVX2::Mask::Size == Return::Size * 4)> = nullarg) { auto tmp = AVX::hi128(k.dataI()); tmp = _mm_unpackhi_epi8(tmp, tmp); return AVX::concat(_mm_unpacklo_epi16(tmp, tmp), _mm_unpackhi_epi16(tmp, tmp)); } // 1 SSE::Mask to N AVX2::Mask {{{2 Vc_SIMD_CAST_OFFSET(SSE:: short_m, AVX2::double_m, 1) { auto tmp = _mm_unpackhi_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); } Vc_SIMD_CAST_OFFSET(SSE::ushort_m, AVX2::double_m, 1) { auto tmp = _mm_unpackhi_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); } // AVX2 to SSE (Mask) {{{2 template Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask::value && sizeof(AVX2::Mask) == 32), Return> simd_cast(AVX2::Mask x) { using M = AVX2::Mask; constexpr int shift = sizeof(M) / M::Size * offset * Return::Size; static_assert(shift > 0, ""); static_assert(shift < sizeof(M), ""); using SseVector = SSE::Mask>; if (shift == 16) { return simd_cast(SseVector{AVX::hi128(x.data())}); } using Intrin = typename SseVector::VectorType; return simd_cast(SseVector{AVX::avx_cast( _mm_alignr_epi8(AVX::hi128(x.dataI()), AVX::lo128(x.dataI()), shift))}); } template Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask::value && sizeof(AVX2::Mask) == 16), Return> simd_cast(AVX2::Mask x) { return simd_cast(simd_cast>(x)); } // undef Vc_SIMD_CAST_AVX_[1234] & Vc_SIMD_CAST_[12345678] {{{1 #undef Vc_SIMD_CAST_AVX_1 #undef Vc_SIMD_CAST_AVX_2 #undef Vc_SIMD_CAST_AVX_3 #undef Vc_SIMD_CAST_AVX_4 #undef Vc_SIMD_CAST_1 #undef Vc_SIMD_CAST_2 #undef Vc_SIMD_CAST_3 #undef Vc_SIMD_CAST_4 #undef Vc_SIMD_CAST_5 #undef Vc_SIMD_CAST_6 #undef Vc_SIMD_CAST_7 #undef Vc_SIMD_CAST_8 #undef Vc_SIMD_CAST_OFFSET // }}}1 } // namespace Vc #endif // VC_AVX_SIMD_CAST_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/avx/simd_cast_caller.tcc000066400000000000000000000042431476554302100222030ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef Vc_AVX_SIMD_CAST_CALLER_TCC_ #define Vc_AVX_SIMD_CAST_CALLER_TCC_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { #if Vc_IS_VERSION_1 template template Vc_INTRINSIC Vector::Vector(U &&x) : d(simd_cast(std::forward(x)).data()) { } template template Vc_INTRINSIC Mask::Mask(U &&rhs, Common::enable_if_mask_converts_explicitly) : Mask(simd_cast(std::forward(rhs))) { } #endif // Vc_IS_VERSION_1 } #endif // Vc_AVX_SIMD_CAST_CALLER_TCC_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/avx/types.h000066400000000000000000000105321476554302100175330ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_AVX_TYPES_H_ #define VC_AVX_TYPES_H_ #include "../sse/types.h" #include "../traits/type_traits.h" #include "macros.h" #ifdef Vc_DEFAULT_IMPL_AVX2 #define Vc_DOUBLE_V_SIZE 4 #define Vc_FLOAT_V_SIZE 8 #define Vc_INT_V_SIZE 8 #define Vc_UINT_V_SIZE 8 #define Vc_SHORT_V_SIZE 16 #define Vc_USHORT_V_SIZE 16 #elif defined Vc_DEFAULT_IMPL_AVX #define Vc_DOUBLE_V_SIZE 4 #define Vc_FLOAT_V_SIZE 8 #define Vc_INT_V_SIZE 4 #define Vc_UINT_V_SIZE 4 #define Vc_SHORT_V_SIZE 8 #define Vc_USHORT_V_SIZE 8 #endif namespace Vc_VERSIONED_NAMESPACE { namespace AVX { template using Vector = Vc::Vector>; typedef Vector double_v; typedef Vector float_v; typedef Vector int_v; typedef Vector uint_v; typedef Vector short_v; typedef Vector ushort_v; template using Mask = Vc::Mask>; typedef Mask double_m; typedef Mask float_m; typedef Mask int_m; typedef Mask uint_m; typedef Mask short_m; typedef Mask ushort_m; template struct Const; template struct is_vector : public std::false_type {}; template struct is_vector> : public std::true_type {}; template struct is_mask : public std::false_type {}; template struct is_mask> : public std::true_type {}; } // namespace AVX namespace AVX2 { template using Vector = Vc::Vector; using double_v = Vector; using float_v = Vector< float>; using int_v = Vector< int>; using uint_v = Vector< uint>; using short_v = Vector< short>; using ushort_v = Vector; template using Mask = Vc::Mask; using double_m = Mask; using float_m = Mask< float>; using llong_m = Mask< llong>; using ullong_m = Mask; using long_m = Mask< long>; using ulong_m = Mask< ulong>; using int_m = Mask< int>; using uint_m = Mask< uint>; using short_m = Mask< short>; using ushort_m = Mask; using schar_m = Mask< schar>; using uchar_m = Mask< uchar>; template struct is_vector : public std::false_type {}; template struct is_vector> : public std::true_type {}; template struct is_mask : public std::false_type {}; template struct is_mask> : public std::true_type {}; } // namespace AVX2 namespace Traits { template struct is_simd_vector_internal> : public is_valid_vector_argument {}; template struct is_simd_mask_internal> : public std::true_type {}; } // namespace Traits } // namespace Vc #endif // VC_AVX_TYPES_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/vector.h000066400000000000000000000564351476554302100177050ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_AVX_VECTOR_H_ #define VC_AVX_VECTOR_H_ #include "intrinsics.h" #include "casts.h" #include "../sse/vector.h" #include "shuffle.h" #include "vectorhelper.h" #include "mask.h" #include #include #include "../common/aliasingentryhelper.h" #include "../common/memoryfwd.h" #include "../common/where.h" #include "macros.h" #ifdef isfinite #undef isfinite #endif #ifdef isnan #undef isnan #endif namespace Vc_VERSIONED_NAMESPACE { namespace Detail { template struct VectorTraits { using mask_type = Vc::Mask; using vector_type = Vc::Vector; using writemasked_vector_type = Common::WriteMaskedVector; using intrinsic_type = typename AVX::VectorTypeHelper::Type; }; } // namespace Detail #define Vc_CURRENT_CLASS_NAME Vector template class Vector { public: using abi = VectorAbi::Avx; private: using traits_type = Detail::VectorTraits; static_assert( std::is_arithmetic::value, "Vector only accepts arithmetic builtin types as template parameter T."); using WriteMaskedVector = typename traits_type::writemasked_vector_type; public: using VectorType = typename traits_type::intrinsic_type; using vector_type = VectorType; using mask_type = typename traits_type::mask_type; using Mask = mask_type; using MaskType = mask_type; using MaskArg Vc_DEPRECATED_ALIAS("Use MaskArgument instead.") = typename Mask::AsArg; using MaskArgument = typename Mask::AsArg; using reference = Detail::ElementReference; Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(VectorType)); using EntryType = T; using value_type = EntryType; typedef EntryType VectorEntryType; static constexpr size_t Size = sizeof(VectorType) / sizeof(EntryType); static constexpr size_t MemoryAlignment = alignof(VectorType); using IndexType = fixed_size_simd; using index_type = IndexType; typedef Vector AsArg; typedef VectorType VectorTypeArg; protected: template using V = Vector; // helper that specializes on VectorType typedef AVX::VectorHelper HV; // helper that specializes on T typedef AVX::VectorHelper HT; // cast any m256/m128 to VectorType template static Vc_INTRINSIC VectorType _cast(V v) { return AVX::avx_cast(v); } typedef Common::VectorMemoryUnion StorageType; StorageType d; using WidthT = Common::WidthT; // ICC can't compile this: // static constexpr WidthT Width = WidthT(); public: #include "../common/generalinterface.h" static Vc_ALWAYS_INLINE_L Vector Random() Vc_ALWAYS_INLINE_R; /////////////////////////////////////////////////////////////////////////////////////////// // internal: required to enable returning objects of VectorType Vc_ALWAYS_INLINE Vector(VectorTypeArg x) : d(x) {} // implict conversion from compatible Vector template Vc_INTRINSIC Vector( V x, typename std::enable_if::value, void *>::type = nullptr) : d(AVX::convert(x.data())) { } #if Vc_IS_VERSION_1 // static_cast from the remaining Vector template Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between " "vector types") Vc_INTRINSIC explicit Vector( V x, typename std::enable_if::value, void *>::type = nullptr) : 
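// --------------------------------------------------------------------------
// [Added illustrative sketch, not part of the original Vc sources.]  The
// Size and MemoryAlignment members defined above (together with the
// Vc_*_V_SIZE macros from types.h) drive user-side buffer layout.  A sketch,
// assuming <Vc/Vc> with the AVX implementation as the default, so that
// Vc::float_v is the 8-wide vector declared here; buf is a placeholder name:
//
//   #include <Vc/Vc>
//
//   void roundtrip() {
//     alignas(Vc::float_v::MemoryAlignment)
//         float buf[Vc::float_v::Size] = {};
//     Vc::float_v v(buf, Vc::Aligned);   // aligned load constructor
//     v += 1.f;                          // broadcast add on all entries
//     v.store(buf, Vc::Aligned);         // aligned store
//   }
// --------------------------------------------------------------------------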
d(Detail::zeroExtendIfNeeded(AVX::convert(x.data()))) { } // static_cast from other types, implemented via the non-member simd_cast function in // simd_cast_caller.tcc template ::value && !std::is_same>::value>> Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between " "vector types") Vc_INTRINSIC_L explicit Vector(U &&x) Vc_INTRINSIC_R; #endif Vc_INTRINSIC explicit Vector(reference a) : Vector(static_cast(a)) {} /////////////////////////////////////////////////////////////////////////////////////////// // broadcast Vc_INTRINSIC Vector(EntryType a) : d(Detail::avx_broadcast(a)) {} template Vc_INTRINSIC Vector(U a, typename std::enable_if::value && !std::is_same::value, void *>::type = nullptr) : Vector(static_cast(a)) { } //template explicit Vector(std::initializer_list) { static_assert(std::is_same::value, "A SIMD vector object cannot be initialized from an initializer list " "because the number of entries in the vector is target-dependent."); } #include "../common/loadinterface.h" #include "../common/storeinterface.h" /////////////////////////////////////////////////////////////////////////////////////////// // zeroing Vc_INTRINSIC_L void setZero() Vc_INTRINSIC_R; Vc_INTRINSIC_L void setZero(const Mask &k) Vc_INTRINSIC_R; Vc_INTRINSIC_L void setZeroInverted(const Mask &k) Vc_INTRINSIC_R; Vc_INTRINSIC_L void setQnan() Vc_INTRINSIC_R; Vc_INTRINSIC_L void setQnan(MaskArgument k) Vc_INTRINSIC_R; #include "../common/gatherinterface.h" #include "../common/scatterinterface.h" #if defined Vc_IMPL_AVX2 && !defined Vc_MSVC // skip this code for MSVC because it fails to do overload resolution correctly //////////////////////////////////////////////////////////////////////////////// // non-converting pd, ps, and epi32 gathers template ::size(), class = enable_if<(Vector::size() >= size() && sizeof(T) >= 4)>> Vc_INTRINSIC void gatherImplementation( const Common::GatherArguments, Scale> &args) { d.v() = AVX::gather( args.address, simd_cast>(args.indexes) .data()); } // masked overload template ::size(), class = enable_if<(Vector::size() >= size() && sizeof(T) >= 4)>> Vc_INTRINSIC void gatherImplementation( const Common::GatherArguments, Scale> &args, MaskArgument k) { d.v() = AVX::gather( d.v(), k.data(), args.address, simd_cast>(args.indexes) .data()); } //////////////////////////////////////////////////////////////////////////////// // converting (from 8-bit and 16-bit integers only) epi16 gather emulation via // epi32 gathers template < class MT, class U, class A, int Scale, class = enable_if<(sizeof(T) == 2 && std::is_integral::value && (sizeof(MT) <= 2) && Vector::size() >= size())>> Vc_INTRINSIC void gatherImplementation( const Common::GatherArguments, Scale> &args) { using AVX2::int_v; const auto idx0 = simd_cast(args.indexes).data(); const auto idx1 = simd_cast(args.indexes).data(); *this = simd_cast(int_v(AVX::gather( aliasing_cast(args.address), idx0)), int_v(AVX::gather( aliasing_cast(args.address), idx1))); if (sizeof(MT) == 1) { if (std::is_signed::value) { using Signed = AVX2::Vector::type>; *this = (simd_cast(*this) << 8) >> 8; // sign extend } else { *this &= 0xff; } } } // masked overload template < class MT, class U, class A, int Scale, class = enable_if<(sizeof(T) == 2 && std::is_integral::value && (sizeof(MT) <= 2) && Vector::size() >= size())>> Vc_INTRINSIC void gatherImplementation( const Common::GatherArguments, Scale> &args, MaskArgument k) { using AVX2::int_v; const auto idx0 = simd_cast(args.indexes).data(); const auto idx1 = 
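// --------------------------------------------------------------------------
// [Added illustrative sketch, not part of the original Vc sources.]  The
// gatherImplementation overloads above back the public gather() interface:
// for 32-bit element types they map onto the AVX2 hardware gather, while
// 16-bit element types are emulated via two epi32 gathers as shown.  A
// sketch of the user-facing call, assuming <Vc/Vc>, an AVX2 build, and a
// placeholder array named data:
//
//   #include <Vc/Vc>
//
//   void gather_demo(const float *data /* at least 16 elements */) {
//     Vc::int_v idx = Vc::int_v::IndexesFromZero() * 2;  // 0, 2, 4, ...
//     Vc::float_v v;
//     v.gather(data, idx);               // v[i] = data[idx[i]]
//     // masked variant: entries with a false mask bit are left untouched
//     v.gather(data, idx, v > 0.f);
//   }
// --------------------------------------------------------------------------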
simd_cast(args.indexes).data(); const auto k0 = simd_cast(k).data(); const auto k1 = simd_cast(k).data(); auto v = simd_cast( int_v(AVX::gather( _mm256_setzero_si256(), k0, aliasing_cast(args.address), idx0)), int_v(AVX::gather( _mm256_setzero_si256(), k1, aliasing_cast(args.address), idx1))); if (sizeof(MT) == 1) { if (std::is_signed::value) { using Signed = AVX2::Vector::type>; v = (simd_cast(v) << 8) >> 8; // sign extend } else { v &= 0xff; } } assign(v, k); } //////////////////////////////////////////////////////////////////////////////// // all remaining converting gathers template Vc_INTRINSIC enable_if<((sizeof(T) != 2 || sizeof(MT) > 2) && Traits::is_valid_vector_argument::value && !std::is_same::value && Vector::size() >= size()), void> gatherImplementation(const Common::GatherArguments, Scale> &args) { *this = simd_cast(fixed_size_simd(args)); } // masked overload template Vc_INTRINSIC enable_if<((sizeof(T) != 2 || sizeof(MT) > 2) && Traits::is_valid_vector_argument::value && !std::is_same::value && Vector::size() >= size()), void> gatherImplementation(const Common::GatherArguments, Scale> &args, MaskArgument k) { assign(simd_cast(fixed_size_simd(args, k)), k); } #endif // Vc_IMPL_AVX2 && !MSVC /////////////////////////////////////////////////////////////////////////////////////////// //prefix Vc_ALWAYS_INLINE Vector &operator++() { data() = Detail::add(data(), Detail::one(T()), T()); return *this; } Vc_ALWAYS_INLINE Vector &operator--() { data() = Detail::sub(data(), Detail::one(T()), T()); return *this; } //postfix Vc_ALWAYS_INLINE Vector operator++(int) { const Vector r = *this; data() = Detail::add(data(), Detail::one(T()), T()); return r; } Vc_ALWAYS_INLINE Vector operator--(int) { const Vector r = *this; data() = Detail::sub(data(), Detail::one(T()), T()); return r; } private: friend reference; Vc_INTRINSIC static value_type get(const Vector &o, int i) noexcept { return o.d.m(i); } template Vc_INTRINSIC static void set(Vector &o, int i, U &&v) noexcept( noexcept(std::declval() = v)) { return o.d.set(i, v); } public: /** * \note the returned object models the concept of a reference and * as such it can exist longer than the data it is referencing. * \note to avoid lifetime issues, we strongly advice not to store * any reference objects. 
*/ Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept { static_assert(noexcept(reference{std::declval(), int()}), ""); return {*this, int(index)}; } Vc_ALWAYS_INLINE value_type operator[](size_t index) const noexcept { return d.m(index); } Vc_INTRINSIC_L Vc_PURE_L Vector operator[](Permutation::ReversedTag) const Vc_INTRINSIC_R Vc_PURE_R; Vc_INTRINSIC_L Vc_PURE_L Vector operator[](const IndexType &perm) const Vc_INTRINSIC_R Vc_PURE_R; Vc_INTRINSIC Vc_PURE Mask operator!() const { return *this == Zero(); } Vc_ALWAYS_INLINE Vector operator~() const { #ifndef Vc_ENABLE_FLOAT_BIT_OPERATORS static_assert(std::is_integral::value, "bit-complement can only be used with Vectors of integral type"); #endif return Detail::andnot_(data(), Detail::allone()); } Vc_ALWAYS_INLINE_L Vc_PURE_L Vector operator-() const Vc_ALWAYS_INLINE_R Vc_PURE_R; Vc_INTRINSIC Vc_PURE Vector operator+() const { return *this; } // shifts #define Vc_OP_VEC(op) \ Vc_INTRINSIC Vector &operator op##=(AsArg x); \ Vc_INTRINSIC Vc_PURE Vector operator op(AsArg x) const \ { \ static_assert( \ std::is_integral::value, \ "bitwise-operators can only be used with Vectors of integral type"); \ } Vc_ALL_SHIFTS(Vc_OP_VEC); #undef Vc_OP_VEC Vc_ALWAYS_INLINE_L Vector &operator>>=(int x) Vc_ALWAYS_INLINE_R; Vc_ALWAYS_INLINE_L Vector &operator<<=(int x) Vc_ALWAYS_INLINE_R; Vc_ALWAYS_INLINE_L Vector operator>>(int x) const Vc_ALWAYS_INLINE_R; Vc_ALWAYS_INLINE_L Vector operator<<(int x) const Vc_ALWAYS_INLINE_R; Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC Vc_PURE Mask isNegative() const { return Vc::isnegative(*this); } Vc_ALWAYS_INLINE void assign( const Vector &v, const Mask &mask ) { data() = Detail::blend(data(), v.data(), mask.data()); } template Vc_DEPRECATED("Use simd_cast instead of Vector::staticCast") Vc_ALWAYS_INLINE V2 staticCast() const { return V2(*this); } template Vc_DEPRECATED("use reinterpret_components_cast instead") Vc_ALWAYS_INLINE V2 reinterpretCast() const { return AVX::avx_cast(data()); } Vc_ALWAYS_INLINE WriteMaskedVector operator()(const Mask &k) { return {*this, k}; } Vc_ALWAYS_INLINE VectorType &data() { return d.v(); } Vc_ALWAYS_INLINE const VectorType &data() const { return d.v(); } template Vc_INTRINSIC_L Vector broadcast() const Vc_INTRINSIC_R; Vc_INTRINSIC_L std::pair minIndex() const Vc_INTRINSIC_R; Vc_INTRINSIC_L std::pair maxIndex() const Vc_INTRINSIC_R; Vc_ALWAYS_INLINE EntryType min() const { return Detail::min(data(), T()); } Vc_ALWAYS_INLINE EntryType max() const { return Detail::max(data(), T()); } Vc_ALWAYS_INLINE EntryType product() const { return Detail::mul(data(), T()); } Vc_ALWAYS_INLINE EntryType sum() const { return Detail::add(data(), T()); } Vc_ALWAYS_INLINE_L Vector partialSum() const Vc_ALWAYS_INLINE_R; //template Vc_ALWAYS_INLINE_L Vector partialSum(BinaryOperation op) const Vc_ALWAYS_INLINE_R; Vc_ALWAYS_INLINE_L EntryType min(MaskArgument m) const Vc_ALWAYS_INLINE_R; Vc_ALWAYS_INLINE_L EntryType max(MaskArgument m) const Vc_ALWAYS_INLINE_R; Vc_ALWAYS_INLINE_L EntryType product(MaskArgument m) const Vc_ALWAYS_INLINE_R; Vc_ALWAYS_INLINE_L EntryType sum(MaskArgument m) const Vc_ALWAYS_INLINE_R; Vc_INTRINSIC_L Vector shifted(int amount, Vector shiftIn) const Vc_INTRINSIC_R; Vc_INTRINSIC_L Vector shifted(int amount) const Vc_INTRINSIC_R; Vc_INTRINSIC_L Vector rotated(int amount) const Vc_INTRINSIC_R; Vc_INTRINSIC_L Vc_PURE_L Vector reversed() const Vc_INTRINSIC_R Vc_PURE_R; Vc_ALWAYS_INLINE_L Vc_PURE_L Vector sorted() const Vc_ALWAYS_INLINE_R Vc_PURE_R; template void 
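// --------------------------------------------------------------------------
// [Added illustrative sketch, not part of the original Vc sources.]  The
// subscript operators above return a proxy reference for writes and a plain
// value for reads; as the note says, the proxy should not be stored.  A
// sketch, assuming <Vc/Vc>:
//
//   #include <Vc/Vc>
//
//   void subscripts() {
//     Vc::float_v v = Vc::float_v::IndexesFromZero();
//     v[0] = 42.f;              // write through the proxy reference
//     float second = v[1];      // read returns a plain float
//     v(v > 3.f) = 0.f;         // write-masked assignment via operator()
//     (void)second;
//   }
// --------------------------------------------------------------------------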
callWithValuesSorted(F &&f) { EntryType value = d.m(0); f(value); for (size_t i = 1; i < Size; ++i) { if (d.m(i) != value) { value = d.m(i); f(value); } } } template Vc_INTRINSIC void call(F &&f) const { Common::for_all_vector_entries([&](size_t i) { f(EntryType(d.m(i))); }); } template Vc_INTRINSIC void call(F &&f, const Mask &mask) const { for (size_t i : where(mask)) { f(EntryType(d.m(i))); } } template Vc_INTRINSIC Vector apply(F &&f) const { Vector r; Common::for_all_vector_entries( [&](size_t i) { r.d.set(i, f(EntryType(d.m(i)))); }); return r; } template Vc_INTRINSIC Vector apply(F &&f, const Mask &mask) const { Vector r(*this); for (size_t i : where(mask)) { r.d.set(i, f(EntryType(r.d.m(i)))); } return r; } template Vc_INTRINSIC void fill(EntryType (&f)(IndexT)) { Common::for_all_vector_entries([&](size_t i) { d.set(i, f(i)); }); } Vc_INTRINSIC void fill(EntryType (&f)()) { Common::for_all_vector_entries([&](size_t i) { d.set(i, f()); }); } template static Vc_INTRINSIC_L Vector generate(G gen) Vc_INTRINSIC_R; Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC Vector copySign(AsArg x) const { return Vc::copysign(*this, x); } Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC Vector exponent() const { Vc::exponent(*this); } Vc_INTRINSIC_L Vector interleaveLow(Vector x) const Vc_INTRINSIC_R; Vc_INTRINSIC_L Vector interleaveHigh(Vector x) const Vc_INTRINSIC_R; }; #undef Vc_CURRENT_CLASS_NAME template constexpr size_t Vector::Size; template constexpr size_t Vector::MemoryAlignment; #define Vc_CONDITIONAL_ASSIGN(name_, op_) \ template \ Vc_INTRINSIC enable_if conditional_assign( \ AVX2::Vector &lhs, M &&mask, U &&rhs) \ { \ lhs(mask) op_ rhs; \ } \ Vc_NOTHING_EXPECTING_SEMICOLON Vc_CONDITIONAL_ASSIGN( Assign, =); Vc_CONDITIONAL_ASSIGN( PlusAssign, +=); Vc_CONDITIONAL_ASSIGN( MinusAssign, -=); Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=); Vc_CONDITIONAL_ASSIGN( DivideAssign, /=); Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=); Vc_CONDITIONAL_ASSIGN( XorAssign, ^=); Vc_CONDITIONAL_ASSIGN( AndAssign, &=); Vc_CONDITIONAL_ASSIGN( OrAssign, |=); Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=); Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=); #undef Vc_CONDITIONAL_ASSIGN #define Vc_CONDITIONAL_ASSIGN(name_, expr_) \ template \ Vc_INTRINSIC enable_if> conditional_assign( \ AVX2::Vector &lhs, M &&mask) \ { \ return expr_; \ } \ Vc_NOTHING_EXPECTING_SEMICOLON Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++); Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask)); Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--); Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask)); #undef Vc_CONDITIONAL_ASSIGN } // namespace Vc #include "vector.tcc" #include "simd_cast.h" #endif // VC_AVX_VECTOR_H_ conky-1.22.1/3rdparty/Vc/Vc/avx/vector.tcc000066400000000000000000001206231476554302100202160ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2011-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #include "../common/x86_prefetches.h" #include "../common/gatherimplementation.h" #include "../common/scatterimplementation.h" #include "limits.h" #include "const.h" #include "../common/set.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Detail { // compare operators {{{1 Vc_INTRINSIC AVX2::double_m operator==(AVX2::double_v a, AVX2::double_v b) { return AVX::cmpeq_pd(a.data(), b.data()); } Vc_INTRINSIC AVX2:: float_m operator==(AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmpeq_ps(a.data(), b.data()); } Vc_INTRINSIC AVX2::double_m operator!=(AVX2::double_v a, AVX2::double_v b) { return AVX::cmpneq_pd(a.data(), b.data()); } Vc_INTRINSIC AVX2:: float_m operator!=(AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmpneq_ps(a.data(), b.data()); } Vc_INTRINSIC AVX2::double_m operator>=(AVX2::double_v a, AVX2::double_v b) { return AVX::cmpnlt_pd(a.data(), b.data()); } Vc_INTRINSIC AVX2:: float_m operator>=(AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmpnlt_ps(a.data(), b.data()); } Vc_INTRINSIC AVX2::double_m operator<=(AVX2::double_v a, AVX2::double_v b) { return AVX::cmple_pd(a.data(), b.data()); } Vc_INTRINSIC AVX2:: float_m operator<=(AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmple_ps(a.data(), b.data()); } Vc_INTRINSIC AVX2::double_m operator> (AVX2::double_v a, AVX2::double_v b) { return AVX::cmpgt_pd(a.data(), b.data()); } Vc_INTRINSIC AVX2:: float_m operator> (AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmpgt_ps(a.data(), b.data()); } Vc_INTRINSIC AVX2::double_m operator< (AVX2::double_v a, AVX2::double_v b) { return AVX::cmplt_pd(a.data(), b.data()); } Vc_INTRINSIC AVX2:: float_m operator< (AVX2:: float_v a, AVX2:: float_v b) { return AVX::cmplt_ps(a.data(), b.data()); } #ifdef Vc_IMPL_AVX2 Vc_INTRINSIC AVX2:: int_m operator==(AVX2:: int_v a, AVX2:: int_v b) { return AVX::cmpeq_epi32(a.data(), b.data()); } Vc_INTRINSIC AVX2:: uint_m operator==(AVX2:: uint_v a, AVX2:: uint_v b) { return AVX::cmpeq_epi32(a.data(), b.data()); } Vc_INTRINSIC AVX2:: short_m operator==(AVX2:: short_v a, AVX2:: short_v b) { return AVX::cmpeq_epi16(a.data(), b.data()); } Vc_INTRINSIC AVX2::ushort_m operator==(AVX2::ushort_v a, AVX2::ushort_v b) { return AVX::cmpeq_epi16(a.data(), b.data()); } Vc_INTRINSIC AVX2:: int_m operator!=(AVX2:: int_v a, AVX2:: int_v b) { return not_(AVX::cmpeq_epi32(a.data(), b.data())); } Vc_INTRINSIC AVX2:: uint_m operator!=(AVX2:: uint_v a, AVX2:: uint_v b) { return not_(AVX::cmpeq_epi32(a.data(), b.data())); } Vc_INTRINSIC AVX2:: short_m operator!=(AVX2:: short_v a, AVX2:: short_v b) { return 
not_(AVX::cmpeq_epi16(a.data(), b.data())); } Vc_INTRINSIC AVX2::ushort_m operator!=(AVX2::ushort_v a, AVX2::ushort_v b) { return not_(AVX::cmpeq_epi16(a.data(), b.data())); } Vc_INTRINSIC AVX2:: int_m operator>=(AVX2:: int_v a, AVX2:: int_v b) { return not_(AVX::cmplt_epi32(a.data(), b.data())); } Vc_INTRINSIC AVX2:: uint_m operator>=(AVX2:: uint_v a, AVX2:: uint_v b) { return not_(AVX::cmplt_epu32(a.data(), b.data())); } Vc_INTRINSIC AVX2:: short_m operator>=(AVX2:: short_v a, AVX2:: short_v b) { return not_(AVX::cmplt_epi16(a.data(), b.data())); } Vc_INTRINSIC AVX2::ushort_m operator>=(AVX2::ushort_v a, AVX2::ushort_v b) { return not_(AVX::cmplt_epu16(a.data(), b.data())); } Vc_INTRINSIC AVX2:: int_m operator<=(AVX2:: int_v a, AVX2:: int_v b) { return not_(AVX::cmpgt_epi32(a.data(), b.data())); } Vc_INTRINSIC AVX2:: uint_m operator<=(AVX2:: uint_v a, AVX2:: uint_v b) { return not_(AVX::cmpgt_epu32(a.data(), b.data())); } Vc_INTRINSIC AVX2:: short_m operator<=(AVX2:: short_v a, AVX2:: short_v b) { return not_(AVX::cmpgt_epi16(a.data(), b.data())); } Vc_INTRINSIC AVX2::ushort_m operator<=(AVX2::ushort_v a, AVX2::ushort_v b) { return not_(AVX::cmpgt_epu16(a.data(), b.data())); } Vc_INTRINSIC AVX2:: int_m operator> (AVX2:: int_v a, AVX2:: int_v b) { return AVX::cmpgt_epi32(a.data(), b.data()); } Vc_INTRINSIC AVX2:: uint_m operator> (AVX2:: uint_v a, AVX2:: uint_v b) { return AVX::cmpgt_epu32(a.data(), b.data()); } Vc_INTRINSIC AVX2:: short_m operator> (AVX2:: short_v a, AVX2:: short_v b) { return AVX::cmpgt_epi16(a.data(), b.data()); } Vc_INTRINSIC AVX2::ushort_m operator> (AVX2::ushort_v a, AVX2::ushort_v b) { return AVX::cmpgt_epu16(a.data(), b.data()); } Vc_INTRINSIC AVX2:: int_m operator< (AVX2:: int_v a, AVX2:: int_v b) { return AVX::cmplt_epi32(a.data(), b.data()); } Vc_INTRINSIC AVX2:: uint_m operator< (AVX2:: uint_v a, AVX2:: uint_v b) { return AVX::cmplt_epu32(a.data(), b.data()); } Vc_INTRINSIC AVX2:: short_m operator< (AVX2:: short_v a, AVX2:: short_v b) { return AVX::cmplt_epi16(a.data(), b.data()); } Vc_INTRINSIC AVX2::ushort_m operator< (AVX2::ushort_v a, AVX2::ushort_v b) { return AVX::cmplt_epu16(a.data(), b.data()); } #endif // Vc_IMPL_AVX2 // bitwise operators {{{1 template Vc_INTRINSIC AVX2::Vector operator^(AVX2::Vector a, AVX2::Vector b) { return xor_(a.data(), b.data()); } template Vc_INTRINSIC AVX2::Vector operator&(AVX2::Vector a, AVX2::Vector b) { return and_(a.data(), b.data()); } template Vc_INTRINSIC AVX2::Vector operator|(AVX2::Vector a, AVX2::Vector b) { return or_(a.data(), b.data()); } // }}}1 // arithmetic operators {{{1 template Vc_INTRINSIC AVX2::Vector operator+(AVX2::Vector a, AVX2::Vector b) { return add(a.data(), b.data(), T()); } template Vc_INTRINSIC AVX2::Vector operator-(AVX2::Vector a, AVX2::Vector b) { return sub(a.data(), b.data(), T()); } template Vc_INTRINSIC AVX2::Vector operator*(AVX2::Vector a, AVX2::Vector b) { return mul(a.data(), b.data(), T()); } template Vc_INTRINSIC AVX2::Vector operator/(AVX2::Vector a, AVX2::Vector b) { return div(a.data(), b.data(), T()); } Vc_INTRINSIC AVX2::Vector operator/(AVX2::Vector a, AVX2::Vector b) { using namespace AVX; const __m256 lo = _mm256_div_ps(convert(lo128(a.data())), convert(lo128(b.data()))); const __m256 hi = _mm256_div_ps(convert(hi128(a.data())), convert(hi128(b.data()))); const float_v threshold = 32767.f; using Detail::operator>; const __m128i loShort = (Vc_IS_UNLIKELY((float_v(lo) > threshold).isNotEmpty())) ? 
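// --------------------------------------------------------------------------
// [Added illustrative sketch, not part of the original Vc sources.]  The
// compare operators defined in this block return masks rather than bools;
// masks can be queried as a whole or used to write-mask updates.  A sketch,
// assuming <Vc/Vc>:
//
//   #include <Vc/Vc>
//
//   void compare_demo(Vc::float_v a, Vc::float_v b) {
//     Vc::float_m m = a < b;     // per-entry comparison yields a mask
//     a(m) += 1.f;               // update only the entries where a < b
//     if (m.isFull())  { /* every entry compared true */ }
//     if (m.isEmpty()) { /* no entry compared true    */ }
//   }
// --------------------------------------------------------------------------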
convert(lo) : convert(lo); const __m128i hiShort = (Vc_IS_UNLIKELY((float_v(hi) > threshold).isNotEmpty())) ? convert(hi) : convert(hi); return concat(loShort, hiShort); } template Vc_INTRINSIC enable_if::value, AVX2::Vector> operator%( AVX2::Vector a, AVX2::Vector b) { return a - a / b * b; } // }}}1 } // namespace Detail /////////////////////////////////////////////////////////////////////////////////////////// // generate {{{1 template <> template Vc_INTRINSIC AVX2::double_v AVX2::double_v::generate(G gen) { const auto tmp0 = gen(0); const auto tmp1 = gen(1); const auto tmp2 = gen(2); const auto tmp3 = gen(3); return _mm256_setr_pd(tmp0, tmp1, tmp2, tmp3); } template <> template Vc_INTRINSIC AVX2::float_v AVX2::float_v::generate(G gen) { const auto tmp0 = gen(0); const auto tmp1 = gen(1); const auto tmp2 = gen(2); const auto tmp3 = gen(3); const auto tmp4 = gen(4); const auto tmp5 = gen(5); const auto tmp6 = gen(6); const auto tmp7 = gen(7); return _mm256_setr_ps(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); } #ifdef Vc_IMPL_AVX2 template <> template Vc_INTRINSIC AVX2::int_v AVX2::int_v::generate(G gen) { const auto tmp0 = gen(0); const auto tmp1 = gen(1); const auto tmp2 = gen(2); const auto tmp3 = gen(3); const auto tmp4 = gen(4); const auto tmp5 = gen(5); const auto tmp6 = gen(6); const auto tmp7 = gen(7); return _mm256_setr_epi32(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); } template <> template Vc_INTRINSIC AVX2::uint_v AVX2::uint_v::generate(G gen) { const auto tmp0 = gen(0); const auto tmp1 = gen(1); const auto tmp2 = gen(2); const auto tmp3 = gen(3); const auto tmp4 = gen(4); const auto tmp5 = gen(5); const auto tmp6 = gen(6); const auto tmp7 = gen(7); return _mm256_setr_epi32(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); } template <> template Vc_INTRINSIC AVX2::short_v AVX2::short_v::generate(G gen) { const auto tmp0 = gen(0); const auto tmp1 = gen(1); const auto tmp2 = gen(2); const auto tmp3 = gen(3); const auto tmp4 = gen(4); const auto tmp5 = gen(5); const auto tmp6 = gen(6); const auto tmp7 = gen(7); const auto tmp8 = gen(8); const auto tmp9 = gen(9); const auto tmp10 = gen(10); const auto tmp11 = gen(11); const auto tmp12 = gen(12); const auto tmp13 = gen(13); const auto tmp14 = gen(14); const auto tmp15 = gen(15); return _mm256_setr_epi16(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15); } template <> template Vc_INTRINSIC AVX2::ushort_v AVX2::ushort_v::generate(G gen) { const auto tmp0 = gen(0); const auto tmp1 = gen(1); const auto tmp2 = gen(2); const auto tmp3 = gen(3); const auto tmp4 = gen(4); const auto tmp5 = gen(5); const auto tmp6 = gen(6); const auto tmp7 = gen(7); const auto tmp8 = gen(8); const auto tmp9 = gen(9); const auto tmp10 = gen(10); const auto tmp11 = gen(11); const auto tmp12 = gen(12); const auto tmp13 = gen(13); const auto tmp14 = gen(14); const auto tmp15 = gen(15); return _mm256_setr_epi16(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15); } #endif // constants {{{1 template Vc_INTRINSIC Vector::Vector(VectorSpecialInitializerZero) : d{} {} template <> Vc_INTRINSIC Vector::Vector(VectorSpecialInitializerOne) : d(AVX::setone_pd()) {} template <> Vc_INTRINSIC Vector< float, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_ps()) {} #ifdef Vc_IMPL_AVX2 template <> Vc_INTRINSIC Vector< int, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epi32()) {} template <> Vc_INTRINSIC Vector< uint, 
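// --------------------------------------------------------------------------
// [Added illustrative sketch, not part of the original Vc sources.]  The
// generate() specializations above call gen(i) once per entry index and pack
// the results into one vector.  A sketch, assuming <Vc/Vc>:
//
//   #include <Vc/Vc>
//
//   // squares == (0, 1, 4, 9, ...) up to float_v::Size entries
//   Vc::float_v squares =
//       Vc::float_v::generate([](int i) { return float(i * i); });
// --------------------------------------------------------------------------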
VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epu32()) {} template <> Vc_INTRINSIC Vector< short, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epi16()) {} template <> Vc_INTRINSIC Vector::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epu16()) {} template <> Vc_INTRINSIC Vector< schar, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epi8()) {} template <> Vc_INTRINSIC Vector< uchar, VectorAbi::Avx>::Vector(VectorSpecialInitializerOne) : d(AVX::setone_epu8()) {} #endif template Vc_ALWAYS_INLINE Vector::Vector( VectorSpecialInitializerIndexesFromZero) : Vector(AVX::IndexesFromZeroData::address(), Vc::Aligned) { } template <> Vc_ALWAYS_INLINE Vector::Vector(VectorSpecialInitializerIndexesFromZero) : Vector(AVX::IndexesFromZeroData::address(), Vc::Aligned) { } template <> Vc_ALWAYS_INLINE Vector::Vector(VectorSpecialInitializerIndexesFromZero) : Vector(AVX::IndexesFromZeroData::address(), Vc::Aligned) { } /////////////////////////////////////////////////////////////////////////////////////////// // load member functions {{{1 // general load, implemented via LoadHelper {{{2 template template Vc_INTRINSIC typename Vector:: #ifndef Vc_MSVC template #endif load_concept::type Vector::load(const SrcT *mem, Flags flags) { Common::handleLoadPrefetches(mem, flags); d.v() = Detail::load(mem, flags); } /////////////////////////////////////////////////////////////////////////////////////////// // zeroing {{{1 template Vc_INTRINSIC void Vector::setZero() { data() = Detail::zero(); } template Vc_INTRINSIC void Vector::setZero(const Mask &k) { data() = Detail::andnot_(k.data(), data()); } template Vc_INTRINSIC void Vector::setZeroInverted(const Mask &k) { data() = Detail::and_(k.data(), data()); } template<> Vc_INTRINSIC void Vector::setQnan() { data() = Detail::allone(); } template<> Vc_INTRINSIC void Vector::setQnan(MaskArgument k) { data() = _mm256_or_pd(data(), k.dataD()); } template<> Vc_INTRINSIC void Vector::setQnan() { data() = Detail::allone(); } template<> Vc_INTRINSIC void Vector::setQnan(MaskArgument k) { data() = _mm256_or_ps(data(), k.dataF()); } /////////////////////////////////////////////////////////////////////////////////////////// // stores {{{1 template template Vc_INTRINSIC void Vector::store(U *mem, Flags flags) const { Common::handleStorePrefetches(mem, flags); HV::template store(mem, data()); } template template Vc_INTRINSIC void Vector::store(U *mem, Mask mask, Flags flags) const { Common::handleStorePrefetches(mem, flags); HV::template store(mem, data(), mask.data()); } /////////////////////////////////////////////////////////////////////////////////////////// // integer ops {{{1 #ifdef Vc_IMPL_AVX2 template <> Vc_ALWAYS_INLINE AVX2::Vector< int> Vector< int, VectorAbi::Avx>::operator<<(AsArg x) const { return _mm256_sllv_epi32(d.v(), x.d.v()); } template <> Vc_ALWAYS_INLINE AVX2::Vector< uint> Vector< uint, VectorAbi::Avx>::operator<<(AsArg x) const { return _mm256_sllv_epi32(d.v(), x.d.v()); } template <> Vc_ALWAYS_INLINE AVX2::Vector< int> Vector< int, VectorAbi::Avx>::operator>>(AsArg x) const { return _mm256_srav_epi32(d.v(), x.d.v()); } template <> Vc_ALWAYS_INLINE AVX2::Vector< uint> Vector< uint, VectorAbi::Avx>::operator>>(AsArg x) const { return _mm256_srlv_epi32(d.v(), x.d.v()); } template <> Vc_ALWAYS_INLINE AVX2::Vector< short> Vector< short, VectorAbi::Avx>::operator<<(AsArg x) const { return generate([&](int i) { return get(*this, i) << get(x, i); }); } template <> Vc_ALWAYS_INLINE AVX2::Vector 
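// --------------------------------------------------------------------------
// [Added illustrative sketch, not part of the original Vc sources.]  Integer
// vectors support both uniform shifts by an int and, on AVX2, per-entry
// shift counts taken from another vector (vpsllvd/vpsravd as used above).
// A sketch, assuming <Vc/Vc> and an AVX2 build:
//
//   #include <Vc/Vc>
//
//   void shift_demo() {
//     Vc::int_v x(16);                              // broadcast 16
//     Vc::int_v n = Vc::int_v::IndexesFromZero();   // 0, 1, 2, ...
//     Vc::int_v a = x >> 2;   // uniform shift: every entry becomes 4
//     Vc::int_v b = x << n;   // per-entry shift counts (AVX2 only)
//     (void)a; (void)b;
//   }
// --------------------------------------------------------------------------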
Vector::operator<<(AsArg x) const { return generate([&](int i) { return get(*this, i) << get(x, i); }); } template <> Vc_ALWAYS_INLINE AVX2::Vector< short> Vector< short, VectorAbi::Avx>::operator>>(AsArg x) const { return generate([&](int i) { return get(*this, i) >> get(x, i); }); } template <> Vc_ALWAYS_INLINE AVX2::Vector Vector::operator>>(AsArg x) const { return generate([&](int i) { return get(*this, i) >> get(x, i); }); } template Vc_ALWAYS_INLINE AVX2::Vector &Vector::operator<<=(AsArg x) { static_assert(std::is_integral::value, "bitwise-operators can only be used with Vectors of integral type"); return *this = *this << x; } template Vc_ALWAYS_INLINE AVX2::Vector &Vector::operator>>=(AsArg x) { static_assert(std::is_integral::value, "bitwise-operators can only be used with Vectors of integral type"); return *this = *this >> x; } #endif template Vc_ALWAYS_INLINE AVX2::Vector &Vector::operator>>=(int shift) { d.v() = Detail::shiftRight(d.v(), shift, T()); return *static_cast *>(this); } template Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector Vector::operator>>(int shift) const { return Detail::shiftRight(d.v(), shift, T()); } template Vc_ALWAYS_INLINE AVX2::Vector &Vector::operator<<=(int shift) { d.v() = Detail::shiftLeft(d.v(), shift, T()); return *static_cast *>(this); } template Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector Vector::operator<<(int shift) const { return Detail::shiftLeft(d.v(), shift, T()); } // isnegative {{{1 Vc_INTRINSIC Vc_CONST AVX2::float_m isnegative(AVX2::float_v x) { return AVX::avx_cast<__m256>(AVX::srai_epi32<31>( AVX::avx_cast<__m256i>(_mm256_and_ps(AVX::setsignmask_ps(), x.data())))); } Vc_INTRINSIC Vc_CONST AVX2::double_m isnegative(AVX2::double_v x) { return Mem::permute(AVX::avx_cast<__m256>(AVX::srai_epi32<31>( AVX::avx_cast<__m256i>(_mm256_and_pd(AVX::setsignmask_pd(), x.data()))))); } // gathers {{{1 #define Vc_GATHER_IMPL(V_) \ template <> \ template \ inline void AVX2::V_::gatherImplementation( \ const Common::GatherArguments &args) #define Vc_M(i_) static_cast(args.address[Scale * args.indexes[i_]]) Vc_GATHER_IMPL(double_v) { d.v() = _mm256_setr_pd(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3)); } Vc_GATHER_IMPL(float_v) { d.v() = _mm256_setr_ps(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3), Vc_M(4), Vc_M(5), Vc_M(6), Vc_M(7)); } #ifdef Vc_IMPL_AVX2 Vc_GATHER_IMPL(int_v) { d.v() = _mm256_setr_epi32(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3), Vc_M(4), Vc_M(5), Vc_M(6), Vc_M(7)); } Vc_GATHER_IMPL(uint_v) { d.v() = _mm256_setr_epi32(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3), Vc_M(4), Vc_M(5), Vc_M(6), Vc_M(7)); } Vc_GATHER_IMPL(short_v) { d.v() = _mm256_setr_epi16(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3), Vc_M(4), Vc_M(5), Vc_M(6), Vc_M(7), Vc_M(8), Vc_M(9), Vc_M(10), Vc_M(11), Vc_M(12), Vc_M(13), Vc_M(14), Vc_M(15)); } Vc_GATHER_IMPL(ushort_v) { d.v() = _mm256_setr_epi16(Vc_M(0), Vc_M(1), Vc_M(2), Vc_M(3), Vc_M(4), Vc_M(5), Vc_M(6), Vc_M(7), Vc_M(8), Vc_M(9), Vc_M(10), Vc_M(11), Vc_M(12), Vc_M(13), Vc_M(14), Vc_M(15)); } #endif #undef Vc_M #undef Vc_GATHER_IMPL template template inline void Vector::gatherImplementation( const Common::GatherArguments &args, MaskArgument mask) { const auto *mem = args.address; const auto indexes = Scale * args.indexes; using Selector = std::integral_constant < Common::GatherScatterImplementation, #ifdef Vc_USE_SET_GATHERS Traits::is_simd_vector::value ? 
Common::GatherScatterImplementation::SetIndexZero : #endif #ifdef Vc_USE_BSF_GATHERS Common::GatherScatterImplementation::BitScanLoop #elif defined Vc_USE_POPCNT_BSF_GATHERS Common::GatherScatterImplementation::PopcntSwitch #else Common::GatherScatterImplementation::SimpleLoop #endif > ; Common::executeGather(Selector(), *this, mem, indexes, mask); } template template inline void Vector::scatterImplementation(MT *mem, IT &&indexes) const { Common::unrolled_loop([&](std::size_t i) { mem[indexes[i]] = d.m(i); }); } template template inline void Vector::scatterImplementation(MT *mem, IT &&indexes, MaskArgument mask) const { using Selector = std::integral_constant < Common::GatherScatterImplementation, #ifdef Vc_USE_SET_GATHERS Traits::is_simd_vector::value ? Common::GatherScatterImplementation::SetIndexZero : #endif #ifdef Vc_USE_BSF_GATHERS Common::GatherScatterImplementation::BitScanLoop #elif defined Vc_USE_POPCNT_BSF_GATHERS Common::GatherScatterImplementation::PopcntSwitch #else Common::GatherScatterImplementation::SimpleLoop #endif > ; Common::executeScatter(Selector(), *this, mem, std::forward(indexes), mask); } /////////////////////////////////////////////////////////////////////////////////////////// // operator- {{{1 #ifdef Vc_USE_BUILTIN_VECTOR_TYPES template Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector Vector::operator-() const { return VectorType(-d.builtin()); } #else template Vc_ALWAYS_INLINE Vc_PURE AVX2::Vector Vector::operator-() const { return Detail::negate(d.v(), std::integral_constant()); } #endif /////////////////////////////////////////////////////////////////////////////////////////// // horizontal ops {{{1 template Vc_INTRINSIC std::pair, int> Vector::minIndex() const { AVX2::Vector x = min(); return std::make_pair(x, (*this == x).firstOne()); } template Vc_INTRINSIC std::pair, int> Vector::maxIndex() const { AVX2::Vector x = max(); return std::make_pair(x, (*this == x).firstOne()); } template <> Vc_INTRINSIC std::pair AVX2::float_v::minIndex() const { /* // 28 cycles latency: __m256 x = _mm256_min_ps(Mem::permute128(d.v()), d.v()); x = _mm256_min_ps(x, Reg::permute(x)); AVX2::float_v xx = _mm256_min_ps(x, Reg::permute(x)); AVX2::uint_v idx = AVX2::uint_v::IndexesFromZero(); idx = _mm256_castps_si256( _mm256_or_ps((*this != xx).data(), _mm256_castsi256_ps(idx.data()))); return std::make_pair(xx, (*this == xx).firstOne()); __m128 loData = AVX::lo128(d.v()); __m128 hiData = AVX::hi128(d.v()); const __m128 less2 = _mm_cmplt_ps(hiData, loData); loData = _mm_min_ps(loData, hiData); hiData = Mem::permute(loData); const __m128 less1 = _mm_cmplt_ps(hiData, loData); loData = _mm_min_ps(loData, hiData); hiData = Mem::permute(loData); const __m128 less0 = _mm_cmplt_ps(hiData, loData); unsigned bits = _mm_movemask_ps(less0) & 0x1; bits |= ((_mm_movemask_ps(less1) << 1) - bits) & 0x2; bits |= ((_mm_movemask_ps(less2) << 3) - bits) & 0x4; loData = _mm_min_ps(loData, hiData); return std::make_pair(AVX::concat(loData, loData), bits); */ // 28 cycles Latency: __m256 x = d.v(); __m256 idx = Vector::IndexesFromZero().data(); __m256 y = Mem::permute128(x); __m256 idy = Mem::permute128(idx); __m256 less = AVX::cmplt_ps(x, y); x = _mm256_blendv_ps(y, x, less); idx = _mm256_blendv_ps(idy, idx, less); y = Reg::permute(x); idy = Reg::permute(idx); less = AVX::cmplt_ps(x, y); x = _mm256_blendv_ps(y, x, less); idx = _mm256_blendv_ps(idy, idx, less); y = Reg::permute(x); idy = Reg::permute(idx); less = AVX::cmplt_ps(x, y); idx = _mm256_blendv_ps(idy, idx, less); const auto index = 
_mm_cvtsi128_si32(AVX::avx_cast<__m128i>(idx)); #ifdef Vc_GNU_ASM __asm__ __volatile__(""); // help GCC to order the instructions better #endif x = _mm256_blendv_ps(y, x, less); return std::make_pair(x, index); } template Vc_ALWAYS_INLINE AVX2::Vector Vector::partialSum() const { // a b c d e f g h // + a b c d e f g -> a ab bc cd de ef fg gh // + a ab bc cd de ef -> a ab abc abcd bcde cdef defg efgh // + a ab abc abcd -> a ab abc abcd abcde abcdef abcdefg abcdefgh AVX2::Vector tmp = *this; if (Size > 1) tmp += tmp.shifted(-1); if (Size > 2) tmp += tmp.shifted(-2); if (Size > 4) tmp += tmp.shifted(-4); if (Size > 8) tmp += tmp.shifted(-8); if (Size > 16) tmp += tmp.shifted(-16); return tmp; } /* This function requires correct masking because the neutral element of \p op is not necessarily 0 * template template Vc_ALWAYS_INLINE AVX2::Vector Vector::partialSum(BinaryOperation op) const { // a b c d e f g h // + a b c d e f g -> a ab bc cd de ef fg gh // + a ab bc cd de ef -> a ab abc abcd bcde cdef defg efgh // + a ab abc abcd -> a ab abc abcd abcde abcdef abcdefg abcdefgh AVX2::Vector tmp = *this; Mask mask(true); if (Size > 1) tmp(mask) = op(tmp, tmp.shifted(-1)); if (Size > 2) tmp(mask) = op(tmp, tmp.shifted(-2)); if (Size > 4) tmp(mask) = op(tmp, tmp.shifted(-4)); if (Size > 8) tmp(mask) = op(tmp, tmp.shifted(-8)); if (Size > 16) tmp(mask) = op(tmp, tmp.shifted(-16)); return tmp; } */ template Vc_ALWAYS_INLINE typename Vector::EntryType Vector::min(MaskArgument m) const { AVX2::Vector tmp = std::numeric_limits >::max(); tmp(m) = *this; return tmp.min(); } template Vc_ALWAYS_INLINE typename Vector::EntryType Vector::max(MaskArgument m) const { AVX2::Vector tmp = std::numeric_limits >::min(); tmp(m) = *this; return tmp.max(); } template Vc_ALWAYS_INLINE typename Vector::EntryType Vector::product(MaskArgument m) const { AVX2::Vector tmp(Vc::One); tmp(m) = *this; return tmp.product(); } template Vc_ALWAYS_INLINE typename Vector::EntryType Vector::sum(MaskArgument m) const { AVX2::Vector tmp(Vc::Zero); tmp(m) = *this; return tmp.sum(); }//}}} // exponent {{{1 namespace Detail { Vc_INTRINSIC Vc_CONST __m256 exponent(__m256 v) { using namespace AVX; __m128i tmp0 = _mm_srli_epi32(avx_cast<__m128i>(v), 23); __m128i tmp1 = _mm_srli_epi32(avx_cast<__m128i>(hi128(v)), 23); tmp0 = _mm_sub_epi32(tmp0, _mm_set1_epi32(0x7f)); tmp1 = _mm_sub_epi32(tmp1, _mm_set1_epi32(0x7f)); return _mm256_cvtepi32_ps(concat(tmp0, tmp1)); } Vc_INTRINSIC Vc_CONST __m256d exponent(__m256d v) { using namespace AVX; __m128i tmp0 = _mm_srli_epi64(avx_cast<__m128i>(v), 52); __m128i tmp1 = _mm_srli_epi64(avx_cast<__m128i>(hi128(v)), 52); tmp0 = _mm_sub_epi32(tmp0, _mm_set1_epi32(0x3ff)); tmp1 = _mm_sub_epi32(tmp1, _mm_set1_epi32(0x3ff)); return _mm256_cvtepi32_pd(avx_cast<__m128i>(Mem::shuffle(avx_cast<__m128>(tmp0), avx_cast<__m128>(tmp1)))); } } // namespace Detail Vc_INTRINSIC Vc_CONST AVX2::float_v exponent(AVX2::float_v x) { using Detail::operator>=; Vc_ASSERT((x >= x.Zero()).isFull()); return Detail::exponent(x.data()); } Vc_INTRINSIC Vc_CONST AVX2::double_v exponent(AVX2::double_v x) { using Detail::operator>=; Vc_ASSERT((x >= x.Zero()).isFull()); return Detail::exponent(x.data()); } // }}}1 // Random {{{1 static Vc_ALWAYS_INLINE __m256i _doRandomStep() { using Detail::operator*; using Detail::operator+; #ifdef Vc_IMPL_AVX2 using AVX2::uint_v; uint_v state0(&Common::RandomState[0]); uint_v state1(&Common::RandomState[uint_v::Size]); (state1 * uint_v(0xdeece66du) + uint_v(11)).store(&Common::RandomState[uint_v::Size]); 
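// --------------------------------------------------------------------------
// [Added illustrative sketch, not part of the original Vc sources.]  The
// horizontal operations above reduce a whole vector to one scalar, with
// optional masking, and partialSum() produces the running prefix sums shown
// in its comment.  A sketch, assuming <Vc/Vc> with the 8-wide AVX float_v:
//
//   #include <Vc/Vc>
//
//   void reduce_demo() {
//     Vc::float_v v = Vc::float_v::IndexesFromZero() + 1.f;  // 1, 2, ..., 8
//     float total = v.sum();                 // 36
//     float smallest = v.min();              // 1
//     float bigSum = v.sum(v > 4.f);         // 5 + 6 + 7 + 8 = 26
//     Vc::float_v prefix = v.partialSum();   // 1, 3, 6, 10, 15, 21, 28, 36
//     (void)total; (void)smallest; (void)bigSum; (void)prefix;
//   }
// --------------------------------------------------------------------------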
uint_v(Detail::xor_((state0 * uint_v(0xdeece66du) + uint_v(11)).data(), _mm256_srli_epi32(state1.data(), 16))) .store(&Common::RandomState[0]); return state0.data(); #else using SSE::uint_v; uint_v state0(&Common::RandomState[0]); uint_v state1(&Common::RandomState[uint_v::Size]); uint_v state2(&Common::RandomState[2 * uint_v::Size]); uint_v state3(&Common::RandomState[3 * uint_v::Size]); (state2 * uint_v(0xdeece66du) + uint_v(11)) .store(&Common::RandomState[2 * uint_v::Size]); (state3 * uint_v(0xdeece66du) + uint_v(11)) .store(&Common::RandomState[3 * uint_v::Size]); uint_v(Detail::xor_((state0 * uint_v(0xdeece66du) + uint_v(11)).data(), _mm_srli_epi32(state2.data(), 16))) .store(&Common::RandomState[0]); uint_v(Detail::xor_((state1 * uint_v(0xdeece66du) + uint_v(11)).data(), _mm_srli_epi32(state3.data(), 16))) .store(&Common::RandomState[uint_v::Size]); return AVX::concat(state0.data(), state1.data()); #endif } #ifdef Vc_IMPL_AVX2 template Vc_ALWAYS_INLINE AVX2::Vector Vector::Random() { return {_doRandomStep()}; } #endif template <> Vc_ALWAYS_INLINE AVX2::float_v AVX2::float_v::Random() { return HT::sub(Detail::or_(_cast(AVX::srli_epi32<2>(_doRandomStep())), HT::one()), HT::one()); } template<> Vc_ALWAYS_INLINE AVX2::double_v AVX2::double_v::Random() { const __m256i state = Detail::load(&Common::RandomState[0], Vc::Aligned, Detail::LoadTag<__m256i, int>()); for (size_t k = 0; k < 8; k += 2) { typedef unsigned long long uint64 Vc_MAY_ALIAS; const uint64 stateX = *aliasing_cast(&Common::RandomState[k]); *aliasing_cast(&Common::RandomState[k]) = (stateX * 0x5deece66dull + 11); } return HT::sub(Detail::or_(_cast(AVX::srli_epi64<12>(state)), HT::one()), HT::one()); } // }}}1 // shifted / rotated {{{1 template Vc_INTRINSIC AVX2::Vector Vector::shifted(int amount) const { return Detail::shifted(d.v(), amount); } template Vc_INTRINSIC Vc_CONST VectorType shifted_shortcut(VectorType left, VectorType right, Common::WidthT<__m128>) { return Mem::shuffle(left, right); } template Vc_INTRINSIC Vc_CONST VectorType shifted_shortcut(VectorType left, VectorType right, Common::WidthT<__m256>) { return Mem::shuffle128(left, right); } template Vc_INTRINSIC AVX2::Vector Vector::shifted(int amount, Vector shiftIn) const { #ifdef __GNUC__ if (__builtin_constant_p(amount)) { const __m256i a = AVX::avx_cast<__m256i>(d.v()); const __m256i b = AVX::avx_cast<__m256i>(shiftIn.d.v()); if (amount * 2 == int(Size)) { return shifted_shortcut(d.v(), shiftIn.d.v(), WidthT()); } if (amount * 2 == -int(Size)) { return shifted_shortcut(shiftIn.d.v(), d.v(), WidthT()); } switch (amount) { case 1: return AVX::avx_cast( #ifdef Vc_IMPL_AVX2 _mm256_alignr_epi8(_mm256_permute2x128_si256(a, b, 0x21), a, sizeof(EntryType)) #else // Vc_IMPL_AVX2 AVX::concat( _mm_alignr_epi8(AVX::hi128(a), AVX::lo128(a), sizeof(EntryType)), _mm_alignr_epi8(AVX::lo128(b), AVX::hi128(a), sizeof(EntryType))) #endif // Vc_IMPL_AVX2 ); case 2: return AVX::avx_cast( #ifdef Vc_IMPL_AVX2 _mm256_alignr_epi8(_mm256_permute2x128_si256(a, b, 0x21), a, 2 * sizeof(EntryType)) #else // Vc_IMPL_AVX2 AVX::concat( _mm_alignr_epi8(AVX::hi128(a), AVX::lo128(a), 2 * sizeof(EntryType)), _mm_alignr_epi8(AVX::lo128(b), AVX::hi128(a), 2 * sizeof(EntryType))) #endif // Vc_IMPL_AVX2 ); case 3: if (6u < Size) { return AVX::avx_cast( #ifdef Vc_IMPL_AVX2 _mm256_alignr_epi8(_mm256_permute2x128_si256(a, b, 0x21), a, 3 * sizeof(EntryType)) #else // Vc_IMPL_AVX2 AVX::concat(_mm_alignr_epi8(AVX::hi128(a), AVX::lo128(a), 3 * sizeof(EntryType)), _mm_alignr_epi8(AVX::lo128(b), 
AVX::hi128(a), 3 * sizeof(EntryType))) #endif // Vc_IMPL_AVX2 ); // TODO: } else { } } } #endif using Detail::operator|; return shifted(amount) | (amount > 0 ? shiftIn.shifted(amount - Size) : shiftIn.shifted(Size + amount)); } template Vc_INTRINSIC AVX2::Vector Vector::rotated(int amount) const { return Detail::rotated(d.v(), amount); } // sorted {{{1 template Vc_ALWAYS_INLINE Vc_PURE Vector Vector::sorted() const { return Detail::sorted(*this); } // interleaveLow/-High {{{1 template <> Vc_INTRINSIC AVX2::double_v AVX2::double_v::interleaveLow(AVX2::double_v x) const { return Mem::shuffle128(_mm256_unpacklo_pd(data(), x.data()), _mm256_unpackhi_pd(data(), x.data())); } template <> Vc_INTRINSIC AVX2::double_v AVX2::double_v::interleaveHigh(AVX2::double_v x) const { return Mem::shuffle128(_mm256_unpacklo_pd(data(), x.data()), _mm256_unpackhi_pd(data(), x.data())); } template <> Vc_INTRINSIC AVX2::float_v AVX2::float_v::interleaveLow(AVX2::float_v x) const { return Mem::shuffle128(_mm256_unpacklo_ps(data(), x.data()), _mm256_unpackhi_ps(data(), x.data())); } template <> Vc_INTRINSIC AVX2::float_v AVX2::float_v::interleaveHigh(AVX2::float_v x) const { return Mem::shuffle128(_mm256_unpacklo_ps(data(), x.data()), _mm256_unpackhi_ps(data(), x.data())); } #ifdef Vc_IMPL_AVX2 template <> Vc_INTRINSIC AVX2::int_v AVX2::int_v::interleaveLow ( AVX2::int_v x) const { return Mem::shuffle128(_mm256_unpacklo_epi32(data(), x.data()), _mm256_unpackhi_epi32(data(), x.data())); } template <> Vc_INTRINSIC AVX2::int_v AVX2::int_v::interleaveHigh( AVX2::int_v x) const { return Mem::shuffle128(_mm256_unpacklo_epi32(data(), x.data()), _mm256_unpackhi_epi32(data(), x.data())); } template <> Vc_INTRINSIC AVX2::uint_v AVX2::uint_v::interleaveLow ( AVX2::uint_v x) const { return Mem::shuffle128(_mm256_unpacklo_epi32(data(), x.data()), _mm256_unpackhi_epi32(data(), x.data())); } template <> Vc_INTRINSIC AVX2::uint_v AVX2::uint_v::interleaveHigh( AVX2::uint_v x) const { return Mem::shuffle128(_mm256_unpacklo_epi32(data(), x.data()), _mm256_unpackhi_epi32(data(), x.data())); } template <> Vc_INTRINSIC AVX2::short_v AVX2::short_v::interleaveLow ( AVX2::short_v x) const { return Mem::shuffle128(_mm256_unpacklo_epi16(data(), x.data()), _mm256_unpackhi_epi16(data(), x.data())); } template <> Vc_INTRINSIC AVX2::short_v AVX2::short_v::interleaveHigh( AVX2::short_v x) const { return Mem::shuffle128(_mm256_unpacklo_epi16(data(), x.data()), _mm256_unpackhi_epi16(data(), x.data())); } template <> Vc_INTRINSIC AVX2::ushort_v AVX2::ushort_v::interleaveLow (AVX2::ushort_v x) const { return Mem::shuffle128(_mm256_unpacklo_epi16(data(), x.data()), _mm256_unpackhi_epi16(data(), x.data())); } template <> Vc_INTRINSIC AVX2::ushort_v AVX2::ushort_v::interleaveHigh(AVX2::ushort_v x) const { return Mem::shuffle128(_mm256_unpacklo_epi16(data(), x.data()), _mm256_unpackhi_epi16(data(), x.data())); } #endif // permutation via operator[] {{{1 template <> Vc_INTRINSIC Vc_PURE AVX2::double_v AVX2::double_v::operator[](Permutation::ReversedTag) const { return Mem::permute128(Mem::permute(d.v())); } template <> Vc_INTRINSIC Vc_PURE AVX2::float_v AVX2::float_v::operator[](Permutation::ReversedTag) const { return Mem::permute128(Mem::permute(d.v())); } #ifdef Vc_IMPL_AVX2 template <> Vc_INTRINSIC Vc_PURE AVX2::int_v AVX2::int_v::operator[](Permutation::ReversedTag) const { return Mem::permute128(Mem::permute(d.v())); } template <> Vc_INTRINSIC Vc_PURE AVX2::uint_v AVX2::uint_v::operator[](Permutation::ReversedTag) const { return 
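// --- Editor's note: illustrative sketch, not part of the Vc sources. ---
// The interleaveLow()/interleaveHigh() specializations above pair unpacklo/
// unpackhi with a 128-bit lane shuffle so that the result is the plain
// element-wise interleave of the two operands' low (resp. high) halves, as in
// this scalar model:
#include <array>
#include <cstddef>
#include <utility>

template <typename T, std::size_t N>
std::pair<std::array<T, N>, std::array<T, N>> interleave_model(
    const std::array<T, N> &a, const std::array<T, N> &b) {
    std::array<T, N> lo{}, hi{};
    for (std::size_t i = 0; i < N / 2; ++i) {
        lo[2 * i] = a[i];              // interleaveLow:  a0 b0 a1 b1 ...
        lo[2 * i + 1] = b[i];
        hi[2 * i] = a[N / 2 + i];      // interleaveHigh: a(N/2) b(N/2) ...
        hi[2 * i + 1] = b[N / 2 + i];
    }
    return {lo, hi};
}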
Mem::permute128(Mem::permute(d.v())); } template <> Vc_INTRINSIC Vc_PURE AVX2::short_v AVX2::short_v::operator[]( Permutation::ReversedTag) const { return Mem::permute128(AVX::avx_cast<__m256i>(Mem::shuffle( AVX::avx_cast<__m256d>(Mem::permuteHi(d.v())), AVX::avx_cast<__m256d>(Mem::permuteLo(d.v()))))); } template <> Vc_INTRINSIC Vc_PURE AVX2::ushort_v AVX2::ushort_v::operator[]( Permutation::ReversedTag) const { return Mem::permute128(AVX::avx_cast<__m256i>(Mem::shuffle( AVX::avx_cast<__m256d>(Mem::permuteHi(d.v())), AVX::avx_cast<__m256d>(Mem::permuteLo(d.v()))))); } #endif template <> Vc_INTRINSIC AVX2::float_v Vector::operator[](const IndexType &/*perm*/) const { // TODO return *this; #ifdef Vc_IMPL_AVX2 #else /* const int_m cross128 = AVX::concat(_mm_cmpgt_epi32(AVX::lo128(perm.data()), _mm_set1_epi32(3)), _mm_cmplt_epi32(AVX::hi128(perm.data()), _mm_set1_epi32(4))); if (cross128.isNotEmpty()) { AVX2::float_v x = _mm256_permutevar_ps(d.v(), perm.data()); x(cross128) = _mm256_permutevar_ps(Mem::permute128(d.v()), perm.data()); return x; } else { */ #endif } // reversed {{{1 template Vc_INTRINSIC Vc_PURE Vector Vector::reversed() const { return (*this)[Permutation::Reversed]; } // broadcast from constexpr index {{{1 template <> template Vc_INTRINSIC AVX2::float_v AVX2::float_v::broadcast() const { constexpr VecPos Inner = static_cast(Index & 0x3); constexpr VecPos Outer = static_cast((Index & 0x4) / 4); return Mem::permute(Mem::permute128(d.v())); } template <> template Vc_INTRINSIC AVX2::double_v AVX2::double_v::broadcast() const { constexpr VecPos Inner = static_cast(Index & 0x1); constexpr VecPos Outer = static_cast((Index & 0x2) / 2); return Mem::permute(Mem::permute128(d.v())); } // }}}1 } // namespace Vc // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/avx/vectorhelper.h000066400000000000000000000365661476554302100211100ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_AVX_VECTORHELPER_H_ #define VC_AVX_VECTORHELPER_H_ #include #include "types.h" #include "intrinsics.h" #include "casts.h" #include "../common/loadstoreflags.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace AVX { template<> struct VectorHelper<__m256> { typedef __m256 VectorType; typedef const VectorType VTArg; template static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, typename Flags::EnableIfAligned = nullptr) { _mm256_store_ps(mem, x); } template static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, typename Flags::EnableIfUnalignedNotStreaming = nullptr) { _mm256_storeu_ps(mem, x); } template static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, typename Flags::EnableIfStreaming = nullptr) { _mm256_stream_ps(mem, x); } template static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, typename Flags::EnableIfUnalignedAndStreaming = nullptr) { AvxIntrinsics::stream_store(mem, x, setallone_ps()); } template static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, VTArg m, typename std::enable_if::type = nullptr) { _mm256_maskstore(mem, m, x); } template static Vc_ALWAYS_INLINE void store(float *mem, VTArg x, VTArg m, typename std::enable_if< Flags::IsStreaming, void *>::type = nullptr) { AvxIntrinsics::stream_store(mem, x, m); } }; template<> struct VectorHelper<__m256d> { typedef __m256d VectorType; typedef const VectorType VTArg; template static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, typename Flags::EnableIfAligned = nullptr) { _mm256_store_pd(mem, x); } template static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, typename Flags::EnableIfUnalignedNotStreaming = nullptr) { _mm256_storeu_pd(mem, x); } template static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, typename Flags::EnableIfStreaming = nullptr) { _mm256_stream_pd(mem, x); } template static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, typename Flags::EnableIfUnalignedAndStreaming = nullptr) { AvxIntrinsics::stream_store(mem, x, setallone_pd()); } template static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, VTArg m, typename std::enable_if::type = nullptr) { _mm256_maskstore(mem, m, x); } template static Vc_ALWAYS_INLINE void store(double *mem, VTArg x, VTArg m, typename std::enable_if< Flags::IsStreaming, void *>::type = nullptr) { AvxIntrinsics::stream_store(mem, x, m); } }; template<> struct VectorHelper<__m256i> { typedef __m256i VectorType; typedef const VectorType VTArg; template static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, typename Flags::EnableIfAligned = nullptr) { _mm256_store_si256(reinterpret_cast<__m256i *>(mem), x); } template static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, typename Flags::EnableIfUnalignedNotStreaming = nullptr) { _mm256_storeu_si256(reinterpret_cast<__m256i *>(mem), x); } template static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, typename Flags::EnableIfStreaming = nullptr) { _mm256_stream_si256(reinterpret_cast<__m256i *>(mem), x); } template static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, typename Flags::EnableIfUnalignedAndStreaming = nullptr) { AvxIntrinsics::stream_store(mem, x, setallone_si256()); } template static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, VTArg m, typename std::enable_if::type = nullptr) { _mm256_maskstore(mem, m, x); } template static Vc_ALWAYS_INLINE void store(T *mem, VTArg x, VTArg m, typename std::enable_if< Flags::IsStreaming, void *>::type = nullptr) { AvxIntrinsics::stream_store(mem, x, m); } }; #define Vc_OP1(op) \ static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a) { 
return Vc_CAT2(_mm256_##op##_, Vc_SUFFIX)(a); } #define Vc_OP(op) \ static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return Vc_CAT2(op##_ , Vc_SUFFIX)(a, b); } #define Vc_OP_(op) \ static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return Vc_CAT2(_mm256_##op , Vc_SUFFIX)(a, b); } #define Vc_OPx(op, op2) \ static Vc_INTRINSIC VectorType Vc_CONST op(VTArg a, VTArg b) { return Vc_CAT2(_mm256_##op2##_, Vc_SUFFIX)(a, b); } template<> struct VectorHelper { typedef __m256d VectorType; typedef const VectorType VTArg; typedef double EntryType; #define Vc_SUFFIX pd static Vc_ALWAYS_INLINE VectorType notMaskedToZero(VTArg a, __m256 mask) { return Vc_CAT2(_mm256_and_, Vc_SUFFIX)(_mm256_castps_pd(mask), a); } static Vc_ALWAYS_INLINE VectorType set(const double a) { return Vc_CAT2(_mm256_set1_, Vc_SUFFIX)(a); } static Vc_ALWAYS_INLINE VectorType set(const double a, const double b, const double c, const double d) { return Vc_CAT2(_mm256_set_, Vc_SUFFIX)(a, b, c, d); } static Vc_ALWAYS_INLINE VectorType zero() { return Vc_CAT2(_mm256_setzero_, Vc_SUFFIX)(); } static Vc_ALWAYS_INLINE VectorType one() { return Vc_CAT2(setone_, Vc_SUFFIX)(); }// set(1.); } static inline void fma(VectorType &v1, VTArg v2, VTArg v3) { #ifdef Vc_IMPL_FMA4 v1 = _mm256_macc_pd(v1, v2, v3); #else VectorType h1 = _mm256_and_pd(v1, _mm256_broadcast_sd(reinterpret_cast(&c_general::highMaskDouble))); VectorType h2 = _mm256_and_pd(v2, _mm256_broadcast_sd(reinterpret_cast(&c_general::highMaskDouble))); #if defined(Vc_GCC) && Vc_GCC < 0x40703 // GCC before 4.7.3 uses an incorrect optimization where it replaces the subtraction with an andnot // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54703 asm("":"+x"(h1), "+x"(h2)); #endif const VectorType l1 = _mm256_sub_pd(v1, h1); const VectorType l2 = _mm256_sub_pd(v2, h2); const VectorType ll = mul(l1, l2); const VectorType lh = add(mul(l1, h2), mul(h1, l2)); const VectorType hh = mul(h1, h2); // ll < lh < hh for all entries is certain const VectorType lh_lt_v3 = cmplt_pd(abs(lh), abs(v3)); // |lh| < |v3| const VectorType b = _mm256_blendv_pd(v3, lh, lh_lt_v3); const VectorType c = _mm256_blendv_pd(lh, v3, lh_lt_v3); v1 = add(add(ll, b), add(c, hh)); #endif } static Vc_INTRINSIC VectorType Vc_CONST add(VTArg a, VTArg b) { return _mm256_add_pd(a,b); } static Vc_INTRINSIC VectorType Vc_CONST sub(VTArg a, VTArg b) { return _mm256_sub_pd(a,b); } static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b) { return _mm256_mul_pd(a,b); } Vc_OP1(sqrt) static Vc_ALWAYS_INLINE Vc_CONST VectorType rsqrt(VTArg x) { return _mm256_div_pd(one(), sqrt(x)); } static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VTArg x) { return _mm256_div_pd(one(), x); } static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(VTArg a) { return Vc_CAT2(_mm256_and_, Vc_SUFFIX)(a, setabsmask_pd()); } static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b) { return _mm256_min_pd(a, b); } static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b) { return _mm256_max_pd(a, b); } static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VTArg a) { __m128d b = _mm_min_pd(avx_cast<__m128d>(a), _mm256_extractf128_pd(a, 1)); b = _mm_min_sd(b, _mm_unpackhi_pd(b, b)); return _mm_cvtsd_f64(b); } static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VTArg a) { __m128d b = _mm_max_pd(avx_cast<__m128d>(a), _mm256_extractf128_pd(a, 1)); b = _mm_max_sd(b, _mm_unpackhi_pd(b, b)); return _mm_cvtsd_f64(b); } static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VTArg a) { __m128d b = _mm_mul_pd(avx_cast<__m128d>(a), _mm256_extractf128_pd(a, 
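// --- Editor's note: illustrative sketch, not part of the Vc sources. ---
// The fma() fallback above emulates a fused multiply-add by splitting each
// operand into a high part (upper mantissa bits) and an exact low remainder,
// then summing the partial products from small to large.  A scalar model; the
// split width used here (26 low bits cleared) stands in for Vc's
// c_general::highMaskDouble, whose exact value is an implementation detail:
#include <cmath>
#include <cstdint>
#include <cstring>

inline void split_double(double v, double &hi, double &lo) {
    std::uint64_t bits;
    std::memcpy(&bits, &v, sizeof bits);
    bits &= ~((std::uint64_t(1) << 26) - 1);  // truncate the low mantissa bits
    std::memcpy(&hi, &bits, sizeof hi);
    lo = v - hi;                              // exact: only the low bits remain
}

inline double fma_emulated(double a, double b, double c) {
    double ah, al, bh, bl;
    split_double(a, ah, al);
    split_double(b, bh, bl);
    const double ll = al * bl;
    const double lh = al * bh + ah * bl;
    const double hh = ah * bh;
    // add the smaller of {lh, c} first, exactly as the blendv code above does
    return (std::abs(lh) < std::abs(c)) ? ((ll + lh) + (c + hh))
                                        : ((ll + c) + (lh + hh));
}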
1)); b = _mm_mul_sd(b, _mm_shuffle_pd(b, b, _MM_SHUFFLE2(0, 1))); return _mm_cvtsd_f64(b); } static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VTArg a) { __m128d b = _mm_add_pd(avx_cast<__m128d>(a), _mm256_extractf128_pd(a, 1)); b = _mm_hadd_pd(b, b); // or: b = _mm_add_sd(b, _mm256_shuffle_pd(b, b, _MM_SHUFFLE2(0, 1))); return _mm_cvtsd_f64(b); } #undef Vc_SUFFIX static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a) { return _mm256_round_pd(a, _MM_FROUND_NINT); } }; template<> struct VectorHelper { typedef float EntryType; typedef __m256 VectorType; typedef const VectorType VTArg; #define Vc_SUFFIX ps static Vc_ALWAYS_INLINE Vc_CONST VectorType notMaskedToZero(VTArg a, __m256 mask) { return Vc_CAT2(_mm256_and_, Vc_SUFFIX)(mask, a); } static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a) { return Vc_CAT2(_mm256_set1_, Vc_SUFFIX)(a); } static Vc_ALWAYS_INLINE Vc_CONST VectorType set(const float a, const float b, const float c, const float d, const float e, const float f, const float g, const float h) { return Vc_CAT2(_mm256_set_, Vc_SUFFIX)(a, b, c, d, e, f, g, h); } static Vc_ALWAYS_INLINE Vc_CONST VectorType zero() { return Vc_CAT2(_mm256_setzero_, Vc_SUFFIX)(); } static Vc_ALWAYS_INLINE Vc_CONST VectorType one() { return Vc_CAT2(setone_, Vc_SUFFIX)(); }// set(1.f); } static Vc_ALWAYS_INLINE Vc_CONST __m256 concat(__m256d a, __m256d b) { return _mm256_insertf128_ps(avx_cast<__m256>(_mm256_cvtpd_ps(a)), _mm256_cvtpd_ps(b), 1); } static inline void fma(VectorType &v1, VTArg v2, VTArg v3) { #ifdef Vc_IMPL_FMA4 v1 = _mm256_macc_ps(v1, v2, v3); #else __m256d v1_0 = _mm256_cvtps_pd(lo128(v1)); __m256d v1_1 = _mm256_cvtps_pd(hi128(v1)); __m256d v2_0 = _mm256_cvtps_pd(lo128(v2)); __m256d v2_1 = _mm256_cvtps_pd(hi128(v2)); __m256d v3_0 = _mm256_cvtps_pd(lo128(v3)); __m256d v3_1 = _mm256_cvtps_pd(hi128(v3)); v1 = AVX::concat( _mm256_cvtpd_ps(_mm256_add_pd(_mm256_mul_pd(v1_0, v2_0), v3_0)), _mm256_cvtpd_ps(_mm256_add_pd(_mm256_mul_pd(v1_1, v2_1), v3_1))); #endif } static Vc_INTRINSIC VectorType Vc_CONST add(VTArg a, VTArg b) { return _mm256_add_ps(a, b); } static Vc_INTRINSIC VectorType Vc_CONST sub(VTArg a, VTArg b) { return _mm256_sub_ps(a, b); } static Vc_INTRINSIC VectorType Vc_CONST mul(VTArg a, VTArg b) { return _mm256_mul_ps(a, b); } Vc_OP1(sqrt) Vc_OP1(rsqrt) static Vc_ALWAYS_INLINE Vc_CONST VectorType reciprocal(VTArg x) { return _mm256_rcp_ps(x); } static Vc_ALWAYS_INLINE Vc_CONST VectorType abs(VTArg a) { return Vc_CAT2(_mm256_and_, Vc_SUFFIX)(a, setabsmask_ps()); } static Vc_INTRINSIC VectorType Vc_CONST min(VTArg a, VTArg b) { return _mm256_min_ps(a, b); } static Vc_INTRINSIC VectorType Vc_CONST max(VTArg a, VTArg b) { return _mm256_max_ps(a, b); } static Vc_ALWAYS_INLINE Vc_CONST EntryType min(VTArg a) { __m128 b = _mm_min_ps(lo128(a), hi128(a)); b = _mm_min_ps(b, _mm_movehl_ps(b, b)); // b = min(a0, a2), min(a1, a3), min(a2, a2), min(a3, a3) b = _mm_min_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1))); // b = min(a0, a1), a1, a2, a3 return _mm_cvtss_f32(b); } static Vc_ALWAYS_INLINE Vc_CONST EntryType max(VTArg a) { __m128 b = _mm_max_ps(avx_cast<__m128>(a), _mm256_extractf128_ps(a, 1)); b = _mm_max_ps(b, _mm_movehl_ps(b, b)); // b = max(a0, a2), max(a1, a3), max(a2, a2), max(a3, a3) b = _mm_max_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1))); // b = max(a0, a1), a1, a2, a3 return _mm_cvtss_f32(b); } static Vc_ALWAYS_INLINE Vc_CONST EntryType mul(VTArg a) { __m128 b = _mm_mul_ps(avx_cast<__m128>(a), _mm256_extractf128_ps(a, 1)); b = _mm_mul_ps(b, _mm_shuffle_ps(b, 
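// --- Editor's note: illustrative sketch, not part of the Vc sources. ---
// The min()/max()/mul()/add() reductions above all follow the same pattern:
// combine the low and high 128-bit halves first, then keep folding the
// remainder with shuffles.  A scalar model of that halve-and-combine reduction
// (N must be a power of two):
#include <array>
#include <cstddef>

template <typename T, std::size_t N, typename BinaryOp>
T horizontal_reduce(std::array<T, N> v, BinaryOp op) {
    for (std::size_t width = N / 2; width > 0; width /= 2) {
        for (std::size_t i = 0; i < width; ++i) { v[i] = op(v[i], v[i + width]); }
    }
    return v[0];
}
// e.g. horizontal_reduce(values, [](float x, float y) { return x < y ? x : y; })
// models the min(VTArg) member above.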
b, _MM_SHUFFLE(0, 1, 2, 3))); b = _mm_mul_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 2, 0, 1))); return _mm_cvtss_f32(b); } static Vc_ALWAYS_INLINE Vc_CONST EntryType add(VTArg a) { __m128 b = _mm_add_ps(avx_cast<__m128>(a), _mm256_extractf128_ps(a, 1)); b = _mm_add_ps(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 1, 2, 3))); b = _mm_add_ss(b, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 2, 0, 1))); return _mm_cvtss_f32(b); } #undef Vc_SUFFIX static Vc_ALWAYS_INLINE Vc_CONST VectorType round(VTArg a) { return _mm256_round_ps(a, _MM_FROUND_NINT); } }; #undef Vc_OP1 #undef Vc_OP #undef Vc_OP_ #undef Vc_OPx } // namespace AVX(2) } // namespace Vc #endif // VC_AVX_VECTORHELPER_H_ conky-1.22.1/3rdparty/Vc/Vc/common/000077500000000000000000000000001476554302100167075ustar00rootroot00000000000000conky-1.22.1/3rdparty/Vc/Vc/common/algorithms.h000066400000000000000000000137671476554302100212470ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_ALGORITHMS_H_ #define VC_COMMON_ALGORITHMS_H_ #include "simdize.h" namespace Vc_VERSIONED_NAMESPACE { #ifdef DOXYGEN /** * \ingroup Utilities * \headerfile algorithms.h * * Vc variant of the `std::for_each` algorithm. * * This algorithm calls \p f with one argument of type * `Vc::Vector<` *iterator value type* `, ` *unspecified* `>` as often as is needed to * iterate over the complete range from \p first to \p last. * It will try to use the best vector size (VectorAbi) to work on the largest chunks * possible. * To support aligned loads (and stores) and to support arbitrary range distances, the * algorithm may require the use of `Vc::VectorAbi` types that work on fewer elements in * parallel. * * The following example requires C++14 for generic lambdas. If you don't have generic * lambdas available you can use a "classic" functor type with a templated call operator * instead. 
* * \code * void scale(std::vector &data, double factor) { * Vc::simd_for_each(data.begin(), data.end(), [&](auto v) { * v *= factor; * }); * } * \endcode */ template UnaryFunction simd_for_each(InputIt first, InputIt last, UnaryFunction f); #else template ::value_type> inline enable_if< Traits::is_functor_argument_immutable>::value, UnaryFunction> simd_for_each(InputIt first, InputIt last, UnaryFunction f) { typedef simdize V; typedef simdize V1; const auto lastV = last - V::Size + 1; for (; first < lastV; first += V::Size) { V tmp; load_interleaved(tmp, std::addressof(*first)); f(tmp); } for (; first != last; ++first) { V1 tmp; load_interleaved(tmp, std::addressof(*first)); f(tmp); } return f; } template ::value_type> inline enable_if< !Traits::is_functor_argument_immutable>::value, UnaryFunction> simd_for_each(InputIt first, InputIt last, UnaryFunction f) { typedef simdize V; typedef simdize V1; const auto lastV = last - V::size() + 1; for (; first < lastV; first += V::size()) { V tmp; load_interleaved(tmp, std::addressof(*first)); f(tmp); store_interleaved(tmp, std::addressof(*first)); } for (; first != last; ++first) { V1 tmp; load_interleaved(tmp, std::addressof(*first)); f(tmp); store_interleaved(tmp, std::addressof(*first)); } return f; } #endif /////////////////////////////////////////////////////////////////////////////// template ::value_type> inline enable_if< Traits::is_functor_argument_immutable>::value, UnaryFunction> simd_for_each_n(InputIt first, std::size_t count, UnaryFunction f) { typename std::make_signed::type len = count; typedef simdize V; typedef simdize V1; for (; len >= int(V::size()); len -= V::Size, first += V::Size) { V tmp; load_interleaved(tmp, std::addressof(*first)); f(tmp); } for (; len != 0; --len, ++first) { V1 tmp; load_interleaved(tmp, std::addressof(*first)); f(tmp); } return f; } template ::value_type> inline enable_if< !Traits::is_functor_argument_immutable>::value, UnaryFunction> simd_for_each_n(InputIt first, std::size_t count, UnaryFunction f) { typename std::make_signed::type len = count; typedef simdize V; typedef simdize V1; for (; len >= int(V::size()); len -= V::Size, first += V::Size) { V tmp; load_interleaved(tmp, std::addressof(*first)); f(tmp); store_interleaved(tmp, std::addressof(*first)); } for (; len != 0; --len, ++first) { V1 tmp; load_interleaved(tmp, std::addressof(*first)); f(tmp); store_interleaved(tmp, std::addressof(*first)); } return f; } } // namespace Vc #endif // VC_COMMON_ALGORITHMS_H_ conky-1.22.1/3rdparty/Vc/Vc/common/aliasingentryhelper.h000066400000000000000000000160771476554302100231440ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2010-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
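// --- Editor's note: hypothetical usage sketch, not part of the Vc sources. ---
// The simd_for_each_n() overloads above process `count` elements in
// full-vector chunks and finish the remainder with scalar iterations.  A
// caller could look like this (the function name is made up; assumes <Vc/Vc>
// is included and C++14 generic lambdas are available):
#include <cstddef>
#include <vector>

inline void scale_first_n(std::vector<float> &data, std::size_t count, float factor) {
    Vc::simd_for_each_n(data.begin(), count, [factor](auto &chunk) {
        chunk *= factor;  // mutable argument, so the storing overload is chosen
    });
}
// When count is not a multiple of the vector width, the trailing elements are
// handled one at a time by the scalar tail loop above.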
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_ALIASINGENTRYHELPER_H_ #define VC_COMMON_ALIASINGENTRYHELPER_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { template class AliasingEntryHelper { private: typedef typename StorageType::EntryType T; #ifdef Vc_ICC StorageType *const m_storage; const int m_index; public: Vc_ALWAYS_INLINE AliasingEntryHelper(StorageType *d, int index) : m_storage(d), m_index(index) {} Vc_ALWAYS_INLINE AliasingEntryHelper(const AliasingEntryHelper &) = default; Vc_ALWAYS_INLINE AliasingEntryHelper(AliasingEntryHelper &&) = default; Vc_ALWAYS_INLINE AliasingEntryHelper &operator=(const AliasingEntryHelper &rhs) { m_storage->assign(m_index, rhs); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator =(T x) { m_storage->assign(m_index, x); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator +=(T x) { m_storage->assign(m_index, m_storage->m(m_index) + x); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator -=(T x) { m_storage->assign(m_index, m_storage->m(m_index) - x); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator /=(T x) { m_storage->assign(m_index, m_storage->m(m_index) / x); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator *=(T x) { m_storage->assign(m_index, m_storage->m(m_index) * x); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator |=(T x) { m_storage->assign(m_index, m_storage->m(m_index) | x); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator &=(T x) { m_storage->assign(m_index, m_storage->m(m_index) & x); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator ^=(T x) { m_storage->assign(m_index, m_storage->m(m_index) ^ x); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator %=(T x) { m_storage->assign(m_index, m_storage->m(m_index) % x); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator<<=(T x) { m_storage->assign(m_index, m_storage->m(m_index)<< x); return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator>>=(T x) { m_storage->assign(m_index, m_storage->m(m_index)>> x); return *this; } #define m_data m_storage->read(m_index) #else typedef T A Vc_MAY_ALIAS; A &m_data; public: template Vc_ALWAYS_INLINE AliasingEntryHelper(T2 &d) : m_data(reinterpret_cast(d)) {} Vc_ALWAYS_INLINE AliasingEntryHelper(A &d) : m_data(d) {} Vc_ALWAYS_INLINE AliasingEntryHelper &operator=(const AliasingEntryHelper &rhs) { m_data = rhs.m_data; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator =(T x) { m_data = x; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator+=(T x) { m_data += x; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator-=(T x) { m_data -= x; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator/=(T x) { m_data /= x; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper 
&operator*=(T x) { m_data *= x; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator|=(T x) { m_data |= x; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator&=(T x) { m_data &= x; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator^=(T x) { m_data ^= x; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator%=(T x) { m_data %= x; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator<<=(T x) { m_data <<= x; return *this; } Vc_ALWAYS_INLINE AliasingEntryHelper &operator>>=(T x) { m_data >>= x; return *this; } #endif Vc_ALWAYS_INLINE Vc_PURE operator const T() const { return m_data; } Vc_ALWAYS_INLINE Vc_PURE bool operator==(T x) const { return static_cast(m_data) == x; } Vc_ALWAYS_INLINE Vc_PURE bool operator!=(T x) const { return static_cast(m_data) != x; } Vc_ALWAYS_INLINE Vc_PURE bool operator<=(T x) const { return static_cast(m_data) <= x; } Vc_ALWAYS_INLINE Vc_PURE bool operator>=(T x) const { return static_cast(m_data) >= x; } Vc_ALWAYS_INLINE Vc_PURE bool operator< (T x) const { return static_cast(m_data) < x; } Vc_ALWAYS_INLINE Vc_PURE bool operator> (T x) const { return static_cast(m_data) > x; } Vc_ALWAYS_INLINE Vc_PURE T operator-() const { return -static_cast(m_data); } Vc_ALWAYS_INLINE Vc_PURE T operator~() const { return ~static_cast(m_data); } Vc_ALWAYS_INLINE Vc_PURE T operator+(T x) const { return static_cast(m_data) + x; } Vc_ALWAYS_INLINE Vc_PURE T operator-(T x) const { return static_cast(m_data) - x; } Vc_ALWAYS_INLINE Vc_PURE T operator/(T x) const { return static_cast(m_data) / x; } Vc_ALWAYS_INLINE Vc_PURE T operator*(T x) const { return static_cast(m_data) * x; } Vc_ALWAYS_INLINE Vc_PURE T operator|(T x) const { return static_cast(m_data) | x; } Vc_ALWAYS_INLINE Vc_PURE T operator&(T x) const { return static_cast(m_data) & x; } Vc_ALWAYS_INLINE Vc_PURE T operator^(T x) const { return static_cast(m_data) ^ x; } Vc_ALWAYS_INLINE Vc_PURE T operator%(T x) const { return static_cast(m_data) % x; } //T operator<<(T x) const { return static_cast(m_data) << x; } //T operator>>(T x) const { return static_cast(m_data) >> x; } #ifdef m_data #undef m_data #endif }; } // namespace Common } // namespace Vc #endif // VC_COMMON_ALIASINGENTRYHELPER_H_ conky-1.22.1/3rdparty/Vc/Vc/common/alignedbase.h000066400000000000000000000120441476554302100213170ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_ALIGNEDBASE_H_ #define VC_COMMON_ALIGNEDBASE_H_ #include "types.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Detail { /**\internal * Break the recursion of the function below. */ template constexpr T max(T a) { return a; } /**\internal * \returns the maximum of all specified arguments. */ template constexpr T max(T a, T b, Ts... rest) { return a > b ? max(a, rest...) : max(b, rest...); } } // namespace Detail namespace Common { template Vc_INTRINSIC void *aligned_malloc(std::size_t); Vc_ALWAYS_INLINE void free(void *); } // namespace Common /** * \ingroup Utilities * * Helper class to ensure a given alignment. * * This class reimplements the \c new and \c delete operators to align objects allocated * on the heap suitably with the specified alignment \c Alignment. * * \see Vc::VectorAlignedBase * \see Vc::MemoryAlignedBase */ template struct alignas(Alignment) AlignedBase { Vc_FREE_STORE_OPERATORS_ALIGNED(Alignment); }; /** * \ingroup Utilities * * Helper type to ensure suitable alignment for any Vc::Vector type (using the default * VectorAbi). * * This class reimplements the \c new and \c delete operators to align objects allocated * on the heap suitably for objects of Vc::Vector type. This is necessary since the * standard \c new operator does not adhere to the alignment requirements of the type. * * \see Vc::VectorAlignedBaseT * \see Vc::MemoryAlignedBase * \see Vc::AlignedBase */ using VectorAlignedBase = AlignedBase< Detail::max(alignof(Vector), alignof(Vector), alignof(Vector), alignof(Vector), alignof(Vector), alignof(Vector), alignof(Vector), alignof(Vector), alignof(Vector), alignof(Vector), alignof(Vector), alignof(Vector))>; /** * \ingroup Utilities * Variant of the above type ensuring suitable alignment only for the specified vector * type \p V. * * \see Vc::VectorAlignedBase * \see Vc::MemoryAlignedBaseT */ template using VectorAlignedBaseT = AlignedBase; /** * \ingroup Utilities * * Helper class to ensure suitable alignment for arrays of scalar objects for any * Vc::Vector type (using the default VectorAbi). * * This class reimplements the \c new and \c delete operators to align objects allocated * on the heap suitably for arrays of type \p Vc::Vector::EntryType. Subsequent load * and store operations are safe to use the aligned variant. * * \see Vc::MemoryAlignedBaseT * \see Vc::VectorAlignedBase * \see Vc::AlignedBase */ using MemoryAlignedBase = AlignedBase< Detail::max(Vector::MemoryAlignment, Vector::MemoryAlignment, Vector::MemoryAlignment, Vector::MemoryAlignment, Vector::MemoryAlignment, Vector::MemoryAlignment, Vector::MemoryAlignment, Vector::MemoryAlignment, Vector::MemoryAlignment, Vector::MemoryAlignment, Vector::MemoryAlignment, Vector::MemoryAlignment)>; /** * \ingroup Utilities * Variant of the above type ensuring suitable alignment only for the specified vector * type \p V. 
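// --- Editor's note: illustrative usage sketch, not part of the Vc sources. ---
// Deriving from the VectorAlignedBase alias defined above gives a
// heap-allocated user type the over-aligned operator new/delete that its
// Vc::Vector members require (the struct below is made up for illustration):
struct Particle : public Vc::VectorAlignedBase {
    Vc::float_v x, y, z;  // vector members need more than the default alignment
};
// Particle *p = new Particle;  // storage comes from the inherited operator new
// delete p;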
* * \see Vc::MemoryAlignedBase * \see Vc::VectorAlignedBaseT */ template using MemoryAlignedBaseT = AlignedBase; } #endif // VC_COMMON_ALIGNEDBASE_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/bitscanintrinsics.h000066400000000000000000000050021476554302100226060ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2011-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_BITSCANINTRINSICS_H_ #define VC_COMMON_BITSCANINTRINSICS_H_ #if defined(Vc_GCC) || defined(Vc_CLANG) || defined(Vc_APPLECLANG) #include # ifndef _bit_scan_forward # define _bit_scan_forward(x) __builtin_ctz(x) #include "macros.h" static Vc_ALWAYS_INLINE Vc_CONST int _Vc_bit_scan_reverse_asm(unsigned int x) { int r; __asm__("bsr %1,%0" : "=r"(r) : "X"(x)); return r; } # define _bit_scan_reverse(x) _Vc_bit_scan_reverse_asm(x) # endif #elif defined(_WIN32) #include static inline __forceinline unsigned long _bit_scan_forward(unsigned long x) { unsigned long index; _BitScanForward(&index, x); return index; } static inline __forceinline unsigned long _bit_scan_reverse(unsigned long x) { unsigned long index; _BitScanReverse(&index, x); return index; } #elif defined(Vc_ICC) // for all I know ICC supports the _bit_scan_* intrinsics #else // just assume the compiler can do it #endif #endif // VC_COMMON_BITSCANINTRINSICS_H_ conky-1.22.1/3rdparty/Vc/Vc/common/const.h000066400000000000000000000071461476554302100202160ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_CONST_H_ #define VC_COMMON_CONST_H_ #include #include "../global.h" namespace Vc_VERSIONED_NAMESPACE { namespace Detail { template constexpr double exponentToFloat(std::integral_constant); template constexpr double exponentToFloat(std::integral_constant); template <> constexpr double exponentToFloat<0>(std::integral_constant) { return 1.; } template <> constexpr double exponentToFloat<0>(std::integral_constant) { return 1.; } template <> constexpr double exponentToFloat<-32>(std::integral_constant) { return 1. / (65536. * 65536.); } template <> constexpr double exponentToFloat<32>(std::integral_constant) { return 65536. * 65536.; } template <> constexpr double exponentToFloat<-64>(std::integral_constant) { return 1. / (65536. * 65536. * 65536. * 65536.); } template <> constexpr double exponentToFloat<64>(std::integral_constant) { return 65536. * 65536. * 65536. * 65536.; } template constexpr double exponentToFloat(std::integral_constant negative) { return exponentToFloat(negative) * 2.0; } template constexpr double exponentToFloat(std::integral_constant negative) { return exponentToFloat(negative) * 0.5; } template constexpr double doubleConstant() { return (static_cast((mantissa & 0x000fffffffffffffull) | 0x0010000000000000ull) / 0x0010000000000000ull) * exponentToFloat(std::integral_constant()) * sign; } template constexpr float floatConstant() { return (static_cast((mantissa & 0x007fffffu) | 0x00800000u) / 0x00800000u) * static_cast( exponentToFloat(std::integral_constant())) * sign; } } // namespace Detail } // namespace Vc #endif // VC_COMMON_CONST_H_ conky-1.22.1/3rdparty/Vc/Vc/common/data.h000066400000000000000000000035311476554302100177730ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
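// --- Editor's note: illustrative check, not part of the Vc sources. ---
// doubleConstant() in const.h above reassembles an IEEE-754 value as
// sign * (1 + mantissa / 2^52) * 2^exponent at compile time.  Assuming the
// template parameter order <sign, mantissa, exponent> from the Vc sources and
// the usual Vc::Detail namespace alias, a few spot checks:
static_assert(Vc::Detail::doubleConstant<1, 0, 0>() == 1.0, "1.0 * 2^0");
static_assert(Vc::Detail::doubleConstant<1, 0, 10>() == 1024.0, "1.0 * 2^10");
static_assert(Vc::Detail::doubleConstant<-1, 0x8000000000000ull, -1>() == -0.75,
              "-(1.5) * 2^-1");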
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_CONST_DATA_H_ #define VC_COMMON_CONST_DATA_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { alignas(64) extern unsigned int RandomState[]; alignas(32) extern const unsigned int AllBitsSet[8]; } // namespace Common } // namespace Vc #endif // VC_COMMON_CONST_DATA_H_ conky-1.22.1/3rdparty/Vc/Vc/common/deinterleave.h000066400000000000000000000072621476554302100215360ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2010-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_DEINTERLEAVE_H_ #define VC_COMMON_DEINTERLEAVE_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { /** * \ingroup Vectors * * \deprecated Turn to InterleavedMemoryWrapper for a more flexible and complete solution. * * Loads two vectors of values from an interleaved array. * * \param a, b The vectors to load the values from memory into. * \param memory The memory location where to read the next 2 * V::Size values from * \param align Either pass Vc::Aligned or Vc::Unaligned. It defaults to Vc::Aligned if nothing is * specified. 
* * If you store your data as * \code * struct { float x, y; } m[1000]; * \endcode * then the deinterleave function allows you to read \p Size concurrent x and y values like this: * \code * Vc::float_v x, y; * Vc::deinterleave(&x, &y, &m[10], Vc::Unaligned); * \endcode * This code will load m[10], m[12], m[14], ... into \p x and m[11], m[13], m[15], ... into \p y. * * The deinterleave function supports the following type combinations: \verbatim V \ M | float | double | ushort | short | uint | int =========|=======|========|========|=======|======|===== float_v | X | | X | X | | ---------|-------|--------|--------|-------|------|----- double_v | | X | | | | ---------|-------|--------|--------|-------|------|----- int_v | | | | X | | X ---------|-------|--------|--------|-------|------|----- uint_v | | | X | | X | ---------|-------|--------|--------|-------|------|----- short_v | | | | X | | ---------|-------|--------|--------|-------|------|----- ushort_v | | | X | | | \endverbatim */ template Vc_ALWAYS_INLINE void deinterleave(V *a, V *b, const M *memory, A align) { Detail::deinterleave(*a, *b, memory, align); } // documented as default for align above template Vc_ALWAYS_INLINE void deinterleave(V *a, V *b, const M *memory) { Detail::deinterleave(*a, *b, memory, Aligned); } } // namespace Vc #endif // VC_COMMON_DEINTERLEAVE_H_ conky-1.22.1/3rdparty/Vc/Vc/common/detail.h000066400000000000000000000125411476554302100203250ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2018 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_DETAIL_H_ #define VC_COMMON_DETAIL_H_ #include #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { // convertIndexVector {{{ // if the argument is a Vector already we definitely want to keep it that way template Vc_INTRINSIC enable_if<(Traits::is_simd_vector::value && sizeof(typename IV::EntryType) >= sizeof(int)), const IV &> convertIndexVector(const IV &indexVector) { return indexVector; } // but if the scalar (integral) type is smaller than int we convert it up to int. 
Otherwise it's // very likely that the calculations we have to perform will overflow. template Vc_INTRINSIC enable_if<(Traits::is_simd_vector::value && sizeof(typename IV::EntryType) < sizeof(int)), fixed_size_simd> convertIndexVector(const IV &indexVector) { return static_cast>(indexVector); } // helper for promoting int types to int or higher template using promoted_type = decltype(std::declval() + 1); // std::array, Vc::array, and C-array are fixed size and can therefore be converted to a // fixed_size_simd of the same size template Vc_INTRINSIC enable_if::value, fixed_size_simd, N>> convertIndexVector(const std::array &indexVector) { return fixed_size_simd, N>{std::addressof(indexVector[0]), Vc::Unaligned}; } template Vc_INTRINSIC enable_if::value, fixed_size_simd, N>> convertIndexVector(const Vc::array &indexVector) { return fixed_size_simd, N>{std::addressof(indexVector[0]), Vc::Unaligned}; } template Vc_INTRINSIC enable_if::value, fixed_size_simd, N>> convertIndexVector(const T (&indexVector)[N]) { return fixed_size_simd, N>{std::addressof(indexVector[0]), Vc::Unaligned}; } // a plain pointer won't work. Because we need some information on the number of values in // the index argument #ifndef Vc_MSVC // MSVC treats the function as usable in SFINAE context if it is deleted. If it's not declared we // seem to get what we wanted (except for bad diagnostics) template enable_if::value, void> convertIndexVector(T indexVector) = delete; #endif // an initializer_list works, but is runtime-sized (before C++14, at least) so we have to // fall back to std::vector template Vc_INTRINSIC std::vector> convertIndexVector( const std::initializer_list &indexVector) { return {begin(indexVector), end(indexVector)}; } // a std::vector cannot be converted to anything better template Vc_INTRINSIC enable_if<(std::is_integral::value && sizeof(T) >= sizeof(int)), std::vector> convertIndexVector(const std::vector &indexVector) { return indexVector; } template Vc_INTRINSIC enable_if<(std::is_integral::value && sizeof(T) < sizeof(int)), std::vector>> convertIndexVector(const std::vector &indexVector) { return {std::begin(indexVector), std::end(indexVector)}; } template ::value && !Traits::is_simd_vector::value && !std::is_lvalue_reference()[0])>::value)>> Vc_INTRINSIC const T &convertIndexVector(const T &i) { return i; } // }}} } // namespace Common } // namespace Vc_VERSIONED_NAMESPACE #endif // VC_COMMON_DETAIL_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/elementreference.h000066400000000000000000000153751476554302100224030ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2016 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_ELEMENTREFERENCE_H_ #define VC_COMMON_ELEMENTREFERENCE_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Detail { template class ElementReference { friend U; friend Accessor; Vc_INTRINSIC ElementReference(U &o, int i) noexcept : index(i), obj(o) {} static constexpr bool get_noexcept = noexcept(Accessor::get(std::declval(), int())); template static constexpr bool set_noexcept() { return noexcept(Accessor::set(std::declval(), int(), std::declval())); } public: using value_type = typename U::value_type; Vc_INTRINSIC ElementReference(const ElementReference &) = delete; /** * Move Constructor * * this is the only way to constructor an ElementReference in user code * * \note * Please be aware that this class models the concept of a reference * and as such it can have the same lifetime issue as a standard C++ * reference. * * \note * C++ 17 support copy-elision, which in turn allows to * the ElementReference obtained via operator[] from a function * and avoid copying. C++11 and C++14 don't offer this, thus we add * the move constructor, to allow them to move the data and thus avoid * copying (which was prohibited by the deleted constructor above */ Vc_INTRINSIC ElementReference(ElementReference &&) = default; Vc_INTRINSIC operator value_type() const noexcept(get_noexcept) { return Accessor::get(obj, index); } template Vc_INTRINSIC ElementReference &operator=(T &&x) && noexcept(noexcept(Accessor::set(std::declval(), int(), std::declval()))) { Accessor::set(obj, index, std::forward(x)); return *this; } // TODO: improve with operator.() #define Vc_OP_(op_) \ template () \ op_ std::declval())> \ Vc_INTRINSIC ElementReference &operator op_##=(T &&x) && \ noexcept(get_noexcept && noexcept(Accessor::set(std::declval(), int(), \ std::declval()))) \ { \ const value_type &lhs = Accessor::get(obj, index); \ Accessor::set(obj, index, lhs op_ std::forward(x)); \ return *this; \ } Vc_ALL_ARITHMETICS(Vc_OP_); Vc_ALL_SHIFTS(Vc_OP_); Vc_ALL_BINARY(Vc_OP_); #undef Vc_OP_ template Vc_INTRINSIC ElementReference &operator++() && noexcept(noexcept(std::declval() = Accessor::get(std::declval(), int())) && set_noexcept())>()) { value_type x = Accessor::get(obj, index); Accessor::set(obj, index, ++x); return *this; } template Vc_INTRINSIC value_type operator++(int) && noexcept(noexcept(std::declval() = Accessor::get(std::declval(), int())) && set_noexcept()++)>()) { const value_type r = Accessor::get(obj, index); value_type x = r; Accessor::set(obj, index, ++x); return r; } template Vc_INTRINSIC ElementReference &operator--() && noexcept(noexcept(std::declval() = Accessor::get(std::declval(), int())) && set_noexcept())>()) { value_type x = Accessor::get(obj, index); Accessor::set(obj, index, --x); return *this; } template Vc_INTRINSIC value_type 
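// --- Editor's note: illustrative sketch, not part of the Vc sources. ---
// ElementReference above lets operator[] of a SIMD type hand out a writable
// proxy even though a single lane cannot be exposed as a plain T&.  A
// much-reduced model of the same get/set indirection (all names made up):
#include <cstddef>

struct ToyVec {
    using value_type = float;
    float data[4];

    struct Access {  // the "Accessor" policy: only get/set touch the storage
        static float get(ToyVec &o, int i) noexcept { return o.data[i]; }
        static void set(ToyVec &o, int i, float x) noexcept { o.data[i] = x; }
    };

    struct Ref {  // minimal stand-in for Detail::ElementReference<ToyVec, Access>
        ToyVec &obj;
        int index;
        operator float() const noexcept { return Access::get(obj, index); }
        Ref &operator=(float x) && noexcept { Access::set(obj, index, x); return *this; }
    };

    Ref operator[](std::size_t i) { return Ref{*this, int(i)}; }
};
// usage: ToyVec v{{1, 2, 3, 4}}; v[2] = 7.f; float x = v[2];  // x == 7.f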
operator--(int) && noexcept(noexcept(std::declval() = Accessor::get(std::declval(), int())) && set_noexcept()--)>()) { const value_type r = Accessor::get(obj, index); value_type x = r; Accessor::set(obj, index, --x); return r; } friend void swap(ElementReference &&a, ElementReference &&b) { value_type tmp(a); static_cast(a) = static_cast(b); static_cast(b) = tmp; } friend void swap(value_type &a, ElementReference &&b) { value_type tmp(a); a = static_cast(b); static_cast(b) = tmp; } friend void swap(ElementReference &&a, value_type &b) { value_type tmp(a); static_cast(a) = b; b = tmp; } private: int index; U &obj; }; } // namespace Detail } // namespace Vc #endif // VC_COMMON_ELEMENTREFERENCE_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/exponential.h000066400000000000000000000075421476554302100214160ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2012-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------- The exp implementation is derived from Cephes, which carries the following Copyright notice: Cephes Math Library Release 2.2: June, 1992 Copyright 1984, 1987, 1989 by Stephen L. Moshier Direct inquiries to 30 Frost Street, Cambridge, MA 02140 }}}*/ #ifdef Vc_COMMON_MATH_H_INTERNAL constexpr float log2_e = 1.44269504088896341f; // These constants are adjusted to account for single-precision floating point. 
// The original are for double precision: // // constexpr float MAXLOGF = 88.72283905206835f; // constexpr float MINLOGF = -103.278929903431851103f; /* log(2^-149) */ constexpr float MAXLOGF = 88.722831726074219f; /* log(2^127.99998474121094f) */ constexpr float MINLOGF = -88.029685974121094f; /* log(2^-126.99999237060547f) */ constexpr float MAXNUMF = 3.4028234663852885981170418348451692544e38f; template ::value || std::is_same::value>> inline Vector> exp(Vector x) { using V = Vector; typedef typename V::Mask M; typedef Detail::Const C; const M overflow = x > MAXLOGF; const M underflow = x < MINLOGF; // log₂(eˣ) = x * log₂(e) * log₂(2) // = log₂(2^(x * log₂(e))) // => eˣ = 2^(x * log₂(e)) // => n = ⌊x * log₂(e) + ½⌋ // => y = x - n * ln(2) | recall that: ln(2) * log₂(e) == 1 // <=> eˣ = 2ⁿ * eʸ V z = floor(C::log2_e() * x + 0.5f); const auto n = static_cast>(z); x -= z * C::ln2_large(); x -= z * C::ln2_small(); /* Theoretical peak relative error in [-0.5, +0.5] is 4.2e-9. */ z = ((((( 1.9875691500E-4f * x + 1.3981999507E-3f) * x + 8.3334519073E-3f) * x + 4.1665795894E-2f) * x + 1.6666665459E-1f) * x + 5.0000001201E-1f) * (x * x) + x + 1.0f; x = ldexp(z, n); // == z * 2ⁿ x(overflow) = std::numeric_limits::infinity(); x.setZero(underflow); return x; } #endif // Vc_COMMON_MATH_H_INTERNAL conky-1.22.1/3rdparty/Vc/Vc/common/fix_clang_emmintrin.h000066400000000000000000000061121476554302100230740ustar00rootroot00000000000000/*{{{ Copyright (C) 2013-2015 Matthias Kretz Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the name of the author not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. The author disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall the author be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
}}}*/ #ifndef VC_COMMON_FIX_CLANG_EMMINTRIN_H_ #define VC_COMMON_FIX_CLANG_EMMINTRIN_H_ #include "../global.h" #if (defined Vc_CLANG && Vc_CLANG < 0x30700) || (defined Vc_APPLECLANG && Vc_APPLECLANG < 0x70000) #ifdef _mm_slli_si128 #undef _mm_slli_si128 #define _mm_slli_si128(a, count) __extension__ ({ \ (__m128i)__builtin_ia32_pslldqi128((__m128i)(a), (count)*8); }) #endif #ifdef _mm_srli_si128 #undef _mm_srli_si128 #define _mm_srli_si128(a, count) __extension__ ({ \ (__m128i)__builtin_ia32_psrldqi128((__m128i)(a), (count)*8); }) #endif #ifdef _mm_shuffle_epi32 #undef _mm_shuffle_epi32 #define _mm_shuffle_epi32(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), (__v4si) _mm_set1_epi32(0), \ (imm) & 0x3, ((imm) & 0xc) >> 2, \ ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); }) #endif #ifdef _mm_shufflelo_epi16 #undef _mm_shufflelo_epi16 #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), (__v8hi) _mm_set1_epi16(0), \ (imm) & 0x3, ((imm) & 0xc) >> 2, \ ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 4, 5, 6, 7); }) #endif #ifdef _mm_shufflehi_epi16 #undef _mm_shufflehi_epi16 #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), (__v8hi) _mm_set1_epi16(0), \ 0, 1, 2, 3, \ 4 + (((imm) & 0x03) >> 0), \ 4 + (((imm) & 0x0c) >> 2), \ 4 + (((imm) & 0x30) >> 4), \ 4 + (((imm) & 0xc0) >> 6)); }) #endif #ifdef _mm_shuffle_pd #undef _mm_shuffle_pd #define _mm_shuffle_pd(a, b, i) __extension__ ({ \ __builtin_shufflevector((__m128d)(a), (__m128d)(b), (i) & 1, (((i) & 2) >> 1) + 2); }) #endif #endif // Vc_CLANG || Vc_APPLECLANG #endif // VC_COMMON_FIX_CLANG_EMMINTRIN_H_ conky-1.22.1/3rdparty/Vc/Vc/common/gatherimplementation.h000066400000000000000000000245171476554302100233110ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_COMMON_GATHERIMPLEMENTATION_H_ #define VC_COMMON_GATHERIMPLEMENTATION_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { enum class GatherScatterImplementation : int { SimpleLoop, SetIndexZero, BitScanLoop, PopcntSwitch }; using SimpleLoopT = std::integral_constant; using SetIndexZeroT = std::integral_constant; using BitScanLoopT = std::integral_constant; using PopcntSwitchT = std::integral_constant; template Vc_ALWAYS_INLINE void executeGather(SetIndexZeroT, V &v, const MT *mem, IT &&indexes_, typename V::MaskArgument mask) { auto indexes = std::forward(indexes_); indexes.setZeroInverted(static_cast(mask)); const V tmp(mem, indexes); where(mask) | v = tmp; } template Vc_ALWAYS_INLINE void executeGather(SimpleLoopT, V &v, const MT *mem, const IT &indexes, const typename V::MaskArgument mask) { if (Vc_IS_UNLIKELY(mask.isEmpty())) { return; } #if defined Vc_GCC && Vc_GCC >= 0x40900 // GCC 4.8 doesn't support dependent type and constexpr vector_size argument constexpr std::size_t Sizeof = sizeof(V); using Builtin [[gnu::vector_size(Sizeof)]] = typename V::value_type; Builtin tmp = reinterpret_cast(v.data()); Common::unrolled_loop([&](std::size_t i) { if (mask[i]) { tmp[i] = mem[indexes[i]]; } }); v.data() = reinterpret_cast(tmp); #else Common::unrolled_loop([&](std::size_t i) { if (mask[i]) v[i] = mem[indexes[i]]; }); #endif } template Vc_ALWAYS_INLINE void executeGather(BitScanLoopT, V &v, const MT *mem, const IT &indexes, typename V::MaskArgument mask) { #ifdef Vc_GNU_ASM size_t bits = mask.toInt(); while (Vc_IS_LIKELY(bits > 0)) { size_t i, j; asm("bsf %[bits],%[i]\n\t" "bsr %[bits],%[j]\n\t" "btr %[i],%[bits]\n\t" "btr %[j],%[bits]\n\t" : [i] "=r"(i), [j] "=r"(j), [bits] "+r"(bits)); v[i] = mem[indexes[i]]; v[j] = mem[indexes[j]]; } #else // Alternative from Vc::SSE (0.7) int bits = mask.toInt(); while (bits) { const int i = _bit_scan_forward(bits); bits &= bits - 1; v[i] = mem[indexes[i]]; } #endif // Vc_GNU_ASM } template Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT, V &v, const MT *mem, const IT &indexes, typename V::MaskArgument mask, enable_if = nullarg) { unsigned int bits = mask.toInt(); unsigned int low, high = 0; switch (Vc::Detail::popcnt16(bits)) { case 16: v.gather(mem, indexes); break; case 15: low = _bit_scan_forward(bits); bits ^= 1 << low; v[low] = mem[indexes[low]]; // fallthrough case 14: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; high = (1 << high); // fallthrough case 13: low = _bit_scan_forward(bits); bits ^= high | (1 << low); v[low] = mem[indexes[low]]; // fallthrough case 12: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; high = (1 << high); // fallthrough case 11: low = _bit_scan_forward(bits); bits ^= high | (1 << low); v[low] = mem[indexes[low]]; // fallthrough case 10: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; high = (1 << high); // fallthrough case 9: low = _bit_scan_forward(bits); bits ^= high | (1 << low); v[low] = mem[indexes[low]]; // fallthrough case 8: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; high = (1 << high); // fallthrough case 7: low = _bit_scan_forward(bits); bits ^= high | (1 << low); v[low] = mem[indexes[low]]; // fallthrough case 6: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; high = (1 << high); // fallthrough case 5: low = _bit_scan_forward(bits); bits ^= high | (1 << low); v[low] = mem[indexes[low]]; // fallthrough case 4: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; high = (1 << high); 
// fallthrough case 3: low = _bit_scan_forward(bits); bits ^= high | (1 << low); v[low] = mem[indexes[low]]; // fallthrough case 2: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; // fallthrough case 1: low = _bit_scan_forward(bits); v[low] = mem[indexes[low]]; // fallthrough case 0: break; } } template Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT, V &v, const MT *mem, const IT &indexes, typename V::MaskArgument mask, enable_if = nullarg) { unsigned int bits = mask.toInt(); unsigned int low, high = 0; switch (Vc::Detail::popcnt8(bits)) { case 8: v.gather(mem, indexes); break; case 7: low = _bit_scan_forward(bits); bits ^= 1 << low; v[low] = mem[indexes[low]]; // fallthrough case 6: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; high = (1 << high); // fallthrough case 5: low = _bit_scan_forward(bits); bits ^= high | (1 << low); v[low] = mem[indexes[low]]; // fallthrough case 4: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; high = (1 << high); // fallthrough case 3: low = _bit_scan_forward(bits); bits ^= high | (1 << low); v[low] = mem[indexes[low]]; // fallthrough case 2: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; // fallthrough case 1: low = _bit_scan_forward(bits); v[low] = mem[indexes[low]]; // fallthrough case 0: break; } } template Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT, V &v, const MT *mem, const IT &indexes, typename V::MaskArgument mask, enable_if = nullarg) { unsigned int bits = mask.toInt(); unsigned int low, high = 0; switch (Vc::Detail::popcnt4(bits)) { case 4: v.gather(mem, indexes); break; case 3: low = _bit_scan_forward(bits); bits ^= 1 << low; v[low] = mem[indexes[low]]; // fallthrough case 2: high = _bit_scan_reverse(bits); v[high] = mem[indexes[high]]; // fallthrough case 1: low = _bit_scan_forward(bits); v[low] = mem[indexes[low]]; // fallthrough case 0: break; } } template Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT, V &v, const MT *mem, const IT &indexes, typename V::MaskArgument mask, enable_if = nullarg) { unsigned int bits = mask.toInt(); unsigned int low; switch (Vc::Detail::popcnt4(bits)) { case 2: v.gather(mem, indexes); break; case 1: low = _bit_scan_forward(bits); v[low] = mem[indexes[low]]; // fallthrough case 0: break; } } } // namespace Common } // namespace Vc #endif // VC_COMMON_GATHERIMPLEMENTATION_H_ conky-1.22.1/3rdparty/Vc/Vc/common/gatherinterface.h000066400000000000000000000250121476554302100222130ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef Vc_CURRENT_CLASS_NAME #error "incorrect use of common/gatherinterface.h: Vc_CURRENT_CLASS_NAME must be defined to the current class name for declaring constructors." #endif /////////////////////////////////////////////////////////////////////////////////////////// // gathers // A gather takes the following arguments: // 1. A const pointer to memory of any type that can convert to EntryType // 2. An indexes “vector”. The requirement is that the type implements the subscript operator, // stores «Size» valid index values, and each offset to the pointer above yields a valid // memory location for reading. // 3. Optionally the third argument may be a mask. The mask disables several memory reads and // thus removes the requirements in (2.) for the disabled entries. private: /**\internal * This function implements a gather given a pointer to memory \p mem and some * container object storing the gather \p indexes. * * \param mem This pointer must be aligned correctly for the type \p MT. This is the * natural behavior of C++, so this is typically the case. * \param indexes This object contains at least \VSize{T} indexes that denote the * offset in \p mem where the components for the current vector should be copied from. * The offset is not in Bytes, but in multiples of `sizeof(MT)`. */ // enable_if::value && // has_subscript_operator::value> template inline void gatherImplementation(const Common::GatherArguments &); /**\internal * This overload of the above function adds a \p mask argument to disable memory * accesses at the \p indexes offsets where \p mask is \c false. */ template inline void gatherImplementation(const Common::GatherArguments &, MaskArgument mask); public: #define Vc_ASSERT_GATHER_PARAMETER_TYPES_ \ static_assert( \ std::is_convertible::value, \ "The memory pointer needs to point to a type that can be converted to the " \ "EntryType of this SIMD vector type."); \ static_assert( \ Vc::Traits::has_subscript_operator::value, \ "The indexes argument must be a type that implements the subscript operator."); \ static_assert( \ !Traits::is_simd_vector::value || \ Traits::simd_vector_size::value >= Size, \ "If you use a SIMD vector for the indexes parameter, the index vector must " \ "have at least as many entries as this SIMD vector."); \ static_assert( \ !std::is_array::value || \ (std::rank::value == 1 && \ (std::extent::value == 0 || std::extent::value >= Size)), \ "If you use a simple array for the indexes parameter, the array must have " \ "at least as many entries as this SIMD vector.") /** * \name Gather constructors and member functions * * Constructs or loads a vector from the objects at `mem[indexes[0]]`, * `mem[indexes[1]]`, `mem[indexes[2]]`, ... * * All gather functions optionally take a mask as last argument. In that case only the * entries that are selected in the mask are accessed in memory and copied to the * vector. This enables invalid indexes in the \p indexes vector if those are masked * off in \p mask. 
* * Gathers from structured data (AoS: arrays of struct) are possible via a special * subscript operator of the container (array). You can use \ref Vc::array and \ref * Vc::vector as drop-in replacements for \c std::array and \c std::vector. These * container classes contain the necessary subscript operator overload. Example: * \code * Vc::vector<float> data(100); * std::iota(data.begin(), data.end(), 0.f); // fill with values 0, 1, 2, ... * auto indexes = float_v::IndexType::IndexesFromZero(); * float_v gathered = data[indexes]; // gathered == [0, 1, 2, ...] * \endcode * * This also works for gathers into arrays of structures: * \code * struct Point { float x, y, z; }; * Vc::array<Point, 100> points; * // fill points ... * auto indexes = float_v::IndexType::IndexesFromZero(); * float_v xs = points[indexes][&Point::x]; // [points[0].x, points[1].x, points[2].x, ...] * float_v ys = points[indexes][&Point::y]; // [points[0].y, points[1].y, points[2].y, ...] * float_v zs = points[indexes][&Point::z]; // [points[0].z, points[1].z, points[2].z, ...] * \endcode * * Alternatively, you can use Vc::Common::AdaptSubscriptOperator to extend a given * container class with the necessary subscript operator. Example: * \code * template <typename T, typename Allocator = std::allocator<T>> * using my_vector = Vc::Common::AdaptSubscriptOperator<std::vector<T, Allocator>>; * \endcode * * \param mem A pointer to memory which contains objects of type \p MT at the offsets * given by \p indexes. * \param indexes A container/vector of offsets into \p mem. * The type of \p indexes (\p IT) may either be a pointer to integers * (C-array) or a vector of integers (preferably IndexType). * \param mask If a mask is given, only the active entries will be copied from memory. * * \note If you use a masked gather constructor, the masked-off entries of the vector * are zero-initialized.
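* * A minimal sketch of a masked gather, continuing the example above (the names \c probe and \c selector are illustrative and not part of the API): * \code * Vc::vector<float> data(100); * std::iota(data.begin(), data.end(), 0.f); * auto indexes = float_v::IndexType::IndexesFromZero(); * float_v probe(&data[0], indexes); // plain gather: [0, 1, 2, ...] * auto selector = probe < 3.f; // mask: true only for the first three lanes * float_v gathered(&data[0], indexes, selector); // masked gather; masked-off lanes are zero * \endcode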
*/ ///@{ /// Gather constructor template ::value>> Vc_INTRINSIC Vc_CURRENT_CLASS_NAME(const MT *mem, const IT &indexes) { Vc_ASSERT_GATHER_PARAMETER_TYPES_; gatherImplementation( Common::make_gather<1>(mem, Common::convertIndexVector(indexes))); } template Vc_INTRINSIC Vc_CURRENT_CLASS_NAME(const Common::GatherArguments &args) { Vc_ASSERT_GATHER_PARAMETER_TYPES_; gatherImplementation(args); } /// Masked gather constructor template ::value>> Vc_INTRINSIC Vc_CURRENT_CLASS_NAME(const MT *mem, const IT &indexes, MaskArgument mask) { Vc_ASSERT_GATHER_PARAMETER_TYPES_; gatherImplementation( Common::make_gather<1>(mem, Common::convertIndexVector(indexes)), mask); } template Vc_INTRINSIC Vc_CURRENT_CLASS_NAME(const Common::GatherArguments &args, MaskArgument mask) { Vc_ASSERT_GATHER_PARAMETER_TYPES_; gatherImplementation(args, mask); } /// Gather function template ::value>> Vc_INTRINSIC void gather(const MT *mem, const IT &indexes) { Vc_ASSERT_GATHER_PARAMETER_TYPES_; gatherImplementation( Common::make_gather<1>(mem, Common::convertIndexVector(indexes))); } /// Masked gather function template ::value>> Vc_INTRINSIC void gather(const MT *mem, const IT &indexes, MaskArgument mask) { Vc_ASSERT_GATHER_PARAMETER_TYPES_; gatherImplementation( Common::make_gather<1>(mem, Common::convertIndexVector(indexes)), mask); } ///@} #include "gatherinterface_deprecated.h" /**\internal * \name Gather function to use from Vc::Common::subscript_operator * * \param args * \param mask */ ///@{ template Vc_INTRINSIC void gather(const Common::GatherArguments &args) { Vc_ASSERT_GATHER_PARAMETER_TYPES_; gatherImplementation(args); } template Vc_INTRINSIC void gather(const Common::GatherArguments &args, MaskArgument mask) { Vc_ASSERT_GATHER_PARAMETER_TYPES_; gatherImplementation(args, mask); } ///@} #undef Vc_ASSERT_GATHER_PARAMETER_TYPES_ conky-1.22.1/3rdparty/Vc/Vc/common/gatherinterface_deprecated.h000066400000000000000000000406201476554302100243750ustar00rootroot00000000000000 /// \name Deprecated Members ///@{ /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, const EntryType S1::*member1, IT indexes) { gather(Common::SubscriptOperation, true>( array, indexes)[member1] .gatherArguments()); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. 
* \param mask If a mask is given only the active entries will be gathered/scattered. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, const EntryType S1::*member1, IT indexes, MaskArgument mask) { gather(Common::SubscriptOperation, true>( array, indexes)[member1] .gatherArguments(), mask); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that * struct (i.e. array[i].*member1.*member2 is read). * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, const S2 S1::*member1, const EntryType S2::*member2, IT indexes) { gather(Common::SubscriptOperation, true>( array, indexes)[member1][member2] .gatherArguments()); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that * struct (i.e. array[i].*member1.*member2 is read). * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. * \param mask If a mask is given only the active entries will be gathered/scattered. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, const S2 S1::*member1, const EntryType S2::*member2, IT indexes, MaskArgument mask) { gather(Common::SubscriptOperation, true>( array, indexes)[member1][member2] .gatherArguments(), mask); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param ptrMember1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param outerIndexes * \param innerIndexes */ template Vc_DEPRECATED( "use the subscript operator to Vc::array or Vc::vector " "instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, const EntryType *const S1::*ptrMember1, IT1 outerIndexes, IT2 innerIndexes) { gather(Common::SubscriptOperation, true>( array, outerIndexes)[ptrMember1][innerIndexes] .gatherArguments()); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. 
* * \param array A pointer into memory (without alignment restrictions). * \param ptrMember1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param outerIndexes * \param innerIndexes * \param mask If a mask is given only the active entries will be gathered/scattered. */ template Vc_DEPRECATED( "use the subscript operator to Vc::array or Vc::vector " "instead.") inline Vc_CURRENT_CLASS_NAME(const S1 *array, const EntryType *const S1::*ptrMember1, IT1 outerIndexes, IT2 innerIndexes, MaskArgument mask) { gather(Common::SubscriptOperation, true>( array, outerIndexes)[ptrMember1][innerIndexes] .gatherArguments(), mask); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void gather(const S1 *array, const EntryType S1::*member1, IT indexes) { gather(Common::SubscriptOperation, true>( array, indexes)[member1] .gatherArguments()); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. * \param mask If a mask is given only the active entries will be gathered/scattered. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void gather(const S1 *array, const EntryType S1::*member1, IT indexes, MaskArgument mask) { gather(Common::SubscriptOperation, true>( array, indexes)[member1] .gatherArguments(), mask); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that * struct (i.e. array[i].*member1.*member2 is read). * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. 
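* * A hedged sketch of the recommended subscripting replacement (the concrete types \c S1 and \c S2, their members \c inner and \c x, and the array size are assumptions made for illustration): * \code * struct S2 { float x; }; * struct S1 { S2 inner; }; * Vc::array<S1, 100> array; * // fill array ... * auto indexes = float_v::IndexType::IndexesFromZero(); * float_v v = array[indexes][&S1::inner][&S2::x]; // reads array[indexes[i]].inner.x for each lane * \endcode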
*/ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void gather(const S1 *array, const S2 S1::*member1, const EntryType S2::*member2, IT indexes) { gather(Common::SubscriptOperation, true>( array, indexes)[member1][member2] .gatherArguments()); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that * struct (i.e. array[i].*member1.*member2 is read). * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. * \param mask If a mask is given only the active entries will be gathered/scattered. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void gather(const S1 *array, const S2 S1::*member1, const EntryType S2::*member2, IT indexes, MaskArgument mask) { gather(Common::SubscriptOperation, true>( array, indexes)[member1][member2] .gatherArguments(), mask); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param ptrMember1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param outerIndexes * \param innerIndexes */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void gather(const S1 *array, const EntryType *const S1::*ptrMember1, IT1 outerIndexes, IT2 innerIndexes) { gather(Common::SubscriptOperation, true>( array, outerIndexes)[ptrMember1][innerIndexes] .gatherArguments()); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param ptrMember1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param outerIndexes * \param innerIndexes * \param mask If a mask is given only the active entries will be gathered/scattered. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void gather(const S1 *array, const EntryType *const S1::*ptrMember1, IT1 outerIndexes, IT2 innerIndexes, MaskArgument mask) { gather(Common::SubscriptOperation, true>( array, outerIndexes)[ptrMember1][innerIndexes] .gatherArguments(), mask); } ///@} conky-1.22.1/3rdparty/Vc/Vc/common/generalinterface.h000066400000000000000000000056321476554302100223640ustar00rootroot00000000000000/* This file is part of the Vc library. 
{{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ public: /////////////////////////////////////////////////////////////////////////// // init to zero Vc_INTRINSIC Vector() = default; /////////////////////////////////////////////////////////////////////////// // types /////////////////////////////////////////////////////////////////////////// // constants static constexpr std::size_t size() { return Size; } /////////////////////////////////////////////////////////////////////////// // constant Vectors explicit Vc_INTRINSIC_L Vector(VectorSpecialInitializerZero) Vc_INTRINSIC_R; explicit Vc_INTRINSIC_L Vector(VectorSpecialInitializerOne) Vc_INTRINSIC_R; explicit Vc_INTRINSIC_L Vector(VectorSpecialInitializerIndexesFromZero) Vc_INTRINSIC_R; static Vc_INTRINSIC Vc_CONST Vector Zero() { return Vector(Vc::Zero); } static Vc_INTRINSIC Vc_CONST Vector One() { return Vector(Vc::One); } static Vc_INTRINSIC Vc_CONST Vector IndexesFromZero() { return Vector(Vc::IndexesFromZero); } /////////////////////////////////////////////////////////////////////////// // generator ctor template ()(size_t())), value_type>::value>::type> explicit Vector(G &&g) : Vector(generate(std::forward(g))) { } // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/iif.h000066400000000000000000000075121476554302100176340ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2012-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_IIF_H_ #define VC_COMMON_IIF_H_ #include "../type_traits" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { /** * \ingroup Utilities * * Function to mimic the ternary operator '?:' (inline-if). * * \param condition Determines which values are returned. This is analog to the first argument to * the ternary operator. * \param trueValue The values to return where \p condition is \c true. * \param falseValue The values to return where \p condition is \c false. * \return A combination of entries from \p trueValue and \p falseValue, according to \p condition. * * So instead of the scalar variant * \code * float x = a > 1.f ? b : b + c; * \endcode * you'd write * \code * float_v x = Vc::iif (a > 1.f, b, b + c); * \endcode * * Assuming \c a has the values [0, 3, 5, 1], \c b is [1, 1, 1, 1], and \c c is [1, 2, 3, 4], then x * will be [2, 2, 3, 5]. */ template Vc_ALWAYS_INLINE enable_if::value && is_simd_vector::value, T> iif( const Mask &condition, const T &trueValue, const T &falseValue) { T result(falseValue); Vc::where(condition) | result = trueValue; return result; } /**\internal * The following declaration makes it explicit that `iif (Mask, non-vector, non-vector)` * is not supposed to work. Doing the same thing with \c static_assert would break SFINAE. */ template enable_if::value && !is_simd_vector::value, T> iif( const Mask &, const T &, const T &) = delete; /** * \ingroup Utilities * * Overload of the above for boolean conditions. * * This typically results in direct use of the ternary operator. This function makes it easier to * switch from a Vc type to a builtin type. * * \param condition Determines which value is returned. This is analog to the first argument to * the ternary operator. * \param trueValue The value to return if \p condition is \c true. * \param falseValue The value to return if \p condition is \c false. * \return Either \p trueValue or \p falseValue, depending on \p condition. */ template constexpr T iif (bool condition, const T &trueValue, const T &falseValue) { return condition ? trueValue : falseValue; } } // namespace Vc #endif // VC_COMMON_IIF_H_ conky-1.22.1/3rdparty/Vc/Vc/common/indexsequence.h000066400000000000000000000062631476554302100217270ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_INDEXSEQUENCE_H_ #define VC_COMMON_INDEXSEQUENCE_H_ #include "../global.h" namespace Vc_VERSIONED_NAMESPACE { /** \internal * Helper class for a sequence of size_t values from 0 to N. This type will be included in * C++14. */ template struct index_sequence { static constexpr std::size_t size() noexcept { return sizeof...(I); } }; /** \internal * This struct builds an index_sequence type from a given upper bound \p N. * It does so recursively via concatenation of to index sequences of length N/2. */ template struct make_index_sequence_impl { template static index_sequence join(std::false_type, index_sequence); template static index_sequence join( std::true_type, index_sequence); using is_odd = std::integral_constant; using half = typename make_index_sequence_impl::type; using type = decltype(join<(N + 1) / 2>(is_odd(), half())); }; template <> struct make_index_sequence_impl<0> { using type = index_sequence<>; }; template <> struct make_index_sequence_impl<1> { using type = index_sequence<0>; }; template <> struct make_index_sequence_impl<2> { using type = index_sequence<0, 1>; }; /** \internal * Creates an index_sequence type for the upper bound \p N. */ template using make_index_sequence = typename make_index_sequence_impl::type; } #endif // VC_COMMON_INDEXSEQUENCE_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/interleave.h000066400000000000000000000050331476554302100212170ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_INTERLEAVE_H_ #define VC_COMMON_INTERLEAVE_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { /** \ingroup Utilities Interleaves the entries from \p a and \p b into two vectors of the same type. The order in the returned vector contains the elements `a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3], ...`. Example: \code Vc::SimdArray a = { 1, 2, 3, 4 }; Vc::SimdArray b = { 9, 8, 7, 6 }; std::tie(a, b) = Vc::interleave(a, b); std::cout << a << b; // prints: // <1 9 2 8><3 7 4 6> \endcode \param a input vector whose data will appear at even indexes in the output \param b input vector whose data will appear at odd indexes in the output \return two vectors with data from \p a and \p b interleaved */ template ::value>> std::pair interleave(const V &a, const V &b) { return {a.interleaveLow(b), a.interleaveHigh(b)}; } } // namespace Vc #endif // VC_COMMON_INTERLEAVE_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/interleavedmemory.h000066400000000000000000000312741476554302100226220ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2012-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_COMMON_INTERLEAVEDMEMORY_H_ #define VC_COMMON_INTERLEAVEDMEMORY_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { /** * \internal */ template struct InterleavedMemoryAccessBase { // Partial specialization doesn't work for functions without partial specialization of the whole // class. Therefore we capture the contents of InterleavedMemoryAccessBase in a macro to easily // copy it into its specializations. typedef typename std::conditional< Readonly, typename std::add_const::type, typename V::EntryType>::type T; typedef typename V::AsArg VArg; typedef T Ta Vc_MAY_ALIAS; const I m_indexes; Ta *const m_data; Vc_ALWAYS_INLINE InterleavedMemoryAccessBase(typename I::AsArg indexes, Ta *data) : m_indexes(indexes), m_data(data) { } // implementations of the following are in {scalar,sse,avx}/detail.h template Vc_INTRINSIC void deinterleave(Vs &&... vs) const { Impl::deinterleave(m_data, m_indexes, std::forward(vs)...); } protected: using Impl = Vc::Detail::InterleaveImpl; template Vc_INTRINSIC void callInterleave(T &&a, index_sequence) { Impl::interleave(m_data, m_indexes, a[Indexes]...); } }; /** * \internal */ // delay execution of the deinterleaving gather until operator= template struct InterleavedMemoryReadAccess : public InterleavedMemoryAccessBase { typedef InterleavedMemoryAccessBase Base; typedef typename Base::Ta Ta; Vc_ALWAYS_INLINE InterleavedMemoryReadAccess(Ta *data, typename I::AsArg indexes) : Base(StructSize == 1u ? indexes : StructSize == 2u ? indexes << 1 : StructSize == 4u ? indexes << 2 : StructSize == 8u ? indexes << 3 : StructSize == 16u ? indexes << 4 : indexes * I(int(StructSize)), data) { } template Vc_ALWAYS_INLINE T deinterleave_unpack(index_sequence) const { T r; Base::Impl::deinterleave(this->m_data, this->m_indexes, std::get(r)...); return r; } template ::value && std::is_same( std::declval()))>>::value)>> Vc_ALWAYS_INLINE operator T() const { return deinterleave_unpack(make_index_sequence::value>()); } }; ///\internal Runtime check (NDEBUG) for asserting unique indexes. template struct CheckIndexesUnique { #ifdef NDEBUG static Vc_INTRINSIC void test(const I &) {} #else static void test(const I &indexes) { const I test = indexes.sorted(); Vc_ASSERT(I::Size == 1 || (test == test.rotated(1)).isEmpty()) } #endif }; ///\internal For SuccessiveEntries there can never be a problem. template struct CheckIndexesUnique > { static Vc_INTRINSIC void test(const SuccessiveEntries &) {} }; /** * \internal */ template struct InterleavedMemoryAccess : public InterleavedMemoryReadAccess { typedef InterleavedMemoryAccessBase Base; typedef typename Base::Ta Ta; Vc_ALWAYS_INLINE InterleavedMemoryAccess(Ta *data, typename I::AsArg indexes) : InterleavedMemoryReadAccess(data, indexes) { CheckIndexesUnique::test(indexes); } template Vc_ALWAYS_INLINE void operator=(VectorReferenceArray &&rhs) { static_assert(N <= StructSize, "You_are_trying_to_scatter_more_data_into_the_struct_than_it_has"); this->callInterleave(std::move(rhs), make_index_sequence()); } template Vc_ALWAYS_INLINE void operator=(VectorReferenceArray &&rhs) { static_assert(N <= StructSize, "You_are_trying_to_scatter_more_data_into_the_struct_than_it_has"); this->callInterleave(std::move(rhs), make_index_sequence()); } }; /** * Wraps a pointer to memory with convenience functions to access it via vectors. * * \param S The type of the struct. * \param V The type of the vector to be returned when read. This should reflect the type of the * members inside the struct. 
* * \see operator[] * \ingroup Containers * \headerfile interleavedmemory.h */ template class InterleavedMemoryWrapper { typedef typename std::conditional::value, const typename V::EntryType, typename V::EntryType>::type T; typedef typename V::IndexType I; typedef typename V::AsArg VArg; typedef const I &IndexType; static constexpr std::size_t StructSize = sizeof(S) / sizeof(T); using ReadAccess = InterleavedMemoryReadAccess; using Access = typename std::conditional::value, ReadAccess, InterleavedMemoryAccess>::type; using ReadSuccessiveEntries = InterleavedMemoryReadAccess>; using AccessSuccessiveEntries = typename std::conditional< std::is_const::value, ReadSuccessiveEntries, InterleavedMemoryAccess>>::type; typedef T Ta Vc_MAY_ALIAS; Ta *const m_data; static_assert(StructSize * sizeof(T) == sizeof(S), "InterleavedMemoryAccess_does_not_support_packed_structs"); public: /** * Constructs the wrapper object. * * \param s A pointer to a C-array. */ Vc_ALWAYS_INLINE InterleavedMemoryWrapper(S *s) : m_data(reinterpret_cast(s)) { } /** * Interleaved scatter/gather access. * * Assuming you have a struct of floats and a vector of \p indexes into the array, this function * can be used to access the struct entries as vectors using the minimal number of store or load * instructions. * * \param indexes Vector of indexes that determine the gather locations. * * \return A special (magic) object that executes the loads and deinterleave on assignment to a * vector tuple. * * Example: * \code * struct Foo { * float x, y, z; * }; * * void fillWithBar(Foo *_data, uint_v indexes) * { * Vc::InterleavedMemoryWrapper data(_data); * const float_v x = bar(1); * const float_v y = bar(2); * const float_v z = bar(3); * data[indexes] = (x, y, z); * // it's also possible to just store a subset at the front of the struct: * data[indexes] = (x, y); * // if you want to store a single entry, use scatter: * z.scatter(_data, &Foo::x, indexes); * } * * float_v normalizeStuff(Foo *_data, uint_v indexes) * { * Vc::InterleavedMemoryWrapper data(_data); * float_v x, y, z; * (x, y, z) = data[indexes]; * // it is also possible to just load a subset from the front of the struct: * // (x, y) = data[indexes]; * return Vc::sqrt(x * x + y * y + z * z); * } * \endcode * * You may think of the gather operation (or scatter as the inverse) like this: \verbatim Memory: {x0 y0 z0 x1 y1 z1 x2 y2 z2 x3 y3 z3 x4 y4 z4 x5 y5 z5 x6 y6 z6 x7 y7 z7 x8 y8 z8} indexes: [5, 0, 1, 7] Result in (x, y, z): ({x5 x0 x1 x7}, {y5 y0 y1 y7}, {z5 z0 z1 z7}) \endverbatim * * \warning If \p indexes contains non-unique entries on scatter, the result is undefined. If * \c NDEBUG is not defined the implementation will assert that the \p indexes entries are unique. */ template Vc_ALWAYS_INLINE enable_if::value && std::is_convertible::value && !std::is_const::value, Access> operator[](IT indexes) { return Access(m_data, indexes); } /// const overload (gathers only) of the above function Vc_ALWAYS_INLINE ReadAccess operator[](IndexType indexes) const { return ReadAccess(m_data, indexes); } /// alias of the above function Vc_ALWAYS_INLINE ReadAccess gather(IndexType indexes) const { return operator[](indexes); } /** * Interleaved access. * * This function is an optimization of the function above, for cases where the index vector * contains consecutive values. It will load \p V::Size consecutive entries from memory and * deinterleave them into Vc vectors. * * \param first The first of \p V::Size indizes to be accessed. 
* * \return A special (magic) object that executes the loads and deinterleave on assignment to a * vector tuple. * * Example: * \code * struct Foo { * float x, y, z; * }; * * void foo(Foo *_data) * { * Vc::InterleavedMemoryWrapper data(_data); * for (size_t i = 0; i < 32U; i += float_v::Size) { * float_v x, y, z; * (x, y, z) = data[i]; * // now: * // x = { _data[i].x, _data[i + 1].x, _data[i + 2].x, ... } * // y = { _data[i].y, _data[i + 1].y, _data[i + 2].y, ... } * // z = { _data[i].z, _data[i + 1].z, _data[i + 2].z, ... } * ... * } * } * \endcode */ Vc_ALWAYS_INLINE ReadSuccessiveEntries operator[](size_t first) const { return ReadSuccessiveEntries(m_data, first); } Vc_ALWAYS_INLINE AccessSuccessiveEntries operator[](size_t first) { return AccessSuccessiveEntries(m_data, first); } //Vc_ALWAYS_INLINE Access scatter(I indexes, VArg v0, VArg v1); }; } // namespace Common using Common::InterleavedMemoryWrapper; /** * Creates an adapter around a given array of structure (AoS) that enables optimized loads * + deinterleaving operations / interleaving operations + stores for vector access (using * \p V). * * \tparam V The `Vc::Vector` type to use per element of the structure. * \param s A pointer to an array of structures containing data members of type `T`. * * \see Vc::Common::InterleavedMemoryWrapper * * \todo Support destructuring via structured bindings. */ template inline Common::InterleavedMemoryWrapper make_interleave_wrapper(S *s) { return Common::InterleavedMemoryWrapper(s); } } // namespace Vc #endif // VC_COMMON_INTERLEAVEDMEMORY_H_ conky-1.22.1/3rdparty/Vc/Vc/common/iterators.h000066400000000000000000000233161476554302100211010ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_COMMON_ITERATORS_H_ #define VC_COMMON_ITERATORS_H_ #include #include #ifdef Vc_MSVC #include // for _BitScanForward #endif // Vc_MSVC #include "where.h" #include "elementreference.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { template class MemoryVector; template class MemoryVectorIterator; template class Iterator; template class IteratorBase; template class IteratorBase { public: using iterator_category = std::input_iterator_tag; using value_type = typename V::value_type; using difference_type = int; using reference = value_type; Vc_ALWAYS_INLINE reference operator*() const { return v()[i()]; } Vc_ALWAYS_INLINE reference operator[](difference_type i2) const { return v()[i2]; } private: Vc_INTRINSIC V &v() const { return *static_cast *>(this)->v; } Vc_INTRINSIC difference_type i() const { return static_cast *>(this)->i; } }; template class IteratorBase { public: using iterator_category = std::input_iterator_tag; using value_type = typename V::value_type; using difference_type = int; using reference = Vc::Detail::ElementReference; Vc_ALWAYS_INLINE reference operator*() const { return {*v(), i()}; } Vc_ALWAYS_INLINE reference operator[](difference_type i2) const { return {*v(), i2}; } private: Vc_INTRINSIC V *v() const { return static_cast *>(this)->v; } Vc_INTRINSIC difference_type i() const { return static_cast *>(this)->i; } friend reference; static Vc_INTRINSIC value_type get(const V &o, int i) { return o[i]; } template static Vc_INTRINSIC void set(V &o, int i, T &&v) { o[i] = std::forward(v); } }; // class Iterator {{{ template class Iterator : public IteratorBase::value> { using Base = IteratorBase::value>; friend Base; public: using typename Base::iterator_category; using typename Base::value_type; using typename Base::difference_type; using pointer = const Iterator *; using typename Base::reference; constexpr Iterator() = default; constexpr Iterator(V &_v, difference_type _i) : v(&_v), i(_i) {} // rely on implicit copy constructor/assignment Vc_ALWAYS_INLINE pointer operator->() const { return this; } using Base::operator*; Vc_ALWAYS_INLINE Iterator &operator++() { ++i; return *this; } Vc_ALWAYS_INLINE Iterator operator++(int) { Iterator tmp = *this; ++i; return tmp; } // bidirectional iteration is supported Vc_ALWAYS_INLINE Iterator &operator--() { --i; return *this; } Vc_ALWAYS_INLINE Iterator operator--(int) { Iterator tmp = *this; --i; return tmp; } // RandomAccessIterator: using Base::operator[]; Vc_ALWAYS_INLINE Iterator &operator+=(difference_type d) { i += d; return *this; } Vc_ALWAYS_INLINE Iterator &operator-=(difference_type d) { i -= d; return *this; } Vc_ALWAYS_INLINE Iterator operator+(difference_type d) const { return {*v, i + d}; } Vc_ALWAYS_INLINE Iterator operator-(difference_type d) const { return {*v, i - d}; } Vc_ALWAYS_INLINE difference_type operator-(const Iterator &rhs) const { return i - rhs.i; } friend Vc_ALWAYS_INLINE Iterator operator+(difference_type d, const Iterator &rhs) { return {*rhs.v, rhs.i + d}; } // InputIterator would not need to test v == rhs.v, but except for `reference` this // class implements a complete RandomAccessIterator Vc_ALWAYS_INLINE bool operator==(const Iterator &rhs) const { return v == rhs.v && i == rhs.i; } Vc_ALWAYS_INLINE bool operator!=(const Iterator &rhs) const { return v == rhs.v && i != rhs.i; } Vc_ALWAYS_INLINE bool operator< (const Iterator &rhs) const { return v == rhs.v && i < rhs.i; } Vc_ALWAYS_INLINE bool operator<=(const Iterator &rhs) const { return v == rhs.v && i 
<= rhs.i; } Vc_ALWAYS_INLINE bool operator> (const Iterator &rhs) const { return v == rhs.v && i > rhs.i; } Vc_ALWAYS_INLINE bool operator>=(const Iterator &rhs) const { return v == rhs.v && i >= rhs.i; } private: V *v = nullptr; difference_type i = 0; };/*}}}*/ template using ConstIterator = Iterator; class BitmaskIterator/*{{{*/ { #ifdef Vc_MSVC unsigned long mask; unsigned long bit; #else size_t mask; size_t bit; #endif void nextBit() { #ifdef Vc_GNU_ASM bit = __builtin_ctzl(mask); #elif defined(Vc_MSVC) _BitScanForward(&bit, mask); #else #error "Not implemented yet. Please contact vc-devel@compeng.uni-frankfurt.de" #endif } void resetLsb() { // 01100100 - 1 = 01100011 mask &= (mask - 1); /* #ifdef Vc_GNU_ASM __asm__("btr %1,%0" : "+r"(mask) : "r"(bit)); #elif defined(_WIN64) _bittestandreset64(&mask, bit); #elif defined(_WIN32) _bittestandreset(&mask, bit); #else #error "Not implemented yet. Please contact vc-devel@compeng.uni-frankfurt.de" #endif */ } public: BitmaskIterator(decltype(mask) m) : mask(m) { nextBit(); } BitmaskIterator(const BitmaskIterator &) = default; BitmaskIterator(BitmaskIterator &&) = default; Vc_ALWAYS_INLINE size_t operator->() const { return bit; } Vc_ALWAYS_INLINE size_t operator*() const { return bit; } Vc_ALWAYS_INLINE BitmaskIterator &operator++() { resetLsb(); nextBit(); return *this; } Vc_ALWAYS_INLINE BitmaskIterator operator++(int) { BitmaskIterator tmp = *this; resetLsb(); nextBit(); return tmp; } Vc_ALWAYS_INLINE bool operator==(const BitmaskIterator &rhs) const { return mask == rhs.mask; } Vc_ALWAYS_INLINE bool operator!=(const BitmaskIterator &rhs) const { return mask != rhs.mask; } };/*}}}*/ template Vc_ALWAYS_INLINE enable_if::value || Traits::is_simd_mask::value, Iterator::type>> begin(T &&x) { return {std::forward(x), 0}; } template Vc_ALWAYS_INLINE enable_if::value || Traits::is_simd_mask::value, Iterator::type>> end(T &&x) { using TT = typename std::decay::type; return {std::forward(x), int(TT::size())}; } template Vc_ALWAYS_INLINE enable_if< Traits::is_simd_mask::value || Traits::is_simd_vector::value, ConstIterator> cbegin(const T &v) { return {v, 0}; } template Vc_ALWAYS_INLINE enable_if< Traits::is_simd_mask::value || Traits::is_simd_vector::value, ConstIterator> cend(const T &v) { return {v, int(T::size())}; } template Vc_ALWAYS_INLINE BitmaskIterator begin(const WhereImpl::WhereMask &w) { return w.mask.toInt(); } template Vc_ALWAYS_INLINE BitmaskIterator end(const WhereImpl::WhereMask &) { return 0; } template Vc_ALWAYS_INLINE MemoryVectorIterator makeIterator(T *mem, Flags) { return new(mem) MemoryVector; } template Vc_ALWAYS_INLINE MemoryVectorIterator makeIterator(const T *mem, Flags) { return new(const_cast(mem)) MemoryVector; } template Vc_ALWAYS_INLINE MemoryVectorIterator makeIterator(MemoryVector &mv, Flags) { return new(&mv) MemoryVector; } template Vc_ALWAYS_INLINE MemoryVectorIterator makeIterator(MemoryVector &mv, Flags) { return new(&mv) MemoryVector; } } // namespace Common using Common::begin; using Common::end; using Common::cbegin; using Common::cend; using Common::makeIterator; } // namespace Vc #endif // VC_COMMON_ITERATORS_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/loadinterface.h000066400000000000000000000103251476554302100216610ustar00rootroot00000000000000/* This file is part of the Vc library. 
{{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ // load ctors{{{1 /** * Construct a vector from loading its entries from the array at \p mem. * * \param mem A pointer to data. The pointer must not be aligned on a * MemoryAlignment boundary unless you add the Vc::Aligned flag as a second * argument. */ explicit Vc_INTRINSIC Vector(const EntryType *mem) { load(mem); } /** * Construct a vector from loading its entries from the array at \p mem. * * \param mem A pointer to data. If \p flags contains the Vc::Aligned flag, the pointer * must be aligned on a MemoryAlignment boundary. * \param flags A (combination of) flag object(s), such as Vc::Aligned, Vc::Streaming, * Vc::Unaligned, and/or Vc::PrefetchDefault. */ template ::value>> explicit Vc_INTRINSIC Vector(const EntryType *mem, Flags flags) { load(mem, flags); } template ::value || !std::is_integral::value || sizeof(EntryType) >= sizeof(U)) && std::is_arithmetic::value &&Traits::is_load_store_flag::value>> explicit Vc_INTRINSIC Vector(const U *x, Flags flags = Flags()) { load(x, flags); } // load member functions{{{1 /** * Load the vector entries from \p mem, overwriting the previous values. * * \param mem * A pointer to data. The pointer must not be aligned on a MemoryAlignment boundary unless * you add the Vc::Aligned flag as a second argument. */ Vc_INTRINSIC void load(const EntryType *mem) { load(mem, DefaultLoadTag()); } /** * Load the vector entries from \p mem, overwriting the previous values. * * \param mem * A pointer to data. If \p flags contains the Vc::Aligned flag, the pointer must be * aligned on a MemoryAlignment boundary. * \param flags * A (combination of) flag object(s), such as Vc::Aligned, Vc::Streaming, Vc::Unaligned, * and/or Vc::PrefetchDefault. 
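 *
 * A minimal usage sketch (illustrative; assumes float_v and a caller-provided float
 * buffer named \c data that satisfies the stated alignment requirement):
 * \code
 * alignas(Vc::float_v::MemoryAlignment) float data[Vc::float_v::Size * 4];
 * Vc::float_v v;
 * v.load(data, Vc::Aligned);        // data sits on a MemoryAlignment boundary
 * v.load(data + 1, Vc::Unaligned);  // arbitrary offset: request an unaligned load
 * \endcode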
*/ template Vc_INTRINSIC enable_if::value, void> load(const EntryType *mem, Flags flags) { load(mem, flags); } private: template struct load_concept : public std::enable_if< (!std::is_integral::value || !std::is_integral::value || sizeof(EntryType) >= sizeof(U)) && std::is_arithmetic::value && Traits::is_load_store_flag::value, void> {}; public: template Vc_INTRINSIC_L typename load_concept::type load(const U *mem, Flags = Flags()) Vc_INTRINSIC_R; //}}}1 // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/loadstoreflags.h000066400000000000000000000235231476554302100220760ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_LOADSTOREFLAGS_H_ #define VC_COMMON_LOADSTOREFLAGS_H_ #include "../traits/type_traits.h" namespace Vc_VERSIONED_NAMESPACE { /** * Hint for \ref Prefetch to select prefetches that mark the memory as exclusive. * * This hint may optimize the prefetch if the memory will subsequently be written to. */ struct Exclusive {}; /** * Hint for \ref Prefetch to select prefetches that mark the memory as shared. */ struct Shared {}; namespace LoadStoreFlags { struct StreamingFlag {}; struct UnalignedFlag {}; struct PrefetchFlagBase {}; // TODO: determine a good default for typical CPU use template struct PrefetchFlag : public PrefetchFlagBase { typedef ExclusiveOrShared_ ExclusiveOrShared; static constexpr size_t L1Stride = L1; static constexpr size_t L2Stride = L2; static constexpr bool IsExclusive = std::is_same::value; static constexpr bool IsShared = std::is_same::value; }; template struct ExtractType { typedef Default type; }; template struct ExtractType { typedef typename std::conditional::value, T, typename ExtractType::type>::type type; }; // ICC warns about the constexpr members in LoadStoreFlags: member "LoadStoreFlags::IsAligned" was declared but never referenced // who needs that warning, especially if it was referenced... // The warning cannot be reenabled because it gets emitted whenever the LoadStoreFlags is instantiated // somewhere, so it could be anywhere. 
#ifdef Vc_ICC #pragma warning(disable: 177) #endif /**\internal * Implementation of the load/store flags mechanism. This is internal API. Only some * concrete aliases are API-relevant types. */ template struct LoadStoreFlags { private: // ICC doesn't grok this line: //template using TestFlag = std::is_same::type, void>; typedef typename ExtractType, Flags...>::type Prefetch; public: constexpr LoadStoreFlags() {} static constexpr bool IsStreaming = !std::is_same::type, void>::value; static constexpr bool IsUnaligned = !std::is_same::type, void>::value; static constexpr bool IsAligned = !IsUnaligned; static constexpr bool IsPrefetch = !std::is_same::type, void>::value; static constexpr bool IsExclusivePrefetch = Prefetch::IsExclusive; static constexpr bool IsSharedPrefetch = Prefetch::IsShared; static constexpr size_t L1Stride = Prefetch::L1Stride; static constexpr size_t L2Stride = Prefetch::L2Stride; typedef LoadStoreFlags::value, void, Flags>::type...> UnalignedRemoved; // The following EnableIf* convenience types cannot use enable_if because then no LoadStoreFlags type // could ever be instantiated. Instead these types are defined either as void* or void. The // function that does SFINAE then assigns "= nullptr" to this type. Thus, the ones with just // void result in substitution failure. typedef typename std::conditional::type EnableIfAligned; typedef typename std::conditional::type EnableIfStreaming; typedef typename std::conditional::type EnableIfUnalignedNotStreaming; typedef typename std::conditional::type EnableIfUnalignedAndStreaming; typedef typename std::conditional::type EnableIfUnaligned; typedef typename std::conditional::type EnableIfNotUnaligned; typedef typename std::conditional::type EnableIfPrefetch; typedef typename std::conditional::type EnableIfNotPrefetch; }; /**\internal * Specialization for no flags (i.e aligned, non-streaming, no prefetching) */ template<> struct LoadStoreFlags<> { constexpr LoadStoreFlags() {} static constexpr bool IsStreaming = false; static constexpr bool IsUnaligned = false; static constexpr bool IsAligned = !IsUnaligned; static constexpr bool IsPrefetch = false; static constexpr bool IsExclusivePrefetch = false; static constexpr bool IsSharedPrefetch = false; static constexpr size_t L1Stride = 0; static constexpr size_t L2Stride = 0; typedef void* EnableIfAligned; typedef void* EnableIfNotUnaligned; typedef void* EnableIfNotPrefetch; }; /** * Operator for concatenation of LoadStoreFlags. * * Example: * \code * float_v x(mem, Vc::Aligned | Vc::Streaming); * \endcode */ template constexpr LoadStoreFlags operator|(LoadStoreFlags, LoadStoreFlags) { return LoadStoreFlags(); } } // LoadStoreFlags namespace using LoadStoreFlags::PrefetchFlag; typedef LoadStoreFlags::LoadStoreFlags<> AlignedTag; typedef LoadStoreFlags::LoadStoreFlags StreamingTag; typedef LoadStoreFlags::LoadStoreFlags UnalignedTag; /// The default load tag type uses unaligned (non-streaming) loads. typedef UnalignedTag DefaultLoadTag; /// The default store tag type uses unaligned (non-streaming) stores. typedef UnalignedTag DefaultStoreTag; /**\addtogroup Utilities * @{ */ /** * Use this object for a \p flags parameter to request aligned loads and stores. * * It specifies that a load/store can expect a memory address that is aligned on * the correct boundary. (i.e. \p MemoryAlignment) * * \warning * If you specify Aligned, but the memory address is not aligned the program * will most likely crash. 
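 *
 * A minimal sketch of guaranteeing that precondition (illustrative; any buffer with
 * suitable alignment works, for example one obtained from Vc::malloc or declared
 * with alignas):
 * \code
 * alignas(Vc::float_v::MemoryAlignment) float buf[Vc::float_v::Size];
 * Vc::float_v v(buf, Vc::Aligned);   // safe: buf fulfills the alignment requirement
 * \endcode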
*/ constexpr AlignedTag Aligned; /** * Use this object for a \p flags parameter to request unaligned loads and stores. * * It specifies that a load/store can \em not expect a memory address that is * aligned on the correct boundary. (i.e. alignment is less than * \p MemoryAlignment) * * \note * If you specify Unaligned, but the memory address is aligned the load/store * will execute slightly slower than necessary. */ constexpr UnalignedTag Unaligned; /** * Use this object for a \p flags parameter to request streaming loads and stores. * * It specifies that the cache should be bypassed for the given load/store. * Whether this will actually be done depends on the target system's capabilities. * * Streaming stores can be interesting when the code calculates values that, after being * written to memory, will not be used for a long time or used by a different thread. * * \note * Expect that most target systems do not support unaligned streaming loads or stores. * Therefore, make sure that you also specify Aligned. */ constexpr StreamingTag Streaming; /** * Use this object for a \p flags parameter to request default software prefetches to be * emitted. */ constexpr LoadStoreFlags::LoadStoreFlags> PrefetchDefault; ///@} /** * \tparam L1 * \tparam L2 * \tparam ExclusiveOrShared */ template ::L1Stride, size_t L2 = PrefetchFlag<>::L2Stride, typename ExclusiveOrShared = PrefetchFlag<>::ExclusiveOrShared> struct Prefetch : public LoadStoreFlags::LoadStoreFlags> { }; namespace Traits { ///\internal partial specialization for detecting LoadStoreFlags types template struct is_loadstoreflag_internal> : public std::true_type { }; ///\internal partial specialization for detecting the derived Prefetch type as a /// load/store flag. template struct is_loadstoreflag_internal> : public std::true_type { }; } // namespace Traits } // namespace Vc #endif // VC_COMMON_LOADSTOREFLAGS_H_ conky-1.22.1/3rdparty/Vc/Vc/common/logarithm.h000066400000000000000000000261661476554302100210610ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
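 A short sketch of combining the load/store flag objects documented above (illustrative;
 dst and src are assumed float arrays that are large enough and, for the streaming
 store, aligned on a MemoryAlignment boundary):
 \code
 Vc::float_v v = ...;
 v.store(dst, Vc::Aligned | Vc::Streaming);                 // bypass the cache where supported
 Vc::float_v w(src, Vc::Unaligned | Vc::PrefetchDefault);   // load and request software prefetches
 \endcode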
}}}*/ /* The log implementations are based on code from Julien Pommier which carries the following copyright information: */ /* Inspired by Intel Approximate Math library, and based on the corresponding algorithms of the cephes math library */ /* Copyright (C) 2007 Julien Pommier This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. (this is the zlib license) */ #ifdef Vc_COMMON_MATH_H_INTERNAL enum LogarithmBase { BaseE, Base10, Base2 }; namespace Detail { template using Const = typename std::conditional::value, AVX::Const, SSE::Const>::type; template struct LogImpl { template static Vc_ALWAYS_INLINE void log_series(Vector &Vc_RESTRICT x, typename Vector::AsArg exponent) { typedef Vector V; typedef Detail::Const C; // Taylor series around x = 2^exponent // f(x) = ln(x) → exponent * ln(2) → C::ln2_small + C::ln2_large // f'(x) = x⁻¹ → x → 1 // f''(x) = - x⁻² → -x² / 2 → C::_1_2() // = 2!x⁻³ → x³ / 3 → C::P(8) // = -3!x⁻⁴ → -x⁴ / 4 → C::P(7) // = 4!x⁻⁵ → x⁵ / 5 → C::P(6) // ... // The high order coefficients are adjusted to reduce the error that occurs from ommission // of higher order terms. 
// P(0) is the smallest term and |x| < 1 ⇒ |xⁿ| > |xⁿ⁺¹| // The order of additions must go from smallest to largest terms const V x2 = x * x; // 0 → 4 #ifdef Vc_LOG_ILP V y2 = (C::P(6) * /*4 → 8*/ x2 + /* 8 → 11*/ C::P(7) * /*1 → 5*/ x) + /*11 → 14*/ C::P(8); V y0 = (C::P(0) * /*5 → 9*/ x2 + /* 9 → 12*/ C::P(1) * /*2 → 6*/ x) + /*12 → 15*/ C::P(2); V y1 = (C::P(3) * /*6 → 10*/ x2 + /*10 → 13*/ C::P(4) * /*3 → 7*/ x) + /*13 → 16*/ C::P(5); const V x3 = x2 * x; // 7 → 11 const V x6 = x3 * x3; // 11 → 15 const V x9 = x6 * x3; // 15 → 19 V y = (y0 * /*19 → 23*/ x9 + /*23 → 26*/ y1 * /*16 → 20*/ x6) + /*26 → 29*/ y2 * /*14 → 18*/ x3; #elif defined Vc_LOG_ILP2 /* * name start done * movaps %xmm0, %xmm1 ; x 0 1 * movaps %xmm0, %xmm2 ; x 0 1 * mulps %xmm1, %xmm1 ; x2 1 5 *xmm1 * movaps , %xmm15 ; y8 1 2 * mulps %xmm1, %xmm2 ; x3 5 9 *xmm2 * movaps %xmm1, %xmm3 ; x2 5 6 * movaps %xmm1, %xmm4 ; x2 5 6 * mulps %xmm3, %xmm3 ; x4 6 10 *xmm3 * movaps %xmm2, %xmm5 ; x3 9 10 * movaps %xmm2, %xmm6 ; x3 9 10 * mulps %xmm2, %xmm4 ; x5 9 13 *xmm4 * movaps %xmm3, %xmm7 ; x4 10 11 * movaps %xmm3, %xmm8 ; x4 10 11 * movaps %xmm3, %xmm9 ; x4 10 11 * mulps %xmm5, %xmm5 ; x6 10 14 *xmm5 * mulps %xmm3, %xmm6 ; x7 11 15 *xmm6 * mulps %xmm7, %xmm7 ; x8 12 16 *xmm7 * movaps %xmm4, %xmm10 ; x5 13 14 * mulps %xmm4, %xmm8 ; x9 13 17 *xmm8 * mulps %xmm5, %xmm10 ; x11 14 18 *xmm10 * mulps %xmm5, %xmm9 ; x10 15 19 *xmm9 * mulps , %xmm10 ; y0 18 22 * mulps , %xmm9 ; y1 19 23 * mulps , %xmm8 ; y2 20 24 * mulps , %xmm7 ; y3 21 25 * addps %xmm10, %xmm9 ; y 23 26 * addps %xmm9, %xmm8 ; y 26 29 * addps %xmm8, %xmm7 ; y 29 32 */ const V x3 = x2 * x; // 4 → 8 const V x4 = x2 * x2; // 5 → 9 const V x5 = x2 * x3; // 8 → 12 const V x6 = x3 * x3; // 9 → 13 const V x7 = x4 * x3; // const V x8 = x4 * x4; const V x9 = x5 * x4; const V x10 = x5 * x5; const V x11 = x5 * x6; // 13 → 17 V y = C::P(0) * x11 + C::P(1) * x10 + C::P(2) * x9 + C::P(3) * x8 + C::P(4) * x7 + C::P(5) * x6 + C::P(6) * x5 + C::P(7) * x4 + C::P(8) * x3; #else V y = C::P(0); Vc::Common::unrolled_loop([&](int i) { y = y * x + C::P(i); }); y *= x * x2; #endif switch (Base) { case BaseE: // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2)) y += exponent * C::ln2_small(); y -= x2 * C::_1_2(); // [0, 0.25[ x += y; x += exponent * C::ln2_large(); break; case Base10: y += exponent * C::ln2_small(); y -= x2 * C::_1_2(); // [0, 0.25[ x += y; x += exponent * C::ln2_large(); x *= C::log10_e(); break; case Base2: { const V x_ = x; x *= C::log2_e(); y *= C::log2_e(); y -= x_ * x * C::_1_2(); // [0, 0.25[ x += y; x += exponent; break; } } } template static Vc_ALWAYS_INLINE void log_series(Vector &Vc_RESTRICT x, typename Vector::AsArg exponent) { typedef Vector V; typedef Detail::Const C; const V x2 = x * x; V y = C::P(0); V y2 = C::Q(0) + x; Vc::Common::unrolled_loop([&](int i) { y = y * x + C::P(i); y2 = y2 * x + C::Q(i); }); y2 = x / y2; y = y * x + C::P(5); y = x2 * y * y2; // TODO: refactor the following with the float implementation: switch (Base) { case BaseE: // ln(2) is split in two parts to increase precision (i.e. 
ln2_small + ln2_large = ln(2)) y += exponent * C::ln2_small(); y -= x2 * C::_1_2(); // [0, 0.25[ x += y; x += exponent * C::ln2_large(); break; case Base10: y += exponent * C::ln2_small(); y -= x2 * C::_1_2(); // [0, 0.25[ x += y; x += exponent * C::ln2_large(); x *= C::log10_e(); break; case Base2: { const V x_ = x; x *= C::log2_e(); y *= C::log2_e(); y -= x_ * x * C::_1_2(); // [0, 0.25[ x += y; x += exponent; break; } } } template > static inline Vector calc(V _x) { typedef typename V::Mask M; typedef Detail::Const C; V x(_x); const M invalidMask = x < V::Zero(); const M infinityMask = x == V::Zero(); const M denormal = x <= C::min(); x(denormal) *= V(Vc::Detail::doubleConstant<1, 0, 54>()); // 2²⁵ V exponent = Detail::exponent(x.data()); // = ⎣log₂(x)⎦ exponent(denormal) -= 54; x.setZero(C::exponentMask()); // keep only the fractional part ⇒ x ∈ [1, 2[ x = Detail::operator|(x, C::_1_2()); // and set the exponent to 2⁻¹ ⇒ x ∈ [½, 1[ // split calculation in two cases: // A: x ∈ [½, √½[ // B: x ∈ [√½, 1[ // √½ defines the point where Δe(x) := log₂(x) - ⎣log₂(x)⎦ = ½, i.e. // log₂(√½) - ⎣log₂(√½)⎦ = ½ * -1 - ⎣½ * -1⎦ = -½ + 1 = ½ const M smallX = x < C::_1_sqrt2(); x(smallX) += x; // => x ∈ [√½, 1[ ∪ [1.5, 1 + √½[ x -= V::One(); // => x ∈ [√½ - 1, 0[ ∪ [0.5, √½[ exponent(!smallX) += V::One(); log_series(x, exponent); // A: (ˣ⁄₂ᵉ - 1, e) B: (ˣ⁄₂ᵉ⁺¹ - 1, e + 1) x.setQnan(invalidMask); // x < 0 → NaN x(infinityMask) = C::neginf(); // x = 0 → -∞ return x; } }; } // namespace Detail template Vc_INTRINSIC Vc_CONST Vector> log( const Vector &x) { return Detail::LogImpl::calc(x); } template Vc_INTRINSIC Vc_CONST Vector> log10( const Vector &x) { return Detail::LogImpl::calc(x); } template Vc_INTRINSIC Vc_CONST Vector> log2( const Vector &x) { return Detail::LogImpl::calc(x); } #endif // Vc_COMMON_MATH_H_INTERNAL conky-1.22.1/3rdparty/Vc/Vc/common/macros.h000066400000000000000000000316271476554302100203550ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2010-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
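 The series code above is reached through the user-facing overloads Vc::log, Vc::log10
 and Vc::log2 declared at the end of logarithm.h. A minimal sketch (illustrative):
 \code
 Vc::float_v x = ...;                 // strictly positive inputs
 Vc::float_v natural = Vc::log(x);    // component-wise ln(x)
 Vc::float_v decimal = Vc::log10(x);  // component-wise log10(x)
 Vc::float_v binary  = Vc::log2(x);   // component-wise log2(x)
 \endcode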
}}}*/ #ifndef VC_COMMON_MACROS_H_ #define VC_COMMON_MACROS_H_ #include "../global.h" #ifdef Vc_MSVC #define Vc_ALIGNED_TYPEDEF(n_, type_, new_type_) \ typedef __declspec(align(n_)) type_ new_type_ #elif __GNUC__ #define Vc_ALIGNED_TYPEDEF(n_, type_, new_type_) \ typedef type_ new_type_[[gnu::aligned(n_)]] #else // the following is actually ill-formed according to C++1[14] #define Vc_ALIGNED_TYPEDEF(n_, type_, new_type_) \ using new_type_ alignas(sizeof(n_)) = type_ #endif // On Windows (WIN32) we might see macros called min and max. Just undefine them and hope // noone (re)defines them (NOMINMAX should help). #ifdef WIN32 #define NOMINMAX 1 #if defined min #undef min #endif #if defined max #undef max #endif #endif // WIN32 #if defined Vc_GCC && Vc_GCC >= 0x60000 // GCC 6 drops all attributes on types passed as template arguments. This is important // if a may_alias gets lost and therefore needs to be readded in the implementation of // the class template. #define Vc_TEMPLATES_DROP_ATTRIBUTES 1 #endif #if defined Vc_CLANG || defined Vc_APPLECLANG # define Vc_UNREACHABLE __builtin_unreachable # define Vc_NEVER_INLINE [[gnu::noinline]] # define Vc_INTRINSIC_L inline # define Vc_INTRINSIC_R __attribute__((always_inline)) # define Vc_INTRINSIC Vc_INTRINSIC_L Vc_INTRINSIC_R # define Vc_FLATTEN # define Vc_CONST __attribute__((const)) # define Vc_CONST_L # define Vc_CONST_R Vc_CONST # define Vc_PURE __attribute__((pure)) # define Vc_PURE_L # define Vc_PURE_R Vc_PURE # define Vc_MAY_ALIAS __attribute__((may_alias)) # define Vc_ALWAYS_INLINE_L inline # define Vc_ALWAYS_INLINE_R __attribute__((always_inline)) # define Vc_ALWAYS_INLINE Vc_ALWAYS_INLINE_L Vc_ALWAYS_INLINE_R # define Vc_IS_UNLIKELY(x) __builtin_expect(x, 0) # define Vc_IS_LIKELY(x) __builtin_expect(x, 1) # define Vc_RESTRICT __restrict__ # define Vc_DEPRECATED(msg) # define Vc_DEPRECATED_ALIAS(msg) # define Vc_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) #elif defined(__GNUC__) # define Vc_UNREACHABLE __builtin_unreachable # if defined Vc_GCC && !defined __OPTIMIZE__ # define Vc_MAY_ALIAS # else # define Vc_MAY_ALIAS __attribute__((__may_alias__)) # endif # define Vc_INTRINSIC_R __attribute__((__always_inline__, __artificial__)) # define Vc_INTRINSIC_L inline # define Vc_INTRINSIC Vc_INTRINSIC_L Vc_INTRINSIC_R # define Vc_FLATTEN __attribute__((__flatten__)) # define Vc_ALWAYS_INLINE_L inline # define Vc_ALWAYS_INLINE_R __attribute__((__always_inline__)) # define Vc_ALWAYS_INLINE Vc_ALWAYS_INLINE_L Vc_ALWAYS_INLINE_R # ifdef Vc_ICC // ICC miscompiles if there are functions marked as pure or const # define Vc_PURE # define Vc_CONST # define Vc_NEVER_INLINE # else # define Vc_NEVER_INLINE [[gnu::noinline]] # define Vc_PURE __attribute__((__pure__)) # define Vc_CONST __attribute__((__const__)) # endif # define Vc_CONST_L # define Vc_CONST_R Vc_CONST # define Vc_PURE_L # define Vc_PURE_R Vc_PURE # define Vc_IS_UNLIKELY(x) __builtin_expect(x, 0) # define Vc_IS_LIKELY(x) __builtin_expect(x, 1) # define Vc_RESTRICT __restrict__ # ifdef Vc_ICC # define Vc_DEPRECATED(msg) # define Vc_DEPRECATED_ALIAS(msg) # else # define Vc_DEPRECATED(msg) __attribute__((__deprecated__(msg))) # define Vc_DEPRECATED_ALIAS(msg) __attribute__((__deprecated__(msg))) # endif # define Vc_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) #else # define Vc_NEVER_INLINE # define Vc_FLATTEN # ifdef Vc_PURE # undef Vc_PURE # endif # define Vc_MAY_ALIAS # ifdef Vc_MSVC # define Vc_ALWAYS_INLINE inline __forceinline # define Vc_ALWAYS_INLINE_L 
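/* A small illustration of the Vc_ALIGNED_TYPEDEF macro defined near the top of this
 * header (illustrative; the alias name is an arbitrary example):
 * \code
 * Vc_ALIGNED_TYPEDEF(16, float, aligned_float);  // aligned_float is a float type with 16-byte alignment
 * \endcode
 */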
Vc_ALWAYS_INLINE # define Vc_ALWAYS_INLINE_R # define Vc_CONST __declspec(noalias) # define Vc_CONST_L Vc_CONST # define Vc_CONST_R # define Vc_PURE /*Vc_CONST*/ # define Vc_PURE_L Vc_PURE # define Vc_PURE_R # define Vc_INTRINSIC inline __forceinline # define Vc_INTRINSIC_L Vc_INTRINSIC # define Vc_INTRINSIC_R namespace Vc_VERSIONED_NAMESPACE { namespace detail { static Vc_INTRINSIC void unreachable() { __assume(0); } } // namespace detail } # define Vc_UNREACHABLE Vc::detail::unreachable # else # define Vc_ALWAYS_INLINE # define Vc_ALWAYS_INLINE_L # define Vc_ALWAYS_INLINE_R # define Vc_CONST # define Vc_CONST_L # define Vc_CONST_R # define Vc_PURE # define Vc_PURE_L # define Vc_PURE_R # define Vc_INTRINSIC # define Vc_INTRINSIC_L # define Vc_INTRINSIC_R # define Vc_UNREACHABLE std::abort # endif # define Vc_IS_UNLIKELY(x) x # define Vc_IS_LIKELY(x) x # define Vc_RESTRICT __restrict # define Vc_DEPRECATED(msg) __declspec(deprecated(msg)) # define Vc_DEPRECATED_ALIAS(msg) # define Vc_WARN_UNUSED_RESULT #endif #ifdef Vc_CXX14 #undef Vc_DEPRECATED #define Vc_DEPRECATED(msg_) [[deprecated(msg_)]] #endif #define Vc_NOTHING_EXPECTING_SEMICOLON static_assert(true, "") #define Vc_FREE_STORE_OPERATORS_ALIGNED(align_) \ /**\name new/delete overloads for correct alignment */ \ /**@{*/ \ /*!\brief Allocates correctly aligned memory */ \ Vc_ALWAYS_INLINE void *operator new(size_t size) \ { \ return Vc::Common::aligned_malloc(size); \ } \ /*!\brief Returns \p p. */ \ Vc_ALWAYS_INLINE void *operator new(size_t, void *p) { return p; } \ /*!\brief Allocates correctly aligned memory */ \ Vc_ALWAYS_INLINE void *operator new[](size_t size) \ { \ return Vc::Common::aligned_malloc(size); \ } \ /*!\brief Returns \p p. */ \ Vc_ALWAYS_INLINE void *operator new[](size_t, void *p) { return p; } \ /*!\brief Frees aligned memory. */ \ Vc_ALWAYS_INLINE void operator delete(void *ptr, size_t) { Vc::Common::free(ptr); } \ /*!\brief Does nothing. */ \ Vc_ALWAYS_INLINE void operator delete(void *, void *) {} \ /*!\brief Frees aligned memory. */ \ Vc_ALWAYS_INLINE void operator delete[](void *ptr, size_t) \ { \ Vc::Common::free(ptr); \ } \ /*!\brief Does nothing. 
*/ \ Vc_ALWAYS_INLINE void operator delete[](void *, void *) {} \ /**@}*/ \ Vc_NOTHING_EXPECTING_SEMICOLON #ifdef Vc_ASSERT #define Vc_EXTERNAL_ASSERT 1 #else #ifdef NDEBUG #define Vc_ASSERT(x) #else #include #define Vc_ASSERT(x) assert(x); #endif #endif #if defined Vc_CLANG || defined Vc_APPLECLANG #define Vc_HAS_BUILTIN(x) __has_builtin(x) #else #define Vc_HAS_BUILTIN(x) 0 #endif #define Vc_CAT_HELPER_(a, b, c, d) a##b##c##d #define Vc_CAT(a, b, c, d) Vc_CAT_HELPER_(a, b, c, d) #define Vc_CAT_IMPL(a, b) a##b #define Vc_CAT2(a, b) Vc_CAT_IMPL(a, b) #define Vc_APPLY_IMPL_1_(macro, a, b, c, d, e) macro(a) #define Vc_APPLY_IMPL_2_(macro, a, b, c, d, e) macro(a, b) #define Vc_APPLY_IMPL_3_(macro, a, b, c, d, e) macro(a, b, c) #define Vc_APPLY_IMPL_4_(macro, a, b, c, d, e) macro(a, b, c, d) #define Vc_APPLY_IMPL_5_(macro, a, b, c, d, e) macro(a, b, c, d, e) #define Vc_LIST_FLOAT_VECTOR_TYPES(size, macro, a, b, c, d) \ size(macro, double_v, a, b, c, d) \ size(macro, float_v, a, b, c, d) #define Vc_LIST_INT_VECTOR_TYPES(size, macro, a, b, c, d) \ size(macro, int_v, a, b, c, d) \ size(macro, uint_v, a, b, c, d) \ size(macro, short_v, a, b, c, d) \ size(macro, ushort_v, a, b, c, d) #define Vc_LIST_VECTOR_TYPES(size, macro, a, b, c, d) \ Vc_LIST_FLOAT_VECTOR_TYPES(size, macro, a, b, c, d) \ Vc_LIST_INT_VECTOR_TYPES(size, macro, a, b, c, d) #define Vc_LIST_COMPARES(size, macro, a, b, c, d) \ size(macro, ==, a, b, c, d) \ size(macro, !=, a, b, c, d) \ size(macro, <=, a, b, c, d) \ size(macro, >=, a, b, c, d) \ size(macro, < , a, b, c, d) \ size(macro, > , a, b, c, d) #define Vc_LIST_LOGICAL(size, macro, a, b, c, d) \ size(macro, &&, a, b, c, d) \ size(macro, ||, a, b, c, d) #define Vc_LIST_BINARY(size, macro, a, b, c, d) \ size(macro, |, a, b, c, d) \ size(macro, &, a, b, c, d) \ size(macro, ^, a, b, c, d) #define Vc_LIST_SHIFTS(size, macro, a, b, c, d) \ size(macro, <<, a, b, c, d) \ size(macro, >>, a, b, c, d) #define Vc_LIST_ARITHMETICS(size, macro, a, b, c, d) \ size(macro, +, a, b, c, d) \ size(macro, -, a, b, c, d) \ size(macro, *, a, b, c, d) \ size(macro, /, a, b, c, d) \ size(macro, %, a, b, c, d) #define Vc_APPLY_0(_list, macro) _list(Vc_APPLY_IMPL_1_, macro, 0, 0, 0, 0) Vc_NOTHING_EXPECTING_SEMICOLON #define Vc_APPLY_1(_list, macro, a) _list(Vc_APPLY_IMPL_2_, macro, a, 0, 0, 0) Vc_NOTHING_EXPECTING_SEMICOLON #define Vc_APPLY_2(_list, macro, a, b) _list(Vc_APPLY_IMPL_3_, macro, a, b, 0, 0) Vc_NOTHING_EXPECTING_SEMICOLON #define Vc_APPLY_3(_list, macro, a, b, c) _list(Vc_APPLY_IMPL_4_, macro, a, b, c, 0) Vc_NOTHING_EXPECTING_SEMICOLON #define Vc_APPLY_4(_list, macro, a, b, c, d) _list(Vc_APPLY_IMPL_5_, macro, a, b, c, d) Vc_NOTHING_EXPECTING_SEMICOLON #define Vc_ALL_COMPARES(macro) Vc_APPLY_0(Vc_LIST_COMPARES, macro) #define Vc_ALL_LOGICAL(macro) Vc_APPLY_0(Vc_LIST_LOGICAL, macro) #define Vc_ALL_BINARY(macro) Vc_APPLY_0(Vc_LIST_BINARY, macro) #define Vc_ALL_SHIFTS(macro) Vc_APPLY_0(Vc_LIST_SHIFTS, macro) #define Vc_ALL_ARITHMETICS(macro) Vc_APPLY_0(Vc_LIST_ARITHMETICS, macro) #define Vc_ALL_FLOAT_VECTOR_TYPES(macro) Vc_APPLY_0(Vc_LIST_FLOAT_VECTOR_TYPES, macro) #define Vc_ALL_VECTOR_TYPES(macro) Vc_APPLY_0(Vc_LIST_VECTOR_TYPES, macro) #define Vc_EXACT_TYPE(_test, _reference, _type) \ typename std::enable_if::value, _type>::type #define Vc_make_unique(name) Vc_CAT(Vc_,name,_,__LINE__) #if defined(Vc_NO_NOEXCEPT) #define Vc_NOEXCEPT throw() #else #define Vc_NOEXCEPT noexcept #endif #ifdef Vc_NO_ALWAYS_INLINE #undef Vc_ALWAYS_INLINE #undef Vc_ALWAYS_INLINE_L #undef Vc_ALWAYS_INLINE_R #define 
Vc_ALWAYS_INLINE inline #define Vc_ALWAYS_INLINE_L inline #define Vc_ALWAYS_INLINE_R #undef Vc_INTRINSIC #undef Vc_INTRINSIC_L #undef Vc_INTRINSIC_R #define Vc_INTRINSIC inline #define Vc_INTRINSIC_L inline #define Vc_INTRINSIC_R #endif #endif // VC_COMMON_MACROS_H_ conky-1.22.1/3rdparty/Vc/Vc/common/makeContainer.h000066400000000000000000000152551476554302100216500ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_MAKECONTAINER_H_ #define VC_COMMON_MAKECONTAINER_H_ #include "../vector.h" #include namespace Vc_VERSIONED_NAMESPACE { namespace { template struct make_container_helper { static constexpr Container help(std::initializer_list list) { return { list }; } }; template class Container> struct make_container_helper, Alloc>, typename Vector::EntryType> { typedef Vector V; typedef typename V::EntryType T; typedef Container C; static inline C help(std::initializer_list list) { const std::size_t size = (list.size() + (V::Size - 1)) / V::Size; C v(size); auto containerIt = v.begin(); auto init = std::begin(list); const auto initEnd = std::end(list); for (std::size_t i = 0; i < size - 1; ++i) { *containerIt++ = V(init, Vc::Unaligned); init += V::Size; } Vc_ASSERT(all_of(*containerIt == V::Zero())); int j = 0; while (init != initEnd) { (*containerIt)[j++] = *init++; } return v; } }; template class Container> struct make_container_helper, N>, typename Vector::EntryType> { typedef Vector V; typedef typename V::EntryType T; static constexpr std::size_t size = (N + (V::Size - 1)) / V::Size; typedef Container< V, #if defined Vc_CLANG && Vc_CLANG < 0x30700 // TODO: when did Vc_APPLECLANG fix it? // clang before 3.7.0 has a bug when returning std::array<__m256x, 1>. So // increase it to std::array<__m256x, 2> and fill it with zeros. Better // than returning garbage. (size == 1 && std::is_same::value) ? 
2 : #endif size> C; static inline C help(std::initializer_list list) { Vc_ASSERT(N == list.size()) Vc_ASSERT(size == (list.size() + (V::Size - 1)) / V::Size) C v; auto containerIt = v.begin(); auto init = std::begin(list); const auto initEnd = std::end(list); for (std::size_t i = 0; i < size - 1; ++i) { *containerIt++ = V(init, Vc::Unaligned); init += V::Size; } Vc_ASSERT(all_of(*containerIt == V::Zero())); int j = 0; while (init != initEnd) { (*containerIt)[j++] = *init++; } return v; } }; } // anonymous namespace /** * \ingroup Containers * \headerfile makeContainer.h * * Construct a container of Vc vectors from a std::initializer_list of scalar entries. * * \tparam Container The container type to construct. * \tparam T The scalar type to use for the initializer_list. * * \param list An initializer list of arbitrary size. The type of the entries is important! * If you pass a list of integers you will get a container filled with Vc::int_v objects. * If, instead, you want to have a container of Vc::float_v objects, be sure the include a * period (.) and the 'f' postfix in the literals. Alternatively, you can pass the * type as second template argument to makeContainer. * * \return Returns a container of the requested class filled with the minimum number of SIMD * vectors to hold the values in the initializer list. * If the number of values in \p list does not match the number of values in the * returned container object, the remaining values in the returned object will be * zero-initialized. * * Example: * \code * auto data = Vc::makeContainer>({ 1.f, 2.f, 3.f, 4.f, 5.f }); * // data.size() == 5 if float_v::Size == 1 (i.e. Vc_IMPL=Scalar) * // data.size() == 2 if float_v::Size == 4 (i.e. Vc_IMPL=SSE) * // data.size() == 1 if float_v::Size == 8 (i.e. Vc_IMPL=AVX) * \endcode */ template constexpr auto makeContainer(std::initializer_list list) -> decltype(make_container_helper::help(list)) { return make_container_helper::help(list); } template constexpr auto make_container(std::initializer_list list) -> decltype(makeContainer(list)) { return makeContainer(list); } } // namespace Vc #endif // VC_COMMON_MAKECONTAINER_H_ conky-1.22.1/3rdparty/Vc/Vc/common/make_unique.h000066400000000000000000000042061476554302100213650ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_MAKE_UNIQUE_H_ #define VC_COMMON_MAKE_UNIQUE_H_ #include #include "malloc.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { template struct Deleter { Vc_ALWAYS_INLINE void operator()(T *ptr) { ptr->~T(); Vc::free(ptr); } }; template inline std::unique_ptr> make_unique(Args&&... args) { return std::unique_ptr>(new(Vc::malloc(1)) T(std::forward(args)...)); } } // namespace Common } // namespace Vc #endif // VC_COMMON_MAKE_UNIQUE_H_ conky-1.22.1/3rdparty/Vc/Vc/common/malloc.h000066400000000000000000000130611476554302100203300ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_MALLOC_H_ #define VC_COMMON_MALLOC_H_ #ifndef Vc_VECTOR_DECLARED_ #error "Incorrect inclusion order. This header must be included from Vc/vector.h only." #endif #if defined _WIN32 || defined _WIN64 #include #else #include #endif #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { template static constexpr size_t nextMultipleOf(size_t value) { return (value % X) > 0 ? value + X - (value % X) : value; } template Vc_INTRINSIC void *aligned_malloc(std::size_t n) { #ifdef __MIC__ return _mm_malloc(nextMultipleOf(n), alignment); #elif defined(_WIN32) # ifdef __GNUC__ return __mingw_aligned_malloc(nextMultipleOf(n), alignment); # else return _aligned_malloc(nextMultipleOf(n), alignment); # endif #else void *ptr = nullptr; if (0 == posix_memalign(&ptr, alignment < sizeof(void *) ? 
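/* Typical usage of the allocation interface that this helper backs, i.e. Vc::malloc and
 * Vc::free documented below in this header (illustrative sketch; the element type and
 * count are arbitrary, and constructors/destructors are not run automatically):
 * \code
 * float *p = Vc::malloc<float, Vc::AlignOnCacheline>(100);  // aligned, padded allocation
 * p[0] = 1.f;
 * Vc::free(p);                                              // must be released with Vc::free
 * \endcode
 */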
sizeof(void *) : alignment, nextMultipleOf(n))) { return ptr; } return ptr; #endif } template Vc_ALWAYS_INLINE void *malloc(size_t n) { switch (A) { case Vc::AlignOnVector: return aligned_malloc(n); case Vc::AlignOnCacheline: // TODO: hardcoding 64 is not such a great idea return aligned_malloc<64>(n); case Vc::AlignOnPage: // TODO: hardcoding 4096 is not such a great idea return aligned_malloc<4096>(n); } return nullptr; } Vc_ALWAYS_INLINE void free(void *p) { #ifdef __MIC__ _mm_free(p); #elif defined(_WIN32) # ifdef __GNUC__ return __mingw_aligned_free(p); # else return _aligned_free(p); # endif #else std::free(p); #endif } } // namespace Common /** * Allocates memory on the Heap with alignment and padding suitable for vectorized access. * * Memory that was allocated with this function must be released with Vc::free! Other methods might * work but are not portable. * * \param n Specifies the number of objects the allocated memory must be able to store. * \tparam T The type of the allocated memory. Note, that the constructor is not called. * \tparam A Determines the alignment of the memory. See \ref Vc::MallocAlignment. * * \return Pointer to memory of the requested type, or 0 on error. The allocated memory is padded at * the end to be a multiple of the requested alignment \p A. Thus if you request memory for 21 * int objects, aligned via Vc::AlignOnCacheline, you can safely read a full cacheline until the * end of the array, without generating an out-of-bounds access. For a cacheline size of 64 Bytes * and an int size of 4 Bytes you would thus get an array of 128 Bytes to work with. * * \warning * \li The standard malloc function specifies the number of Bytes to allocate whereas this * function specifies the number of values, thus differing in a factor of sizeof(T). * \li This function is mainly meant for use with builtin types. If you use a custom * type with a sizeof that is not a multiple of 2 the results might not be what you expect. * \li The constructor of T is not called. You can make up for this: * \code * SomeType *array = new(Vc::malloc(N)) SomeType[N]; * \endcode * * \see Vc::free * * \ingroup Utilities * \headerfile memory.h */ template Vc_ALWAYS_INLINE T *malloc(size_t n) { return static_cast(Common::malloc(n * sizeof(T))); } /** * Frees memory that was allocated with Vc::malloc. * * \param p The pointer to the memory to be freed. * * \tparam T The type of the allocated memory. * * \warning The destructor of T is not called. If needed, you can call the destructor before calling * free: * \code * for (int i = 0; i < N; ++i) { * p[i].~T(); * } * Vc::free(p); * \endcode * * \ingroup Utilities * \headerfile memory.h * * \see Vc::malloc */ template Vc_ALWAYS_INLINE void free(T *p) { Common::free(p); } } // namespace Vc #endif // VC_COMMON_MALLOC_H_ conky-1.22.1/3rdparty/Vc/Vc/common/mask.h000066400000000000000000000360341476554302100200210ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_MASK_H_ #define VC_COMMON_MASK_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { /** * \class Mask mask.h * \ingroup Masks * * The main SIMD mask class. */ template > class Mask { public: /** * Returns the number of boolean components (\VSize{T}) in a mask of this type. * * The size of the mask. I.e. the number of boolean entries in the mask. Do not * make any assumptions about the size of masks. * * In addition, you can easily use if clauses that compare sizes. The compiler can * statically evaluate and fully optimize dead code away (very much like \#ifdef, but * with syntax checking). * * \returns The number of components (i.e. \VSize{T}) objects of this mask type store * and manipulate. */ static constexpr size_t size() { return VectorTraits::size(); } ///\copydoc size ///\deprecated Use Vc::Mask::size instead. static constexpr size_t Size = VectorTraits::size(); /** * Specifies the alignment requirement for aligned load and store calls for objects of * this mask type. */ static constexpr size_t MemoryAlignment = VectorTraits::maskMemoryAlignment(); /// The ABI tag type of the current template instantiation. using abi = Abi; /** * The \c EntryType of masks is always \c bool, independent of \c T. */ using EntryType = bool; /// \copydoc EntryType using value_type = EntryType; /// The reference wrapper type used for accessing individual mask components. using EntryReference = typename VectorTraits::EntryReference; /// \copydoc EntryReference using value_reference = EntryReference; /** * The \c VectorEntryType, in contrast to \c EntryType, reveals information about the SIMD * implementation. * This type is useful for the \c sizeof operator in generic functions. */ using VectorEntryType = typename VectorTraits::VectorEntryType; /**\internal * The \c VectorType reveals the implementation-specific internal type used for the SIMD type. */ using VectorType = typename VectorTraits::VectorType; /**\internal * \copydoc VectorType */ using vector_type = VectorType; /* * The associated Vector type. */ //using Vector = Vector; /// \name Generators ///@{ /** * Creates a new mask object initialized to zero/\c false. * * \returns A mask object with zero-initialized components. */ Vc_INTRINSIC static Mask Zero(); /** * Creates a mask object initialized to one/\c true. * * \returns A mask object with components initialized to \c true. */ Vc_INTRINSIC static Mask One(); /// Generate a mask object from booleans returned from the function \p gen. 
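/// A minimal sketch (illustrative; float_m is the mask type matching float_v):
/// \code
/// // set every even lane to true, every odd lane to false
/// auto m = Vc::float_m::generate([](int i) { return (i & 1) == 0; });
/// \endcode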
template static Vc_INTRINSIC Mask generate(G &&gen); ///@} /// \name Compile-Time Constant Initialization ///@{ /** * Construct a zero-initialized vector object. * * This constructor follows the behavior of the underlying \c bool type in that the * expression `bool()` zero-initializes the object (to \c false). On the other hand * the variable \c x in `bool x;` is uninitialized. * Since, for class types, both expressions call the default constructor `Mask x` * must zero-initialize \c x as well. */ Vc_INTRINSIC Mask() = default; /// Zero-initialize the new mask object (\c false). /// \see Vc::Zero, Zero() Vc_INTRINSIC explicit Mask(VectorSpecialInitializerZero); /// Initialize the new mask object to one (\c true). /// \see Vc::One, One() Vc_INTRINSIC explicit Mask(VectorSpecialInitializerOne); ///@} /// \name Conversion/Broadcast Constructors ///@{ /** * Broadcast constructor. * * Set all components of the new mask object to \p b. * * \param b Determines the initial state of the mask. */ Vc_INTRINSIC explicit Mask(bool b); /** * Implicit conversion from a compatible (equal \VSize{T} on every platform) mask * object. * * \param otherMask The mask to be converted. */ template Vc_INTRINSIC Mask(U &&otherMask, Common::enable_if_mask_converts_implicitly = nullarg); #if Vc_IS_VERSION_1 /** * Explicit conversion (static_cast) from a mask object that potentially has a * different \VSize{T}. * * \param otherMask The mask to be converted. * * \internal This is implemented via simd_cast in scalar/simd_cast_caller.h */ template Vc_DEPRECATED( "use simd_cast instead of explicit type casting to convert between mask types") Vc_INTRINSIC_L explicit Mask(U &&otherMask, Common::enable_if_mask_converts_explicitly = nullarg) Vc_INTRINSIC_R; ///@} #endif /** * \name Loads & Stores */ ///@{ /** * Load constructor from an array of \c bool. * * This constructor implements an explicit conversion from an array of booleans to a * mask object. It corresponds to a Vector load constructor. * * \param mem A pointer to the start of the array of booleans. * \see Mask(const bool *, Flags), load(const bool *) */ Vc_ALWAYS_INLINE explicit Mask(const bool *mem); /** * Overload of the above with a load/store flag argument. * * \param mem A pointer to the start of the array of booleans. * \param flags Choose a combination of flags such as Vc::Aligned, Vc::Streaming, * Vc::Unaligned, Vc::PrefetchDefault, ... * \see load(const bool *, Flags) */ template Vc_ALWAYS_INLINE explicit Mask(const bool *mem, Flags flags); /** * Load the components of the mask from an array of \c bool. * * \param mem A pointer to the start of the array of booleans. * \see load(const bool *, Flags), Mask(const bool *) */ Vc_ALWAYS_INLINE void load(const bool *mem); /** * Overload of the above with a load/store flag argument. * * \param mem A pointer to the start of the array of booleans. * \param flags Choose a combination of flags such as Vc::Aligned, Vc::Streaming, * Vc::Unaligned, Vc::PrefetchDefault, ... * \see Mask(const bool *, Flags) */ template Vc_ALWAYS_INLINE void load(const bool *mem, Flags flags); /** * Store the values of the mask to an array of \c bool. * * \param mem A pointer to the start of the array of booleans. * \see store(bool *, Flags) */ Vc_ALWAYS_INLINE void store(bool *mem) const; /** * Overload of the above with a load/store flag argument. * * \param mem A pointer to the start of the array of booleans. * \param flags Choose a combination of flags such as Vc::Aligned, Vc::Streaming, * Vc::Unaligned, Vc::PrefetchDefault, ... 
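 *
 * A minimal sketch (illustrative; float_m/float_v are the usual typedefs, v is an
 * assumed float_v, and the destination array is large enough):
 * \code
 * Vc::float_m m = v < 0.f;
 * bool flags_out[Vc::float_m::Size];
 * m.store(flags_out, Vc::Unaligned);   // writes one bool per mask component
 * \endcode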
*/ template Vc_ALWAYS_INLINE void store(bool *mem, Flags flags) const; ///@} /// \name Comparison Operators ///@{ /** * Returns whether the two masks are equal in all components. * * \param mask The other mask to compare against. * \returns A scalar boolean value that says whether all components of the two masks * are equal. * * \note If you expected a behavior similar to the compare operator of Vc::Vector, * consider that the bitwise operators already implement such functionality. There is * little use, typically, in having `a == b` return the same as `a ^ b`. In general, * it is more useful to query `all_of(a ^ b)` which is the same as this equality * operator. */ Vc_ALWAYS_INLINE bool operator==(const Mask &mask) const; /** * Returns whether the two masks are different in at least one component. * * \param mask The other mask to compare against. * \returns A scalar boolean value that says whether at least one component of the two masks is different. * * \note `(a == b) == !(a != b)` holds * \see Mask::operator==(const Mask &) */ Vc_ALWAYS_INLINE bool operator!=(const Mask &mask) const; ///@} /** * \name Logical and Binary Operators * * \brief Component-wise logical/binary operations on mask objects. * * The effect of logical and binary \c AND and \c OR is equivalent for mask types (as * it is for \c bool). */ ///@{ /// Returns the component-wise application of a logical \c AND to \p mask. Vc_ALWAYS_INLINE Mask operator&&(const Mask &mask) const; /// Returns the component-wise application of a binary \c AND to \p mask. Vc_ALWAYS_INLINE Mask operator&(const Mask &mask) const; /// Returns the component-wise application of a logical \c OR to \p mask. Vc_ALWAYS_INLINE Mask operator||(const Mask &mask) const; /// Returns the component-wise application of a binary \c OR to \p mask. Vc_ALWAYS_INLINE Mask operator|(const Mask &mask) const; /// Returns the component-wise application of a binary \c XOR to \p mask. Vc_ALWAYS_INLINE Mask operator^(const Mask &mask) const; /// Returns a mask with inverted components. Vc_ALWAYS_INLINE Mask operator!() const; /// Modifies the mask using an \c AND operation with \p mask. Vc_ALWAYS_INLINE Mask &operator&=(const Mask &mask); /// Modifies the mask using an \c OR operation with \p mask. Vc_ALWAYS_INLINE Mask &operator|=(const Mask &mask); /// Modifies the mask using an \c XOR operation with \p mask. Vc_ALWAYS_INLINE Mask &operator^=(const Mask &mask); ///@} /** * \name Reductions * * \see any_of, all_of, none_of, some_of */ ///@{ /// Returns a logical \c AND of all components. Vc_ALWAYS_INLINE bool isFull() const; /// Returns a logical \c OR of all components. Vc_ALWAYS_INLINE bool isNotEmpty() const; /// Returns \c true if components are \c false, \c false otherwise. Vc_ALWAYS_INLINE bool isEmpty() const; /// Returns `!isFull() && !isEmpty()`. Vc_ALWAYS_INLINE bool isMix() const; ///@} /**\internal * \name Internal Data Access */ ///@{ Vc_ALWAYS_INLINE bool data() const; Vc_ALWAYS_INLINE bool dataI() const; Vc_ALWAYS_INLINE bool dataD() const; ///@} /// \name Scalar Subscript Operators ///@{ /** * Lvalue-reference-like access to mask entries. * * \param index Determines the boolean to be accessed. * \return a temporary proxy object referencing the \p index th entry of the mask. * * \warning This operator does not return an lvalue reference (to \c bool), but rather * a temporary (rvalue) object that mimics an lvalue reference (as much as is possible * with C++11/14). 
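 *
 * A minimal sketch (illustrative):
 * \code
 * Vc::float_m m(false);
 * m[0] = true;          // write through the proxy object
 * bool first = m[0];    // read back as a plain bool
 * \endcode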
*/ Vc_ALWAYS_INLINE EntryReference operator[](size_t index); /** * Read-only access to mask entries. * * \param index Determines the boolean to be accessed. * \return The \p index th entry of the mask as a \c bool (rvalue). * * \warning This operator does not return an lvalue reference (to `const bool`), but * rather a temporary (rvalue) \c bool. */ Vc_ALWAYS_INLINE EntryType operator[](size_t index) const; ///@} /// Returns how many components of the mask are \c true. Vc_ALWAYS_INLINE int count() const; /** * Returns the index of the first one in the mask. * * \returns the index of the first component that is \c true. * * \warning The return value is undefined if the mask is empty. * * Thus, unless `none_of(mask)`, `mask[mask.firstOne()] == true` holds and `mask[i] == * false` for all `i < mask.firstOne()`. */ Vc_ALWAYS_INLINE int firstOne() const; /** * Convert the boolean components of the mask into bits of an integer. * * \return An \c int where each bit corresponds to the boolean value in the mask. * * For example, the mask `[true, false, false, true]` results in a `9` (in binary: `1001`). */ Vc_ALWAYS_INLINE int toInt() const; /// Returns a mask with components shifted by \p amount places. Vc_INTRINSIC Vc_PURE Mask shifted(int amount) const; Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(Mask)); private: VectorType d; }; /** * \ingroup Utilities * * \name Boolean Reductions */ //@{ /** \ingroup Utilities * Returns whether all entries in the mask \p m are \c true. */ template constexpr bool all_of(const Mask &m) { return m.isFull(); } /** \ingroup Utilities * Returns \p b */ constexpr bool all_of(bool b) { return b; } /** \ingroup Utilities * Returns whether at least one entry in the mask \p m is \c true. */ template constexpr bool any_of(const Mask &m) { return m.isNotEmpty(); } /** \ingroup Utilities * Returns \p b */ constexpr bool any_of(bool b) { return b; } /** \ingroup Utilities * Returns whether all entries in the mask \p m are \c false. */ template constexpr bool none_of(const Mask &m) { return m.isEmpty(); } /** \ingroup Utilities * Returns \p !b */ constexpr bool none_of(bool b) { return !b; } /** \ingroup Utilities * Returns whether at least one entry in \p m is \c true and at least one entry in \p m is \c * false. */ template constexpr bool some_of(const Mask &m) { return m.isMix(); } /** \ingroup Utilities * Returns \c false */ constexpr bool some_of(bool) { return false; } //@} } // namespace Vc #endif // VC_COMMON_MASK_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/maskbool.h000066400000000000000000000077201476554302100206750ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_MASKENTRY_H_ #define VC_COMMON_MASKENTRY_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { namespace { template struct MaskBoolStorage; // the following for typedefs must use std::intN_t and NOT! Vc::intN_t. The latter // segfaults ICC 15.0.3. template<> struct MaskBoolStorage<1> { typedef std::int8_t type; }; template<> struct MaskBoolStorage<2> { typedef std::int16_t type; }; template<> struct MaskBoolStorage<4> { typedef std::int32_t type; }; template<> struct MaskBoolStorage<8> { typedef std::int64_t type; }; } // anonymous namespace template class MaskBool { typedef typename MaskBoolStorage::type storage_type Vc_MAY_ALIAS; storage_type data; public: constexpr MaskBool(bool x) noexcept : data(x ? -1 : 0) {} Vc_ALWAYS_INLINE MaskBool &operator=(bool x) noexcept { data = x ? -1 : 0; return *this; } template ::value && std::is_fundamental::value)>> Vc_ALWAYS_INLINE MaskBool &operator=(T x) noexcept { data = reinterpret_cast(x); return *this; } Vc_ALWAYS_INLINE MaskBool(const MaskBool &) noexcept = default; Vc_ALWAYS_INLINE MaskBool &operator=(const MaskBool &) noexcept = default; template ::value || (std::is_fundamental::value && sizeof(storage_type) == sizeof(T)))>> constexpr operator T() const noexcept { return std::is_same::value ? T((data & 1) != 0) : aliasing_cast(data); } } Vc_MAY_ALIAS; template ::value &&std::is_convertible::value, int>::type = 0> constexpr bool operator==(A &&a, B &&b) { return static_cast(a) == static_cast(b); } template ::value &&std::is_convertible::value, int>::type = 0> constexpr bool operator!=(A &&a, B &&b) { return static_cast(a) != static_cast(b); } } // namespace Common } // namespace Vc #endif // VC_COMMON_MASKENTRY_H_ conky-1.22.1/3rdparty/Vc/Vc/common/math.h000066400000000000000000000132161476554302100200140ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_MATH_H_ #define VC_COMMON_MATH_H_ #define Vc_COMMON_MATH_H_INTERNAL 1 #include "trigonometric.h" #include "const.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { // TODO, not vectorized: template SimdArray::size()> fpclassify(const Vector &x) { return SimdArray::size()>( [&](std::size_t i) { return std::fpclassify(x[i]); }); } template SimdArray fpclassify(const SimdArray &x) { return SimdArray([&](std::size_t i) { return std::fpclassify(x[i]); }); } #ifdef Vc_IMPL_SSE // for SSE, AVX, and AVX2 #include "logarithm.h" #include "exponential.h" #ifdef Vc_IMPL_AVX inline AVX::double_v exp(AVX::double_v _x) { AVX::Vector x = _x; typedef AVX::Vector V; typedef V::Mask M; typedef AVX::Const C; const M overflow = x > Vc::Detail::doubleConstant< 1, 0x0006232bdd7abcd2ull, 9>(); // max log const M underflow = x < Vc::Detail::doubleConstant<-1, 0x0006232bdd7abcd2ull, 9>(); // min log V px = floor(C::log2_e() * x + 0.5); __m128i tmp = _mm256_cvttpd_epi32(px.data()); const SimdArray n = SSE::int_v{tmp}; x -= px * C::ln2_large(); //Vc::Detail::doubleConstant<1, 0x00062e4000000000ull, -1>(); // ln2 x -= px * C::ln2_small(); //Vc::Detail::doubleConstant<1, 0x0007f7d1cf79abcaull, -20>(); // ln2 const double P[] = { Vc::Detail::doubleConstant<1, 0x000089cdd5e44be8ull, -13>(), Vc::Detail::doubleConstant<1, 0x000f06d10cca2c7eull, -6>(), Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 0>() }; const double Q[] = { Vc::Detail::doubleConstant<1, 0x00092eb6bc365fa0ull, -19>(), Vc::Detail::doubleConstant<1, 0x0004ae39b508b6c0ull, -9>(), Vc::Detail::doubleConstant<1, 0x000d17099887e074ull, -3>(), Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 1>() }; const V x2 = x * x; px = x * ((P[0] * x2 + P[1]) * x2 + P[2]); x = px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px); x = V::One() + 2.0 * x; x = ldexp(x, n); // == x * 2ⁿ x(overflow) = std::numeric_limits::infinity(); x.setZero(underflow); return x; } #endif // Vc_IMPL_AVX inline SSE::double_v exp(SSE::double_v::AsArg _x) { SSE::Vector x = _x; typedef SSE::Vector V; typedef V::Mask M; typedef SSE::Const C; const M overflow = x > Vc::Detail::doubleConstant< 1, 0x0006232bdd7abcd2ull, 9>(); // max log const M underflow = x < Vc::Detail::doubleConstant<-1, 0x0006232bdd7abcd2ull, 9>(); // min log V px = floor(C::log2_e() * x + 0.5); SimdArray n; _mm_storel_epi64(reinterpret_cast<__m128i *>(&n), _mm_cvttpd_epi32(px.data())); x -= px * C::ln2_large(); //Vc::Detail::doubleConstant<1, 0x00062e4000000000ull, -1>(); // ln2 x -= px * C::ln2_small(); //Vc::Detail::doubleConstant<1, 0x0007f7d1cf79abcaull, -20>(); // ln2 const double P[] = { Vc::Detail::doubleConstant<1, 0x000089cdd5e44be8ull, -13>(), Vc::Detail::doubleConstant<1, 0x000f06d10cca2c7eull, -6>(), Vc::Detail::doubleConstant<1, 
0x0000000000000000ull, 0>() }; const double Q[] = { Vc::Detail::doubleConstant<1, 0x00092eb6bc365fa0ull, -19>(), Vc::Detail::doubleConstant<1, 0x0004ae39b508b6c0ull, -9>(), Vc::Detail::doubleConstant<1, 0x000d17099887e074ull, -3>(), Vc::Detail::doubleConstant<1, 0x0000000000000000ull, 1>() }; const V x2 = x * x; px = x * ((P[0] * x2 + P[1]) * x2 + P[2]); x = px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px); x = V::One() + 2.0 * x; x = ldexp(x, n); // == x * 2ⁿ x(overflow) = std::numeric_limits::infinity(); x.setZero(underflow); return x; } #endif } // namespace Vc #undef Vc_COMMON_MATH_H_INTERNAL #endif // VC_COMMON_MATH_H_ conky-1.22.1/3rdparty/Vc/Vc/common/memory.h000066400000000000000000000535711476554302100204030ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_MEMORY_H_ #define VC_COMMON_MEMORY_H_ #include "memorybase.h" #include #include #include #include #include #include "memoryfwd.h" #include "malloc.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { template struct _MemorySizeCalculation { enum AlignmentCalculations { Alignment = V::Size, AlignmentMask = Alignment - 1, MaskedSize = Size & AlignmentMask, Padding = Alignment - MaskedSize, PaddedSize = MaskedSize == 0 ? Size : Size + Padding }; }; /** * \ingroup Containers * \headerfile memory.h * * A helper class for fixed-size two-dimensional arrays. * * \param V The vector type you want to operate on. (e.g. 
float_v or uint_v) * \param Size1 Number of rows * \param Size2 Number of columns */ template class Memory : public MemoryBase, 2, Memory> { public: typedef typename V::EntryType EntryType; private: using RowMemory = Memory; typedef MemoryBase, 2, RowMemory> Base; friend class MemoryBase, 2, RowMemory>; friend class MemoryDimensionBase, 2, RowMemory>; enum : size_t { Alignment = V::MemoryAlignment, PaddedSize2 = _MemorySizeCalculation::PaddedSize }; alignas(static_cast(Alignment)) // GCC complains about 'is not an // integer constant' unless the // static_cast is present RowMemory m_mem[Size1]; public: using Base::vector; enum Constants { RowCount = Size1, VectorsCount = PaddedSize2 / V::Size }; Memory() = default; /** * \return the number of rows in the array. * * \note This function can be eliminated by an optimizing compiler. */ static constexpr size_t rowsCount() { return RowCount; } /** * \return the number of scalar entries in the whole array. * * \warning Do not use this function for scalar iteration over the array since there will be * padding between rows if \c Size2 is not divisible by \c V::Size. * * \note This function can be optimized into a compile-time constant. */ static constexpr size_t entriesCount() { return Size1 * Size2; } /** * \return the number of vectors in the whole array. * * \note This function can be optimized into a compile-time constant. */ static constexpr size_t vectorsCount() { return VectorsCount * Size1; } /** * Copies the data from a different object. * * \param rhs The object to copy the data from. * * \return reference to the modified Memory object. * * \note Both objects must have the exact same vectorsCount(). */ template Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase &rhs) { assert(vectorsCount() == rhs.vectorsCount()); Detail::copyVectors(*this, rhs); return *this; } Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) { Detail::copyVectors(*this, rhs); return *this; } /** * Initialize all data with the given vector. * * \param v This vector will be used to initialize the memory. * * \return reference to the modified Memory object. */ inline Memory &operator=(const V &v) { for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) = v; } return *this; } }; /** * A helper class to simplify usage of correctly aligned and padded memory, allowing both vector and * scalar access. * * Example: * \code Vc::Memory array; // scalar access: for (size_t i = 0; i < array.entriesCount(); ++i) { int x = array[i]; // read array[i] = x; // write } // more explicit alternative: for (size_t i = 0; i < array.entriesCount(); ++i) { int x = array.scalar(i); // read array.scalar(i) = x; // write } // vector access: for (size_t i = 0; i < array.vectorsCount(); ++i) { int_v x = array.vector(i); // read array.vector(i) = x; // write } * \endcode * This code allocates a small array and implements three equivalent loops (that do nothing useful). * The loops show how scalar and vector read/write access is best implemented. * * Since the size of 11 is not a multiple of int_v::Size (unless you use the * scalar Vc implementation) the last write access of the vector loop would normally be out of * bounds. But the Memory class automatically pads the memory such that the whole array can be * accessed with correctly aligned memory addresses. * * \param V The vector type you want to operate on. (e.g. float_v or uint_v) * \param Size The number of entries of the scalar base type the memory should hold. This * is thus the same number as you would use for a normal C array (e.g. 
float mem[11] becomes * Memory mem). * * \see Memory * * \ingroup Containers * \headerfile memory.h */ template class Memory : public MemoryBase, 1, void> { public: typedef typename V::EntryType EntryType; private: typedef MemoryBase, 1, void> Base; friend class MemoryBase, 1, void>; friend class MemoryDimensionBase, 1, void>; enum : size_t { Alignment = V::MemoryAlignment, // in Bytes MaskedSize = Size & (V::Size - 1), // the fraction of Size that exceeds // an integral multiple of V::Size Padding = V::Size - MaskedSize, PaddedSize = MaskedSize == 0 ? Size : Size + Padding }; alignas(static_cast(Alignment)) // GCC complains about 'is not an // integer constant' unless the // static_cast is present EntryType m_mem[PaddedSize]; public: using Base::vector; enum Constants { EntriesCount = Size, VectorsCount = PaddedSize / V::Size }; Memory() { if (InitPadding) { Base::lastVector() = V::Zero(); } } Memory(std::initializer_list init) { Vc_ASSERT(init.size() <= Size); Base::lastVector() = V::Zero(); std::copy(init.begin(), init.end(), &m_mem[0]); } /** * Wrap existing data with the Memory convenience class. * * This function returns a \em reference to a Memory object that you must * capture to avoid a copy of the whole data: * \code * Memory &m = Memory::fromRawData(someAlignedPointerToFloat) * \endcode * * \param ptr An aligned pointer to memory of type \p V::EntryType (e.g. \c float for * Vc::float_v). * \return A Memory object placed at the given location in memory. * * \warning The pointer \p ptr passed to this function must be aligned according to the * alignment restrictions of \p V. * \warning The size of the accessible memory must match \p Size. This includes the * required padding at the end to allow the last entries to be accessed via vectors. If * you know what you are doing you might violate this constraint. * \warning It is your responsibility to ensure that the memory is released correctly * (not too early/not leaked). This function simply adds convenience functions to \em * access the memory. */ static Vc_ALWAYS_INLINE Vc_CONST Memory &fromRawData(EntryType *ptr) { // DANGER! This placement new has to use the right address. If the compiler decides // RowMemory requires padding before the actual data then the address has to be adjusted // accordingly char *addr = reinterpret_cast(ptr); typedef Memory MM; addr -= offsetof(MM, m_mem); return *new(addr) MM; } /** * \return the number of scalar entries in the whole array. * * \note This function can be optimized into a compile-time constant. */ static constexpr size_t entriesCount() { return EntriesCount; } /** * \return the number of vectors in the whole array. * * \note This function can be optimized into a compile-time constant. 
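 *
 * The same counters exist on the two-dimensional Memory documented further above; a
 * sketch of row-wise vector access (sizes are hypothetical, Vc::float_v is just an
 * example vector type):
 * \code
 * Vc::Memory<Vc::float_v, 8, 16> m2d;          // 8 rows, 16 (padded) columns
 * for (size_t r = 0; r < m2d.rowsCount(); ++r) {
 *     for (size_t v = 0; v < m2d[r].vectorsCount(); ++v) {
 *         m2d[r].vector(v) = Vc::float_v::Zero();
 *     }
 * }
 * \endcode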
*/ static constexpr size_t vectorsCount() { return VectorsCount; } inline Memory(const Memory &rhs) { Detail::copyVectors(*this, rhs); } template inline Memory(const Memory &rhs) { assert(vectorsCount() == rhs.vectorsCount()); Detail::copyVectors(*this, rhs); } inline Memory &operator=(const Memory &rhs) { Detail::copyVectors(*this, rhs); return *this; } template inline Memory &operator=(const Memory &rhs) { assert(vectorsCount() == rhs.vectorsCount()); Detail::copyVectors(*this, rhs); return *this; } Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) { std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType)); return *this; } inline Memory &operator=(const V &v) { for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) = v; } return *this; } }; /** * A helper class that is very similar to Memory but with dynamically allocated memory and * thus dynamic size. * * Example: * \code size_t size = 11; Vc::Memory array(size); // scalar access: for (size_t i = 0; i < array.entriesCount(); ++i) { array[i] = i; } // vector access: for (size_t i = 0; i < array.vectorsCount(); ++i) { array.vector(i) = int_v::IndexesFromZero() + i * int_v::Size; } * \endcode * This code allocates a small array with 11 scalar entries * and implements two equivalent loops that initialize the memory. * The scalar loop writes each individual int. The vectorized loop writes int_v::Size values to * memory per iteration. Since the size of 11 is not a multiple of int_v::Size (unless you use the * scalar Vc implementation) the last write access of the vector loop would normally be out of * bounds. But the Memory class automatically pads the memory such that the whole array can be * accessed with correctly aligned memory addresses. * (Note: the scalar loop can be auto-vectorized, except for the last three assignments.) * * \note The internal data pointer is not declared with the \c __restrict__ keyword. Therefore * modifying memory of V::EntryType will require the compiler to assume aliasing. If you want to use * the \c __restrict__ keyword you need to use a standard pointer to memory and do the vector * address calculation and loads and stores manually. * * \param V The vector type you want to operate on. (e.g. float_v or uint_v) * * \see Memory * * \ingroup Containers * \headerfile memory.h */ template class Memory : public MemoryBase, 1, void> { public: typedef typename V::EntryType EntryType; private: typedef MemoryBase, 1, void> Base; friend class MemoryBase, 1, void>; friend class MemoryDimensionBase, 1, void>; enum InternalConstants { Alignment = V::Size, AlignmentMask = Alignment - 1 }; size_t m_entriesCount; size_t m_vectorsCount; EntryType *m_mem; size_t calcPaddedEntriesCount(size_t x) { size_t masked = x & AlignmentMask; return (masked == 0 ? x : x + (Alignment - masked)); } public: using Base::vector; /** * Allocate enough memory to access \p size values of type \p V::EntryType. * * The allocated memory is aligned and padded correctly for fully vectorized access. * * \param size Determines how many scalar values will fit into the allocated memory. */ Vc_ALWAYS_INLINE Memory(size_t size) : m_entriesCount(size), m_vectorsCount(calcPaddedEntriesCount(m_entriesCount)), m_mem(Vc::malloc(m_vectorsCount)) { m_vectorsCount /= V::Size; Base::lastVector() = V::Zero(); } /** * Copy the memory into a new memory area. * * The allocated memory is aligned and padded correctly for fully vectorized access. * * \param rhs The Memory object to copy from. 
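 *
 * A related sketch for the fromRawData() helper documented above (the buffer and its
 * size are hypothetical; the alignment value is taken from V::MemoryAlignment as the
 * documentation requires):
 * \code
 * alignas(Vc::float_v::MemoryAlignment) float raw[64];   // 64 is a multiple of float_v::Size
 * auto &wrapped = Vc::Memory<Vc::float_v, 64>::fromRawData(raw);
 * wrapped.vector(0) = Vc::float_v::Zero();
 * \endcode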
*/ template Vc_ALWAYS_INLINE Memory(const MemoryBase &rhs) : m_entriesCount(rhs.entriesCount()), m_vectorsCount(rhs.vectorsCount()), m_mem(Vc::malloc(m_vectorsCount * V::Size)) { Detail::copyVectors(*this, rhs); } /** * Overload of the above function. * * (Because C++ would otherwise not use the templated cctor and use a default-constructed cctor instead.) * * \param rhs The Memory object to copy from. */ Vc_ALWAYS_INLINE Memory(const Memory &rhs) : m_entriesCount(rhs.entriesCount()), m_vectorsCount(rhs.vectorsCount()), m_mem(Vc::malloc(m_vectorsCount * V::Size)) { Detail::copyVectors(*this, rhs); } /** * Frees the memory which was allocated in the constructor. */ Vc_ALWAYS_INLINE ~Memory() { Vc::free(m_mem); } /** * Swap the contents and size information of two Memory objects. * * \param rhs The other Memory object to swap. */ inline void swap(Memory &rhs) { std::swap(m_mem, rhs.m_mem); std::swap(m_entriesCount, rhs.m_entriesCount); std::swap(m_vectorsCount, rhs.m_vectorsCount); } /** * \return the number of scalar entries in the whole array. */ Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return m_entriesCount; } /** * \return the number of vectors in the whole array. */ Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return m_vectorsCount; } /** * Overwrite all entries with the values stored in \p rhs. * * \param rhs The object to copy the data from. * * \return reference to the modified Memory object. * * \note this function requires the vectorsCount() of both Memory objects to be equal. */ template Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase &rhs) { assert(vectorsCount() == rhs.vectorsCount()); Detail::copyVectors(*this, rhs); return *this; } Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) { assert(vectorsCount() == rhs.vectorsCount()); Detail::copyVectors(*this, rhs); return *this; } /** * Overwrite all entries with the values stored in the memory at \p rhs. * * \param rhs The array to copy the data from. * * \return reference to the modified Memory object. * * \note this function requires that there are entriesCount() many values accessible from \p rhs. */ Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) { std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType)); return *this; } }; /** * Prefetch the cacheline containing \p addr for a single read access. * * This prefetch completely bypasses the cache, not evicting any other data. * * \param addr The cacheline containing \p addr will be prefetched. * * \ingroup Utilities * \headerfile memory.h */ Vc_ALWAYS_INLINE void prefetchForOneRead(const void *addr) { Vc::Detail::prefetchForOneRead(addr, VectorAbi::Best()); } /** * Prefetch the cacheline containing \p addr for modification. * * This prefetch evicts data from the cache. So use it only for data you really will use. When the * target system supports it the cacheline will be marked as modified while prefetching, saving work * later on. * * \param addr The cacheline containing \p addr will be prefetched. * * \ingroup Utilities * \headerfile memory.h */ Vc_ALWAYS_INLINE void prefetchForModify(const void *addr) { Vc::Detail::prefetchForModify(addr, VectorAbi::Best()); } /** * Prefetch the cacheline containing \p addr to L1 cache. * * This prefetch evicts data from the cache. So use it only for data you really will use. * * \param addr The cacheline containing \p addr will be prefetched. 
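 *
 * Illustrative use of the prefetch helpers in a streaming loop (sketch only; \c data,
 * \c count and the look-ahead distance are hypothetical, \c data is assumed suitably
 * aligned and \c count a multiple of the vector size):
 * \code
 * for (size_t i = 0; i < count; i += Vc::float_v::Size) {
 *     Vc::prefetchClose(&data[i + 64]);        // warm the L1 cache a little ahead
 *     Vc::float_v x(&data[i], Vc::Aligned);    // the actual vector load
 *     // ... work on x ...
 * }
 * \endcode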
* * \ingroup Utilities * \headerfile memory.h */ Vc_ALWAYS_INLINE void prefetchClose(const void *addr) { Vc::Detail::prefetchClose(addr, VectorAbi::Best()); } /** * Prefetch the cacheline containing \p addr to L2 cache. * * This prefetch evicts data from the cache. So use it only for data you really will use. * * \param addr The cacheline containing \p addr will be prefetched. * * \ingroup Utilities * \headerfile memory.h */ Vc_ALWAYS_INLINE void prefetchMid(const void *addr) { Vc::Detail::prefetchMid(addr, VectorAbi::Best()); } /** * Prefetch the cacheline containing \p addr to L3 cache. * * This prefetch evicts data from the cache. So use it only for data you really will use. * * \param addr The cacheline containing \p addr will be prefetched. * * \ingroup Utilities * \headerfile memory.h */ Vc_ALWAYS_INLINE void prefetchFar(const void *addr) { Vc::Detail::prefetchFar(addr, VectorAbi::Best()); } } // namespace Common using Common::Memory; using Common::prefetchForOneRead; using Common::prefetchForModify; using Common::prefetchClose; using Common::prefetchMid; using Common::prefetchFar; } // namespace Vc namespace std { template Vc_ALWAYS_INLINE void swap(Vc::Memory &a, Vc::Memory &b) { a.swap(b); } } // namespace std #endif // VC_COMMON_MEMORY_H_ conky-1.22.1/3rdparty/Vc/Vc/common/memorybase.h000066400000000000000000001021671476554302100212320ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2009-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_MEMORYBASE_H_ #define VC_COMMON_MEMORYBASE_H_ #include #include #include #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { #define Vc_MEM_OPERATOR_EQ(op) \ template \ Vc_ALWAYS_INLINE enable_if_mutable operator op##=(const T &x) { \ const V v = value() op x; \ v.store(&m_data[0], Flags()); \ return *this; \ } /*dox{{{*/ /** * Helper class for the Memory::vector(size_t) class of functions. * * You will never need to directly make use of this class. It is an implementation detail of the * Memory API. 
* * \headerfile memorybase.h *//*}}}*/ template class MemoryVector/*{{{*/ { typedef typename std::remove_cv<_V>::type V; template using enable_if_mutable = typename std::enable_if::value && !std::is_const<_V>::value, R>::type; using EntryType = typename std::conditional::value, const typename V::EntryType, typename V::EntryType>::type; typedef typename V::Mask Mask; EntryType m_data[V::Size]; public: // It is important that neither initialization nor cleanup is done as MemoryVector aliases // other memory Vc_INTRINSIC MemoryVector() = default; // disable copies because this type is supposed to alias the data in a Memory object, // nothing else MemoryVector(const MemoryVector &) = delete; MemoryVector(MemoryVector &&) = delete; // Do not disable MemoryVector &operator=(const MemoryVector &) = delete; because it is // covered nicely by the operator= below. //! \internal Vc_ALWAYS_INLINE Vc_PURE V value() const { return V(&m_data[0], Flags()); } /** * Cast to \p V operator. * * This function allows to assign this object to any object of type \p V. */ Vc_ALWAYS_INLINE Vc_PURE operator V() const { return value(); } template Vc_ALWAYS_INLINE enable_if_mutable operator=(const T &x) { V v; v = x; v.store(&m_data[0], Flags()); return *this; } Vc_ALL_BINARY(Vc_MEM_OPERATOR_EQ); Vc_ALL_ARITHMETICS(Vc_MEM_OPERATOR_EQ); Vc_ALWAYS_INLINE EntryType &operator[](size_t i) { return m_data[i]; } Vc_ALWAYS_INLINE const EntryType &operator[](size_t i) const { return m_data[i]; } }; template class MemoryVectorIterator { typedef typename std::remove_cv<_V>::type V; template using enable_if_mutable = typename std::enable_if::value && !std::is_const<_V>::value, R>::type; using iterator_traits = std::iterator_traits *>; MemoryVector<_V, Flags> *d; public: typedef typename iterator_traits::difference_type difference_type; typedef typename iterator_traits::value_type value_type; typedef typename iterator_traits::pointer pointer; typedef typename iterator_traits::reference reference; typedef typename iterator_traits::iterator_category iterator_category; constexpr MemoryVectorIterator(MemoryVector<_V, Flags> *dd) : d(dd) {} constexpr MemoryVectorIterator(const MemoryVectorIterator &) = default; constexpr MemoryVectorIterator(MemoryVectorIterator &&) = default; Vc_ALWAYS_INLINE MemoryVectorIterator &operator=(const MemoryVectorIterator &) = default; Vc_ALWAYS_INLINE void *orderBy() const { return d; } Vc_ALWAYS_INLINE difference_type operator-(const MemoryVectorIterator &rhs) const { return d - rhs.d; } Vc_ALWAYS_INLINE reference operator[](size_t i) const { return d[i]; } Vc_ALWAYS_INLINE reference operator*() const { return *d; } Vc_ALWAYS_INLINE pointer operator->() const { return d; } Vc_ALWAYS_INLINE MemoryVectorIterator &operator++() { ++d; return *this; } Vc_ALWAYS_INLINE MemoryVectorIterator operator++(int) { MemoryVectorIterator r(*this); ++d; return r; } Vc_ALWAYS_INLINE MemoryVectorIterator &operator--() { --d; return *this; } Vc_ALWAYS_INLINE MemoryVectorIterator operator--(int) { MemoryVectorIterator r(*this); --d; return r; } Vc_ALWAYS_INLINE MemoryVectorIterator &operator+=(size_t n) { d += n; return *this; } Vc_ALWAYS_INLINE MemoryVectorIterator &operator-=(size_t n) { d -= n; return *this; } Vc_ALWAYS_INLINE MemoryVectorIterator operator+(size_t n) const { return MemoryVectorIterator(d + n); } Vc_ALWAYS_INLINE MemoryVectorIterator operator-(size_t n) const { return MemoryVectorIterator(d - n); } }; template Vc_ALWAYS_INLINE bool operator==(const MemoryVectorIterator &l, const MemoryVectorIterator &r) { 
return l.orderBy() == r.orderBy(); } template Vc_ALWAYS_INLINE bool operator!=(const MemoryVectorIterator &l, const MemoryVectorIterator &r) { return l.orderBy() != r.orderBy(); } template Vc_ALWAYS_INLINE bool operator>=(const MemoryVectorIterator &l, const MemoryVectorIterator &r) { return l.orderBy() >= r.orderBy(); } template Vc_ALWAYS_INLINE bool operator<=(const MemoryVectorIterator &l, const MemoryVectorIterator &r) { return l.orderBy() <= r.orderBy(); } template Vc_ALWAYS_INLINE bool operator> (const MemoryVectorIterator &l, const MemoryVectorIterator &r) { return l.orderBy() > r.orderBy(); } template Vc_ALWAYS_INLINE bool operator< (const MemoryVectorIterator &l, const MemoryVectorIterator &r) { return l.orderBy() < r.orderBy(); } /*}}}*/ #undef Vc_MEM_OPERATOR_EQ #define Vc_VPH_OPERATOR(op) \ template \ decltype(std::declval() op std::declval()) operator op( \ const MemoryVector &x, const MemoryVector &y) \ { \ return x.value() op y.value(); \ } Vc_ALL_ARITHMETICS(Vc_VPH_OPERATOR); Vc_ALL_BINARY (Vc_VPH_OPERATOR); Vc_ALL_COMPARES (Vc_VPH_OPERATOR); #undef Vc_VPH_OPERATOR template> class MemoryRange/*{{{*/ { Parent *m_parent; size_t m_first; size_t m_last; public: MemoryRange(Parent *p, size_t firstIndex, size_t lastIndex) : m_parent(p), m_first(firstIndex), m_last(lastIndex) {} MemoryVectorIterator begin() const { return &m_parent->vector(m_first , Flags()); } MemoryVectorIterator end() const { return &m_parent->vector(m_last + 1, Flags()); } };/*}}}*/ template class MemoryDimensionBase; template class MemoryDimensionBase // {{{1 { private: Parent *p() { return static_cast(this); } const Parent *p() const { return static_cast(this); } public: /** * The type of the scalar entries in the array. */ typedef typename V::EntryType EntryType; /** * Returns a pointer to the start of the allocated memory. */ Vc_ALWAYS_INLINE Vc_PURE EntryType *entries() { return &p()->m_mem[0]; } /// Const overload of the above function. Vc_ALWAYS_INLINE Vc_PURE const EntryType *entries() const { return &p()->m_mem[0]; } /** * Returns the \p i-th scalar value in the memory. */ Vc_ALWAYS_INLINE Vc_PURE EntryType &scalar(size_t i) { return entries()[i]; } /// Const overload of the above function. Vc_ALWAYS_INLINE Vc_PURE const EntryType scalar(size_t i) const { return entries()[i]; } #ifdef DOXYGEN /** * Cast operator to the scalar type. This allows to use the object very much like a standard * C array. */ Vc_ALWAYS_INLINE Vc_PURE operator EntryType*() { return entries(); } /// Const overload of the above function. Vc_ALWAYS_INLINE Vc_PURE operator const EntryType*() const { return entries(); } #else // The above conversion operator allows implicit conversion to bool. To prohibit this // conversion we use SFINAE to allow only conversion to EntryType* and void*. 
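    // Illustrative consequence of the SFINAE restriction above (commentary only, not
    // part of the original header): for a Memory<float_v, N> object m,
    //   float *p = m;   // ok - decays to a pointer, like a C array
    //   void  *q = m;   // ok
    //   bool   b = m;   // rejected - conversion to bool is intentionally excluded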
template ::type, EntryType *>::value || std::is_same::type, void *>::value, int>::type = 0> Vc_ALWAYS_INLINE Vc_PURE operator T() { return entries(); } template ::value || std::is_same::value, int>::type = 0> Vc_ALWAYS_INLINE Vc_PURE operator T() const { return entries(); } #endif /** * */ template Vc_ALWAYS_INLINE MemoryRange range(size_t firstIndex, size_t lastIndex, Flags) { return MemoryRange(p(), firstIndex, lastIndex); } Vc_ALWAYS_INLINE MemoryRange range(size_t firstIndex, size_t lastIndex) { return MemoryRange(p(), firstIndex, lastIndex); } template Vc_ALWAYS_INLINE MemoryRange range(size_t firstIndex, size_t lastIndex, Flags) const { return MemoryRange(p(), firstIndex, lastIndex); } Vc_ALWAYS_INLINE MemoryRange range(size_t firstIndex, size_t lastIndex) const { return MemoryRange(p(), firstIndex, lastIndex); } /** * Returns the \p i-th scalar value in the memory. */ Vc_ALWAYS_INLINE EntryType &operator[](size_t i) { return entries()[i]; } /// Const overload of the above function. Vc_ALWAYS_INLINE const EntryType &operator[](size_t i) const { return entries()[i]; } /** * Uses a vector gather to combine the entries at the indexes in \p i into the returned * vector object. * * \param i An integer vector. It determines the entries to be gathered. * \returns A vector object. Modification of this object will not modify the values in * memory. * * \warning The API of this function might change in future versions of Vc to additionally * support scatters. */ template Vc_ALWAYS_INLINE Vc_PURE V operator[](Vector i) const { return V(entries(), i); } }; template class MemoryDimensionBase // {{{1 { private: Parent *p() { return static_cast(this); } const Parent *p() const { return static_cast(this); } public: /** * The type of the scalar entries in the array. */ typedef typename V::EntryType EntryType; static constexpr size_t rowCount() { return Parent::RowCount; } /** * Returns a pointer to the start of the allocated memory. */ Vc_ALWAYS_INLINE Vc_PURE EntryType *entries(size_t x = 0) { return &p()->m_mem[x][0]; } /// Const overload of the above function. Vc_ALWAYS_INLINE Vc_PURE const EntryType *entries(size_t x = 0) const { return &p()->m_mem[x][0]; } /** * Returns the \p i,j-th scalar value in the memory. */ Vc_ALWAYS_INLINE Vc_PURE EntryType &scalar(size_t i, size_t j) { return entries(i)[j]; } /// Const overload of the above function. Vc_ALWAYS_INLINE Vc_PURE const EntryType scalar(size_t i, size_t j) const { return entries(i)[j]; } /** * Returns the \p i-th row in the memory. */ Vc_ALWAYS_INLINE Vc_PURE RowMemory &operator[](size_t i) { return p()->m_mem[i]; } /// Const overload of the above function. Vc_ALWAYS_INLINE Vc_PURE const RowMemory &operator[](size_t i) const { return p()->m_mem[i]; } /** * \return the number of rows in the array. * * \note This function can be eliminated by an optimizing compiler. */ Vc_ALWAYS_INLINE Vc_PURE size_t rowsCount() const { return p()->rowsCount(); } }; //dox{{{1 /** * \headerfile memorybase.h * * Common interface to all Memory classes, independent of allocation on the stack or heap. * * \param V The vector type you want to operate on. (e.g. float_v or uint_v) * \param Parent This type is the complete type of the class that derives from MemoryBase. * \param Dimension The number of dimensions the implementation provides. * \param RowMemory Class to be used to work on a single row. 
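 *
 * One member worth an explicit sketch is the gathering subscript documented in
 * MemoryDimensionBase above (illustrative; \c idx stands for an integer index Vector
 * holding one in-bounds index per component):
 * \code
 * Vc::Memory<Vc::float_v, 256> mem;
 * Vc::float_v gathered = mem[idx];   // read-only vector gather from mem
 * \endcode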
*/ template class MemoryBase : public MemoryDimensionBase //{{{1 { static_assert((V::size() * sizeof(typename V::EntryType)) % V::MemoryAlignment == 0, "Vc::Memory can only be used for data-parallel types storing a number " "of values that's a multiple of the memory alignment."); private: Parent *p() { return static_cast(this); } const Parent *p() const { return static_cast(this); } template using vector_reference = MayAlias> &; template using const_vector_reference = const MayAlias> &; public: /** * The type of the scalar entries in the array. */ typedef typename V::EntryType EntryType; /** * \return the number of scalar entries in the array. This function is optimized away * if a constant size array is used. */ Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return p()->entriesCount(); } /** * \return the number of vector entries that span the array. This function is optimized away * if a constant size array is used. */ Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return p()->vectorsCount(); } using MemoryDimensionBase::entries; using MemoryDimensionBase::scalar; /** * Return a (vectorized) iterator to the start of this memory object. */ template Vc_ALWAYS_INLINE MemoryVectorIterator< V, Flags> begin(Flags flags = Flags()) { return &firstVector(flags); } //! const overload of the above template Vc_ALWAYS_INLINE MemoryVectorIterator begin(Flags flags = Flags()) const { return &firstVector(flags); } /** * Return a (vectorized) iterator to the end of this memory object. */ template Vc_ALWAYS_INLINE MemoryVectorIterator< V, Flags> end(Flags flags = Flags()) { return &lastVector(flags) + 1; } //! const overload of the above template Vc_ALWAYS_INLINE MemoryVectorIterator end(Flags flags = Flags()) const { return &lastVector(flags) + 1; } /** * \param i Selects the offset, where the vector should be read. * * \return a smart object to wrap the \p i-th vector in the memory. * * The return value can be used as any other vector object. I.e. you can substitute * something like * \code * float_v a = ..., b = ...; * a += b; * \endcode * with * \code * mem.vector(i) += b; * \endcode * * This function ensures that only \em aligned loads and stores are used. Thus it only allows to * access memory at fixed strides. If access to known offsets from the aligned vectors is * needed the vector(size_t, int) function can be used. */ template Vc_ALWAYS_INLINE Vc_PURE typename std::enable_if::value, vector_reference>::type vector(size_t i, Flags = Flags()) { return *aliasing_cast>(&entries()[i * V::Size]); } /** \brief Const overload of the above function * * \param i Selects the offset, where the vector should be read. * * \return a smart object to wrap the \p i-th vector in the memory. */ template Vc_ALWAYS_INLINE Vc_PURE typename std::enable_if::value, const_vector_reference>::type vector(size_t i, Flags = Flags()) const { return *aliasing_cast>(&entries()[i * V::Size]); } /** * \return a smart object to wrap the vector starting from the \p i-th scalar entry in the memory. * * Example: * \code * Memory mem; * mem.setZero(); * for (int i = 0; i < mem.entriesCount(); i += float_v::Size) { * mem.vectorAt(i) += b; * } * \endcode * * \param i Specifies the scalar entry from where the vector will be loaded/stored. I.e. the * values scalar(i), scalar(i + 1), ..., scalar(i + V::Size - 1) will be read/overwritten. * * \param flags You must take care to determine whether an unaligned load/store is * required. Per default an unaligned load/store is used. 
If \p i is a multiple of \c V::Size * you may want to pass Vc::Aligned here. */ template Vc_ALWAYS_INLINE Vc_PURE vector_reference vectorAt(size_t i, Flags flags = Flags()) { return *aliasing_cast>(&entries()[i]); } /** \brief Const overload of the above function * * \return a smart object to wrap the vector starting from the \p i-th scalar entry in the memory. * * \param i Specifies the scalar entry from where the vector will be loaded/stored. I.e. the * values scalar(i), scalar(i + 1), ..., scalar(i + V::Size - 1) will be read/overwritten. * * \param flags You must take care to determine whether an unaligned load/store is * required. Per default an unaligned load/store is used. If \p i is a multiple of \c V::Size * you may want to pass Vc::Aligned here. */ template Vc_ALWAYS_INLINE Vc_PURE const_vector_reference vectorAt( size_t i, Flags flags = Flags()) const { return *aliasing_cast>(&entries()[i]); } /** * \return a smart object to wrap the \p i-th vector + \p shift in the memory. * * This function ensures that only \em unaligned loads and stores are used. * It allows to access memory at any location aligned to the entry type. * * \param i Selects the memory location of the i-th vector. Thus if \p V::Size == 4 and * \p i is set to 3 the base address for the load/store will be the 12th entry * (same as \p &mem[12]). * \param shift Shifts the base address determined by parameter \p i by \p shift many * entries. Thus \p vector(3, 1) for \p V::Size == 4 will load/store the * 13th - 16th entries (same as \p &mem[13]). * * \note Any shift value is allowed as long as you make sure it stays within bounds of the * allocated memory. Shift values that are a multiple of \p V::Size will \em not result in * aligned loads. You have to use the above vector(size_t) function for aligned loads * instead. * * \note Thus a simple way to access vectors randomly is to set \p i to 0 and use \p shift as the * parameter to select the memory address: * \code * // don't use: * mem.vector(i / V::Size, i % V::Size) += 1; * // instead use: * mem.vector(0, i) += 1; * \endcode */ template Vc_ALWAYS_INLINE Vc_PURE typename std::enable_if< std::is_convertible::value, vector_reference() | Unaligned)>>::type vector(size_t i, ShiftT shift, Flags = Flags()) { return *aliasing_cast< MemoryVector() | Unaligned)>>( &entries()[i * V::Size + shift]); } /// Const overload of the above function. template Vc_ALWAYS_INLINE Vc_PURE typename std::enable_if< std::is_convertible::value, const_vector_reference() | Unaligned)>>::type vector(size_t i, ShiftT shift, Flags = Flags()) const { return *aliasing_cast< MemoryVector() | Unaligned)>>( &entries()[i * V::Size + shift]); } /** * \return the first vector in the allocated memory. * * This function is simply a shorthand for vector(0). */ template Vc_ALWAYS_INLINE Vc_PURE vector_reference firstVector(Flags f = Flags()) { return vector(0, f); } /// Const overload of the above function. template Vc_ALWAYS_INLINE Vc_PURE const_vector_reference firstVector( Flags f = Flags()) const { return vector(0, f); } /** * \return the last vector in the allocated memory. * * This function is simply a shorthand for vector(vectorsCount() - 1). */ template Vc_ALWAYS_INLINE Vc_PURE vector_reference lastVector(Flags f = Flags()) { return vector(vectorsCount() - 1, f); } /// Const overload of the above function. 
template Vc_ALWAYS_INLINE Vc_PURE const_vector_reference lastVector( Flags f = Flags()) const { return vector(vectorsCount() - 1, f); } Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned char *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); } Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned short *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); } Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned int *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); } Vc_ALWAYS_INLINE Vc_PURE V gather(const unsigned long *indexes) const { return V(entries(), typename V::IndexType(indexes, Vc::Unaligned)); } /** * Zero the whole memory area. */ Vc_ALWAYS_INLINE void setZero() { V zero(Vc::Zero); for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) = zero; } } /** * Assign a value to all vectors in the array. */ template Vc_ALWAYS_INLINE Parent &operator=(U &&x) { for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) = std::forward(x); } } /** * (Inefficient) shorthand to add up two arrays. */ template inline Parent &operator+=(const MemoryBase &rhs) { assert(vectorsCount() == rhs.vectorsCount()); for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) += rhs.vector(i); } return static_cast(*this); } /** * (Inefficient) shorthand to subtract two arrays. */ template inline Parent &operator-=(const MemoryBase &rhs) { assert(vectorsCount() == rhs.vectorsCount()); for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) -= rhs.vector(i); } return static_cast(*this); } /** * (Inefficient) shorthand to multiply two arrays. */ template inline Parent &operator*=(const MemoryBase &rhs) { assert(vectorsCount() == rhs.vectorsCount()); for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) *= rhs.vector(i); } return static_cast(*this); } /** * (Inefficient) shorthand to divide two arrays. */ template inline Parent &operator/=(const MemoryBase &rhs) { assert(vectorsCount() == rhs.vectorsCount()); for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) /= rhs.vector(i); } return static_cast(*this); } /** * (Inefficient) shorthand to add a value to an array. */ inline Parent &operator+=(EntryType rhs) { V v(rhs); for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) += v; } return static_cast(*this); } /** * (Inefficient) shorthand to subtract a value from an array. */ inline Parent &operator-=(EntryType rhs) { V v(rhs); for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) -= v; } return static_cast(*this); } /** * (Inefficient) shorthand to multiply a value to an array. */ inline Parent &operator*=(EntryType rhs) { V v(rhs); for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) *= v; } return static_cast(*this); } /** * (Inefficient) shorthand to divide an array with a value. */ inline Parent &operator/=(EntryType rhs) { V v(rhs); for (size_t i = 0; i < vectorsCount(); ++i) { vector(i) /= v; } return static_cast(*this); } /** * (Inefficient) shorthand compare equality of two arrays. */ template inline bool operator==(const MemoryBase &rhs) const { assert(vectorsCount() == rhs.vectorsCount()); for (size_t i = 0; i < vectorsCount(); ++i) { if (!(V(vector(i)) == V(rhs.vector(i))).isFull()) { return false; } } return true; } /** * (Inefficient) shorthand compare two arrays. 
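 *
 * Taken together, the whole-array helpers above allow short (if not maximally
 * efficient) code such as this sketch (sizes and values are hypothetical):
 * \code
 * Vc::Memory<Vc::float_v, 32> a, b;
 * a.setZero();                 // zero every vector in a
 * b = Vc::float_v(1.f);        // broadcast-assign every vector in b
 * a += b;                      // element-wise accumulate
 * bool same = (a == b);        // true when every entry compares equal
 * \endcode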
*/ template inline bool operator!=(const MemoryBase &rhs) const { assert(vectorsCount() == rhs.vectorsCount()); for (size_t i = 0; i < vectorsCount(); ++i) { if (!(V(vector(i)) == V(rhs.vector(i))).isEmpty()) { return false; } } return true; } /** * (Inefficient) shorthand compare two arrays. */ template inline bool operator<(const MemoryBase &rhs) const { assert(vectorsCount() == rhs.vectorsCount()); for (size_t i = 0; i < vectorsCount(); ++i) { if (!(V(vector(i)) < V(rhs.vector(i))).isFull()) { return false; } } return true; } /** * (Inefficient) shorthand compare two arrays. */ template inline bool operator<=(const MemoryBase &rhs) const { assert(vectorsCount() == rhs.vectorsCount()); for (size_t i = 0; i < vectorsCount(); ++i) { if (!(V(vector(i)) <= V(rhs.vector(i))).isFull()) { return false; } } return true; } /** * (Inefficient) shorthand compare two arrays. */ template inline bool operator>(const MemoryBase &rhs) const { assert(vectorsCount() == rhs.vectorsCount()); for (size_t i = 0; i < vectorsCount(); ++i) { if (!(V(vector(i)) > V(rhs.vector(i))).isFull()) { return false; } } return true; } /** * (Inefficient) shorthand compare two arrays. */ template inline bool operator>=(const MemoryBase &rhs) const { assert(vectorsCount() == rhs.vectorsCount()); for (size_t i = 0; i < vectorsCount(); ++i) { if (!(V(vector(i)) >= V(rhs.vector(i))).isFull()) { return false; } } return true; } }; namespace Detail { template inline void copyVectors(MemoryBase &dst, const MemoryBase &src) { const size_t vectorsCount = dst.vectorsCount(); size_t i = 3; for (; i < vectorsCount; i += 4) { const V tmp3 = src.vector(i - 3); const V tmp2 = src.vector(i - 2); const V tmp1 = src.vector(i - 1); const V tmp0 = src.vector(i - 0); dst.vector(i - 3) = tmp3; dst.vector(i - 2) = tmp2; dst.vector(i - 1) = tmp1; dst.vector(i - 0) = tmp0; } for (i -= 3; i < vectorsCount; ++i) { dst.vector(i) = src.vector(i); } } } // namespace Detail } // namespace Common } // namespace Vc #endif // VC_COMMON_MEMORYBASE_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/memoryfwd.h000066400000000000000000000037051476554302100210760ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2011-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_MEMORYFWD_H_ #define VC_COMMON_MEMORYFWD_H_ namespace Vc_VERSIONED_NAMESPACE { namespace Common { template class Memory; template class MemoryBase; } // namespace Common using Common::Memory; } // namespace Vc #endif // VC_COMMON_MEMORYFWD_H_ conky-1.22.1/3rdparty/Vc/Vc/common/operators.h000066400000000000000000000344631476554302100211100ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2012-2016 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef COMMON_OPERATORS_H_ #define COMMON_OPERATORS_H_ #include "simdarray.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Detail { template enable_if::value, U> is_convertible_to_any_vector(Vector); template T is_convertible_to_any_vector(Vector); template ::value, bool = std::is_integral::value> struct FundamentalReturnType; template using fundamental_return_t = typename FundamentalReturnType::type; template struct FundamentalReturnType { using type = typename std::conditional< std::is_arithmetic::value, typename std::conditional<(sizeof(T) < sizeof(U)), U, T>::type, // U is not arithmetic, e.g. an enum or a type with e.g. operator int() T>::type; }; template struct FundamentalReturnType { using type = typename std::conditional< std::is_arithmetic::value, U, // U is not arithmetic, e.g. an enum or a type with e.g. 
operator int() T>::type; }; template struct FundamentalReturnType { using type = T; }; template struct my_make_signed : public std::make_signed { }; template <> struct my_make_signed { using type = bool; }; template struct higher_conversion_rank { template using fix_sign = typename std::conditional<(std::is_unsigned::value || std::is_unsigned::value), typename std::make_unsigned::type, A>::type; using T = typename my_make_signed::type; using U = typename my_make_signed::type; template using c = typename std::conditional::value || std::is_same::value, Test, Otherwise>::type; using type = fix_sign>>>>>; }; template struct FundamentalReturnType { template using c = typename std::conditional::type; using type = c<(sizeof(T) > sizeof(U)), T, c<(sizeof(T) < sizeof(U)), U, typename higher_conversion_rank::type>>; }; template struct ReturnTypeImpl { // no type => SFINAE }; // 1. Vector × Vector template struct ReturnTypeImpl, Vector, Uq, void> { using type = Vc::Vector, Abi>; }; // 2. Vector × int template struct ReturnTypeImpl, int, Uq, void> { // conversion from int is always allowed (because its the default when you hardcode a // number) using type = Vc::Vector; }; // 3. Vector × unsigned template struct ReturnTypeImpl, uint, Uq, void> { // conversion from unsigned int is allowed for all integral Vector, but ensures // unsigned result using type = Vc::Vector< typename std::conditional::value, std::make_unsigned, std::enable_if>::type::type, Abi>; }; // 4. Vector × {enum, arithmetic} template struct ReturnTypeImpl< Vector, U, Uq, enable_if::value && !std::is_same::value && !std::is_same::value && Traits::is_valid_vector_argument>::value, void>> { using type = Vc::Vector, Abi>; }; // 5. Vector × UDT template struct ReturnTypeImpl< Vector, U, Uq, enable_if::value && !Traits::is_simd_vector::value && Traits::is_valid_vector_argument(std::declval()))>::value, void>> { using type = Vc::Vector( std::declval()))>, Abi>; }; template > using ReturnType = typename ReturnTypeImpl::type; template struct is_a_type : public std::true_type { }; #ifdef Vc_ENABLE_FLOAT_BIT_OPERATORS #define Vc_TEST_FOR_BUILTIN_OPERATOR(op_) true #else #define Vc_TEST_FOR_BUILTIN_OPERATOR(op_) \ Detail::is_a_type() \ op_ std::declval())>::value #endif } // namespace Detail #define Vc_GENERIC_OPERATOR(op_) \ template , U>> \ Vc_ALWAYS_INLINE enable_if, R>::value && \ std::is_convertible::value, \ R> \ operator op_(Vector x, U &&y) \ { \ return Detail::operator op_(R(x), R(std::forward(y))); \ } \ template , U>> \ Vc_ALWAYS_INLINE enable_if::value && \ std::is_convertible, R>::value && \ std::is_convertible::value, \ R> \ operator op_(U &&x, Vector y) \ { \ return Detail::operator op_(R(std::forward(x)), R(y)); \ } \ template , U>> \ Vc_ALWAYS_INLINE enable_if, R>::value && \ std::is_convertible::value, \ Vector &> \ operator op_##=(Vector &x, U &&y) \ { \ x = Detail::operator op_(R(x), R(std::forward(y))); \ return x; \ } #define Vc_LOGICAL_OPERATOR(op_) \ template \ Vc_ALWAYS_INLINE typename Vector::Mask operator op_(Vector x, \ Vector y) \ { \ return !!x op_ !!y; \ } \ template \ Vc_ALWAYS_INLINE \ enable_if, Vector>::value && \ std::is_convertible, Vector>::value, \ typename Detail::ReturnType, Vector>::Mask> \ operator op_(Vector x, Vector y) \ { \ return !!x op_ !!y; \ } \ template \ Vc_ALWAYS_INLINE enable_if())>::value, \ typename Vector::Mask> \ operator op_(Vector x, U &&y) \ { \ using M = typename Vector::Mask; \ return !!x op_ M(!!std::forward(y)); \ } \ template \ Vc_ALWAYS_INLINE enable_if())>::value, \ typename 
Vector::Mask> \ operator op_(U &&x, Vector y) \ { \ using M = typename Vector::Mask; \ return M(!!std::forward(x)) op_ !!y; \ } #define Vc_COMPARE_OPERATOR(op_) \ template , U>> \ Vc_ALWAYS_INLINE enable_if, R>::value && \ std::is_convertible::value, \ typename R::Mask> \ operator op_(Vector x, U &&y) \ { \ return Detail::operator op_(R(x), R(std::forward(y))); \ } \ template , U>> \ Vc_ALWAYS_INLINE \ enable_if>::value && \ std::is_convertible, R>::value && \ std::is_convertible::value, \ typename R::Mask> \ operator op_(U &&x, Vector y) \ { \ return Detail::operator op_(R(std::forward(x)), R(y)); \ } Vc_ALL_LOGICAL (Vc_LOGICAL_OPERATOR); Vc_ALL_BINARY (Vc_GENERIC_OPERATOR); Vc_ALL_ARITHMETICS(Vc_GENERIC_OPERATOR); Vc_ALL_COMPARES (Vc_COMPARE_OPERATOR); #undef Vc_LOGICAL_OPERATOR #undef Vc_GENERIC_OPERATOR #undef Vc_COMPARE_OPERATOR #undef Vc_INVALID_OPERATOR } // namespace Vc #endif // COMMON_OPERATORS_H_ conky-1.22.1/3rdparty/Vc/Vc/common/permutation.h000066400000000000000000000035001476554302100214250ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_PERMUTATION_H_ #define VC_COMMON_PERMUTATION_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Permutation { struct ReversedTag {}; constexpr ReversedTag Reversed{}; } // namespace Permutation } #endif // VC_COMMON_PERMUTATION_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/scatterimplementation.h000066400000000000000000000213301476554302100234720ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_SCATTERIMPLEMENTATION_H_ #define VC_COMMON_SCATTERIMPLEMENTATION_H_ #include "gatherimplementation.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace Common { template Vc_ALWAYS_INLINE void executeScatter(SetIndexZeroT, V &v, MT *mem, IT indexes, typename V::MaskArgument mask) { indexes.setZeroInverted(static_cast(mask)); // Huh? const V tmp(mem, indexes); where(mask) | v = tmp; } template Vc_ALWAYS_INLINE void executeScatter(SimpleLoopT, V &v, MT *mem, const IT &indexes, typename V::MaskArgument mask) { if (Vc_IS_UNLIKELY(mask.isEmpty())) { return; } Common::unrolled_loop([&](std::size_t i) { if (mask[i]) mem[indexes[i]] = v[i]; }); } template Vc_ALWAYS_INLINE void executeScatter(BitScanLoopT, V &v, MT *mem, const IT &indexes, typename V::MaskArgument mask) { size_t bits = mask.toInt(); while (Vc_IS_LIKELY(bits > 0)) { size_t i, j; asm("bsf %[bits],%[i]\n\t" "bsr %[bits],%[j]\n\t" "btr %[i],%[bits]\n\t" "btr %[j],%[bits]\n\t" : [i] "=r"(i), [j] "=r"(j), [bits] "+r"(bits)); mem[indexes[i]] = v[i]; mem[indexes[j]] = v[j]; } /* Alternative from Vc::SSE (0.7) int bits = mask.toInt(); while (bits) { const int i = _bit_scan_forward(bits); bits ^= (1 << i); // btr? 
mem[indexes[i]] = v[i]; } */ } template Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT, V &v, MT *mem, const IT &indexes, typename V::MaskArgument mask, enable_if = nullarg) { unsigned int bits = mask.toInt(); unsigned int low, high = 0; switch (Vc::Detail::popcnt16(bits)) { case 16: v.scatter(mem, indexes); break; case 15: low = _bit_scan_forward(bits); bits ^= 1 << low; mem[indexes[low]] = v[low]; case 14: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; high = (1 << high); case 13: low = _bit_scan_forward(bits); bits ^= high | (1 << low); mem[indexes[low]] = v[low]; case 12: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; high = (1 << high); case 11: low = _bit_scan_forward(bits); bits ^= high | (1 << low); mem[indexes[low]] = v[low]; case 10: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; high = (1 << high); case 9: low = _bit_scan_forward(bits); bits ^= high | (1 << low); mem[indexes[low]] = v[low]; case 8: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; high = (1 << high); case 7: low = _bit_scan_forward(bits); bits ^= high | (1 << low); mem[indexes[low]] = v[low]; case 6: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; high = (1 << high); case 5: low = _bit_scan_forward(bits); bits ^= high | (1 << low); mem[indexes[low]] = v[low]; case 4: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; high = (1 << high); case 3: low = _bit_scan_forward(bits); bits ^= high | (1 << low); mem[indexes[low]] = v[low]; case 2: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; case 1: low = _bit_scan_forward(bits); mem[indexes[low]] = v[low]; case 0: break; } } template Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT, V &v, MT *mem, const IT &indexes, typename V::MaskArgument mask, enable_if = nullarg) { unsigned int bits = mask.toInt(); unsigned int low, high = 0; switch (Vc::Detail::popcnt8(bits)) { case 8: v.scatter(mem, indexes); break; case 7: low = _bit_scan_forward(bits); bits ^= 1 << low; mem[indexes[low]] = v[low]; case 6: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; high = (1 << high); case 5: low = _bit_scan_forward(bits); bits ^= high | (1 << low); mem[indexes[low]] = v[low]; case 4: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; high = (1 << high); case 3: low = _bit_scan_forward(bits); bits ^= high | (1 << low); mem[indexes[low]] = v[low]; case 2: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; case 1: low = _bit_scan_forward(bits); mem[indexes[low]] = v[low]; case 0: break; } } template Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT, V &v, MT *mem, const IT &indexes, typename V::MaskArgument mask, enable_if = nullarg) { unsigned int bits = mask.toInt(); unsigned int low, high = 0; switch (Vc::Detail::popcnt4(bits)) { case 4: v.scatter(mem, indexes); break; case 3: low = _bit_scan_forward(bits); bits ^= 1 << low; mem[indexes[low]] = v[low]; case 2: high = _bit_scan_reverse(bits); mem[indexes[high]] = v[high]; case 1: low = _bit_scan_forward(bits); mem[indexes[low]] = v[low]; case 0: break; } } template Vc_ALWAYS_INLINE void executeScatter(PopcntSwitchT, V &v, MT *mem, const IT &indexes, typename V::MaskArgument mask, enable_if = nullarg) { unsigned int bits = mask.toInt(); unsigned int low; switch (Vc::Detail::popcnt4(bits)) { case 2: v.scatter(mem, indexes); break; case 1: low = _bit_scan_forward(bits); mem[indexes[low]] = v[low]; case 0: break; } } } // namespace Common } // namespace Vc #endif // VC_COMMON_SCATTERIMPLEMENTATION_H_ 
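// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the Vc sources): the executeScatter()
// overloads above are alternative strategies for one and the same operation.
// SimpleLoopT tests every lane, BitScanLoopT walks only the set mask bits via
// bsf/bsr, and PopcntSwitchT dispatches on popcount(mask) so that few active
// lanes do not pay a per-lane branch. The standalone program below restates
// the semantics they all implement, using generic stand-in types (std::array
// and a bool mask); none of these names come from Vc itself.
#include <array>
#include <cstddef>
#include <iostream>

// Scalar reference semantics of a masked scatter: for every lane i whose mask
// bit is set, store v[i] into mem at offset indexes[i] (offsets are counted in
// units of sizeof(T), not in bytes).
template <typename T, typename IT, std::size_t N>
void masked_scatter_reference(T *mem, const std::array<IT, N> &indexes,
                              const std::array<T, N> &v,
                              const std::array<bool, N> &mask) {
  for (std::size_t i = 0; i < N; ++i) {
    if (mask[i]) mem[indexes[i]] = v[i];
  }
}

int main() {
  std::array<float, 8> memory{};                        // zero-initialized destination
  std::array<int, 4> indexes{{6, 1, 4, 2}};             // arbitrary in-bounds offsets
  std::array<float, 4> values{{10.f, 20.f, 30.f, 40.f}};
  std::array<bool, 4> mask{{true, false, true, true}};  // lane 1 is masked off

  masked_scatter_reference(memory.data(), indexes, values, mask);

  for (float x : memory) std::cout << x << ' ';  // prints: 0 0 40 0 30 0 10 0
  std::cout << '\n';
}
// The user-facing entry points that eventually reach these strategies are the
// scatter() members documented in scatterinterface.h below.
// ---------------------------------------------------------------------------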
conky-1.22.1/3rdparty/Vc/Vc/common/scatterinterface.h000066400000000000000000000150171476554302100224120ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ /////////////////////////////////////////////////////////////////////////////////////////// // scatters // A scatter takes the following arguments: // 1. A pointer to memory of any type that EntryType can convert to. // 2. An indexes “vector”. The requirement is that the type implements the subscript operator, // stores «Size» valid index values, and each offset to the pointer above yields a valid // memory location for reading. // 3. Optionally the third argument may be a mask. The mask disables several memory stores and // thus removes the requirements in (2.) for the disabled entries. private: /**\internal * This function implements a scatter given a pointer to memory \p mem and some * container object storing the scatter \p indexes. * * \param mem This pointer must be aligned correctly for the type \p MT. This is the * natural behavior of C++, so this is typically the case. * \param indexes This object contains at least \VSize{T} indexes that denote the * offset in \p mem where the components for the current vector should be copied to. * The offset is not in Bytes, but in multiples of `sizeof(MT)`. */ // enable_if::value && has_subscript_operator::value> template inline void scatterImplementation(MT *mem, IT &&indexes) const; /**\internal * This overload of the above function adds a \p mask argument to disable memory * accesses at the \p indexes offsets where \p mask is \c false. 
*/ template inline void scatterImplementation(MT *mem, IT &&indexes, MaskArgument mask) const; public: #define Vc_ASSERT_SCATTER_PARAMETER_TYPES_ \ static_assert( \ std::is_convertible::value, \ "The memory pointer needs to point to a type that the EntryType of this " \ "SIMD vector type can be converted to."); \ static_assert( \ Vc::Traits::has_subscript_operator::value, \ "The indexes argument must be a type that implements the subscript operator."); \ static_assert( \ !Traits::is_simd_vector::value || \ Traits::simd_vector_size::value >= Size, \ "If you use a SIMD vector for the indexes parameter, the index vector must " \ "have at least as many entries as this SIMD vector."); \ static_assert( \ !std::is_array::value || \ (std::rank::value == 1 && \ (std::extent::value == 0 || std::extent::value >= Size)), \ "If you use a simple array for the indexes parameter, the array must have " \ "at least as many entries as this SIMD vector.") /** * \name Scatter functions * * Stores a vector to the objects at `mem[indexes[0]]`, `mem[indexes[1]]`, * `mem[indexes[2]]`, ... * * \param mem A pointer to memory which contains objects of type \p MT at the offsets * given by \p indexes. * \param indexes * \param mask */ ///@{ /// Scatter function template ::value>> Vc_INTRINSIC void scatter(MT *mem, IT &&indexes) const { Vc_ASSERT_SCATTER_PARAMETER_TYPES_; scatterImplementation(mem, std::forward(indexes)); } /// Masked scatter function template ::value>> Vc_INTRINSIC void scatter(MT *mem, IT &&indexes, MaskArgument mask) const { Vc_ASSERT_SCATTER_PARAMETER_TYPES_; scatterImplementation(mem, std::forward(indexes), mask); } ///@} #include "scatterinterface_deprecated.h" /**\internal * \name Scatter function to use from Vc::Common::subscript_operator * * \param args * \param mask */ ///@{ template Vc_INTRINSIC void scatter(const Common::ScatterArguments &args) const { scatter(args.address, args.indexes); } template Vc_INTRINSIC void scatter(const Common::ScatterArguments &args, MaskArgument mask) const { scatter(args.address, args.indexes, mask); } ///@} #undef Vc_ASSERT_SCATTER_PARAMETER_TYPES_ conky-1.22.1/3rdparty/Vc/Vc/common/scatterinterface_deprecated.h000066400000000000000000000176631476554302100246030ustar00rootroot00000000000000 /// \name Deprecated Members ///@{ /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void scatter(S1 *array, EntryType S1::*member1, IT indexes) const { scatter(Common::SubscriptOperation, true>( array, indexes)[member1] .scatterArguments()); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. 
array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. * \param mask If a mask is given only the active entries will be gathered/scattered. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void scatter(S1 *array, EntryType S1::*member1, IT indexes, MaskArgument mask) const { scatter(Common::SubscriptOperation, true>( array, indexes)[member1] .scatterArguments(), mask); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that * struct (i.e. array[i].*member1.*member2 is read). * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void scatter(S1 *array, S2 S1::*member1, EntryType S2::*member2, IT indexes) const { scatter(Common::SubscriptOperation, true>( array, indexes)[member1][member2] .scatterArguments()); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param member1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param member2 If \p member1 is a struct then \p member2 selects the member to be read from that * struct (i.e. array[i].*member1.*member2 is read). * \param indexes Determines the offsets into \p array where the values are gathered from/scattered * to. The type of indexes can either be an integer vector or a type that supports * operator[] access. * \param mask If a mask is given only the active entries will be gathered/scattered. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void scatter(S1 *array, S2 S1::*member1, EntryType S2::*member2, IT indexes, MaskArgument mask) const { scatter(Common::SubscriptOperation, true>( array, indexes)[member1][member2] .scatterArguments(), mask); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param ptrMember1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. 
array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param outerIndexes * \param innerIndexes */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void scatter(S1 *array, EntryType *S1::*ptrMember1, IT1 outerIndexes, IT2 innerIndexes) const { scatter(Common::SubscriptOperation, true>( array, outerIndexes)[ptrMember1][innerIndexes] .scatterArguments()); } /** * \deprecated Use Vc::array or Vc::vector subscripting instead. * * \param array A pointer into memory (without alignment restrictions). * \param ptrMember1 If \p array points to a struct, \p member1 determines the member in the struct to * be read. Thus the offsets in \p indexes are relative to the \p array and not to * the size of the gathered type (i.e. array[i].*member1 is accessed instead of * (&(array->*member1))[i]) * \param outerIndexes * \param innerIndexes * \param mask If a mask is given only the active entries will be gathered/scattered. */ template Vc_DEPRECATED("use the subscript operator to Vc::array or Vc::vector " "instead.") inline void scatter(S1 *array, EntryType *S1::*ptrMember1, IT1 outerIndexes, IT2 innerIndexes, MaskArgument mask) const { scatter(Common::SubscriptOperation, true>( array, outerIndexes)[ptrMember1][innerIndexes] .scatterArguments(), mask); } ///@} conky-1.22.1/3rdparty/Vc/Vc/common/set.h000066400000000000000000000104741476554302100176610ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_COMMON_SET_H_ #define VC_COMMON_SET_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { namespace { static Vc_INTRINSIC Vc_CONST __m128i set(unsigned short x0, unsigned short x1, unsigned short x2, unsigned short x3, unsigned short x4, unsigned short x5, unsigned short x6, unsigned short x7) { #if defined(Vc_GNU_ASM) #if 0 // defined(__x86_64__) // it appears that the 32bit variant is always faster __m128i r; unsigned long long tmp0 = x3; tmp0 = (tmp0 << 16) | x2; unsigned long long tmp1 = x1; tmp1 = (tmp1 << 16) | x0; asm("vmovq %1,%0" : "=x"(r) : "r"((tmp0 << 32) | tmp1)); unsigned long long tmp2 = x7; tmp2 = (tmp2 << 16) | x6; unsigned long long tmp3 = x5; tmp3 = (tmp3 << 16) | x4; asm("vpinsrq $1,%1,%0,%0" : "+x"(r) : "r"((tmp2 << 32) | tmp3)); return r; #elif defined(Vc_USE_VEX_CODING) __m128i r0, r1; unsigned int tmp0 = x1; tmp0 = (tmp0 << 16) | x0; unsigned int tmp1 = x3; tmp1 = (tmp1 << 16) | x2; unsigned int tmp2 = x5; tmp2 = (tmp2 << 16) | x4; unsigned int tmp3 = x7; tmp3 = (tmp3 << 16) | x6; asm("vmovd %1,%0" : "=x"(r0) : "r"(tmp0)); asm("vpinsrd $1,%1,%0,%0" : "+x"(r0) : "r"(tmp1)); asm("vmovd %1,%0" : "=x"(r1) : "r"(tmp2)); asm("vpinsrd $1,%1,%0,%0" : "+x"(r1) : "r"(tmp3)); asm("vpunpcklqdq %1,%0,%0" : "+x"(r0) : "x"(r1)); return r0; #else __m128i r0, r1; unsigned int tmp0 = x1; tmp0 = (tmp0 << 16) | x0; unsigned int tmp1 = x3; tmp1 = (tmp1 << 16) | x2; unsigned int tmp2 = x5; tmp2 = (tmp2 << 16) | x4; unsigned int tmp3 = x7; tmp3 = (tmp3 << 16) | x6; asm("movd %1,%0" : "=x"(r0) : "r"(tmp0)); asm("pinsrd $1,%1,%0" : "+x"(r0) : "r"(tmp1)); asm("movd %1,%0" : "=x"(r1) : "r"(tmp2)); asm("pinsrd $1,%1,%0" : "+x"(r1) : "r"(tmp3)); asm("punpcklqdq %1,%0" : "+x"(r0) : "x"(r1)); return r0; #endif #else unsigned int tmp0 = x1; tmp0 = (tmp0 << 16) | x0; unsigned int tmp1 = x3; tmp1 = (tmp1 << 16) | x2; unsigned int tmp2 = x5; tmp2 = (tmp2 << 16) | x4; unsigned int tmp3 = x7; tmp3 = (tmp3 << 16) | x6; return _mm_setr_epi32(tmp0, tmp1, tmp2, tmp3); #endif } static Vc_INTRINSIC Vc_CONST __m128i set(short x0, short x1, short x2, short x3, short x4, short x5, short x6, short x7) { return set(static_cast(x0), static_cast(x1), static_cast(x2), static_cast(x3), static_cast(x4), static_cast(x5), static_cast(x6), static_cast(x7)); } } // anonymous namespace } // namespace Vc #endif // VC_COMMON_SET_H_ conky-1.22.1/3rdparty/Vc/Vc/common/simd_cast.h000066400000000000000000000054161476554302100210340ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_SIMD_CAST_H_ #define VC_COMMON_SIMD_CAST_H_ #include #include "macros.h" // declare a bogus simd_cast function template in the global namespace to enable ADL for // simd_cast template void simd_cast(); namespace Vc_VERSIONED_NAMESPACE { /** * Casts the argument \p x from type \p From to type \p To. * * This function implements the trivial case where \p To and \p From are the same type. * * \param x The object of type \p From to be converted to type \p To. * \returns An object of type \p To with all vector components converted according to * standard conversion behavior as mandated by the C++ standard for the * underlying arithmetic types. */ template Vc_INTRINSIC Vc_CONST To simd_cast(From &&x, enable_if>::value> = nullarg) { return std::forward(x); } /** * A cast from nothing results in default-initialization of \p To. * * This function can be useful in generic code where a parameter pack expands to nothing. * * \returns A zero-initialized object of type \p To. */ template Vc_INTRINSIC Vc_CONST To simd_cast() { return To(); } } // namespace Vc #endif // VC_COMMON_SIMD_CAST_H_ conky-1.22.1/3rdparty/Vc/Vc/common/simd_cast_caller.tcc000066400000000000000000000067301476554302100227000ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
}}}*/ #ifndef VC_COMMON_SIMD_CAST_CALLER_TCC_ #define VC_COMMON_SIMD_CAST_CALLER_TCC_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { template template Vc_INTRINSIC SimdMaskArray::SimdMaskArray( const SimdMaskArray &x) : data(simd_cast(internal_data(x))) { } template template Vc_INTRINSIC SimdMaskArray::SimdMaskArray( const SimdMaskArray &x) : data(simd_cast(internal_data(internal_data0(x)), internal_data(internal_data1(x)))) { } template template Vc_INTRINSIC SimdMaskArray::SimdMaskArray( const SimdMaskArray &x) : data(simd_cast(internal_data(internal_data0(internal_data0(x))), internal_data(internal_data1(internal_data0(x))), internal_data(internal_data0(internal_data1(x))), internal_data(internal_data1(internal_data1(x))))) { } // conversion from any Segment object (could be SimdMaskArray or Mask) template template Vc_INTRINSIC SimdMaskArray::SimdMaskArray( Common::Segment &&x, enable_if::value == Size * Pieces>) : data(simd_cast(x.data)) { } // conversion from Mask template template Vc_INTRINSIC SimdMaskArray::SimdMaskArray(M k) : data(simd_cast(k)) { } } // namespace Vc_VERSIONED_NAMESPACE #endif // VC_COMMON_SIMD_CAST_CALLER_TCC_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/simdarray.h000066400000000000000000003754311476554302100210700ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_SIMDARRAY_H_ #define VC_COMMON_SIMDARRAY_H_ //#define Vc_DEBUG_SIMD_CAST 1 //#define Vc_DEBUG_SORTED 1 //#include "../IO" #include #include #include "writemaskedvector.h" #include "simdarrayhelper.h" #include "simdmaskarray.h" #include "utility.h" #include "interleave.h" #include "indexsequence.h" #include "transpose.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { // select_best_vector_type {{{ namespace Common { /// \addtogroup SimdArray /// @{ /** * \internal * Selects the best SIMD type out of a typelist to store N scalar values. 
*/ template struct select_best_vector_type_impl; // last candidate; this one must work; assume it does: template struct select_best_vector_type_impl { using type = T; }; // check the next candidate; use it if N >= T::size(); recurse otherwise: template struct select_best_vector_type_impl { using type = typename std::conditional< (N < T::Size), typename select_best_vector_type_impl::type, T>::type; }; template struct select_best_vector_type : select_best_vector_type_impl, #elif defined Vc_IMPL_AVX Vc::AVX::Vector, #endif #ifdef Vc_IMPL_SSE Vc::SSE::Vector, #endif Vc::Scalar::Vector> { }; /// @} } // namespace Common // }}} // internal namespace (product & sum helper) {{{1 namespace internal { template T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; } template T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; } } // namespace internal // min & max declarations {{{1 template inline fixed_size_simd min(const SimdArray &x, const SimdArray &y); template inline fixed_size_simd max(const SimdArray &x, const SimdArray &y); // SimdArray class {{{1 /// \addtogroup SimdArray /// @{ // atomic SimdArray {{{1 #define Vc_CURRENT_CLASS_NAME SimdArray /**\internal * Specialization of `SimdArray` for the case where `N == * VectorSize`. * * This is specialized for implementation purposes: Since the general implementation uses * two SimdArray data members it recurses over different SimdArray instantiations. The * recursion is ended by this specialization, which has a single \p VectorType_ data * member to which all functions are forwarded more or less directly. */ template class SimdArray { static_assert(std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value, "SimdArray may only be used with T = { double, float, int32_t, uint32_t, " "int16_t, uint16_t }"); static_assert( std::is_same::type>::value && VectorType_::size() == N, "ERROR: leave the third and fourth template parameters with their defaults. 
They " "are implementation details."); public: static constexpr bool is_atomic = true; using VectorType = VectorType_; using vector_type = VectorType; using storage_type = vector_type; using vectorentry_type = typename vector_type::VectorEntryType; using value_type = T; using mask_type = fixed_size_simd_mask; using index_type = fixed_size_simd; static constexpr std::size_t size() { return N; } using Mask = mask_type; using MaskType = Mask; using MaskArgument = const MaskType &; using VectorEntryType = vectorentry_type; using EntryType = value_type; using IndexType = index_type; using AsArg = const SimdArray &; using reference = Detail::ElementReference; static constexpr std::size_t Size = size(); static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment; // zero init Vc_INTRINSIC SimdArray() = default; // default copy ctor/operator Vc_INTRINSIC SimdArray(const SimdArray &) = default; Vc_INTRINSIC SimdArray(SimdArray &&) = default; Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default; // broadcast Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {} Vc_INTRINSIC SimdArray(value_type &a) : data(a) {} Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {} template < typename U, typename = enable_if::value && !std::is_same::value>> Vc_INTRINSIC SimdArray(U a) : SimdArray(static_cast(a)) { } // implicit casts template > Vc_INTRINSIC SimdArray(const SimdArray &x) : data(simd_cast(internal_data(x))) { } template V::Size && N <= 2 * V::Size)>, class = U> Vc_INTRINSIC SimdArray(const SimdArray &x) : data(simd_cast(internal_data(internal_data0(x)), internal_data(internal_data1(x)))) { } template 2 * V::Size && N <= 4 * V::Size)>, class = U, class = U> Vc_INTRINSIC SimdArray(const SimdArray &x) : data(simd_cast(internal_data(internal_data0(internal_data0(x))), internal_data(internal_data1(internal_data0(x))), internal_data(internal_data0(internal_data1(x))), internal_data(internal_data1(internal_data1(x))))) { } template Vc_INTRINSIC SimdArray(Common::Segment &&x) : data(simd_cast(x.data)) { } Vc_INTRINSIC SimdArray(const std::initializer_list &init) : data(init.begin(), Vc::Unaligned) { Vc_ASSERT(init.size() == size()); } // implicit conversion from underlying vector_type template < typename V, typename = enable_if::value && !Traits::isSimdArray::value>> Vc_INTRINSIC SimdArray(const V &x) : data(simd_cast(x)) { } // implicit conversion to Vector for if Vector::size() == N and // T implicitly convertible to U template ::value && Vector::Size == N && !std::is_same>::value>> Vc_INTRINSIC operator Vector() const { return simd_cast>(data); } operator fixed_size_simd &() { return static_cast &>(*this); } operator const fixed_size_simd &() const { return static_cast &>(*this); } #include "gatherinterface.h" #include "scatterinterface.h" explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data() {} explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data(o) {} explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i) : data(i) { } template explicit Vc_INTRINSIC SimdArray( Common::AddOffset) : data(Vc::IndexesFromZero) { data += value_type(Offset); } Vc_INTRINSIC void setZero() { data.setZero(); } Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); } Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); } Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); } Vc_INTRINSIC void setQnan() { data.setQnan(); } Vc_INTRINSIC void setQnan(mask_type m) { 
data.setQnan(internal_data(m)); } // internal: execute specified Operation template static Vc_INTRINSIC fixed_size_simd fromOperation(Op op, Args &&... args) { fixed_size_simd r; Common::unpackArgumentsAuto(op, r.data, std::forward(args)...); return r; } template static Vc_INTRINSIC void callOperation(Op op, Args &&... args) { Common::unpackArgumentsAuto(op, nullptr, std::forward(args)...); } static Vc_INTRINSIC fixed_size_simd Zero() { return SimdArray(Vc::Zero); } static Vc_INTRINSIC fixed_size_simd One() { return SimdArray(Vc::One); } static Vc_INTRINSIC fixed_size_simd IndexesFromZero() { return SimdArray(Vc::IndexesFromZero); } static Vc_INTRINSIC fixed_size_simd Random() { return fromOperation(Common::Operations::random()); } // load ctor template ::value && Traits::is_load_store_flag::value>> explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = {}) : data(mem, f) { } template Vc_INTRINSIC void load(Args &&... args) { data.load(std::forward(args)...); } template Vc_INTRINSIC void store(Args &&... args) const { data.store(std::forward(args)...); } Vc_INTRINSIC mask_type operator!() const { return {private_init, !data}; } Vc_INTRINSIC fixed_size_simd operator-() const { return {private_init, -data}; } /// Returns a copy of itself Vc_INTRINSIC fixed_size_simd operator+() const { return *this; } Vc_INTRINSIC fixed_size_simd operator~() const { return {private_init, ~data}; } template ::value && std::is_integral::value>> Vc_INTRINSIC Vc_CONST fixed_size_simd operator<<(U x) const { return {private_init, data << x}; } template ::value && std::is_integral::value>> Vc_INTRINSIC fixed_size_simd &operator<<=(U x) { data <<= x; return *this; } template ::value && std::is_integral::value>> Vc_INTRINSIC Vc_CONST fixed_size_simd operator>>(U x) const { return {private_init, data >> x}; } template ::value && std::is_integral::value>> Vc_INTRINSIC fixed_size_simd &operator>>=(U x) { data >>= x; return *this; } #define Vc_BINARY_OPERATOR_(op) \ Vc_INTRINSIC fixed_size_simd &operator op##=(const SimdArray &rhs) \ { \ data op## = rhs.data; \ return *this; \ } Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_); Vc_ALL_BINARY(Vc_BINARY_OPERATOR_); Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_); #undef Vc_BINARY_OPERATOR_ /// \copydoc Vector::isNegative Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const { return {private_init, isnegative(data)}; } private: friend reference; Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept { return o.data[i]; } template Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept( noexcept(std::declval() = v)) { o.data[i] = v; } public: /** * \note the returned object models the concept of a reference and * as such it can exist longer than the data it is referencing. * \note to avoid lifetime issues, we strongly advice not to store * any reference objects. 
*/ Vc_INTRINSIC reference operator[](size_t i) noexcept { static_assert(noexcept(reference{std::declval(), int()}), ""); return {*this, int(i)}; } Vc_INTRINSIC value_type operator[](size_t i) const noexcept { return get(*this, int(i)); } Vc_INTRINSIC Common::WriteMaskedVector operator()(const mask_type &k) { return {*this, k}; } Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) { data.assign(v.data, internal_data(k)); } // reductions //////////////////////////////////////////////////////// #define Vc_REDUCTION_FUNCTION_(name_) \ Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); } \ Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const \ { \ return data.name_(internal_data(mask)); \ } \ Vc_NOTHING_EXPECTING_SEMICOLON Vc_REDUCTION_FUNCTION_(min); Vc_REDUCTION_FUNCTION_(max); Vc_REDUCTION_FUNCTION_(product); Vc_REDUCTION_FUNCTION_(sum); #undef Vc_REDUCTION_FUNCTION_ Vc_INTRINSIC Vc_PURE fixed_size_simd partialSum() const { return {private_init, data.partialSum()}; } template Vc_INTRINSIC fixed_size_simd apply(F &&f) const { return {private_init, data.apply(std::forward(f))}; } template Vc_INTRINSIC fixed_size_simd apply(F &&f, const mask_type &k) const { return {private_init, data.apply(std::forward(f), k)}; } Vc_INTRINSIC fixed_size_simd shifted(int amount) const { return {private_init, data.shifted(amount)}; } template Vc_INTRINSIC fixed_size_simd shifted(int amount, const SimdArray &shiftIn) const { return {private_init, data.shifted(amount, simd_cast(shiftIn))}; } Vc_INTRINSIC fixed_size_simd rotated(int amount) const { return {private_init, data.rotated(amount)}; } /// \copydoc Vector::exponent Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC fixed_size_simd exponent() const { return {private_init, exponent(data)}; } Vc_INTRINSIC fixed_size_simd interleaveLow(SimdArray x) const { return {private_init, data.interleaveLow(x.data)}; } Vc_INTRINSIC fixed_size_simd interleaveHigh(SimdArray x) const { return {private_init, data.interleaveHigh(x.data)}; } Vc_INTRINSIC fixed_size_simd reversed() const { return {private_init, data.reversed()}; } Vc_INTRINSIC fixed_size_simd sorted() const { return {private_init, data.sorted()}; } template ()(std::size_t())), class = enable_if::value>> Vc_INTRINSIC SimdArray(const G &gen) : data(gen) { } template static Vc_INTRINSIC fixed_size_simd generate(const G &gen) { return {private_init, VectorType::generate(gen)}; } Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC fixed_size_simd copySign(const SimdArray &x) const { return {private_init, Vc::copysign(data, x.data)}; } friend VectorType &internal_data<>(SimdArray &x); friend const VectorType &internal_data<>(const SimdArray &x); /// \internal Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {} Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type)); private: // The alignas attribute attached to the class declaration above is ignored by ICC // 17.0.0 (at least). So just move the alignas attribute down here where it works for // all compilers. 
alignas(static_cast( Common::BoundedAlignment::value * sizeof(VectorType_) / VectorType_::size()>::value)) storage_type data; }; template constexpr std::size_t SimdArray::Size; template constexpr std::size_t SimdArray::MemoryAlignment; template #ifndef Vc_MSVC Vc_INTRINSIC #endif VectorType &internal_data(SimdArray &x) { return x.data; } template #ifndef Vc_MSVC Vc_INTRINSIC #endif const VectorType &internal_data(const SimdArray &x) { return x.data; } // unwrap {{{2 template Vc_INTRINSIC T unwrap(const T &x) { return x; } template Vc_INTRINSIC V unwrap(const SimdArray &x) { return internal_data(x); } template Vc_INTRINSIC auto unwrap(const Common::Segment &x) -> decltype(x.to_fixed_size()) { return unwrap(x.to_fixed_size()); } // gatherImplementation {{{2 template template Vc_INTRINSIC void SimdArray::gatherImplementation( const Common::GatherArguments &args) { data.gather(Common::make_gather(args.address, unwrap(args.indexes))); } template template Vc_INTRINSIC void SimdArray::gatherImplementation( const Common::GatherArguments &args, MaskArgument mask) { data.gather(Common::make_gather(args.address, unwrap(args.indexes)), mask); } // scatterImplementation {{{2 template template inline void SimdArray::scatterImplementation(MT *mem, IT &&indexes) const { data.scatter(mem, unwrap(std::forward(indexes))); } template template inline void SimdArray::scatterImplementation(MT *mem, IT &&indexes, MaskArgument mask) const { data.scatter(mem, unwrap(std::forward(indexes)), mask); } // generic SimdArray {{{1 /** * Data-parallel arithmetic type with user-defined number of elements. * * \tparam T The type of the vector's elements. The supported types currently are limited * to the types supported by Vc::Vector. * * \tparam N The number of elements to store and process concurrently. You can choose an * arbitrary number, though not every number is a good idea. * Generally, a power of two value or the sum of two power of two values might * work efficiently, though this depends a lot on the target system. * * \tparam V Don't change the default value unless you really know what you are doing. * This type is set to the underlying native Vc::Vector type used in the * implementation of the type. * Having it as part of the type name guards against some cases of ODR * violations (i.e. linking incompatible translation units / libraries). * * \tparam Wt Don't ever change the default value. * This parameter is an unfortunate implementation detail shining through. * * \warning Choosing \p N too large (what “too large” means depends on the target) will * result in excessive compilation times and high (or too high) register * pressure, thus potentially negating the improvement from concurrent execution. * As a rule of thumb, keep \p N less or equal to `2 * float_v::size()`. * * \warning A special portability concern arises from a current limitation in the MIC * implementation (Intel Knights Corner), where SimdArray types with \p T = \p * (u)short require an \p N either less than short_v::size() or a multiple of * short_v::size(). * * \headerfile simdarray.h */ template class SimdArray { static_assert(std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value, "SimdArray may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }"); static_assert( std::is_same::type>::value && V::size() == Wt, "ERROR: leave the third and fourth template parameters with their defaults. 
They " "are implementation details."); static_assert( // either the EntryType and VectorEntryType of the main V are equal std::is_same::value || // or N is a multiple of V::size() (N % V::size() == 0), "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * " "MIC::(u)short_v::size(), i.e. k * 16."); using my_traits = SimdArrayTraits; static constexpr std::size_t N0 = my_traits::N0; static constexpr std::size_t N1 = my_traits::N1; using Split = Common::Split; template using CArray = U[K]; public: static constexpr bool is_atomic = false; using storage_type0 = typename my_traits::storage_type0; using storage_type1 = typename my_traits::storage_type1; static_assert(storage_type0::size() == N0, ""); /**\internal * This type reveals the implementation-specific type used for the data member. */ using vector_type = V; using vectorentry_type = typename storage_type0::vectorentry_type; typedef vectorentry_type alias_type Vc_MAY_ALIAS; /// The type of the elements (i.e.\ \p T) using value_type = T; /// The type of the mask used for masked operations and returned from comparisons. using mask_type = fixed_size_simd_mask; /// The type of the vector used for indexes in gather and scatter operations. using index_type = fixed_size_simd; /** * Returns \p N, the number of scalar components in an object of this type. * * The size of the SimdArray, i.e. the number of scalar elements in the vector. In * contrast to Vector::size() you have control over this value via the \p N template * parameter of the SimdArray class template. * * \returns The number of scalar values stored and manipulated concurrently by objects * of this type. */ static constexpr std::size_t size() { return N; } /// \copydoc mask_type using Mask = mask_type; /// \copydoc mask_type using MaskType = Mask; using MaskArgument = const MaskType &; using VectorEntryType = vectorentry_type; /// \copydoc value_type using EntryType = value_type; /// \copydoc index_type using IndexType = index_type; using AsArg = const SimdArray &; using reference = Detail::ElementReference; ///\copydoc Vector::MemoryAlignment static constexpr std::size_t MemoryAlignment = storage_type0::MemoryAlignment > storage_type1::MemoryAlignment ? storage_type0::MemoryAlignment : storage_type1::MemoryAlignment; /// \name Generators ///@{ ///\copybrief Vector::Zero static Vc_INTRINSIC fixed_size_simd Zero() { return SimdArray(Vc::Zero); } ///\copybrief Vector::One static Vc_INTRINSIC fixed_size_simd One() { return SimdArray(Vc::One); } ///\copybrief Vector::IndexesFromZero static Vc_INTRINSIC fixed_size_simd IndexesFromZero() { return SimdArray(Vc::IndexesFromZero); } ///\copydoc Vector::Random static Vc_INTRINSIC fixed_size_simd Random() { return fromOperation(Common::Operations::random()); } template ()(std::size_t())), class = enable_if::value>> Vc_INTRINSIC SimdArray(const G &gen) : data0(gen), data1([&](std::size_t i) { return gen(i + storage_type0::size()); }) { } ///\copybrief Vector::generate template static Vc_INTRINSIC fixed_size_simd generate(const G &gen) // {{{2 { auto tmp = storage_type0::generate(gen); // GCC bug: the order of evaluation in // an initializer list is well-defined // (front to back), but GCC 4.8 doesn't // implement this correctly. Therefore // we enforce correct order. 
return {std::move(tmp), storage_type1::generate([&](std::size_t i) { return gen(i + N0); })}; } ///@} /// \name Compile-Time Constant Initialization ///@{ ///\copydoc Vector::Vector() SimdArray() = default; ///@} /// \name Conversion/Broadcast Constructors ///@{ ///\copydoc Vector::Vector(EntryType) Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {} template < typename U, typename = enable_if::value && !std::is_same::value>> SimdArray(U a) : SimdArray(static_cast(a)) { } ///@} // default copy ctor/operator SimdArray(const SimdArray &) = default; SimdArray(SimdArray &&) = default; SimdArray &operator=(const SimdArray &) = default; // load ctor template ::value && Traits::is_load_store_flag::value>> explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = {}) : data0(mem, f), data1(mem + storage_type0::size(), f) { } // MSVC does overload resolution differently and takes the const U *mem overload (I hope) #ifndef Vc_MSVC /**\internal * Load from a C-array. This is basically the same function as the load constructor * above, except that the forwarding reference overload would steal the deal and the * constructor above doesn't get called. This overload is required to enable loads * from C-arrays. */ template ::value && Traits::is_load_store_flag::value>> explicit Vc_INTRINSIC SimdArray(CArray &mem, Flags f = {}) : data0(&mem[0], f), data1(&mem[storage_type0::size()], f) { } /**\internal * Const overload of the above. */ template ::value && Traits::is_load_store_flag::value>> explicit Vc_INTRINSIC SimdArray(const CArray &mem, Flags f = {}) : data0(&mem[0], f), data1(&mem[storage_type0::size()], f) { } #endif // initializer list Vc_INTRINSIC SimdArray(const std::initializer_list &init) : data0(init.begin(), Vc::Unaligned) , data1(init.begin() + storage_type0::size(), Vc::Unaligned) { Vc_ASSERT(init.size() == size()); } #include "gatherinterface.h" #include "scatterinterface.h" explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data0(), data1() {} explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data0(o), data1(o) {} explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i) : data0(i) , data1(Common::AddOffset()) { } template explicit Vc_INTRINSIC SimdArray( Common::AddOffset i) : data0(i) , data1(Common::AddOffset()) { } // explicit casts template ::value && Traits::simd_vector_size::value == N && !(std::is_convertible, T>::value && Traits::isSimdArray::value))>> Vc_INTRINSIC explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x)) { } // implicit casts template ::value && Traits::simd_vector_size::value == N && std::is_convertible, T>::value)>, class = W> Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x)) { } template Vc_INTRINSIC SimdArray(Common::Segment &&x) : data0(Common::Segment{x.data}) , data1(Common::Segment{x.data}) { } // implicit conversion to Vector for if Vector::size() == N and // T implicitly convertible to U template ::value && Vector::Size == N && !std::is_same>::value>> operator Vector() const { auto r = simd_cast>(data0, data1); return r; } Vc_INTRINSIC operator fixed_size_simd &() { return static_cast &>(*this); } Vc_INTRINSIC operator const fixed_size_simd &() const { return static_cast &>(*this); } //////////////////// other functions /////////////// Vc_INTRINSIC void setZero() { data0.setZero(); data1.setZero(); } Vc_INTRINSIC void setZero(const mask_type &k) { data0.setZero(Split::lo(k)); data1.setZero(Split::hi(k)); } Vc_INTRINSIC void setZeroInverted() { data0.setZeroInverted(); 
data1.setZeroInverted(); } Vc_INTRINSIC void setZeroInverted(const mask_type &k) { data0.setZeroInverted(Split::lo(k)); data1.setZeroInverted(Split::hi(k)); } Vc_INTRINSIC void setQnan() { data0.setQnan(); data1.setQnan(); } Vc_INTRINSIC void setQnan(const mask_type &m) { data0.setQnan(Split::lo(m)); data1.setQnan(Split::hi(m)); } ///\internal execute specified Operation template static Vc_INTRINSIC fixed_size_simd fromOperation(Op op, Args &&... args) { fixed_size_simd r = { storage_type0::fromOperation(op, Split::lo(args)...), // no forward here - it // could move and thus // break the next line storage_type1::fromOperation(op, Split::hi(std::forward(args))...)}; return r; } ///\internal template static Vc_INTRINSIC void callOperation(Op op, Args &&... args) { storage_type0::callOperation(op, Split::lo(args)...); storage_type1::callOperation(op, Split::hi(std::forward(args))...); } template Vc_INTRINSIC void load(const U *mem, Args &&... args) { data0.load(mem, Split::lo(args)...); // no forward here - it could move and thus // break the next line data1.load(mem + storage_type0::size(), Split::hi(std::forward(args))...); } template Vc_INTRINSIC void store(U *mem, Args &&... args) const { data0.store(mem, Split::lo(args)...); // no forward here - it could move and thus // break the next line data1.store(mem + storage_type0::size(), Split::hi(std::forward(args))...); } Vc_INTRINSIC mask_type operator!() const { return {!data0, !data1}; } Vc_INTRINSIC fixed_size_simd operator-() const { return {-data0, -data1}; } /// Returns a copy of itself Vc_INTRINSIC fixed_size_simd operator+() const { return *this; } Vc_INTRINSIC fixed_size_simd operator~() const { return {~data0, ~data1}; } // left/right shift operators {{{2 template ::value && std::is_integral::value>> Vc_INTRINSIC Vc_CONST fixed_size_simd operator<<(U x) const { return {data0 << x, data1 << x}; } template ::value && std::is_integral::value>> Vc_INTRINSIC fixed_size_simd &operator<<=(U x) { data0 <<= x; data1 <<= x; return *this; } template ::value && std::is_integral::value>> Vc_INTRINSIC Vc_CONST fixed_size_simd operator>>(U x) const { return {data0 >> x, data1 >> x}; } template ::value && std::is_integral::value>> Vc_INTRINSIC fixed_size_simd &operator>>=(U x) { data0 >>= x; data1 >>= x; return *this; } // binary operators {{{2 #define Vc_BINARY_OPERATOR_(op) \ Vc_INTRINSIC fixed_size_simd &operator op##=(const SimdArray &rhs) \ { \ data0 op## = rhs.data0; \ data1 op## = rhs.data1; \ return *this; \ } Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_); Vc_ALL_BINARY(Vc_BINARY_OPERATOR_); Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_); #undef Vc_BINARY_OPERATOR_ // operator[] {{{2 /// \name Scalar Subscript Operators ///@{ private: friend reference; Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept { return reinterpret_cast(&o)[i]; } template Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept( noexcept(std::declval() = v)) { reinterpret_cast(&o)[i] = v; } public: ///\copydoc Vector::operator[](size_t) /** * \note the returned object models the concept of a reference and * as such it can exist longer than the data it is referencing. * \note to avoid lifetime issues, we strongly advice not to store * any reference objects. 
*/ Vc_INTRINSIC reference operator[](size_t i) noexcept { static_assert(noexcept(reference{std::declval(), int()}), ""); return {*this, int(i)}; } ///\copydoc Vector::operator[](size_t) const Vc_INTRINSIC value_type operator[](size_t index) const noexcept { return get(*this, int(index)); } ///@} // operator(){{{2 ///\copydoc Vector::operator()(MaskType) Vc_INTRINSIC Common::WriteMaskedVector operator()( const mask_type &mask) { return {*this, mask}; } ///\internal Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2 { data0.assign(v.data0, internal_data0(k)); data1.assign(v.data1, internal_data1(k)); } // reductions {{{2 #define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \ private: \ template \ Vc_INTRINSIC enable_if::value && \ storage_type0::Size == storage_type1::Size, \ value_type> name_##_impl() const \ { \ return binary_fun_(data0, data1).name_(); \ } \ \ template \ Vc_INTRINSIC enable_if::value && \ storage_type0::Size != storage_type1::Size, \ value_type> name_##_impl() const \ { \ return scalar_fun_(data0.name_(), data1.name_()); \ } \ \ public: \ /**\copybrief Vector::##name_ */ \ Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \ /**\copybrief Vector::##name_ */ \ Vc_INTRINSIC value_type name_(const mask_type &mask) const \ { \ if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \ return data1.name_(Split::hi(mask)); \ } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \ return data0.name_(Split::lo(mask)); \ } else { \ return scalar_fun_(data0.name_(Split::lo(mask)), \ data1.name_(Split::hi(mask))); \ } \ } \ Vc_NOTHING_EXPECTING_SEMICOLON Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min); Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max); Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_); Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_); #undef Vc_REDUCTION_FUNCTION_ ///\copybrief Vector::partialSum Vc_INTRINSIC Vc_PURE fixed_size_simd partialSum() const //{{{2 { auto ps0 = data0.partialSum(); auto tmp = data1; tmp[0] += ps0[data0.size() - 1]; return {std::move(ps0), tmp.partialSum()}; } // apply {{{2 ///\copybrief Vector::apply(F &&) const template inline fixed_size_simd apply(F &&f) const { return {data0.apply(f), data1.apply(f)}; } ///\copybrief Vector::apply(F &&, MaskType) const template inline fixed_size_simd apply(F &&f, const mask_type &k) const { return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))}; } // shifted {{{2 ///\copybrief Vector::shifted(int) const inline fixed_size_simd shifted(int amount) const { constexpr int SSize = Size; constexpr int SSize0 = storage_type0::Size; constexpr int SSize1 = storage_type1::Size; if (amount == 0) { return *this; } if (amount < 0) { if (amount > -SSize0) { return {data0.shifted(amount), data1.shifted(amount, data0)}; } if (amount == -SSize0) { return {storage_type0(0), simd_cast(data0)}; } if (amount < -SSize0) { return {storage_type0(0), simd_cast(data0.shifted( amount + SSize0))}; } return Zero(); } else { if (amount >= SSize) { return Zero(); } else if (amount >= SSize0) { return { simd_cast(data1).shifted(amount - SSize0), storage_type1(0)}; } else if (amount >= SSize1) { return {data0.shifted(amount, data1), storage_type1(0)}; } else { return {data0.shifted(amount, data1), data1.shifted(amount)}; } } } template inline enable_if< !(std::is_same::value && // not bisectable N == NN), fixed_size_simd> shifted(int amount, const SimdArray &shiftIn) const { constexpr int SSize = Size; if (amount < 0) { return 
fixed_size_simd([&](int i) -> value_type { i += amount; if (i >= 0) { return operator[](i); } else if (i >= -SSize) { return shiftIn[i + SSize]; } return 0; }); } return fixed_size_simd([&](int i) -> value_type { i += amount; if (i < SSize) { return operator[](i); } else if (i < 2 * SSize) { return shiftIn[i - SSize]; } return 0; }); } private: // workaround for MSVC not understanding the simpler and shorter expression of the boolean // expression directly in the enable_if below template struct bisectable_shift : public std::integral_constant::value && // bisectable N == NN> { }; public: template inline fixed_size_simd shifted( enable_if::value, int> amount, const SimdArray &shiftIn) const { constexpr int SSize = Size; if (amount < 0) { if (amount > -static_cast(storage_type0::Size)) { return {data0.shifted(amount, internal_data1(shiftIn)), data1.shifted(amount, data0)}; } if (amount == -static_cast(storage_type0::Size)) { return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)}; } if (amount > -SSize) { return { internal_data1(shiftIn) .shifted(amount + static_cast(storage_type0::Size), internal_data0(shiftIn)), data0.shifted(amount + static_cast(storage_type0::Size), internal_data1(shiftIn))}; } if (amount == -SSize) { return shiftIn; } if (amount > -2 * SSize) { return shiftIn.shifted(amount + SSize); } } if (amount == 0) { return *this; } if (amount < static_cast(storage_type0::Size)) { return {data0.shifted(amount, data1), data1.shifted(amount, internal_data0(shiftIn))}; } if (amount == static_cast(storage_type0::Size)) { return {storage_type0(data1), storage_type1(internal_data0(shiftIn))}; } if (amount < SSize) { return {data1.shifted(amount - static_cast(storage_type0::Size), internal_data0(shiftIn)), internal_data0(shiftIn) .shifted(amount - static_cast(storage_type0::Size), internal_data1(shiftIn))}; } if (amount == SSize) { return shiftIn; } if (amount < 2 * SSize) { return shiftIn.shifted(amount - SSize); } return Zero(); } // rotated {{{2 ///\copybrief Vector::rotated Vc_INTRINSIC fixed_size_simd rotated(int amount) const { amount %= int(size()); if (amount == 0) { return *this; } else if (amount < 0) { amount += size(); } #ifdef Vc_MSVC // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store // -> // load to implement the function instead. 
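/* Illustrative sketch, not part of the Vc sources: the scalar semantics that the
 * shifted(amount, shiftIn) overloads above implement, written with std::array so the
 * element movement is easy to verify. The helper name scalar_shifted is ours.
 *
 *   #include <array>
 *   #include <cstddef>
 *
 *   template <typename T, std::size_t N>
 *   std::array<T, N> scalar_shifted(const std::array<T, N> &self, int amount,
 *                                   const std::array<T, N> &shiftIn)
 *   {
 *     std::array<T, N> r{};
 *     for (std::size_t i = 0; i < N; ++i) {
 *       const int j = static_cast<int>(i) + amount;
 *       if (j >= 0 && j < static_cast<int>(N)) {
 *         r[i] = self[j];                           // still inside the own data
 *       } else if (j >= static_cast<int>(N) && j < 2 * static_cast<int>(N)) {
 *         r[i] = shiftIn[j - static_cast<int>(N)];  // shifted in from the high end
 *       } else if (j < 0 && j >= -static_cast<int>(N)) {
 *         r[i] = shiftIn[j + static_cast<int>(N)];  // shifted in from the low end
 *       }                                           // otherwise the lane stays zero
 *     }
 *     return r;
 *   }
 */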
alignas(MemoryAlignment) T tmp[N + data0.size()]; data0.store(&tmp[0], Vc::Aligned); data1.store(&tmp[data0.size()], Vc::Aligned); data0.store(&tmp[N], Vc::Unaligned); fixed_size_simd r; r.data0.load(&tmp[amount], Vc::Unaligned); r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned); return r; #else auto &&d0cvtd = simd_cast(data0); auto &&d1cvtd = simd_cast(data1); constexpr int size0 = storage_type0::size(); constexpr int size1 = storage_type1::size(); if (amount == size0 && std::is_same::value) { return {std::move(d1cvtd), std::move(d0cvtd)}; } else if (amount < size1) { return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)}; } else if (amount == size1) { return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)}; } else if (int(size()) - amount < size1) { return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)), data1.shifted(amount - int(size()), data0.shifted(size0 - size1))}; } else if (int(size()) - amount == size1) { return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)), simd_cast(data0.shifted(size0 - size1))}; } else if (amount <= size0) { return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0), simd_cast(data0.shifted(amount - size1))}; } else { return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0), simd_cast(data0.shifted(amount - size1, d1cvtd))}; } return *this; #endif } // interleaveLow/-High {{{2 ///\internal \copydoc Vector::interleaveLow Vc_INTRINSIC fixed_size_simd interleaveLow(const SimdArray &x) const { // return data0[0], x.data0[0], data0[1], x.data0[1], ... return {data0.interleaveLow(x.data0), simd_cast(data0.interleaveHigh(x.data0))}; } ///\internal \copydoc Vector::interleaveHigh Vc_INTRINSIC fixed_size_simd interleaveHigh(const SimdArray &x) const { return interleaveHighImpl( x, std::integral_constant()); } private: ///\internal Vc_INTRINSIC fixed_size_simd interleaveHighImpl(const SimdArray &x, std::true_type) const { return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)}; } ///\internal inline fixed_size_simd interleaveHighImpl(const SimdArray &x, std::false_type) const { return {data0.interleaveHigh(x.data0) .shifted(storage_type1::Size, simd_cast(data1.interleaveLow(x.data1))), data1.interleaveHigh(x.data1)}; } public: ///\copybrief Vector::reversed inline fixed_size_simd reversed() const //{{{2 { if (std::is_same::value) { return {simd_cast(data1).reversed(), simd_cast(data0).reversed()}; } else { #ifdef Vc_MSVC // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use // store // -> load to implement the function instead. 
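/* Illustrative sketch, not part of the Vc sources: the element permutation that the
 * rotated(amount) implementation above produces, spelled out with std::array so the
 * modular indexing is explicit. The helper name scalar_rotated is ours.
 *
 *   #include <array>
 *   #include <cstddef>
 *
 *   template <typename T, std::size_t N>
 *   std::array<T, N> scalar_rotated(const std::array<T, N> &self, int amount)
 *   {
 *     amount %= static_cast<int>(N);                   // same normalization as rotated()
 *     if (amount < 0) { amount += static_cast<int>(N); }
 *     std::array<T, N> r{};
 *     for (std::size_t i = 0; i < N; ++i) {
 *       r[i] = self[(i + static_cast<std::size_t>(amount)) % N];
 *     }
 *     return r;
 *   }
 */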
alignas(MemoryAlignment) T tmp[N]; data1.reversed().store(&tmp[0], Vc::Aligned); data0.reversed().store(&tmp[data1.size()], Vc::Unaligned); return fixed_size_simd{&tmp[0], Vc::Aligned}; #else return {data0.shifted(storage_type1::Size, data1).reversed(), simd_cast(data0.reversed().shifted( storage_type0::Size - storage_type1::Size))}; #endif } } ///\copydoc Vector::sorted inline fixed_size_simd sorted() const //{{{2 { return sortedImpl( std::integral_constant()); } ///\internal Vc_INTRINSIC fixed_size_simd sortedImpl(std::true_type) const { #ifdef Vc_DEBUG_SORTED std::cerr << "-- " << data0 << data1 << '\n'; #endif const auto a = data0.sorted(); const auto b = data1.sorted().reversed(); const auto lo = Vc::min(a, b); const auto hi = Vc::max(a, b); return {lo.sorted(), hi.sorted()}; } ///\internal Vc_INTRINSIC fixed_size_simd sortedImpl(std::false_type) const { using SortableArray = fixed_size_simd::value>; auto sortable = simd_cast(*this); for (std::size_t i = Size; i < SortableArray::Size; ++i) { using limits = std::numeric_limits; if (limits::has_infinity) { sortable[i] = limits::infinity(); } else { sortable[i] = std::numeric_limits::max(); } } return simd_cast>(sortable.sorted()); /* The following implementation appears to be less efficient. But this may need further * work. const auto a = data0.sorted(); const auto b = data1.sorted(); #ifdef Vc_DEBUG_SORTED std::cerr << "== " << a << b << '\n'; #endif auto aIt = Vc::begin(a); auto bIt = Vc::begin(b); const auto aEnd = Vc::end(a); const auto bEnd = Vc::end(b); return SimdArray::generate([&](std::size_t) { if (aIt == aEnd) { return *(bIt++); } if (bIt == bEnd) { return *(aIt++); } if (*aIt < *bIt) { return *(aIt++); } else { return *(bIt++); } }); */ } /// \name Deprecated Members ///@{ ///\copydoc size ///\deprecated Use size() instead. static constexpr std::size_t Size = size(); /// \copydoc Vector::exponent Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC fixed_size_simd exponent() const { return {exponent(data0), exponent(data1)}; } /// \copydoc Vector::isNegative Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const { return {isnegative(data0), isnegative(data1)}; } ///\copydoc Vector::copySign Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC fixed_size_simd copySign(const SimdArray &x) const { return {Vc::copysign(data0, x.data0), Vc::copysign(data1, x.data1)}; } ///@} // internal_data0/1 {{{2 friend storage_type0 &internal_data0<>(SimdArray &x); friend storage_type1 &internal_data1<>(SimdArray &x); friend const storage_type0 &internal_data0<>(const SimdArray &x); friend const storage_type1 &internal_data1<>(const SimdArray &x); /// \internal Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2 : data0(std::move(x)), data1(std::move(y)) { } Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0)); private: //{{{2 // The alignas attribute attached to the class declaration above is ignored by ICC // 17.0.0 (at least). So just move the alignas attribute down here where it works for // all compilers. 
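/* Illustrative sketch, not part of the Vc sources: the idea used by
 * sortedImpl(std::false_type) above -- pad the values up to the size of the sortable
 * helper array with the largest representable value (or +infinity), sort, and keep the
 * first Size entries; the padding lanes sort to the back and drop out again. Plain C++
 * version for clarity; the helper name sort_via_padding is ours.
 *
 *   #include <algorithm>
 *   #include <cstddef>
 *   #include <limits>
 *   #include <vector>
 *
 *   template <typename T>
 *   std::vector<T> sort_via_padding(std::vector<T> v, std::size_t padded_size)
 *   {
 *     const T pad = std::numeric_limits<T>::has_infinity
 *                       ? std::numeric_limits<T>::infinity()
 *                       : std::numeric_limits<T>::max();
 *     const std::size_t n = v.size();
 *     v.resize(padded_size, pad);   // padded lanes hold the largest value
 *     std::sort(v.begin(), v.end());
 *     v.resize(n);                  // drop the padding; the first n values are sorted
 *     return v;
 *   }
 */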
alignas(static_cast( Common::BoundedAlignment::value * sizeof(V) / V::size()>::value)) storage_type0 data0; storage_type1 data1; }; #undef Vc_CURRENT_CLASS_NAME template constexpr std::size_t SimdArray::Size; template constexpr std::size_t SimdArray::MemoryAlignment; // gatherImplementation {{{2 template template inline void SimdArray::gatherImplementation( const Common::GatherArguments &args) { data0.gather(Common::make_gather( args.address, Split::lo(Common::Operations::gather(), args.indexes))); data1.gather(Common::make_gather( args.address, Split::hi(Common::Operations::gather(), args.indexes))); } template template inline void SimdArray::gatherImplementation( const Common::GatherArguments &args, MaskArgument mask) { data0.gather(Common::make_gather( args.address, Split::lo(Common::Operations::gather(), args.indexes)), Split::lo(mask)); data1.gather(Common::make_gather( args.address, Split::hi(Common::Operations::gather(), args.indexes)), Split::hi(mask)); } // scatterImplementation {{{2 template template inline void SimdArray::scatterImplementation(MT *mem, IT &&indexes) const { data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes)); // don't forward indexes - it could move and // thus break the next line data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward(indexes))); } template template inline void SimdArray::scatterImplementation(MT *mem, IT &&indexes, MaskArgument mask) const { data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes), Split::lo(mask)); // don't forward indexes - it could move and // thus break the next line data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward(indexes)), Split::hi(mask)); } // internal_data0/1 (SimdArray) {{{1 ///\internal Returns the first data member of a generic SimdArray template #ifndef Vc_MSVC Vc_INTRINSIC #endif typename SimdArrayTraits::storage_type0 &internal_data0( SimdArray &x) { return x.data0; } ///\internal Returns the second data member of a generic SimdArray template #ifndef Vc_MSVC Vc_INTRINSIC #endif typename SimdArrayTraits::storage_type1 &internal_data1( SimdArray &x) { return x.data1; } ///\internal Returns the first data member of a generic SimdArray (const overload) template #ifndef Vc_MSVC Vc_INTRINSIC #endif const typename SimdArrayTraits::storage_type0 &internal_data0( const SimdArray &x) { return x.data0; } ///\internal Returns the second data member of a generic SimdArray (const overload) template #ifndef Vc_MSVC Vc_INTRINSIC #endif const typename SimdArrayTraits::storage_type1 &internal_data1( const SimdArray &x) { return x.data1; } // MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1 // MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y // in the body the bug is supressed. 
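/* Illustrative sketch, not part of the Vc sources: the scalar effect of the
 * gatherImplementation/scatterImplementation functions above. Lane i reads (gather) or
 * writes (scatter) mem[indexes[i]]; the member functions merely split the index vector
 * so that each storage half handles its own lanes. The helper names are ours.
 *
 *   #include <array>
 *   #include <cstddef>
 *
 *   template <typename T, typename IT, std::size_t N>
 *   std::array<T, N> scalar_gather(const T *mem, const std::array<IT, N> &indexes)
 *   {
 *     std::array<T, N> r{};
 *     for (std::size_t i = 0; i < N; ++i) { r[i] = mem[indexes[i]]; }
 *     return r;
 *   }
 *
 *   template <typename T, typename IT, std::size_t N>
 *   void scalar_scatter(T *mem, const std::array<IT, N> &indexes,
 *                       const std::array<T, N> &values)
 *   {
 *     for (std::size_t i = 0; i < N; ++i) { mem[indexes[i]] = values[i]; }
 *   }
 */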
#if defined Vc_MSVC && defined Vc_IMPL_SSE && !defined Vc_IMPL_AVX template <> Vc_INTRINSIC SimdArray::SimdArray(fixed_size_simd &&x, fixed_size_simd &&y) : data0(x), data1(0) { data1 = y; } #endif // binary operators {{{ namespace Detail { #define Vc_FIXED_OP(op) \ template ::is_atomic>::type> \ Vc_INTRINSIC fixed_size_simd operator op(const fixed_size_simd &a, \ const fixed_size_simd &b) \ { \ return {private_init, internal_data(a) op internal_data(b)}; \ } \ template ::is_atomic>::type, \ class = T> \ Vc_INTRINSIC fixed_size_simd operator op(const fixed_size_simd &a, \ const fixed_size_simd &b) \ { \ return {internal_data0(a) op internal_data0(b), \ internal_data1(a) op internal_data1(b)}; \ } Vc_ALL_ARITHMETICS(Vc_FIXED_OP); Vc_ALL_BINARY(Vc_FIXED_OP); Vc_ALL_SHIFTS(Vc_FIXED_OP); #undef Vc_FIXED_OP #define Vc_FIXED_OP(op) \ template ::is_atomic>::type> \ Vc_INTRINSIC fixed_size_simd_mask operator op(const fixed_size_simd &a, \ const fixed_size_simd &b) \ { \ return {private_init, internal_data(a) op internal_data(b)}; \ } \ template ::is_atomic>::type, \ class = T> \ Vc_INTRINSIC fixed_size_simd_mask operator op(const fixed_size_simd &a, \ const fixed_size_simd &b) \ { \ return {internal_data0(a) op internal_data0(b), \ internal_data1(a) op internal_data1(b)}; \ } Vc_ALL_COMPARES(Vc_FIXED_OP); #undef Vc_FIXED_OP } // namespace Detail // }}} // binary operators {{{1 namespace result_vector_type_internal { template using remove_cvref = typename std::remove_cv::type>::type; template using is_integer_larger_than_int = std::integral_constant< bool, std::is_integral::value &&(sizeof(T) > sizeof(int) || std::is_same::value || std::is_same::value)>; template < typename L, typename R, std::size_t N = Traits::isSimdArray::value ? Traits::simd_vector_size::value : Traits::simd_vector_size::value, bool = (Traits::isSimdArray::value || Traits::isSimdArray::value) && // one of the operands must be a SimdArray !(Traits::is_fixed_size_simd::value && // if both are fixed_size, use Traits::is_fixed_size_simd::value) && // common/operators.h ((std::is_arithmetic>::value && // one of the operands is a !is_integer_larger_than_int>::value) || // scalar type (std::is_arithmetic>::value && !is_integer_larger_than_int>::value) || // or one of the operands is Vector with Vector::size() == // SimdArray::size() Traits::simd_vector_size::value == Traits::simd_vector_size::value)> struct evaluate; template struct evaluate { private: using LScalar = Traits::entry_type_of; using RScalar = Traits::entry_type_of; template using conditional = typename std::conditional::type; public: // In principle we want the exact same rules for SimdArray ⨉ SimdArray as the standard // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than // int are promoted to int before any operation). This would imply that SIMD types with integral // types smaller than int are more or less useless - and you could use SimdArray from the // start. Therefore we special-case those operations where the scalar type of both operands is // integral and smaller than int. // In addition, there is no generic support for 64-bit int SIMD types. Therefore // promotion to a 64-bit integral type (including `long` because it can potentially have 64 // bits) also is not done. But if one of the operands is a scalar type that is larger than int // then the operator is disabled altogether. We do not want an implicit demotion. 
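/* Illustrative sketch, not part of the Vc sources: what the special case below means in
 * practice, written as static_asserts one could put in a test. This assumes the usual
 * SimdArray<T, N> / fixed_size_simd<T, N> spelling of the types involved.
 *
 *   #include <type_traits>
 *
 *   // short op short keeps the short-based fixed-size type instead of the promotion
 *   // to int that scalar C++ would perform:
 *   static_assert(
 *       std::is_same<Vc::result_vector_type<Vc::SimdArray<short, 8>, short>,
 *                    Vc::fixed_size_simd<short, 8>>::value, "");
 *
 *   // mixing float and int follows the usual rules (float + int is float):
 *   static_assert(
 *       std::is_same<Vc::result_vector_type<Vc::SimdArray<float, 8>, int>,
 *                    Vc::fixed_size_simd<float, 8>>::value, "");
 */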
using type = fixed_size_simd< conditional<(std::is_integral::value &&std::is_integral::value && sizeof(LScalar) < sizeof(int) && sizeof(RScalar) < sizeof(int)), conditional<(sizeof(LScalar) == sizeof(RScalar)), conditional::value, LScalar, RScalar>, conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>, decltype(std::declval() + std::declval())>, N>; }; } // namespace result_vector_type_internal template using result_vector_type = typename result_vector_type_internal::evaluate::type; #define Vc_BINARY_OPERATORS_(op_) \ /*!\brief Applies op_ component-wise and concurrently. */ \ template \ Vc_INTRINSIC result_vector_type operator op_(L &&lhs, R &&rhs) \ { \ using Return = result_vector_type; \ return Vc::Detail::operator op_( \ static_cast(std::forward(lhs)), \ static_cast(std::forward(rhs))); \ } /** * \name Arithmetic and Bitwise Operators * * Applies the operator component-wise and concurrently on \p lhs and \p rhs and returns * a new SimdArray object containing the result values. * * This operator only participates in overload resolution if: * \li At least one of the template parameters \p L or \p R is a SimdArray type. * \li Either \p L or \p R is a fundamental arithmetic type but not an integral type * larger than \c int \n * or \n * \p L or \p R is a Vc::Vector type with equal number of elements (Vector::size() == * SimdArray::size()). * * The return type of the operator is a SimdArray type using the more precise EntryType of * \p L or \p R and the same number of elements as the SimdArray argument(s). */ ///@{ Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_); Vc_ALL_BINARY(Vc_BINARY_OPERATORS_); ///@} #undef Vc_BINARY_OPERATORS_ #define Vc_BINARY_OPERATORS_(op_) \ /*!\brief Applies op_ component-wise and concurrently. */ \ template \ Vc_INTRINSIC typename result_vector_type::mask_type operator op_(L &&lhs, \ R &&rhs) \ { \ using Promote = result_vector_type; \ return Promote(std::forward(lhs)) op_ Promote(std::forward(rhs)); \ } /** * \name Compare Operators * * Applies the operator component-wise and concurrently on \p lhs and \p rhs and returns * a new SimdMaskArray object containing the result values. * * This operator only participates in overload resolution if (same rules as above): * \li At least one of the template parameters \p L or \p R is a SimdArray type. * \li Either \p L or \p R is a fundamental arithmetic type but not an integral type * larger than \c int \n * or \n * \p L or \p R is a Vc::Vector type with equal number of elements (Vector::size() == * SimdArray::size()). * * The return type of the operator is a SimdMaskArray type using the more precise EntryType of * \p L or \p R and the same number of elements as the SimdArray argument(s). */ ///@{ Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_); ///@} #undef Vc_BINARY_OPERATORS_ // math functions {{{1 #define Vc_FORWARD_UNARY_OPERATOR(name_) \ /*!\brief Applies the std::name_ function component-wise and concurrently. */ \ template \ inline fixed_size_simd name_(const SimdArray &x) \ { \ return fixed_size_simd::fromOperation( \ Common::Operations::Forward_##name_(), x); \ } \ template \ fixed_size_simd name_(const fixed_size_simd &x) \ { \ return fixed_size_simd::fromOperation( \ Common::Operations::Forward_##name_(), x); \ } \ Vc_NOTHING_EXPECTING_SEMICOLON #define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_) \ /*!\brief Applies the std::name_ function component-wise and concurrently. 
*/ \ template \ inline fixed_size_simd_mask name_(const SimdArray &x) \ { \ return fixed_size_simd_mask::fromOperation( \ Common::Operations::Forward_##name_(), x); \ } \ template \ fixed_size_simd_mask name_(const fixed_size_simd &x) \ { \ return fixed_size_simd_mask::fromOperation( \ Common::Operations::Forward_##name_(), x); \ } \ Vc_NOTHING_EXPECTING_SEMICOLON #define Vc_FORWARD_BINARY_OPERATOR(name_) \ /*!\brief Applies the std::name_ function component-wise and concurrently. */ \ template \ inline fixed_size_simd name_(const SimdArray &x, \ const SimdArray &y) \ { \ return fixed_size_simd::fromOperation( \ Common::Operations::Forward_##name_(), x, y); \ } \ Vc_NOTHING_EXPECTING_SEMICOLON /** * \name Math functions * These functions evaluate the */ ///@{ Vc_FORWARD_UNARY_OPERATOR(abs); Vc_FORWARD_UNARY_OPERATOR(asin); Vc_FORWARD_UNARY_OPERATOR(atan); Vc_FORWARD_BINARY_OPERATOR(atan2); Vc_FORWARD_UNARY_OPERATOR(ceil); Vc_FORWARD_BINARY_OPERATOR(copysign); Vc_FORWARD_UNARY_OPERATOR(cos); Vc_FORWARD_UNARY_OPERATOR(exp); Vc_FORWARD_UNARY_OPERATOR(exponent); Vc_FORWARD_UNARY_OPERATOR(floor); /// Applies the std::fma function component-wise and concurrently. template inline SimdArray fma(const SimdArray &a, const SimdArray &b, const SimdArray &c) { return SimdArray::fromOperation(Common::Operations::Forward_fma(), a, b, c); } Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite); Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf); Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan); Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative); /// Applies the std::frexp function component-wise and concurrently. template inline SimdArray frexp(const SimdArray &x, SimdArray *e) { return SimdArray::fromOperation(Common::Operations::Forward_frexp(), x, e); } /// Applies the std::ldexp function component-wise and concurrently. template inline SimdArray ldexp(const SimdArray &x, const SimdArray &e) { return SimdArray::fromOperation(Common::Operations::Forward_ldexp(), x, e); } Vc_FORWARD_UNARY_OPERATOR(log); Vc_FORWARD_UNARY_OPERATOR(log10); Vc_FORWARD_UNARY_OPERATOR(log2); Vc_FORWARD_UNARY_OPERATOR(reciprocal); Vc_FORWARD_UNARY_OPERATOR(round); Vc_FORWARD_UNARY_OPERATOR(rsqrt); Vc_FORWARD_UNARY_OPERATOR(sin); /// Determines sine and cosine concurrently and component-wise on \p x. template void sincos(const SimdArray &x, SimdArray *sin, SimdArray *cos) { SimdArray::callOperation(Common::Operations::Forward_sincos(), x, sin, cos); } Vc_FORWARD_UNARY_OPERATOR(sqrt); Vc_FORWARD_UNARY_OPERATOR(trunc); Vc_FORWARD_BINARY_OPERATOR(min); Vc_FORWARD_BINARY_OPERATOR(max); ///@} #undef Vc_FORWARD_UNARY_OPERATOR #undef Vc_FORWARD_UNARY_BOOL_OPERATOR #undef Vc_FORWARD_BINARY_OPERATOR // simd_cast {{{1 #ifdef Vc_MSVC #define Vc_DUMMY_ARG0 , int = 0 #define Vc_DUMMY_ARG1 , long = 0 #define Vc_DUMMY_ARG2 , short = 0 #define Vc_DUMMY_ARG3 , char = '0' #define Vc_DUMMY_ARG4 , unsigned = 0u #define Vc_DUMMY_ARG5 , unsigned short = 0u #else #define Vc_DUMMY_ARG0 #define Vc_DUMMY_ARG1 #define Vc_DUMMY_ARG2 #define Vc_DUMMY_ARG3 #define Vc_DUMMY_ARG4 #define Vc_DUMMY_ARG5 #endif // Vc_MSVC // simd_cast_impl_smaller_input {{{2 // The following function can be implemented without the sizeof...(From) overload. // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the // function in two works around the issue. template Vc_INTRINSIC Vc_CONST enable_if simd_cast_impl_smaller_input(const From &... 
xs, const T &last) { Return r = simd_cast(xs...); for (size_t i = 0; i < N; ++i) { r[i + N * sizeof...(From)] = static_cast(last[i]); } return r; } template Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last) { Return r = Return(); for (size_t i = 0; i < N; ++i) { r[i] = static_cast(last[i]); } return r; } template Vc_INTRINSIC Vc_CONST enable_if simd_cast_impl_larger_input( const From &... xs, const T &last) { Return r = simd_cast(xs...); for (size_t i = N * sizeof...(From); i < Return::Size; ++i) { r[i] = static_cast(last[i - N * sizeof...(From)]); } return r; } template Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last) { Return r = Return(); for (size_t i = 0; i < Return::size(); ++i) { r[i] = static_cast(last[i]); } return r; } // simd_cast_without_last (declaration) {{{2 template Vc_INTRINSIC_L Vc_CONST_L Return simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R; // are_all_types_equal {{{2 template struct are_all_types_equal; template struct are_all_types_equal : public std::integral_constant { }; template struct are_all_types_equal : public std::integral_constant< bool, std::is_same::value && are_all_types_equal::value> { }; // simd_cast_interleaved_argument_order (declarations) {{{2 /*! \internal The need for simd_cast_interleaved_argument_order stems from a shortcoming in pack expansion of variadic templates in C++. For a simd_cast with SimdArray arguments that are bisectable (i.e. \c storage_type0 and \c storage_type1 are equal) the generic implementation needs to forward to a simd_cast of the \c internal_data0 and \c internal_data1 of the arguments. But the required order of arguments is `internal_data0(arg0), internal_data1(arg0), internal_data0(arg1), ...`. This is impossible to achieve with pack expansion. It is only possible to write `internal_data0(args)..., internal_data1(args)...` and thus have the argument order mixed up. The simd_cast_interleaved_argument_order “simply” calls simd_cast with the arguments correctly reordered (i.e. interleaved). The implementation of simd_cast_interleaved_argument_order is done generically, so that it supports any number of arguments. The central idea of the implementation is an `extract` function which returns one value of an argument pack determined via an index passed as template argument. This index is generated via an index_sequence. The `extract` function uses two argument packs (of equal size) to easily return values from the front and middle of the argument pack (for doing the deinterleave). */ template Vc_INTRINSIC Vc_CONST Return simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b); // simd_cast_with_offset (declarations and one impl) {{{2 // offset == 0 {{{3 template Vc_INTRINSIC Vc_CONST enable_if<(are_all_types_equal::value && offset == 0), Return> simd_cast_with_offset(const From &x, const Froms &... 
xs); // offset > 0 && offset divisible by Return::Size {{{3 template Vc_INTRINSIC Vc_CONST enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return> simd_cast_with_offset(const From &x); // offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3 template Vc_INTRINSIC Vc_CONST enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 && ((Traits::isSimdArray::value && !Traits::isAtomicSimdArray::value) || (Traits::isSimdMaskArray::value && !Traits::isAtomicSimdMaskArray::value))), Return> simd_cast_with_offset(const From &x); // offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3 template Vc_INTRINSIC Vc_CONST enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 && ((Traits::isSimdArray::value && Traits::isAtomicSimdArray::value) || (Traits::isSimdMaskArray::value && Traits::isAtomicSimdMaskArray::value))), Return> simd_cast_with_offset(const From &x); // offset > first argument (drops first arg) {{{3 template Vc_INTRINSIC Vc_CONST enable_if< (are_all_types_equal::value && From::Size <= offset), Return> simd_cast_with_offset(const From &, const Froms &... xs) { return simd_cast_with_offset(xs...); } // offset > first and only argument (returns Zero) {{{3 template Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset( const From &) { return Return(0); } // first_type_of {{{2 template struct first_type_of_impl { using type = T; }; template using first_type_of = typename first_type_of_impl::type; // simd_cast_drop_arguments (declarations) {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x); template Vc_INTRINSIC Vc_CONST enable_if<(are_all_types_equal::value && sizeof...(Froms) * first_type_of::Size < Return::Size), Return> simd_cast_drop_arguments(Froms... xs, first_type_of x); // The following function can be implemented without the sizeof...(From) overload. // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the // function in two works around the issue. template Vc_INTRINSIC Vc_CONST enable_if< (are_all_types_equal::value && (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0), Return> simd_cast_drop_arguments(Froms... xs, From x, From); template Vc_INTRINSIC Vc_CONST enable_if<(are_all_types_equal::value && From::Size >= Return::Size), Return> simd_cast_drop_arguments(From x, From); namespace { #ifdef Vc_DEBUG_SIMD_CAST void debugDoNothing(const std::initializer_list &) {} template inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0, const Ts &... args) { std::cerr << prefix << arg0; debugDoNothing({&(std::cerr << ", " << args)...}); std::cerr << suffix; } #else template Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...) { } #endif } // unnamed namespace // is_less trait{{{2 template struct is_less : public std::integral_constant { }; // is_power_of_2 trait{{{2 template struct is_power_of_2 : public std::integral_constant { }; // simd_cast(xs...) to SimdArray/-mask {{{2 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \ template \ Vc_INTRINSIC Vc_CONST enable_if< \ (Traits::isAtomic##SimdArrayType_::value && \ is_less::Size * sizeof...(Froms), Return::Size>::value && \ are_all_types_equal, Froms...>::value && \ !detail::is_fixed_size_abi::value), \ Return> \ simd_cast(NativeType_ x, Froms... 
xs) \ { \ vc_debug_("simd_cast{1}(", ")\n", x, xs...); \ return {private_init, simd_cast(x, xs...)}; \ } \ template \ Vc_INTRINSIC Vc_CONST enable_if< \ (Traits::isAtomic##SimdArrayType_::value && \ !is_less::Size * sizeof...(Froms), Return::Size>::value && \ are_all_types_equal, Froms...>::value && \ !detail::is_fixed_size_abi::value), \ Return> \ simd_cast(NativeType_ x, Froms... xs) \ { \ vc_debug_("simd_cast{2}(", ")\n", x, xs...); \ return {simd_cast_without_last, Froms...>(x, xs...)}; \ } \ template \ Vc_INTRINSIC Vc_CONST \ enable_if<(Traits::is##SimdArrayType_::value && \ !Traits::isAtomic##SimdArrayType_::value && \ is_less(), \ NativeType_::Size *(1 + sizeof...(Froms))>::value && \ are_all_types_equal, Froms...>::value && \ !detail::is_fixed_size_abi::value), \ Return> \ simd_cast(NativeType_ x, Froms... xs) \ { \ vc_debug_("simd_cast{3}(", ")\n", x, xs...); \ using R0 = typename Return::storage_type0; \ using R1 = typename Return::storage_type1; \ return {simd_cast_drop_arguments(x, xs...), \ simd_cast_with_offset(x, xs...)}; \ } \ template \ Vc_INTRINSIC Vc_CONST \ enable_if<(Traits::is##SimdArrayType_::value && \ !Traits::isAtomic##SimdArrayType_::value && \ !is_less(), \ NativeType_::Size *(1 + sizeof...(Froms))>::value && \ are_all_types_equal, Froms...>::value && \ !detail::is_fixed_size_abi::value), \ Return> \ simd_cast(NativeType_ x, Froms... xs) \ { \ vc_debug_("simd_cast{4}(", ")\n", x, xs...); \ using R0 = typename Return::storage_type0; \ using R1 = typename Return::storage_type1; \ return {simd_cast(x, xs...), R1(0)}; \ } \ Vc_NOTHING_EXPECTING_SEMICOLON Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector); Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask); #undef Vc_SIMDARRAY_CASTS // simd_cast(V) {{{2 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \ /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */ \ template \ Vc_INTRINSIC Vc_CONST \ enable_if::value, Return> \ simd_cast(NativeType_ x Vc_DUMMY_ARG0) \ { \ vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x); \ return {private_init, simd_cast(x)}; \ } \ /* both halves of Return array are extracted from argument */ \ template \ Vc_INTRINSIC Vc_CONST \ enable_if<(Traits::is##SimdArrayType_::value && \ !Traits::isAtomic##SimdArrayType_::value && \ Return::Size * offset + Common::left_size() < \ NativeType_::Size), \ Return> \ simd_cast(NativeType_ x Vc_DUMMY_ARG1) \ { \ vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x); \ using R0 = typename Return::storage_type0; \ constexpr int entries_offset = offset * Return::Size; \ constexpr int entries_offset_right = entries_offset + R0::Size; \ return { \ simd_cast_with_offset(x), \ simd_cast_with_offset( \ x)}; \ } \ /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */ \ /* right half of Return array is zero */ \ template \ Vc_INTRINSIC Vc_CONST \ enable_if<(Traits::is##SimdArrayType_::value && \ !Traits::isAtomic##SimdArrayType_::value && \ Return::Size * offset + Common::left_size() >= \ NativeType_::Size), \ Return> \ simd_cast(NativeType_ x Vc_DUMMY_ARG2) \ { \ vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x); \ using R0 = typename Return::storage_type0; \ using R1 = typename Return::storage_type1; \ constexpr int entries_offset = offset * Return::Size; \ return {simd_cast_with_offset(x), R1(0)}; \ } \ Vc_NOTHING_EXPECTING_SEMICOLON Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector); Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask); #undef Vc_SIMDARRAY_CASTS // simd_cast(xs...) 
from SimdArray/-mask {{{2 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \ /* indivisible SimdArrayType_ */ \ template \ Vc_INTRINSIC Vc_CONST \ enable_if<(are_all_types_equal, From...>::value && \ (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) && \ !std::is_same>::value), \ Return> \ simd_cast(const SimdArrayType_ &x0, const From &... xs) \ { \ vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...); \ return simd_cast(internal_data(x0), internal_data(xs)...); \ } \ /* indivisible SimdArrayType_ && can drop arguments from the end */ \ template \ Vc_INTRINSIC Vc_CONST \ enable_if<(are_all_types_equal, From...>::value && \ (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) && \ !std::is_same>::value), \ Return> \ simd_cast(const SimdArrayType_ &x0, const From &... xs) \ { \ vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...); \ return simd_cast_without_last::storage_type, \ typename From::storage_type...>( \ internal_data(x0), internal_data(xs)...); \ } \ /* bisectable SimdArrayType_ (N = 2^n) && never too large */ \ template \ Vc_INTRINSIC Vc_CONST enable_if< \ (N != M && are_all_types_equal, From...>::value && \ !std::is_same>::value && \ is_less::value && is_power_of_2::value), \ Return> \ simd_cast(const SimdArrayType_ &x0, const From &... xs) \ { \ vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...); \ return simd_cast_interleaved_argument_order< \ Return, typename SimdArrayType_::storage_type0, \ typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \ internal_data1(x0), internal_data1(xs)...); \ } \ /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last \ * input can be dropped */ \ template \ Vc_INTRINSIC Vc_CONST enable_if< \ (N != M && are_all_types_equal, From...>::value && \ !is_less::value && is_power_of_2::value), \ Return> \ simd_cast(const SimdArrayType_ &x0, const From &... xs) \ { \ vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...); \ return simd_cast_without_last, From...>( \ x0, xs...); \ } \ /* remaining SimdArrayType_ input never larger (N != 2^n) */ \ template \ Vc_INTRINSIC Vc_CONST enable_if< \ (N != M && are_all_types_equal, From...>::value && \ N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2::value), \ Return> \ simd_cast(const SimdArrayType_ &x0, const From &... xs) \ { \ vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...); \ return simd_cast_impl_smaller_input, \ From...>(x0, xs...); \ } \ /* remaining SimdArrayType_ input larger (N != 2^n) */ \ template \ Vc_INTRINSIC Vc_CONST enable_if< \ (N != M && are_all_types_equal, From...>::value && \ N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2::value), \ Return> \ simd_cast(const SimdArrayType_ &x0, const From &... 
xs) \ { \ vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...); \ return simd_cast_impl_larger_input, \ From...>(x0, xs...); \ } \ /* a single bisectable SimdArrayType_ (N = 2^n) too large */ \ template \ Vc_INTRINSIC Vc_CONST \ enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2::value), Return> \ simd_cast(const SimdArrayType_ &x) \ { \ vc_debug_("simd_cast{single bisectable}(", ")\n", x); \ return simd_cast(internal_data0(x)); \ } \ template \ Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size && \ N < 2 * Return::Size && is_power_of_2::value), \ Return> \ simd_cast(const SimdArrayType_ &x) \ { \ vc_debug_("simd_cast{single bisectable2}(", ")\n", x); \ return simd_cast(internal_data0(x), internal_data1(x)); \ } \ Vc_NOTHING_EXPECTING_SEMICOLON Vc_SIMDARRAY_CASTS(SimdArray); Vc_SIMDARRAY_CASTS(SimdMaskArray); #undef Vc_SIMDARRAY_CASTS template >::value>> Vc_INTRINSIC Return simd_cast(const fixed_size_simd &x, const Ts &... xs) { return simd_cast(static_cast &>(x), static_cast &>(xs)...); } template >::value>> Vc_INTRINSIC Return simd_cast(const fixed_size_simd_mask &x, const Ts &... xs) { return simd_cast(static_cast &>(x), static_cast &>(xs)...); } // simd_cast(SimdArray/-mask) {{{2 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \ /* offset == 0 is like without offset */ \ template \ Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast( \ const SimdArrayType_ &x Vc_DUMMY_ARG0) \ { \ vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x); \ return simd_cast(x); \ } \ /* forward to V */ \ template \ Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast( \ const SimdArrayType_ &x Vc_DUMMY_ARG1) \ { \ vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x); \ return simd_cast(internal_data(x)); \ } \ /* convert from right member of SimdArray */ \ template \ Vc_INTRINSIC Vc_CONST \ enable_if<(N != M && offset * Return::Size >= Common::left_size() && \ offset != 0 && Common::left_size() % Return::Size == 0), \ Return> \ simd_cast(const SimdArrayType_ &x Vc_DUMMY_ARG2) \ { \ vc_debug_("simd_cast{offset, right}(", ")\n", offset, x); \ return simd_cast() / Return::Size>( \ internal_data1(x)); \ } \ /* same as above except for odd cases where offset * Return::Size doesn't fit the \ * left side of the SimdArray */ \ template \ Vc_INTRINSIC Vc_CONST \ enable_if<(N != M && offset * Return::Size >= Common::left_size() && \ offset != 0 && Common::left_size() % Return::Size != 0), \ Return> \ simd_cast(const SimdArrayType_ &x Vc_DUMMY_ARG3) \ { \ vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x); \ return simd_cast_with_offset()>( \ internal_data1(x)); \ } \ /* convert from left member of SimdArray */ \ template \ Vc_INTRINSIC Vc_CONST enable_if< \ (N != M && /*offset * Return::Size < Common::left_size() &&*/ \ offset != 0 && (offset + 1) * Return::Size <= Common::left_size()), \ Return> \ simd_cast(const SimdArrayType_ &x Vc_DUMMY_ARG4) \ { \ vc_debug_("simd_cast{offset, left}(", ")\n", offset, x); \ return simd_cast(internal_data0(x)); \ } \ /* fallback to copying scalars */ \ template \ Vc_INTRINSIC Vc_CONST \ enable_if<(N != M && (offset * Return::Size < Common::left_size()) && \ offset != 0 && (offset + 1) * Return::Size > Common::left_size()), \ Return> \ simd_cast(const SimdArrayType_ &x Vc_DUMMY_ARG5) \ { \ vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x); \ using R = typename Return::EntryType; \ Return r = Return(0); \ for (std::size_t i = offset * Return::Size; \ i < std::min(N, (offset + 1) * Return::Size); ++i) { \ r[i - 
offset * Return::Size] = static_cast(x[i]); \ } \ return r; \ } \ Vc_NOTHING_EXPECTING_SEMICOLON Vc_SIMDARRAY_CASTS(SimdArray); Vc_SIMDARRAY_CASTS(SimdMaskArray); #undef Vc_SIMDARRAY_CASTS // simd_cast_drop_arguments (definitions) {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x) { return simd_cast(x); } template Vc_INTRINSIC Vc_CONST enable_if<(are_all_types_equal::value && sizeof...(Froms) * first_type_of::Size < Return::Size), Return> simd_cast_drop_arguments(Froms... xs, first_type_of x) { return simd_cast(xs..., x); } // The following function can be implemented without the sizeof...(From) overload. // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the // function in two works around the issue. template Vc_INTRINSIC Vc_CONST enable_if< (are_all_types_equal::value && (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0), Return> simd_cast_drop_arguments(Froms... xs, From x, From) { return simd_cast_drop_arguments(xs..., x); } template Vc_INTRINSIC Vc_CONST enable_if<(are_all_types_equal::value && From::Size >= Return::Size), Return> simd_cast_drop_arguments(From x, From) { return simd_cast_drop_arguments(x); } // simd_cast_with_offset (definitions) {{{2 template Vc_INTRINSIC Vc_CONST enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return> simd_cast_with_offset(const From &x) { return simd_cast(x); } template Vc_INTRINSIC Vc_CONST enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 && ((Traits::isSimdArray::value && !Traits::isAtomicSimdArray::value) || (Traits::isSimdMaskArray::value && !Traits::isAtomicSimdMaskArray::value))), Return> simd_cast_with_offset(const From &x) { using R0 = typename Return::storage_type0; using R1 = typename Return::storage_type1; return {simd_cast_with_offset(x), simd_cast_with_offset(x)}; } template Vc_INTRINSIC Vc_CONST enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 && ((Traits::isSimdArray::value && Traits::isAtomicSimdArray::value) || (Traits::isSimdMaskArray::value && Traits::isAtomicSimdMaskArray::value))), Return> simd_cast_with_offset(const From &x) { return simd_cast(x.shifted(offset % Return::Size)); } template Vc_INTRINSIC Vc_CONST enable_if<(are_all_types_equal::value && offset == 0), Return> simd_cast_with_offset(const From &x, const Froms &... xs) { return simd_cast(x, xs...); } // simd_cast_without_last (definition) {{{2 template Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &) { return simd_cast(xs...); } // simd_cast_interleaved_argument_order (definitions) {{{2 #ifdef Vc_MSVC // MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved // is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make // MSVC do the right thing. template Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &) { return a0; } template Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0) { return b0; } #endif // Vc_MSVC /// \internal returns the first argument template Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const Ts &..., const T0 &, const Ts &...) { return a0; } /// \internal returns the center argument template Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const Ts &..., const T0 &b0, const Ts &...) 
{ return b0; } /// \internal drops the first and center arguments and recurses template Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &, const Ts &... a, const T0 &, const Ts &... b) { return extract_interleaved(a..., b...); } /// \internal calls simd_cast with correct argument order thanks to extract_interleaved template Vc_INTRINSIC Vc_CONST Return simd_cast_interleaved_argument_order_1(index_sequence, const Ts &... a, const Ts &... b) { return simd_cast(extract_interleaved(a..., b...)...); } /// \internal constructs the necessary index_sequence to pass it to /// simd_cast_interleaved_argument_order_1 template Vc_INTRINSIC Vc_CONST Return simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b) { using seq = make_index_sequence; return simd_cast_interleaved_argument_order_1(seq(), a..., b...); } // conditional_assign {{{1 #define Vc_CONDITIONAL_ASSIGN(name_, op_) \ template \ Vc_INTRINSIC enable_if conditional_assign( \ SimdArray &lhs, M &&mask, U &&rhs) \ { \ lhs(mask) op_ rhs; \ } \ Vc_NOTHING_EXPECTING_SEMICOLON Vc_CONDITIONAL_ASSIGN( Assign, =); Vc_CONDITIONAL_ASSIGN( PlusAssign, +=); Vc_CONDITIONAL_ASSIGN( MinusAssign, -=); Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=); Vc_CONDITIONAL_ASSIGN( DivideAssign, /=); Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=); Vc_CONDITIONAL_ASSIGN( XorAssign, ^=); Vc_CONDITIONAL_ASSIGN( AndAssign, &=); Vc_CONDITIONAL_ASSIGN( OrAssign, |=); Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=); Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=); #undef Vc_CONDITIONAL_ASSIGN #define Vc_CONDITIONAL_ASSIGN(name_, expr_) \ template \ Vc_INTRINSIC enable_if> \ conditional_assign(SimdArray &lhs, M &&mask) \ { \ return expr_; \ } \ Vc_NOTHING_EXPECTING_SEMICOLON Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++); Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask)); Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--); Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask)); #undef Vc_CONDITIONAL_ASSIGN // transpose_impl {{{1 namespace Common { template inline void transpose_impl( TransposeTag<4, 4>, SimdArray *Vc_RESTRICT r[], const TransposeProxy, SimdArray, SimdArray, SimdArray> &proxy) { V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]), &internal_data(*r[2]), &internal_data(*r[3])}; transpose_impl(TransposeTag<4, 4>(), &r2[0], TransposeProxy{internal_data(std::get<0>(proxy.in)), internal_data(std::get<1>(proxy.in)), internal_data(std::get<2>(proxy.in)), internal_data(std::get<3>(proxy.in))}); } template inline void transpose_impl( TransposeTag<2, 4>, SimdArray *Vc_RESTRICT r[], const TransposeProxy, SimdArray, SimdArray, SimdArray> &proxy) { auto &lo = *r[0]; auto &hi = *r[1]; internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in)); internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in)); internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in)); internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in)); internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in)); internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in)); internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in)); internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in)); } template inline void transpose_impl( TransposeTag<4, 4>, SimdArray *Vc_RESTRICT r[], const TransposeProxy, SimdArray, SimdArray, SimdArray> &proxy) { V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]), &internal_data(*r[2]), 
&internal_data(*r[3])}; transpose_impl(TransposeTag<4, 4>(), &r2[0], TransposeProxy{internal_data(std::get<0>(proxy.in)), internal_data(std::get<1>(proxy.in)), internal_data(std::get<2>(proxy.in)), internal_data(std::get<3>(proxy.in))}); } template inline void transpose_impl( TransposeTag<4, 4>, SimdArray *Vc_RESTRICT r[], const TransposeProxy, SimdArray, SimdArray, SimdArray> &proxy) { SimdArray *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]}; SimdArray *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]}; using H = SimdArray; transpose_impl(TransposeTag<2, 4>(), &r0[0], TransposeProxy{internal_data0(std::get<0>(proxy.in)), internal_data0(std::get<1>(proxy.in)), internal_data0(std::get<2>(proxy.in)), internal_data0(std::get<3>(proxy.in))}); transpose_impl(TransposeTag<2, 4>(), &r1[0], TransposeProxy{internal_data1(std::get<0>(proxy.in)), internal_data1(std::get<1>(proxy.in)), internal_data1(std::get<2>(proxy.in)), internal_data1(std::get<3>(proxy.in))}); } /* TODO: template inline enable_if<(N > VSize), void> transpose_impl( std::array * Vc_RESTRICT, 4> & r, const TransposeProxy, SimdArray, SimdArray, SimdArray> &proxy) { typedef SimdArray SA; std::array r0 = { {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]), &internal_data0(*r[3])}}; transpose_impl( r0, TransposeProxy{ internal_data0(std::get<0>(proxy.in)), internal_data0(std::get<1>(proxy.in)), internal_data0(std::get<2>(proxy.in)), internal_data0(std::get<3>(proxy.in))}); std::array r1 = { {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]), &internal_data1(*r[3])}}; transpose_impl( r1, TransposeProxy{ internal_data1(std::get<0>(proxy.in)), internal_data1(std::get<1>(proxy.in)), internal_data1(std::get<2>(proxy.in)), internal_data1(std::get<3>(proxy.in))}); } */ } // namespace Common // }}}1 namespace Detail { // InterleaveImpl for SimdArrays {{{ // atomic {{{1 template struct InterleaveImpl, N, VSizeof> { template static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv) { InterleaveImpl::interleave(data, i, internal_data(vv)...); } template static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv) { InterleaveImpl::deinterleave(data, i, internal_data(vv)...); } }; // generic (TODO) {{{1 /* template struct InterleaveImpl, N, VSizeof> { using SA = SimdArray; using SA0 = typename SA::storage_type0; using SA1 = typename SA::storage_type1; template static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv) { InterleaveImpl::interleave( data, i, // i needs to be split internal_data0(vv)...); InterleaveImpl::interleave( data, // how far to advance data? i, // i needs to be split internal_data1(vv)...); } template static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... 
vv) { InterleaveImpl::deinterleave(data, i, internal_data(vv)...); } }; */ } // namespace Detail // }}} /// @} } // namespace Vc_VERSIONED_NAMESPACE // numeric_limits {{{1 namespace std { template struct numeric_limits> : public numeric_limits { private: using R = Vc::SimdArray; public: static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits::max(); } static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits::min(); } static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept { return numeric_limits::lowest(); } static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept { return numeric_limits::epsilon(); } static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept { return numeric_limits::round_error(); } static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept { return numeric_limits::infinity(); } static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept { return numeric_limits::quiet_NaN(); } static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept { return numeric_limits::signaling_NaN(); } static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept { return numeric_limits::denorm_min(); } }; } // namespace std //}}}1 #endif // VC_COMMON_SIMDARRAY_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/simdarrayfwd.h000066400000000000000000000173411476554302100215620ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_SIMDARRAYFWD_H_ #define VC_COMMON_SIMDARRAYFWD_H_ #include "../scalar/types.h" #include "../sse/types.h" #include "../avx/types.h" #include "utility.h" #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { // specialization of Vector for fixed_size {{{ template class Vector> : public SimdArray { using SimdArray::SimdArray; public: // overload copy to force argument passing via the stack. 
This makes the type more // usable on ABI boundaries Vc_INTRINSIC Vector(const Vector &x) : SimdArray(x) {} Vc_INTRINSIC Vector &operator=(const Vector &x) { SimdArray::operator=(x); return *this; } Vector() = default; using abi_type = simd_abi::fixed_size; using abi = abi_type; Vc_DEPRECATED("use Vector([](int n) { return n; }) instead of " "Vector::IndexesFromZero()") static Vector IndexesFromZero() { return Vector([](size_t i) -> T { return i; }); } Vc_DEPRECATED("use 0 instead of Vector::Zero()") static Vector Zero() { return 0; } Vc_DEPRECATED("use 1 instead of Vector::One()") static Vector One() { return 1; } }; template class Mask> : public SimdMaskArray { using SimdMaskArray::SimdMaskArray; public: // overload copy to force argument passing via the stack. This makes the type more // usable on ABI boundaries Vc_INTRINSIC Mask(const Mask &x) : SimdMaskArray(x) {} Vc_INTRINSIC Mask &operator=(const Mask &x) { SimdMaskArray::operator=(x); return *this; } Mask() = default; using abi_type = simd_abi::fixed_size; using abi = abi_type; }; // }}} /** \internal * Simple traits for SimdArray to easily access internal types of non-atomic SimdArray * types. */ template struct SimdArrayTraits { static constexpr std::size_t N0 = Common::left_size(); static constexpr std::size_t N1 = Common::right_size(); using storage_type0 = fixed_size_simd; using storage_type1 = fixed_size_simd; }; template Vc_INTRINSIC_L typename SimdArrayTraits::storage_type0 &internal_data0( SimdArray &x) Vc_INTRINSIC_R; template Vc_INTRINSIC_L typename SimdArrayTraits::storage_type1 &internal_data1( SimdArray &x) Vc_INTRINSIC_R; template Vc_INTRINSIC_L const typename SimdArrayTraits::storage_type0 &internal_data0( const SimdArray &x) Vc_INTRINSIC_R; template Vc_INTRINSIC_L const typename SimdArrayTraits::storage_type1 &internal_data1( const SimdArray &x) Vc_INTRINSIC_R; template Vc_INTRINSIC_L V &internal_data(SimdArray &x) Vc_INTRINSIC_R; template Vc_INTRINSIC_L const V &internal_data(const SimdArray &x) Vc_INTRINSIC_R; namespace Traits { // is_fixed_size_simd {{{1 template struct is_fixed_size_simd : std::false_type { }; template struct is_fixed_size_simd> : std::true_type { }; template struct is_fixed_size_simd> : std::true_type { }; // is_simd_vector_internal {{{1 template struct is_simd_vector_internal> : is_valid_vector_argument {}; // is_simd_mask_internal {{{1 template struct is_simd_mask_internal> : is_valid_vector_argument {}; // is_atomic_simdarray_internal {{{1 template struct is_atomic_simdarray_internal> : is_valid_vector_argument {}; template struct is_atomic_simdarray_internal> : is_atomic_simdarray_internal> { }; // is_atomic_simd_mask_array_internal {{{1 template struct is_atomic_simd_mask_array_internal> : is_valid_vector_argument { }; template struct is_atomic_simd_mask_array_internal> : is_atomic_simd_mask_array_internal> { }; // is_simdarray_internal {{{1 template struct is_simdarray_internal> : is_valid_vector_argument { }; template struct is_simdarray_internal> : is_valid_vector_argument { }; // is_simd_mask_array_internal {{{1 template struct is_simd_mask_array_internal> : is_valid_vector_argument { }; template struct is_simd_mask_array_internal> : is_valid_vector_argument { }; // is_integral_internal {{{1 template struct is_integral_internal, false> : std::is_integral { }; // is_floating_point_internal {{{1 template struct is_floating_point_internal, false> : std::is_floating_point { }; // is_signed_internal {{{1 template struct is_signed_internal, false> : std::is_signed { }; // 
is_unsigned_internal {{{1 template struct is_unsigned_internal, false> : std::is_unsigned { }; // has_no_allocated_data_impl {{{1 template struct has_no_allocated_data_impl> : std::true_type { }; // }}}1 } // namespace Traits } // namespace Vc #endif // VC_COMMON_SIMDARRAYFWD_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/simdarrayhelper.h000066400000000000000000000546721476554302100222710ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2013-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_SIMDARRAYHELPER_H_ #define VC_COMMON_SIMDARRAYHELPER_H_ #include "macros.h" namespace Vc_VERSIONED_NAMESPACE { // private_init {{{ namespace { static constexpr struct private_init_t {} private_init = {}; } // unnamed namespace // }}} namespace Common { /// \addtogroup SimdArray /// @{ namespace Operations/*{{{*/ { struct tag {}; #define Vc_DEFINE_OPERATION(name_) \ struct name_ : public tag { \ template \ Vc_INTRINSIC void operator()(V &v, Args &&... args) \ { \ v.name_(std::forward(args)...); \ } \ } Vc_DEFINE_OPERATION(gather); Vc_DEFINE_OPERATION(scatter); Vc_DEFINE_OPERATION(load); Vc_DEFINE_OPERATION(store); Vc_DEFINE_OPERATION(setZero); Vc_DEFINE_OPERATION(setZeroInverted); Vc_DEFINE_OPERATION(assign); #undef Vc_DEFINE_OPERATION #define Vc_DEFINE_OPERATION(name_, code_) \ struct name_ : public tag { \ template Vc_INTRINSIC void operator()(V &v) { code_; } \ } Vc_DEFINE_OPERATION(increment, ++(v)); Vc_DEFINE_OPERATION(decrement, --(v)); Vc_DEFINE_OPERATION(random, v = V::Random()); #undef Vc_DEFINE_OPERATION #define Vc_DEFINE_OPERATION_FORWARD(name_) \ struct Forward_##name_ : public tag \ { \ template ()...))> \ Vc_INTRINSIC void operator()(decltype(name_(std::declval()...)) &v, \ Args &&... args) \ { \ v = name_(std::forward(args)...); \ } \ template ()...))> \ Vc_INTRINSIC void operator()(std::nullptr_t, Args && ... 
args) \ { \ name_(std::forward(args)...); \ } \ } Vc_DEFINE_OPERATION_FORWARD(abs); Vc_DEFINE_OPERATION_FORWARD(asin); Vc_DEFINE_OPERATION_FORWARD(atan); Vc_DEFINE_OPERATION_FORWARD(atan2); Vc_DEFINE_OPERATION_FORWARD(cos); Vc_DEFINE_OPERATION_FORWARD(ceil); Vc_DEFINE_OPERATION_FORWARD(copysign); Vc_DEFINE_OPERATION_FORWARD(exp); Vc_DEFINE_OPERATION_FORWARD(exponent); Vc_DEFINE_OPERATION_FORWARD(fma); Vc_DEFINE_OPERATION_FORWARD(floor); Vc_DEFINE_OPERATION_FORWARD(frexp); Vc_DEFINE_OPERATION_FORWARD(isfinite); Vc_DEFINE_OPERATION_FORWARD(isinf); Vc_DEFINE_OPERATION_FORWARD(isnan); Vc_DEFINE_OPERATION_FORWARD(isnegative); Vc_DEFINE_OPERATION_FORWARD(ldexp); Vc_DEFINE_OPERATION_FORWARD(log); Vc_DEFINE_OPERATION_FORWARD(log10); Vc_DEFINE_OPERATION_FORWARD(log2); Vc_DEFINE_OPERATION_FORWARD(reciprocal); Vc_DEFINE_OPERATION_FORWARD(round); Vc_DEFINE_OPERATION_FORWARD(rsqrt); Vc_DEFINE_OPERATION_FORWARD(sin); Vc_DEFINE_OPERATION_FORWARD(sincos); Vc_DEFINE_OPERATION_FORWARD(sqrt); Vc_DEFINE_OPERATION_FORWARD(trunc); Vc_DEFINE_OPERATION_FORWARD(min); Vc_DEFINE_OPERATION_FORWARD(max); #undef Vc_DEFINE_OPERATION_FORWARD template using is_operation = std::is_base_of; } // namespace Operations }}} /** * \internal * Helper type to statically communicate segmentation of one vector register into 2^n parts * (Pieces). * * Forward declaration in common/types.h. */ template struct Segment/*{{{*/ { static_assert(Index_ < Pieces_, "You found a bug in Vc. Please report."); using type = T_; using type_decayed = typename std::decay::type; static constexpr std::size_t Pieces = Pieces_; static constexpr std::size_t Index = Index_; using fixed_size_type = fixed_size_simd::value, typename type_decayed::EntryType, float>, type_decayed::Size / Pieces>; type data; static constexpr std::size_t EntryOffset = Index * type_decayed::Size / Pieces; // no non-const operator[] needed decltype(std::declval()[0]) operator[](size_t i) const { return data[i + EntryOffset]; } fixed_size_type to_fixed_size() const { return simd_cast(data); } };/*}}}*/ //Segment specialization {{{ template struct Segment { static_assert(Index_ < Pieces_, "You found a bug in Vc. Please report."); using type = T_ *; using type_decayed = typename std::decay::type; static constexpr size_t Pieces = Pieces_; static constexpr size_t Index = Index_; using fixed_size_type = fixed_size_simd< typename std::conditional::value, typename type_decayed::VectorEntryType, float>::type, type_decayed::Size / Pieces> *; type data; static constexpr std::size_t EntryOffset = Index * type_decayed::size() / Pieces; fixed_size_type to_fixed_size() const { return reinterpret_cast< #ifdef Vc_GCC // GCC might ICE if this type is declared with may_alias. If it doesn't // ICE it warns about ignoring the attribute. typename std::remove_pointer::type #else MayAlias::type> #endif *>(data) + Index; } //decltype(std::declval()[0]) operator[](size_t i) { return data[i + EntryOffset]; } //decltype(std::declval()[0]) operator[](size_t i) const { return data[i + EntryOffset]; } };/*}}}*/ /** \internal Template class that is used to attach an offset value to an existing type. It is used for IndexesFromZero construction in SimdArray. The \c data1 constructor needs to know that the IndexesFromZero constructor requires an offset so that the whole data is constructed as a correct sequence from `0` to `Size - 1`. \tparam T The original type that needs the offset attached. \tparam Offset An integral value that determines the offset in the complete SimdArray. 
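
 As a standalone illustration of the idea (invented names and an array-based interface,
 not the Vc implementation): an offset-carrying tag lets the second half of a split
 container continue the 0, 1, 2, ... sequence where the first half stopped.

 \code
 #include <array>

 struct IndexesFromZeroInit {};                           // plain "start counting at 0" tag
 template <class Tag, int Offset> struct WithOffset {};   // the same tag, shifted by Offset

 template <int N> std::array<int, N> iota(IndexesFromZeroInit)
 {
     std::array<int, N> r{};
     for (int i = 0; i < N; ++i) r[i] = i;            // 0 ... N-1
     return r;
 }

 template <int N, int Offset>
 std::array<int, N> iota(WithOffset<IndexesFromZeroInit, Offset>)
 {
     std::array<int, N> r{};
     for (int i = 0; i < N; ++i) r[i] = Offset + i;   // Offset ... Offset+N-1
     return r;
 }

 // iota<4>(IndexesFromZeroInit{})                 yields {0, 1, 2, 3}
 // iota<4>(WithOffset<IndexesFromZeroInit, 4>{})  yields {4, 5, 6, 7}
 \endcode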
*/ template struct AddOffset { constexpr AddOffset() = default; }; // class Split {{{1 /** \internal Helper type with static functions to generically adjust arguments for the \c data0 and \c data1 members of SimdArray and SimdMaskArray. \tparam secondOffset The offset in number of elements that \c data1 has in the SimdArray / SimdMaskArray. This is essentially equal to the number of elements in \c data0. */ template class Split { // split composite SimdArray template > static Vc_INTRINSIC auto loImpl(const SimdArray &x) -> decltype(internal_data0(x)) { return internal_data0(x); } template > static Vc_INTRINSIC auto hiImpl(const SimdArray &x) -> decltype(internal_data1(x)) { return internal_data1(x); } template > static Vc_INTRINSIC auto loImpl(SimdArray *x) -> decltype(&internal_data0(*x)) { return &internal_data0(*x); } template > static Vc_INTRINSIC auto hiImpl(SimdArray *x) -> decltype(&internal_data1(*x)) { return &internal_data1(*x); } // split atomic SimdArray template static Vc_INTRINSIC Segment loImpl(const SimdArray &x) { return {internal_data(x)}; } template static Vc_INTRINSIC Segment hiImpl(const SimdArray &x) { return {internal_data(x)}; } template static Vc_INTRINSIC Segment loImpl(SimdArray *x) { return {&internal_data(*x)}; } template static Vc_INTRINSIC Segment hiImpl(SimdArray *x) { return {&internal_data(*x)}; } // split composite SimdMaskArray template static Vc_INTRINSIC auto loImpl(const SimdMaskArray &x) -> decltype(internal_data0(x)) { return internal_data0(x); } template static Vc_INTRINSIC auto hiImpl(const SimdMaskArray &x) -> decltype(internal_data1(x)) { return internal_data1(x); } template static Vc_INTRINSIC Segment::mask_type, 2, 0> loImpl( const SimdMaskArray &x) { return {internal_data(x)}; } template static Vc_INTRINSIC Segment::mask_type, 2, 1> hiImpl( const SimdMaskArray &x) { return {internal_data(x)}; } // split Vector and Mask #ifdef Vc_IMPL_AVX template static Vc_INTRINSIC SSE::Vector loImpl(Vector &&x) { return simd_cast, 0>(x); } template static Vc_INTRINSIC SSE::Vector hiImpl(Vector &&x) { return simd_cast, 1>(x); } template static Vc_INTRINSIC SSE::Mask loImpl(Mask &&x) { return simd_cast, 0>(x); } template static Vc_INTRINSIC SSE::Mask hiImpl(Mask &&x) { return simd_cast, 1>(x); } #endif // Vc_IMPL_AVX template static constexpr bool is_vector_or_mask(){ return (Traits::is_simd_vector::value && !Traits::isSimdArray::value) || (Traits::is_simd_mask::value && !Traits::isSimdMaskArray::value); } template static Vc_INTRINSIC Segment loImpl(V &&x, enable_if()> = nullarg) { return {std::forward(x)}; } template static Vc_INTRINSIC Segment hiImpl(V &&x, enable_if()> = nullarg) { return {std::forward(x)}; } // split std::vector template static Vc_INTRINSIC const T *loImpl(const std::vector &x) { return x.data(); } template static Vc_INTRINSIC const T *hiImpl(const std::vector &x) { return x.data() + secondOffset; } // generically split Segments template static Vc_INTRINSIC Segment loImpl( const Segment &x) { return {x.data}; } template static Vc_INTRINSIC Segment hiImpl( const Segment &x) { return {x.data}; } /** \internal * \name Checks for existence of \c loImpl / \c hiImpl */ //@{ template ()))> static std::true_type have_lo_impl(int); template static std::false_type have_lo_impl(float); template static constexpr bool have_lo_impl() { return decltype(have_lo_impl(1))::value; } template ()))> static std::true_type have_hi_impl(int); template static std::false_type have_hi_impl(float); template static constexpr bool have_hi_impl() { return 
decltype(have_hi_impl(1))::value; } //@} public: /** \internal * \name with Operations tag * * These functions don't overload on the data parameter. The first parameter (the tag) clearly * identifies the intended function. */ //@{ template static Vc_INTRINSIC const U *lo(Operations::gather, const U *ptr) { return ptr; } template static Vc_INTRINSIC const U *hi(Operations::gather, const U *ptr) { return ptr + secondOffset; } template ::value>> static Vc_ALWAYS_INLINE decltype(loImpl(std::declval())) lo(Operations::gather, U &&x) { return loImpl(std::forward(x)); } template ::value>> static Vc_ALWAYS_INLINE decltype(hiImpl(std::declval())) hi(Operations::gather, U &&x) { return hiImpl(std::forward(x)); } template static Vc_INTRINSIC const U *lo(Operations::scatter, const U *ptr) { return ptr; } template static Vc_INTRINSIC const U *hi(Operations::scatter, const U *ptr) { return ptr + secondOffset; } //@} /** \internal \name without Operations tag These functions are not clearly tagged as to where they are used and therefore behave differently depending on the type of the parameter. Different behavior is implemented via overloads of \c loImpl and \c hiImpl. They are not overloads of \c lo and \c hi directly because it's hard to compete against a universal reference (i.e. an overload for `int` requires overloads for `int &`, `const int &`, and `int &&`. If one of them were missing `U &&` would win in overload resolution). */ //@{ template static Vc_ALWAYS_INLINE decltype(loImpl(std::declval())) lo(U &&x) { return loImpl(std::forward(x)); } template static Vc_ALWAYS_INLINE decltype(hiImpl(std::declval())) hi(U &&x) { return hiImpl(std::forward(x)); } template static Vc_ALWAYS_INLINE enable_if(), U> lo(U &&x) { return std::forward(x); } template static Vc_ALWAYS_INLINE enable_if(), U> hi(U &&x) { return std::forward(x); } //@} }; // actual_value {{{1 template static Vc_INTRINSIC const V &actual_value(Op, const SimdArray &x) { return internal_data(x); } template static Vc_INTRINSIC V *actual_value(Op, SimdArray *x) { return &internal_data(*x); } template static Vc_INTRINSIC typename Segment::fixed_size_type actual_value( Op, Segment &&seg) { return seg.to_fixed_size(); } template static Vc_INTRINSIC const typename V::Mask &actual_value(Op, const SimdMaskArray &x) { return internal_data(x); } template static Vc_INTRINSIC typename V::Mask *actual_value(Op, SimdMaskArray *x) { return &internal_data(*x); } // unpackArgumentsAuto {{{1 /**\internal * \name unpackArgumentsAuto * * Search for the right amount of SimdArray "unpacking" (via actual_value) to match the * interface of the function to be called. * * The compiler can figure this out for us thanks to SFINAE. The approach is to have a * number \c I that determines the indexes of the arguments to be transformed via * actual_value. Each bit of \c I identifies an argument. unpackArgumentsAuto starts the * recursion with `I = 0`, i.e. no actual_value transformations. If the overload calling * \c op is unavailable due to a substitution failure \c I is incremented and the function * recurses. Otherwise there are two unpackArgumentsAutoImpl functions in the overload * set. The first argument (\c int / \c float) leads to a preference of the function * calling \c op, thus ending the recursion. 
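 *
 * As a minimal, self-contained sketch of the int/float overload-ranking idiom described
 * above (illustrative names only, not part of the Vc API; the real code additionally
 * varies the bitmask \c I to decide which arguments get the actual_value transformation):
 *
 * \code
 * #include <utility>
 *
 * // Preferred overload: the int parameter ranks higher than float, but the overload
 * // only participates in overload resolution when f(args...) is well-formed.
 * template <class F, class... Args>
 * auto try_call(int, F &&f, Args &&... args)
 *     -> decltype(std::forward<F>(f)(std::forward<Args>(args)...))
 * {
 *     return std::forward<F>(f)(std::forward<Args>(args)...);
 * }
 *
 * // Fallback overload: chosen when substitution for the int overload fails. The real
 * // implementation recurses at this point with the next index value I + 1.
 * template <class F, class... Args>
 * void try_call(float, F &&, Args &&...)
 * {
 * }
 *
 * // A call site passes int() so that the preferred overload wins whenever it is viable:
 * // try_call(int(), op, arg0, arg1);
 * \endcode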
*/ ///@{ ///\internal transforms \p arg via actual_value template Vc_INTRINSIC decltype(actual_value(std::declval(), std::declval())) conditionalUnpack(std::true_type, Op op, Arg &&arg) { return actual_value(op, std::forward(arg)); } ///\internal forwards \p arg to its return value template Vc_INTRINSIC Arg conditionalUnpack(std::false_type, Op, Arg &&arg) { return std::forward(arg); } ///\internal true-/false_type that selects whether the argument with index B should be unpacked template struct selectorType : public std::integral_constant { }; ///\internal ends the recursion, transforms arguments, and calls \p op template Vc_INTRINSIC decltype(std::declval()(std::declval(), conditionalUnpack(selectorType(), std::declval(), std::declval())...)) unpackArgumentsAutoImpl(int, index_sequence, Op op, R &&r, Args &&... args) { op(std::forward(r), conditionalUnpack(selectorType(), op, std::forward(args))...); } ///\internal the current actual_value calls don't work: recurse to I + 1 template Vc_INTRINSIC enable_if<(I <= (size_t(1) << sizeof...(Args))), void> unpackArgumentsAutoImpl( float, index_sequence is, Op op, R &&r, Args &&... args) { // if R is nullptr_t then the return type cannot enforce that actually any unwrapping // of the SimdArray types happens. Thus, you could get an endless loop of the // SimdArray function overload calling itself, if the index goes up to (1 << // sizeof...(Args)) - 1 (which means no argument transformations via actual_value). static_assert( I < (1 << sizeof...(Args)) - (std::is_same::value ? 1 : 0), "Vc or compiler bug. Please report. Failed to find a combination of " "actual_value(arg) transformations that allows calling Op."); unpackArgumentsAutoImpl(int(), is, op, std::forward(r), std::forward(args)...); } #ifdef Vc_ICC template struct IccWorkaround { using type = void; }; template struct IccWorkaround<2, Ts...> { using type = typename std::remove_pointer>::type>::type>::type; }; #endif ///\internal The interface to start the machinery. template Vc_INTRINSIC void unpackArgumentsAuto(Op op, R &&r, Args &&... args) { #ifdef Vc_ICC // ugly hacky workaround for ICC: // The compiler fails to do SFINAE right on recursion. We have to hit the right // recursionStart number from the start. const int recursionStart = Traits::isSimdArray< typename IccWorkaround::type>::value && (std::is_same::value || std::is_same::value) ? 2 : 0; #else const int recursionStart = 0; #endif unpackArgumentsAutoImpl( int(), make_index_sequence(), op, std::forward(r), std::forward(args)...); } ///@} //}}}1 ///@} } // namespace Common } // namespace Vc #endif // VC_COMMON_SIMDARRAYHELPER_H_ // vim: foldmethod=marker conky-1.22.1/3rdparty/Vc/Vc/common/simdize.h000066400000000000000000002377351476554302100205450ustar00rootroot00000000000000/* This file is part of the Vc library. {{{ Copyright © 2014-2015 Matthias Kretz Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names of contributing organizations nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. }}}*/ #ifndef VC_COMMON_SIMDIZE_H_ #define VC_COMMON_SIMDIZE_H_ #include #include #include "../Allocator" #include "interleavedmemory.h" /*! \addtogroup Simdize Automatic type vectorization. Struct Vectorization ====================== The `Vc::simdize` expression transforms the type \c T to a vectorized type. This requires the type \c T to be a class template instance or an arithmetic type. Example: First, we declare a class template for a three-dimensional point. The template parameter \c T determines the type of the members and is \c float in the scalar (classical) case. \code template struct PointTemplate { T x, y, z; // Declares tuple_size and makes the members accessible via get(point), allowing // the simdize implementation to convert between Point and PointV (see below). Vc_SIMDIZE_INTERFACE((x, y, z)); PointTemplate(T xx, T yy, T zz) : x{xx}, y{yy}, z{zz} {}; // The following function will automatically be vectorized in the PointV type. T distance_to_origin() const { using std::sqrt; return sqrt(x * x + y * y + z * z); } }; \endcode In the following we create a type alias for the scalar type, which simply means instantiating \c PointTemplate with \c float. The resulting type can then be transformed with \ref simdize. \code using Point = PointTemplate; // A simple struct with three floats and two functions. using PointV = Vc::simdize; // The vectorization of Point stores three float_v and thus // float_v::size() Points. \endcode The following shows a code example using the above \c Point and \c PointV types. \code PointV pv = Point{0.f, 1.f, 2.f}; // Constructs a PointV containing PointV::size() // copies of Point{0, 1, 2}. for (int i = 1; i < int(pv.size()); ++i) { assign(pv, i, {i + 0.f, i + 1.f, i + 2.f}); } const Vc::float_v l = pv.distance_to_origin(); std::cout << l << '\n'; // prints [2.23607, 3.74166, 5.38516, 7.07107, 8.77496, 10.4881, 12.2066, 13.9284] with // float_v::size() == 8 const Point most_distant = extract(pv, (l.max() == l).firstOne()); std::cout << '(' << most_distant.x << ", " << most_distant.y << ", " << most_distant.z << ")\n"; // prints (7, 8, 9) with float_v::size() == 8 \endcode Iterator Vectorization ====================== `Vc::simdize` can also be used to turn an iterator type into a new iterator type with `Vc::simdize` as its `value_type`. Note that `Vc::simdize` turns into `Vc::Vector`, which makes it easy to iterate over a given container of builtin arithmetics using `Vc::Vector`. 
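
As a minimal sketch of that arithmetic case (illustrative only: the loop mirrors the
Point example below, `sum` is an invented helper, and a trailing partial vector is not
handled here):

\code
Vc::float_v sum(const std::vector<float> &data)
{
    using It = Vc::simdize<std::vector<float>::const_iterator>;
    Vc::float_v acc = Vc::float_v::Zero();
    const It end = data.end();
    for (It it = data.begin(); it != end; ++it) {
        acc += *it;  // *it loads float_v::size() consecutive floats as a Vc::float_v
    }
    return acc;  // reduce with acc.sum() if a single scalar is needed
}
\endcode

For comparison, the scalar and vectorized loops over the Point type from above:
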
\code void classic(const std::vector &data) { using It = std::vector::const_iterator; const It end = data.end(); for (It it = data.begin(); it != end; ++it) { Point x = *it; do_something(x); } } void vectorized(const std::vector &data) { using It = Vc::simdize::const_iterator>; const It end = data.end(); for (It it = data.begin(); it != end; ++it) { Vc::simdize x = *it; // i.e. PointV do_something(x); } } \endcode */ namespace Vc_VERSIONED_NAMESPACE { /**\internal * \ingroup Simdize * This namespace contains all the required code for implementing simdize. None of this * code should be directly accessed by users, though the unit test for simdize * certainly may look into some of the details if necessary. */ namespace SimdizeDetail // {{{ { /** * \addtogroup Simdize * @{ */ using std::is_same; using std::is_base_of; using std::false_type; using std::true_type; using std::iterator_traits; using std::conditional; using std::size_t; /**\internal * Typelist is a simple helper class for supporting multiple parameter packs in one class * template. */ template struct Typelist; /**\internal * The Category identifies how the type argument to simdize has to be transformed. */ enum class Category { ///\internal No transformation NoTransformation, ///\internal simple Vector transformation ArithmeticVectorizable, ///\internal transform an input iterator to return vectorized entries InputIterator, ///\internal transform a forward iterator to return vectorized entries OutputIterator, ///\internal transform an output iterator to return vectorized entries ForwardIterator, ///\internal transform a bidirectional iterator to return vectorized entries BidirectionalIterator, ///\internal transform a random access iterator to return vectorized entries RandomAccessIterator, ///\internal transform a class template recursively ClassTemplate }; /**\internal * iteratorCategories(int()) returns whether iterator_traits::iterator_category is a * valid type and whether it is derived from RandomAccessIterator or ForwardIterator. */ template constexpr Category iteratorCategories(int, ItCat * = nullptr) { return is_base_of::value ? Category::RandomAccessIterator : is_base_of::value ? Category::BidirectionalIterator : is_base_of::value ? Category::ForwardIterator : is_base_of::value ? Category::OutputIterator : is_base_of::value ? Category::InputIterator : Category::NoTransformation; } /**\internal * This overload is selected for pointer types => RandomAccessIterator. */ template constexpr enable_if::value, Category> iteratorCategories(float) { return Category::RandomAccessIterator; } /**\internal * This overload is selected if T does not work with iterator_traits. */ template constexpr Category iteratorCategories(...) { return Category::NoTransformation; } /**\internal * Simple trait to identify whether a type T is a class template or not. */ template struct is_class_template : public false_type { }; template