BTAS-1.0.0/.clang-format

---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: true
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: true
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH
IncludeCategories:
  - Regex: '^<.*\.h>'
    Priority: 1
  - Regex: '^<.*'
    Priority: 2
  - Regex: '.*'
    Priority: 3
IncludeIsMainRegex: '([-_](test|unittest))?$'
IndentCaseLabels: true
IndentWidth: 2
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: All
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Left
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Auto
TabWidth: 8
UseTab: Never
...
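# Usage note (illustrative, not part of the original file): with this file at the
# repository root, sources can be reformatted in place with a command along the
# lines of `clang-format -i btas/*.h`.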
BTAS-1.0.0/.github/000077500000000000000000000000001476142407000136065ustar00rootroot00000000000000BTAS-1.0.0/.github/workflows/000077500000000000000000000000001476142407000156435ustar00rootroot00000000000000BTAS-1.0.0/.github/workflows/cmake.yml000066400000000000000000000141661476142407000174560ustar00rootroot00000000000000name: Linux/MacOS Build on: [push, pull_request] #env: jobs: build: strategy: fail-fast: false matrix: build_type : [ Release, Debug ] os : [ macos-latest, ubuntu-22.04 ] linalg : [netlib, vendor] include: - os: ubuntu-22.04 cc: /usr/bin/gcc-12 cxx: /usr/bin/g++-12 - os: macos-latest cc: clang cxx: clang++ name: "${{ matrix.os }}: ${{ matrix.cxx }} ${{ matrix.build_type }} linalg=${{ matrix.linalg }}" runs-on: ${{ matrix.os }} env: CXX : ${{ matrix.cxx }} DOXYGEN_VERSION : 1.12.0 CCACHE_DIR : ${{github.workspace}}/build/.ccache CCACHE_COMPRESS : true CCACHE_COMPRESSLEVEL : 6 BUILD_CONFIG : > -G Ninja -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_PREFIX_PATH=/usr/local/opt/bison -DBUILD_SHARED_LIBS=OFF -DMPIEXEC_PREFLAGS='--bind-to;none;--allow-run-as-root' -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/install steps: - uses: actions/checkout@v2 - name: Create Build Environment # Some projects don't allow in-source building, so create a separate build directory # We'll use this as our working directory for all subsequent commands run: cmake -E make_directory ${{github.workspace}}/build - name: Install prerequisite MacOS packages if: ${{ matrix.os == 'macos-latest' }} run: | brew install ninja boost eigen open-mpi bison ccache # install Netlib if want generic linalg if [ "${{matrix.linalg}}" = "netlib" ]; then brew install lapack echo "BLAS_PREFERENCE_LIST=ReferenceBLAS" >> $GITHUB_ENV else echo "BLAS_PREFERENCE_LIST=Accelerate" >> $GITHUB_ENV fi - name: Install prerequisites Ubuntu packages if: ${{ matrix.os == 'ubuntu-22.04' }} run: | sudo apt-get update sudo apt-get install ninja-build g++-12 liblapack-dev libboost-dev libboost-serialization-dev libboost-random-dev libeigen3-dev openmpi-bin libopenmpi-dev libtbb-dev ccache if [ "${{matrix.build_type}}" = "Release" ]; then sudo apt-get install graphviz fonts-liberation cd ${{github.workspace}}/build # If we fail getting doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz from sourceforge, # use EFV's gdrive mirror of 1.9.2 to work around the unreliable sourceforge # the sharing link: https://drive.google.com/file/d/16GXpH4YOEUxGXQrXOKdAIibhdfzATY0d/view?usp=sharing wget https://www.doxygen.nl/files/doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz tar xzf ./doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz export DOXYGEN_DIR=${{github.workspace}}/build/doxygen-${DOXYGEN_VERSION} ${DOXYGEN_DIR}/bin/doxygen --version # doxygen should be in PATH in subsequent steps echo "${DOXYGEN_DIR}/bin" >> $GITHUB_PATH fi # install MKL if want vendor linalg if [ "${{matrix.linalg}}" = "vendor" ]; then sudo sh -c 'wget -O - https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg' sudo sh -c 'echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list' sudo apt-get -yq update sudo apt-get install intel-oneapi-mkl-devel echo "BLAS_PREFERENCE_LIST=IntelMKL" >> $GITHUB_ENV echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> $GITHUB_ENV echo "MKL_NUM_THREADS=1" >> $GITHUB_ENV else echo "BLAS_PREFERENCE_LIST=ReferenceBLAS" >> $GITHUB_ENV fi - name: Setup ccache uses: 
hendrikmuhs/ccache-action@v1.2 with: key: ccache-${{ matrix.os }}-${{ matrix.build_type }}-${{ matrix.linalg }} - name: Configure CMake # Use a bash shell so we can use the same syntax for environment variable # access regardless of the host operating system shell: bash working-directory: ${{github.workspace}}/build # Note the current convention is to use the -S and -B options here to specify source # and build directories, but this is only available with CMake 3.13 and higher. # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 run: | cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBLAS_PREFERENCE_LIST=$BLAS_PREFERENCE_LIST $BUILD_CONFIG || (cat CMakeFiles/CMakeConfigureLog.yaml) - name: Build working-directory: ${{github.workspace}}/build shell: bash # Execute the build. You can specify a specific target with "--target " run: ccache -p && ccache -z && cmake --build . && ccache -s - name: Test working-directory: ${{github.workspace}}/build shell: bash # Execute tests defined by the CMake configuration. # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail # run: ctest -C $BUILD_TYPE run: cmake --build . --target check-btas - name: Install working-directory: ${{github.workspace}}/build shell: bash run: cmake --build . --target install - name: Test Install Tree working-directory: ${{github.workspace}}/build shell: bash run: | cmake -S $GITHUB_WORKSPACE/doc/examples -B test_install -DCMAKE_PREFIX_PATH=${{github.workspace}}/install cmake --build test_install - name: Build+Deploy Dox if: ${{ matrix.os == 'ubuntu-22.04' && matrix.build_type == 'Release' && github.ref == 'refs/heads/master' }} working-directory: ${{github.workspace}}/build shell: bash run: | #echo "== deploying dox ==" #git config --global user.email "eduard@valeyev.net" #git config --global user.name "GitHub Actions" cmake --build . 
--target html #git clone --depth=1 https://github.com/ValeevGroup/BTAS.git --branch gh-pages --single-branch btas-docs-current #git clone --depth=1 https://github.com/ValeevGroup/BTAS.git --branch gh-pages-template --single-branch btas-docs-template BTAS-1.0.0/.gitignore000066400000000000000000000006221476142407000142360ustar00rootroot00000000000000# Compiled Object files *.slo *.lo *.o *.x # Compiled Dynamic libraries *.so *.dylib # Compiled Static libraries *.lai *.la *.a # Temporary files *~ *.swp # CMake files CMakeCache.txt CMakeFiles/ Makefile # Testing files Testing/ btas_test # Scratch directory work/ # Generated files by Doxygen doc/Doxyfile doc/html/ doc/latex/ # Binary file for unit test unittest/test build build_cblas test BTAS-1.0.0/CMakeLists.txt000066400000000000000000000240411476142407000150070ustar00rootroot00000000000000#; -*-CMake-*- cmake_minimum_required (VERSION 3.14.0) # FetchContent_MakeAvailable # Preload versions/tags of all dependencies ==================================== include(external/versions.cmake) ############################################################################### # CMake defaults to address key pain points ############################################################################### # safety net for dev workflow: accidental install will not affect FindOrFetch* if (NOT DEFINED CACHE{CMAKE_FIND_NO_INSTALL_PREFIX}) set(CMAKE_FIND_NO_INSTALL_PREFIX ON CACHE BOOL "Whether find_* commands will search CMAKE_INSTALL_PREFIX and CMAKE_STAGING_PREFIX; see https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_NO_INSTALL_PREFIX.html#variable:CMAKE_FIND_NO_INSTALL_PREFIX") endif() ############################################################################### # Bring ValeevGroup cmake toolkit ############################################################################### include(FetchContent) if (DEFINED PROJECT_BINARY_DIR) set(VG_CMAKE_KIT_PREFIX_DIR PROJECT_BINARY_DIR) else () set(VG_CMAKE_KIT_PREFIX_DIR CMAKE_CURRENT_BINARY_DIR) endif() FetchContent_Declare( vg_cmake_kit QUIET GIT_REPOSITORY https://github.com/ValeevGroup/kit-cmake.git GIT_TAG ${BTAS_TRACKED_VGCMAKEKIT_TAG} SOURCE_DIR ${${VG_CMAKE_KIT_PREFIX_DIR}}/cmake/vg BINARY_DIR ${${VG_CMAKE_KIT_PREFIX_DIR}}/cmake/vg-build SUBBUILD_DIR ${${VG_CMAKE_KIT_PREFIX_DIR}}/cmake/vg-subbuild ) FetchContent_MakeAvailable(vg_cmake_kit) list(APPEND CMAKE_MODULE_PATH "${vg_cmake_kit_SOURCE_DIR}/modules") ############################################################################### # Announce ourselves ############################################################################### # see https://semver.org/ set(BTAS_MAJOR_VERSION 1) set(BTAS_MINOR_VERSION 0) set(BTAS_MICRO_VERSION 0) set(BTAS_PRERELEASE_ID ) set(BTAS_VERSION "${BTAS_MAJOR_VERSION}.${BTAS_MINOR_VERSION}.${BTAS_MICRO_VERSION}") if (BTAS_PRERELEASE_ID) set(BTAS_EXT_VERSION "${BTAS_VERSION}-${BTAS_PRERELEASE_ID}") else(BTAS_PRERELEASE_ID) set(BTAS_EXT_VERSION "${BTAS_VERSION}") endif(BTAS_PRERELEASE_ID) project(BTAS VERSION ${BTAS_VERSION} DESCRIPTION "BTAS: Basic Tensor Algebra for Seeplusplus" LANGUAGES CXX HOMEPAGE_URL "https://github.com/ValeevGroup/BTAS") enable_language(C) # C needed even for basic platform introspection # extra cmake files are shipped with BTAS list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules") include(FeatureSummary) include(RedefaultableOption) include(CMakePackageConfigHelpers) include(AddCustomTargetSubproject) include(CMakePushCheckState) include(CTest) # defines BUILD_TESTING option # 
Configure options redefaultable_option(BTAS_BUILD_DEPS_FROM_SOURCE "Whether to build missing dependencies from source" OFF) add_feature_info(BUILD_DEPS_FROM_SOURCE BTAS_BUILD_DEPS_FROM_SOURCE "Will build missing dependencies from source") redefaultable_option(BTAS_ASSERT_THROWS "Whether BTAS_ASSERT should throw; enable if BUILD_TESTING=ON" ${BUILD_TESTING}) add_feature_info(ASSERT_THROWS BTAS_ASSERT_THROWS "BTAS_ASSERT(x) will throw if x is false, and not be affected by NDEBUG") redefaultable_option(BTAS_USE_BLAS_LAPACK "Whether to enable BLAS/LAPACK bindings via BLAS++/LAPACK++" ON) add_feature_info(USE_BLAS_LAPACK BTAS_USE_BLAS_LAPACK "Will use BLAS and LAPACK linear algebra distributions via their BLAS++/LAPACK++ interfaces" ) option(ENABLE_WFN91_LINALG_DISCOVERY_KIT "Use linear algebra discovery kit from github.com/wavefunction91 [recommended]" ON) add_feature_info(WFN91LinearAlgebraDiscoveryKit ENABLE_WFN91_LINALG_DISCOVERY_KIT "Linear algebra discovery kit from github.com/wavefunction91 supports many more corner cases than the default CMake modules and/or ICL's BLAS++/LAPACK++ modules") set(TARGET_MAX_INDEX_RANK 6 CACHE STRING "Determines the rank for which the default BTAS index type will use stack (default: 6); this requires Boost.Container") add_feature_info("TARGET_MAX_INDEX_RANK=${TARGET_MAX_INDEX_RANK}" TRUE "default BTAS index type will use stack for rank<=${TARGET_MAX_INDEX_RANK}") set(TARGET_ARCH "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}") ########################## # INSTALL variables ########################## include(GNUInstallDirs) set(BTAS_INSTALL_BINDIR "${CMAKE_INSTALL_BINDIR}" CACHE PATH "BTAS BIN install directory") set(BTAS_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}" CACHE PATH "BTAS INCLUDE install directory") set(BTAS_INSTALL_LIBDIR "${CMAKE_INSTALL_LIBDIR}" CACHE PATH "BTAS LIB install directory") set(BTAS_INSTALL_DATADIR "${CMAKE_INSTALL_DATAROOTDIR}/BTAS/${BTAS_EXT_VERSION}" CACHE PATH "BTAS DATA install directory") set(BTAS_INSTALL_DOCDIR "${BTAS_INSTALL_DATADIR}/doc" CACHE PATH "BTAS DOC install directory") set(BTAS_INSTALL_CMAKEDIR "${CMAKE_INSTALL_LIBDIR}/cmake/BTAS" CACHE PATH "BTAS CMAKE install directory") ########################## # Standard build variables ########################## include(AppendFlags) # Get standard build variables from the environment if they have not already been set if(NOT CMAKE_C_FLAGS OR NOT DEFINED CMAKE_C_FLAGS) set(CMAKE_C_FLAGS "$ENV{CPPFLAGS}") append_flags(CMAKE_C_FLAGS "$ENV{CFLAGS}") endif() if(NOT CMAKE_CXX_FLAGS OR NOT DEFINED CMAKE_CXX_FLAGS) set(CMAKE_CXX_FLAGS "$ENV{CPPFLAGS}") append_flags(CMAKE_CXX_FLAGS "$ENV{CXXFLAGS}") endif() if(NOT CMAKE_EXE_LINKER_FLAGS OR NOT DEFINED CMAKE_EXE_LINKER_FLAGS) set(CMAKE_EXE_LINKER_FLAGS "$ENV{LDFLAGS}") endif() if (NOT CMAKE_CXX_COMPILER) message(FATAL_ERROR "C++ compiler not found") endif() set(CMAKE_SKIP_RPATH FALSE) ########################## # We use C++17 features ########################## # but insist on strict standard set(CMAKE_CXX_STANDARD 17 CACHE STRING "C++ ISO Standard version") if (NOT(CMAKE_CXX_STANDARD EQUAL 17 OR CMAKE_CXX_STANDARD EQUAL 20)) message(FATAL_ERROR "C++ 2017 ISO Standard or higher is required to compile BTAS") endif() # C++20 is only configurable via compile features with cmake 3.12 and older if (CMAKE_CXX_STANDARD EQUAL 20 AND CMAKE_VERSION VERSION_LESS 3.12.0) cmake_minimum_required (VERSION 3.12.0) endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Whether to use extensions of C++ ISO Standard 
version") # Check type support include(CheckTypeSize) check_type_size("long double" BTAS_HAS_LONG_DOUBLE) check_type_size("long long" BTAS_HAS_LONG_LONG) ####################################### # create exportable BTAS library target ####################################### add_library(BTAS INTERFACE) target_compile_features(BTAS INTERFACE "cxx_std_17") target_include_directories(BTAS INTERFACE $ $) install(TARGETS BTAS EXPORT btas COMPONENT BTAS) install(DIRECTORY btas COMPONENT BTAS DESTINATION "${BTAS_INSTALL_INCLUDEDIR}" FILES_MATCHING PATTERN "*.h" PATTERN "*.h.in" EXCLUDE ) ########################## # external dependencies ########################## # optional dependency: ccache, but need to be defined first so that mandatory dependencies can inherit it find_program(CCACHE ccache) if(CCACHE) mark_as_advanced(CCACHE) message (STATUS "Found ccache: ${CCACHE}") set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C++") set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE}" CACHE STRING "Compiler launcher to use for compiling C") endif(CCACHE) if( BTAS_USE_BLAS_LAPACK ) include(external/linalgpp.cmake) endif() include(external/boost.cmake) ########################## # configure BTAS_ASSERT ########################## if (BTAS_ASSERT_THROWS) target_compile_definitions(BTAS INTERFACE -DBTAS_ASSERT_THROWS=1) endif(BTAS_ASSERT_THROWS) ########################## # dox ########################## add_subdirectory(doc) ########################## # checking/testing ########################## if (BUILD_TESTING) add_custom_target_subproject(btas check USES_TERMINAL COMMAND ${CMAKE_CTEST_COMMAND} -V -R "btas/unit/run") add_subdirectory(unittest) else(BUILD_TESTING) add_custom_target_subproject(btas check COMMAND echo "WARNING: unit testing disabled. 
To enable, give -DBUILD_TESTING=ON to cmake") endif(BUILD_TESTING) configure_file( ${PROJECT_SOURCE_DIR}/btas/version.h.in ${PROJECT_BINARY_DIR}/btas/version.h ) install(FILES ${PROJECT_BINARY_DIR}/btas/version.h DESTINATION "${BTAS_INSTALL_INCLUDEDIR}/btas") # Create the version file write_basic_package_version_file(btas-config-version.cmake VERSION ${BTAS_VERSION} COMPATIBILITY AnyNewerVersion) # Create the targets file export(EXPORT btas NAMESPACE BTAS:: FILE "${PROJECT_BINARY_DIR}/btas-targets.cmake") ## Create the configure file configure_package_config_file(cmake/btas-config.cmake.in "${PROJECT_BINARY_DIR}/btas-config.cmake" INSTALL_DESTINATION "${BTAS_INSTALL_CMAKEDIR}" PATH_VARS CMAKE_INSTALL_PREFIX BTAS_INSTALL_BINDIR BTAS_INSTALL_INCLUDEDIR BTAS_INSTALL_LIBDIR BTAS_INSTALL_DOCDIR BTAS_INSTALL_CMAKEDIR) ## Install config, version, and target files install(EXPORT btas FILE "btas-targets.cmake" DESTINATION "${BTAS_INSTALL_CMAKEDIR}" NAMESPACE BTAS:: COMPONENT btas-config) install(FILES "${PROJECT_BINARY_DIR}/btas-config.cmake" "${PROJECT_BINARY_DIR}/btas-config-version.cmake" DESTINATION "${BTAS_INSTALL_CMAKEDIR}" COMPONENT btas-config) add_custom_target_subproject(btas install-config COMMAND ${CMAKE_COMMAND} -DCOMPONENT=btas-config -P ${PROJECT_BINARY_DIR}/cmake_install.cmake COMMENT "Installing BTAS config components") if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) feature_summary(WHAT ALL DESCRIPTION "=== BTAS Package/Feature Info ===") feature_summary(FILENAME ${CMAKE_CURRENT_BINARY_DIR}/features.log WHAT ALL) endif() ############################################################################### # appendix: misc details ############################################################################### SET(CMAKE_COLOR_MAKEFILE ON) BTAS-1.0.0/LICENSE.md000066400000000000000000000027401476142407000136550ustar00rootroot00000000000000BSD 3-Clause License Copyright (c) 2019, BTAS All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
BTAS-1.0.0/README.md

Description
===========

Basic Tensor Algebra Subroutines (BTAS) is a C++ library for tensor algebra. BTAS is a reference implementation of the Tensor Working Group concept spec. The library can optionally be used header-only, at the cost of much lower performance for most operations.

Prerequisites
=============

* C++17 compiler
* CMake
* Boost C++ libraries
  - (required) Container, Iterator, Random
  - (optional) Serialization for serialization (non-header-only)
* (used by default, strongly recommended, but can be disabled) BLAS+LAPACK libraries and their BLAS++/LAPACK++ C++ APIs for optimized operations (non-header-only)

Building and Installing
=======================

TL;DR version
* `cmake -S /path/to/BTAS/srcdir -B /path/to/BTAS/builddir -DCMAKE_PREFIX_PATH="/path/to/boost;/path/to/blas/and/lapack"`
* optional: `cmake --build /path/to/BTAS/builddir --target check`
* if configured with `-DBTAS_BUILD_DEPS_FROM_SOURCE=ON`: `cmake --build /path/to/BTAS/builddir --target build-boost-in-BTAS`
* `cmake --build /path/to/BTAS/builddir --target install`

## obtaining prerequisites

* Linear algebra (BLAS+LAPACK): should come with your dev toolchain (e.g., on MacOS) or can be installed using the system package manager or as a vendor-provided package (e.g., Intel Math Kernel Libraries)
* Boost:
  - It is recommended to use a package manager to install Boost. This can be done as follows:
    - APT package manager (e.g., on Ubuntu Linux): `apt-get install libboost-all-dev`
    - Homebrew package manager (on MacOS): `brew install boost`
  - You can also try to build Boost yourself by following the instructions [here](https://www.boost.org/doc/libs/1_84_0/more/getting_started/unix-variants.html).
  - The last resort is to let BTAS build Boost from source, as a CMake _subproject_ using [FetchContent](https://cmake.org/cmake/help/latest/module/FetchContent.html). Unfortunately, Boost's [emerging CMake harness](https://github.com/boostorg/cmake/) used to build it is not yet fully functional, hence may not be as robust as desired. Here are some hints:
    - Set the CMake cache variable `BTAS_BUILD_DEPS_FROM_SOURCE` to `ON` (either via the command line or the `ccmake` GUI).
    - If BTAS is the top-level CMake project (i.e., it is not being built as a subproject itself), installing BTAS by building its install target may not build the Boost libraries automatically. Thus the user may need to build the `build-boost-in-BTAS` target manually before building the `install` target.
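For illustration, here is that workflow end-to-end; the checkout path and install prefix below are placeholders, only the cache variables and targets are prescribed by BTAS:

```sh
# hypothetical paths -- adjust to your checkout/install locations
cmake -S ./BTAS -B ./BTAS/build \
      -DBTAS_BUILD_DEPS_FROM_SOURCE=ON \
      -DCMAKE_INSTALL_PREFIX=$HOME/btas-install
cmake --build ./BTAS/build --target build-boost-in-BTAS   # build Boost first
cmake --build ./BTAS/build --target install               # then install BTAS
```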
## useful CMake variables

- `CMAKE_CXX_COMPILER` -- specifies the C++ compiler (by default CMake will look for the C++ compiler in `PATH`)
- `CMAKE_INSTALL_PREFIX` -- specifies the installation prefix (by default CMake will install to `/usr/local`)
- `CMAKE_BUILD_TYPE` -- specifies the build type (by default CMake will build in `Release` mode)
- `CMAKE_PREFIX_PATH` -- semicolon-separated list of paths specifying the locations of dependencies
- `BTAS_USE_BLAS_LAPACK` -- specifies whether to enable the use of BLAS/LAPACK via the BLAS++/LAPACK++ APIs; the default is `ON`
- `BTAS_BUILD_DEPS_FROM_SOURCE` -- specifies whether to enable building the missing dependencies (Boost) from source; the default is `OFF`
- `BUILD_TESTING` -- specifies whether to build unit tests; the default is `ON`
- `TARGET_MAX_INDEX_RANK` -- specifies the rank for which the default BTAS index type will use stack; the default is `6`

BTAS-1.0.0/btas/array_adaptor.h

#ifndef __BTAS_ARRAYADAPTOR_H_
#define __BTAS_ARRAYADAPTOR_H_

// adaptors for std "array" containers
#include #include #include #include #include
#ifdef BTAS_HAS_BOOST_CONTAINER
#include
#endif
#include #include #include #include

namespace btas {
  template struct array_adaptor;

  /// Adaptor from std::array
  template struct array_adaptor< std::array > {
    typedef std::array array;
    typedef typename array::value_type value_type;

    static array construct(std::size_t n) {
      assert(n <= N);
      array result;
      // fill elements n+1 ... N-1 with zeroes
      std::fill_n(result.begin() + n, N - n, value_type{});
      return result;
    }
    static array construct(std::size_t n, T value) {
      assert(n <= N);
      array result;
      std::fill_n(result.begin(), n, value);
      return result;
    }
    static void resize(array& x, std::size_t n) {
      assert(x.size() == N);
      assert(x.size() >= n);
    }
    static void print(const array& a, std::ostream& os) {
      os << "{";
      for(std::size_t i = 0; i != N; ++i) {
        os << a[i];
        if (i != (N - 1)) os << ",";
      }
      os << "}";
    }
  };

  template constexpr std::size_t rank(const std::array& x) noexcept {
    return N;
  }

  /// Adaptor from const-size array
  template struct array_adaptor< T[N] > {
    typedef T (array)[N];
    typedef T value_type;

    static void print(const array& a, std::ostream& os) {
      os << "{";
      for(std::size_t i = 0; i != N; ++i) {
        os << a[i];
        if (i != (N - 1)) os << ",";
      }
      os << "}";
    }
  };

  template constexpr std::size_t rank(const T (&x)[N]) noexcept {
    return N;
  }

  template std::ostream& operator<<(std::ostream& os, const std::array& x) {
    array_adaptor >::print(x,os);
    return os;
  }

  /// Adaptors for sequence container, e.g.
std::vector, btas::varray, and std::initializer_list template ::value>::type> std::size_t rank(const Array& x) { return x.size(); } template struct array_adaptor { typedef Array array; typedef typename Array::value_type value_type; static array construct(std::size_t N) { return array(N); } static array construct(std::size_t N, value_type value) { array result(N); std::fill(result.begin(), result.end(), value); return result; } static void resize(array& x, std::size_t N) { x.resize(N); } static void print(const array& a, std::ostream& os) { std::size_t n = rank(a); os << "{"; for(std::size_t i = 0; i != n; ++i) { os << a[i]; if (i != (n - 1)) os << ","; } os << "}"; } }; template std::ostream& operator<<(std::ostream& os, const btas::varray& x) { array_adaptor >::print(x,os); return os; } template std::ostream& operator<<(std::ostream& os, const std::vector& x) { array_adaptor >::print(x,os); return os; } template std::ostream& operator<<(std::ostream& os, const std::initializer_list& x) { array_adaptor >::print(x,os); return os; } #ifdef BTAS_HAS_BOOST_CONTAINER template std::ostream& operator<<(std::ostream& os, const boost::container::small_vector& x) { array_adaptor>::print(x,os); return os; } #endif } namespace std { #if __cplusplus < 201402L // add C++14 components to make transition to C++14 easier template const T* cbegin(const T(&x)[N]) { return &x[0]; } template const T* cend(const T(&x)[N]) { return &x[N]; } template const T* rbegin(T(&x)[N]) { return &x[N-1]; } template const T* rend(T(&x)[N]) { return &x[0] - 1; } template const T* cbegin(const T* x) { return x; } template const T* cbegin(T* x) { return x; } template T* begin(T* x) { return x; } template constexpr auto cbegin(const C& x) -> decltype(std::begin(x)) { return std::begin(x); } template constexpr auto cend(const C& x) -> decltype(std::end(x)) { return std::end(x); } template auto rbegin(C& x) -> decltype(x.rbegin()) { return x.rbegin(); } template auto rend(C& x) -> decltype(x.rend()) { return x.rend(); } #endif #if __cplusplus <= 201402L // add useful bits to make transition to C++17 easier template ().data())>::value>::type* = nullptr> constexpr auto data(C& c) -> decltype(c.data()) { return c.data(); } template ().data())>::value>::type* = nullptr> constexpr auto data(const C& c) -> decltype(c.data()) { return c.data(); } template constexpr T* data(T (&array)[N]) noexcept { return array; } template constexpr const E* data(const std::initializer_list& il) noexcept { return il.begin(); } template ().size())>::value>::type* = nullptr> constexpr auto size(const C& c) -> decltype(c.size()) { return c.size(); } template constexpr std::size_t size(const T (&array)[N]) noexcept { return N; } #endif template struct make_unsigned > { typedef std::vector::type > type; }; template struct make_unsigned > { typedef std::initializer_list::type > type; }; template struct make_unsigned > { typedef std::array::type, N> type; }; template struct make_unsigned > { typedef btas::varray::type > type; }; #ifdef BTAS_HAS_BOOST_CONTAINER template struct make_unsigned > { typedef boost::container::small_vector::type,N> type; }; #endif template struct make_unsigned { typedef typename make_unsigned::type uT; typedef uT (type)[N]; }; } namespace btas { template struct replace_value_type; template struct replace_value_type, U> { typedef std::vector type; }; template struct replace_value_type,U> { typedef std::initializer_list type; }; template struct replace_value_type,U> { typedef std::array type; }; template struct replace_value_type,U> { 
typedef btas::varray type; }; #ifdef BTAS_HAS_BOOST_CONTAINER template struct replace_value_type,U> { typedef boost::container::small_vector type; }; #endif template struct replace_value_type { typedef U (type)[N]; }; } #ifdef BTAS_HAS_BOOST_SERIALIZATION #ifndef BOOST_SERIALIZATION_STD_ARRAY // legacy switch to disable BTAS-provided serialization of std::array #define BOOST_SERIALIZATION_STD_ARRAY # if BOOST_VERSION / 100 < 1056 namespace boost { namespace serialization { template void serialize(Archive & ar, std::array & a, const unsigned int version) { ar & boost::serialization::make_array(a.data(), a.size()); } } // namespace serialization } // namespace boost # endif // boost < 1.56 does not serialize std::array ... provide our own #endif // not defined BOOST_SERIALIZATION_STD_ARRAY? provide our own #endif #if defined(BTAS_HAS_BOOST_CONTAINER) && defined(BTAS_HAS_BOOST_SERIALIZATION) namespace boost { namespace serialization { /// boost serialization for boost::container::small_vector template void serialize (Archive& ar, boost::container::small_vector& x, const unsigned int version) { boost::serialization::split_free(ar, x, version); } template void save (Archive& ar, const boost::container::small_vector& x, const unsigned int version) { const boost::serialization::collection_size_type count(x.size()); ar << BOOST_SERIALIZATION_NVP(count); if (count != decltype(count)(0)) ar << boost::serialization::make_array(x.data(), count); } template void load (Archive& ar, boost::container::small_vector& x, const unsigned int version) { boost::serialization::collection_size_type count; ar >> BOOST_SERIALIZATION_NVP(count); x.resize(count); if (count != decltype(count)(0)) ar >> boost::serialization::make_array(x.data(), count); } } // namespace serialization } // namespace boost #endif namespace madness { namespace archive { // Forward declarations template class archive_array; template inline archive_array wrap(const T*, unsigned int); template struct ArchiveLoadImpl> { static inline void load(const Archive& ar, boost::container::small_vector& x) { std::size_t n{}; ar& n; x.resize(n); ar & madness::archive::wrap(x.data(),n); } }; template struct ArchiveStoreImpl> { static inline void store(const Archive& ar, const boost::container::small_vector& x) { ar& x.size() & madness::archive::wrap(x.data(), x.size()); } }; } // namespace archive } // namespace madness #endif /* __BTAS_ARRAYADAPTOR_H_ */ BTAS-1.0.0/btas/btas.h000066400000000000000000000015021476142407000142770ustar00rootroot00000000000000#ifndef __BTAS_BTAS_H #define __BTAS_BTAS_H #include // check prerequisite headers, in case btas is used as headers-only, bail if not #define BTAS_SIGNAL_MISSING_PREREQUISITES #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _CONTRACT_OPT_BAGEL #include #else #include #endif #endif // __BTAS_BTAS_H BTAS-1.0.0/btas/btas_fwd.h000066400000000000000000000050741476142407000151470ustar00rootroot00000000000000// // Created by efv on 9/23/19. // #ifndef BTAS_BTAS_FWD_H #define BTAS_BTAS_FWD_H #include #include // std::allocator #include #include namespace btas { /// test _Index conforms the TWG.Index concept /// check only value_type and operator[] template class is_index; /// BoxOrdinal is an implementation detail of BoxRange. /// It maps the index to its ordinal value. It also knows whether /// the map is contiguous (i.e. whether adjacent indices have adjacent ordinal /// values). 
template class BoxOrdinal; template std::ostream& operator<<(std::ostream&, const BoxOrdinal<_Order,_Index>&); /// RangeNd extends BaseRangeNd to compute ordinals, as specified by \c _Ordinal . /// It conforms to the \ref sec_TWG_Range_Concept_Range_Box "TWG.BoxRange" concept. template > class RangeNd; template std::ostream& operator<<(std::ostream&, const RangeNd<_Order,_Index, _Ordinal>&); namespace DEFAULT { using range = btas::RangeNd<>; } // namespace DEFAULT /// checks _Tensor meets the TWG.Tensor concept requirements /// checks only value_type, range_type, storage_type, and rank() member template class is_tensor; /// checks _Tensor meets the TWG.BoxTensor concept requirements template class is_boxtensor; template > class Tensor; template using rebind_tensor_t = typename Tensor::template rebind_t<_T>; template > std::ostream& operator<<(std::ostream&, const Tensor<_T,_Range, _Storage>&); template > class varray; template ::value>::type > class infinite_sequence_adaptor; } #endif //BTAS_BTAS_FWD_H BTAS-1.0.0/btas/corange.h000066400000000000000000000110751476142407000147720ustar00rootroot00000000000000/* * corange.h * * Created on: Dec 27, 2013 * Author: evaleev */ #ifndef BTAS_CORANGE_H_ #define BTAS_CORANGE_H_ #include #include namespace btas { /// CoRange is a pack of Range objects template class CoRange; /// CoRangeIterator implements iteration over CoRange /// it resembles Boost.Iterator's zip_iterator. template class CoRangeIterator; /// CoRangeIterator over a pair of ranges. template class CoRangeIterator { public: typedef CoRange corange_type; typedef std::tuple iterator; typedef std::tuple< typename R1::iterator::value_type, typename R2::iterator::value_type> value_type; CoRangeIterator(corange_type& corange, const iterator& iter) : corange_(corange), iter_(iter) {} value_type operator*() const { return std::make_tuple(*(std::get<0>(iter_)), *(std::get<1>(iter_))); } iterator iter() { return iter_; } /// reset second iterator to begin, if reached end void operator++() { ++(std::get<0>(iter_)); ++(std::get<1>(iter_)); if (std::get<1>(iter_) == std::get<1>(corange_.ranges()).end()) std::get<1>(iter_) = std::get<1>(corange_.ranges()).begin(); } template friend bool operator==(const CoRangeIterator<_R1,_R2>& i1, const CoRangeIterator<_R1,_R2>& i2); private: corange_type& corange_; iterator iter_; }; template bool operator==(const CoRangeIterator& i1, const CoRangeIterator& i2) { return std::get<0>(i1.iter_) == std::get<0>(i2.iter_); } template bool operator!=(const CoRangeIterator& i1, const CoRangeIterator& i2) { return not operator==(i1,i2); } /// This is a CoRange of two Ranges. The first Range iterates once from begin to end; the second Range /// re-starts from begin if necessary. 
template class CoRange { public: typedef std::tuple ranges_type; typedef CoRangeIterator iterator; typedef CoRangeIterator const_iterator; CoRange(R1& r1, R2& r2) : ranges_(r1,r2) {} const ranges_type& ranges() const { return ranges_; } iterator begin() { return iterator(*this, std::make_tuple(std::get<0>(ranges_).begin(), std::get<1>(ranges_).begin())); } const_iterator begin() const { return const_iterator(*this, std::make_tuple(std::get<0>(ranges_).begin(), std::get<1>(ranges_).begin())); } iterator end() { return iterator(*this, std::make_tuple(std::get<0>(ranges_).end(), std::get<1>(ranges_).end())); } const_iterator end() const { return const_iterator(*this, std::make_tuple(std::get<0>(ranges_).end(), std::get<1>(ranges_).end())); } private: ranges_type ranges_; }; template CoRange make_corange(Ranges&... args) { return CoRange(args...); } #if 0 /// This is hard or impossible ... need to make a tuple by applying a function to a tuple /// CoRangeIterator iterates over the pack of Ranges template class CoRangeIterator : public std::tuple { public: typedef CoRange corange_type; typedef std::tuple< typename Ranges::iterator::value_type ...> value_type; // how??? value_type operator*() const { return } }; /// CoRange is a pack of Range objects template class CoRange { public: typedef std::tuple ranges_type; typedef CoRangeIterator iterator; CoRange(Ranges&... ranges) : ranges_(ranges...) {} void print(std::ostream& os) { for(auto i=0; i::value; ++i) { os << "CoRange[" << i << "]: "; switch (i) { case 0: os << std::get<0>(ranges_) << std::endl; break; case 1: os << std::get<1>(ranges_) << std::endl; break; default: assert(false); } } } private: std::tuple ranges_; }; #endif } #endif /* BTAS_CORANGE_H_ */ BTAS-1.0.0/btas/defaults.h000066400000000000000000000017501476142407000151620ustar00rootroot00000000000000/* * defaults.h * * Created on: Dec 19, 2013 * Author: evaleev */ #ifndef BTAS_DEFAULTS_H_ #define BTAS_DEFAULTS_H_ #include #include #ifdef BTAS_HAS_BOOST_CONTAINER #include #else #include #endif // // Default index type // namespace btas { namespace DEFAULT { /// default index type #ifdef BTAS_HAS_BOOST_CONTAINER #ifndef BTAS_TARGET_MAX_INDEX_RANK # ifdef BTAS_DEFAULT_TARGET_MAX_INDEX_RANK # define BTAS_TARGET_MAX_INDEX_RANK BTAS_DEFAULT_TARGET_MAX_INDEX_RANK # else # define BTAS_TARGET_MAX_INDEX_RANK 6 #endif #endif template using index = boost::container::small_vector; #else template using index = btas::varray; #endif using index_type = index; /// default storage class template using storage = std::vector<_T>; } } // namespace btas #endif /* BTAS_DEFAULTS_H_ */ BTAS-1.0.0/btas/error.h000066400000000000000000000022311476142407000144770ustar00rootroot00000000000000#ifndef __BTAS_ERROR_H #define __BTAS_ERROR_H #include namespace btas { /// exception class, used to mark exceptions specific to BTAS class exception : public std::exception { public: exception(const char* m) : message_(m) { } virtual const char* what() const noexcept { return message_; } private: const char* message_; }; // class exception /// Place a break point in this function to stop before btas::exception is thrown. inline void exception_break() { } } // namespace btas #define BTAS_STRINGIZE( s ) #s #define BTAS_EXCEPTION_MESSAGE( file , line , mess ) \ "BTAS: exception at " file "(" BTAS_STRINGIZE( line ) "): " mess ". Break in btas::exception_break to learn more." 
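// Illustrative usage sketch (not part of the original header); `t` is a
// hypothetical tensor object and both macros are defined below:
//
//   BTAS_ASSERT(t.rank() == 2);                   // no-op or throw, per BTAS_ASSERT_THROWS
//   if (t.empty()) BTAS_EXCEPTION("t is empty");  // always throws btas::exception with file/line info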
#define BTAS_EXCEPTION( m ) \ { \ btas::exception_break(); \ throw btas::exception ( BTAS_EXCEPTION_MESSAGE( __FILE__ , __LINE__ , m ) ); \ } // configure BTAS_ASSERT #ifdef BTAS_ASSERT_THROWS # define BTAS_ASSERT( a ) if(! ( a ) ) BTAS_EXCEPTION( "assertion failed" ) #else // defined BTAS_ASSERT_THROWS # define BTAS_ASSERT( a ) assert((a)); #endif // defined BTAS_ASSERT_THROWS #endif // __BTAS_ERROR_H BTAS-1.0.0/btas/features.h000066400000000000000000000024761476142407000151770ustar00rootroot00000000000000/** * @file features.h * * include this to import macros describing features of BTAS * the only available macros are: * - BTAS_IS_USABLE : #define'd to 1 if BTAS is usable * - BTAS_HAS_BOOST_CONTAINER : #define'd to 1 if BTAS detected Boost.Container * * ALSO: a library configured with cmake (hence, non-header-only) will define BTAS_HAS_BOOST_SERIALIZATION (via a compiler flag) to 1 if * Boost.Serialization library were found. */ #ifndef BTAS_FEATURES_H_ #define BTAS_FEATURES_H_ #ifdef __has_include #if !defined(BTAS_HAS_BOOST_ITERATOR) && \ __has_include() #define BTAS_HAS_BOOST_ITERATOR 1 #endif // define BTAS_HAS_BOOST_ITERATOR if Boost.Iterator headers are // available #if !defined(BTAS_HAS_BOOST_CONTAINER) && \ __has_include() #define BTAS_HAS_BOOST_CONTAINER 1 #endif // define BTAS_HAS_BOOST_CONTAINER if Boost.Container headers are // available #endif // defined( __has_include) #ifdef BTAS_HAS_BOOST_ITERATOR #define BTAS_IS_USABLE 1 #else #ifdef BTAS_SIGNAL_MISSING_PREREQUISITES #error \ "Cannot find Boost.Iterators headers => BTAS is not usable as a headers-only library; download latest Boost from boost.org and provide -I/path/to/boost to the compiler" #endif #endif #endif /* BTAS_FEATURES_H_ */ BTAS-1.0.0/btas/fwd.h000066400000000000000000000002001476142407000141200ustar00rootroot00000000000000// // Created by efv on 1/3/21. 
// #ifndef BTAS_FWD_H #define BTAS_FWD_H #include "../btas/btas_fwd.h" #endif // BTAS_FWD_H BTAS-1.0.0/btas/generic/000077500000000000000000000000001476142407000146135ustar00rootroot00000000000000BTAS-1.0.0/btas/generic/axpy_impl.h000066400000000000000000000120321476142407000167640ustar00rootroot00000000000000#ifndef __BTAS_AXPY_IMPL_H #define __BTAS_AXPY_IMPL_H 1 #include #include #include #include #include #include #include #include #include #include namespace btas { // ================================================================================================ /// Call BLAS depending on type of Tensor class template struct axpy_impl { }; /// Case that alpha is trivially multipliable to elements template<> struct axpy_impl { template static void call_impl ( const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY, generic_impl_tag) { for (unsigned long i = 0; i < Nsize; ++i, itrX += incX, itrY += incY) { (*itrY) += alpha * (*itrX); } } #ifdef BTAS_HAS_BLAS_LAPACK template static void call_impl ( const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY, blas_lapack_impl_tag) { static_assert(std::is_same_v,iterator_value_t<_IteratorY>>, "mismatching iterator value types"); using T = iterator_value_t<_IteratorX>; blas::axpy( Nsize, static_cast(alpha), static_cast(&(*itrX)), incX, static_cast(&(*itrY)), incY ); } #endif template static void call ( const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY) { call_impl( Nsize, alpha, itrX, incX, itrY, incY, blas_lapack_impl_t<_IteratorX,_IteratorY>() ); } }; /// Case that alpha is multiplied recursively by AXPY /// Note that incX and incY are disabled for recursive call template<> struct axpy_impl { template static void call ( const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY) { for (unsigned long i = 0; i < Nsize; ++i, itrX += incX, itrY += incY) { axpy(alpha, *itrX, *itrY); } } }; // ================================================================================================ /// Generic implementation of BLAS AXPY in terms of C++ iterator template void axpy ( const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY) { typedef std::iterator_traits<_IteratorX> __traits_X; typedef std::iterator_traits<_IteratorY> __traits_Y; static_assert(std::is_same::value, "value type of Y must be the same as that of X"); static_assert(is_random_access_iterator_v<_IteratorX>, "iterator X must be a random access iterator"); static_assert(is_random_access_iterator_v<_IteratorY>, "iterator Y must be a random access iterator"); typedef typename __traits_X::value_type __value_X; typedef typename std::conditional::value, __value_X, _T>::type __alpha; axpy_impl::value>::call(Nsize, static_cast<__alpha>(alpha), itrX, incX, itrY, incY); } // ================================================================================================ /// Convenient wrapper to call BLAS AXPY from tensor objects template< typename _T, class _TensorX, class _TensorY, class = typename std::enable_if< 
is_boxtensor<_TensorX>::value & is_boxtensor<_TensorY>::value
   >::type
>
void axpy (
   const _T& alpha,
   const _TensorX& X,
         _TensorY& Y)
{
   typedef typename _TensorX::value_type value_type;
   static_assert(std::is_same::value, "value type of Y must be the same as that of X");

   if (X.empty()) {
      Y.clear();
      return;
   }

   if (Y.empty()) {
      Y.resize(btas::extent(X));
      NumericType::fill(std::begin(Y), std::end(Y), NumericType::zero());
   }
   else {
      assert( range(X) == range(Y) );
   }

   auto itrX = std::begin(X);
   auto itrY = std::begin(Y);

   axpy (X.size(), alpha, itrX, 1, itrY, 1);
}

} // namespace btas

#endif // __BTAS_AXPY_IMPL_H

BTAS-1.0.0/btas/generic/blas_lapack_delegator.h

#pragma once
#include

namespace btas {

struct generic_impl_tag {};

#ifdef BTAS_HAS_BLAS_LAPACK
struct blas_lapack_impl_tag {};

template struct blas_lapack_impl_delegator {
  using tag_type = std::conditional_t< are_blas_lapack_compatible_v<_Iterators...>, blas_lapack_impl_tag, generic_impl_tag >;
};
#else
template struct blas_lapack_impl_delegator {
  using tag_type = generic_impl_tag;
};
#endif

template using blas_lapack_impl_t = typename blas_lapack_impl_delegator<_Iterators...>::tag_type;

}

BTAS-1.0.0/btas/generic/contract.h

#ifndef __BTAS_CONTRACT_H
#define __BTAS_CONTRACT_H

#include #include #include #include #include #include #include #include #include #include #include

namespace btas {

/// contract tensors; for example, Cijk = \sum_{m,p} Aimp * Bmjpk
///
/// Synopsis:
/// enum {j,k,l,m,n,o};
///
/// contract(alpha,A,{m,o,k,n},B,{l,k,j},beta,C,{l,n,m,o,j});
///
///       o       j          o j
///       |       |          | |
///   m - A - k - B   =  m - C
///       |       |          | |
///       n       l          n l
///
/// NOTE: in case of TArray, this performs many unused instances of gemv and gemm depending on tensor rank
///
template<
   typename _T,
   class _TensorA, class _TensorB, class _TensorC,
   class _AnnotationA, class _AnnotationB, class _AnnotationC,
   class = typename std::enable_if<
      is_boxtensor<_TensorA>::value &
      is_boxtensor<_TensorB>::value &
      is_boxtensor<_TensorC>::value &
      is_container<_AnnotationA>::value &
      is_container<_AnnotationB>::value &
      is_container<_AnnotationC>::value
   >::type
>
void contract(
   const _T& alpha,
   const _TensorA& A, const _AnnotationA& aA,
   const _TensorB& B, const _AnnotationB& aB,
   const _T& beta,
         _TensorC& C, const _AnnotationC& aC)
{
   // Check that the ranks of the tensors match that of the annotation.
assert(rank(A) == rank(aA)); assert(rank(B) == rank(aB)); assert(C.empty() || (rank(C) == rank(aC))); // check index A auto __sort_indexA = _AnnotationA{aA}; std::sort(std::begin(__sort_indexA), std::end(__sort_indexA)); assert(std::unique(std::begin(__sort_indexA), std::end(__sort_indexA)) == std::end(__sort_indexA)); // check index B auto __sort_indexB = _AnnotationB{aB}; std::sort(std::begin(__sort_indexB), std::end(__sort_indexB)); assert(std::unique(std::begin(__sort_indexB), std::end(__sort_indexB)) == std::end(__sort_indexB)); // check index C auto __sort_indexC = _AnnotationC{aC}; std::sort(std::begin(__sort_indexC), std::end(__sort_indexC)); assert(std::unique(std::begin(__sort_indexC), std::end(__sort_indexC)) == std::end(__sort_indexC)); typedef btas::DEFAULT::index_type Permutation; // permute index A Permutation __permute_indexA; resize(__permute_indexA, aA.size()); // permute index B Permutation __permute_indexB; resize(__permute_indexB, aB.size()); // permute index C Permutation __permute_indexC; resize(__permute_indexC, aC.size()); size_type m = 0; size_type n = 0; size_type k = 0; // row index for(auto itrA = std::begin(aA); itrA != std::end(aA); ++itrA) { if(!std::binary_search(std::begin(__sort_indexB), std::end(__sort_indexB), *itrA)) { __permute_indexA[m] = *itrA; __permute_indexC[m] = *itrA; ++m; } } // index to be contracted for(auto itrA = std::begin(aA); itrA != std::end(aA); ++itrA) { if( std::binary_search(std::begin(__sort_indexB), std::end(__sort_indexB), *itrA)) { __permute_indexA[m+k] = *itrA; __permute_indexB[k] = *itrA; ++k; } } // column index for(auto itrB = std::begin(aB); itrB != std::end(aB); ++itrB) { if(!std::binary_search(std::begin(__sort_indexA), std::end(__sort_indexA), *itrB)) { __permute_indexB[k+n] = *itrB; __permute_indexC[m+n] = *itrB; ++n; } } // check result index C Permutation __sort_permute_indexC(__permute_indexC); std::sort(std::begin(__sort_permute_indexC), std::end(__sort_permute_indexC)); assert(std::equal(std::begin(__sort_permute_indexC), std::end(__sort_permute_indexC), std::begin(__sort_indexC))); optional_ptr __refA; __refA.set_external(&A); // permute A if necessary if(!std::equal(std::begin(aA), std::end(aA), std::begin(__permute_indexA))) { __refA.set_managed(new _TensorA()); permute(A, aA, const_cast<_TensorA&>(*__refA), __permute_indexA); } optional_ptr __refB; __refB.set_external(&B); // permute B if necessary if(!std::equal(std::begin(aB), std::end(aB), std::begin(__permute_indexB))) { __refB.set_managed(new _TensorB()); permute(B, aB, const_cast<_TensorB&>(*__refB), __permute_indexB); } bool __C_to_permute = false; // to set rank of C if(C.empty()) { Permutation __zero_shape; resize(__zero_shape, m+n); std::fill(std::begin(__zero_shape), std::end(__zero_shape), 0); C.resize(__zero_shape); } optional_ptr<_TensorC> __refC; __refC.set_external(&C); // permute C if necessary if(!std::equal(std::begin(aC), std::end(aC), std::begin(__permute_indexC))) { __refC.set_managed(new _TensorC()); permute(C, aC, *__refC, __permute_indexC); __C_to_permute = true; } // call BLAS functions if(rank(A) == k && rank(B) == k) { assert(false && "dot should be called instead"); } else if(k == 0) { scal(beta, *__refC); ger (alpha, *__refA, *__refB, *__refC); } else if(rank(A) == k) { gemv(blas::Op::Trans, alpha, *__refB, *__refA, beta, *__refC); } else if(rank(B) == k) { gemv(blas::Op::NoTrans, alpha, *__refA, *__refB, beta, *__refC); } else { gemm(blas::Op::NoTrans, blas::Op::NoTrans, alpha, *__refA, *__refB, beta, *__refC); } // permute back 
if(__C_to_permute) { permute(*__refC, __permute_indexC, C, aC); } } template< typename _T, class _TensorA, class _TensorB, class _TensorC, typename _UA, typename _UB, typename _UC, class = typename std::enable_if< is_tensor<_TensorA>::value & is_tensor<_TensorB>::value & is_tensor<_TensorC>::value & std::is_same::value & std::is_same::value >::type > void contract( const _T& alpha, const _TensorA& A, std::initializer_list<_UA> aA, const _TensorB& B, std::initializer_list<_UB> aB, const _T& beta, _TensorC& C, std::initializer_list<_UC> aC) { contract(alpha, A, btas::DEFAULT::index<_UA>(aA), B, btas::DEFAULT::index<_UB>(aB), beta, C, btas::DEFAULT::index<_UC>(aC) ); } } //namespace btas #endif BTAS-1.0.0/btas/generic/converge_class.h000066400000000000000000000367761476142407000200040ustar00rootroot00000000000000#ifndef BTAS_GENERIC_CONV_BASE_CLASS #define BTAS_GENERIC_CONV_BASE_CLASS #include #include #include #include namespace btas { /** \brief Default class to deciding when the ALS problem is converged Instead of using the change in the loss function \f$ \Delta \| \mathcal{T} - \mathcal{\hat{T}} \| \leq \epsilon \f$ where \f$ \mathcal{\hat{T}} = \sum_{r=1}^R a_1 \circ a_2 \circ \dots \circ a_N \f$ check the difference in the sum of average elements in factor matrices \f$ \sum_n^{ndim} \frac{\|A^{i}_n - A^{i+1}_n\|}{dim(A^{i}_n} \leq \epsilon \f$ **/ template class NormCheck { using ind_t = typename Tensor::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; public: /// constructor for the base convergence test object /// \param[in] tol tolerance for ALS convergence explicit NormCheck(double tol = 1e-3) : tol_(tol) { } ~NormCheck() = default; /// Function to check convergence of the ALS problem /// convergence when \f$ \sum_n^{ndim} \frac{\|A^{i}_n - A^{i+1}_n\|}{dim(A^{i}_n} \leq \epsilon \f$ /// \param[in] btas_factors Current set of factor matrices bool operator () (const std::vector & btas_factors){ size_t ndim = btas_factors.size() - 1; if (prev.empty() || prev[0].size() != btas_factors[0].size()) { prev.clear(); for (size_t i = 0; i < ndim; ++i) { prev.push_back(Tensor(btas_factors[i].range())); prev[i].fill(0.0); } } auto diff = 0.0; rank_ = btas_factors[0].extent(1); for (size_t r = 0; r < ndim; ++r) { ord_t elements = btas_factors[r].size(); auto change = prev[r] - btas_factors[r]; diff += std::sqrt(btas::dot(change, change) / elements); prev[r] = btas_factors[r]; } if (diff < this->tol_) { return true; } return false; } private: double tol_; std::vector prev; // Set of previous factor matrices size_t ndim; // Number of factor matrices ind_t rank_; // Rank of the CP problem }; /** \brief Class used to decide when ALS problem is converged The "fit" is defined as \f$ 1 - \frac{\|X-full(M)\|}{\|X\|} \leq \epsilon\f$ where X is the exact tensor and M is the reconstructed CP tensor. This fit is loosely the proportion of the data described by the CP model, i.e., a fit of 1 is perfect. 
**/ template class FitCheck{ public: using ind_t = typename Tensor::range_type::index_type::value_type; using dtype = typename Tensor::value_type; using ord_t = typename range_traits::ordinal_type; using RT = real_type_t; using RTensor = rebind_tensor_t; /// constructor for the base convergence test object /// \param[in] tol tolerance for ALS convergence default = 1e-4 explicit FitCheck(double tol = 1e-4): tol_(tol){ } ~FitCheck() = default; /// Function to check convergence of the ALS problem /// convergence when \f$ 1 - \frac{\|X-full(M)\|}{\|X\|} \leq \epsilon \f$ /// \param[in] btas_factors Current set of factor matrices /// \param[in] V Partial grammian matrices (rank x rank matricies from \f$ V^{i} = A^{iT} A^{i} \f$ /// default = std::vector(); bool operator()(const std::vector &btas_factors, const std::vector & V = std::vector()) { if (normT_ < 0.0) BTAS_EXCEPTION("One must set the norm of the reference tensor"); auto n = btas_factors.size() - 2; ord_t size = btas_factors[n].size(); ind_t rank = btas_factors[n].extent(1); auto *ptr_A = btas_factors[n].data(); auto *ptr_MtKRP = MtKRP_.data(); auto lam_ptr = btas_factors[n + 1].data(); dtype iprod = 0.0; for (ord_t i = 0; i < size; ++i) { iprod += *(ptr_MtKRP + i) * btas::impl::conj(*(ptr_A + i)) * *(lam_ptr + i % rank); } double normFactors = norm(btas_factors, V); double normResidual = sqrt(abs(normT_ * normT_ + normFactors * normFactors - 2 * abs(iprod))); double fit = 1. - (normResidual / normT_); double fitChange = abs(fitOld_ - fit); fitOld_ = fit; if (verbose_) { std::cout << MtKRP_.extent(1) << "\t" << iter_ << "\t" << std::setprecision(16) << fit << "\t" << fitChange << std::endl; } if (fitChange < tol_) { converged_num++; if (converged_num == 2) { iter_ = 0; converged_num = 0; final_fit_ = fitOld_; fitOld_ = 1.0; return true; } } ++iter_; return false; } /// Set the norm of the reference tensor T /// \param[in] normT Norm of the reference tensor; void set_norm(double normT){ normT_ = normT; } /// Set the current iteration's matricized tensor times KRP /// \f$ MtKRP = X_{n} * A^{1} \odot A^{2} \odot \dots \odot A^{n-1} \odot A^{n+1} \odot \dots \odot A^{N} \f$ /// Where N is the number of modes in the reference tensor X and \f$X_{n} \f$ is the nth mode /// matricization of X. /// \param[in] MtKRP matricized reference tensor times KRP void set_MtKRP(Tensor & MtKRP){ MtKRP_ = MtKRP; } /// Returns the fit of the CP approximation, \f$ 1 - \frac{\|X - full{M}\|}{\|T\|} \f$ /// from the previous () operator call. /// Where \f$ \hat{T} \f$ is the CP approximation of T /// \param[in] hit_max_iters bool, if CP_ALS strategy didn't converge hit_max_iters = true /// will return fitOld_ and reset the object, else return final_fit_; /// \returns fit of the CP approximation double get_fit(bool hit_max_iters = false){ if(hit_max_iters){ iter_ = 0; converged_num = 0; final_fit_ = fitOld_; fitOld_ = 1.0; } return final_fit_; } /// Option to print fit and change in fit in the () operator call /// \param[in] verb bool which turns off/on fit printing. 
void verbose(bool verb) { verbose_ = verb; } protected: double tol_; double fitOld_ = 1.0; double normT_ = -1.0; double final_fit_ = 0.0; size_t iter_ = 0; size_t converged_num = 0; Tensor MtKRP_; bool verbose_ = false; /// Function to compute the L2 norm of a tensors computed from the \c btas_factors /// \param[in] btas_factors Current set of factor matrices /// \param[in] V Partial grammian matrices (rank x rank matricies from \f$ V^{i} = A^{iT} A^{i} \f$ double norm(const std::vector &btas_factors, const std::vector & V) { ind_t rank = btas_factors[0].extent(1); auto n = btas_factors.size() - 1; Tensor coeffMat; auto &temp1 = btas_factors[n]; typename Tensor::value_type one = 1.0; ger(one, temp1.conj(), temp1, coeffMat); auto rank2 = rank * (ord_t)rank; Tensor temp(rank, rank); auto *ptr_coeff = coeffMat.data(); if (V.empty()) { for (size_t i = 0; i < n; ++i) { gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, btas_factors[i].conj(), btas_factors[i], 0.0, temp); auto *ptr_temp = temp.data(); for (ord_t j = 0; j < rank2; ++j) { *(ptr_coeff + j) *= *(ptr_temp + j); } } } else { for (size_t i = 0; i < n; ++i) { auto *ptr_V = V[i].data(); for (ord_t j = 0; j < rank2; ++j) { *(ptr_coeff + j) *= *(ptr_V + j); } } } dtype nrm = 0.0; for (auto &i : coeffMat) { nrm += i; } return sqrt(abs(nrm)); } }; /** \brief Class used to decide when ALS problem is converged The "fit" is defined as \f$ 1 - \frac{\|X_1-full(M_1)\|}{\|X_1\|} - \frac{\|X_2-full(M_2)\|}{\|X_2\|}\leq \epsilon\f$ where \f$ X_1 \f$ and \f$ X_2 \f$ are tensors coupled by a single mode \f$ M_1 \f$ and \f$ M_2 \f$ are the coupled reconstructed CP tensors. This fit is loosely the proportion of the data described by the CP model, i.e., a fit of 1 is perfect. **/ template class CoupledFitCheck { public: using ind_t = typename Tensor::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; /// constructor for the base convergence test object /// \param[in] tol tolerance for ALS convergence explicit CoupledFitCheck(size_t lhs_dims, double tol = 1e-4) : tol_(tol), ndimL_(lhs_dims) { } ~CoupledFitCheck() = default; /// Function to check convergence of the ALS problem /// convergence when \f$ \|T - \frac{\hat{T}^{i+1}_n\|}{dim(A^{i}_n} \leq \epsilon \f$ /// \param[in] btas_factors Current set of factor matrices bool operator () (const std::vector & btas_factors) { if (normTR_ < 0 || normTL_ < 0) BTAS_EXCEPTION("One must set the norm of the reference tensor"); // First KRP (hadamard contract) out the first dimension of MtKRP using the last factor matrix // Need to do this for the right and the left side ord_t contract_size = btas_factors[ndimL_ - 1].extent(0); ord_t rank = btas_factors[0].extent(1); Tensor tempL(rank), tempR(rank); tempL.fill(0.0); tempR.fill(0.0); { auto &A = btas_factors[ndimL_ - 1]; for (ord_t i = 0; i < contract_size; ++i) { auto *ptr_A = A.data() + i * rank; auto *ptr_MtKRP = MtKRPL_.data() + i * rank; for (ord_t r = 0; r < rank; ++r) { *(tempL.data() + r) += *(ptr_A + r) * *(ptr_MtKRP + r); } } } { auto n = btas_factors.size() - 2; contract_size = btas_factors[n].extent(0); auto &A = btas_factors[n]; for (ord_t i = 0; i < contract_size; ++i) { auto *ptr_A = A.data() + i * rank; auto *ptr_MtKRP = MtKRPR_.data() + i * rank; for (ord_t r = 0; r < rank; ++r) { *(tempR.data() + r) += *(ptr_A + r) * *(ptr_MtKRP + r); } } } // Scale the final product by lambdas // These are the innerproducts of left and right tensors with their factors double iprodL = 0.0; double iprodR = 0.0; auto n = 
      auto n = btas_factors.size() - 1;
      {
        auto *ptr_A = btas_factors[n].data();
        auto *ptr_temp = tempL.data();
        for (ord_t i = 0; i < rank; ++i) {
          iprodL += *(ptr_temp + i) * *(ptr_A + i);
        }
      }
      {
        auto *ptr_A = btas_factors[n].data();
        auto *ptr_temp = tempR.data();
        for (ord_t i = 0; i < rank; ++i) {
          iprodR += *(ptr_temp + i) * *(ptr_A + i);
        }
      }

      // Take the inner product of the factors <[[A,B,C...]], [[A,B,C,...]]>
      std::vector<Tensor> tensors_left;
      std::vector<Tensor> tensors_right;
      tensors_left.push_back(btas_factors[0]);
      tensors_right.push_back(btas_factors[0]);
      for (size_t i = 1; i < ndimL_; ++i) {
        tensors_left.push_back(btas_factors[i]);
      }
      for (size_t i = ndimL_; i < n + 1; ++i) {
        tensors_right.push_back(btas_factors[i]);
      }
      tensors_left.push_back(btas_factors[n]);

      double normFactorsL = norm(tensors_left);
      double normFactorsR = norm(tensors_right);

      // Find the residuals, sqrt(<X,X> + <[[A,B,C...]],[[A,B,C,...]]> - 2 * <X,[[A,B,C...]]>)
      double normResidualL = sqrt(abs(normTL_ * normTL_ + normFactorsL * normFactorsL - 2 * iprodL));
      double normResidualR = sqrt(abs(normTR_ * normTR_ + normFactorsR * normFactorsR - 2 * iprodR));
      double fit = 1 - ((normResidualR + normResidualL) / (normTR_ + normTL_));

      double fitChange = abs(fitOld_ - fit);
      fitOld_ = fit;
      if (verbose_) {
        std::cout << MtKRPL_.extent(1) << "\t" << iter_ << "\t" << std::setprecision(16) << fit << "\t" << fitChange
                  << std::endl;
      }
      if (fitChange < tol_) {
        iter_ = 0;
        final_fit_ = fitOld_;
        fitOld_ = 1.0;
        return true;
      }
      ++iter_;
      return false;
    }

    /// Set the norms of the reference tensors Tleft and Tright
    /// \param[in] normTL Norm of the left reference tensor
    /// \param[in] normTR Norm of the right reference tensor
    void set_norm(double normTL, double normTR) {
      normTL_ = normTL;
      normTR_ = normTR;
    }

    /// Set the current iteration's matricized tensor times KRP
    /// \f$ MtKRP = T_{n} * A^{1} \odot A^{2} \odot \dots \odot A^{n-1} \odot A^{n+1} \odot \dots \odot A^{N} \f$
    /// where N is the number of modes in the left reference tensor Tleft and \f$ T_{n} \f$ is the nth mode
    /// matricization of Tleft.
    /// \param[in] MtKRPL matricized left reference tensor times KRP
    void set_MtKRPL(Tensor &MtKRPL) {
      MtKRPL_ = MtKRPL;
    }

    /// Set the current iteration's matricized tensor times KRP
    /// \f$ MtKRP = T_{n} * A^{1} \odot A^{2} \odot \dots \odot A^{n-1} \odot A^{n+1} \odot \dots \odot A^{N} \f$
    /// where N is the number of modes in the right reference tensor Tright and \f$ T_{n} \f$ is the nth mode
    /// matricization of Tright.
    /// \param[in] MtKRPR matricized right reference tensor times KRP
    void set_MtKRPR(Tensor &MtKRPR) {
      MtKRPR_ = MtKRPR;
    }

    /// Returns the fit of the CP approximation, \f$ 1 - \frac{\|T - \hat{T}\|}{\|T\|} \f$,
    /// from the previous () operator call,
    /// where \f$ T = T_{left}^T T_{right} \f$ and \f$ \hat{T} \f$ is the CP approximation of T
    /// \param[in] hit_max_iters bool, if the CP_ALS strategy didn't converge set hit_max_iters = true;
    /// this returns fitOld_ and resets the object, else returns final_fit_
    /// \returns fit of the CP approximation
    double get_fit(bool hit_max_iters = false) {
      if (hit_max_iters) {
        iter_ = 0;
        final_fit_ = fitOld_;
        fitOld_ = 1.0;
      }
      return final_fit_;
    }

    // returns the L2 norm of the tensor generated by the CP
    // factor matrices.
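    // The norm is evaluated with the usual CP Hadamard identity,
    //   ||[[lambda; A^1 ... A^N]]||^2 = sum_{r,r'} (lambda lambda^T * A^{1T}A^1 * ... * A^{NT}A^N)_{r,r'}
    // where * denotes an elementwise product of rank x rank Gram matrices,
    // so the full tensor is never reconstructed (see the private norm() helper below).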
double get_norm(const std::vector & btas_array){ return norm(btas_array); } void verbose(bool verb) { verbose_ = verb; } private: double tol_; double fitOld_ = 1.0; double final_fit_ = 0.0; double normTL_ = -1.0, normTR_ = -1.0; size_t iter_ = 0; Tensor MtKRPL_, MtKRPR_; size_t ndimL_; bool verbose_ = false; /// Function to compute the L2 norm of a tensors computed from the \c btas_factors /// \param[in] btas_factors Current set of factor matrices /// \param[in] V Partial grammian matrices (rank x rank matricies from \f$ V^{i} = A^{iT} A^{i} \f$ double norm(const std::vector & btas_factors) { ord_t rank = btas_factors[0].extent(1); auto n = btas_factors.size() - 1; Tensor coeffMat(rank, rank); auto temp = btas_factors[n]; temp.resize(Range{Range1{rank}, Range1{1}}); gemm(blas::Op::NoTrans, blas::Op::Trans, 1.0, temp, temp, 0.0, coeffMat); auto rank2 = rank * rank; for (size_t i = 0; i < n; ++i) { Tensor temp(rank, rank); gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, btas_factors[i], btas_factors[i], 0.0, temp); auto *ptr_coeff = coeffMat.data(); auto *ptr_temp = temp.data(); for (ord_t j = 0; j < rank2; ++j) { *(ptr_coeff + j) *= *(ptr_temp + j); } } auto nrm = 0.0; for(auto & i: coeffMat){ nrm += i; } return sqrt(abs(nrm)); } }; } //namespace btas #endif // BTAS_GENERIC_CONV_BASE_CLASS BTAS-1.0.0/btas/generic/core_contract.h000066400000000000000000000040251476142407000176120ustar00rootroot00000000000000#ifndef BTAS_CORE_CONTRACT_H #define BTAS_CORE_CONTRACT_H #include #include namespace btas { /// Function used by Tucker and Randomized compression. /// Takes an order-N tensor swaps the mode of interest, \c mode, /// to the front and contracts it with a rank reducing /// factor matrix, \c Q, discovered by Tucker or Randomized decomposition methods. /// \param[in, out] A The order-N tensor to be contracted with Q /// \param[in] Q Factor matrix to be contracted with mode \c mode of \c A /// \param[in] mode Mode of A to be contracted with Q /// \param[in] transpose Is Q transposed in the matrix/tensor contraction? /// Default value = true. template void core_contract(Tensor &A, const Tensor &Q, size_t mode, bool transpose = true) { using btas::Range; using ord_t = typename range_traits::ordinal_type; size_t ndim = A.rank(); // Reorder A so contraction of nth mode will be in the front swap_to_first(A, mode, false, false); std::vector temp_dims, A_indices, Q_indicies; // Allocate the appropriate memory for the resulting tensor temp_dims.push_back((transpose) ? Q.extent(1) : Q.extent(0)); for (size_t i = 1; i < ndim; i++) temp_dims.push_back(A.extent(i)); Tensor temp(Range{temp_dims}); temp_dims.clear(); // Build index vectors to contract over the first index of A and // The correct index of Q depending if transpose == true. Q_indicies.push_back((transpose) ? 0 : ndim); Q_indicies.push_back((transpose) ? ndim : 0); temp_dims.push_back(ndim); A_indices.push_back(0); for (size_t i = 1; i < ndim; i++) { A_indices.push_back(i); temp_dims.push_back(i); } // contract Q^(T?) (x)_n A = temp; contract(1.0, Q, Q_indicies, A, A_indices, 0.0, temp, temp_dims); // A is now the (smaller) contracted tensor temp A = temp; // Reorder A as it was before contraction swap_to_first(A, mode, true, false); } } // namespace btas #endif // BTAS_CORE_CONTRACT_H BTAS-1.0.0/btas/generic/coupled_cp_als.h000066400000000000000000000637671476142407000177630ustar00rootroot00000000000000// // Created by Karl Pierce on 7/24/19. 
// #ifndef BTAS_GENERIC_COUPLED_CP_ALS_H #define BTAS_GENERIC_COUPLED_CP_ALS_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace btas{ /** \brief Computes the Canonical Product (CP) decomposition of two order-N tensors using the loss function \f$ = \|B - \hat{B}\| + \|Z - \hat{Z}\| \f$ where \f$ B \f$ and \f$ Z \f$ have a coupled dimension \f$ B \in \bf{R}^{X \times \dots} \f$ and \f$ Z \in \bf{R}^{X \times \dots} \f$ and thus share a factor matrix Decomposition optimization will use alternating least squares (ALS). \warning this code takes a non-const reference \c tensor_ref but does not modify the values. This is a result of API (reshape needs non-const tensor) Synopsis: \code // Constructors COUPLED_CP_ALS A(B, Z) // COUPLED_CP_ALS object with empty factor // matrices and no symmetries COUPLED_CP_ALS A(B, Z, symms) // COUPLED_CP_ALS object with empty factor // matrices and symmetries // Operations A.compute_rank(rank, converge_test) // Computes the CP_ALS of tensors to // rank, rank build and HOSVD options A.compute_rank_random(rank, converge_test) // Computes the CP_ALS of tensors to // rank. Factor matrices built at rank // with random numbers A.compute_error(converge_test, omega) // Computes the CP_ALS of tensors to // 2-norm // error < omega. A.compute_geometric(rank, converge_test, step) // Computes CP_ALS of tensors to // rank with // geometric steps of step between // guesses. A.compute_PALS(converge_test) // Not yet implemented. // computes CP_ALS of tensors to // rank = 3 * max_dim(tensor) // in 4 panels using a modified // HOSVD initial guess //See documentation for full range of options // Accessing Factor Matrices A.get_factor_matrices() // Returns a vector of factor matrices, if // they have been computed A.reconstruct() // Returns the tensor T computed using the // CP factor matrices \endcode */ template > class COUPLED_CP_ALS : public CP { public: using CP::A; using CP::ndim; using CP::normCol; using CP::generate_KRP; using CP::generate_V; using CP::norm; using CP::symmetries; using typename CP::ind_t; using typename CP::ord_t; /// Create a COUPLED CP ALS object, child class of the CP object /// that stores the reference tensors. /// Reference tensor has no symmetries. /// \param[in] left the reference tensor, \f$ B\ f$ to be decomposed. /// \param[in] right the reference tensor, \f$ Z \f$ to be decomposed. COUPLED_CP_ALS(Tensor& left, Tensor& right) : CP(left.rank() + right.rank() - 1), tensor_ref_left(left), tensor_ref_right(right), ndimL(left.rank()) { for (size_t i = 0; i < ndim; ++i) { symmetries.push_back(i); } } /// Create a CP ALS object, child class of the CP object /// that stores the reference tensors. /// Reference tensor has symmetries. /// Symmetries should be set such that the higher modes index /// are set equal to lower mode indices (a 4th order tensor, /// where the second & third modes are equal would have a /// symmetries of {0,1,1,3} /// \param[in] left the reference tensor, \f$ B \f$ to be decomposed. /// \param[in] right the reference tensor, \f$ Z \f$ to be decomposed. /// \param[in] symms the symmetries of the reference tensor. 
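    ///
    /// For example (a minimal sketch; \c B and \c Z are placeholder tensors
    /// assumed to share their first mode, and \c B has two equal trailing modes):
    /// \code
    /// btas::Tensor<double> B(10, 4, 4), Z(10, 6);  // coupled along mode 0
    /// std::vector<size_t> symms{0, 1, 1, 3};       // modes 1 and 2 of B are equal
    /// COUPLED_CP_ALS<btas::Tensor<double>, CoupledFitCheck<btas::Tensor<double>>> cpd(B, Z, symms);
    /// \endcode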
    COUPLED_CP_ALS(Tensor &left, Tensor &right, std::vector<size_t> &symms)
        : CP<Tensor, ConvClass>(left.rank() + right.rank() - 1),
          tensor_ref_left(left),
          tensor_ref_right(right),
          ndimL(left.rank()) {
      symmetries = symms;
      for (size_t i = 0; i < ndim; ++i) {
        if (symmetries[i] > i) BTAS_EXCEPTION("Symmetries should always refer to factors at earlier positions");
      }
      if (symmetries.size() != ndim) BTAS_EXCEPTION("Tensor describing symmetries must be defined for all dimensions");
    }

    /// \brief Computes decomposition of the order-N tensor \c tensor
    /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor)
    /// Initial guess for factor matrices starts at rank = max_dim(reference_tensor)
    /// and builds rank \c panels times by \c RankStep * max_dim(reference_tensor) increments
    /// \param[in, out] converge_list Tests to see if ALS is converged, holds the value of fit;
    /// there should be as many tests as there are panels
    /// \param[in] RankStep CP_ALS increment of the panel
    /// \param[in] panels number of times the rank will be built
    /// \param[in] max_als Max number of iterations allowed to converge the ALS approximation, default = 20
    /// \param[in] fast_pI Should the pseudo inverse be computed using a fast Cholesky decomposition?
    /// default = false
    /// \param[in] calculate_epsilon Should the 2-norm error be calculated
    /// \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$? Default = false.
    /// \param[in] direct Should the CP decomposition be computed without
    /// calculating the Khatri-Rao product? Default = true.
    /// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck;
    /// else if calculate_epsilon = true, returns the 2-norm error between the exact and approximate tensor;
    /// else returns -1
    double compute_PALS(std::vector<ConvClass> &converge_list, double RankStep = 0.5, size_t panels = 4,
                        int max_als = 20, bool fast_pI = false, bool calculate_epsilon = false,
                        bool direct = true) override {
      BTAS_EXCEPTION("Function not yet implemented");
    }

   protected:
    Tensor &tensor_ref_left;   // Tensor in the first term of the loss function
    Tensor &tensor_ref_right;  // Tensor in the second term of the loss function
    size_t ndimL;              // Number of dimensions the left tensor has

    /// Creates an initial guess by computing the SVD of each mode.
    /// If the rank of a mode is smaller than the CP rank requested,
    /// the rest of that factor matrix is filled with random numbers.
    /// Builds factor matrices starting with R = (1 or SVD_rank)
    /// and moves to R = \c rank,
    /// incrementing the column dimension, R, by \c step
    /// \param[in] rank The rank of the CP decomposition.
    /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit.
    /// \param[in] direct Should the CP decomposition be computed without calculating the
    /// Khatri-Rao product?
    /// \param[in] max_als If the CP decomposition is to finite
    /// error, max_als is the highest rank approximation computed before giving up
    /// on CP-ALS.
    /// \param[in] calculate_epsilon Should the 2-norm
    /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ .
    /// \param[in] step
    /// CP_ALS built from r = 1 to r = rank. r increments by step.
    /// \param[in, out] epsilon The 2-norm
    /// error between the exact and approximated reference tensor
    /// \param[in] SVD_initial_guess build initial guess from left singular vectors
    /// \param[in] SVD_rank rank of the initial guess using left singular vectors
    /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast Cholesky decomposition?
    /// on return reports whether \c fast_pI was successful.
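    ///
    /// A typical entry point that dispatches to this function, shown as a
    /// minimal sketch (the tensors \c B and \c Z, their norms \c normB and
    /// \c normZ, and the target \c rank are placeholders):
    /// \code
    /// CoupledFitCheck<btas::Tensor<double>> conv(B.rank());
    /// conv.set_norm(normB, normZ);  // norms must be set before ALS starts
    /// COUPLED_CP_ALS<btas::Tensor<double>, CoupledFitCheck<btas::Tensor<double>>> cpd(B, Z);
    /// double fit = cpd.compute_rank(rank, conv);  // calls build() internally
    /// \endcode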
// TODO make use of symmetries in this function void build(ind_t rank, ConvClass &converge_test, bool direct, ind_t max_als, bool calculate_epsilon, ind_t step, double &epsilon, bool SVD_initial_guess, ind_t SVD_rank, bool &fast_pI) override { // If its the first time into build and SVD_initial_guess // build and optimize the initial guess based on the left // singular vectors of the reference tensor. if (A.empty() && SVD_initial_guess) { if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD"); std::vector modes_w_dim_LT_svd; A = std::vector(ndim); // Determine which factor matrices one can fill using SVD initial guess // start with left then do right size_t ndimR = tensor_ref_right.rank(); for (size_t i = 0; i < ndimL; i++) { if (tensor_ref_left.extent(i) < SVD_rank) { modes_w_dim_LT_svd.push_back(i); } } for (size_t i = 1; i < ndimR; i++) { if (tensor_ref_right.extent(i) < SVD_rank) { modes_w_dim_LT_svd.push_back(i + ndimL - 1); } } for (size_t tensor = 0; tensor < 2; ++tensor) { auto &tensor_ref = tensor == 0 ? tensor_ref_left : tensor_ref_right; size_t ndim_curr = tensor_ref.rank(); // Fill all factor matrices with their singular vectors, // because we contract X X^T (where X is reference tensor) to make finding // singular vectors an eigenvalue problem some factor matrices will not be // full rank; bool left = tensor == 0; for (size_t i = left ? 0 : 1; i < ndim_curr; i++) { ind_t R = tensor_ref.extent(i); Tensor S(R, R), lambda(R); // Contract refrence tensor to make it square matrix of mode i gemm(blas::Op::NoTrans, blas::Op::Trans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i), 0.0, S); // Find the Singular vectors of the matrix using eigenvalue decomposition eigenvalue_decomp(S, lambda); // Fill a factor matrix with the singular vectors with the largest corresponding singular // values lambda = Tensor(R, SVD_rank); lambda.fill(0.0); auto lower_bound = {0,0}; auto upper_bound = {R, ((R > SVD_rank) ? SVD_rank : R)}; auto view = make_view(S.range().slice(lower_bound, upper_bound), S.storage()); auto l_iter = lambda.begin(); for(auto iter = view.begin(); iter != view.end(); ++iter, ++l_iter){ *(l_iter) = *(iter); } size_t A_dim = left ? i : i + ndimL - 1; A[A_dim] = lambda; } } boost::random::mt19937 generator(random_seed_accessor()); boost::random::uniform_real_distribution<> distribution(-1.0, 1.0); // Fill the remaining columns in the set of factor matrices with dimension < SVD_rank with random numbers for(auto& i: modes_w_dim_LT_svd) { size_t dim = i < ndimL ? i : i - ndimL + 1; auto &tensor_ref = i < ndimL ? tensor_ref_left : tensor_ref_right; ind_t R = tensor_ref.extent(dim), zero = 0; auto lower_bound = {zero, R}; auto upper_bound = {R, SVD_rank}; auto view = make_view(A[i].range().slice(lower_bound, upper_bound), A[i].storage()); for (auto iter = view.begin(); iter != view.end(); ++iter) { *(iter) = distribution(generator); } } // Normalize the columns of the factor matrices and // set the values al lambda, the weigt of each order 1 tensor Tensor lambda(Range{Range1{SVD_rank}}); A.push_back(lambda); for (size_t i = 0; i < ndim; ++i) { this->normCol(A[i]); } // Optimize this initial guess. ALS(SVD_rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI); } // This loop keeps track of column dimension for (ind_t i = (A.empty()) ? 
0 : A.at(0).extent(1); i < rank; i += step) { ind_t rank_new = i + 1; // This loop walks through the factor matrices for (size_t j = 0; j < ndim; ++j) { // select a factor matrix // If no factor matrices exists, make a set of factor matrices // and fill them with random numbers that are column normalized // and create the weighting vector lambda auto left = (j < ndimL); auto &tensor_ref = left ? tensor_ref_left : tensor_ref_right; if (i == 0) { Tensor a(Range{tensor_ref.range().range((left ? j : j - ndimL + 1)), Range1{rank_new}}); fill_random(a); A.push_back(a); this->normCol(j); } // If the factor matrices have memory allocated, rebuild each matrix // with new column dimension col_dimension_old + skip // fill the new columns with random numbers and normalize the columns else { ind_t row_extent = A[0].extent(0), rank_old = A[0].extent(1), zero = 0; Tensor b(Range{A[0].range().range(0), Range1{rank_new}}); { auto lower_old = {zero, zero}, upper_old = {row_extent, rank_old}; auto old_view = make_view(b.range().slice(lower_old, upper_old), b.storage()); auto A_itr = A[0].begin(); for(auto iter = old_view.begin(); iter != old_view.end(); ++iter, ++A_itr){ *(iter) = *(A_itr); } } { auto lower_new = {zero, rank_old}, upper_new = {row_extent, rank_new}; auto new_view = make_view(b.range().slice(lower_new, upper_new), b.storage()); boost::random::mt19937 generator(random_seed_accessor()); boost::random::uniform_real_distribution<> distribution(-1.0, 1.0); for(auto iter = new_view.begin(); iter != new_view.end(); ++iter){ *(iter) = distribution(generator); } } A.erase(A.begin()); A.push_back(b); if (j + 1 == ndim) { A.erase(A.begin()); } } } { Tensor lam(Range{Range1{rank_new}}); A.push_back(lam); } // compute the ALS of factor matrices with rank = i + 1. ALS(rank_new, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI); } } /// Create a rank \c rank initial guess using /// random numbers from a uniform distribution /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. /// \param[in] direct The CP decomposition be computed without calculating the /// Khatri-Rao product? /// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. /// \param[in] calculate_epsilon Should the 2-norm /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ . /// \param[in] step /// CP_ALS built from r =1 to r = rank. r increments by step. /// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in] SVD_initial_guess build inital guess from left singular vectors /// \param[in] SVD_rank rank of the initial guess using left singular vector /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// return if \c fast_pI was sucessful void build_random(ind_t rank, ConvClass &converge_test, bool direct, ind_t max_als, bool calculate_epsilon, double &epsilon, bool &fast_pI) override { BTAS_EXCEPTION("Function not yet implemented"); } /// performs the ALS method to minimize the loss function for a single rank /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. /// \param[in] dir The CP decomposition be computed without calculating the /// Khatri-Rao product? 
/// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. Default = 1e5. /// \param[in] calculate_epsilon Should the 2-norm /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$. /// \param[in] tcutALS /// How small difference in factor matrices must be to consider ALS of a /// single rank converged. Default = 0.1. /// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// return if \c fast_pI was successful. void ALS(ind_t rank, ConvClass &converge_test, bool dir, unsigned int max_als, bool calculate_epsilon, double &epsilon, bool &fast_pI) { size_t count = 0; // Until either the initial guess is converged or it runs out of iterations // update the factor matrices with or without Khatri-Rao product // intermediate bool is_converged = false; bool matlab = fast_pI; while (count < max_als && !is_converged) { count++; this->num_ALS++; for (size_t i = 0; i < ndim; i++) { auto tmp = symmetries[i]; if (tmp == i) { direct(i, rank, fast_pI, matlab, converge_test); } else { A[i] = A[tmp]; } } is_converged = converge_test(A); } detail::get_fit(converge_test, epsilon, (this->num_ALS == max_als)); epsilon = 1.0 - epsilon; } /// Computes an optimized factor matrix holding all others constant. /// No Khatri-Rao product computed, immediate contraction /// Does this by first contracting a factor matrix with the refrence tensor /// Then computes hadamard/contraction products along all other modes except n. /// In this we are minimizing a sum of two norms. If n = coupled dimension then we have to minimize /// f = ||B^{X}_{abcd...} - \hat{B}^X_{abcd...} || + || B^{X}_{ijkl...} - \hat{B}^X_{ijkl...}|| /// where X is the coupled dimension. otherwise we just minimize one of the two terms. /// \param[in] n The mode being optimized, all other modes held constant /// \param[in] rank The current rank, column dimension of the factor matrices /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// return if computing the fast_pI was successful. /// \param[in, out] matlab If \c fast_pI = true then try to solve VA = B instead of taking pseudoinverse /// in the same manner that matlab would compute the inverse. /// return if \matlab was successful /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. void direct(size_t n, ind_t rank, bool &fast_pI, bool &matlab, ConvClass &converge_test) { if (n == 0) { // Start by computing (B^{X}_{abcd...} C^{-X}_{abcd...}) + B^{X}_{ijkl...} C^{-X}_{ijkl...}) = K // where C^{-X}_{abcd...} = C^{a} \odot C^{b} \odot C^{c} \odot C^{d} \dots ( the khatri-rao // product without the factor matrix C^X ind_t coupled_dim = tensor_ref_left.extent(0); Tensor K(coupled_dim, rank); K.fill(0.0); for (int tensor = 0; tensor < 2; ++tensor) { auto left = tensor == 0; auto &tensor_ref = left ? tensor_ref_left : tensor_ref_right; size_t ndim_curr = tensor_ref.rank(), A_dim = left ? 
ndimL - 1 : this->ndim - 1, contract_size = tensor_ref.extent(ndim_curr - 1), LHSsize = tensor_ref.size() / contract_size; auto R = tensor_ref.range(); Tensor contract_tensor(LHSsize, rank); tensor_ref.resize(Range{Range1{LHSsize}, Range1{contract_size}}); gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, tensor_ref, A[A_dim], 0.0, contract_tensor); tensor_ref.resize(R); --A_dim; for (size_t contract_dim = ndim_curr - 2; contract_dim > 0; --contract_dim, --A_dim) { contract_size = tensor_ref.extent(contract_dim); LHSsize /= contract_size; contract_tensor.resize(Range{Range1{LHSsize}, Range1{contract_size}, Range1{rank}}); Tensor temp(LHSsize, rank); temp.fill(0.0); const auto &a = A[A_dim]; for (ord_t i = 0; i < LHSsize; ++i) { for (ord_t k = 0; k < contract_size; ++k) { for (ord_t r = 0; r < rank; ++r) { temp(i, r) += contract_tensor(i, k, r) * a(k, r); } } } contract_tensor = temp; } K += contract_tensor; } // Next form the Hadamard product sum // J = (C^{a\quadT} C^a * C^{b\quadT} C^b * \dots + C^{i\quadT} C^i * C^{j\quadT} C^j + \dots Tensor J1(rank, rank); J1.fill(1.0); auto rank2 = rank * (ord_t) rank; { for (size_t i = 1; i < ndimL; ++i) { Tensor temp(rank, rank); gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, A[i], A[i], 0.0, temp); for (ord_t j = 0; j < rank2; ++j) { *(J1.data() + j) *= *(temp.data() + j); } } Tensor J2(rank, rank); J2.fill(1.0); for (size_t i = ndimL; i < ndim; ++i) { Tensor temp(rank, rank); gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, A[i], A[i], 0.0, temp); for (ord_t j = 0; j < rank2; ++j) { *(J2.data() + j) *= *(temp.data() + j); } } J1 += J2; } // Finally Form the product of K * J^\dagger Tensor a0(coupled_dim, rank); gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, K, pseudoInverse(J1, fast_pI), 0.0, a0); this->normCol(a0); A[0] = a0; } else { bool left = n < ndimL; Tensor &tensor_ref = left ? tensor_ref_left : tensor_ref_right; size_t ndim_curr = tensor_ref.rank(), A_dim = 0, contract_size = tensor_ref.extent(0), LHSsize = tensor_ref.size() / contract_size, pseudo_rank = rank, skip_dim = A[n].extent(0); auto R = tensor_ref.range(); tensor_ref.resize(Range{Range1{contract_size}, Range1{LHSsize}}); Tensor contract_tensor(LHSsize, rank); gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, tensor_ref, A[A_dim], 0.0, contract_tensor); tensor_ref.resize(R); A_dim = left ? 
ndimL - 1 : ndim - 1; // TODO Use pointer arithmetic here instead of () operator for (size_t contract_dim = ndim_curr - 1; contract_dim > 0; --contract_dim, --A_dim) { contract_size = tensor_ref.extent(contract_dim); LHSsize /= contract_size; contract_tensor.resize(Range{Range1{LHSsize}, Range1{contract_size}, Range1{pseudo_rank}}); const auto &currA = A[A_dim]; if (A_dim == n) { pseudo_rank *= contract_size; } else if (A_dim > n) { Tensor temp(LHSsize, pseudo_rank); temp.fill(0.0); for (ord_t i = 0; i < LHSsize; ++i) { for (ord_t j = 0; j < contract_size; ++j) { for (ord_t r = 0; r < rank; ++r) { temp(i, r) += contract_tensor(i, j, r) * currA(j, r); } } } contract_tensor = temp; } else { Tensor temp(LHSsize, pseudo_rank); temp.fill(0.0); for (ord_t i = 0; i < LHSsize; ++i) { for (ord_t j = 0; j < contract_size; ++j) { for (ord_t k = 0; k < skip_dim; ++k) { for (ord_t r = 0; r < rank; ++r) { temp(i, r + k * rank) += contract_tensor(i, j, r + k * rank) * currA(j, r); } } } } contract_tensor = temp; } } contract_tensor.resize(Range{Range1{skip_dim}, Range1{rank}}); Tensor G(rank, rank); gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, A[0], A[0], 0.0, G); auto rank2 = rank * (ord_t) rank; for (size_t i = (left ? 1 : ndimL); i < (left ? ndimL : ndim); ++i) { if (i != n) { Tensor temp(rank, rank); gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, A[i], A[i], 0.0, temp); for (ord_t j = 0; j < rank2; ++j) { *(G.data() + j) *= *(temp.data() + j); } } } if (n == ndimL - 1) detail::set_MtKRPL(converge_test, contract_tensor); else if(n == this->ndim - 1) detail::set_MtKRPR(converge_test, contract_tensor); Tensor an(skip_dim, rank); gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, contract_tensor, pseudoInverse(G, fast_pI), 0.0, an); this->normCol(an); A[n] = an; } } }; } // namespace btas #endif //BTAS_GENERIC_COUPLED_CP_ALS_H BTAS-1.0.0/btas/generic/cp.h000066400000000000000000000656541476142407000154060ustar00rootroot00000000000000// // Created by Karl Pierce on 2/25/19. // #ifndef BTAS_GENERIC_CP_H #define BTAS_GENERIC_CP_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace btas { namespace detail { // Functions that set the value of the original tensor // times the factor matrices (excluding one factor) // if the converge_class isn't a FitCheck do nothing template void set_MtKRP(T &t, Tensor &tensor) { return; } template void set_MtKRP(FitCheck &t, Tensor &tensor) { t.set_MtKRP(tensor); } template void set_MtKRPL(CoupledFitCheck &t, Tensor &tensor) { t.set_MtKRPL(tensor); } template void set_MtKRPR(CoupledFitCheck &t, Tensor &tensor) { t.set_MtKRPR(tensor); } // Functions that can get the fit \|X - \hat{X}\|_F where // \hat{X} is the CP approximation (epsilon), if // converge_class object isn't FitCheck do nothing template void get_fit(T &t, double &epsilon) { // epsilon = epsilon; epsilon = -1; return; } template void get_fit(FitCheck &t, double &epsilon, bool max_iter = false) { epsilon = t.get_fit(max_iter); return; } template void get_fit(CoupledFitCheck &t, double &epsilon, bool max_iter = false) { epsilon = t.get_fit(max_iter); return; } template void set_norm(T &t, double norm){ return; } template void set_norm(FitCheck &t, double norm){ t.set_norm(norm); } } // namespace detail /** \brief Base class to compute the Canonical Product (CP) decomposition of an order-N tensor. This is a virtual class and is constructed by its children to compute the CP decomposition using some type of solver. 
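   Here \c Tensor is the tensor type being decomposed (e.g. btas::Tensor<double>)
   and \c ConvClass is the convergence-test functor, such as the FitCheck class
   above, whose () operator decides when the ALS optimization has converged.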
Synopsis: \code // Constructors CP A(ndim) // CP_ALS object with empty factor matrices // Operations A.compute_rank(rank, converge_test) // Calls virtual build function // to decompose to a specific rank // has HOSVD option A.compute_rank_random(rank, converge_test) // Calls virtual build_random function // to decompose at specific rank, // no HOSVD option A.compute_error(converge_test, omega) // Calls the virtual build function to // compute the CP decomposition to a 2-norm // error < omega. A.compute_geometric(rank, converge_test, step) // Calls the virtual build function to // compute CP decomposition with rank that // grows in geometric steps A.paneled_tucker_build(converge_test) // computes CP_ALS of tensor to // rank = 2 * max_dim(tensor) // in 4 panels using a modified // HOSVD initial guess //See documentation for full range of options // Accessing Factor Matrices A.get_factor_matrices() // Returns a vector of factor matrices, if // they have been computed A.reconstruct() // Returns the tensor computed using the // CP factor matrices \endcode */ template class CP { public: using ind_t = typename Tensor::range_type::index_type::value_type; using dtype = typename Tensor::numeric_type; using ord_t = typename range_traits::ordinal_type; /// Create a generic CP object that stores the factor matrices, /// the number of iterations and the number of dimensions of the original /// tensor /// \param[in] dims number of modes in the reference tensor. CP(size_t dims) : num_ALS(0), ndim(dims) { } ~CP() = default; /// Computes decomposition of the order-N tensor \c tensor /// with CP rank = \c rank . /// Initial guess for factor matrices start at rank = ( 1 or \c SVD_rank) /// and build to rank = \c rank by increments of \c step, to minimize /// error. /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. /// \param[in] step CP_ALS built /// from r =1 to r = \c rank. r increments by \c step; default = 1. /// \param[in] SVD_initial_guess Should the initial factor matrices be /// approximated with left singular values? default = false /// \param[in] SVD_rank if \c /// SVD_initial_guess is true specify the rank of the initial guess such that /// SVD_rank <= rank. default = 0 /// \param[in] max_als Max number of iterations allowed to converge the ALS approximation default = 1e4 /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = true /// \param[in] calculate_epsilon Should the 2-norm error be calculated /// \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon. \f$ Default = false. /// \param[in] direct Should the CP decomposition be computed without /// calculating the Khatri-Rao product? Default = true. 
/// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck /// else if calculate_epsilon = true, returns 2-norm error between exact and approximate tensor /// else return -1 double compute_rank(ind_t rank, ConvClass &converge_test, ind_t step = 1, bool SVD_initial_guess = false, ind_t SVD_rank = 0, ind_t max_als = 1e4, bool fast_pI = true, bool calculate_epsilon = false, bool direct = true) { if (rank <= 0) BTAS_EXCEPTION("Decomposition rank must be greater than 0"); if (SVD_initial_guess && SVD_rank > rank) BTAS_EXCEPTION("Initial guess is larger than the desired CP rank"); double epsilon = -1.0; build(rank, converge_test, direct, max_als, calculate_epsilon, step, epsilon, SVD_initial_guess, SVD_rank, fast_pI); // std::cout << "Number of ALS iterations performed: " << num_ALS << std::endl; //detail::get_fit(converge_test, epsilon); return epsilon; } /// Computes decomposition of the order-N tensor \c tensor /// with CP rank = \c rank factors initialized to rank \c rank /// using random numbers. /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. /// \param[in] /// max_als Max number of iterations allowed to converge the ALS approximation default = 1e4 /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = true /// \param[in] /// calculate_epsilon Should the 2-norm error be calculated \f$ ||T_{\rm exact} - /// T_{\rm approx}|| = \epsilon. \f$ Default = false. /// \param[in] direct Should the CP decomposition be computed without /// calculating the Khatri-Rao product? Default = true. /// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck /// else if calculate_epsilon = true, returns 2-norm error between exact and approximate tensor /// else return -1 double compute_rank_random(ind_t rank, ConvClass &converge_test, ind_t max_als = 1e4, bool fast_pI = true, bool calculate_epsilon = false, bool direct = true) { if (rank <= 0) BTAS_EXCEPTION("Decomposition rank must be greater than 0"); double epsilon = -1.0; build_random(rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI); // std::cout << "Number of ALS iterations performed: " << num_ALS << std::endl; //detail::get_fit(converge_test, epsilon); return epsilon; } /// Computes the decomposition of the order-N tensor \c tensor /// to \f$ rank \leq \f$ \c max_als such that /// \f[ || T_{\rm exact} - T_{\rm approx}||_F = \epsilon \leq tcutCP \f] /// with rank incrementing by \c step. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. /// \param[in] tcutCP How small \f$\epsilon\f$ must be to consider the CP /// decomposition converged. Default = 1e-2. /// \param[in] step CP_ALS built from r =1 to r = \c rank. r /// increments by \c step; default = 1. /// \param[in] max_rank The highest rank /// approximation computed before giving up on CP-ALS. Default = 1e5. /// \param[in] SVD_initial_guess Should the initial factor matrices be /// approximated with left singular values? default = false /// \param[in] SVD_rank if \c /// SVD_initial_guess is true specify the rank of the initial guess such that /// SVD_rank <= rank. 
default = 0 /// \param[in] max_als Max number of iterations allowed to converge the ALS /// approximation default = 1e4 /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = true /// \param[in] direct Should the /// CP decomposition be computed without calculating the /// Khatri-Rao product? Default = true. /// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck /// else if calculate_epsilon = true, returns 2-norm error between exact and approximate tensor /// else return -1 double compute_error(ConvClass &converge_test, double tcutCP = 1e-2, ind_t step = 1, ind_t max_rank = 1e5, bool SVD_initial_guess = false, ind_t SVD_rank = 0, ind_t max_als = 1e4, bool fast_pI = true, bool direct = true) { ind_t rank = (A.empty()) ? ((SVD_initial_guess) ? SVD_rank : 1) : A[0].extent(0); double epsilon = tcutCP + 1; while (epsilon > tcutCP && rank <= max_rank) { build(rank, converge_test, direct, max_als, true, step, epsilon, SVD_initial_guess, SVD_rank, fast_pI); rank += step; } // detail::get_fit(converge_test, epsilon); return epsilon; } /// Computes decomposition of the order-N tensor \c tensor /// with \f$ CP rank \leq \f$ \c desired_rank \n /// Initial guess for factor matrices start at rank = 1 /// and build to rank = \c rank by geometric steps of \c geometric_step, to /// minimize error. /// \param[in] desired_rank Rank of CP decomposition, r, will build by /// geometric step until \f$ r \leq \f$ \c desired_rank. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. /// \param[in] geometric_step CP_ALS built from r =1 to r = \c rank. r increments by r *= /// \c geometric_step; default = 2. /// \param[in] SVD_initial_guess Should the initial factor matrices be /// approximated with left singular values? default = false /// \param[in] SVD_rank if \c /// SVD_initial_guess is true specify the rank of the initial guess such that /// SVD_rank <= rank. default = 0 /// \param[in] max_als Max number of iterations allowed to /// converge the ALS approximation. default = 1e4 /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = true /// \param[in] calculate_epsilon Should the /// 2-norm error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$. /// Default = false. /// \param[in] direct Should the CP /// decomposition be computed without calculating the Khatri-Rao product? /// Default = true. /// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck /// else if calculate_epsilon = true, returns 2-norm error between exact and approximate tensor /// else return -1 double compute_geometric(ind_t desired_rank, ConvClass &converge_test, ind_t geometric_step = 2, bool SVD_initial_guess = false, ind_t SVD_rank = 0, ind_t max_als = 1e4, bool fast_pI = true, bool calculate_epsilon = false, bool direct = true) { if (geometric_step <= 0) { BTAS_EXCEPTION("The step size must be larger than 0"); } if (SVD_initial_guess && SVD_rank > desired_rank) { BTAS_EXCEPTION("Initial guess is larger than desired CP rank"); } double epsilon = -1.0; ind_t rank = (SVD_initial_guess) ? 
SVD_rank : 1; while (rank <= desired_rank && rank < max_als) { build(rank, converge_test, direct, max_als, calculate_epsilon, geometric_step, epsilon, SVD_initial_guess, SVD_rank, fast_pI); if (geometric_step <= 1) rank++; else rank *= geometric_step; } // detail::get_fit(converge_test, epsilon); return epsilon; } /// virtual function implemented in solver /// Computes decomposition of the order-N tensor \c tensor /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor) /// Initial guess for factor matrices start at rank = max_dim(reference_tensor) /// and builds rank \c panel times by \c RankStep * max_dim(reference_tensor) increments /// \param[in, out] converge_list Tests to see if ALS is converged, holds the value of fit. /// should be as many tests as there are panels /// \param[in] RankStep CP_ALS increment of the panel /// \param[in] panels number of times the rank will be built /// \param[in] /// max_als Max number of iterations allowed to converge the ALS approximation default = 1e4 /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = true /// \param[in] /// calculate_epsilon Should the 2-norm error be calculated \f$ ||T_{\rm exact} - /// T_{\rm approx}|| = \epsilon. \f$ Default = false. /// \param[in] direct Should the CP decomposition be computed without /// calculating the Khatri-Rao product? Default = true. /// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck /// else if calculate_epsilon = true, returns 2-norm error between exact and approximate tensor /// else return -1 virtual double compute_PALS(std::vector &converge_list, double RankStep = 0.5, size_t panels = 4, int max_als = 20, bool fast_pI = true, bool calculate_epsilon = false, bool direct = true) = 0; /// returns the rank \c rank optimized factor matrices /// \return Factor matrices stored in a vector. For example, a order-3 /// tensor has factor matrices in positions [0]-[2]. In [3] there is scaling /// factor vector of size \c rank /// \throw Exception if the CP decomposition is /// not yet computed. std::vector get_factor_matrices() { if (!A.empty()) return A; else BTAS_EXCEPTION("Attempting to return a NULL object. Compute CP decomposition first."); } /// Default function, uses the factor matrices from the CP /// decomposition and reconstructs the /// approximated tensor. /// Assumes that $T_{(A)} = A (B \odot C \odot D \dots) ^T$ /// \returns The tensor approxmimated from the factor /// matrices of the CP decomposition. /// \throws Exception if the CP decomposition is /// not yet computed. Tensor reconstruct() { if (A.empty()) BTAS_EXCEPTION("Factor matrices have not been computed. 
You must first calculate CP decomposition."); std::vector dims; for (size_t i = 0; i < ndim; ++i) { dims.push_back(i); } return btas::reconstruct(A, dims); } // For debug purposes void print(const Tensor &tensor) { if (tensor.rank() == 2) { ind_t row = tensor.extent(0), col = tensor.extent(1); ord_t i_times_col = 0; for (ind_t i = 0; i < row; ++i, i_times_col += col) { const auto *tensor_ptr = tensor.data() + i_times_col; for (ind_t j = 0; j < col; ++j) { // os << *(tensor_ptr + j) << ",\t"; std::cout << *(tensor_ptr + j) << ",\t"; } std::cout << std::endl; } } else { for (auto &i : tensor) { // os << i << ", \t"; std::cout << i << ","; } } std::cout << std::endl; return; } protected: size_t num_ALS; // Number of ALS iterations std::vector A; // Factor matrices std::vector AtA; size_t ndim; // Modes in the reference tensor std::vector symmetries; // Symmetries of the reference tensor double s = 0; // this is a variable for rals; dtype one {1.0}; dtype zero {0.0}; /// Virtual function. Solver classes should implement a build function to /// generate factor matrices then compute the CP decomposition /// This function should have options for HOSVD and for building rank by \c step increments /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. /// \param[in] direct The CP decomposition be computed without calculating the /// Khatri-Rao product? /// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. /// \param[in] calculate_epsilon Should the 2-norm /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ . /// \param[in] step /// CP_ALS built from r =1 to r = rank. r increments by step. /// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in] SVD_initial_guess build inital guess from left singular vectors /// \param[in] SVD_rank rank of the initial guess using left singular vector /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// return if \c fast_pI was successful. virtual void build(ind_t rank, ConvClass &converge_test, bool direct, ind_t max_als, bool calculate_epsilon, ind_t step, double &epsilon, bool SVD_initial_guess, ind_t SVD_rank, bool &fast_pI) = 0; /// Virtual function. Solver classes should implement a build function to generate factor matrices then compute the CP decomposition Create a rank \c rank initial guess using random numbers from a uniform distribution /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. /// \param[in] direct The CP decomposition be computed without calculating the /// Khatri-Rao product? /// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. /// \param[in] calculate_epsilon Should the 2-norm /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ . 
/// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in] SVD_initial_guess build inital guess from left singular vectors /// \param[in] SVD_rank rank of the initial guess using left singular vector /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// return if \c fast_pI was successful virtual void build_random(ind_t rank, ConvClass &converge_test, bool direct, ind_t max_als, bool calculate_epsilon, double &epsilon, bool &fast_pI) = 0; /// Generates V by first Multiply A^T.A then Hadamard product V(i,j) *= /// A^T.A(i,j); /// \param[in] n The mode being optimized, all other modes held constant /// \param[in] rank The current rank, column dimension of the factor matrices /// \param[in] lambda regularization parameter, lambda is added to the diagonal of V Tensor generate_V(size_t n, ind_t rank, double lambda = 0.0) { const ord_t rank2 = rank * (ord_t)rank; Tensor V(rank, rank); V.fill(1.0); auto *V_ptr = V.data(); if (AtA.empty()) { Tensor lhs_prod(rank, rank); for (size_t i = 0; i < ndim; ++i) { if (i != n) { gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, A[i].conj(), A[i], 0.0, lhs_prod); const auto *lhs_ptr = lhs_prod.data(); for (ord_t j = 0; j < rank2; j++) *(V_ptr + j) *= *(lhs_ptr + j); } } } else { for (size_t i = 0; i < ndim; ++i) { if (i != n) { auto *ptrA = AtA[i].data(); for (ord_t r = 0; r < rank2; ++r) *(V_ptr + r) *= *(ptrA + r); } } } ord_t r_times_rank = 0; for (ind_t r = 0; r < rank; ++r, r_times_rank += rank) { *(V_ptr + r_times_rank + r) += lambda; } return V; } /// Keep track of the Left hand Khatri-Rao product of matrices and /// Continues to multiply be right hand products, skipping /// the matrix at index n. /// \param[in] n The mode being optimized, all other modes held constant /// \param[in] rank The current rank, column dimension of the factor matrices /// \param[in] forward Should the Khatri-Rao product move through the factor /// matrices in the forward (0 to ndim) or backward (ndim to 0) direction /// \return the Khatri-Rao product of the factor matrices excluding the nth factor Tensor generate_KRP(size_t n, ind_t rank, bool forward) { Tensor temp(Range{Range1{A.at(n).extent(0)}, Range1{rank}}); Tensor left_side_product(Range{Range1{rank}, Range1{rank}}); if (forward) { for (size_t i = 0; i < ndim; ++i) { if ((i == 0 && n != 0) || (i == 1 && n == 0)) { left_side_product = A.at(i); } else if (i != n) { khatri_rao_product(left_side_product, A[i], temp); left_side_product = temp; } } } else { for (size_t i = ndim - 1; i > -1; --i) { if ((i == ndim - 1 && n != ndim - 1) || (i == ndim - 2 && n == ndim - 1)) { left_side_product = A.at(i); } else if (i != n) { khatri_rao_product(left_side_product, A[i], temp); left_side_product = temp; } } } return left_side_product; } /// \param[in] factor Which factor matrix to normalize, returns /// the \c factor factor matrix with all columns normalized. /// \return The column norms of the \c factor factor matrix Tensor normCol(size_t factor) { if (factor >= ndim) BTAS_EXCEPTION("Factor is out of range"); auto &a = A[factor]; ind_t rank = a.extent(1), Nsize = a.extent(0); ord_t size = a.size(); Tensor lambda(rank); lambda.fill(0.0); auto A_ptr = a.data(); auto lam_ptr = lambda.data(); for (ord_t i = 0; i < size; ++i) { *(lam_ptr + i % rank) += *(A_ptr + i) * btas::impl::conj(*(A_ptr + i)); } for (ind_t col = 0; col < rank; ++col) { auto val = sqrt(*(lam_ptr + col)); *(lam_ptr + col) = val; val = (abs(val) < 1e-12 ? 
0.0 : 1.0 / val); btas::scal(Nsize, val, (A_ptr + col), rank); } return lambda; } /// Calculates the column norms of a matrix and saves the norm values into /// lambda tensor (last matrix in the A) /// \param[in, out] Mat The matrix whose column will be normalized, return /// \c Mat with all columns normalized void normCol(Tensor &Mat) { if (Mat.rank() > 2) BTAS_EXCEPTION("normCol with rank > 2 not yet supported"); ind_t rank = Mat.extent(1), Nsize = Mat.extent(0); ord_t size = Mat.size(); A[ndim].fill(0.0); auto Mat_ptr = Mat.data(); auto A_ptr = A[ndim].data(); for (ord_t i = 0; i < size; ++i) { *(A_ptr + i % rank) += *(Mat_ptr + i) * btas::impl::conj(*(Mat_ptr + i)); } for (ind_t i = 0; i < rank; ++i) { auto val = sqrt(*(A_ptr + i)); *(A_ptr + i) = val; val = (abs(val) < 1e-12 ? 0.0 : 1.0 / val); btas::scal(Nsize, val, (Mat_ptr + i), rank); } } /// \param[in] Mat Calculates the 2-norm of the matrix mat /// \return the 2-norm. auto norm(const Tensor &Mat) { return sqrt(abs(dot(Mat, Mat))); } /// SVD referencing code from /// http://www.netlib.org/lapack/explore-html/de/ddd/lapacke_8h_af31b3cb47f7cc3b9f6541303a2968c9f.html /// Fast pseudo-inverse algorithm described in /// https://arxiv.org/pdf/0804.4809.pdf /// Trying to solve Ax = B /// First try Cholesky to solve this problem directly /// second tryfast pseudo-inverse algorithm described in /// https://arxiv.org/pdf/0804.4809.pdf /// If all else fails use SVD /// \param[in] mode_of_A The mode being optimized used to compute hadamard LHS (V) of ALS problem (Vx = B) /// \param[in,out] fast_pI If true, try to compute the pseudo inverse via fast LU decomposition, else use SVD; /// on return reports whether the fast route was used. If \c fast_pI fails, variable will be set /// to false and SVD will be used. /// \param[in, out] cholesky If true, try to solve the linear equation Vx = B (the ALS problem) /// using a Cholesky decomposition (lapacke subroutine) on return reports if /// inversion was successful. /// \param[in, out] B In: The RHS of the ALS problem ( Vx = B ). Out: The solved linear equation /// \f$ V^{-1} B \f$ /// \param[in] lambda Regularization parameter lambda is added to the diagonal of V void pseudoinverse_helper(size_t mode_of_A, bool &fast_pI, bool &cholesky, Tensor &B, double lambda = 0.0) { if (B.empty()) { BTAS_EXCEPTION("pseudoinverse helper solves Ax = B. B cannot be an empty tensor"); } ind_t rank = A[0].extent(1); auto a = this->generate_V(mode_of_A, rank, lambda); if (cholesky) { cholesky = cholesky_inverse(a, B); return; } auto pInv = pseudoInverse(a, fast_pI); Tensor an(B.extent(0), rank); gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, B, pInv, 0.0, an); B = an; } }; };// namespace btas #endif //BTAS_GENERIC_CP_H BTAS-1.0.0/btas/generic/cp_als.h000066400000000000000000001232211476142407000162260ustar00rootroot00000000000000// // Created by Karl Pierce on 7/24/19. // #ifndef BTAS_GENERIC_CP_ALS_H #define BTAS_GENERIC_CP_ALS_H #include #include #include #include #include namespace btas { /** \brief Computes the Canonical Product (CP) decomposition of an order-N tensor using alternating least squares (ALS). This computes the CP decomposition of btas::Tensor objects with row major storage only with fixed (compile-time) and variable (run-time) ranks. Also provides Tucker and randomized Tucker-like compressions coupled with CP-ALS decomposition. Does not support strided ranges. \warning this code takes a non-const reference \c tensor_ref but does not modify the values. 
This is a result of API (reshape needs non-const tensor) Synopsis: \code // Constructors CP_ALS A(tensor) // CP_ALS object with empty factor // matrices and no symmetries CP_ALS A(tensor, symms) // CP_ALS object with empty factor // matrices and symmetries // Operations A.compute_rank(rank, converge_test) // Computes the CP_ALS of tensor to // rank, rank build and HOSVD options A.compute_rank_random(rank, converge_test) // Computes the CP_ALS of tensor to // rank. Factor matrices built at rank // with random numbers A.compute_error(converge_test, omega) // Computes the CP_ALS of tensor to // 2-norm // error < omega. A.compute_geometric(rank, converge_test, step) // Computes CP_ALS of tensor to // rank with // geometric steps of step between // guesses. A.compute_PALS(converge_test) // computes CP_ALS of tensor to // rank = 3 * max_dim(tensor) // in 4 panels using a modified // HOSVD initial guess A.compress_compute_tucker(tcut_SVD, converge_test) // Computes Tucker decomposition // using // truncated SVD method then // computes finite // error CP decomposition on core // tensor. A.compress_compute_rand(rank, converge_test) // Computes random decomposition on // Tensor to // make core tensor with every mode // size rank // Then computes CP decomposition // of core. //See documentation for full range of options // Accessing Factor Matrices A.get_factor_matrices() // Returns a vector of factor matrices, if // they have been computed A.reconstruct() // Returns the tensor computed using the // CP factor matrices \endcode */ template > class CP_ALS : public CP { public: using T = typename Tensor::value_type; using RT = real_type_t; using RTensor = rebind_tensor_t; using CP::A; using CP::ndim; using CP::symmetries; using typename CP::ind_t; using typename CP::ord_t; using CP::AtA; /// Create a CP ALS object, child class of the CP object /// that stores the reference tensor. /// Reference tensor has no symmetries. /// \param[in] tensor the reference tensor to be decomposed. CP_ALS(Tensor &tensor) : CP(tensor.rank()), tensor_ref(tensor), size(tensor.size()) { for (size_t i = 0; i < ndim; ++i) { symmetries.push_back(i); } } /// Create a CP ALS object, child class of the CP object /// that stores the reference tensor. /// Reference tensor has symmetries. /// Symmetries should be set such that the higher modes index /// are set equal to lower mode indices (a 4th order tensor, /// where the second & third modes are equal would have a /// symmetries of {0,1,1,3} /// \param[in] tensor the reference tensor to be decomposed. /// \param[in] symms the symmetries of the reference tensor. CP_ALS(Tensor &tensor, std::vector &symms) : CP(tensor.rank()), tensor_ref(tensor), size(tensor.size()) { symmetries = symms; if (symmetries.size() > ndim) BTAS_EXCEPTION("Too many symmetries provided") for (size_t i = 0; i < ndim; ++i) { if (symmetries[i] > i) BTAS_EXCEPTION("Symmetries should always refer to factors at earlier positions"); } } CP_ALS() = default; ~CP_ALS() = default; /// \brief Computes decomposition of the order-N tensor \c tensor /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor) /// Initial guess for factor matrices start at rank = max_dim(reference_tensor) /// and builds rank \c panel times by \c RankStep * max_dim(reference_tensor) increments /// \param[in, out] converge_list Tests to see if ALS is converged, holds the value of fit. 
/// should be as many tests as there are panels /// \param[in] RankStep CP_ALS increment of the panel /// \param[in] panels number of times the rank will be built /// \param[in] /// max_als Max number of iterations allowed to converge the ALS approximation default = 1e4 /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = true /// \param[in] /// calculate_epsilon Should the 2-norm error be calculated \f$ ||T_{\rm exact} - /// T_{\rm approx}|| = \epsilon. \f$ Default = false. /// \param[in] direct Should the CP decomposition be computed without /// calculating the Khatri-Rao product? Default = true. /// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck /// else if calculate_epsilon = true, returns 2-norm error between exact and approximate tensor /// else return -1 double compute_PALS(std::vector &converge_list, double RankStep = 0.5, size_t panels = 4, int max_als = 20, bool fast_pI = false, bool calculate_epsilon = false, bool direct = true) override { if (RankStep <= 0) BTAS_EXCEPTION("Panel step size cannot be less than or equal to zero"); if (converge_list.size() < panels) BTAS_EXCEPTION("Too few convergence tests. Must provide a list of panels convergence tests"); double epsilon = -1.0; size_t count = 0; // Find the largest rank this will be the first panel ind_t max_dim = tensor_ref.extent(0); for (size_t i = 1; i < ndim; ++i) { ind_t dim = tensor_ref.extent(i); max_dim = (dim > max_dim ? dim : max_dim); } while (count < panels) { auto converge_test = converge_list[count]; // Use tucker initial guess (SVD) to compute the first panel if (count == 0) { this->build(max_dim, converge_test, direct, max_als, calculate_epsilon, 1, epsilon, true, max_dim, fast_pI); } // All other panels build the rank buy RankStep variable else { // Always deal with the first matrix push new factors to the end of A // Kick out the first factor when it is replaced. // This is the easiest way to resize and preserve the columns // (if this is rebuilt with rank as columns this resize would be easier) ind_t rank = A[0].extent(1), rank_new = rank + RankStep * max_dim; for (int i = 0; i < ndim; ++i) { ind_t row_extent = A[0].extent(0), zero = 0; Tensor b(Range{Range1{A[0].extent(0)}, Range1{rank_new}}); // Move the old factor to the new larger matrix { auto lower_old = {zero, zero}, upper_old = {row_extent, rank}; auto old_view = make_view(b.range().slice(lower_old, upper_old), b.storage()); auto A_itr = A[0].begin(); for (auto iter = old_view.begin(); iter != old_view.end(); ++iter, ++A_itr) { *(iter) = *(A_itr); } } // Fill in the new columns of the factor with random numbers { auto lower_new = {zero, rank}, upper_new = {row_extent, rank_new}; auto new_view = make_view(b.range().slice(lower_new, upper_new), b.storage()); fill_random(new_view); } A.erase(A.begin()); A.push_back(b); // replace the lambda matrix when done with all the factors if (i + 1 == ndim) { b.resize(Range{Range1{rank_new}}); for (ind_t k = 0; k < A[0].extent(0); k++) b(k) = A[0](k); A.erase(A.begin()); A.push_back(b); } // normalize the factor (don't replace the previous lambda matrix) this->normCol(0); } ALS(rank_new, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI); } count++; } return epsilon; } /// \brief Computes an approximate core tensor using /// Tucker decomposition, e.g. /// \f$ T(I_1 \dots I_N) \approx T(R_1 \dots R_N) U^{(1)} (R_1, I_1) \dots U^{(N)} (R_N, I_N) \f$ /// where \f$ \mathrm{rank} R_1 \leq \mathrm{rank } I_1 \f$ , etc. 
/// Reference: /// here. Using this approximation the CP decomposition is /// computed to either finite error or finite rank. Default settings /// calculate to finite error. Factor matrices from get_factor_matrices() are /// scaled by the Tucker transformations. /// \param[in] tcutSVD Truncation threshold for SVD of each mode in Tucker /// decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. /// \param[in] rank If finding CP /// decomposition to finite rank, define CP rank. Default 0 will throw error /// for compute_rank. /// \param[in] direct The CP decomposition be computed /// without calculating the Khatri-Rao product? Default = true. /// \param[in] /// calculate_epsilon Should the 2-norm error be calculated \f$ ||T_{\rm exact} - /// T_{\rm approx}|| = \epsilon \f$ . Default = false. /// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. Default = 1e4. /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = false /// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck /// else if calculate_epsilon = true, returns 2-norm error between exact and approximate tensor /// else return -1 [[deprecated]] double compress_compute_tucker(double tcutSVD, ConvClass &converge_test, ind_t rank = 0, bool direct = true, bool calculate_epsilon = false, ind_t max_als = 1e4, bool fast_pI = false) { // Tensor compression std::vector transforms; tucker_compression(tensor_ref, tcutSVD, transforms); size = tensor_ref.size(); double epsilon = -1.0; // CP decomposition epsilon = this->compute_rank_random(rank, converge_test, max_als, fast_pI, calculate_epsilon, direct); // scale factor matrices for (size_t i = 0; i < ndim; i++) { Tensor tt(transforms[i].extent(0), A[i].extent(1)); gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, transforms[i], A[i], 0.0, tt); A[i] = tt; } return epsilon; } /// \brief Computes an approximate core tensor using /// random projection, i.e. /// \f$ T(I_1 \dots I_N) \approx T(R_1 \dots R_N) U^{(1)} (R_1, I_1) \dots U^{(N)} (R_N, I_N) \f$ /// where \f$ \mathrm{rank } R_1 \leq \mathrm{rank } I_1 \f$ , etc. /// Reference: arXiv:1703.09074 /// Using this approximation the CP decomposition is computed to /// either finite error or finite rank. /// Default settings calculate to finite error. /// Factor matrices are scaled by randomized transformation. /// \param[in] desired_compression_rank The new dimension of each mode after /// randomized compression. /// /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit.. /// \param[in] oversampl Oversampling added to the /// desired_compression_rank required to provide a more optimal decomposition. /// Default = suggested = 10. /// \param[in] powerit Number of power iterations, /// as specified in the literature, to scale the spectrum of each mode. /// Default = suggested = 2. /// \param[in] rank If finding CP /// decomposition to finite rank, define CP rank. Default 0 will throw error /// for compute_rank. /// \param[in] direct Should the CP decomposition be /// computed without calculating the Khatri-Rao product? Default = true. /// \param[in] calculate_epsilon Should the 2-norm error be calculated /// \f$ ||T_exact - T_approx|| = \epsilon \f$. Default = false. 
/// \param[in] max_als If CP decomposition is to finite error, max_als is the
/// highest rank approximation computed before giving up on CP-ALS. Default = 1e5.
/// \param[in] fast_pI Should the pseudo inverse be computed using a fast Cholesky decomposition?
/// Default = false.
/// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck
/// else if calculate_epsilon = true, returns 2-norm error between exact and approximate tensor
/// else return -1
double compress_compute_rand(ind_t desired_compression_rank, ConvClass &converge_test, long oversampl = 10,
                             size_t powerit = 2, ind_t rank = 0, bool direct = true, bool calculate_epsilon = false,
                             ind_t max_als = 1e5, bool fast_pI = false) {
  std::vector<Tensor> transforms;
  randomized_decomposition(tensor_ref, transforms, desired_compression_rank, oversampl, powerit);
  size = tensor_ref.size();

  auto epsilon = this->compute_rank_random(rank, converge_test, max_als, fast_pI, calculate_epsilon, direct);

  // scale the factor matrices by the randomized transformations
  for (size_t i = 0; i < ndim; i++) {
    Tensor tt(transforms[i].extent(0), A[i].extent(1));
    gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, transforms[i], A[i], 0.0, tt);
    A[i] = tt;
  }

  return epsilon;
}

/// Sets the CP factor matrices to be used, e.g., as the initial guess for ALS
/// \param[in] vecs set of initial factor matrices to use in ALS
/// @note when factors are set this way, ALS uses the rank given by `vecs[0].extent(1)`
void set_cp_factors(std::vector<Tensor> vecs) {
  BTAS_ASSERT(vecs.size() == ndim + 1);
  auto rank = vecs[0].extent(1);
  A.reserve(ndim + 1);
  auto ptr = vecs.begin();
  for (size_t num = 0; num < ndim; ++num, ++ptr) {
    BTAS_ASSERT((*ptr).extent(1) == rank)
    this->A.emplace_back((*ptr));
  }
  this->A.emplace_back((*ptr));
  factors_set = true;
}

protected:
Tensor &tensor_ref;        // Tensor to be decomposed
ord_t size;                // Total number of elements
bool factors_set = false;  // Were the factors preset via set_cp_factors()?

/// Creates an initial guess by computing the SVD of each mode.
/// If the rank of a mode is smaller than the CP rank requested,
/// the rest of the factor matrix is filled with random numbers.
/// Builds factor matrices starting with R = (1 or SVD_rank)
/// and moves to R = \c rank,
/// incrementing the column dimension, R, by \c step
/// \param[in] rank The rank of the CP decomposition.
/// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit.
/// \param[in] direct Should the CP decomposition be computed without calculating the
/// Khatri-Rao product?
/// \param[in] max_als If CP decomposition is to finite
/// error, max_als is the highest rank approximation computed before giving up
/// on CP-ALS.
/// \param[in] calculate_epsilon Should the 2-norm
/// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ .
/// \param[in] step
/// CP_ALS built from r = 1 to r = rank. r increments by step.
/// \param[in, out] epsilon The 2-norm
/// error between the exact and approximated reference tensor
/// \param[in] SVD_initial_guess build initial guess from left singular vectors
/// \param[in] SVD_rank rank of the initial guess using left singular vectors
/// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast Cholesky decomposition?
/// return whether \c fast_pI was successful
void build(ind_t rank, ConvClass &converge_test, bool direct, ind_t max_als, bool calculate_epsilon, ind_t step,
           double &epsilon, bool SVD_initial_guess, ind_t SVD_rank, bool &fast_pI) override {
  // If it's the first time into build and SVD_initial_guess,
  // build and optimize the initial guess based on the left
  // singular vectors of the reference tensor.
  if (A.empty() && SVD_initial_guess) {
    if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD");

    std::vector<size_t> modes_w_dim_LT_svd;
    A = std::vector<Tensor>(ndim);

    // Determine which factor matrices one can fill using the SVD initial guess.
    // Skip the modes that are symmetric to other modes.
    for (size_t i = 0; i < ndim; i++) {
      auto tmp = symmetries[i];
      if (tmp != i) continue;
      if (tensor_ref.extent(i) < SVD_rank) {
        modes_w_dim_LT_svd.push_back(i);
      }
    }

    // Fill all factor matrices with their singular vectors;
    // because we contract X X^T (where X is the reference tensor) to make finding
    // singular vectors an eigenvalue problem, some factor matrices will not be
    // full rank.
    A[0] = Tensor(tensor_ref.extent(0), SVD_rank);
    A[0].fill(0.0);

    for (size_t i = 1; i < ndim; i++) {
      // If a mode is symmetric to another mode skip this whole process;
      // the symmetric modes are set equal at the end.
      auto tmp = symmetries[i];
      if (tmp != i) continue;
      ind_t R = tensor_ref.extent(i);
      Tensor S(R, R);
      RTensor lambda(R);

      // Contract the reference tensor to make it a square matrix of mode i
      gemm(blas::Op::NoTrans, blas::Op::Trans, 1.0, flatten(tensor_ref, i), flatten(tensor_ref, i).conj(), 0.0, S);

      // Find the singular vectors of the matrix using eigenvalue decomposition
      eigenvalue_decomp(S, lambda);

      // Fill a factor matrix with the singular vectors with the largest corresponding singular values
      Tensor lambda_(R, SVD_rank);
      lambda_.fill(0.0);
      auto lower_bound = {0, 0};
      auto upper_bound = {R, ((R > SVD_rank) ? SVD_rank : R)};
      auto view = make_view(S.range().slice(lower_bound, upper_bound), S.storage());
      auto l_iter = lambda_.begin();
      for (auto iter = view.begin(); iter != view.end(); ++iter, ++l_iter) {
        *(l_iter) = *(iter);
      }

      A[i] = lambda_;
    }

    // Fill the remaining columns in the set of factor matrices with dimension < SVD_rank with random numbers
    for (auto &i : modes_w_dim_LT_svd) {
      ind_t R = tensor_ref.extent(i), zero = 0;
      auto lower_bound = {zero, R};
      auto upper_bound = {R, SVD_rank};
      auto view = make_view(A[i].range().slice(lower_bound, upper_bound), A[i].storage());
      fill_random(view);
    }

    // Normalize the columns of the factor matrices and
    // set the values of lambda, the weight of each order-1 tensor
    Tensor lambda(Range{Range1{SVD_rank}});
    A.push_back(lambda);
    for (size_t i = 1; i < ndim; ++i) {
      // normalize the columns of the matrices that were set,
      // i.e. those not symmetric to another mode
      auto tmp = symmetries[i];
      if (tmp == i) this->normCol(A[i]);
      // then make sure the symmetric modes are set here
      A[i] = A[tmp];
    }

    // Optimize this initial guess.
    ALS(SVD_rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI);
  }
  // This loop keeps track of column dimension
  bool opt_in_for_loop = false;
  for (ind_t i = (A.empty()) ?
0 : A.at(0).extent(1); i < rank; i += step) { opt_in_for_loop = true; // This loop walks through the factor matrices ind_t rank_new = i + 1; for (size_t j = 0; j < ndim; ++j) { // select a factor matrix // If no factor matrices exists, make a set of factor matrices // and fill them with random numbers that are column normalized // and create the weighting vector lambda if (i == 0) { Tensor a(Range{tensor_ref.range().range(j), Range1{rank_new}}); fill_random(a); A.push_back(a); this->normCol(j); } // If the factor matrices have memory allocated, rebuild each matrix // with new column dimension col_dimension_old + skip // fill the new columns with random numbers and normalize the columns else { ind_t row_extent = A[0].extent(0), rank_old = A[0].extent(1), zero = 0; Tensor b(Range{A[0].range().range(0), Range1{rank_new}}); { auto lower_old = {zero, zero}, upper_old = {row_extent, rank_old}; auto old_view = make_view(b.range().slice(lower_old, upper_old), b.storage()); auto A_itr = A[0].begin(); for (auto iter = old_view.begin(); iter != old_view.end(); ++iter, ++A_itr) { *(iter) = *(A_itr); } } { auto lower_new = {zero, rank_old}, upper_new = {row_extent, rank_new}; auto new_view = make_view(b.range().slice(lower_new, upper_new), b.storage()); fill_random(new_view); } A.erase(A.begin()); A.push_back(b); if (j == ndim - 1) { A.erase(A.begin()); } } } { Tensor lam(Range{Range1{rank_new}}); A.push_back(lam); } // compute the ALS of factor matrices with rank = i + 1. ALS(rank_new, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI); } if (factors_set && !opt_in_for_loop) { rank = A[0].extent(1); ALS(rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI); } } /// Create a rank \c rank initial guess using /// random numbers from a uniform distribution /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit.. /// \param[in] direct The CP decomposition be computed without calculating the /// Khatri-Rao product? /// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. /// \param[in] calculate_epsilon Should the 2-norm /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ . /// \param[in] step /// CP_ALS built from r =1 to r = rank. r increments by step. /// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in] SVD_initial_guess build inital guess from left singular vectors /// \param[in] SVD_rank rank of the initial guess using left singular vector /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// return if fast_pI was successful. void build_random(ind_t rank, ConvClass &converge_test, bool direct, ind_t max_als, bool calculate_epsilon, double &epsilon, bool &fast_pI) override { boost::random::mt19937 generator(random_seed_accessor()); boost::random::uniform_real_distribution<> distribution(-1.0, 1.0); if(A.empty()) { for (size_t i = 0; i < this->ndim; ++i) { // If this mode is symmetric to a previous mode, set it equal to // previous mode, else make a random matrix. 
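          // note: the constructors enforce symmetries[i] <= i, so a mode flagged as
          // symmetric always copies a factor matrix built earlier in this same loop.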
          auto tmp = symmetries[i];
          if (tmp != i) {
            A.push_back(A[tmp]);
          } else {
            Tensor a(tensor_ref.extent(i), rank);
            for (auto iter = a.begin(); iter != a.end(); ++iter) {
              *(iter) = distribution(generator);
            }
            this->A.push_back(a);
            this->normCol(i);
          }
        }
      } else {
        for (size_t i = 0; i < this->ndim; ++i) {
          // If this mode is symmetric to a previous mode, set it equal to the
          // previous mode, else make a random matrix.
          auto tmp = symmetries[i];
          if (tmp != i) {
            A[i] = A[tmp];
          } else {
            ind_t col_dim = tensor_ref.extent(i);
            Tensor a(col_dim, rank);
            for (auto iter = a.begin(); iter != a.end(); ++iter) {
              *(iter) = distribution(generator);
            }
            // preserve the previously optimized columns of this factor;
            // only the newly added columns keep their random values
            auto &a_prev = A[i];
            ind_t prev_rank = a_prev.extent(1), smaller_rank = (prev_rank < rank ? prev_rank : rank), zero = 0;
            auto lo_bound = {zero, zero}, up_bound = {col_dim, smaller_rank};
            auto new_view = make_view(a.range().slice(lo_bound, up_bound), a.storage());
            auto old_view = make_view(a_prev.range().slice(lo_bound, up_bound), a_prev.storage());
            std::copy(old_view.begin(), old_view.end(), new_view.begin());
            a_prev = a;
            this->normCol(i);
          }
        }
        A.pop_back();
      }
      Tensor lambda(rank);
      lambda.fill(0.0);
      this->A.push_back(lambda);

      ALS(rank, converge_test, direct, max_als, calculate_epsilon, epsilon, fast_pI);
    }

    /// Computes the CP decomposition using ALS to minimize the loss function for fixed rank \p rank
    /// \param[in] rank The rank of the CP decomposition.
    /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit.
    /// \param[in] dir Should the CP decomposition be computed without calculating the
    /// Khatri-Rao product?
    /// \param[in] max_als If CP decomposition is to finite
    /// error, max_als is the highest rank approximation computed before giving up
    /// on CP-ALS. Default = 1e5.
    /// \param[in] calculate_epsilon Should the 2-norm
    /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ .
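    /// @note For orientation (a standard CP-ALS identity, not an exact transcript of the
    /// code path below): each mode-\c n update solves the normal equations
    /// \f$ A^{(n)} V = T_{(n)} \left( A^{(N)} \odot \cdots \odot A^{(n+1)} \odot A^{(n-1)} \odot \cdots \odot A^{(1)} \right) \f$ ,
    /// where \f$ V \f$ is the Hadamard (element-wise) product of the cached grammians
    /// \f$ A^{(m)\dagger} A^{(m)} \f$ held in \c AtA ; the solve itself is delegated to
    /// \c pseudoinverse_helper .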
/// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in,out] fast_pI Whether the pseudo inverse be computed using a fast cholesky decomposition, /// on return \c fast_pI will be true if use of Cholesky was successful virtual void ALS(ind_t rank, ConvClass &converge_test, bool dir, int max_als, bool calculate_epsilon, double &epsilon, bool &fast_pI) { size_t count = 0; // forms and stores partial grammian to minimize number of // small gemm contractions bool is_converged = false; bool matlab = true; if(AtA.empty()) AtA = std::vector(ndim); auto ptr_ata = AtA.begin(); for (size_t i = 0; i < ndim; ++i, ++ptr_ata) { auto &a_mat = A[i]; *ptr_ata = Tensor(); contract(this->one, a_mat, {1, 2}, a_mat.conj(), {1, 3}, this->zero, *ptr_ata, {2, 3}); } // Until either the initial guess is converged or it runs out of iterations // update the factor matrices with or without Khatri-Rao product // intermediate do{ count++; this->num_ALS++; for (size_t i = 0; i < ndim; i++) { auto tmp = symmetries[i]; if (tmp != i) { A[i] = A[tmp]; } else if (dir) { direct(i, rank, fast_pI, matlab, converge_test, tensor_ref); } else { update_w_KRP(i, rank, fast_pI, matlab, converge_test); } auto &ai = A[i]; contract(this->one, ai, {1, 2}, ai.conj(), {1, 3}, this->zero, AtA[i], {2, 3}); } is_converged = converge_test(A, AtA); }while (count < max_als && !is_converged); detail::get_fit(converge_test, epsilon, (this->num_ALS == max_als)); epsilon = 1.0 - epsilon; // Checks loss function if required if (calculate_epsilon && epsilon == 2) { epsilon = this->norm(this->reconstruct() - tensor_ref); } } /// Calculates an optimized CP factor matrix using Khatri-Rao product /// intermediate /// \param[in] n The mode being optimized, all other modes held /// constant /// \param[in] rank The current rank, column dimension of the factor /// matrices /// iteration factor matrix /// \param[in, out] matlab If \c fast_pI = true then try to solve VA = B instead of taking pseudoinverse /// in the same manner that matlab would compute the inverse. /// return if matlab was successful. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. 
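    /// \param[in] lambda optional regularization shift; when nonzero, \f$ \lambda A^{(n)} \f$ is
    /// added to the matricized-tensor-times-KRP intermediate before the pseudoinverse solve
    /// (used by the RALS variant). Default = 0.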
    void update_w_KRP(size_t n, ind_t rank, bool &fast_pI, bool &matlab, ConvClass &converge_test, double lambda = 0) {
      Tensor temp(A[n].extent(0), rank);
      Tensor an(A[n].range());

#ifdef BTAS_HAS_INTEL_MKL
      // Computes the Khatri-Rao product intermediate
      auto KhatriRao = this->generate_KRP(n, rank, true);

      // moves mode n of the reference tensor to the front to simplify contraction
      swap_to_first(tensor_ref, n);
      std::vector<size_t> tref_indices, KRP_dims, An_indices;

      // resize the Khatri-Rao product to the proper dimensions
      for (size_t i = 1; i < ndim; i++) {
        KRP_dims.push_back(tensor_ref.extent(i));
      }
      KRP_dims.push_back(rank);
      KhatriRao.resize(KRP_dims);
      KRP_dims.clear();

      // build contraction indices to contract over the correct modes
      An_indices.push_back(0);
      An_indices.push_back(ndim);
      tref_indices.push_back(0);
      for (size_t i = 1; i < ndim; i++) {
        tref_indices.push_back(i);
        KRP_dims.push_back(i);
      }
      KRP_dims.push_back(ndim);

      contract(this->one, tensor_ref, tref_indices, KhatriRao, KRP_dims, this->zero, temp, An_indices);

      // move the nth mode of the reference tensor back where it belongs
      swap_to_first(tensor_ref, n, true);

#else  // BTAS_HAS_CBLAS
      //
      // Computes the Khatri-Rao product intermediate
      auto KhatriRao = this->generate_KRP(n, rank, true);

      std::vector<size_t> tref_indices, KRP_dims, An_indices;

      // resize the Khatri-Rao product to the proper dimensions
      for (size_t i = 0; i < ndim; i++) {
        tref_indices.push_back(i);
        if (i == n) continue;
        KRP_dims.push_back(tensor_ref.extent(i));
      }
      KRP_dims.push_back(rank);
      KhatriRao.resize(KRP_dims);
      KRP_dims.clear();

      An_indices.push_back(n);
      An_indices.push_back(ndim);
      for (size_t i = 0; i < ndim; i++) {
        if (i == n) continue;
        KRP_dims.push_back(i);
      }
      KRP_dims.push_back(ndim);

      contract(this->one, tensor_ref, tref_indices, KhatriRao, KRP_dims, this->zero, temp, An_indices);
#endif

      if (lambda != 0) {
        auto LamA = A[n];
        scal(lambda, LamA);
        temp += LamA;
      }

      detail::set_MtKRP(converge_test, temp);
      // contract the product from above with the pseudoinverse of the Hadamard product
      // to produce an optimized factor matrix
      this->pseudoinverse_helper(n, fast_pI, matlab, temp);

      // Normalize the columns of the new factor matrix
      this->normCol(temp);
      // Replace the old factor matrix with the new optimized result
      A[n] = temp;
    }

    /// Computes an optimized factor matrix holding all others constant.
    /// No Khatri-Rao product computed, immediate contraction.
    // Does this by first contracting a factor matrix with the reference tensor
    // Then computes hadamard/contraction products along all other modes except n.
    // Want A(I2, R)
    // T(I1, I2, I3, I4)
    // T(I1, I2, I3, I4) * A(I4, R) = T'(I1, I2, I3, R)
    // T'(I1, I2, I3, R) (*) A(I3, R) = T'(I1, I2, R) (contract along I3, Hadamard along R)
    // T'(I1, I2, R) (*) A(I1, R) = T'(I2, R) = A(I2, R) * V(R, R)
    /// \param[in] n The mode being optimized, all other modes held constant
    /// \param[in] rank The current rank, column dimension of the factor matrices
    /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast Cholesky decomposition?
    /// return whether computing the \c fast_pI was successful.
    /// \param[in, out] matlab If \c fast_pI = true then try to solve VA = B instead of taking the pseudoinverse
    /// in the same manner that matlab would compute the inverse. If this fails, the variable will be manually
    /// set to false and SVD will be used.
/// return if \c matlab was successful /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. test to see if the ALS is converged void direct(size_t n, ind_t rank, bool &fast_pI, bool &matlab, ConvClass &converge_test, Tensor& target, double lambda = 0.0) { // Determine if n is the last mode, if it is first contract with first mode // and transpose the product bool last_dim = n == ndim - 1; // product of all dimensions ord_t LH_size = size; size_t contract_dim = last_dim ? 0 : ndim - 1; ind_t offset_dim = target.extent(n); ind_t pseudo_rank = rank; // Store the dimensions which are available to hadamard contract std::vector dimensions; for (size_t i = last_dim ? 1 : 0; i < (last_dim ? ndim : ndim - 1); i++) { dimensions.push_back(target.extent(i)); } // Modifying the dimension of target so store the range here to resize Range R = target.range(); //Tensor an(A[n].range()); // Resize the tensor which will store the product of target and the first factor matrix Tensor temp = Tensor(size / target.extent(contract_dim), rank); target.resize( Range{Range1{last_dim ? target.extent(contract_dim) : size / target.extent(contract_dim)}, Range1{last_dim ? size / target.extent(contract_dim) : target.extent(contract_dim)}}); // contract tensor ref and the first factor matrix gemm((last_dim ? blas::Op::Trans : blas::Op::NoTrans), blas::Op::NoTrans, this->one , (last_dim? target.conj():target), A[contract_dim].conj(), this->zero, temp); // Resize target target.resize(R); // Remove the dimension which was just contracted out LH_size /= target.extent(contract_dim); // n tells which dimension not to contract, and contract_dim says which dimension I am trying to contract. // If n == contract_dim then that mode is skipped. // if n == ndim - 1, my contract_dim = 0. The gemm transposes to make rank = ndim - 1, so I // move the pointer that preserves the last dimension to n = ndim -2. // In all cases I want to walk through the orders in target backward so contract_dim = ndim - 2 n = last_dim ? ndim - 2 : n; contract_dim = ndim - 2; while (contract_dim > 0) { // Now temp is three index object where temp has size // (size of target/product of dimension contracted, dimension to be // contracted, rank) ord_t idx2 = dimensions[contract_dim], idx1 = LH_size / idx2; temp.resize( Range{Range1{idx1}, Range1{idx2}, Range1{pseudo_rank}}); Tensor contract_tensor; //Tensor contract_tensor(Range{Range1{idx1}, Range1{pseudo_rank}}); //contract_tensor.fill(0.0); const auto &a = A[(last_dim ? contract_dim + 1 : contract_dim)]; // If the middle dimension is the mode not being contracted, I will move // it to the right hand side temp((size of target/product of // dimension contracted, rank * mode n dimension) if (n == contract_dim) { pseudo_rank *= offset_dim; } // If the code hasn't hit the mode of interest yet, it will contract // over the middle dimension and sum over the rank. else if (contract_dim > n) { middle_contract(this->one, temp, a.conj(), this->zero, contract_tensor); temp = contract_tensor; } // If the code has passed the mode of interest, it will contract over // the middle dimension and sum over rank * mode n dimension else { middle_contract_with_pseudorank(this->one, temp, a.conj(), this->zero, contract_tensor); temp = contract_tensor; } LH_size /= idx2; contract_dim--; } n = last_dim ? 
n + 1 : n;

      // If the mode of interest is the 0th mode, then the loop above
      // contracts over all other dimensions and the resulting temp is of the
      // correct dimension. If the mode of interest isn't the 0th mode, the 0th mode
      // must be contracted out here; the above algorithm can't perform this
      // contraction because the mode of interest is coupled with the rank
      if (n != 0) {
        ind_t idx1 = dimensions[0];
        temp.resize(Range{Range1{idx1}, Range1{offset_dim}, Range1{rank}});
        Tensor contract_tensor(Range{Range1{offset_dim}, Range1{rank}});
        contract_tensor.fill(0.0);

        const auto &a = A[(last_dim ? 1 : 0)];
        front_contract(this->one, temp, a.conj(), this->zero, contract_tensor);

        temp = contract_tensor;
      }

      // Add lambda to factor matrices if RALS
      if (lambda != 0) {
        auto LamA = A[n];
        scal(lambda, LamA);
        temp += LamA;
      }

      // multiply the resulting matrix temp by the pseudoinverse to calculate the
      // optimized factor matrix
      detail::set_MtKRP(converge_test, temp);
      // temp is then rewritten with the unnormalized new A[n] matrix
      this->pseudoinverse_helper(n, fast_pI, matlab, temp);

      // Normalize the columns of the new factor matrix and update
      this->normCol(temp);
      A[n] = temp;
    }

    void direct_improved(size_t n, ind_t rank, bool &fast_pI, bool &matlab, ConvClass &converge_test) {
      Tensor An;
      ind_t keep_dim = tensor_ref.extent(n);
      bool n_last_dim = (n == ndim - 1);
      std::vector<size_t> tref_idx, mat_idx, final_idx;
      auto contract_mode = (n_last_dim ? 0 : ndim - 1);
      // the matrix is A(contract_dimension, rank)
      mat_idx.emplace_back(contract_mode);
      mat_idx.emplace_back(ndim);

      // final will be T(gradient_mode, other, modes, ..., rank)
      final_idx.emplace_back(n);
      for (size_t i = 0; i < ndim; ++i) {
        // for the reference tensor, add all modes INCLUDING the gradient mode
        tref_idx.emplace_back(i);
        // for the final add all modes EXCEPT the gradient mode and the mode we contract out
        if (i == n || i == contract_mode) continue;
        final_idx.emplace_back(i);
      }
      // replace that contracted mode with the rank
      final_idx.emplace_back(ndim);

      contract(this->one, tensor_ref, tref_idx, A[contract_mode], mat_idx, this->zero, An, final_idx);

      tref_idx = final_idx;
      auto ptr = final_idx.rbegin();
      ++ptr;
      auto extent_ = tensor_ref.extent();
      ord_t lhs_dim = tensor_ref.size() / tensor_ref.extent(contract_mode);
      for (size_t i = 0; i < ndim - 2; ++i, ++ptr) {
        ind_t middle_dim = tensor_ref.extent(*ptr);
        lhs_dim /= middle_dim;
        An.resize(Range{Range1{lhs_dim}, Range1{middle_dim}, Range1{rank}});
        Tensor TtKRP(lhs_dim, rank);
        auto &Fac_Mat = A[*ptr];
        TtKRP.fill(0.0);

        ord_t idx1_times_rank = 0, idx1_times_rank_middle = 0;
        for (ind_t idx1 = 0; idx1 < lhs_dim; idx1++, idx1_times_rank += rank) {
          auto *TtKRP_ptr = TtKRP.data() + idx1_times_rank;
          ord_t idx2_times_rank = 0;
          for (ind_t idx2 = 0; idx2 < middle_dim; idx2++, idx2_times_rank += rank) {
            const auto *An_ptr = An.data() + idx1_times_rank_middle + idx2_times_rank;
            const auto *Fac_ptr = Fac_Mat.data() + idx2_times_rank;
            for (ind_t r = 0; r < rank; r++) {
              *(TtKRP_ptr + r) += *(Fac_ptr + r) * *(An_ptr + r);
            }
          }
          idx1_times_rank_middle += idx2_times_rank;
        }
        An = TtKRP;
      }

      detail::set_MtKRP(converge_test, An);
      // An is then rewritten with the unnormalized new A[n] matrix
      this->pseudoinverse_helper(n, fast_pI, matlab, An);

      // Normalize the columns of the new factor matrix and update
      this->normCol(An);
      A[n] = An;
    }
  };
}  // namespace btas

#endif  // BTAS_GENERIC_CP_ALS_H

BTAS-1.0.0/btas/generic/cp_df_als.h

//
// Created by Karl Pierce on 7/24/19.
// #ifndef BTAS_GENERIC_CP_DF_ALS_H #define BTAS_GENERIC_CP_DF_ALS_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace btas { /** \brief Computes the Canonical Product (CP) decomposition of an order-N tensor where the tensor is represented as \f$ T = B^T Z \f$ where \f$ B \in \mathbf{R}^{X \times I^1 \times I^2 \times \dots \times I^{n}} \f$ and \f$ Z \in \mathbf{R}^{X \times I^{n+1} \times \dots \times I^N} \f$ Here \f$ X \f$ is called the connected dimension, no factor matrix will be recovered for this mode. Decomposition optimization will use alternating least squares (ALS). \warning this code takes a non-const reference \c tensor_ref but does not modify the values. This is a result of API (reshape needs non-const tensor) Synopsis: \code // Constructors CP_DF_ALS A(B, Z) // CP_DF_ALS object with empty factor // matrices and no symmetries CP_DF_ALS A(B, Z, symms) // CP_DF_ALS object with empty factor // matrices and symmetries // Operations A.compute_rank(rank, converge_test) // Computes the CP_ALS of T tensor to // rank, rank build and HOSVD options A.compute_rank_random(rank, converge_test) // Computes the CP_ALS of T tensor to // rank. Factor matrices built at rank // with random numbers A.compute_error(converge_test, omega) // Computes the CP_ALS of T tensor to // 2-norm // error < omega. A.compute_geometric(rank, converge_test, step) // Computes CP_ALS of T tensor to // rank with // geometric steps of step between // guesses. A.compute_PALS(converge_test) // computes CP_ALS of T tensor to // rank = 3 * max_dim(tensor) // in 4 panels using a modified // HOSVD initial guess //See documentation for full range of options // Accessing Factor Matrices A.get_factor_matrices() // Returns a vector of factor matrices, if // they have been computed A.reconstruct() // Returns the tensor T computed using the // CP factor matrices \endcode */ template > class CP_DF_ALS : public CP { public: using CP::A; using CP::ndim; using CP::normCol; using CP::generate_KRP; using CP::generate_V; using CP::norm; using CP::symmetries; using typename CP::ind_t; using typename CP::ord_t; using T = typename Tensor::value_type; using RT = real_type_t; using RTensor = rebind_tensor_t; /// Create a CP DF ALS object, child class of the CP object /// that stores the reference tensors. /// Reference tensor has no symmetries. /// \param[in] left the reference tensor, \f$ B \f$ to be decomposed. /// \param[in] right the reference tensor, \f$ Z \f$ to be decomposed. CP_DF_ALS(Tensor &left, Tensor &right) : CP(left.rank() + right.rank() - 2) , tensor_ref_left(left) , tensor_ref_right(right) , ndimL(left.rank()) , ndimR(right.rank()) { for (size_t i = 0; i < ndim; ++i) { symmetries.push_back(i); } } /// Create a CP ALS object, child class of the CP object /// that stores the reference tensors. /// Reference tensor has symmetries. /// Symmetries should be set such that the higher modes index /// are set equal to lower mode indices (a 4th order tensor, /// where the second & third modes are equal would have a /// symmetries of {0,1,1,3} /// \param[in] left the reference tensor, \f$ B \f$ to be decomposed. /// \param[in] right the reference tensor, \f$ Z \f$ to be decomposed. /// \param[in] symms the symmetries of the reference tensor. 
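    /// A hypothetical usage sketch (tensor names are assumed, not taken from the tests): for
    /// order-3 \c B(X,I1,I2) and \c Z(X,I3,I4) there are four non-connected modes; constraining
    /// the second and third of them to be equal follows the {0,1,1,3} convention above:
    /// \code
    /// std::vector<size_t> symms{0, 1, 1, 3};
    /// CP_DF_ALS<Tensor, ConvClass> decomp(B, Z, symms);
    /// \endcode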
    CP_DF_ALS(Tensor &left, Tensor &right, std::vector<size_t> &symms)
        : CP<Tensor, ConvClass>(left.rank() + right.rank() - 2)
        , tensor_ref_left(left)
        , tensor_ref_right(right)
        , ndimL(left.rank())
        , ndimR(right.rank()) {
      symmetries = symms;
      if (symmetries.size() != ndim)
        BTAS_EXCEPTION("Tensor describing symmetries must be equal to number of non-connected dimensions");
      for (size_t i = 0; i < ndim; ++i) {
        if (symmetries[i] > i) BTAS_EXCEPTION("Symmetries should always refer to factors at earlier positions");
      }
    }

    CP_DF_ALS() = default;

    ~CP_DF_ALS() = default;

    /// \brief Computes a decomposition of the order-N tensor \c tensor
    /// with rank = \c RankStep * \c panels * max_dim(reference_tensor) + max_dim(reference_tensor)
    /// Initial guess for factor matrices starts at rank = max_dim(reference_tensor)
    /// and builds rank \c panels times by \c RankStep * max_dim(reference_tensor) increments
    /// \param[in, out] converge_list Tests to see if ALS is converged, holds the value of fit;
    /// there should be as many tests as there are panels
    /// \param[in] RankStep CP_ALS increment of the panel
    /// \param[in] panels number of times the rank will be built
    /// \param[in] max_als Max number of iterations allowed to converge the ALS approximation. Default = 1e4.
    /// \param[in] fast_pI Should the pseudo inverse be computed using a fast Cholesky decomposition?
    /// Default = true.
    /// \param[in] calculate_epsilon Should the 2-norm error be calculated
    /// \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ . Default = false.
    /// \param[in] direct Should the CP decomposition be computed without
    /// calculating the Khatri-Rao product? Default = true.
    /// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck
    /// else if calculate_epsilon = true, returns 2-norm error between exact and approximate tensor
    /// else return -1
    double compute_PALS(std::vector<ConvClass> &converge_list, double RankStep = 0.5, size_t panels = 4,
                        int max_als = 20, bool fast_pI = false, bool calculate_epsilon = false,
                        bool direct = true) override {
      if (RankStep <= 0) BTAS_EXCEPTION("Panel step size cannot be less than or equal to zero");
      if (converge_list.size() < panels)
        BTAS_EXCEPTION("Too few convergence tests. Must provide a list of panels convergence tests");

      double epsilon = -1.0;
      size_t count = 0;
      // Find the largest rank; this will be the first panel
      ind_t max_dim = tensor_ref_left.extent(0);
      for (size_t i = 1; i < ndimL; ++i) {
        ind_t dim = tensor_ref_left.extent(i);
        max_dim = (dim > max_dim ? dim : max_dim);
      }
      for (size_t i = 0; i < ndimR; ++i) {
        ind_t dim = tensor_ref_right.extent(i);
        max_dim = (dim > max_dim ? dim : max_dim);
      }

      while (count < panels) {
        auto converge_test = converge_list[count];
        // Use tucker initial guess (SVD) to compute the first panel
        if (count == 0) {
          build(max_dim, converge_test, direct, max_als, calculate_epsilon, 1, epsilon, true, max_dim, fast_pI);
          // build(max_dim, converge_test, max_als, calculate_epsilon, 1, epsilon, true, max_dim, fast_pI);
        }
        // All other panels build the rank by the RankStep variable
        else {
          // Always deal with the first matrix: push new factors to the end of A
          // Kick out the first factor when it is replaced.
// This is the easiest way to resize and preserve the columns // (if this is rebuilt with rank as columns this resize would be easier) ind_t rank = A[0].extent(1), rank_new = rank + RankStep * max_dim; for (size_t i = 0; i < ndim; ++i) { ind_t row_extent = A[0].extent(0), zero = 0; Tensor b(Range{Range1{A[0].extent(0)}, Range1{rank_new}}); // Move the old factor to the new larger matrix { auto lower_old = {zero, zero}, upper_old = {row_extent, rank}; auto old_view = make_view(b.range().slice(lower_old, upper_old), b.storage()); auto A_itr = A[0].begin(); for (auto iter = old_view.begin(); iter != old_view.end(); ++iter, ++A_itr) { *(iter) = *(A_itr); } } // Fill in the new columns of the factor with random numbers { auto lower_new = {zero, rank}, upper_new = {row_extent, rank_new}; auto new_view = make_view(b.range().slice(lower_new, upper_new), b.storage()); boost::random::mt19937 generator(random_seed_accessor()); boost::random::uniform_real_distribution<> distribution(-1.0, 1.0); for (auto iter = new_view.begin(); iter != new_view.end(); ++iter) { *(iter) = distribution(generator); } } A.erase(A.begin()); A.push_back(b); // replace the lambda matrix when done with all the factors if (i + 1 == ndim) { b.resize(Range{Range1{rank_new}}); for (ind_t k = 0; k < A[0].extent(0); k++) b(k) = A[0](k); A.erase(A.begin()); A.push_back(b); } // normalize the factor (don't replace the previous lambda matrix) normCol(0); } ALS(rank_new, converge_test, max_als, calculate_epsilon, epsilon, fast_pI); } count++; } return epsilon; } /// \brief Computes decomposition of the order-N tensor \c tensor /// with rank = \c rank by first CP decomposing the component tensors /// \c tensor_ref_left and \c tensor_ref_right then uses the /// optimized factor matrices as initial guess to the compound decomposition. /// \param[in] rank Rank of the CP decomposition. /// \param[in, out] converge_test Tests to see if ALS is converged, holds the value of fit. /// \param[in] /// max_als Max number of iterations allowed to converge the ALS approximation default = 1e4 /// \param[in] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// default = true /// \param[in] /// calculate_epsilon Should the 2-norm error be calculated \f$ ||T_{\rm exact} - /// T_{\rm approx}|| = \epsilon. \f$ Default = false. /// \param[in] direct Should the CP decomposition be computed without /// calculating the Khatri-Rao product? Default = true. /// \param[in] cp_comp_prec CP precision for the component subproblem decompositions /// Default = 1e-2. /// \return if ConvClass = FitCheck, returns the fit as defined by fitcheck /// else if calculate_epsilon = true, returns 2-norm error between exact and approximate tensor /// else return -1 double compute_comp_init(ind_t rank_cp3, ConvClass converge_test, size_t max_als = 1e4, bool fast_pI = true, bool calculate_epsilon = false, bool direct = true, double cp_comp_prec = 1e-2, ind_t rank_cp4 = 0, bool verbose = false) { rank_cp4 = (rank_cp4 == 0 ? 
rank_cp3 : rank_cp4); double epsilon = 0.0; auto nrm = [](Tensor &a) { auto norm = 0.0; for (auto &i : a) norm += i * i; return sqrt(norm); }; if(rank_cp3 == rank_cp4) { // compute the left factor { FitCheck fit(cp_comp_prec); fit.set_norm(nrm(tensor_ref_left)); fit.verbose(verbose); CP_ALS> CP3(tensor_ref_left); auto error = CP3.compute_rank_random(rank_cp3, fit, 100, true); if(verbose) std::cout << "The accuracy of the LHS decomposition is : " << error * 100 << std::endl; init_factors_left = CP3.get_factor_matrices(); auto cur_dim = init_factors_left.size() - 1; for (size_t i = 1; i < cur_dim; ++i) { A.emplace_back(init_factors_left[i]); } } // compute the right factor { FitCheck fit(cp_comp_prec); fit.set_norm(nrm(tensor_ref_right)); fit.verbose(verbose); CP_ALS> CP3(tensor_ref_right); auto error = CP3.compute_rank_random(rank_cp3, fit, 100, true); if(verbose) std::cout << "The accuracy of the RHS decomposition is : " << error * 100 << std::endl; init_factors_right = CP3.get_factor_matrices(); auto cur_dim = init_factors_right.size(); if (rank_cp3 == rank_cp4) { A.insert(A.end(), init_factors_right.begin() + 1, init_factors_right.end()); } } } else{ // fill the factors with random numbers boost::random::mt19937 generator(random_seed_accessor()); boost::random::uniform_real_distribution<> distribution(-1.0, 1.0); for (size_t i = 1; i < ndimL; ++i) { auto &tensor_ref = tensor_ref_left; Tensor a(tensor_ref.extent(i), rank_cp4); for (auto iter = a.begin(); iter != a.end(); ++iter) { *(iter) = distribution(generator); } A.emplace_back(a); } for (size_t i = 1; i < ndimR; ++i) { auto &tensor_ref = tensor_ref_right; Tensor a(tensor_ref.extent(i), rank_cp4); for (auto iter = a.begin(); iter != a.end(); ++iter) { *(iter) = distribution(generator); } this->A.emplace_back(a); } Tensor lam(rank_cp4); lam.fill(1.0); A.emplace_back(lam); // compute the left factor and put in the CP4 tensor auto a_ptr = A.begin(); auto col_dim = (rank_cp3 < rank_cp4 ? 
rank_cp3 : rank_cp4); { FitCheck fit(cp_comp_prec); fit.set_norm(nrm(tensor_ref_left)); fit.verbose(verbose); CP_ALS> CP3(tensor_ref_left); auto error = CP3.compute_rank_random(rank_cp3, fit, 100, true); if(verbose) std::cout << "LHS accuracy: " << error * 100 << std::endl; init_factors_left = CP3.get_factor_matrices(); auto cur_dim = init_factors_left.size() - 1; for (size_t i = 1; i < cur_dim; ++i, ++a_ptr) { auto & tensor_ref = init_factors_left[i]; auto left_ptr = tensor_ref.begin(); auto row_dim = tensor_ref.extent(0); auto a_val_ptr = (*a_ptr).data(); for(ind_t row = 0; row < row_dim; ++row){ for(ind_t col = 0; col < col_dim; ++col, ++left_ptr){ *(a_val_ptr + row * rank_cp4 + col) = *(left_ptr); } } } } // compute the right factor and put in the CP4 tensor { FitCheck fit(cp_comp_prec); fit.set_norm(nrm(tensor_ref_right)); fit.verbose(verbose); CP_ALS> CP3(tensor_ref_right); auto error = CP3.compute_rank_random(rank_cp3, fit, 100, true); if(verbose) std::cout << "RHS accuracy: " << error * 100 << std::endl; init_factors_right = CP3.get_factor_matrices(); auto cur_dim = init_factors_right.size() - 1; for (size_t i = 1; i < cur_dim; ++i, ++a_ptr) { auto & tensor_ref = init_factors_right[i]; auto right_ptr = tensor_ref.begin(); auto row_dim = tensor_ref.extent(0); auto a_val_ptr = (*a_ptr).data(); for(ind_t row = 0; row < row_dim; ++row){ for(ind_t col = 0; col < col_dim; ++col, ++right_ptr){ *(a_val_ptr + row * rank_cp4 + col) = *(right_ptr); } } } } } ALS(rank_cp4, converge_test, max_als, calculate_epsilon, epsilon, fast_pI); detail::get_fit(converge_test, epsilon, (this->num_ALS == max_als)); return 1.0 - epsilon; } std::tuple, std::vector> get_init_factors(){ return std::make_tuple(init_factors_left, init_factors_right); } protected: Tensor &tensor_ref_left; // Left connected tensor Tensor &tensor_ref_right; // Right connected tensor size_t ndimL; // Number of dimensions in left tensor size_t ndimR; // number of dims in the right tensor bool lastLeft = false; Tensor leftTimesRight; std::vector dims; std::vector init_factors_left; std::vector init_factors_right; /// Creates an initial guess by computing the SVD of each mode /// If the rank of the mode is smaller than the CP rank requested /// The rest of the factor matrix is filled with random numbers /// Builds factor matricies starting with R=(1 or SVD_rank) /// and moves to R = \c rank /// incrementing column dimension, R, by \c step /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit.. /// \param[in] direct The CP decomposition be computed without calculating the /// Khatri-Rao product? /// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. /// \param[in] calculate_epsilon Should the 2-norm /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ . /// \param[in] step /// CP_ALS built from r =1 to r = rank. r increments by step. 
/// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in] SVD_initial_guess build inital guess from left singular vectors /// \param[in] SVD_rank rank of the initial guess using left singular vector /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// return if \c fast_pI was successful // TODO take advantage of symmetries in build void build(ind_t rank, ConvClass &converge_test, bool direct, ind_t max_als, bool calculate_epsilon, ind_t step, double &epsilon, bool SVD_initial_guess, ind_t SVD_rank, bool &fast_pI) override { { bool factors_set = false; // If its the first time into build and SVD_initial_guess // build and optimize the initial guess based on the left // singular vectors of the reference tensor. if (A.empty() && SVD_initial_guess) { if (SVD_rank == 0) BTAS_EXCEPTION("Must specify the rank of the initial approximation using SVD"); make_svd_guess(SVD_rank); // Optimize this initial guess. ALS(SVD_rank, converge_test, max_als, calculate_epsilon, epsilon, fast_pI); } // This loop keeps track of column dimension bool opt_in_for_loop = false; for (ind_t i = (A.empty()) ? 0 : A.at(0).extent(1); i < rank; i += step) { opt_in_for_loop = true; ind_t rank_new = i + 1; // This loop walks through the factor matrices for (size_t j = 0; j < ndim; ++j) { // select a factor matrix // If no factor matrices exists, make a set of factor matrices // and fill them with random numbers that are column normalized // and create the weighting vector lambda if (i == 0) { Tensor a; if (j < ndimL - 1) { a = Tensor(Range{tensor_ref_left.range().range(j + 1), Range1{rank_new}}); } else { a = Tensor(Range{tensor_ref_right.range().range(j - ndimL + 2), Range1{rank_new}}); } fill_random(a); A.push_back(a); normCol(j); } // If the factor matrices have memory allocated, rebuild each matrix // with new column dimension col_dimension_old + skip // fill the new columns with random numbers and normalize the columns else { ind_t row_extent = A[0].extent(0), rank_old = A[0].extent(1), zero = 0; Tensor b(Range{A[0].range().range(0), Range1{rank_new}}); { auto lower_old = {zero, zero}, upper_old = {row_extent, rank_old}; auto old_view = make_view(b.range().slice(lower_old, upper_old), b.storage()); auto A_itr = A[0].begin(); for (auto iter = old_view.begin(); iter != old_view.end(); ++iter, ++A_itr) { *(iter) = *(A_itr); } } { auto lower_new = {zero, rank_old}, upper_new = {row_extent, rank_new}; auto new_view = make_view(b.range().slice(lower_new, upper_new), b.storage()); fill_random(new_view); } A.erase(A.begin()); A.push_back(b); if (j + 1 == ndim) { A.erase(A.begin()); } } } { Tensor lam(Range{Range1{rank_new}}); A.push_back(lam); } // compute the ALS of factor matrices with rank = i + 1. ALS(rank_new, converge_test, max_als, calculate_epsilon, epsilon, fast_pI); } if (factors_set && !opt_in_for_loop) { ALS(rank, converge_test, max_als, calculate_epsilon, epsilon, fast_pI); } } } /// Generate the SVD initial guess of the super tensor /// without ever generating the super tensor using /// the HOSVD method. Here one flattens the tensor along /// the nth way then isolates that way by computing /// \f$ H_{n} = T^T_{n} T_{n} \f$ then computes the eigenvalue /// decomposition of H which provides the right singular vectors of \f$ T_n \f$ /// \param[in] SVD_rank Initial guess rank, if SVD_rank is greater than the /// dimension of a mode, the factor matrix will be padded with random vectors. 
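    /// As a minimal illustrative sketch (mirroring what CP_ALS::build does for a tensor it can
    /// flatten directly; the names \c T and \c n are placeholders, and this class instead forms
    /// the same quantity by contracting through the connecting dimension):
    /// \code
    /// Tensor S(R, R);
    /// RTensor lambda(R);
    /// // square up mode n, then eigendecompose: the eigenvectors are mode-n singular vectors
    /// gemm(blas::Op::NoTrans, blas::Op::Trans, 1.0, flatten(T, n), flatten(T, n).conj(), 0.0, S);
    /// eigenvalue_decomp(S, lambda);  // on return, S holds the eigenvectors
    /// \endcode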
void make_svd_guess(ind_t SVD_rank){ std::vector left_modes, right_modes, result_modes, modes_w_dim_LT_svd; // Look through and find modes where I need to add extra columns { for (size_t i = 1; i < ndimL; i++) { if (tensor_ref_left.extent(i) < SVD_rank) { modes_w_dim_LT_svd.push_back(i - 1); } } for (size_t i = 1; i < ndimR; i++) { if (tensor_ref_right.extent(i) < SVD_rank) { modes_w_dim_LT_svd.push_back(i + ndimL - 2); } } } Tensor XXp, contracted; // Compute tensor_ref_right^T tensor_ref_right to keep the coupling dimension. // then contract with left hand side { for (size_t i = 0; i < ndimR; ++i) { left_modes.push_back(i); right_modes.push_back(i); } right_modes[0] = ndimR; result_modes.push_back(0); result_modes.push_back(ndimR); contract(this->one, tensor_ref_right, left_modes, tensor_ref_right.conj(), right_modes, this->zero, XXp, result_modes); left_modes.clear(); right_modes.clear(); for (size_t i = 0; i < ndimL; i++) { left_modes.push_back(i); right_modes.push_back(i); } right_modes[0] = ndim + 3; result_modes[1] = ndim + 3; contract(this->one, XXp, result_modes, tensor_ref_left, left_modes, this->zero, contracted, right_modes); right_modes[0] = 0; } // Compute the SVD tile for factors of tensor_ref_left { auto ptrr = right_modes.begin() + 1, ptrf = result_modes.begin(); for (size_t i = 1; i < ndimL; ++i, ++ptrr) { Tensor tucker; *(ptrr) = ndim + 3; *(ptrf) = i; contract(this->one, contracted, left_modes, tensor_ref_left.conj(), right_modes, this->zero, tucker, result_modes); *(ptrr) = i; auto R = tucker.extent(0); RTensor lambda(R); // Find the Singular vectors of the matrix using eigenvalue decomposition eigenvalue_decomp(tucker, lambda); // Fill a factor matrix with the singular vectors with the largest corresponding singular // values Tensor lambda_ (R, SVD_rank); lambda_.fill(this->zero); auto lower_bound = {0, 0}; auto upper_bound = {R, ((R > SVD_rank) ? SVD_rank : R)}; auto view = make_view(tucker.range().slice(lower_bound, upper_bound), tucker.storage()); auto l_iter = lambda_.begin(); for (auto iter = view.begin(); iter != view.end(); ++iter, ++l_iter) { *(l_iter) = *(iter); } A.push_back(lambda_); } } // compute tensor_ref_left^T tensor_ref_left to keep coupling dimension. // then contract with RHS { right_modes[0] = ndim + 3; result_modes[0] = 0; contract(this->one, tensor_ref_left, left_modes, tensor_ref_left.conj(), right_modes, this->zero, XXp, result_modes); left_modes.clear(); right_modes.clear(); for (size_t i = 0; i < ndimR; ++i) { left_modes.push_back(i); right_modes.push_back(i); } right_modes[0] = ndim + 3; result_modes[1] = ndim + 3; contracted = Tensor(); contract(this->one, XXp, result_modes, tensor_ref_right, left_modes, this->zero, contracted, right_modes); right_modes[0] = 0; auto ptrr = right_modes.begin() + 1, ptrf = result_modes.begin(); for (size_t i = 1; i < ndimR; ++i, ++ptrr) { Tensor tucker; *(ptrr) = ndim + 3; *(ptrf) = i; contract(this->one, contracted, left_modes, tensor_ref_right.conj(), right_modes, this->zero, tucker, result_modes); *(ptrr) = i; auto R = tucker.extent(0); RTensor lambda(R); // Find the Singular vectors of the matrix using eigenvalue decomposition eigenvalue_decomp(tucker, lambda); // Fill a factor matrix with the singular vectors with the largest corresponding singular // values Tensor lambda_(R, SVD_rank); lambda_.fill(this->zero); auto lower_bound = {0, 0}; auto upper_bound = {R, ((R > SVD_rank) ? 
SVD_rank : R)}; auto view = make_view(tucker.range().slice(lower_bound, upper_bound), tucker.storage()); auto l_iter = lambda_.begin(); for (auto iter = view.begin(); iter != view.end(); ++iter, ++l_iter) { *(l_iter) = *(iter); } A.push_back(lambda_); } } // Fill the remaining columns in the set of factor matrices with dimension < SVD_rank with random numbers for (auto &i : modes_w_dim_LT_svd) { ind_t R = A[i].extent(0), zero = 0; auto lower_bound = {zero, R}; auto upper_bound = {R, SVD_rank}; auto view = make_view(A[i].range().slice(lower_bound, upper_bound), A[i].storage()); fill_random(view); } // Normalize the columns of the factor matrices and // set the values al lambda, the weigt of each order 1 tensor Tensor lambda(Range{Range1{SVD_rank}}); A.push_back(lambda); for (size_t i = 0; i < ndim; ++i) { normCol(A[i]); } } /// Create a rank \c rank initial guess using /// random numbers from a uniform distribution /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit.. /// \param[in] direct The CP decomposition be computed without calculating the /// Khatri-Rao product? /// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. /// \param[in] calculate_epsilon Should the 2-norm /// error be calculated \f$ ||T_{\rm exact} - T_{\rm approx}|| = \epsilon \f$ . /// \param[in] step /// CP_ALS built from r =1 to r = rank. r increments by step. /// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in] SVD_initial_guess build inital guess from left singular vectors /// \param[in] SVD_rank rank of the initial guess using left singular vector /// \param[in, out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// return if \c fast_pI was successful void build_random(ind_t rank, ConvClass &converge_test, bool direct, ind_t max_als, bool calculate_epsilon, double &epsilon, bool &fast_pI) override { boost::random::mt19937 generator(random_seed_accessor()); boost::random::uniform_real_distribution<> distribution(-1.0, 1.0); for (size_t i = 1; i < ndimL; ++i) { auto &tensor_ref = tensor_ref_left; Tensor a(Range{Range1{tensor_ref.extent(i)}, Range1{rank}}); for (auto iter = a.begin(); iter != a.end(); ++iter) { *(iter) = distribution(generator); } A.push_back(a); } for (size_t i = 1; i < ndimR; ++i) { auto &tensor_ref = tensor_ref_right; Tensor a(tensor_ref.extent(i), rank); for (auto iter = a.begin(); iter != a.end(); ++iter) { *(iter) = distribution(generator); } this->A.push_back(a); } Tensor lambda(rank); lambda.fill(0.0); this->A.push_back(lambda); for (size_t i = 0; i < ndim; ++i) { normCol(i); } ALS(rank, converge_test, max_als, calculate_epsilon, epsilon, fast_pI); } /// performs the ALS method to minimize the loss function for a single rank /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. /// \param[in] dir The CP decomposition be computed without calculating the /// Khatri-Rao product? /// \param[in] max_als If CP decomposition is to finite /// error, max_als is the highest rank approximation computed before giving up /// on CP-ALS. Default = 1e5. /// \param[in] calculate_epsilon Should the 2-norm /// error be calculated ||T_exact - T_approx|| = epsilon. 
/// \param[in] tcutALS /// How small difference in factor matrices must be to consider ALS of a /// single rank converged. Default = 0.1. /// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// return if \c fast_pI was successful void ALS(ind_t rank, ConvClass &converge_test, int max_als, bool calculate_epsilon, double &epsilon, bool &fast_pI) { size_t count = 0; // Until either the initial guess is converged or it runs out of iterations // update the factor matrices with or without Khatri-Rao product // intermediate bool is_converged = false; bool matlab = fast_pI; Tensor MtKRP(A[ndim - 1].extent(0), rank); leftTimesRight = Tensor(1); leftTimesRight.fill(0.0); // std::cout << "count\tfit\tchange" << std::endl; while (count < max_als && !is_converged) { count++; this->num_ALS++; for (size_t i = 0; i < ndim; i++) { auto tmp = symmetries[i]; if (tmp == i) { direct(i, rank, fast_pI, matlab, converge_test); } else if (tmp < i) { A[i] = A[tmp]; } else { BTAS_EXCEPTION("Incorrectly defined symmetry"); } } is_converged = converge_test(A); } // Checks loss function if required detail::get_fit(converge_test, epsilon, (this->num_ALS == max_als)); epsilon = 1.0 - epsilon; // Checks loss function if required if (calculate_epsilon && epsilon == 2) { // TODO make this work for non-FitCheck convergence_classes //epsilon = this->norm(this->reconstruct() - tensor_ref); } } /// Computes an optimized factor matrix holding all others constant. /// No Khatri-Rao product computed, immediate contraction /// Does this by first contracting a factor matrix with the refrence tensor /// Then computes hadamard/contraction products along all other modes except n. /// Want A(I2, R) /// T(I1, I2, I3, I4) = B(X, I1, I2) Z(X, I3, I4) /// B(X, I1, I2) (Z(X, I3, I4) * A(I4, R)) = B(X, I1, I2) Z'(X, I3, R) /// B(X, I1, I2) (Z'(X, I3, R) (*) A(I3, R)) = B(X, I1, I2) Z'(X, R) (contract along I3, Hadamard along R) /// B(X, I1, I2) * Z'(X, R) = B'(I1, I2, R) /// B'(I1, I2, R) (*) A(I1, R) = B'(I2, R) = A(I2, R) * V(R, R) /// \param[in] n The mode being optimized, all other modes held constant /// \param[in] rank The current rank, column dimension of the factor matrices /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition /// return if computing \c fast_pI was successful. /// \param[in, out] matlab If \c fast_pI = true then try to solve VA = B instead of taking pseudoinverse /// in the same manner that matlab would compute the inverse. /// return \c matlab was successful /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. test to see if the ALS is converged void direct(size_t n, ind_t rank, bool &fast_pI, bool &matlab, ConvClass &converge_test) { // Determine if n is in the left or the right tensor bool leftTensor = n < (ndimL - 1); Tensor an(A[n].range()); if (lastLeft != leftTensor) { dims = std::vector((leftTensor ? tensor_ref_left.rank() : tensor_ref_right.rank())); Tensor K(tensor_ref_right.extent(0), rank); { lastLeft = leftTensor; // want the tensor without n if n is in the left tensor take the right one and vice versa auto &tensor_ref = leftTensor ? tensor_ref_right : tensor_ref_left; // How many dimension in this side of the tensor size_t ndimCurr = tensor_ref.rank(); ord_t sizeCurr = tensor_ref.size(); // save range for resize at the end. 
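          // note: this whole branch runs only when the optimized mode has switched sides
          // (lastLeft != leftTensor); the hadamard-contracted intermediate for the other
          // tensor (stored in the member leftTimesRight below) is otherwise reused across calls.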
auto R = tensor_ref.range(); // Start by contracting with the last dimension of tensor without n // This is important for picking the correct factor matrix // not for picking from tensor_ref int contract_dim_inter = leftTensor ? ndim - 1 : ndimL - 2; // This is for size of the dimension being contracted // picked from tensor_ref ind_t contract_size = tensor_ref.extent(ndimCurr - 1); // Make the intermediate that will be contracted then hadamard contracted // Also resize the tensor for gemm contraction Tensor contract_tensor(sizeCurr / contract_size, rank); tensor_ref.resize(Range{Range1{sizeCurr / contract_size}, Range1{contract_size}}); // Contract out the last dimension gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, tensor_ref.conj(), A[contract_dim_inter].conj(), 0.0, contract_tensor); // Resize tensor_ref back to original size tensor_ref.resize(R); // This is the size of the LH dimension of contract_tensor sizeCurr /= tensor_ref.extent(ndimCurr - 1); // for A now choose the next factor matrix --contract_dim_inter; // Now want to hadamard contract all the dimension that aren't the connecting dimension for (size_t i = 0; i < ndimCurr - 2; ++i, --contract_dim_inter) { // The contract_size now starts at the second last dimension contract_size = tensor_ref.extent(ndimCurr - 2 - i); // Store the LH most dimension size in idx1 ord_t idx1 = sizeCurr / contract_size; contract_tensor.resize(Range{Range1{idx1}, Range1{contract_size}, Range1{rank}}); // After hadamard product middle dimension is gone Tensor temp(idx1, rank); temp.fill(0.0); const auto &a = A[contract_dim_inter].conj(); ord_t j_times_rank = 0, j_times_cont_rank = 0; for (ind_t j = 0; j < idx1; ++j, j_times_rank += rank) { auto *temp_ptr = temp.data() + j_times_rank; ord_t k_times_rank = 0; for (ind_t k = 0; k < contract_size; ++k, k_times_rank += rank) { const auto *contract_ptr = contract_tensor.data() + j_times_cont_rank + k_times_rank; const auto *A_ptr = a.data() + k_times_rank; for (ord_t r = 0; r < rank; ++r) { *(temp_ptr + r) += *(contract_ptr + r) * *(A_ptr + r); } } j_times_cont_rank += k_times_rank; } // After hadamard contract reset contract_tensor with new product contract_tensor = temp; // Remove the contracted dimension from the current size. sizeCurr = idx1; } // set the hadamard contracted tensor to the intermediate K K = contract_tensor; } { // contract K with the other side tensor // Tensor_ref now can be the side that contains n Tensor &tensor_ref = leftTensor ? tensor_ref_left : tensor_ref_right; // Modifying the dimension of tensor_ref so store the range here to resize // after contraction. 
Range R = tensor_ref.range(); // make the new factor matrix for the subsequent process // LH side of tensor after contracting (doesn't include rank or connecting dimension) ord_t LH_size = tensor_ref.size() / tensor_ref.extent(0); // Temp holds the intermediate after contracting out the connecting dimension // It will be set up to enter the Hadamard product loop leftTimesRight = Tensor(LH_size, rank); // resize tensor_ref to remove connecting dimension tensor_ref.resize(Range{Range1{tensor_ref.extent(0)}, Range1{LH_size}}); gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, tensor_ref, K, 0.0, leftTimesRight); // resize tensor_ref back to original dimensions tensor_ref.resize(R); // std::vector dims(tensor_ref.rank()); for (size_t i = 1; i < tensor_ref.rank(); ++i) { dims[i - 1] = tensor_ref.extent(i); } dims[dims.size() - 1] = rank; } } Tensor contract_tensor = leftTimesRight; ord_t LH_size = contract_tensor.size() / rank; // If the Hadamard loop has to skip a dimension it is stored here. ord_t pseudo_rank = rank; // number of dimensions in tensor_ref size_t ndimCurr = leftTensor ? tensor_ref_left.rank() : tensor_ref_right.rank(); // the dimension that is being Hadamard-contracted out. size_t contract_dim = ndimCurr - 2, nInTensor = leftTensor ? n : n - ndimL + 1, a_dim = leftTensor ? contract_dim : ndim - 1, offset = 0; // go through the Hadamard contraction on all dimensions excluding rank (will skip one dimension) for (size_t i = 0; i < ndimCurr - 2; ++i, --contract_dim, --a_dim) { auto contract_size = dims[contract_dim]; LH_size /= contract_size; contract_tensor.resize(Range{Range1{LH_size}, Range1{contract_size}, Range1{pseudo_rank}}); Tensor temp(Range{Range1{LH_size}, Range1{pseudo_rank}}); const auto &a = A[a_dim].conj(); temp.fill(this->zero); // If the middle dimension is the mode not being contracted, move it to // the right-hand side, i.e. reshape temp to (size of tensor_ref / product of // contracted dimensions, rank * mode-n dimension) if (nInTensor == contract_dim) { pseudo_rank *= contract_size; offset = contract_size; } // If the code hasn't hit the mode of interest yet, it will contract // over the middle dimension and sum over the rank.
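// Pseudo-rank bookkeeping sketch: once the mode being optimized (nInTensor)
// shows up as the middle dimension it cannot be contracted away, so it is
// folded into the fast index: a (J, I_n, R) intermediate is treated as
// (J, I_n * R) with pseudo_rank = I_n * R, and the branch below that handles
// contract_dim < nInTensor sums over the true rank R only, slice by slice.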
else if (contract_dim > nInTensor) { ord_t j_times_rank = 0, j_times_cont_rank = 0; for (ind_t j = 0; j < LH_size; ++j, j_times_rank += pseudo_rank) { auto *temp_ptr = temp.data() + j_times_rank; ord_t k_times_rank = 0; for (ind_t k = 0; k < contract_size; ++k, k_times_rank += pseudo_rank) { const auto *contract_ptr = contract_tensor.data() + j_times_cont_rank + k_times_rank; const auto *A_ptr = a.data() + k_times_rank; for (ind_t r = 0; r < pseudo_rank; ++r) { *(temp_ptr + r) += *(contract_ptr + r) * *(A_ptr + r); } } j_times_cont_rank += k_times_rank; } contract_tensor = temp; } // If the code has passed the mode of interest, it will contract over // the middle dimension and sum over rank * mode n dimension else { ord_t j_times_rank = 0, j_times_cont_rank = 0; for (ind_t j = 0; j < LH_size; ++j, j_times_rank += pseudo_rank) { auto *temp_ptr = temp.data() + j_times_rank; ord_t k_times_prank = 0, k_times_rank = 0; for (ind_t k = 0; k < contract_size; ++k, k_times_prank += pseudo_rank, k_times_rank += rank) { const auto *A_ptr = a.data() + k_times_rank; ord_t l_times_rank = 0; for (ind_t l = 0; l < offset; ++l, l_times_rank += rank) { const auto *contract_ptr = contract_tensor.data() + j_times_cont_rank + k_times_prank + l_times_rank; for (ind_t r = 0; r < rank; ++r) { *(temp_ptr + l * rank + r) += *(contract_ptr + r) * *(A_ptr + r); // temp(j, l*rank + r) += contract_tensor(j,k,l*rank+r) * A[a_dim](k,r); } } } // advance by the full row stride of contract_tensor (contract_size * pseudo_rank) j_times_cont_rank += k_times_prank; } contract_tensor = temp; } } // If the mode of interest is the 0th mode, then the loop above contracts // over all other dimensions and the resulting temp has the correct // dimensions. If the mode of interest isn't the 0th mode, the 0th mode must // be contracted out here; the loop above can't perform this contraction // because the mode of interest is coupled with the rank if (nInTensor != 0) { ind_t contract_size = contract_tensor.extent(0); Tensor temp(Range{Range1{offset}, Range1{rank}}); contract_tensor.resize(Range{Range1{contract_size}, Range1{offset}, Range1{rank}}); temp.fill(this->zero); const auto &a = A[a_dim].conj(); ord_t i_times_rank = 0, i_times_off_rank = 0; for (ind_t i = 0; i < contract_size; i++, i_times_rank += rank) { const auto *A_ptr = a.data() + i_times_rank; ord_t j_times_rank = 0; for (ind_t j = 0; j < offset; j++, j_times_rank += rank) { const auto *contract_ptr = contract_tensor.data() + i_times_off_rank + j_times_rank; auto *temp_ptr = temp.data() + j_times_rank; for (ord_t r = 0; r < rank; r++) { *(temp_ptr + r) += *(A_ptr + r) * *(contract_ptr + r); } } i_times_off_rank += j_times_rank; } contract_tensor = temp; } detail::set_MtKRP(converge_test, contract_tensor); // multiply resulting matrix temp by pseudoinverse to calculate optimized // factor matrix // t1 = std::chrono::high_resolution_clock::now(); this->pseudoinverse_helper(n, fast_pI, matlab, contract_tensor); // t2 = std::chrono::high_resolution_clock::now(); // time = t2 - t1; // gemm_wPI += time.count(); // Normalize the columns of the new factor matrix and update normCol(contract_tensor); A[n] = contract_tensor; } }; } // namespace btas #endif //BTAS_GENERIC_CP_DF_ALS_HBTAS-1.0.0/btas/generic/cp_rals.h000066400000000000000000000206461476142407000164170ustar00rootroot00000000000000// // Created by Karl Pierce on 7/24/19.
// #ifndef BTAS_GENERIC_CP_RALS_H #define BTAS_GENERIC_CP_RALS_H #include #include #include #include #include #include #ifdef BTAS_HAS_INTEL_MKL #include #endif namespace btas { /** \brief Computes the Canonical Product (CP) decomposition of an order-N tensor using regularized alternating least squares (RALS). This computes the CP decomposition of btas::Tensor objects with row-major storage only, with fixed (compile-time) and variable (run-time) ranks. Also provides Tucker and randomized Tucker-like compressions coupled with CP-RALS decomposition. Does not support strided ranges. \warning this code takes a non-const reference \c tensor_ref but does not modify the values. This is a result of the API (reshape needs a non-const tensor) Synopsis: \code // Constructors CP_RALS A(tensor) // CP_RALS object with empty factor // matrices and no symmetries CP_RALS A(tensor, symms) // CP_RALS object with empty factor // matrices and symmetries // Operations A.compute_rank(rank, converge_test) // Computes the CP_RALS of tensor to // rank, rank build and HOSVD options A.compute_rank_random(rank, converge_test) // Computes the CP_RALS of tensor to // rank. Factor matrices built at rank // with random numbers A.compute_error(converge_test, omega) // Computes the CP_RALS of tensor to // 2-norm // error < omega. A.compute_geometric(rank, converge_test, step) // Computes CP_RALS of tensor to // rank with // geometric steps of step between // guesses. A.compute_PALS(converge_test) // computes CP_RALS of tensor to // rank = 3 * max_dim(tensor) // in 4 panels using a modified // HOSVD initial guess A.compress_compute_tucker(tcut_SVD, converge_test) // Computes Tucker decomposition // using // truncated SVD method then // computes finite // error CP decomposition on core // tensor. A.compress_compute_rand(rank, converge_test) // Computes random decomposition on // Tensor to // make core tensor with every mode // size rank // Then computes CP decomposition // of core. //See documentation for full range of options // Accessing Factor Matrices A.get_factor_matrices() // Returns a vector of factor matrices, if // they have been computed A.reconstruct() // Returns the tensor computed using the // CP factor matrices \endcode */ template > class CP_RALS : public CP_ALS { public: using CP::A; using CP::ndim; using CP::normCol; using CP::generate_KRP; using CP::generate_V; using CP::norm; using CP::symmetries; using typename CP::ind_t; using typename CP::ord_t; using CP_ALS::tensor_ref; using CP_ALS::size; /// Create a CP RALS object, child class of the CP object /// that stores the reference tensor. /// Reference tensor has no symmetries. /// \param[in] tensor the reference tensor to be decomposed. CP_RALS(Tensor &tensor) : CP_ALS(tensor) { for (size_t i = 0; i < ndim; ++i) { symmetries.push_back(i); } } /// Create a CP RALS object, child class of the CP object /// that stores the reference tensor. /// Reference tensor has symmetries. /// Symmetries should be set such that higher mode indices /// are set equal to lower mode indices (a 4th order tensor /// where the second & third modes are equal would have /// symmetries of {0,1,1,3}) /// \param[in] tensor the reference tensor to be decomposed. /// \param[in] symms the symmetries of the reference tensor.
CP_RALS(Tensor &tensor, std::vector &symms) : CP(tensor.rank()) { symmetries = symms; if (symmetries.size() > ndim) BTAS_EXCEPTION("Too many symmetries provided") for (size_t i = 0; i < ndim; ++i) { if (symmetries[i] > i) BTAS_EXCEPTION("Symmetries should always refer to factors at earlier positions"); } } ~CP_RALS() = default; protected: RALSHelper helper; // Helper object to compute regularized steps /// Performs the RALS method to minimize the loss function for a single rank /// \param[in] rank The rank of the CP decomposition. /// \param[in, out] converge_test Test to see if the ALS is converged; holds the value of the fit. /// \param[in] dir Should the CP decomposition be computed without calculating the /// Khatri-Rao product? /// \param[in] max_als If the CP decomposition is to finite /// error, max_als is the maximum number of ALS iterations performed before giving up /// on CP-RALS. Default = 1e5. /// \param[in] calculate_epsilon Should the 2-norm /// error be calculated ||T_exact - T_approx|| = epsilon. /// \param[in] tcutALS /// How small the difference in factor matrices must be to consider ALS of a /// single rank converged. Default = 0.1. /// \param[in, out] epsilon The 2-norm /// error between the exact and approximated reference tensor /// \param[in,out] fast_pI Should the pseudoinverse be computed using a fast Cholesky decomposition; /// on return, indicates whether \c fast_pI was successful. void ALS(ind_t rank, ConvClass &converge_test, bool dir, ind_t max_als, bool calculate_epsilon, double &epsilon, bool &fast_pI) { size_t count = 0; helper = RALSHelper(A); const auto s0 = 1.0; std::vector lambda(ndim, 1.0); const auto alpha = 0.8; // Until either the initial guess is converged or it runs out of iterations // update the factor matrices with or without Khatri-Rao product // intermediate bool is_converged = false; bool matlab = fast_pI; while (count < max_als && !is_converged) { count++; this->num_ALS++; for (size_t i = 0; i < ndim; i++) { auto tmp = symmetries[i]; if (tmp != i) { A[i] = A[tmp]; lambda[i] = lambda[tmp]; } else if (dir) { this->direct(i, rank, fast_pI, matlab, converge_test, tensor_ref, lambda[i]); } else { update_w_KRP(i, rank, fast_pI, matlab, converge_test, lambda[i]); } // Compute the value s after normalizing the columns auto & ai = A[i]; this->s = helper(i, ai); // recompute lambda lambda[i] = (lambda[i] * (this->s * this->s) / (s0 * s0)) * alpha + (1 - alpha) * lambda[i]; } is_converged = converge_test(A); } // Checks loss function if required detail::get_fit(converge_test, epsilon, (this->num_ALS == max_als)); epsilon = 1.0 - epsilon; // Computes the 2-norm error explicitly if required if (calculate_epsilon && epsilon == 2) { epsilon = this->norm(this->reconstruct() - tensor_ref); } } }; } // namespace btas #endif //BTAS_GENERIC_CP_RALS_H BTAS-1.0.0/btas/generic/default_random_seed.h000066400000000000000000000005561476142407000207560ustar00rootroot00000000000000// // Created by Karl Pierce on 5/16/19. // #ifndef BTAS_GENERIC_DEFAULT_RANDOM_SEED_H #define BTAS_GENERIC_DEFAULT_RANDOM_SEED_H namespace btas{ // A seed for the random number generator.
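// The accessor below returns a mutable reference, so callers may reseed the
// generators that consult it before building random initial guesses, e.g.
// (usage sketch): btas::random_seed_accessor() = 1234u;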
static inline unsigned int& random_seed_accessor(){ static unsigned int value = 3; return value; } } //namespace btas #endif //BTAS_GENERIC_DEFAULT_RANDOM_SEED_H BTAS-1.0.0/btas/generic/dot_impl.h000066400000000000000000000251771476142407000166070ustar00rootroot00000000000000#ifndef __BTAS_DOT_IMPL_H #define __BTAS_DOT_IMPL_H 1 #include #include #include #include #include #include #include #include namespace btas { namespace detail { template struct dot_return_type; template struct dot_return_type<_T, Tail...> { private: using tail_type = typename dot_return_type::type; public: using type = decltype( std::declval<_T>() * std::declval() ); }; template struct dot_return_type<_T> { using type = _T; }; template using dot_return_type_t = typename dot_return_type<_Ts...>::type; /* template struct tensor_all_scalar_values; template struct tensor_all_scalar_values< Head, Tail... > { private: static constexpr bool head_value = tensor_all_scalar_values::value; static constexpr bool tail_value = tensor_all_scalar_values::value; public: static constexpr bool value = head_value and tail_value; }; template struct tensor_all_scalar_values< _Tensor > { static constexpr bool value = is_scalar_arithmetic_v< typename _Tensor::value_type >; }; template struct tensor_dot_return_type; template struct tensor_dot_return_type< std::enable_if_t::value>, _Tensors... > { using type = dot_return_type_t; }; template struct tensor_dot_return_type< std::enable_if_t::value>, _Tensors... > { using type = dot_return_type_t; }; template using tensor_dot_return_type_t = typename tensor_dot_return_type<_Tensors...>::type; */ } template struct dotc_impl; template struct dotu_impl; // Finalized DOTC impl template <> struct dotc_impl { template static auto call_impl ( const unsigned long& Nsize, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY, generic_impl_tag) { auto val = impl::conj(*itrX) * (*itrY); itrX += incX; itrY += incY; for (unsigned long i = 1; i < Nsize; ++i, itrX += incX, itrY += incY) { val += impl::conj(*itrX) * (*itrY); } return val; } #ifdef BTAS_HAS_BLAS_LAPACK template static auto call_impl ( const unsigned long& Nsize, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY, blas_lapack_impl_tag) { static_assert(std::is_same_v,iterator_value_t<_IteratorY>>, "mismatching iterator value types"); using T = iterator_value_t<_IteratorX>; // XXX: DOTC == DOT in BLASPP return blas::dot( Nsize, static_cast(&(*itrX)), incX, static_cast(&(*itrY)), incY ); } #endif template static auto call ( const unsigned long& Nsize, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY ) { return call_impl( Nsize, itrX, incX, itrY, incY, blas_lapack_impl_t<_IteratorX,_IteratorY>() ); } }; // Finalized DOTU impl template <> struct dotu_impl { template static auto call_impl ( const unsigned long& Nsize, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY, generic_impl_tag) { auto val = (*itrX) * (*itrY); itrX += incX; itrY += incY; for (unsigned long i = 1; i < Nsize; ++i, itrX += incX, itrY += incY) { val += (*itrX) * (*itrY); } return val; } #ifdef BTAS_HAS_BLAS_LAPACK template static auto call_impl ( const unsigned long& Nsize, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const 
iterator_difference_t<_IteratorY>& incY, blas_lapack_impl_tag) { static_assert(std::is_same_v,iterator_value_t<_IteratorY>>, "mismatching iterator value types"); using T = iterator_value_t<_IteratorX>; return blas::dotu( Nsize, static_cast(&(*itrX)), incX, static_cast(&(*itrY)), incY ); } #endif template static auto call ( const unsigned long& Nsize, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY ) { return call_impl( Nsize, itrX, incX, itrY, incY, blas_lapack_impl_t<_IteratorX,_IteratorY>() ); } }; /// Unfinalized DOTC impl template <> struct dotc_impl { template static auto call ( const unsigned long& Nsize, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY ) { auto val = dotc( *itrX, *itrY ); itrX += incX; itrY += incY; for (unsigned long i = 1; i < Nsize; ++i, itrX += incX, itrY += incY) { val += dotc(*itrX, *itrY); } return val; } }; /// Unfinalized DOTU impl template <> struct dotu_impl { template static auto call ( const unsigned long& Nsize, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY ) { auto val = dotu( *itrX, *itrY ); itrX += incX; itrY += incY; for (unsigned long i = 1; i < Nsize; ++i, itrX += incX, itrY += incY) { val += dotu(*itrX, *itrY); } return val; } }; // ================================================================================================ /// Generic implementation of BLAS DOT in terms of C++ iterator template auto dotc ( const unsigned long& Nsize, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY ) { typedef std::iterator_traits<_IteratorX> __traits_X; typedef std::iterator_traits<_IteratorY> __traits_Y; static_assert(std::is_same::value, "value type of Y must be the same as that of X"); static_assert(is_random_access_iterator_v<_IteratorX>, "iterator X must be a random access iterator"); static_assert(is_random_access_iterator_v<_IteratorY>, "iterator Y must be a random access iterator"); constexpr bool value_is_scalar = is_scalar_arithmetic_v< typename __traits_X::value_type>; return dotc_impl::call(Nsize, itrX, incX, itrY, incY); } /// Generic implementation of BLAS DOT in terms of C++ iterator template auto dotu ( const unsigned long& Nsize, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY ) { typedef std::iterator_traits<_IteratorX> __traits_X; typedef std::iterator_traits<_IteratorY> __traits_Y; static_assert(std::is_same::value, "value type of Y must be the same as that of X"); static_assert(is_random_access_iterator_v<_IteratorX>, "iterator X must be a random access iterator"); static_assert(is_random_access_iterator_v<_IteratorY>, "iterator Y must be a random access iterator"); constexpr bool value_is_scalar = is_scalar_arithmetic_v< typename __traits_X::value_type>; return dotu_impl::call(Nsize, itrX, incX, itrY, incY); } /// Generic implementation of BLAS DOT in terms of C++ iterator template auto dot ( const unsigned long& Nsize, _IteratorX itrX, const typename std::iterator_traits<_IteratorX>::difference_type& incX, _IteratorY itrY, const typename std::iterator_traits<_IteratorY>::difference_type& incY) { return dotc(Nsize, itrX, incX, itrY, incY); } // ================================================================================================ /// 
Convenient wrapper to call BLAS DOT-C from tensor objects template< class _TensorX, class _TensorY, class = typename std::enable_if< is_tensor<_TensorX>::value & is_tensor<_TensorY>::value >::type > detail::dot_return_type_t< typename _TensorX::numeric_type, typename _TensorY::numeric_type > dotc (const _TensorX& X, const _TensorY& Y) { typedef typename _TensorX::value_type value_type; static_assert(std::is_same::value, "value type of Y must be the same as that of X"); if (X.empty() || Y.empty()) { return 0; } auto itrX = tbegin(X); auto itrY = tbegin(Y); return dotc(X.size(), itrX, 1, itrY, 1); } /// Convenient wrapper to call BLAS DOT-U from tensor objects template< class _TensorX, class _TensorY, class = typename std::enable_if< is_tensor<_TensorX>::value & is_tensor<_TensorY>::value >::type > detail::dot_return_type_t< typename _TensorX::numeric_type, typename _TensorY::numeric_type > dotu (const _TensorX& X, const _TensorY& Y) { typedef typename _TensorX::value_type value_type; static_assert(std::is_same::value, "value type of Y must be the same as that of X"); if (X.empty() || Y.empty()) { return 0; } auto itrX = tbegin(X); auto itrY = tbegin(Y); return dotu(X.size(), itrX, 1, itrY, 1); } /// Convenient wrapper to call BLAS DOT from tensor objects template< class _TensorX, class _TensorY, class = typename std::enable_if< is_tensor<_TensorX>::value & is_tensor<_TensorY>::value >::type > auto dot (const _TensorX& X, const _TensorY& Y) { return dotc(X, Y); } } // namespace btas #endif // __BTAS_DOT_IMPL_H BTAS-1.0.0/btas/generic/element_wise_contract.h000066400000000000000000000174021476142407000213450ustar00rootroot00000000000000// // Created by Karl Pierce on 2/17/22. // #ifndef BTAS_GENERIC_ELEMENT_WISE_CONTRACT_H #define BTAS_GENERIC_ELEMENT_WISE_CONTRACT_H namespace btas{ // This compute \alpha A(i, j, r) * B(j, r) + \beta C(i,r) = C(i,r) template< typename _T, class _TensorA, class _TensorB, class _TensorC, class = typename std::enable_if< is_boxtensor<_TensorA>::value & is_boxtensor<_TensorB>::value & is_boxtensor<_TensorC>::value & std::is_same::value & std::is_same::value >::type > void middle_contract(_T alpha, const _TensorA& A, const _TensorB& B, _T beta, _TensorC& C){ static_assert(boxtensor_storage_order<_TensorA>::value == boxtensor_storage_order<_TensorC>::value && boxtensor_storage_order<_TensorB>::value == boxtensor_storage_order<_TensorC>::value, "btas::middle_contract does not support mixed storage order"); static_assert(boxtensor_storage_order<_TensorC>::value != boxtensor_storage_order<_TensorC>::other, "btas::middle_contract does not support non-major storage order"); typedef typename _TensorA::value_type value_type; using ind_t = typename _TensorA::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; BTAS_ASSERT(A.rank() == 3) BTAS_ASSERT(B.rank() == 2) BTAS_ASSERT(A.extent(1) == B.extent(0)) BTAS_ASSERT(A.extent(2) == B.extent(1)) if(!C.empty()){ BTAS_ASSERT(C.rank() == 2); BTAS_ASSERT(C.extent(0) == A.extent(0)) BTAS_ASSERT(C.extent(1) == A.extent(2)) } else{ C = _TensorC(A.extent(0), A.extent(2)); NumericType::fill(std::begin(C), std::end(C), NumericType::zero()); } ind_t idx1 = A.extent(0), idx2 = A.extent(1), rank = A.extent(2); ord_t i_times_rank = 0, i_times_rank_idx2 = 0; for (ind_t i = 0; i < idx1; i++, i_times_rank += rank) { auto *C_ptr = C.data() + i_times_rank; ord_t j_times_rank = 0; for (ind_t j = 0; j < idx2; j++, j_times_rank += rank) { const auto *A_ptr = A.data() + i_times_rank_idx2 + j_times_rank; const auto 
*B_ptr = B.data() + j_times_rank; for (ind_t r = 0; r < rank; r++) { *(C_ptr + r) += alpha * (*(A_ptr + r) * *(B_ptr + r)) + beta * *(C_ptr + r); } } i_times_rank_idx2 += j_times_rank; } } // this does the elementwise contraction \alpha A(i,j,k,r) * B(j, r) + \beta C(i,k,r) = C(i,k,r) template< typename _T, class _TensorA, class _TensorB, class _TensorC, class = typename std::enable_if< is_boxtensor<_TensorA>::value & is_boxtensor<_TensorB>::value & is_boxtensor<_TensorC>::value & std::is_same::value & std::is_same::value >::type > void middle_contract_with_pseudorank(_T alpha, const _TensorA & A, const _TensorB& B, _T beta, _TensorC& C){ static_assert(boxtensor_storage_order<_TensorA>::value == boxtensor_storage_order<_TensorC>::value && boxtensor_storage_order<_TensorB>::value == boxtensor_storage_order<_TensorC>::value, "btas::middle_contract does not support mixed storage order"); static_assert(boxtensor_storage_order<_TensorC>::value != boxtensor_storage_order<_TensorC>::other, "btas::middle_contract does not support non-major storage order"); typedef typename _TensorA::value_type value_type; using ind_t = typename _TensorA::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; BTAS_ASSERT(A.rank() == 3) BTAS_ASSERT(B.rank() == 2) BTAS_ASSERT(A.extent(1) == B.extent(0)) ind_t rank = B.extent(1), idx3 = A.extent(2) / rank; BTAS_ASSERT(A.extent(2) / idx3 == B.extent(1)); if(!C.empty()){ BTAS_ASSERT(C.rank() == 2); BTAS_ASSERT(C.extent(0) == A.extent(0)) BTAS_ASSERT(C.extent(1) == A.extent(2)) } else{ C = _TensorC(A.extent(0), A.extent(2)); NumericType::fill(std::begin(C), std::end(C), NumericType::zero()); } ind_t idx1 = A.extent(0), idx2 = A.extent(1), pseudo_rank = A.extent(2); ord_t i_times_rank = 0, i_times_rank_idx2 = 0; for (ind_t i = 0; i < idx1; ++i, i_times_rank += pseudo_rank) { auto *C_ptr = C.data() + i_times_rank; ord_t j_times_prank = 0, j_times_rank = 0; for (ind_t j = 0; j < idx2; ++j, j_times_prank += pseudo_rank, j_times_rank += rank) { const auto *A_ptr = A.data() + i_times_rank_idx2 + j_times_prank; const auto *B_ptr = B.data() + j_times_rank; ord_t k_times_rank = 0; for (ind_t k = 0; k < idx3; ++k, k_times_rank += rank) { for (ind_t r = 0; r < rank; ++r) { *(C_ptr + k_times_rank + r) += alpha * ( *(A_ptr + k_times_rank + r) * *(B_ptr + r)) + beta * *(C_ptr + k_times_rank + r); } } } i_times_rank_idx2 += j_times_prank; } } // this computes \alpha A(i,j,r) * B(i,r) + \beta C(j,r) = C(j,r) template< typename _T, class _TensorA, class _TensorB, class _TensorC, class = typename std::enable_if< is_boxtensor<_TensorA>::value & is_boxtensor<_TensorB>::value & is_boxtensor<_TensorC>::value & std::is_same::value & std::is_same::value >::type > void front_contract(_T alpha, const _TensorA & A, const _TensorB& B, _T beta, _TensorC& C){ static_assert(boxtensor_storage_order<_TensorA>::value == boxtensor_storage_order<_TensorC>::value && boxtensor_storage_order<_TensorB>::value == boxtensor_storage_order<_TensorC>::value, "btas::middle_contract does not support mixed storage order"); static_assert(boxtensor_storage_order<_TensorC>::value != boxtensor_storage_order<_TensorC>::other, "btas::middle_contract does not support non-major storage order"); typedef typename _TensorA::value_type value_type; using ind_t = typename _TensorA::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; BTAS_ASSERT(A.rank() == 3) BTAS_ASSERT(B.rank() == 2) BTAS_ASSERT(A.extent(0) == B.extent(0)) BTAS_ASSERT(A.extent(2) == B.extent(1)) 
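// Shape sketch: with A of extents (i, j, r) and B of extents (i, r), the
// result C(j, r) must have extents (A.extent(1), A.extent(2)); the checks and
// the allocation below enforce exactly that shape.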
if(!C.empty()){ BTAS_ASSERT(C.rank() == 2); BTAS_ASSERT(C.extent(0) == A.extent(1)) BTAS_ASSERT(C.extent(1) == A.extent(2)) } else{ C = _TensorC(A.extent(1), A.extent(2)); NumericType::fill(std::begin(C), std::end(C), NumericType::zero()); } ind_t idx1 = A.extent(0), idx2 = A.extent(1), rank = A.extent(2); ord_t i_times_rank = 0, i_times_rank_idx2 = 0; for (ind_t i = 0; i < idx1; i++, i_times_rank += rank) { const auto *B_ptr = B.data() + i_times_rank; ord_t j_times_rank = 0; for (ind_t j = 0; j < idx2; j++, j_times_rank += rank) { const auto *A_ptr = A.data() + i_times_rank_idx2 + j_times_rank; auto *C_ptr = C.data() + j_times_rank; for (ind_t r = 0; r < rank; r++) { *(C_ptr + r) += *(B_ptr + r) * *(A_ptr + r); } } i_times_rank_idx2 += j_times_rank; } } } #endif // BTAS_GENERIC_ELEMENT_WISE_CONTRACT_H BTAS-1.0.0/btas/generic/flatten.h000066400000000000000000000026511476142407000164250ustar00rootroot00000000000000#ifndef BTAS_FLATTEN_H #define BTAS_FLATTEN_H namespace btas { /// methods to matricize an order-N tensor along the n-th fiber /// \param[in] A The order-N tensor one wishes to flatten. /// \param[in] mode The mode of \c A to be flattened, i.e. /// \f[ A(I_1, I_2, I_3, ..., I_{mode}, ..., I_N) -> A(I_{mode}, J)\f] /// where \f$J = I_1 * I_2 * ...I_{mode-1} * I_{mode+1} * ... * I_N.\f$ /// \return Matrix with dimension \f$(I_{mode}, J)\f$ template Tensor flatten(Tensor A, size_t mode) { using ord_t = typename range_traits::ordinal_type; using ind_t = typename Tensor::range_type::index_type::value_type; // We are going to first make the order-N tensor into an order-3 tensor with // (modes before `mode`, `mode`, modes after `mode`) auto dim_mode = A.extent(mode); Tensor flat(dim_mode, A.range().area() / dim_mode); size_t ndim = A.rank(); ord_t dim1 = 1, dim3 = 1; for (ind_t i = 0; i < ndim; ++i) { if (i < mode) dim1 *= A.extent(i); else if (i > mode) dim3 *= A.extent(i); } A.resize(Range{Range1{dim1}, Range1{dim_mode}, Range1{dim3}}); for (ord_t i = 0; i < dim1; ++i) { for (ind_t j = 0; j < dim_mode; ++j) { for (ord_t k = 0; k < dim3; ++k) { flat(j, i * dim3 + k) = A(i,j,k); } } } return flat; } } // namespace btas #endif // BTAS_FLATTEN_H BTAS-1.0.0/btas/generic/gemm_impl.h000066400000000000000000000503461476142407000167410ustar00rootroot00000000000000#ifndef __BTAS_GEMM_IMPL_H #define __BTAS_GEMM_IMPL_H 1 #include #include #include #include #include #include #include #include #include #include #include namespace btas { template struct gemm_impl { }; template<> struct gemm_impl { template static void call_impl ( const blas::Layout& order, const blas::Op& transA, const blas::Op& transB, const unsigned long& Msize, const unsigned long& Nsize, const unsigned long& Ksize, const _T& alpha, _IteratorA itrA, const unsigned long& LDA, _IteratorB itrB, const unsigned long& LDB, const _T& beta, _IteratorC itrC, const unsigned long& LDC, generic_impl_tag) { // For column-major order, recast this as C^T = B^T * A^T in row-major order if (order == blas::Layout::ColMajor) { gemm_impl::call(blas::Layout::RowMajor, transB, transA, Nsize, Msize, Ksize, alpha, itrB, LDB, itrA, LDA, beta, itrC, LDC); return; } if (beta == NumericType<_T>::zero()) { std::fill_n(itrC, Msize*Nsize, NumericType::value_type>::zero()); } else if (beta != NumericType<_T>::one()) { scal (Msize*Nsize, beta, itrC, 1); } // A:NoTrans / B:NoTrans if (transA == blas::Op::NoTrans && transB == blas::Op::NoTrans) { auto itrB_save = itrB; auto itrC_save = itrC; for (size_type i = 0; i < Msize; ++i) { itrB = itrB_save; for
(size_type k = 0; k < Ksize; ++k, ++itrA) { itrC = itrC_save; for (size_type j = 0; j < Nsize; ++j, ++itrB, ++itrC) { (*itrC) += alpha * (*itrA) * (*itrB); } } itrC_save += Nsize; } } // A:NoTrans / B:Trans else if (transA == blas::Op::NoTrans && transB == blas::Op::Trans) { auto itrA_save = itrA; auto itrB_save = itrB; for (size_type i = 0; i < Msize; ++i) { itrB = itrB_save; for (size_type j = 0; j < Nsize; ++j, ++itrC) { itrA = itrA_save; for (size_type k = 0; k < Ksize; ++k, ++itrA, ++itrB) { (*itrC) += alpha * (*itrA) * (*itrB); } } itrA_save += Ksize; } } // A:NoTrans / B:ConjTrans else if (transA == blas::Op::NoTrans && transB == blas::Op::ConjTrans) { auto itrA_save = itrA; auto itrB_save = itrB; for (size_type i = 0; i < Msize; ++i) { itrB = itrB_save; for (size_type j = 0; j < Nsize; ++j, ++itrC) { itrA = itrA_save; for (size_type k = 0; k < Ksize; ++k, ++itrA, ++itrB) { (*itrC) += alpha * (*itrA) * impl::conj(*itrB); } } itrA_save += Ksize; } } // A:Trans / B:NoTrans else if (transA == blas::Op::Trans && transB == blas::Op::NoTrans) { auto itrB_save = itrB; auto itrC_save = itrC; for (size_type k = 0; k < Ksize; ++k) { itrC = itrC_save; for (size_type i = 0; i < Msize; ++i, ++itrA) { itrB = itrB_save; for (size_type j = 0; j < Nsize; ++j, ++itrB, ++itrC) { (*itrC) += alpha * (*itrA) * (*itrB); } } itrB_save += Nsize; } } // A:ConjTrans / B:NoTrans else if (transA == blas::Op::ConjTrans && transB == blas::Op::NoTrans) { auto itrB_save = itrB; auto itrC_save = itrC; for (size_type k = 0; k < Ksize; ++k) { itrC = itrC_save; for (size_type i = 0; i < Msize; ++i, ++itrA) { itrB = itrB_save; for (size_type j = 0; j < Nsize; ++j, ++itrB, ++itrC) { (*itrC) += alpha * impl::conj(*itrA) * (*itrB); } } itrB_save += Nsize; } } // A:Trans / B:Trans else if (transA == blas::Op::Trans && transB == blas::Op::Trans) { auto itrA_save = itrA; auto itrC_save = itrC; for (size_type j = 0; j < Nsize; ++j, ++itrC_save) { itrA = itrA_save; for (size_type k = 0; k < Ksize; ++k, ++itrB) { itrC = itrC_save; for (size_type i = 0; i < Msize; ++i, ++itrA, itrC += Nsize) { (*itrC) += alpha * (*itrA) * (*itrB); } } } } // A:Trans / B:ConjTrans else if (transA == blas::Op::Trans && transB == blas::Op::ConjTrans) { auto itrA_save = itrA; auto itrC_save = itrC; for (size_type j = 0; j < Nsize; ++j, ++itrC_save) { itrA = itrA_save; for (size_type k = 0; k < Ksize; ++k, ++itrB) { itrC = itrC_save; for (size_type i = 0; i < Msize; ++i, ++itrA, itrC += Nsize) { (*itrC) += alpha * (*itrA) * impl::conj(*itrB); } } } } // A:ConjTrans / B:Trans else if (transA == blas::Op::ConjTrans && transB == blas::Op::Trans) { auto itrA_save = itrA; auto itrC_save = itrC; for (size_type j = 0; j < Nsize; ++j, ++itrC_save) { itrA = itrA_save; for (size_type k = 0; k < Ksize; ++k, ++itrB) { itrC = itrC_save; for (size_type i = 0; i < Msize; ++i, ++itrA, itrC += Nsize) { (*itrC) += alpha * impl::conj(*itrA) * (*itrB); } } } } // A:ConjTrans / B:ConjTrans else if (transA == blas::Op::ConjTrans && transB == blas::Op::ConjTrans) { auto itrA_save = itrA; auto itrC_save = itrC; for (size_type j = 0; j < Nsize; ++j, ++itrC_save) { itrA = itrA_save; for (size_type k = 0; k < Ksize; ++k, ++itrB) { itrC = itrC_save; for (size_type i = 0; i < Msize; ++i, ++itrA, itrC += Nsize) { (*itrC) += alpha * impl::conj(*itrA) * impl::conj(*itrB); } } } } else { assert(false); } } #ifdef BTAS_HAS_BLAS_LAPACK template static void call_impl ( const blas::Layout& order, const blas::Op& transA, const blas::Op& transB, const unsigned long& Msize, const unsigned 
long& Nsize, const unsigned long& Ksize, const _T& alpha, _IteratorA itrA, const unsigned long& LDA, _IteratorB itrB, const unsigned long& LDB, const _T& beta, _IteratorC itrC, const unsigned long& LDC, blas_lapack_impl_tag) { static_assert(std::is_same_v,iterator_value_t<_IteratorB>> && std::is_same_v,iterator_value_t<_IteratorC>>, "mismatching iterator value types"); using T = iterator_value_t<_IteratorA>; blas::gemm( order, transA, transB, Msize, Nsize, Ksize, static_cast(alpha), static_cast(&(*itrA)), LDA, static_cast(&(*itrB)), LDB, static_cast(beta), static_cast(&(*itrC)), LDC ); } #endif template static void call ( const blas::Layout& order, const blas::Op& transA, const blas::Op& transB, const unsigned long& Msize, const unsigned long& Nsize, const unsigned long& Ksize, const _T& alpha, _IteratorA itrA, const unsigned long& LDA, _IteratorB itrB, const unsigned long& LDB, const _T& beta, _IteratorC itrC, const unsigned long& LDC) { call_impl( order, transA, transB, Msize, Nsize, Ksize, alpha, itrA, LDA, itrB, LDB, beta, itrC, LDC, blas_lapack_impl_t<_IteratorA,_IteratorB,_IteratorC>() ); } }; #if 1 template<> struct gemm_impl { template static void call ( const blas::Layout& order, const blas::Op& transA, const blas::Op& transB, const unsigned long& Msize, const unsigned long& Nsize, const unsigned long& Ksize, const _T& alpha, _IteratorA itrA, const unsigned long& LDA, _IteratorB itrB, const unsigned long& LDB, const _T& beta, _IteratorC itrC, const unsigned long& LDC) { // currently, column-major order has not yet been supported at this level assert(order == blas::Layout::RowMajor); if (beta == NumericType<_T>::zero()) { std::fill_n(itrC, Msize*Nsize, NumericType::value_type>::zero()); } else if (beta != NumericType<_T>::one()) { scal (Msize*Nsize, beta, itrC, 1); } // A:NoTrans / B:NoTrans if (transA == blas::Op::NoTrans && transB == blas::Op::NoTrans) { auto itrB_save = itrB; auto itrC_save = itrC; for (size_type i = 0; i < Msize; ++i) { itrB = itrB_save; for (size_type k = 0; k < Ksize; ++k, ++itrA) { itrC = itrC_save; for (size_type j = 0; j < Nsize; ++j, ++itrB, ++itrC) { gemm(transA, transB, alpha, *itrA, *itrB, beta, *itrC); } } itrC_save += Nsize; } } // A:NoTrans / B:Trans else if (transA == blas::Op::NoTrans && transB != blas::Op::NoTrans) { auto itrA_save = itrA; auto itrB_save = itrB; for (size_type i = 0; i < Nsize; ++i) { itrB = itrB_save; for (size_type j = 0; j < Msize; ++j, ++itrC) { itrA = itrA_save; for (size_type k = 0; k < Ksize; ++k, ++itrA, ++itrB) { gemm(transA, transB, alpha, *itrA, *itrB, beta, *itrC); } } itrA_save += Ksize; } } // A:Trans / B:NoTrans else if (transA != blas::Op::NoTrans && transB == blas::Op::NoTrans) { auto itrB_save = itrB; auto itrC_save = itrC; for (size_type k = 0; k < Ksize; ++k) { itrC = itrC_save; for (size_type i = 0; i < Msize; ++i, ++itrA) { itrB = itrB_save; for (size_type j = 0; j < Nsize; ++j, ++itrB, ++itrC) { gemm(transA, transB, alpha, *itrA, *itrB, beta, *itrC); } } itrB_save += Nsize; } } // A:Trans / B:Trans else if (transA != blas::Op::NoTrans && transB != blas::Op::NoTrans) { auto itrA_save = itrA; auto itrC_save = itrC; for (size_type j = 0; j < Nsize; ++j, ++itrC_save) { itrA = itrA_save; for (size_type k = 0; k < Ksize; ++k, ++itrB) { itrC = itrC_save; for (size_type i = 0; i < Msize; ++i, ++itrA, itrC += Nsize) { gemm(transA, transB, alpha, *itrA, *itrB, beta, *itrC); } } } } } }; #endif // ================================================================================================ /// Generic 
implementation of BLAS GEMM in terms of C++ iterator template void gemm ( const blas::Layout& order, const blas::Op& transA, const blas::Op& transB, const unsigned long& Msize, const unsigned long& Nsize, const unsigned long& Ksize, const _T& alpha, _IteratorA itrA, const unsigned long& LDA, _IteratorB itrB, const unsigned long& LDB, const _T& beta, _IteratorC itrC, const unsigned long& LDC) { typedef std::iterator_traits<_IteratorA> __traits_A; typedef std::iterator_traits<_IteratorB> __traits_B; typedef std::iterator_traits<_IteratorC> __traits_C; typedef typename __traits_A::value_type value_type; static_assert(std::is_same::value, "value type of B must be the same as that of A"); static_assert(std::is_same::value, "value type of C must be the same as that of A"); static_assert(std::is_same::value, "iterator A must be a random access iterator"); static_assert(std::is_same::value, "iterator B must be a random access iterator"); static_assert(std::is_same::value, "iterator C must be a random access iterator"); typename __traits_A::pointer A = &(*itrA); typename __traits_B::pointer B = &(*itrB); typename __traits_C::pointer C = &(*itrC); gemm_impl::value>::call(order, transA, transB, Msize, Nsize, Ksize, alpha, A, LDA, B, LDB, beta, C, LDC); } // ================================================================================================ /// Generic implementation of BLAS-GEMM /// \param transA transpose directive for tensor \p A (blas::Op) /// \param transB transpose directive for tensor \p B (blas::Op) /// \param alpha scalar value to be multiplied to \param A * \param B /// \param A input tensor /// \param B input tensor /// \param beta scalar value to be multiplied to \param C /// \param C output tensor which can be empty tensor but needs to have rank info template< typename _T, class _TensorA, class _TensorB, class _TensorC, class = typename std::enable_if< is_boxtensor<_TensorA>::value & is_boxtensor<_TensorB>::value & is_boxtensor<_TensorC>::value & std::is_same::value & std::is_same::value >::type > void gemm ( const blas::Op& transA, const blas::Op& transB, const _T& alpha, const _TensorA& A, const _TensorB& B, const _T& beta, _TensorC& C) { static_assert(boxtensor_storage_order<_TensorA>::value == boxtensor_storage_order<_TensorC>::value && boxtensor_storage_order<_TensorB>::value == boxtensor_storage_order<_TensorC>::value, "btas::gemm does not support mixed storage order"); static_assert(boxtensor_storage_order<_TensorC>::value != boxtensor_storage_order<_TensorC>::other, "btas::gemm does not support non-major storage order"); const blas::Layout order = boxtensor_storage_order<_TensorC>::value == boxtensor_storage_order<_TensorC>::row_major ? 
blas::Layout::RowMajor : blas::Layout::ColMajor; typedef unsigned long size_type; //if (A.empty() || B.empty()) return; //assert (C.rank() != 0); if (A.empty() || B.empty()) { scal(beta, C); return; } typedef typename _TensorA::value_type value_type; assert(not ((transA == blas::Op::ConjTrans || transB == blas::Op::ConjTrans) && std::is_fundamental::value)); // get contraction rank const size_type rankA = rank(A); const size_type rankB = rank(B); const size_type rankC = rank(C); const size_type K = (rankA+rankB-rankC)/2; assert((rankA+rankB-rankC) % 2 == 0); const size_type M = rankA-K; const size_type N = rankB-K; // get extents auto extentA = extent(A); auto extentB = extent(B); typename _TensorC::range_type::extent_type extentC = extent(C); // if C is empty, this gives { }, will need to allocate size_type Msize = 0; // Rows count of C size_type Nsize = 0; // Cols count of C size_type Ksize = 0; // Dims to be contracted size_type LDA = 0; // Leading dims of A size_type LDB = 0; // Leading dims of B size_type LDC = 0; // Leading dims of C Msize = (transA == blas::Op::NoTrans) ? std::accumulate(std::begin(extentA), std::begin(extentA)+M, 1ul, std::multiplies()) : std::accumulate(std::begin(extentA)+K, std::end(extentA), 1ul, std::multiplies()) ; Ksize = (transA == blas::Op::NoTrans) ? std::accumulate(std::begin(extentA)+M, std::end(extentA), 1ul, std::multiplies()) : std::accumulate(std::begin(extentA), std::begin(extentA)+K, 1ul, std::multiplies()) ; // check that contraction dimensions match auto Barea = range(B).area(); { // weak check assert(Barea % Ksize == 0); // strong checks if (transA == blas::Op::NoTrans && transB == blas::Op::NoTrans) assert(std::equal(std::begin(extentA)+M, std::end(extentA), std::begin(extentB))); if (transA == blas::Op::NoTrans && transB != blas::Op::NoTrans) assert(std::equal(std::begin(extentA)+M, std::end(extentA), std::begin(extentB)+N)); if (transA != blas::Op::NoTrans && transB == blas::Op::NoTrans) assert(std::equal(std::begin(extentA), std::begin(extentA)+K, std::begin(extentB))); if (transA != blas::Op::NoTrans && transB != blas::Op::NoTrans) assert(std::equal(std::begin(extentA), std::begin(extentA)+K, std::begin(extentB)+N)); } Nsize = Barea / Ksize; if(order == blas::Layout::RowMajor) { if(transA == blas::Op::NoTrans) LDA = Ksize; else LDA = Msize; if(transB == blas::Op::NoTrans) LDB = Nsize; else LDB = Ksize; LDC = Nsize; } else { if(transA == blas::Op::NoTrans) LDA = Msize; else LDA = Ksize; if(transB == blas::Op::NoTrans) LDB = Ksize; else LDB = Msize; LDC = Msize; } if (C.empty()) { // C empty -> compute extentC extentC = btas::array_adaptor::construct(M+N); if (transA == blas::Op::NoTrans) for (size_type i = 0; i < M; ++i) extentC[i] = extentA[i]; else for (size_type i = 0; i < M; ++i) extentC[i] = extentA[K+i]; if (transB == blas::Op::NoTrans) for (size_type i = 0; i < N; ++i) extentC[M+i] = extentB[K+i]; else for (size_type i = 0; i < N; ++i) extentC[M+i] = extentB[i]; } else { // C not empty -> validate extentC if (transA == blas::Op::NoTrans) assert(std::equal(std::begin(extentA), std::begin(extentA)+M, std::begin(extentC))); else assert(std::equal(std::begin(extentA)+K, std::end(extentA), std::begin(extentC))); if (transB == blas::Op::NoTrans) assert(std::equal(std::begin(extentB)+K, std::end(extentB), std::begin(extentC)+M)); else assert(std::equal(std::begin(extentB), std::begin(extentB)+N, std::begin(extentC)+M)); } // resize / scale if (C.empty()) { C.resize(extentC); NumericType::fill(std::begin(C),
std::end(C), NumericType::zero()); } else { assert(std::equal(std::begin(extentC), std::end(extentC), std::begin(extent(C)))); if (beta == NumericType<_T>::zero()) NumericType::fill(std::begin(C), std::end(C), NumericType::zero()); } auto itrA = std::begin(A); auto itrB = std::begin(B); auto itrC = std::begin(C); gemm (order, transA, transB, Msize, Nsize, Ksize, alpha, itrA, LDA, itrB, LDB, beta, itrC, LDC); } } // namespace btas #endif // __BTAS_GEMM_IMPL_H BTAS-1.0.0/btas/generic/gemv_impl.h000066400000000000000000000342051476142407000167470ustar00rootroot00000000000000#ifndef __BTAS_GEMV_IMPL_H #define __BTAS_GEMV_IMPL_H 1 #include #include #include #include #include #include #include #include #include #include namespace btas { template struct gemv_impl { }; template<> struct gemv_impl { /// GEMV implementation template static void call_impl ( const blas::Layout& order, const blas::Op& transA, const unsigned long& Msize, const unsigned long& Nsize, const _T& alpha, _IteratorA itrA, const unsigned long& LDA, _IteratorX itrX, const typename std::iterator_traits<_IteratorX>::difference_type& incX, const _T& beta, _IteratorY itrY, const typename std::iterator_traits<_IteratorY>::difference_type& incY, generic_impl_tag) { if (beta == NumericType<_T>::zero()) { auto itrY_tmp = itrY; for(size_t i=0; i!=(transA == blas::Op::NoTrans?Msize:Nsize); ++i, itrY_tmp+=incY) *itrY_tmp = NumericType::value_type>::zero(); } else if (beta != NumericType<_T>::one()) { if (transA == blas::Op::NoTrans) scal (Msize, beta, itrY, incY); else scal (Nsize, beta, itrY, incY); } // A:NoTrans RowMajor if (transA == blas::Op::NoTrans && order == blas::Layout::RowMajor) { auto itrX_save = itrX; for (size_type i = 0; i < Msize; ++i, ++itrY) { itrX = itrX_save; for (size_type j = 0; j < Nsize; ++j, ++itrA, ++itrX) { (*itrY) += alpha * (*itrA) * (*itrX); } } } // A:Trans RowMajor else if (transA == blas::Op::Trans && order == blas::Layout::RowMajor) { auto itrY_save = itrY; for (size_type i = 0; i < Msize; ++i, ++itrX) { itrY = itrY_save; for (size_type j = 0; j < Nsize; ++j, ++itrA, ++itrY) { (*itrY) += alpha * (*itrA) * (*itrX); } } } // A:ConjTrans RowMajor else if (transA == blas::Op::ConjTrans && order == blas::Layout::RowMajor) { auto itrY_save = itrY; for (size_type i = 0; i < Msize; ++i, ++itrX) { itrY = itrY_save; for (size_type j = 0; j < Nsize; ++j, ++itrA, ++itrY) { (*itrY) += alpha * impl::conj(*itrA) * (*itrX); } } } // A:NoTrans ColMajor else if (transA == blas::Op::NoTrans && order == blas::Layout::ColMajor) { auto itrY_save = itrY; for (size_type i = 0; i < Nsize; ++i, ++itrX) { itrY = itrY_save; for (size_type j = 0; j < Msize; ++j, ++itrA, ++itrY) { (*itrY) += alpha * (*itrA) * (*itrX); } } } // A:Trans ColMajor else if (transA == blas::Op::Trans && order == blas::Layout::ColMajor) { auto itrX_save = itrX; for (size_type i = 0; i < Nsize; ++i, ++itrY) { itrX = itrX_save; for (size_type j = 0; j < Msize; ++j, ++itrA, ++itrX) { (*itrY) += alpha * (*itrA) * (*itrX); } } } // A:ConjTrans ColMajor else if (transA == blas::Op::ConjTrans && order == blas::Layout::ColMajor) { auto itrX_save = itrX; for (size_type i = 0; i < Nsize; ++i, ++itrY) { itrX = itrX_save; for (size_type j = 0; j < Msize; ++j, ++itrA, ++itrX) { (*itrY) += alpha * impl::conj(*itrA) * (*itrX); } } } } #ifdef BTAS_HAS_BLAS_LAPACK template static void call_impl ( const blas::Layout& order, const blas::Op& transA, const unsigned long& Msize, const unsigned long& Nsize, const _T& alpha, _IteratorA itrA, const unsigned long& LDA, _IteratorX 
itrX, const typename std::iterator_traits<_IteratorX>::difference_type& incX, const _T& beta, _IteratorY itrY, const typename std::iterator_traits<_IteratorY>::difference_type& incY, blas_lapack_impl_tag) { static_assert(std::is_same_v,iterator_value_t<_IteratorY>> && std::is_same_v,iterator_value_t<_IteratorA>>, "mismatching iterator value types"); using T = iterator_value_t<_IteratorX>; blas::gemv( order, transA, Msize, Nsize, static_cast(alpha), static_cast(&(*itrA)), LDA, static_cast(&(*itrX)), incX, static_cast(beta), static_cast< T*>(&(*itrY)), incY ); } #endif template static void call ( const blas::Layout& order, const blas::Op& transA, const unsigned long& Msize, const unsigned long& Nsize, const _T& alpha, _IteratorA itrA, const unsigned long& LDA, _IteratorX itrX, const typename std::iterator_traits<_IteratorX>::difference_type& incX, const _T& beta, _IteratorY itrY, const typename std::iterator_traits<_IteratorY>::difference_type& incY ) { call_impl( order, transA, Msize, Nsize, alpha, itrA, LDA, itrX, incX, beta, itrY, incY, blas_lapack_impl_t<_IteratorA, _IteratorX, _IteratorY>() ); } }; template<> struct gemv_impl { /// GEMV implementation template static void call ( const blas::Layout& order, const blas::Op& transA, const unsigned long& Msize, const unsigned long& Nsize, const _T& alpha, _IteratorA itrA, const unsigned long& LDA, _IteratorX itrX, const typename std::iterator_traits<_IteratorX>::difference_type& incX, const _T& beta, _IteratorY itrY, const typename std::iterator_traits<_IteratorY>::difference_type& incY) { // A:NoTrans RowMajor if (transA == blas::Op::NoTrans && order == blas::Layout::RowMajor) { auto itrX_save = itrX; for (size_type i = 0; i < Msize; ++i, ++itrY) { itrX = itrX_save; for (size_type j = 0; j < Nsize; ++j, ++itrA, ++itrX) { gemv(order, transA, alpha, *itrA, *itrX, beta, *itrY); } } } // A:Trans RowMajor else if (transA != blas::Op::NoTrans && order == blas::Layout::RowMajor) { auto itrY_save = itrY; for (size_type i = 0; i < Msize; ++i, ++itrX) { itrY = itrY_save; for (size_type j = 0; j < Nsize; ++j, ++itrA, ++itrY) { gemv(order, transA, alpha, *itrA, *itrX, beta, *itrY); } } } // A:NoTrans ColMajor else if (transA == blas::Op::NoTrans && order == blas::Layout::ColMajor) { auto itrY_save = itrY; for (size_type i = 0; i < Nsize; ++i, ++itrX) { itrY = itrY_save; for (size_type j = 0; j < Msize; ++j, ++itrA, ++itrY) { gemv(order, transA, alpha, *itrA, *itrX, beta, *itrY); } } } // A:Trans ColMajor else if (transA != blas::Op::NoTrans && order == blas::Layout::ColMajor) { auto itrX_save = itrX; for (size_type i = 0; i < Nsize; ++i, ++itrY) { itrX = itrX_save; for (size_type j = 0; j < Msize; ++j, ++itrA, ++itrX) { gemv(order, transA, alpha, *itrA, *itrX, beta, *itrY); } } } } }; // ================================================================================================ /// Generic implementation of BLAS GEMV in terms of C++ iterator template void gemv ( const blas::Layout& order, const blas::Op& transA, const unsigned long& Msize, const unsigned long& Nsize, const _T& alpha, _IteratorA itrA, const unsigned long& LDA, _IteratorX itrX, const typename std::iterator_traits<_IteratorX>::difference_type& incX, const _T& beta, _IteratorY itrY, const typename std::iterator_traits<_IteratorY>::difference_type& incY) { typedef std::iterator_traits<_IteratorA> __traits_A; typedef std::iterator_traits<_IteratorX> __traits_X; typedef std::iterator_traits<_IteratorY> __traits_Y; typedef typename __traits_A::value_type value_type; 
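// Usage sketch for this iterator-level overload (hypothetical buffers): a
// row-major 3x4 matrix A applied to x of length 4, accumulating into y of
// length 3, would be invoked as
//   gemv(blas::Layout::RowMajor, blas::Op::NoTrans, 3, 4, 1.0,
//        A.begin(), 4, x.begin(), 1, 0.0, y.begin(), 1);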
static_assert(std::is_same::value, "value type of X must be the same as that of A"); static_assert(std::is_same::value, "value type of Y must be the same as that of A"); static_assert(std::is_same::value, "iterator A must be a random access iterator"); static_assert(std::is_same::value, "iterator X must be a random access iterator"); static_assert(std::is_same::value, "iterator Y must be a random access iterator"); typename __traits_A::pointer A = &(*itrA); typename __traits_X::pointer X = &(*itrX); typename __traits_Y::pointer Y = &(*itrY); gemv_impl::value>::call(order, transA, Msize, Nsize, alpha, A, LDA, X, incX, beta, Y, incY); //gemv_impl::value>::call(order, transA, Msize, Nsize, alpha, itrA, LDA, itrX, incX, beta, itrY, incY); } // ================================================================================================ /// Generic interface of BLAS-GEMV /// \param transA transpose directive for tensor A (blas::Op::NoTrans, blas::Op::Trans, blas::Op::ConjTrans) /// \param alpha scalar value to be multiplied to A * X /// \param A input tensor /// \param X input tensor /// \param beta scalar value to be multiplied to Y /// \param Y output tensor which can be empty tensor but needs to have rank info (= size of shape). /// Iterator is assumed to be consecutive (or, random_access_iterator) , thus e.g. iterator to map doesn't work. template< typename _T, class _TensorA, class _TensorX, class _TensorY, class = typename std::enable_if< is_boxtensor<_TensorA>::value & is_boxtensor<_TensorX>::value & is_boxtensor<_TensorY>::value >::type > void gemv ( const blas::Op& transA, const _T& alpha, const _TensorA& A, const _TensorX& X, const _T& beta, _TensorY& Y) { static_assert(boxtensor_storage_order<_TensorA>::value == boxtensor_storage_order<_TensorY>::value && boxtensor_storage_order<_TensorX>::value == boxtensor_storage_order<_TensorY>::value, "btas::gemv does not support mixed storage order"); static_assert(boxtensor_storage_order<_TensorY>::value != boxtensor_storage_order<_TensorY>::other, "btas::gemv does not support non-major storage order"); const blas::Layout order = boxtensor_storage_order<_TensorY>::value == boxtensor_storage_order<_TensorY>::row_major ? blas::Layout::RowMajor : blas::Layout::ColMajor; if (A.empty() || X.empty()) { scal(beta, Y); return; } assert(not ((transA == blas::Op::ConjTrans ) && std::is_fundamental::value)); // get contraction rank const size_type rankX = rank(X); const size_type rankY = rank(Y); // get shapes const typename _TensorA::range_type::extent_type& extentA = extent(A); const typename _TensorX::range_type::extent_type& extentX = extent(X); typename _TensorY::range_type::extent_type extentY = extent(Y); // if Y is empty, this gives { 0,...,0 } size_type Msize = 0; // Rows count of Y size_type Nsize = 0; // Cols count of Y size_type LDA = 0; // Leading dims of A // to minimize forks by if? 
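// Mode-partition sketch: for transA == NoTrans the first rank(Y) modes of A
// map onto Y and the trailing modes are contracted with X; e.g. (hypothetical
// extents) A = {a, b, c} and X = {c} give Y = {a, b}, so below
// Msize = a*b and Nsize = c.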
if (transA == blas::Op::NoTrans) { Msize = std::accumulate(std::begin(extentA), std::begin(extentA)+rankY, 1ul, std::multiplies()); Nsize = std::accumulate(std::begin(extentA)+rankY, std::end(extentA), 1ul, std::multiplies()); for (size_type i = 0; i < rankY; ++i) extentY[i] = extentA[i]; assert(std::equal(std::begin(extentA)+rankY, std::end(extentA), std::begin(extentX))); } else { Msize = std::accumulate(std::begin(extentA), std::begin(extentA)+rankX, 1ul, std::multiplies()); Nsize = std::accumulate(std::begin(extentA)+rankX, std::end(extentA), 1ul, std::multiplies()); for (size_type i = 0; i < rankY; ++i) extentY[i] = extentA[i+rankX]; assert(std::equal(std::begin(extentA), std::begin(extentA)+rankX, std::begin(extentX))); } // LDA = std::accumulate(std::begin(extentA)+rankY, std::end(extentA), 1ul, std::multiplies()); if(order == blas::Layout::RowMajor) { LDA = Nsize; } else { LDA = Msize; } // resize / scale if (Y.empty()) { typedef typename _TensorY::value_type value_type; Y.resize(extentY); NumericType::fill(std::begin(Y), std::end(Y), NumericType::zero()); } else { assert(std::equal(std::begin(extentY), std::end(extentY), std::begin(extent(Y)))); } auto itrA = std::begin(A); auto itrX = std::begin(X); auto itrY = std::begin(Y); gemv (order, transA, Msize, Nsize, alpha, itrA, LDA, itrX, 1, beta, itrY, 1); } } // namespace btas #endif // __BTAS_GEMV_IMPL_H BTAS-1.0.0/btas/generic/ger_impl.h000066400000000000000000000226621476142407000165720ustar00rootroot00000000000000#ifndef __BTAS_GER_IMPL_H #define __BTAS_GER_IMPL_H 1 #include #include #include #include #include #include #include #include #include #include #include namespace btas { template struct ger_impl { }; template<> struct ger_impl { /// Performs GER operation template static void call_impl ( const blas::Layout& order, const unsigned long& Msize, const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY, _IteratorA itrA, const unsigned long& LDA, generic_impl_tag) { // RowMajor if (order == blas::Layout::RowMajor) { auto itrY_save = itrY; for (size_type i = 0; i < Msize; ++i, ++itrX) { itrY = itrY_save; for (size_type j = 0; j < Nsize; ++j, ++itrY, ++itrA) { (*itrA) += alpha * (*itrX) * (*itrY); } } } // A: ColMajor else { auto itrX_save = itrX; for (size_type i = 0; i < Nsize; ++i, ++itrY) { itrX = itrX_save; for (size_type j = 0; j < Msize; ++j, ++itrX, ++itrA) { (*itrA) += alpha * (*itrX) * (*itrY); } } } } #ifdef BTAS_HAS_BLAS_LAPACK template static void call_impl ( const blas::Layout& order, const unsigned long& Msize, const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY, _IteratorA itrA, const unsigned long& LDA, blas_lapack_impl_tag) { static_assert(std::is_same_v,iterator_value_t<_IteratorY>> && std::is_same_v,iterator_value_t<_IteratorA>>, "mismatching iterator value types"); using T = iterator_value_t<_IteratorX>; blas::geru( order, Msize, Nsize, static_cast(alpha), static_cast(&(*itrX)), incX, static_cast(&(*itrY)), incY, static_cast< T*>(&*(itrA)), LDA ); } #endif template static void call ( const blas::Layout& order, const unsigned long& Msize, const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY, _IteratorA itrA, const unsigned long& LDA) { call_impl( order, Msize, 
Nsize, alpha, itrX, incX, itrY, incY, itrA, LDA, blas_lapack_impl_t<_IteratorX, _IteratorY, _IteratorA>() ); } }; template<> struct ger_impl { /// GER implementation template static void call ( const blas::Layout& order, const unsigned long& Msize, const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY, _IteratorA itrA, const unsigned long& LDA) { // RowMajor if (order == blas::Layout::RowMajor) { auto itrY_save = itrY; for (size_type i = 0; i < Msize; ++i, ++itrX) { itrY = itrY_save; for (size_type j = 0; j < Nsize; ++j, ++itrY, ++itrA) { ger(order, alpha, *itrX, *itrY, *itrA); } } } // A: ColMajor else { auto itrX_save = itrX; for (size_type i = 0; i < Nsize; ++i, ++itrY) { itrX = itrX_save; for (size_type j = 0; j < Msize; ++j, ++itrX, ++itrA) { ger(order, alpha, *itrX, *itrY, *itrA); } } } } }; // ================================================================================================ /// Generic implementation of BLAS GER in terms of C++ iterator template void ger ( const blas::Layout& order, const unsigned long& Msize, const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, _IteratorY itrY, const iterator_difference_t<_IteratorY>& incY, _IteratorA itrA, const unsigned long& LDA) { typedef std::iterator_traits<_IteratorX> __traits_X; typedef std::iterator_traits<_IteratorY> __traits_Y; typedef std::iterator_traits<_IteratorA> __traits_A; typedef typename __traits_A::value_type value_type; static_assert(std::is_same::value, "value type of X must be the same as that of A"); static_assert(std::is_same::value, "value type of Y must be the same as that of A"); static_assert(std::is_same::value, "iterator X must be a random access iterator"); static_assert(std::is_same::value, "iterator Y must be a random access iterator"); static_assert(std::is_same::value, "iterator A must be a random access iterator"); typename __traits_X::pointer X = &(*itrX); typename __traits_Y::pointer Y = &(*itrY); typename __traits_A::pointer A = &(*itrA); ger_impl::value>::call(order, Msize, Nsize, alpha, X, incX, Y, incY, A, LDA); } // ================================================================================================ /// Generic implementation of operation (generalization of BLAS GER operation) /// \param alpha scalar value to be multiplied to A * X * Y /// \param X input tensor /// \param Y input tensor /// \param A output tensor which can be empty tensor but needs to have rank info (= size of shape). /// Iterator is assumed to be consecutive (or, random_access_iterator) , thus e.g. iterator to map doesn't work. template< typename _T, class _TensorX, class _TensorY, class _TensorA, class = typename std::enable_if< is_boxtensor<_TensorX>::value & is_boxtensor<_TensorY>::value & is_boxtensor<_TensorA>::value >::type > void ger ( const _T& alpha, const _TensorX& X, const _TensorY& Y, _TensorA& A) { static_assert(boxtensor_storage_order<_TensorX>::value == boxtensor_storage_order<_TensorA>::value && boxtensor_storage_order<_TensorY>::value == boxtensor_storage_order<_TensorA>::value, "btas::ger does not support mixed storage order"); static_assert(boxtensor_storage_order<_TensorY>::value != boxtensor_storage_order<_TensorY>::other, "btas::ger does not support non-major storage order"); const blas::Layout order = boxtensor_storage_order<_TensorA>::value == boxtensor_storage_order<_TensorA>::row_major ? 
blas::Layout::RowMajor : blas::Layout::ColMajor; if (X.empty() || Y.empty()) { return; } // get contraction rank const size_type rankX = rank(X); const size_type rankY = rank(Y); // get shapes const typename _TensorX::range_type::extent_type& extentX = extent(X); const typename _TensorY::range_type::extent_type& extentY = extent(Y); typename _TensorA::range_type::extent_type extentA; if (!A.empty()) extentA = extent(A); else { array_adaptor::resize(extentA, rank(extentX) + rank(extentY)); std::copy(std::begin(extentX), std::end(extentX), std::begin(extentA)); std::copy(std::begin(extentY), std::end(extentY), std::begin(extentA) + rank(extentX)); } size_type Msize = std::accumulate(std::begin(extentX), std::end(extentX), 1ul, std::multiplies()); size_type Nsize = std::accumulate(std::begin(extentY), std::end(extentY), 1ul, std::multiplies()); size_type LDA = (order == blas::Layout::RowMajor) ? Nsize : Msize; std::copy_n(std::begin(extentX), rankX, std::begin(extentA)); std::copy_n(std::begin(extentY), rankY, std::begin(extentA)+rankX); // resize / scale if (A.empty()) { typedef typename _TensorA::value_type value_type; A.resize(extentA); NumericType::fill(std::begin(A), std::end(A), NumericType::zero()); } else { assert(std::equal(std::begin(extentA), std::end(extentA), std::begin(extent(A)))); } auto itrX = std::begin(X); auto itrY = std::begin(Y); auto itrA = std::begin(A); ger (order, Msize, Nsize, alpha, itrX, 1, itrY, 1, itrA, LDA); } } // namespace btas #endif // __BTAS_GER_IMPL_H BTAS-1.0.0/btas/generic/gesvd_impl.h000066400000000000000000000232701476142407000171210ustar00rootroot00000000000000#ifndef __BTAS_GESVD_IMPL_H #define __BTAS_GESVD_IMPL_H 1 #include #include #include #include #include #include #include #include #include #include #include #include namespace btas { template struct gesvd_impl { }; template<> struct gesvd_impl { /// GESVD implementation template static void call_impl ( const blas::Layout& order, lapack::Job jobu, lapack::Job jobvt, const unsigned long& Msize, const unsigned long& Nsize, _IteratorA itrA, const unsigned long& LDA, _IteratorS itrS, _IteratorU itrU, const unsigned long& LDU, _IteratorVt itrVt, const unsigned long& LDVt, generic_impl_tag) { BTAS_EXCEPTION("GESVD Does not have a Generic Implementation"); } #ifdef BTAS_HAS_BLAS_LAPACK template static void call_impl ( const blas::Layout& order, lapack::Job jobu, lapack::Job jobvt, const unsigned long& Msize, const unsigned long& Nsize, _IteratorA itrA, const unsigned long& LDA, _IteratorS itrS, _IteratorU itrU, const unsigned long& LDU, _IteratorVt itrVt, const unsigned long& LDVt, blas_lapack_impl_tag) { using value_type = typename std::iterator_traits<_IteratorA>::value_type; using real_type = real_type_t; const bool needU = jobu != lapack::Job::NoVec; const bool needVt = jobvt != lapack::Job::NoVec; const bool inplaceU = jobu == lapack::Job::OverwriteVec; const bool inplaceVt = jobvt == lapack::Job::OverwriteVec; if( inplaceU and inplaceVt ) BTAS_EXCEPTION("SVD cannot return both vectors inplace"); value_type dummy; value_type* A = static_cast(&(*itrA)); value_type* U = (needU and not inplaceU) ? static_cast(&(*itrU)) : &dummy; value_type* Vt = (needVt and not inplaceVt) ? 
static_cast(&(*itrVt)) : &dummy; real_type* S = static_cast (&(*itrS)); auto info = gesvd( order, jobu, jobvt, Msize, Nsize, A, LDA, S, U, LDU, Vt, LDVt ); if( info ) BTAS_EXCEPTION("SVD Failed"); } #endif template static void call ( const blas::Layout& order, lapack::Job jobu, lapack::Job jobvt, const unsigned long& Msize, const unsigned long& Nsize, _IteratorA itrA, const unsigned long& LDA, _IteratorS itrS, _IteratorU itrU, const unsigned long& LDU, _IteratorVt itrVt, const unsigned long& LDVt ) { call_impl( order, jobu, jobvt, Msize, Nsize, itrA, LDA, itrS, itrU, LDU, itrVt, LDVt, blas_lapack_impl_t<_IteratorA,_IteratorS,_IteratorU,_IteratorVt>() ); } }; template<> struct gesvd_impl { /// GESVD implementation template static void call ( const blas::Layout& order, lapack::Job jobu, lapack::Job jobvt, const unsigned long& Msize, const unsigned long& Nsize, _IteratorA itrA, const unsigned long& LDA, _IteratorS itrS, _IteratorU itrU, const unsigned long& LDU, _IteratorVt itrVt, const unsigned long& LDVt) { assert(false); // gesvd_impl for a generic iterator type has not yet been implemented. } }; // ================================================================================================ /// Generic implementation of BLAS GESVD in terms of C++ iterator template void gesvd ( const blas::Layout& order, lapack::Job jobu, lapack::Job jobvt, const unsigned long& Msize, const unsigned long& Nsize, _IteratorA itrA, const unsigned long& LDA, _IteratorS itrS, _IteratorU itrU, const unsigned long& LDU, _IteratorVt itrVt, const unsigned long& LDVt) { typedef std::iterator_traits<_IteratorA> __traits_A; typedef std::iterator_traits<_IteratorS> __traits_S; typedef std::iterator_traits<_IteratorU> __traits_U; typedef std::iterator_traits<_IteratorVt> __traits_Vt; typedef typename __traits_A::value_type value_type; static_assert(std::is_same::value, "value type of U must be the same as that of A"); static_assert(std::is_same::value, "value type of Vt must be the same as that of A"); static_assert(std::is_same::value, "iterator A must be a random access iterator"); static_assert(std::is_same::value, "iterator S must be a random access iterator"); static_assert(std::is_same::value, "iterator U must be a random access iterator"); static_assert(std::is_same::value, "iterator Vt must be a random access iterator"); typename __traits_A::pointer A = &(*itrA); typename __traits_S::pointer S = &(*itrS); typename __traits_U::pointer U = &(*itrU); typename __traits_Vt::pointer Vt = &(*itrVt); gesvd_impl::call(order, jobu, jobvt, Msize, Nsize, A, LDA, S, U, LDU, Vt, LDVt); } // ================================================================================================ /// Generic interface of BLAS-GESVD /// \param order storage order of tensor in matrix view (blas::Layout) /// \param transA transpose directive for tensor \p A (blas::Op) /// \param alpha scalar value to be multiplied to A * X /// \param[in,out] A on input the tensor to be SVDed, contents overwritten on output /// \param X input tensor /// \param beta scalar value to be multiplied to Y /// \param Y output tensor which can be empty tensor but needs to have rank info (= size of shape). /// Iterator is assumed to be consecutive (or, random_access_iterator) , thus e.g. iterator to map doesn't work. 
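///
/// A minimal usage sketch (illustrative only; assumes a row-major btas::Tensor<double>
/// and BLAS/LAPACK bindings enabled, since the generic fallback throws):
/// \code
/// btas::Tensor<double> A(5, 3);
/// std::iota(A.begin(), A.end(), 1.0);      // any data; <numeric> assumed
/// btas::Tensor<double> S(3);               // min(5, 3) singular values
/// btas::Tensor<double> U(5, 5), Vt(3, 3);  // ranks must satisfy rank(A) == rank(U) + rank(Vt) - 2
/// btas::gesvd(lapack::Job::AllVec, lapack::Job::AllVec, A, S, U, Vt);
/// // on exit A is overwritten and the input satisfied A ~= U * diag(S) * Vt
/// \endcode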
template< class _TensorA, class _VectorS, class _TensorU, class _TensorVt, class = typename std::enable_if< is_boxtensor<_TensorA>::value & is_boxtensor<_TensorU>::value & is_boxtensor<_TensorVt>::value >::type > void gesvd ( lapack::Job jobu, lapack::Job jobvt, _TensorA& A, _VectorS& S, _TensorU& U, _TensorVt& Vt) { static_assert(boxtensor_storage_order<_TensorA>::value == boxtensor_storage_order<_TensorU>::value && boxtensor_storage_order<_TensorA>::value == boxtensor_storage_order<_TensorVt>::value, "btas::gesvd does not support mixed storage order"); static_assert(boxtensor_storage_order<_TensorA>::value != boxtensor_storage_order<_TensorA>::other, "btas::gesvd does not support non-major storage order"); const blas::Layout order = boxtensor_storage_order<_TensorA>::value == boxtensor_storage_order<_TensorA>::row_major ? blas::Layout::RowMajor : blas::Layout::ColMajor; assert(!A.empty()); const size_type rankA = rank(A); const size_type rankU = rank(U); const size_type rankVt = rank(Vt); assert(rankA == (rankU+rankVt-2)); // get shapes const typename _TensorA::range_type::extent_type& extentA = extent(A); typename _VectorS::range_type::extent_type extentS = extent(S); // if S is empty, this gives { 0,...,0 } typename _TensorU::range_type::extent_type extentU = extent(U); // if U is empty, this gives { 0,...,0 } typename _TensorVt::range_type::extent_type extentVt = extent(Vt); // if Vt is empty, this gives { 0,...,0 } size_type Msize = 0; // Rows count of Y size_type Nsize = 0; // Cols count of Y size_type LDA = 0; // Leading dims of A size_type LDU = 0; // Leading dims of U size_type LDVt = 0; // Leading dims of Vt Msize = std::accumulate(std::begin(extentA), std::begin(extentA)+rankU-1, 1ul, std::multiplies()); Nsize = std::accumulate(std::begin(extentA)+rankU-1, std::end(extentA), 1ul, std::multiplies()); size_type Ksize = std::min(Msize,Nsize); size_type Ucols = (jobu == lapack::Job::AllVec) ? Msize : Ksize; size_type Vtrows = (jobvt == lapack::Job::AllVec) ? Nsize : Ksize; extentS[0] = Ksize; for (size_type i = 0; i < rankU-1; ++i) extentU[i] = extentA[i]; extentU[rankU-1] = Ucols; extentVt[0] = Vtrows; for (size_type i = 1; i < rankVt; ++i) extentVt[i] = extentA[i+rankU-2]; if(order == blas::Layout::RowMajor) { LDA = Nsize; LDU = Ucols; LDVt = Nsize; } else { LDA = Msize; LDU = Msize; LDVt = Vtrows; } S.resize(extentS); U.resize(extentU); Vt.resize(extentVt); auto itrA = std::begin(A); auto itrS = std::begin(S); auto itrU = std::begin(U); auto itrVt = std::begin(Vt); gesvd (order, jobu, jobvt, Msize, Nsize, itrA, LDA, itrS, itrU, LDU, itrVt, LDVt); } } // namespace btas #endif // __BTAS_GESVD_IMPL_H BTAS-1.0.0/btas/generic/khatri_rao_product.h000066400000000000000000000032301476142407000206450ustar00rootroot00000000000000#ifndef BTAS_KRP_H #define BTAS_KRP_H namespace btas { /// The khatri-rao product is an outer product of column vectors of \param A /// and \param B, the product is then ordered to make a super column in a new matrix /// The dimension of this product is \f[ A(N, M) \cdot B(K, M) = AB(N*K , M)\f ] /// \param[in] A Matrix of size (N, M) /// \param[in] B Matrix of size (K, M) /// \param[in, out] AB In: Matrix of any size. 
Out: Matrix of size (N*K, /// M) template void khatri_rao_product(const Tensor &A, const Tensor &B, Tensor &AB) { using ind_t = typename Tensor::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; // Make sure the tensors are matrices if (A.rank() != 2 || B.rank() != 2) BTAS_EXCEPTION("A.rank() > 2 || B.rank() > 2, Matrices required"); // Resize the product AB.resize( Range{Range1{A.extent(0) * B.extent(0)}, Range1{A.extent(1)}}); // Calculate Khatri-Rao product by multiplying rows of A by rows of B. ind_t A_row = A.extent(0); ind_t B_row = B.extent(0); ind_t KRP_dim = A.extent(1); ord_t i_times_krp = 0, i_times_brow_krp = 0; for (ind_t i = 0; i < A_row; ++i, i_times_krp += KRP_dim) { const auto *A_ptr = A.data() + i_times_krp; ord_t j_times_KRP = 0; for (ind_t j = 0; j < B_row; ++j, j_times_KRP += KRP_dim) { const auto *B_ptr = B.data() + j_times_KRP; auto *AB_ptr = AB.data() + i_times_brow_krp + j_times_KRP; for (ind_t k = 0; k < KRP_dim; ++k) { *(AB_ptr + k) = *(A_ptr + k) * *(B_ptr + k); } } i_times_brow_krp += j_times_KRP; } } } // namespace btas #endif // BTAS_KRP_H BTAS-1.0.0/btas/generic/lapack_extensions.h000066400000000000000000000141221476142407000204760ustar00rootroot00000000000000// // Created by David Williams-Young on 12/2/20. // #ifndef BTAS_LAPACK_EXTENSIONS_H #define BTAS_LAPACK_EXTENSIONS_H 1 #ifdef BTAS_HAS_BLAS_LAPACK #include // BLASPP #include // LAPACKPP #include // Transpose #include namespace btas { template > int64_t getrf( blas::Layout order, int64_t M, int64_t N, T* A, int64_t LDA, int64_t* IPIV, Alloc alloc = Alloc() ) { //std::cout << "IN GETRF IMPL" << std::endl; if( order == blas::Layout::ColMajor ) { return lapack::getrf( M, N, A, LDA, IPIV ); } else { // Transpose input auto* A_transpose = alloc.allocate(M*N); transpose( N, M, A, LDA, A_transpose, M ); // A -> LU auto info = lapack::getrf( M, N, A_transpose, M, IPIV ); // Transpose output + cleanup if(!info) transpose( M, N, A_transpose, M, A, LDA ); alloc.deallocate( A_transpose, M*N ); return info; } } template , typename IntAlloc = std::allocator > int64_t gesv( blas::Layout order, int64_t N, int64_t NRHS, T* A, int64_t LDA, T* B, int64_t LDB, Alloc alloc = Alloc(), IntAlloc int_alloc = IntAlloc() ) { //std::cout << "IN GESV IMPL" << std::endl; // Allocate IPIV auto* IPIV = int_alloc.allocate(N); auto* A_use = A; int64_t LDA_use = LDA; // If row major, transpose input and redirect pointers if( order == blas::Layout::RowMajor ) { A_use = alloc.allocate( N*N ); LDA_use = N; transpose( N, N, A, LDA, A_use, N ); } auto info = lapack::gesv( N, NRHS, A, LDA, IPIV, B, LDB ); int_alloc.deallocate( IPIV, N ); // If row major, transpose output + cleanup if( order == blas::Layout::RowMajor ) { if(!info) transpose( N, N, A_use, N, A, LDA ); alloc.deallocate( A_use, N*N ); } return info; } template int64_t gesvd( blas::Layout order, lapack::Job jobu, lapack::Job jobvt, int64_t M, int64_t N, T* A, int64_t LDA, real_type_t* S, T* U, int64_t LDU, T* VT, int64_t LDVT ) { //std::cout << "IN GESVD IMPL" << std::endl; // Col major, no changes if( order == blas::Layout::ColMajor ) { return lapack::gesvd( jobu, jobvt, M, N, A, LDA, S, U, LDU, VT, LDVT ); // Row major, swap M <-> N and U <-> VT } else { return lapack::gesvd( jobvt, jobu, N, M, A, LDA, S, VT, LDVT, U, LDU ); } } template > int64_t householder_qr_genq( blas::Layout order, int64_t M, int64_t N, T* A, int64_t LDA, Alloc alloc = Alloc() ) { //std::cout << "IN QR IMPL" << std::endl; // Allocate temp storage for TAU factors const 
int64_t K = std::min(M,N); auto* TAU = alloc.allocate( K ); int64_t info; auto* A_use = A; int64_t LDA_use = LDA; // If row major, transpose input and redirect pointers if( order == blas::Layout::RowMajor ) { A_use = alloc.allocate( M*N ); LDA_use = M; transpose( N, M, A, LDA, A_use, M ); } // Generate QR factors in El reflector form info = lapack::geqrf( M, N, A_use, LDA_use, TAU ); if( !info ) { // Generate Q from reflectors // Real -> XORGQR if constexpr ( not is_complex_type_v ) info = lapack::orgqr( M, N, K, A_use, LDA_use, TAU ); // Complex -> XUNGQR else info = lapack::ungqr( M, N, K, A_use, LDA_use, TAU ); } // If row major, transpose output + cleanup if( order == blas::Layout::RowMajor ) { if(!info) transpose( M, N, A_use, M, A, LDA ); alloc.deallocate( A_use, M*N ); } // Cleanup Tau alloc.deallocate( TAU, K ); return info; } template , typename IntAlloc = std::allocator > int64_t lu_inverse( blas::Layout order, int64_t N, T* A, int64_t LDA, Alloc alloc = Alloc(), IntAlloc int_alloc = IntAlloc() ) { //std::cout << "IN LU INV IMPL" << std::endl; auto* A_use = A; int64_t LDA_use = LDA; // If row major, transpose input and redirect pointers if( order == blas::Layout::RowMajor ) { A_use = alloc.allocate( N*N ); LDA_use = N; transpose( N, N, A, LDA, A_use, N ); } // Allocate Pivot int64_t* IPIV = int_alloc.allocate( N ); // A -> LU int64_t info = lapack::getrf( N, N, A_use, LDA_use, IPIV ); // Generate inverse if( !info ) { info = lapack::getri( N, A_use, LDA_use, IPIV ); } // If row major + sucessful, transpose output + cleanup if( order == blas::Layout::RowMajor ) { if(!info) transpose( N, N, A_use, N, A, LDA ); alloc.deallocate( A_use, N*N ); } // Cleanup Pivot int_alloc.deallocate( IPIV, N ); return info; } template > int64_t hereig( blas::Layout order, lapack::Job jobz, lapack::Uplo uplo, int64_t N, T* A, int64_t LDA, real_type_t* W, Alloc alloc = Alloc() ) { //std::cout << "IN HEREIG IMPL" << std::endl; // If row major, Swap uplo if( order == blas::Layout::RowMajor ) { if( uplo == lapack::Uplo::Lower ) uplo = lapack::Uplo::Upper; else uplo = lapack::Uplo::Lower; } int64_t info; // Complex -> XHEEV if constexpr (is_complex_type_v) info = lapack::heev( jobz, uplo, N, A, LDA, W ); else info = lapack::syev( jobz, uplo, N, A, LDA, W ); // If row major + sucessful + vectors wanted, transpose output if( !info and order == blas::Layout::RowMajor and jobz == lapack::Job::Vec ) { // Allocate scratch space auto* A_t = alloc.allocate(N*N); transpose( N, N, A, LDA, A_t, N ); // If complex, conjugate (A**T = CONJ(A)) if constexpr ( is_complex_type_v ) { for( int64_t i = 0; i < N*N; ++i ) A_t[i] = std::conj(A_t[i]); } // Copy back to output vars for( int64_t i = 0; i < N; ++i ) for( int64_t j = 0; j < N; ++j ) A[i*LDA + j] = A_t[i*N + j]; // Free scratch alloc.deallocate( A_t, N*N ); } return info; } } #endif // BLAS_LAPACK #endif BTAS-1.0.0/btas/generic/linear_algebra.h000066400000000000000000000222001476142407000177070ustar00rootroot00000000000000// // Created by Karl Pierce on 1/26/20. // #ifndef BTAS_LINEAR_ALGEBRA_H #define BTAS_LINEAR_ALGEBRA_H #include #include namespace btas{ /// Computes L of the LU decomposition of tensor \c A /// \param[in, out] A In: A reference matrix to be LU decomposed. Out: /// The L of an LU decomposition of \c A. 
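///
/// A minimal usage sketch (illustrative only; requires BLAS/LAPACK bindings,
/// i.e. BTAS_HAS_BLAS_LAPACK):
/// \code
/// btas::Tensor<double> A(3, 3);
/// A.fill(1.0);
/// A(0, 0) = 4.0; A(1, 1) = 3.0; A(2, 2) = 2.0;  // any square data
/// btas::LU_decomp(A);                           // A now holds P * L from A = P * L * U
/// \endcode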
template void LU_decomp(Tensor &A) { #ifndef BTAS_HAS_BLAS_LAPACK BTAS_EXCEPTION("LU_decomp required BLAS/LAPACK bindings to be enabled: -DBTAS_USE_BLAS_LAPACK=ON"); #else using ind_t = typename Tensor::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; if (A.rank() > 2) { BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); } Tensor L(A.range()); Tensor P(A.extent(0), A.extent(0)); P.fill(0.0); L.fill(0.0); btas::Tensor piv(std::min(A.extent(0), A.extent(1))); auto info = getrf( blas::Layout::RowMajor, A.extent(0), A.extent(1), A.data(), A.extent(1), piv.data() ); if( info < 0) { BTAS_EXCEPTION("LU_decomp: GETRF had an illegal arg"); } // This means that part of the LU is singular which may cause a problem in // ones QR decomposition but LU can be computed fine. if (info != 0) { } // indexing the pivot matrix for (auto &j : piv) j -= 1; ind_t pivsize = piv.extent(0); piv.resize(Range{Range1{A.extent(0)}}); // Walk through the full pivot array and // put the correct index values throughout for (ind_t i = 0; i < piv.extent(0); i++) { if (i == piv(i) || i >= pivsize) { for (ind_t j = 0; j < i; j++) { if (i == piv(j)) { piv(i) = j; break; } } } if (i >= pivsize) { piv(i) = i; for (ind_t j = 0; j < i; j++) if (i == piv(j)) { piv(i) = j; break; } } } // generating the pivot matrix from the correct indices found above for (ind_t i = 0; i < piv.extent(0); i++) P(piv(i), i) = 1; // Use the output of LAPACK to make a lower triangular matrix, L // TODO Make this more efficient using pointer arithmetic for (ord_t i = 0; i < L.extent(0); i++) { for (ord_t j = 0; j < i && j < L.extent(1); j++) { L(i, j) = A(i, j); } if (i < L.extent(1)) L(i, i) = 1; } // contracting the pivoting matrix with L to put in correct order gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, P, L, 0.0, A); #endif } /// Computes the QR decomposition of matrix \c A /// \param[in, out] A In: A Reference matrix to be QR decomposed. Out: /// The Q of a QR decomposition of \c A. /// \return bool true if QR was successful false if failed. template bool QR_decomp(Tensor &A) { #ifndef BTAS_HAS_BLAS_LAPACK BTAS_EXCEPTION("QR_decomp required BLAS/LAPACK bindings to be enabled: -DBTAS_USE_BLAS_LAPACK=ON"); #else using ind_t = typename Tensor::range_type::index_type::value_type; if (A.rank() > 2) { BTAS_EXCEPTION("Tensor rank > 2. Can only QR decompose matrices."); } return !householder_qr_genq( blas::Layout::RowMajor, A.extent(0), A.extent(1), A.data(), A.extent(1) ); #endif } /// Computes the inverse of a matrix \c A using a pivoted LU decomposition /// \param[in, out] A In: A reference matrix to be inverted. Out: /// The inverse of A, computed using LU decomposition. /// \return bool true if inversion was successful false if failed template bool Inverse_Matrix(Tensor & A){ #ifndef BTAS_HAS_BLAS_LAPACK BTAS_EXCEPTION("INVERSE_MATRIX required BLAS/LAPACK bindings to be enabled: -DBTAS_USE_BLAS_LAPACK=ON"); #else if(A.rank() > 2){ BTAS_EXCEPTION("Tensor rank > 2. Can only invert matrices."); } if( A.extent(0) != A.extent(1) ) { BTAS_EXCEPTION("Can only invert square matrices."); } return !lu_inverse( blas::Layout::RowMajor, A.extent(0), A.data(), A.extent(0) ); #endif } /// Computes the eigenvalue decomposition of a matrix \c A and /// \param[in, out] A In: A reference matrix to be decomposed. Out: /// The eigenvectors of the matrix \c A. /// \param[in, out] lambda In: An empty vector with length greater than /// or equal to the largest mode of \c A. 
Out: The eigenvalues of the /// matrix \c A template void eigenvalue_decomp(Tensor& A, RealTensor& lambda) { #ifndef BTAS_HAS_BLAS_LAPACK BTAS_EXCEPTION("eigenvalue_decomp required BLAS/LAPACK bindings to be enabled: -DBTAS_USE_BLAS_LAPACK=ON"); #else using ind_t = typename Tensor::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; if (A.rank() > 2) { BTAS_EXCEPTION("Tensor rank > 2. Tensor A must be a matrix."); } ord_t lambda_length = lambda.size(); ind_t smallest_mode_A = (A.extent(0) < A.extent(1) ? A.extent(0) : A.extent(1)); if (lambda_length < smallest_mode_A) { lambda = RealTensor(smallest_mode_A); } auto info = hereig( blas::Layout::RowMajor, lapack::Job::Vec, lapack::Uplo::Upper, smallest_mode_A, A.data(), smallest_mode_A, lambda.data() ); if (info) BTAS_EXCEPTION("Error in computing the Eigenvalue decomposition"); #endif } /// Solving Ax = B using a Cholesky decomposition /// \param[in, out] A In: The right-hand side of the linear equation /// to be inverted using Cholesky. Out: /// the factors L and U from the factorization A = P*L*U; /// the unit diagonal elements of L are not stored. /// \param[in, out] B In: The left-hand side of the linear equation /// out: The solution x = A^{-1}B /// \return bool true if inversion was successful false if failed. template bool cholesky_inverse(Tensor & A, Tensor & B) { #ifndef BTAS_HAS_BLAS_LAPACK BTAS_EXCEPTION("cholesky_inverse required BLAS/LAPACK bindings to be enabled: -DBTAS_USE_BLAS_LAPACK=ON"); #else using ind_t = typename Tensor::range_type::index_type::value_type; // This method computes the inverse quickly for a square matrix // based on MATLAB's implementation of A / B operator. ind_t rank = B.extent(1); ind_t LDB = B.extent(0); // XXX DBWY Col Major? // Column major here because we are solving XA = B, not AX = B // But as you point out below, A is symmetric positive semi-definite so Row major should // give the same results // XXX DBWY GESV not POSV? return !gesv( blas::Layout::ColMajor, rank, LDB, A.data(), rank, B.data(), rank ); #endif } /// SVD referencing code from /// http://www.netlib.org/lapack/explore-html/de/ddd/lapacke_8h_af31b3cb47f7cc3b9f6541303a2968c9f.html /// Fast pseudo-inverse algorithm described in /// https://arxiv.org/pdf/0804.4809.pdf /// \param[in] A In: A reference to the matrix to be inverted. /// \param[in,out] fast_pI Should a faster version of the pseudoinverse be used? /// return if \c fast_pI was successful /// \return \f$ A^{\dagger} \f$ The pseudoinverse of the matrix A. template Tensor pseudoInverse(Tensor & A, bool & fast_pI) { #ifndef BTAS_HAS_BLAS_LAPACK BTAS_EXCEPTION("pseudoInverse required BLAS/LAPACK bindings to be enabled: -DBTAS_USE_BLAS_LAPACK=ON"); #else // BTAS_HAS_BLAS_LAPACK using ind_t = typename Tensor::range_type::index_type::value_type; using T = typename Tensor::value_type; using RT = real_type_t; using RTensor = rebind_tensor_t; if (A.rank() > 2) { BTAS_EXCEPTION("PseudoInverse can only be computed on a matrix"); } ind_t row = A.extent(0), col = A.extent(1); auto rank = (row < col ? 
row : col); if (fast_pI) { Tensor temp(col, col), inv(col, row); // compute V^{\dag} = (A^T A) ^{-1} A^T gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, A, A, 0.0, temp); fast_pI = Inverse_Matrix(temp); if (fast_pI) { gemm(blas::Op::NoTrans, blas::Op::Trans, 1.0, temp, A, 0.0, inv); return inv; } else { std::cout << "Fast pseudo-inverse failed reverting to normal pseudo-inverse" << std::endl; } } RTensor s(Range{Range1{rank}}); Tensor U(Range{Range1{row}, Range1{row}}); Tensor Vt(Range{Range1{col}, Range1{col}}); gesvd(lapack::Job::AllVec, lapack::Job::AllVec, A, s, U, Vt); // Inverse the Singular values with threshold 1e-13 = 0 double lr_thresh = 1e-13; Tensor s_inv(Range{Range1{row}, Range1{col}}); s_inv.fill(0.0); for (ind_t i = 0; i < rank; ++i) { if (abs(s(i)) > lr_thresh) s_inv(i, i) = 1.0 / s(i); else s_inv(i, i) = 0.0; } Tensor s_(Range{Range1{row}, Range1{col}}); // Compute the matrix A^-1 from the inverted singular values and the U and // V^T provided by the SVD gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, U, s_inv, 0.0, s_); U = Tensor(Range{Range1{row}, Range1{col}}); gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, s_, Vt, 0.0, U); return U; #endif } } // namespace btas #endif //BTAS_LINEAR_ALGEBRA_H BTAS-1.0.0/btas/generic/mkl_extensions.h000066400000000000000000000035731476142407000200360ustar00rootroot00000000000000#ifndef BTAS_MKL_EXTENSIONS_H #define BTAS_MKL_EXTENSIONS_H #ifdef BTAS_HAS_INTEL_MKL #define MKL_Complex16 std::complex #include #include namespace btas { template >> void imatcopy( char ORDERING, char TRANS, MKL_INT M, MKL_INT N, std::type_identity_t SCAL, T* A, MKL_INT SRC_LDA, MKL_INT DST_LDA ) { if constexpr ( std::is_same_v ) mkl_simatcopy( ORDERING, TRANS, M, N, T(SCAL), A, SRC_LDA, DST_LDA ); else if constexpr ( std::is_same_v ) mkl_dimatcopy( ORDERING, TRANS, M, N, T(SCAL), A, SRC_LDA, DST_LDA ); else if constexpr ( std::is_same_v > ) mkl_cimatcopy( ORDERING, TRANS, M, N, T(SCAL), A, SRC_LDA, DST_LDA ); else if constexpr ( std::is_same_v > ) mkl_zimatcopy( ORDERING, TRANS, M, N, T(SCAL), A, SRC_LDA, DST_LDA ); else BTAS_EXCEPTION( "Somehow made it into an unsupported IMATCOPY path" ); } template >> void omatcopy( char ORDERING, char TRANS, MKL_INT M, MKL_INT N, std::type_identity_t SCAL, const T* A, MKL_INT LDA, T* B, MKL_INT LDB ) { if constexpr ( std::is_same_v ) mkl_somatcopy( ORDERING, TRANS, M, N, T(SCAL), A, LDA, B, LDB ); else if constexpr ( std::is_same_v ) mkl_domatcopy( ORDERING, TRANS, M, N, T(SCAL), A, LDA, B, LDB ); else if constexpr ( std::is_same_v > ) mkl_comatcopy( ORDERING, TRANS, M, N, T(SCAL), A, LDA, B, LDB ); else if constexpr ( std::is_same_v > ) mkl_zomatcopy( ORDERING, TRANS, M, N, T(SCAL), A, LDA, B, LDB ); else BTAS_EXCEPTION( "Somehow made it into an unsupported OMATCOPY path" ); } } #endif #endif BTAS-1.0.0/btas/generic/numeric_type.h000066400000000000000000000112771476142407000174770ustar00rootroot00000000000000#ifndef __BTAS_NUMERIC_TYPE_H #define __BTAS_NUMERIC_TYPE_H 1 #include #include #include #include namespace btas { /// Numeric value functions template struct NumericType { static _T zero () { return _T(0); } static _T one () { return _T(1); } template static void fill(_Iterator begin, _Iterator end, _T value) { std::fill(begin, end, value); } template static void scal(_Iterator begin, _Iterator end, _T scaling_factor) { for(auto i=begin; i!=end; ++i) *i *= scaling_factor; } }; // // Specialization for each numeric value type // /// Single precision real number template <> struct NumericType { /// \return 0 constexpr 
static float zero () { return 0.0f; } /// \return 1 constexpr static float one () { return 1.0f; } template static void fill(_Iterator first, _Iterator last, const float& val) { static_assert(std::is_convertible::value_type>::value, "Value type is not convertible"); std::fill(first, last, val); } template static void scal(_Iterator first, _Iterator last, const float& val) { static_assert(std::is_convertible::value_type>::value, "Value type is not convertible"); while (first != last) { (*first) *= val; ++first; } } }; /// Double precision real number template <> struct NumericType { /// \return 0 constexpr static double zero () { return 0.0; } /// \return 1 constexpr static double one () { return 1.0; } template static void fill(_Iterator first, _Iterator last, const double& val) { static_assert(std::is_convertible::value_type>::value, "Value type is not convertible"); std::fill(first, last, val); } template static void scal(_Iterator first, _Iterator last, const double& val) { static_assert(std::is_convertible::value_type>::value, "Value type is not convertible"); while (first != last) { (*first) *= val; ++first; } } }; /// Single precision complex number template <> struct NumericType> { /// \return 0 const static std::complex zero () { return std::complex(0.0, 0.0); } /// \return 1 const static std::complex one () { return std::complex(1.0, 0.0); } /// \return 1i const static std::complex onei () { return std::complex(0.0, 1.0); } template static void fill(_Iterator first, _Iterator last, const std::complex& val) { static_assert(std::is_convertible, typename std::iterator_traits<_Iterator>::value_type>::value, "Value type is not convertible"); std::fill(first, last, val); } template static void scal(_Iterator first, _Iterator last, const std::complex& val) { static_assert(std::is_convertible, typename std::iterator_traits<_Iterator>::value_type>::value, "Value type is not convertible"); while (first != last) { (*first) *= val; ++first; } } }; /// Double precision complex number template <> struct NumericType> { /// \return 0 const static std::complex zero () { return std::complex(0.0, 0.0); } /// \return 1 const static std::complex one () { return std::complex(1.0, 0.0); } /// \return 1i const static std::complex onei () { return std::complex(0.0, 1.0); } template static void fill(_Iterator first, _Iterator last, const std::complex& val) { static_assert(std::is_convertible, typename std::iterator_traits<_Iterator>::value_type>::value, "Value type is not convertible"); std::fill(first, last, val); } template static void scal(_Iterator first, _Iterator last, const std::complex& val) { static_assert(std::is_convertible, typename std::iterator_traits<_Iterator>::value_type>::value, "Value type is not convertible"); while (first != last) { (*first) *= val; ++first; } } }; namespace impl { template T conj(const T& t) { return t; } template std::complex conj(const std::complex& t) { return std::conj(t); } } }; // namespace btas #endif // __BTAS_NUMERIC_TYPE_H BTAS-1.0.0/btas/generic/permute.h000066400000000000000000000105011476142407000164420ustar00rootroot00000000000000#ifndef __BTAS_PERMUTE_H #define __BTAS_PERMUTE_H 1 #include #include #include #include #include #include #include #include namespace btas { /// permute \c X using permutation \c p given in the preimage ("from") convention, write result to \c Y template::value && is_index<_Permutation>::value && is_boxtensor<_TensorY>::value >::type > void permute(const _TensorX& X, const _Permutation& p, _TensorY& Y) { const auto& r = 
X.range(); using range_type = std::decay_t; constexpr const bool r_is_permutable = range_traits::is_general_layout; auto do_perm = [](auto&& X, auto&& Y, auto&& pr) { Y.resize(pr); const auto itrX = std::begin(X); auto itrY = std::begin(Y); for (auto && i : Y.range()) { *itrY = *(itrX + pr.ordinal(i)); ++itrY; } }; if (r_is_permutable) do_perm(X, Y, permute(r, p)); else { do_perm(X, Y, permute(btas::Range(r.lobound(), r.upbound(), r.stride()), p)); } } /// permute \c X using permutation \c p given in the preimage ("from") convention, write result to \c Y template::value && is_boxtensor<_TensorY>::value >::type > void permute(const _TensorX& X, std::initializer_list<_T> pi, _TensorY& Y) { permute(X, btas::DEFAULT::index<_T>(pi) , Y); } /// permute \c X annotated with \c aX into \c Y annotated with \c aY /// \tparam _AnnotationX a container type /// \tparam _AnnotationY a container type template::value && is_boxtensor<_TensorY>::value && is_container<_AnnotationX>::value && is_container<_AnnotationY>::value>::type> void permute(const _TensorX& X, const _AnnotationX& aX, _TensorY& Y, const _AnnotationY& aY) { const auto Xrank = rank(X); // check rank assert(Xrank == rank(aX) && Xrank == rank(aY)); // case: doesn't need to permute if (std::equal(std::begin(aX), std::end(aX), std::begin(aY))) { Y = X; return; } { // validate aX auto aX_sorted = aX; std::sort(std::begin(aX_sorted), std::end(aX_sorted)); assert( std::unique(std::begin(aX_sorted), std::end(aX_sorted)) == std::end(aX_sorted)); // validate aY auto aY_sorted = aY; std::sort(std::begin(aY_sorted), std::end(aY_sorted)); assert( std::unique(std::begin(aY_sorted), std::end(aY_sorted)) == std::end(aY_sorted)); // and aX against aY assert(std::equal(std::begin(aX_sorted), std::end(aX_sorted), std::begin(aY_sorted))); } // calculate permutation btas::DEFAULT::index prm(Xrank); const auto first = std::begin(aX); const auto last = std::end(aX); auto aY_iter = std::begin(aY); for(size_t i = 0; i < Xrank; ++i, ++aY_iter) { auto found = std::find(std::begin(aX), std::end(aX), *aY_iter); assert(found != last); prm[i] = std::distance(first, found); } // call permute permute(X, prm, Y); } /// permute \c X annotated with \c aX into \c Y annotated with \c aY template::value && is_boxtensor<_TensorY>::value >::type > void permute(const _TensorX& X, std::initializer_list<_T> aX, _TensorY& Y, std::initializer_list<_T> aY) { permute(X, btas::DEFAULT::index<_T>(aX), Y, btas::varray<_T>(aY)); } } // namespace btas #endif // __BTAS_PERMUTE_H BTAS-1.0.0/btas/generic/rals_helper.h000066400000000000000000000032531476142407000172670ustar00rootroot00000000000000// // Created by Karl Pierce on 11/3/18. 
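//
// Given the previous factor matrix A_n(k-1) and the current update A_n(k),
// operator() below returns the relative change
//   s = ||A_n(k) - A_n(k-1)||_2 / ||A_n(k)||_2,
// which the RALS solver uses as the step size for mode n.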
// #ifndef BTAS_RALS_HELPER_H #define BTAS_RALS_HELPER_H #include namespace btas{ /** \brief A helper function for the RALS solver Stores a previous iteration of factor matrices to compute the nth iteration step size see https://doi.org/10.1063/1.4977994 for more details **/ template class RALSHelper { public: using ind_t = typename Tensor::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; RALSHelper() = default; ~RALSHelper() = default; /// constructor of the helper function /// \param[in] prev an initial set of /// normalized factor matrices RALSHelper(std::vector prev) : prev_(prev) { } /// Operator to compute the nth iteration step size /// \param[in] mode which mode of the actual tensor /// is being updated /// \param[in] An the updated factor matrix double operator()(size_t mode, const Tensor &An) { ord_t size = An.size(); auto change = An - prev_[mode]; double denom = 0.0, s = 0.0; auto chg_ptr = change.data(); auto an_ptr = An.data(); for (ord_t i = 0; i < size; ++i) { auto val = *(chg_ptr + i); s += val * val; val = *(an_ptr + i); denom += val * val; } s = std::sqrt(s) / std::sqrt(denom); //double s = std::sqrt(btas::dot(change, change)); //s /= std::sqrt(dot(An, An)); prev_[mode] = An; return s; } private: std::vector prev_; // stores a set normalized of factor matrices }; } #endif //BTAS_RALS_HELPER_H BTAS-1.0.0/btas/generic/randomized.h000066400000000000000000000145141476142407000171250ustar00rootroot00000000000000#ifndef BTAS_RANDOMIZED_DECOMP_H #define BTAS_RANDOMIZED_DECOMP_H #include #include #include #include #include #include #include #include #include #include namespace btas { /// fills a Tensor or TensorView with random numbers /// \tparam Tensor_ a Tensor type (\sa btas::is_tensor) /// \param t a tensor object /// \param engine random number engine /// \param dist a distribution object template , typename = typename std::enable_if_t::value>> void fill_random(Tensor_& t, RandomEngine&& engine = RandomEngine(random_seed_accessor()), UniformRealDistribution&& dist = UniformRealDistribution(-1.0, 1.0)) { using Numeric_ = typename Tensor_::numeric_type; if constexpr (is_complex_type_v) { std::generate(t.begin(), t.end(), [&]() { auto re = dist(engine); auto im = dist(engine); return Numeric_(re, im); }); } else // real T std::generate(t.begin(), t.end(), [&]() { return dist(engine); }); } /// \param[in,out] A In: An empty matrix of size column dimension of the nth /// mode flattened tensor provided to the randomized compression method by the /// desired rank of the randmoized compression method. Out: A random matrix, /// column drawn from a random distribution and orthogonalized template > void generate_random_metric(Tensor& A, RandomEngine&& rand = RandomEngine(random_seed_accessor()), NormalDistribution&& dist = boost::random::normal_distribution<>(0., 10.)) { using ind_t = typename Tensor::range_type::index_type::value_type; using value_type = typename Tensor::value_type; boost::random::normal_distribution distribution(0.0, 10.0); for (ind_t i = 0; i < A.extent(1); i++) { value_type norm = 0.0; for (ind_t j = 0; j < A.extent(0); j++) { auto val = abs(dist(rand)); norm += val * val; A(j, i) = val; } norm = sqrt(norm); for (ind_t j = 0; j < A.extent(0); j++) { A(j, i) /= norm; } dist.reset(); } QR_decomp(A); } /// Calculates the randomized compression of tensor \c A. /// See reference /// \param[in, out] A In: An order-N tensor to be randomly decomposed. 
/// Out: The core tensor of random decomposition \param[in, out] transforms /// In: An empty vector. Out: The randomized decomposition factor matrices. /// \param[in] des_rank The rank of each mode of \c A after randomized /// decomposition. \param[in] oversampl Oversampling added to \c /// desired_compression_rank required to provide an optimal decomposition. /// Default = suggested = 10. \param[in] powerit Number of power iterations, as /// specified in the literature, to scale the spectrum of each mode. Default = /// suggested = 2. template void randomized_decomposition(Tensor& A, std::vector& transforms, long des_rank, size_t oversampl = 10, size_t powerit = 2) { using ind_t = typename Tensor::range_type::index_type::value_type; // Add the oversampling to the desired rank size_t ndim = A.rank(); ind_t rank = des_rank + oversampl; std::vector A_modes; for (size_t i = 0; i < ndim; ++i) { A_modes.push_back(i); } std::vector final_modes(A_modes); // Walk through all the modes of A for (size_t n = 0; n < ndim; n++) { // Flatten A auto An = flatten(A, n); // Make and fill the random matrix Gamma Tensor G(An.extent(1), rank); generate_random_metric(G); // Project The random matrix onto the flatten reference tensor Tensor Y(An.extent(0), rank); gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, An, G, 0.0, Y); // Start power iteration for (size_t j = 0; j < powerit; j++) { // Find L of an LU decomposition of the projected flattened tensor LU_decomp(Y); Tensor Z(An.extent(1), Y.extent(1)); // Find the L of an LU decomposition of the L above (called Y) projected // onto the flattened reference tensor gemm(blas::Op::Trans, blas::Op::NoTrans, 1.0, An, Y, 0.0, Z); LU_decomp(Z); // Project the second L from above (called Z) onto the flattened reference // tensor and start power iteration over again. Y.resize(Range{Range1{An.extent(0)}, Range1{Z.extent(1)}}); gemm(blas::Op::NoTrans, blas::Op::NoTrans, 1.0, An, Z, 0.0, Y); } // Compute the QR from Y above. If the QR is non-singular push it into // transforms and project the unitary matrix onto the reference tensor bool QR_good = true; QR_good = QR_decomp(Y); if (!QR_good) { BTAS_EXCEPTION( "QR did not complete successfully due to chosen " "dimension. Choose desired_compression_rank <= smallest " "dimension of tensor A"); } transforms.push_back(Y); } std::vector contract_modes; contract_modes.push_back(0); contract_modes.push_back(ndim); for (size_t n = 0; n < ndim; n++) { #ifdef BTAS_HAS_INTEL_MKL core_contract(A, transforms[n], n); #else std::vector final_dims; for (size_t j = 0; j < ndim; ++j) { if (j == n) { final_dims.push_back(transforms[n].extent(1)); } else { final_dims.push_back(A.extent(j)); } } contract_modes[0] = n; final_modes[n] = ndim; btas::Range final_range(final_dims); Tensor final(final_range); contract(1.0, A, A_modes, transforms[n], contract_modes, 0.0, final, final_modes); final_modes[n] = n; A = final; #endif // BTAS_HAS_INTEL_MKL } } } // namespace btas #endif // BTAS_RANDOMIZED_DECOMP_H BTAS-1.0.0/btas/generic/reconstruct.h000066400000000000000000000044041476142407000173410ustar00rootroot00000000000000// // Created by Karl Pierce on 4/12/19. 
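//
// Reassembles the full tensor from a set of CP factor matrices, i.e. for an
// order-3 model T(i,j,k) ~= sum_r lambda(r) * A0(i,r) * A1(j,r) * A2(k,r).
// A minimal usage sketch (illustrative only; the element type of dims_order is
// assumed to match the reconstruct() signature below, and the scaling vector
// is taken from A[ndim] when the lambda argument is left empty):
//   std::vector<btas::Tensor<double>> A = /* ndim factors + lambda at A[ndim] */;
//   auto T = btas::reconstruct(A, {0, 1, 2});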
//

#ifndef BTAS_GENERIC_RECONSTRUCT_H
#define BTAS_GENERIC_RECONSTRUCT_H

#include

namespace btas {

  template <typename Tensor>
  Tensor reconstruct(std::vector<Tensor> &A, std::vector<size_t> dims_order, Tensor lambda = Tensor()) {
    using ind_t = typename Tensor::range_type::index_type::value_type;
    using ord_t = typename range_traits<typename Tensor::range_type>::ordinal_type;
    if (A.size() - 1 != dims_order.size() && lambda.empty()) {
      BTAS_EXCEPTION("A.size() - 1 != dims_order.size(), please verify that you have correctly assigned the "
                     "order of dimension reconstruction");
    }
    std::vector<ind_t> dimensions;
    // ndim is the size of dims_order; this allows you
    // to resize a subtensor in the factor set A
    size_t ndim = dims_order.size();
    for (size_t i = 0; i < ndim; i++) {
      dimensions.push_back(A[dims_order[i]].extent(0));
    }
    ind_t rank = A[0].extent(1);
    lambda = (lambda.empty() ? A[ndim] : lambda);
    auto lam_ptr = lambda.data();
    for (ind_t i = 0; i < rank; i++) {
      scal(A[dims_order[0]].extent(0), *(lam_ptr + i), std::begin(A[dims_order[0]]) + i, rank);
    }

    // Make the Khatri-Rao product of all the factor matrices except the last dimension
    Tensor KRP = A[dims_order[0]];
    Tensor hold = A[dims_order[0]];
    for (size_t i = 1; i < ndim - 1; i++) {
      khatri_rao_product(KRP, A[dims_order[i]], hold);
      KRP = hold;
    }

    // contract the rank dimension of the Khatri-Rao product with the rank dimension of
    // the last factor matrix. hold is now the reconstructed tensor
    hold = Tensor(KRP.extent(0), A[dims_order[ndim - 1]].extent(0));
    gemm(blas::Op::NoTrans, blas::Op::Trans, 1.0, KRP, A[dims_order[ndim - 1]], 0.0, hold);

    // resize the reconstructed tensor to the correct dimensions
    hold.resize(dimensions);

    // remove the scaling applied to the first factor matrix;
    // if the value of lambda is very small, don't invert it.
    for (ind_t i = 0; i < rank; i++) {
      auto val = *(lam_ptr + i);
      scal(A[dims_order[0]].extent(0), (abs(val) > 1e-12 ?
1.0/val : 1.0), std::begin(A[dims_order[0]]) + i, rank); } return hold; } } #endif //BTAS_GENERIC_RECONSTRUCT_H BTAS-1.0.0/btas/generic/scal_impl.h000066400000000000000000000066131476142407000167350ustar00rootroot00000000000000#ifndef __BTAS_SCAL_IMPL_H #define __BTAS_SCAL_IMPL_H 1 #include #include #include #include #include #include #include #include #include namespace btas { // ================================================================================================ /// Call BLAS depending on type of Tensor class template struct scal_impl { }; /// Case that alpha is trivially multipliable to elements template<> struct scal_impl { template static void call_impl ( const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, generic_impl_tag) { for (unsigned long i = 0; i < Nsize; ++i, itrX += incX) { (*itrX) *= alpha; } } #ifdef BTAS_HAS_BLAS_LAPACK template static void call_impl ( const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX, blas_lapack_impl_tag) { using T = iterator_value_t<_IteratorX>; blas::scal( Nsize, static_cast(alpha), static_cast(&(*itrX)), incX ); } #endif template static void call ( const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX ) { call_impl( Nsize, alpha, itrX, incX, blas_lapack_impl_t<_IteratorX>() ); } }; /// Case that alpha is multiplied recursively by SCAL /// Note that incX is disabled for recursive call template<> struct scal_impl { template static void call ( const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX) { for (unsigned long i = 0; i < Nsize; ++i, itrX += incX) { scal(alpha, *itrX); } } }; // ================================================================================================ /// Generic implementation of BLAS SCAL in terms of C++ iterator template void scal ( const unsigned long& Nsize, const _T& alpha, _IteratorX itrX, const iterator_difference_t<_IteratorX>& incX) { typedef std::iterator_traits<_IteratorX> __traits_X; static_assert(is_random_access_iterator_v<_IteratorX>, "iterator X must be a random access iterator"); typedef typename __traits_X::value_type __value_X; typedef typename std::conditional::value, __value_X, _T>::type __alpha; scal_impl::value>::call(Nsize, static_cast<__alpha>(alpha), itrX, incX); } // ================================================================================================ /// Convenient wrapper to call BLAS SCAL from tensor objects template< typename _T, class _TensorX, class = typename std::enable_if< is_boxtensor<_TensorX>::value >::type > void scal ( const _T& alpha, _TensorX& X) { if (X.empty()) { return; } auto itrX = std::begin(X); scal (X.size(), alpha, itrX, 1); } } // namespace btas #endif // __BTAS_SCAL_IMPL_H BTAS-1.0.0/btas/generic/swap.h000066400000000000000000000133671476142407000157500ustar00rootroot00000000000000#ifndef BTAS_SWAP_H #define BTAS_SWAP_H #ifdef BTAS_HAS_INTEL_MKL #include #include #include #include #include //***IMPORTANT***// // do not use swap to first then use swap to back // swap to first preserves order while swap to back does not // If you use swap to first, to undo the transpositions // make is_in_front = true and same goes for swap to back // do not mix swap to first and swap to back namespace btas { /// Swaps the nth mode of an Nth order tensor to the front preserving the /// order of the other modes. 
/// swap_to_first(A, I3, false, false) =
/// A(I1, I2, I3, I4, I5) --> A(I3, I1, I2, I4, I5)
/// \param[in, out] A In: An order-N tensor. Out: the order-N tensor with mode \c mode permuted.
/// \param[in] mode The mode of \c A one wishes to permute to the front.
/// \param[in] is_in_front \c Mode of \c A has already been permuted to the
/// front. Default = false.
/// \param[in] for_ALS_update Different indexing is
/// required for the ALS; if making a general swap this should be false.
template <typename Tensor>
void swap_to_first(Tensor &A, size_t mode, bool is_in_front = false, bool for_ALS_update = true) {
  using ind_t = typename Tensor::range_type::index_type::value_type;
  using ord_t = typename range_traits<typename Tensor::range_type>::ordinal_type;
  using dtype = typename Tensor::numeric_type;
  auto ndim = A.rank();
  // If the mode of interest is the first mode you are done.
  if (mode > ndim) {
    BTAS_EXCEPTION("Mode index is greater than tensor rank");
  }
  if (mode == 0) return;

  // Build the resize vector for the reference tensor to update dimensions
  std::vector<ind_t> aug_dims;
  dtype one {1.0};
  ord_t size = A.range().area();
  for (size_t i = 0; i < ndim; i++) {
    aug_dims.push_back(A.extent(i));
  }

  // Special indexing for ALS update
  if (for_ALS_update) {
    auto temp = aug_dims[0];
    aug_dims[0] = aug_dims[mode];
    aug_dims[mode] = temp;
  }
  // Order preserving swap of indices.
  else {
    auto temp = (is_in_front) ? aug_dims[0] : aug_dims[mode];
    auto erase = (is_in_front) ? aug_dims.begin() : aug_dims.begin() + mode;
    auto begin = (is_in_front) ? aug_dims.begin() + mode : aug_dims.begin();
    aug_dims.erase(erase);
    aug_dims.insert(begin, temp);
  }

  ord_t rows = 1;
  ord_t cols = 1;
  ind_t step = 1;

  // The last mode is an easier swap: make all dimensions before the last the row
  // dimension and the last dimension the column dimension, then permute.
  if (mode == ndim - 1) {
    rows = (is_in_front) ? A.extent(0) : size / A.extent(mode);
    cols = (is_in_front) ? size / A.extent(0) : A.extent(mode);
    dtype *data_ptr = A.data();
    imatcopy('R', 'T', rows, cols, one, data_ptr, cols, rows);
  }
  // All other modes are not so easy: all indices up to the mode of interest form the row
  // dimension, all others the column dimension, then swap. After swapping, there are
  // row-dimension many smaller tensors of size column dimension; do row-dimension many swaps
  // with inner row dimension = between the outer row
  // dimension and the last dimension, and inner col dimension = last dimension,
  // now the mode of interest. Swapping the rows and columns back at the end
  // will preserve the order of the dimensions.
  else {
    for (size_t i = 0; i <= mode; i++) rows *= A.extent(i);
    cols = size / rows;
    dtype *data_ptr = A.data();
    imatcopy('R', 'T', rows, cols, one, data_ptr, cols, rows);
    step = rows;
    ind_t in_rows = (is_in_front) ? A.extent(0) : rows / A.extent(mode);
    ind_t in_cols = (is_in_front) ? rows / A.extent(0) : A.extent(mode);
    for (ind_t i = 0; i < cols; i++) {
      data_ptr = A.data() + i * step;
      imatcopy('R', 'T', in_rows, in_cols, one, data_ptr, in_cols, in_rows);
    }
    data_ptr = A.data();
    imatcopy('R', 'T', cols, rows, one, data_ptr, rows, cols);
  }
  A.resize(aug_dims);
}

/// Swaps the nth mode of an Nth order tensor to the end.
/// Does not preserve order.\n
/// swap_to_back(T, I2, false) =
/// T(I1, I2, I3) --> T(I3, I1, I2)
/// \param[in, out] A In: An order-N tensor. Out: the order-N tensor with mode \c mode permuted.
/// \param[in] mode The mode of \c A one wishes to permute to the back.
/// \param[in] is_in_back \c Mode of \c A has already been permuted to the
/// back. Default = false.
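///
/// A minimal sketch (illustrative only; this header requires BTAS_HAS_INTEL_MKL):
/// \code
/// btas::Tensor<double> T(2, 3, 4);   // T(I1, I2, I3)
/// T.fill(1.0);
/// btas::swap_to_back(T, 1);          // data now ordered as T(I3, I1, I2), extents (4, 2, 3)
/// \endcode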
template void swap_to_back(Tensor &A, size_t mode, bool is_in_back = false) { using ind_t = typename Tensor::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; using dtype = typename Tensor::numeric_type; dtype one {1.0}; auto ndim = A.rank(); if (mode > ndim) BTAS_EXCEPTION_MESSAGE(__FILE__, __LINE__, "mode > A.rank(), mode out of range"); if (mode == ndim - 1) return; ord_t rows = 1; ord_t cols = 1; // counts the modes up to and including the mode of interest, these are stored // in rows counts all the modes beyond the mode of interest, these are stored // as columns auto midpoint = (is_in_back) ? ndim - 1 - mode : mode + 1; std::vector aug_dims; for (size_t i = midpoint; i < ndim; i++) { aug_dims.push_back(A.extent(i)); cols *= A.extent(i); } for (size_t i = 0; i < midpoint; i++) { aug_dims.push_back(A.extent(i)); rows *= A.extent(i); } // Permutes the rows and columns double *data_ptr = A.data(); imatcopy('R', 'T', rows, cols, one , data_ptr, cols, rows); // resized to the new correct order. A.resize(aug_dims); return; } } // namespace btas #endif //BTAS_HAS_INTEL_MKL #endif // BTAS_SWAP_H BTAS-1.0.0/btas/generic/tensor_iterator_wrapper.h000066400000000000000000000025571476142407000217600ustar00rootroot00000000000000#ifndef __BTAS_TENSOR_ITERATOR_WRAPPER_H #define __BTAS_TENSOR_ITERATOR_WRAPPER_H 1 #include #include namespace btas { template struct tensor_iterator_wrapper { template static auto begin(_Tensor& x) -> decltype(x.begin()) { return x.begin(); } template static auto end (_Tensor& x) -> decltype(x.end ()) { return x.end (); } }; template<> struct tensor_iterator_wrapper { template static auto begin(_Tensor& x) -> decltype(x.data()) { return x.data(); } template static auto end (_Tensor& x) -> decltype(x.data()) { return x.data()+x.size(); } }; /// \return wrapped iterator of Tensor to the first template::value>::type> auto tbegin (_Tensor& x) -> decltype(tensor_iterator_wrapper::value>::begin(x)) { return tensor_iterator_wrapper::value>::begin(x); } /// \return wrapped iterator of Tensor to the last template::value>::type> auto tend (_Tensor& x) -> decltype(tensor_iterator_wrapper::value>::end(x)) { return tensor_iterator_wrapper::value>::end(x); } } // namespace btas #endif // __BTAS_TENSOR_ITERATOR_WRAPPER_H BTAS-1.0.0/btas/generic/transpose.h000066400000000000000000000007751476142407000170130ustar00rootroot00000000000000#ifndef __BTAS_TRANSPOSE_H #define __BTAS_TRANSPOSE_H 1 #include #include namespace btas { template void transpose( int64_t M, int64_t N, const T* A, int64_t LDA, T* B, int64_t LDB ) { #ifdef BTAS_HAS_INTEL_MKL T one {1.0}; omatcopy('C', 'T', M, N, one, A, LDA, B, LDB ); #else for( int64_t j = 0; j < N; ++j ) for( int64_t i = 0; i < M; ++i ) { B[j + i*LDB] = A[i + j*LDA]; } #endif } } #endif BTAS-1.0.0/btas/generic/tuck_cp_als.h000066400000000000000000000547631476142407000172720ustar00rootroot00000000000000// // Created by Karl Pierce on 2/10/22. // #ifndef BTAS_GENERIC_TUCK_COMP_CP_ALS_IPP #define BTAS_GENERIC_TUCK_COMP_CP_ALS_IPP #include #include #include #include #include #include namespace btas{ /** \brief Computes the Canonical Product (CP) decomposition of an order-N tensor which has been transformed via HOSVD (or some other defined transformation) using alternating least squares (ALS). This computes the CP decomposition of btas::Tensor objects with row major storage only with fixed (compile-time) and variable (run-time) ranks Does not support strided ranges. 
\warning this code takes a non-const reference \c tensor_ref and does not modify the values. This is a result of API (reshape needs non-const tensor) Synopsis: \code // Constructors TUCKER_CP_ALS A(tensor) // TUCKER_CP_ALS object with empty factor // matrices and empty transformation matrices // Operations A.compute_rank(rank, converge_test) // Computes the CP of a tensor to // rank \c rank by either building the rank or using HOSVD. A.compute_rank_random(rank, converge_test) // Computes the CP of tensor to // rank \c rank. Factor matrices built at \c rank // with random numbers A.compute_error(converge_test, omega) // Computes the CP_ALS of tensor to // 2-norm // error < omega by building the rank (HOSVD option available) A.compute_geometric(rank, converge_test, step) // Computes CP of tensor to // rank with // geometric steps of step between // guesses. A.compute_PALS(converge_test) // computes CP_ALS of tensor to // rank = 3 * max_dim(tensor) // in 4 panels using a modified // HOSVD initial guess //See documentation for full range of options // Accessing Factor Matrices A.get_factor_matrices() // Returns a vector of factor matrices, if // they have been computed A.reconstruct() // Returns the tensor computed using the // CP factor matrices \endcode */ template > class TUCKER_CP_ALS : public CP_ALS { protected: using CP_ALS::tensor_ref; using typename CP_ALS::ind_t; using typename CP_ALS::ord_t; using CP_ALS::size; using CP::A; using CP::ndim; using CP::AtA; using T = typename Tensor::numeric_type; using RT = real_type_t; using RTensor = rebind_tensor_t; public: /// Create a Tucker compressed CP ALS object /// that stores but does not modify the reference tensor \c tensor. /// Unless some other transformation is defined, computes the /// Tucker decomposition truncating singular vectors with singular values /// less than |tensor| * \c epsilon_tucker /// \param[in] tensor the reference tensor to be decomposed. /// \param[in] epsilon_tucker truncation parameter for tucker decomposition TUCKER_CP_ALS(Tensor & tensor, double epsilon_tucker = 1e-3) :CP_ALS(tensor), tcut_tucker(epsilon_tucker){ } /// Set the tensor transformation /// require that the factors be dimension (modified_size, orig_size) /// also assume that since you have the transormations, the reference is already /// transformed /// \param[in] facs : set of tucker factor matrices /// \param[in] reference_is_core : was the reference tensor provided to TUCKER_CP_ALS a core tensor /// if no compute the core tensor. void set_tucker_factors(std::vector facs, bool reference_is_core = true){ BTAS_ASSERT(facs.size() == this->ndim) size_t num = 0; tucker_factors.reserve(ndim); // because the reference is transformed we need the untransformed as tensor_ref for(auto i : facs){ BTAS_ASSERT(i.rank() == 2) BTAS_ASSERT(i.extent((reference_is_core ? 0 : 1)) == tensor_ref.extent(num)) tucker_factors.emplace_back(i); ++num; } core_tensor = tensor_ref; if(reference_is_core) { tref_is_core = reference_is_core; } else{ transform_tucker(true, core_tensor, tucker_factors); } //if(transform_core) //transform_tucker(false, tensor_ref, tucker_factors); } /// Function to get the computed tucker factors. 
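/// \return a copy of the Tucker transformation matrices, one per mode;
/// throws via BTAS_EXCEPTION if they have not been computed yet.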
std::vector<Tensor> get_tucker_factors() {
  if (tucker_factors.empty()) BTAS_EXCEPTION("Tucker factors have not been computed");
  return tucker_factors;
}

protected:
std::vector<Tensor> tucker_factors, transformed_A;
Tensor core_tensor;
double tcut_tucker;
size_t core_size;
bool tref_is_core = false;

/// computes the CP ALS of the tensor \c tensor using the core tensor \c core_tensor
/// stops when converge_test is satisfied. Stores the exact CP factors in A,
/// stores the transformed CP factors in transformed_A.
/// There is only one solver, so \c dir isn't used; it is just an artifact of the base class.
/// \param[in] rank current rank of the decomposition
/// \param[in] converge_test ALS satisfactory condition checker.
/// \param[in] dir not used in this function
/// \param[in] max_als maximum number of ALS iterations
/// \param[in] calculate_epsilon should epsilon be returned, disregarded if ConvClass = FitCheck
/// \param[in, out] epsilon in: a double value is disregarded.
/// out: if ConvClass = FitCheck || \c calculate_epsilon the 2-norm tensor
/// error of the CP approximation, else not modified.
/// \param[in] fast_pI Should ALS use a faster version of pseudoinverse?
void ALS(ind_t rank, ConvClass &converge_test, bool dir, int max_als, bool calculate_epsilon, double &epsilon,
         bool &fast_pI) override {
  {
    // If there are no Tucker factors yet, compute them
    if (tucker_factors.empty()) {
      core_tensor = tensor_ref;
      sequential_tucker(core_tensor, tcut_tucker, tucker_factors);
      //make_tucker_factors(tensor_ref, tcut_tucker, tucker_factors, false);
      //core_tensor = tensor_ref;
      //transform_tucker(true, core_tensor, tucker_factors);
    }
    if (AtA.empty()) {
      AtA = std::vector<Tensor>(ndim);
      transformed_A = std::vector<Tensor>(ndim);
    }
    auto ptr_A = A.begin(), ptr_T = tucker_factors.begin(), ptr_AtA = AtA.begin(), ptr_tran = transformed_A.begin();
    if (!tref_is_core) {
      // tensor_ref is not the core tensor, so the dimensions of the
      // reference factors need to be scaled by the tucker factors.
      for (size_t i = 0; i < ndim; ++i, ++ptr_A, ++ptr_T, ++ptr_AtA, ++ptr_tran) {
        *ptr_AtA = Tensor();
        contract(this->one, *ptr_A, {1, 2}, btas::impl::conj(*ptr_A), {1, 3}, this->zero, *ptr_AtA, {2, 3});
        Tensor trans;
        *ptr_tran = Tensor();
        contract(this->one, *ptr_T, {1, 2}, btas::impl::conj(*ptr_A), {2, 3}, this->zero, *ptr_tran, {1, 3});
      }
    } else {
      // if tensor_ref is the core tensor then the factors in A need to be taken
      // back into the correct (non-Tucker) subspace.
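      // Schematically, A[n](i, r) = sum_j U[n](j, i) * conj(A~[n](j, r)): the
      // transpose of the mode-n Tucker factor U[n] maps the factor A~[n] optimized
      // in the compressed (core) space back to the full space, and AtA[n] is then
      // rebuilt from the full-space factor. (An illustrative reading of the
      // contract() calls below; the index labels are schematic.)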
for (size_t i = 0; i < ndim; ++i, ++ptr_A, ++ptr_T, ++ptr_AtA, ++ptr_tran) { Tensor trans; *ptr_tran = *ptr_A; *ptr_A = Tensor(); contract(this->one, *ptr_T, {2, 1}, btas::impl::conj(*ptr_tran), {2, 3}, this->zero, *ptr_A, {1, 3}); *ptr_AtA = Tensor(); contract(this->one, *ptr_A, {1, 2}, btas::impl::conj(*ptr_A), {1, 3}, this->zero, *ptr_AtA, {2, 3}); } } } core_size = core_tensor.size(); size_t count = 0; bool is_converged = false; bool matlab = fast_pI; // Until either the initial guess is converged or it runs out of iterations // update the factor matrices with or without Khatri-Rao product // intermediate do{ count++; this->num_ALS++; for (size_t i = 0; i < ndim; i++) { core_ALS_solver(i, rank, fast_pI, matlab, converge_test); auto &ai = A[i]; contract(this->one, ai, {1, 2}, ai.conj(), {1, 3}, this->zero, AtA[i], {2, 3}); contract(this->one, tucker_factors[i], {1, 2}, ai.conj(), {2, 3}, this->zero, transformed_A[i], {1, 3}); } is_converged = converge_test(A, AtA); } while (count < max_als && !is_converged); detail::get_fit(converge_test, epsilon, (this->num_ALS == max_als)); epsilon = 1.0 - epsilon; // Checks loss function if required if (calculate_epsilon && epsilon == 2) { epsilon = this->norm(this->reconstruct() - tensor_ref); } } /// This is the solver for ALS; it computes the optimal factor for mode \c n assuming all others are fixed. /// Does not compute the Khatri-Rao product; instead uses the same algorithm as the base class's /// direct algorithm. /// \param[in] n Current mode being optimized /// \param[in] rank rank of the decomposition /// \param[in] fast_pI Should ALS use a faster version of pseudoinverse? /// \param[in, out] matlab in: if Cholesky fails use fast pseudoinverse? out: did fast pseudoinverse fail? /// \param[in, out] converge_test in: important to set matricized tensor times Khatri-Rao product (MttKRP) if using FitCheck /// otherwise not used. out: \c converge_test with MttKRP set. /// \param[in] lambda regularization parameter. void core_ALS_solver(size_t n, ind_t rank, bool &fast_pI, bool &matlab, ConvClass &converge_test, double lambda = 0.0) { // Determine if n is the last mode, if it is first contract with first mode // and transpose the product bool last_dim = n == ndim - 1; // product of all dimensions ord_t LH_size = core_size; size_t contract_dim = last_dim ? 0 : ndim - 1; ind_t offset_dim = core_tensor.extent(n); ind_t pseudo_rank = rank; // Store the dimensions which are available to Hadamard contract std::vector<ind_t> dimensions; for (size_t i = last_dim ? 1 : 0; i < (last_dim ? ndim : ndim - 1); i++) { dimensions.push_back(core_tensor.extent(i)); } // Modifying the dimension of tensor_ref so store the range here to resize Range R = core_tensor.range(); //Tensor an(A[n].range()); // Resize the tensor which will store the product of tensor_ref and the first factor matrix Tensor An = Tensor(LH_size / core_tensor.extent(contract_dim), rank); core_tensor.resize( Range{Range1{last_dim ? core_tensor.extent(contract_dim) : LH_size / core_tensor.extent(contract_dim)}, Range1{last_dim ? LH_size / core_tensor.extent(contract_dim) : core_tensor.extent(contract_dim)}}); // contract tensor ref and the first factor matrix gemm((last_dim ? blas::Op::Trans : blas::Op::NoTrans), blas::Op::NoTrans, 1.0, core_tensor, transformed_A[contract_dim], 0.0, An); // Resize tensor_ref core_tensor.resize(R); // Remove the dimension which was just contracted out LH_size /= core_tensor.extent(contract_dim); // n tells which dimension not to contract, and contract_dim says which dimension I am trying to contract.
// If n == contract_dim then that mode is skipped. // if n == ndim - 1, my contract_dim = 0. The gemm transposes to make rank = ndim - 1, so I // move the pointer that preserves the last dimension to n = ndim -2. // In all cases I want to walk through the orders in tensor_ref backward so contract_dim = ndim - 2 n = last_dim ? ndim - 2 : n; contract_dim = ndim - 2; while (contract_dim > 0) { // Now temp is three index object where temp has size // (size of tensor_ref/product of dimension contracted, dimension to be // contracted, rank) ord_t idx2 = dimensions[contract_dim], idx1 = LH_size / idx2; An.resize( Range{Range1{idx1}, Range1{idx2}, Range1{pseudo_rank}}); Tensor contract_tensor; //Tensor contract_tensor(Range{Range1{idx1}, Range1{pseudo_rank}}); //contract_tensor.fill(0.0); const auto &a = transformed_A[(last_dim ? contract_dim + 1 : contract_dim)]; // If the middle dimension is the mode not being contracted, I will move // it to the right hand side temp((size of tensor_ref/product of // dimension contracted, rank * mode n dimension) if (n == contract_dim) { pseudo_rank *= offset_dim; } // If the code hasn't hit the mode of interest yet, it will contract // over the middle dimension and sum over the rank. else if (contract_dim > n) { middle_contract(this->one, An, a, this->zero, contract_tensor); An = contract_tensor; } // If the code has passed the mode of interest, it will contract over // the middle dimension and sum over rank * mode n dimension else { middle_contract_with_pseudorank(this->one, An, a, this->zero, contract_tensor); An = contract_tensor; } LH_size /= idx2; contract_dim--; } n = last_dim ? n+1 : n; // If the mode of interest is the 0th mode, then the while loop above // contracts over all other dimensions and resulting An is of the // correct dimension If the mode of interest isn't 0th mode, must contract // out the 0th mode here, the above algorithm can't perform this // contraction because the mode of interest is coupled with the rank if (n != 0) { ind_t idx1 = dimensions[0]; An.resize(Range{Range1{idx1}, Range1{offset_dim}, Range1{rank}}); Tensor contract_tensor(Range{Range1{offset_dim}, Range1{rank}}); contract_tensor.fill(0.0); const auto &a = transformed_A[(last_dim ? 1 : 0)]; front_contract(this->one, An, a, this->zero, contract_tensor); An = contract_tensor; } // Add lambda to factor matrices if RALS if(lambda !=0){ auto LamA = A[n]; scal(lambda, LamA); An += LamA; } // before providing the Matricized tensor times khatri rao product // need to reverse tucker transformation of that mode. { Tensor temp; contract(this->one, tucker_factors[n], {1, 2}, An, {1, 3}, this->zero, temp, {2, 3}); An = temp; } // multiply resulting matrix An by pseudoinverse to calculate optimized // factor matrix detail::set_MtKRP(converge_test, An); // Temp is then rewritten with unnormalized new A[n] matrix this->pseudoinverse_helper(n, fast_pI, matlab, An); // Normalize the columns of the new factor matrix and update this->normCol(An); A[n] = An; } }; /** \brief Computes the Canonical Product (CP) decomposition of an order-N tensor which has been transformed via HOSVD (or some other defined transformation) using regularized alternating least squares (RALS). This computes the CP decomposition of btas::Tensor objects with row major storage only with fixed (compile-time) and variable (run-time) ranks Does not support strided ranges. \warning this code takes a non-const reference \c tensor_ref and does not modify the values. 
This is a result of API (reshape needs non-const tensor) Synopsis: \code // Constructors TUCKER_CP_RALS A(tensor) // TUCKER_CP_RALS object with empty factor // matrices and empty transformation matrices // Operations A.compute_rank(rank, converge_test) // Computes the CP of a tensor to // rank \c rank by either building the rank or using HOSVD. A.compute_rank_random(rank, converge_test) // Computes the CP of tensor to // rank \c rank. Factor matrices built at \c rank // with random numbers A.compute_error(converge_test, omega) // Computes the CP_RALS of tensor to // 2-norm // error < omega by building the rank (HOSVD option available) A.compute_geometric(rank, converge_test, step) // Computes CP of tensor to // rank with // geometric steps of step between // guesses. A.compute_PALS(converge_test) // computes CP_RALS of tensor to // rank = 3 * max_dim(tensor) // in 4 panels using a modified // HOSVD initial guess //See documentation for full range of options // Accessing Factor Matrices A.get_factor_matrices() // Returns a vector of factor matrices, if // they have been computed A.reconstruct() // Returns the tensor computed using the // CP factor matrices \endcode */ template <typename Tensor, class ConvClass = NormCheck<Tensor>> class TUCKER_CP_RALS : public TUCKER_CP_ALS<Tensor, ConvClass> { protected: using CP_ALS<Tensor, ConvClass>::tensor_ref; using typename CP_ALS<Tensor, ConvClass>::ind_t; using typename CP_ALS<Tensor, ConvClass>::ord_t; using CP_ALS<Tensor, ConvClass>::size; using CP<Tensor, ConvClass>::A; using CP<Tensor, ConvClass>::ndim; using CP<Tensor, ConvClass>::AtA; using TUCKER_CP_ALS<Tensor, ConvClass>::core_tensor; using TUCKER_CP_ALS<Tensor, ConvClass>::tucker_factors; using TUCKER_CP_ALS<Tensor, ConvClass>::transformed_A; public: /// Create a Tucker compressed CP RALS object /// that stores but does not modify the reference tensor \c tensor. /// Unless some other transformation is defined, computes the /// Tucker decomposition truncating singular vectors with singular values /// less than |tensor| * \c epsilon_tucker /// \param[in] tensor the reference tensor to be decomposed. /// \param[in] epsilon_tucker truncation parameter for the Tucker decomposition TUCKER_CP_RALS(Tensor & tensor, double epsilon_tucker) : TUCKER_CP_ALS<Tensor, ConvClass>(tensor, epsilon_tucker){ } protected: RALSHelper<Tensor> helper; // Helper object to compute regularized steps /// computes the CP ALS of the tensor \c tensor using the core tensor \c core_tensor /// stops when converge_test is satisfied. Stores the exact CP factors in A /// stores the transformed CP factors in transformed_A /// only one solver so dir isn't used, just an artifact of base class. /// \param[in] rank current rank of the decomposition /// \param[in] converge_test ALS satisfactory condition checker. /// \param[in] dir not used in this function /// \param[in] max_als maximum number of ALS iterations /// \param[in] calculate_epsilon should epsilon be returned, disregarded if ConvClass = FitCheck /// \param[in, out] epsilon in: a double value is disregarded. /// out: if ConvClass = FitCheck || \c calculate_epsilon the 2-norm tensor /// error of the CP approximation else not modified. /// \param[in] fast_pI Should ALS use a faster version of pseudoinverse?
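  ///
  /// Note on the regularization (a summary of what the body below does): after
  /// optimizing mode i, the step size s_i reported by RALSHelper rescales the
  /// penalty as
  ///   lambda[i] <- alpha * lambda[i] * (s_i / s0)^2 + (1 - alpha) * lambda[i]
  /// with s0 = 1 and alpha = 0.8 fixed in the implementation.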
void ALS(ind_t rank, ConvClass &converge_test, bool dir, ind_t max_als, bool calculate_epsilon, double &epsilon, bool &fast_pI) { size_t count = 0; if(tucker_factors.empty()) { make_tucker_factors(tensor_ref, this->tcut_tucker, tucker_factors, false); core_tensor = tensor_ref; transform_tucker(true, core_tensor, tucker_factors); } if(AtA.empty()) { AtA = std::vector(ndim); transformed_A = std::vector(ndim); } auto ptr_A = A.begin(), ptr_T = tucker_factors.begin(), ptr_AtA = AtA.begin(), ptr_tran = transformed_A.begin(); for (size_t i = 0; i < ndim; ++i,++ptr_A, ++ptr_T, ++ptr_AtA, ++ptr_tran) { auto &a_mat = A[i]; *ptr_AtA = Tensor(); contract(1.0, *ptr_A, {1, 2}, *ptr_A, {1, 3}, 0.0, *ptr_AtA, {2, 3}); Tensor trans; *ptr_tran = Tensor(); contract(1.0, *ptr_T, {1, 2}, *ptr_A, {2, 3}, 0.0, *ptr_tran, {1, 3}); } helper = RALSHelper(A); const auto s0 = 1.0; std::vector lambda(ndim, 1.0); const auto alpha = 0.8; // Until either the initial guess is converged or it runs out of iterations // update the factor matrices with or without Khatri-Rao product // intermediate bool is_converged = false; bool matlab = fast_pI; while (count < max_als && !is_converged) { count++; this->num_ALS++; for (size_t i = 0; i < ndim; i++) { this->direct(i, rank, fast_pI, matlab, converge_test, tensor_ref, lambda[i]); // Compute the value s after normalizing the columns auto & ai = A[i]; this->s = helper(i, ai); // recompute lambda lambda[i] = (lambda[i] * (this->s * this->s) / (s0 * s0)) * alpha + (1 - alpha) * lambda[i]; contract(1.0, tucker_factors[i], {1, 2}, ai, {2, 3}, 0.0, transformed_A[i], {1, 3}); contract(1.0, ai, {1, 2}, ai, {1, 3}, 0.0, AtA[i], {2, 3}); } is_converged = converge_test(A); } // Checks loss function if required detail::get_fit(converge_test, epsilon, (this->num_ALS == max_als)); epsilon = 1.0 - epsilon; // Checks loss function if required if (calculate_epsilon && epsilon == 2) { epsilon = this->norm(this->reconstruct() - tensor_ref); } } }; }//namespace btas #endif // BTAS_GENERIC_TUCK_COMP_CP_ALS_IPP BTAS-1.0.0/btas/generic/tucker.h000066400000000000000000000300101476142407000162530ustar00rootroot00000000000000#ifndef BTAS_TUCKER_DECOMP_H #define BTAS_TUCKER_DECOMP_H #include #include #include #include #include namespace btas { /// Computes the tucker compression of an order-N tensor A. /// See /// reference. /// First computes the tucker factor matrices for each mode of @a A then, if @a compute_core : /// @a A is transformed into the core tensor representation using the @a transforms /// \param[in, out] A In: Order-N tensor to be decomposed. Out: if @a compute_core The core /// tensor of the Tucker decomposition else @a A \param[in] epsilon_svd The threshold /// truncation value for the Truncated Tucker-SVD decomposition /// \param[in, out] transforms In: An empty vector. Out: The Tucker factor matrices. /// \param[in] compute_core A bool which indicates if the tensor \c A should be transformed /// into the Tucker core matrices using the computed Tucker factor matrices stored in /// \c transforms. 
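///
/// Example (an illustrative sketch; the tensor contents and the 1e-4 tolerance
/// are assumptions made for the example):
/// \code
/// btas::Tensor<double> A(8, 8, 8);
/// A.generate([]() { return std::rand() / double(RAND_MAX); });
/// std::vector<btas::Tensor<double>> transforms;
/// btas::make_tucker_factors(A, 1e-4, transforms, /* compute_core = */ true);
/// // A now holds the core tensor; transforms[i] has dimensions
/// // (truncated rank of mode i) x 8
/// \endcode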
template void make_tucker_factors(Tensor& A, double epsilon_svd, std::vector &transforms, bool compute_core = false){ using ind_t = typename Tensor::range_type::index_type::value_type; using dtype = typename Tensor::numeric_type; auto ndim = A.rank(); transforms.clear(); transforms.reserve(ndim); double norm2 = dot(A,A); auto threshold = epsilon_svd * epsilon_svd * norm2 / ndim; std::vector left_modes, right_modes, final; final.push_back(0); final.emplace_back(ndim+1); left_modes.reserve(ndim); right_modes.reserve(ndim); for(ind_t i = 1; i <= ndim; ++i){ left_modes.emplace_back(i); right_modes.emplace_back(i); } auto ptr_left = left_modes.begin(), ptr_right = right_modes.begin(); for(ind_t i = 0; i < ndim; ++i, ++ptr_left, ++ptr_right){ // Compute A * A to make tucker computation easier (this turns from SVD into an eigenvalue // decomposition, i.e. HOSVD) size_t temp = *ptr_left; *ptr_left = 0; *ptr_right = ndim + 1; Tensor AAt; contract(1.0, A, left_modes, A, right_modes, 0.0, AAt, final); *ptr_left = temp; *ptr_right = temp; // compute the eigenvalue decomposition of each mode of A ind_t r = AAt.extent(0); Tensor lambda(r); lambda.fill(0.0); auto info = hereig(blas::Layout::ColMajor, lapack::Job::Vec, lapack::Uplo::Lower, r, AAt.data(), r, lambda.data()); if (info) BTAS_EXCEPTION("Error in computing the tucker SVD"); // Find how many significant vectors are in this transformation ind_t rank = 0, zero = 0; for(auto & eig : lambda){ if(eig < threshold) ++rank; } // Truncate the column space of the unitary factor matrix. ind_t kept_evals = r - rank; if(kept_evals == 0) BTAS_EXCEPTION("Tucker decomposition failed. Tucker transformation rank = 0"); lambda = Tensor(kept_evals, r); auto lower_bound = {rank, zero}; auto upper_bound = {r, r}; auto view = btas::make_view(AAt.range().slice(lower_bound, upper_bound), AAt.storage()); std::copy(view.begin(), view.end(), lambda.begin()); // Push the factor matrix back as a transformation. transforms.emplace_back(lambda); } if(compute_core){ transform_tucker(true, A, transforms); } } /// Function much like `make_tucker_factors` however, after constructing /// nth factor of @a A, the nth mode of @a A is transformed into the /// core tensor space before the tucker factor of the (n+1)th mode is computed. /// \param[in, out] A In: Order-N tensor to be decomposed. Out: The core /// tensor of the Tucker decomposition \param[in] epsilon_svd The threshold /// truncation value for the Truncated Tucker-SVD decomposition /// \param[in, out] transforms In: An empty vector. Out: The Tucker factor matrices. 
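///
/// Truncation criterion used below (a summary, not additional behavior): for
/// each mode, eigenvalues of the mode-i Gram matrix A A^H satisfying
///   eig < epsilon_svd^2 * |A|^2 / ndim
/// are discarded, i.e. the squared truncation budget is split evenly over the
/// ndim modes.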
template void sequential_tucker(Tensor& A, double epsilon_svd, std::vector &transforms){ using ind_t = typename Tensor::range_type::index_type::value_type; using T = typename Tensor::numeric_type; using RT = real_type_t; using RTensor = rebind_tensor_t; auto ndim = A.rank(); T one {1.0}; T zero {0.0}; transforms.clear(); transforms.reserve(ndim); double norm2 = std::abs(dot(A,A)); auto threshold = epsilon_svd * epsilon_svd * norm2 / ndim; std::vector left_modes, right_modes, final, core; final.push_back(0); final.emplace_back(ndim); left_modes.reserve(ndim); right_modes.reserve(ndim); core.reserve(ndim); for(ind_t i = 0; i < ndim; ++i){ left_modes.emplace_back(i); right_modes.emplace_back(i); core.emplace_back(i + 1); } *(right_modes.data()) = ndim; *(core.data() + ndim - 1) = 0; //auto ptr_left = left_modes.begin(), ptr_right = right_modes.begin(); for(ind_t i = 0; i < ndim; ++i){ // Compute A * A to make tucker computation easier (this turns from SVD into an eigenvalue // decomposition, i.e. HOSVD) // Because of later algorithm, mode of interest is always the 0th mode of the tensor Tensor AAt; contract(one , A, left_modes, A.conj(), right_modes, zero, AAt, final); // compute the eigenvalue decomposition of each mode of A ind_t r = AAt.extent(0); RTensor lambda(r); lambda.fill(0.0); auto info = hereig(blas::Layout::ColMajor, lapack::Job::Vec, lapack::Uplo::Lower, r, AAt.data(), r, lambda.data()); if (info) BTAS_EXCEPTION("Error in computing the tucker SVD"); // Find how many significant vectors are in this transformation ind_t rank = 0, zero_ind = 0; for(auto & eig : lambda){ if(eig < threshold) ++rank; } // Truncate the column space of the unitary factor matrix. ind_t kept_evals = r - rank; if(kept_evals == 0) BTAS_EXCEPTION("Tucker decomposition failed. Tucker transformation rank = 0"); Tensor lambda_ (kept_evals, r); auto lower_bound = {rank, zero_ind}; auto upper_bound = {r, r}; auto view = btas::make_view(AAt.range().slice(lower_bound, upper_bound), AAt.storage()); std::copy(view.begin(), view.end(), lambda_.begin()); // Push the factor matrix back as a transformation. transforms.emplace_back(lambda_); // Now use lambda to move reference tensor to the core tensor space AAt = Tensor(); contract(one, A, right_modes, lambda_.conj(), final, zero , AAt, core); A = AAt; } } /// A function to take an exact tensor to the Tucker core tensor or /// the Tucker core tensor to an approximation of the exact tensor. /// \param[in] to_core: if \c to_core tensor \c A will be taken from the exact representation to /// the Tucker core else \c A will be taken from the Tucker core representation to the exact representation /// \param[in, out] A In : depending on \c to_core an exact tensor or a Tucker core tensor. Out : /// a transformed tensor which represents either the Tucker core or exact tensor. /// \param[in] transforms the complete set of Tucker factor matrices. Note this does /// not include the core tensor. template void transform_tucker(bool to_core, Tensor & A, std::vector transforms){ using ind_t = typename Tensor::range_type::index_type::value_type; using ord_t = typename range_traits::ordinal_type; auto ndim = A.rank(); std::vector left_modes, right_modes, final; final.push_back(0); final.emplace_back(ndim+1); left_modes.reserve(ndim); right_modes.reserve(ndim); for(size_t i = 1; i <= ndim; ++i){ left_modes.emplace_back(i); right_modes.emplace_back(i); } if(!to_core) { // as a reference, this properly flips the original tensor back to the original // subspace. 
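      // Sketch of the loop below: each pass contracts the current transform
      // with mode 0 of A and appends the produced full-size mode at the back,
      // so after ndim such rotations every mode has been back-transformed once
      // and the original mode order is restored.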
auto ptr_tran = transforms.begin(); for (size_t i = 0; i < ndim; ++i, ++ptr_tran) { right_modes.emplace_back(ndim + 1); right_modes.erase(right_modes.begin()); left_modes[0] = 0; Tensor temp; contract(1.0, *ptr_tran, final, A, left_modes, 0.0, temp, right_modes); A = temp; right_modes[ndim - 1] = i + 1; left_modes = right_modes; } } else { ord_t size = A.size(); right_modes = left_modes; auto ptr_tran = transforms.begin(); // contracts the first mode and then transposes it to the back of the tensor. // works like the S_N algebra: it does N rotations and is then finished for(size_t i = 0; i < ndim; ++i, ++ptr_tran){ right_modes.erase(right_modes.begin()); right_modes.emplace_back(0); left_modes[0] = ndim + 1; Tensor temp; contract(1.0, *ptr_tran, final, A, left_modes, 0.0, temp, right_modes); A = temp; right_modes[ndim - 1] = i + 1; left_modes = right_modes; } } } /// Computes the tucker compression of an order-N tensor A. /// See /// reference. /// \param[in, out] A In: Order-N tensor to be decomposed. Out: The core /// tensor of the Tucker decomposition \param[in] epsilon_svd The threshold /// truncation value for the Truncated Tucker-SVD decomposition. /// \param[in, out] transforms In: An empty vector. Out: The Tucker factor matrices. template <typename Tensor> [[deprecated]] void tucker_compression(Tensor &A, double epsilon_svd, std::vector<Tensor> &transforms) { using ind_t = typename Tensor::range_type::index_type::value_type; auto ndim = A.rank(); double norm2 = dot(A, A); auto threshold = epsilon_svd * epsilon_svd * norm2 / ndim; std::vector<size_t> first, second, final; for (size_t i = 0; i < ndim; ++i) { first.push_back(i); second.push_back(i); } final.push_back(0); final.push_back(ndim); auto ptr_second = second.begin(), ptr_final = final.begin(); for (size_t i = 0; i < ndim; ++i, ++ptr_second) { Tensor S; *(ptr_second) = ndim; *(ptr_final) = i; contract(1.0, A, first, A, second, 0.0, S, final); *(ptr_second) = i; ind_t R = S.extent(0); Tensor lambda(R, 1); // Calculate the left singular vector of the flattened tensor // which is equivalent to the eigenvector of Flat \times Flat^T auto info = hereig(blas::Layout::RowMajor, lapack::Job::Vec, lapack::Uplo::Lower, R, S.data(), R, lambda.data()); if (info) BTAS_EXCEPTION("Error in computing the tucker SVD"); // Find the truncation rank based on the threshold. ind_t rank = 0; for (auto &eigvals : lambda) { if (eigvals < threshold) rank++; } // Truncate the column space of the unitary factor matrix. auto kept_evecs = R - rank; ind_t zero = 0; lambda = Tensor(R, kept_evecs); auto lower_bound = {zero, rank}; auto upper_bound = {R, R}; auto view = btas::make_view(S.range().slice(lower_bound, upper_bound), S.storage()); std::copy(view.begin(), view.end(), lambda.begin()); // Push the factor matrix back as a transformation. transforms.push_back(lambda); } // Make the second (the transformation modes) // order 2 and temp order N { auto temp = final; final = second; second = temp; } ptr_second = second.begin(); ptr_final = final.begin(); for (size_t i = 0; i < ndim; ++i, ++ptr_final) { auto &lambda = transforms[i]; #ifdef BTAS_HAS_INTEL_MKL // Contract the factor matrix with the reference tensor, A.
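    // (MKL-specific in-place rotation; the portable #else branch below performs
    // the equivalent btas::contract against the transposed factor.)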
core_contract(A, lambda, i); #else Tensor rotated; // This multiplies by the transpose so later all I need to do is multiply by non-transpose second[0] = i; second[1] = ndim; *(ptr_final) = ndim; btas::contract(1.0, lambda, second, A, first, 0.0, rotated, final); *(ptr_final) = i; A = rotated; #endif // BTAS_HAS_INTEL_MKL } } } // namespace btas #endif // BTAS_TUCKER_DECOMP_HBTAS-1.0.0/btas/index_traits.h000066400000000000000000000032641476142407000160520ustar00rootroot00000000000000#ifndef __BTAS_INDEX_TRAITS_H #define __BTAS_INDEX_TRAITS_H 1 #include #include #include #include namespace btas { /// test T has integral value_type template class has_integral_value_type { /// true case template::type > static std::true_type __test(typename U::value_type*); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; /// test _Index conforms the TWG.Index concept /// check only value_type and operator[] template class is_index : public std::false_type {}; template class is_index<_Index> { public: static constexpr const bool value = has_integral_value_type<_Index>::value & is_container<_Index>::value; }; template class is_index<_Index1,_Index2,Rest...> { public: static constexpr const bool value = false; }; template struct signed_int; template <> struct signed_int<8ul> { using type = int_fast8_t; }; template <> struct signed_int<16ul> { using type = int_fast16_t; }; template <> struct signed_int<32ul> { using type = int_fast32_t; }; template <> struct signed_int<64ul> { using type = int_fast64_t; }; template <> struct signed_int<128ul> { using type = int_fast64_t; }; template <> struct signed_int<256ul> { using type = int_fast64_t; }; template using signed_int_t = typename signed_int::type; } // namespace btas #endif // __BTAS_INDEX_TRAITS_H BTAS-1.0.0/btas/macros.h000066400000000000000000000033631476142407000146410ustar00rootroot00000000000000// // Created by Eduard Valeyev on 8/31/22. 
// #ifndef __BTAS_MACROS_H #define __BTAS_MACROS_H /* detect C++ compiler id: - ids taken from CMake - macros are discussed at https://sourceforge.net/p/predef/wiki/Compilers/ */ #define BTAS_CXX_COMPILER_ID_GNU 0 #define BTAS_CXX_COMPILER_ID_Clang 1 #define BTAS_CXX_COMPILER_ID_AppleClang 2 #define BTAS_CXX_COMPILER_ID_XLClang 3 #define BTAS_CXX_COMPILER_ID_Intel 4 #if defined(__INTEL_COMPILER_BUILD_DATE) /* macros like __ICC and even __INTEL_COMPILER can be affected by command options like -no-icc */ # define BTAS_CXX_COMPILER_ID BTAS_CXX_COMPILER_ID_Intel # define BTAS_CXX_COMPILER_IS_ICC 1 #endif #if defined(__clang__) && !defined(BTAS_CXX_COMPILER_IS_ICC) # define BTAS_CXX_COMPILER_IS_CLANG 1 # if defined(__apple_build_version__) # define BTAS_CXX_COMPILER_ID BTAS_CXX_COMPILER_ID_AppleClang # elif defined(__ibmxl__) # define BTAS_CXX_COMPILER_ID BTAS_CXX_COMPILER_ID_XLClang # else # define BTAS_CXX_COMPILER_ID BTAS_CXX_COMPILER_ID_Clang # endif #endif #if defined(__GNUG__) && !defined(BTAS_CXX_COMPILER_IS_ICC) && !defined(BTAS_CXX_COMPILER_IS_CLANG) # define BTAS_CXX_COMPILER_ID BTAS_CXX_COMPILER_ID_GNU # define BTAS_CXX_COMPILER_IS_GCC 1 #endif /* ----------- pragma helpers ---------------*/ #define BTAS_PRAGMA(x) _Pragma(#x) /* same as BTAS_PRAGMA(x), but expands x */ #define BTAS_XPRAGMA(x) BTAS_PRAGMA(x) /* "concats" a and b with a space in between */ #define BTAS_CONCAT(a,b) a b #if defined(BTAS_CXX_COMPILER_IS_CLANG) #define BTAS_PRAGMA_CLANG(x) BTAS_XPRAGMA( BTAS_CONCAT(clang,x) ) #else #define BTAS_PRAGMA_CLANG(x) #endif #if defined(BTAS_CXX_COMPILER_IS_GCC) #define BTAS_PRAGMA_GCC(x) BTAS_XPRAGMA( BTAS_CONCAT(GCC,x) ) #else #define BTAS_PRAGMA_GCC(x) #endif #endif // __BTAS_MACROS_H BTAS-1.0.0/btas/optimize/000077500000000000000000000000001476142407000150375ustar00rootroot00000000000000BTAS-1.0.0/btas/optimize/contract.h000066400000000000000000000346251476142407000170370ustar00rootroot00000000000000// TODO Only ColMajor so far. #ifndef BTAS_OPTIMIZE_CONTRACT_H #define BTAS_OPTIMIZE_CONTRACT_H #include #include #include #include #include #include namespace btas { template void contract_211(const _T& alpha, const _TensorA& A, const btas::DEFAULT::index<_UA>& aA, const _TensorB& B, const btas::DEFAULT::index<_UB>& aB, const _T& beta, _TensorC& C, const btas::DEFAULT::index<_UC>& aC, const bool conjgA, const bool conjgB) { assert(aA.size() == 2 && aB.size() == 1 && aC.size() == 1); assert(is_contiguous(A.range()) && is_contiguous(B.range()) && is_contiguous(C.range())); if (conjgB) throw std::logic_error("complex conjugation of 1-index tensors is not considered in contract_211"); const bool notrans = aB[0] == aA[1]; if (notrans && conjgA) throw std::logic_error("contract_211 not sure what to do"); auto cA = notrans ? blas::Op::NoTrans : (conjgA ? blas::Op::ConjTrans : blas::Op::Trans); assert((notrans && aA[0] == aC[0]) || (aB[0] == aA[0] && aA[1] == aC[0])); gemv_impl::call(blas::Layout::ColMajor, cA, A.extent(0), A.extent(1), alpha, &*A.begin(), A.extent(0), &*B.begin(), 1, beta, &*C.begin(), 1); } template void contract_222(const _T& alpha, const _TensorA& A, const btas::DEFAULT::index<_UA>& aA, const _TensorB& B, const btas::DEFAULT::index<_UB>& aB, const _T& beta, _TensorC& C, const btas::DEFAULT::index<_UC>& aC, const bool conjgA, const bool conjgB) { // TODO we do not consider complex matrices yet. 
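  // Dispatch sketch (descriptive comment): the index annotations select the
  // BLAS transpose flags, e.g.
  //   contract(1.0, A, {'i','k'}, B, {'k','j'}, 0.0, C, {'i','j'})
  // finds aC.front() == 'i' in aA, so the A * B -> C branch is taken with
  // cA = NoTrans (aA.front() == aC.front()) and cB = NoTrans
  // (aB.front() != aC.back()), i.e. a plain gemm C = A * B.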
assert(aA.size() == 2 && aB.size() == 2 && aC.size() == 2); assert(is_contiguous(A.range()) && is_contiguous(B.range()) && is_contiguous(C.range())); if (std::find(aA.begin(), aA.end(), aC.front()) != aA.end()) { // then multiply A * B -> C const bool notransA = aA.front() == aC.front(); const bool notransB = aB.front() != aC.back(); if (notransA && conjgA) throw std::logic_error("contract_222 not sure what to do (A)"); if (notransB && conjgB) throw std::logic_error("contract_222 not sure what to do (B)"); const auto cA = notransA ? blas::Op::NoTrans : (conjgA ? blas::Op::ConjTrans : blas::Op::Trans); const size_t condim = notransA ? A.extent(1) : A.extent(0); assert(std::find(aB.begin(), aB.end(), aC.back()) != aB.end()); const auto cB = notransB ? blas::Op::NoTrans : (conjgB ? blas::Op::ConjTrans : blas::Op::Trans); assert((notransA ? aA.back() : aA.front()) == (notransB ? aB.front() : aB.back())); gemm_impl::call(blas::Layout::ColMajor, cA, cB, C.extent(0), C.extent(1), condim, alpha, &*A.begin(), A.extent(0), &*B.begin(), B.extent(0), beta, &*C.begin(), C.extent(0)); } else { contract_222(alpha, B, aB, A, aA, beta, C, aC, conjgB, conjgA); } } template <typename _T, class _TensorA, class _TensorB, class _TensorC, typename _UA, typename _UB, typename _UC> void contract_323(const _T& alpha, const _TensorA& A, const btas::DEFAULT::index<_UA>& aA, const _TensorB& B, const btas::DEFAULT::index<_UB>& aB, const _T& beta, _TensorC& C, const btas::DEFAULT::index<_UC>& aC, const bool conjgA, const bool conjgB) { assert(aA.size() == 3 && aB.size() == 2 && aC.size() == 3); assert(is_contiguous(A.range()) && is_contiguous(B.range()) && is_contiguous(C.range())); if (conjgA) throw std::logic_error("complex conjugation of 3-index tensors is not considered in contract_323"); // TODO this function is limited to special cases where one of three indices of A will be replaced in C. Permutation is not considered so far. // first identify which indices are to be rotated int irot = -1; for (int i = 0; i != 3; ++i) if (aA[i] != aC[i]) { assert(irot < 0); irot = i; } else assert(A.extent(i) == C.extent(i)); if (irot == 0) { // in this case multiply from front const bool notrans = aB.back() == aA.front(); assert(notrans || aB.front() == aA.front()); if (notrans && conjgB) throw std::logic_error("contract_323 irot = 0 not sure what to do"); const auto cA = blas::Op::NoTrans; const auto cB = notrans ? blas::Op::NoTrans : (conjgB ? blas::Op::ConjTrans : blas::Op::Trans); assert((notrans ? B.extent(1) : B.extent(0)) == A.extent(0)); gemm_impl::call(blas::Layout::ColMajor, cB, cA, C.extent(0), C.extent(1)*C.extent(2), A.extent(0), alpha, &*B.begin(), B.extent(0), &*A.begin(), A.extent(0), beta, &*C.begin(), C.extent(0)); } else if (irot == 1) { // in this case we loop over the last index of A const bool notrans = aB.front() == aA[1]; assert(notrans || aB.back() == aA[1]); if (notrans && conjgB) throw std::logic_error("contract_323 irot = 1 not sure what to do"); const auto cA = blas::Op::NoTrans; const auto cB = notrans ? blas::Op::NoTrans : (conjgB ? blas::Op::ConjTrans : blas::Op::Trans); assert((notrans ? B.extent(0) : B.extent(1)) == A.extent(1)); const size_t ablock = A.extent(0)*A.extent(1); const size_t cblock = C.extent(0)*C.extent(1); for (int i = 0; i != A.extent(2); ++i) gemm_impl::call(blas::Layout::ColMajor, cA, cB, C.extent(0), C.extent(1), A.extent(1), alpha, &*A.begin()+i*ablock, A.extent(0), &*B.begin(), B.extent(0), beta, &*C.begin()+i*cblock, C.extent(0)); } else if (irot == 2) { // in this case multiply from back const bool notrans = aB.front() == aA[2]; assert(notrans || aB.back() == aA[2]); if (notrans && conjgB) throw std::logic_error("contract_323 irot = 2 not sure what to do"); const auto cA = blas::Op::NoTrans; const auto cB = notrans ? blas::Op::NoTrans : (conjgB ? blas::Op::ConjTrans : blas::Op::Trans); assert((notrans ? B.extent(0) : B.extent(1)) == A.extent(2)); gemm_impl::call(blas::Layout::ColMajor, cA, cB, C.extent(0)*C.extent(1), C.extent(2), A.extent(2), alpha, &*A.begin(), A.extent(0)*A.extent(1), &*B.begin(), B.extent(0), beta, &*C.begin(), C.extent(0)*C.extent(1)); } else { assert(false); } } template <typename _T, class _TensorA, class _TensorB, class _TensorC, typename _UA, typename _UB, typename _UC> void contract_332(const _T& alpha, const _TensorA& A, const btas::DEFAULT::index<_UA>& aA, const _TensorB& B, const btas::DEFAULT::index<_UB>& aB, const _T& beta, _TensorC& C, const btas::DEFAULT::index<_UC>& aC, const bool conjgA, const bool conjgB) { assert(aA.size() == 3 && aB.size() == 3 && aC.size() == 2); assert(is_contiguous(A.range()) && is_contiguous(B.range()) && is_contiguous(C.range())); const bool back2 = aA[0] == aB[0] && aA[1] == aB[1]; const bool front2 = aA[1] == aB[1] && aA[2] == aB[2]; const bool mid2 = aA[0] == aB[0] && aA[2] == aB[2]; if (back2) { const bool swap = aC[0] == aB[2]; assert(swap || aC[0] == aA[2]); if (!swap) { assert(A.extent(0)*A.extent(1) == B.extent(0)*B.extent(1) && A.extent(2) == C.extent(0) && B.extent(2) == C.extent(1)); assert(!conjgB); gemm_impl::call(blas::Layout::ColMajor, conjgA ? blas::Op::ConjTrans : blas::Op::Trans, blas::Op::NoTrans, C.extent(0), C.extent(1), A.extent(0)*A.extent(1), alpha, &*A.begin(), A.extent(0)*A.extent(1), &*B.begin(), B.extent(0)*B.extent(1), beta, &*C.begin(), C.extent(0)); } else { assert(A.extent(0)*A.extent(1) == B.extent(0)*B.extent(1) && B.extent(2) == C.extent(0) && A.extent(2) == C.extent(1)); assert(!conjgA); gemm_impl::call(blas::Layout::ColMajor, conjgB ? blas::Op::ConjTrans : blas::Op::Trans, blas::Op::NoTrans, C.extent(0), C.extent(1), A.extent(0)*A.extent(1), alpha, &*B.begin(), B.extent(0)*B.extent(1), &*A.begin(), A.extent(0)*A.extent(1), beta, &*C.begin(), C.extent(0)); } } else if (front2) { const bool swap = aC[0] == aB[0]; assert(swap || aC[0] == aA[0]); if (!swap) { assert(A.extent(1)*A.extent(2) == B.extent(1)*B.extent(2) && A.extent(0) == C.extent(0) && B.extent(0) == C.extent(1)); assert(!conjgA); gemm_impl::call(blas::Layout::ColMajor, blas::Op::NoTrans, conjgB ? blas::Op::ConjTrans : blas::Op::Trans, C.extent(0), C.extent(1), A.extent(1)*A.extent(2), alpha, &*A.begin(), A.extent(0), &*B.begin(), B.extent(0), beta, &*C.begin(), C.extent(0)); } else { assert(A.extent(1)*A.extent(2) == B.extent(1)*B.extent(2) && B.extent(0) == C.extent(0) && A.extent(0) == C.extent(1)); assert(!conjgB); gemm_impl::call(blas::Layout::ColMajor, blas::Op::NoTrans, conjgA ?
blas::Op::ConjTrans : blas::Op::Trans, C.extent(0), C.extent(1), A.extent(1)*A.extent(2), alpha, &*B.begin(), B.extent(0), &*A.begin(), A.extent(0), beta, &*C.begin(), C.extent(0)); } } else if (mid2) { const bool swap = aC[0] == aB[1]; assert(swap || aC[0] == aA[1]); const size_t ablock = A.extent(0)*A.extent(1); const size_t bblock = B.extent(0)*B.extent(1); scal(C.size(), beta, &*C.begin(), 1); if (!swap) { assert(A.extent(0) == B.extent(0) && A.extent(2) == B.extent(2) && A.extent(1) == C.extent(0) && B.extent(1) == C.extent(1)); assert(!conjgB); for (int i = 0; i != A.extent(2); ++i) gemm_impl::call(blas::Layout::ColMajor, conjgA ? blas::Op::ConjTrans : blas::Op::Trans, blas::Op::NoTrans, C.extent(0), C.extent(1), A.extent(0), alpha, &*A.begin()+i*ablock, A.extent(0), &*B.begin()+i*bblock, B.extent(0), static_cast<_T>(1.0), &*C.begin(), C.extent(0)); } else { assert(A.extent(0) == B.extent(0) && A.extent(2) == B.extent(2) && B.extent(1) == C.extent(0) && A.extent(1) == C.extent(1)); assert(!conjgA); for (int i = 0; i != A.extent(2); ++i) gemm_impl::call(blas::Layout::ColMajor, conjgB ? blas::Op::ConjTrans : blas::Op::Trans, blas::Op::NoTrans, C.extent(0), C.extent(1), A.extent(0), alpha, &*B.begin()+i*bblock, B.extent(0), &*A.begin()+i*ablock, A.extent(0), static_cast<_T>(1.0), &*C.begin(), C.extent(0)); } } else throw std::logic_error("not yet implemented"); } template< typename _T, class _TensorA, class _TensorB, class _TensorC, typename _UA, typename _UB, typename _UC, class = typename std::enable_if< is_tensor<_TensorA>::value & is_tensor<_TensorB>::value & is_tensor<_TensorC>::value & (_TensorA::range_type::order == blas::Layout::ColMajor) & //checking if A, B, and C are all Colomn major (_TensorB::range_type::order == blas::Layout::ColMajor) & //checking if A, B, and C are all Colomn major (_TensorC::range_type::order == blas::Layout::ColMajor) & //checking if A, B, and C are all Colomn major std::is_same::type, typename std::remove_cv::type>::value & std::is_same::type, typename std::remove_cv::type>::value & (std::is_same::type, double>::value or std::is_same::type, std::complex>::value) >::type > void contract( const _T& alpha, const _TensorA& A, const btas::DEFAULT::index<_UA>& aA, const _TensorB& B, const btas::DEFAULT::index<_UB>& aB, const _T& beta, _TensorC& C, const btas::DEFAULT::index<_UC>& aC, const bool conjgA = false, const bool conjgB = false) { assert(A.rank() == aA.size()); assert(B.rank() == aB.size()); assert(C.rank() == aC.size()); if (A.rank() == 2 && B.rank() == 1 && C.rank() == 1) { contract_211(alpha, A, aA, B, aB, beta, C, aC, conjgA, conjgB); } else if (A.rank() == 1 && B.rank() == 2 && C.rank() == 1) { contract_211(alpha, B, aB, A, aA, beta, C, aC, conjgB, conjgA); } else if (A.rank() == 2 && B.rank() == 2 && C.rank() == 2) { contract_222(alpha, A, aA, B, aB, beta, C, aC, conjgA, conjgB); } else if (A.rank() == 3 && B.rank() == 2 && C.rank() == 3) { contract_323(alpha, A, aA, B, aB, beta, C, aC, conjgA, conjgB); } else if (A.rank() == 2 && B.rank() == 3 && C.rank() == 3) { contract_323(alpha, B, aB, A, aA, beta, C, aC, conjgB, conjgA); } else if (A.rank() == 3 && B.rank() == 3 && C.rank() == 2) { contract_332(alpha, A, aA, B, aB, beta, C, aC, conjgA, conjgB); } else { std::stringstream ss; ss << "not yet implemented: rank(A): " << A.rank() << " rank(B): " << B.rank() << " rank(C): " << C.rank(); throw std::logic_error(ss.str()); } } template< typename _T, class _TensorA, class _TensorB, class _TensorC, typename _UA, typename _UB, typename _UC, class = 
typename std::enable_if< is_tensor<_TensorA>::value & is_tensor<_TensorB>::value & is_tensor<_TensorC>::value & (_TensorA::range_type::order == blas::Layout::ColMajor) & //checking if A, B, and C are all Colomn major (_TensorB::range_type::order == blas::Layout::ColMajor) & //checking if A, B, and C are all Colomn major (_TensorC::range_type::order == blas::Layout::ColMajor) & //checking if A, B, and C are all Colomn major std::is_same::type, typename std::remove_cv::type>::value & std::is_same::type, typename std::remove_cv::type>::value & (std::is_same::type, double>::value or std::is_same::type, std::complex>::value) >::type > void contract( const _T& alpha, const _TensorA& A, std::initializer_list<_UA> aA, const _TensorB& B, std::initializer_list<_UB> aB, const _T& beta, _TensorC& C, std::initializer_list<_UC> aC, const bool conjgA = false, const bool conjgB = false) { return contract(alpha, A, btas::DEFAULT::index<_UA>{aA}, B, btas::DEFAULT::index<_UB>{aB}, beta, C, btas::DEFAULT::index<_UC>{aC}, conjgA, conjgB); } } //namespace btas #endif BTAS-1.0.0/btas/ordinal.h000066400000000000000000000264611476142407000150110ustar00rootroot00000000000000/* * ordinal.h * * Created on: Dec 26, 2013 * Author: evaleev */ #ifndef BTAS_ORDINAL_H_ #define BTAS_ORDINAL_H_ #include #include #include #include #include #include #include namespace btas { /// BoxOrdinal is an implementation detail of BoxRange. /// It maps the index to its ordinal value. It also knows whether /// the map is contiguous (i.e. whether adjacent indices have adjacent ordinal /// values). template class BoxOrdinal { public: static_assert(btas::is_index<_Index>::value, "BoxOrdinal<_Index> instantiated but _Index does not meet the TWG.Index concept"); typedef _Index index_type; const static blas::Layout order = _Order; typedef int64_t value_type; typedef typename btas::replace_value_type<_Index,value_type>::type stride_type; ///< stride type template friend class BoxOrdinal; BoxOrdinal() { assert((contiguous_ = false) || true); // workaround for Boost serialization // it breaks Debug builds when reading uninitialized bools } template ::type>::value && btas::is_index::type>::value>::type > BoxOrdinal(Index1&& lobound, Index2&& upbound) { init(std::forward(lobound), std::forward(upbound)); } template ::type>::value && btas::is_index::type>::value && btas::is_index::type>::value>::type > BoxOrdinal(Index1&& lobound, Index2&& upbound, Stride&& stride) { init(std::forward(lobound), std::forward(upbound), std::forward(stride)); } BoxOrdinal(stride_type&& stride, value_type&& offset, bool cont) : stride_(stride), offset_(offset), contiguous_(cont) { } BoxOrdinal(const BoxOrdinal& other) : stride_(other.stride_), offset_(other.offset_), contiguous_ (other.contiguous_) { } template ::value> > BoxOrdinal(const BoxOrdinal<_O,_I>& other) { auto n = other.rank(); stride_ = array_adaptor::construct(n); using std::cbegin; using std::begin; using std::cend; std::copy(cbegin(other.stride_), cend(other.stride_), begin(stride_)); offset_ = other.offset_; contiguous_ = other.contiguous_; } ~BoxOrdinal() {} std::size_t rank() const { using btas::rank; return rank(stride_); } const stride_type& stride() const { return stride_; } // no easy way without C++14 to invoke data(stride) in ADL-capable way #if __cplusplus < 201402L auto stride_data() const -> decltype(std::data(this->stride())) { return std::data(stride_); } #else auto stride_data() const { using std::data; return data(stride_); } #endif value_type offset() const { return offset_; } bool 
contiguous() const { return contiguous_; } template typename std::enable_if::value, value_type>::type operator()(const Index& index) const { assert(index.size() == rank()); value_type o = 0; const auto end = this->rank(); using std::cbegin; for(std::size_t i = 0; i != end; ++i) o += *(cbegin(index) + i) * *(cbegin(this->stride_) + i); return o - offset_; } /// computes the ordinal value using a pack of indices template typename std::enable_if::type...>::value, value_type>::type operator()(Index&& ... index) const { assert(sizeof...(index) == rank()); using std::cbegin; value_type o = zip(cbegin(this->stride_), std::forward(index)...); return o - offset_; } private: template value_type zip(Iterator&& it, FirstIndex&& index, RestOfIndices&& ... rest) const { return *it * index + zip(it+1, rest...); } template value_type zip(Iterator&& it) const { return 0; } public: /// Does ordinal value belong to this ordinal range? template typename std::enable_if::value, bool>::type includes(const I& ord) const { assert(false && "BoxOrdinal::includes() is not not yet implemented"); } private: template void init(const Index1& lobound, const Index2& upbound) { using btas::rank; auto n = rank(lobound); if (n == 0) return; value_type volume = 1; offset_ = 0; stride_ = array_adaptor::construct(n); // Compute range data if (order == blas::Layout::RowMajor) { for(typename std::make_signed::type i = n - 1; i >= 0; --i) { stride_[i] = volume; using std::cbegin; auto li = *(cbegin(lobound) + i); auto ui = *(cbegin(upbound) + i); offset_ += li * volume; volume *= (ui - li); } } else { for(decltype(n) i = 0; i != n; ++i) { stride_[i] = volume; using std::cbegin; auto li = *(cbegin(lobound) + i); auto ui = *(cbegin(upbound) + i); offset_ += li * volume; volume *= (ui - li); } } contiguous_ = true; } /// upbound only needed to check contiguousness template void init(const Index1& lobound, const Index2& upbound, const Weight& stride) { using btas::rank; auto n = rank(lobound); if (n == 0) return; value_type volume = 1; offset_ = 0; stride_ = array_adaptor::construct(n); using std::cbegin; using std::begin; using std::cend; std::copy(cbegin(stride), cend(stride), begin(stride_)); // Compute offset and check whether contiguous contiguous_ = true; if (order == blas::Layout::RowMajor) { for(typename std::make_signed::type i = n - 1; i >= 0; --i) { contiguous_ &= (volume == stride_[i]); auto li = *(cbegin(lobound) + i); auto ui = *(cbegin(upbound) + i); offset_ += li * stride_[i]; volume *= (ui - li); } } else { for(decltype(n) i = 0; i != n; ++i) { contiguous_ &= (volume == stride_[i]); auto li = *(cbegin(lobound) + i); auto ui = *(cbegin(upbound) + i); offset_ += li * stride_[i]; volume *= (ui - li); } } } #ifdef BTAS_HAS_BOOST_SERIALIZATION friend class boost::serialization::access; template void serialize(Archive& ar, const unsigned int version) { ar & BOOST_SERIALIZATION_NVP(stride_) & BOOST_SERIALIZATION_NVP(offset_) & BOOST_SERIALIZATION_NVP(contiguous_); } #endif stride_type stride_; //!< stride of each dimension (stride in the language of NumPy) value_type offset_; //!< lobound . stride so that easy to compute ordinal: ordinal(index) = index . 
stride - offset bool contiguous_; //!< whether index iterator traverses a contiguous sequence of ordinals }; /// Permutes BoxOrdinal /// Permutes a Range /// permutes the axes using permutation \c p={p[0],p[1],...} specified in the preimage ("from") convention; /// for example, after this call \c stride()[p[i]] will return the value originally /// returned by \c stride()[i] /// \param perm a sequence specifying from-permutation of the axes template ::value>::type> BoxOrdinal<_Order, _Index> permute(const BoxOrdinal<_Order, _Index>& ord, const AxisPermutation& perm) { const auto rank = ord.rank(); auto st = ord.stride(); typedef typename BoxOrdinal<_Order, _Index>::stride_type stride_type; stride_type stride; stride = array_adaptor::construct(rank); using std::cbegin; using std::begin; using std::cend; std::for_each(cbegin(perm), cend(perm), [&](const typename AxisPermutation::value_type& i){ const auto pi = *(cbegin(perm) + i); *(begin(stride)+i) = *(cbegin(st) + pi); }); return BoxOrdinal<_Order, _Index>(std::move(stride), ord.offset(), ord.contiguous()); } /// Range output operator /// \param os The output stream that will be used to print \c r /// \param r The range to be printed /// \return A reference to the output stream template std::ostream& operator<<(std::ostream& os, const BoxOrdinal<_Order,_Index>& ord) { array_adaptor::stride_type>::print(ord.stride(), os); return os; } } // namespace btas // serialization to/fro MADNESS archive (github.com/m-a-d-n-e-s-s/madness) namespace madness { namespace archive { template struct ArchiveLoadImpl> { static inline void load(const Archive& ar, btas::BoxOrdinal<_Order, _Index>& o) { typename btas::BoxOrdinal<_Order, _Index>::stride_type stride{}; typename btas::BoxOrdinal<_Order, _Index>::value_type offset{}; bool cont{}; ar& stride& offset& cont; o = btas::BoxOrdinal<_Order, _Index>(std::move(stride), std::move(offset), std::move(cont)); } }; template struct ArchiveStoreImpl> { static inline void store(const Archive& ar, const btas::BoxOrdinal<_Order, _Index>& o) { ar& o.stride() & o.offset() & o.contiguous(); } }; } // namespace archive } // namespace madness #endif /* BTAS_ORDINAL_H_ */ BTAS-1.0.0/btas/range.h000066400000000000000000001536761476142407000144660ustar00rootroot00000000000000/* * range.h * * Created on: Nov 26, 2013 * Author: evaleev */ #ifndef BTAS_RANGE_H_ #define BTAS_RANGE_H_ #include #include #include #include #include #include #include #ifndef BTAS_HAS_BOOST_ITERATOR #error \ "BTAS cannot be used without Boost.Iterator; add Boost dir to the include path" #else #include #endif #include #include #include #include #include #include #include #include #include /** @addtogroup BTAS_Range \section sec_BTAS_Range Range class Range implements the Range TWG concept. It supports dense and strided ranges, with fixed (compile-time) and variable (run-time) ranks. \subsection sec_BTAS_Range_Synopsis Synopsis The following will be valid with the reference implementation of Range. This does not belong to the concept specification, and not all of these operations will model the concept, but it is useful for discussion; will eventually be moved elsewhere. 
@code // Constructors Range1 r0; // empty = {} Range1 r1(5); // [0,5) = {0, 1, 2, 3, 4} Range1 r2(2,4); // [2,4) = {2, 3} Range1 r3(1,7,2); // [1,7) with stride 2 = {1, 3, 5} assert(r3.rank() == 1); Range x(r2,r3); // r1 x r2 = { {2,1}, {2,3}, {2,5}, {4,1}, {4,3}, {4,5} } assert(x.rank() == 2); // Operations std::cout << x.area() << std::endl; // will print "6" // Iteration for(auto& v: r3) { std::cout << v << " "; // will print "1 3 5 " } @endcode */ namespace btas { template class Range1d { public: typedef Index index_type; typedef index_type value_type; typedef const value_type const_reference; typedef RangeIterator const_iterator; ///< Index iterator typedef const_iterator iterator; ///< interator = const_iterator friend class RangeIterator; Range1d(size_t extent = 0ul) : lobound_(0), upbound_(extent), stride_(1) {} /// [begin, end) Range1d(index_type begin, index_type end, index_type stride = 1) : lobound_(begin), upbound_(end), stride_(stride) { assert(stride_ != 0); } /// to construct from an initializer list give it as {}, {extent}, {begin,end}, or {begin,end,stride} template Range1d(std::initializer_list x) : lobound_(0), upbound_(0), stride_(1) { assert(x.size() <= 3 //, "Range1d initializer-list constructor requires at most 3 parameters" ); if (x.size() == 1) upbound_ = *x.begin(); else if (x.size() >= 2) { lobound_ = *x.begin(); upbound_ = *(x.begin()+1); if (x.size() == 3) stride_ = *(x.begin()+2); } assert(stride_ != 0); } Range1d(const Range1d& other) : lobound_(other.lobound_), upbound_(other.upbound_), stride_(other.stride_) { } Range1d& operator=(const Range1d& other) { lobound_ = other.lobound_; upbound_ = other.upbound_; stride_ = other.stride_; return *this; } Range1d& operator=(Range1d&& other) { lobound_ = other.lobound_; upbound_ = other.upbound_; stride_ = other.stride_; return *this; } /// to construct from an initializer list give it as {}, {extent}, {begin,end}, or {begin,end,stride} template Range1d& operator=(std::initializer_list x) { assert(x.size() <= 3 //, "Range1d initializer-list constructor requires at most 3 parameters" ); if (x.size() == 0) { lobound_ = upbound_ = 0; stride_ = 1; } if (x.size() == 1) { lobound_ = 0; upbound_ = *x.begin(); stride_ = 1; } else if (x.size() >= 2) { lobound_ = *x.begin(); upbound_ = *(x.begin()+1); if (x.size() == 3) stride_ = *(x.begin()+2); else stride_ = 1; } return *this; } /// \return The rank (number of dimensions) of this range /// \throw nothing constexpr size_t rank() const { return 1ul; } const_reference lobound() const { return lobound_; } index_type front() const { return lobound_; } const_reference upbound() const { return upbound_; } index_type back() const { return upbound_ - 1; } const_reference stride() const { return stride_; } /// Size of Range1d is the number of elements encountered in iteration from begin to end. size_t size() const { return (upbound_ - lobound_) / stride_; } /// Index iterator factory /// The iterator dereferences to an index. The order of iteration matches /// the data layout of a dense tensor. /// \return An iterator that holds the lobound element index of a tensor /// \throw nothing const_iterator begin() const { return const_iterator(lobound_, this); } /// Index iterator factory /// The iterator dereferences to an index. The order of iteration matches /// the data layout of a dense tensor. 
/// \return An iterator that holds the upbound element index of a tensor /// \throw nothing const_iterator end() const { return const_iterator(upbound_, this); } /// Increment the coordinate index \c i in this range /// \param[in,out] i The coordinate index to be incremented void increment(index_type& i) const { i += stride_; if (not_past_end(i)) return; // if ended up outside the range, set to end i = upbound_; } private: index_type lobound_; index_type upbound_; index_type stride_; bool not_past_end(const index_type& i) const { if (stride_ > 0) return i < upbound_; else // stride_ < 0 return i > upbound_; } }; // Range1d using Range1 = Range1d<>; /// Merges 2 Range1d objects template Range1d<_Index> merge(const Range1d<_Index>& r1, const Range1d<_Index>& r2) { assert(r1.stride() == r2.stride()); assert((r2.lobound() - r1.lobound()) % r1.stride() == 0); return Range1d<_Index>{r1.lobound(), r2.upbound(), r1.stride()}; } /// Range1d output operator /// \param os The output stream that will be used to print \c r /// \param r The range to be printed /// \return A reference to the output stream template inline std::ostream& operator<<(std::ostream& os, const Range1d<_Index>& r) { os << "[" << r.lobound() << "," << r.upbound(); if (r.stride() != 1ul) os << "," << r.stride(); os << ")"; return os; } /// Range1d equality operator /// comparison of two Range1d objects, with potentially different index types /// \tparam _Index1 /// \tparam _Index2 /// \param r1 the first, Range1d<_Index1>, object /// \param r2 the second, Range1d<_Index1>, object /// returns true if \c r1 and \c r2 have identical lobound, upbound, and stride template bool operator==(const Range1d<_Index1>& r1, const Range1d<_Index2>& r2) { return r1.lobound() == r2.lobound() && r1.upbound() == r2.upbound() && r1.stride() == r2.stride(); } /// Range1d inequality operator /// comparison of two Range1d objects, with potentially different index types /// \tparam _Index1 /// \tparam _Index2 /// \param r1 the first, Range1d<_Index1>, object /// \param r2 the second, Range1d<_Index1>, object /// returns false if \c r1 and \c r2 have identical lobound, upbound, and stride template bool operator!=(const Range1d<_Index1>& r1, const Range1d<_Index2>& r2) { return !operator==(r1,r2); } /// Range1d congruence test /// two Range1d objects are congruent if their sizes are equal /// \tparam _Index1 /// \tparam _Index2 /// \param r1 the first, Range1d<_Index1>, object /// \param r2 the second, Range1d<_Index1>, object /// returns true if \c r1 and \c r2 have identical sizes template bool congruent(const Range1d<_Index1>& r1, const Range1d<_Index2>& r2) { return r1.size() == r2.size(); } /// convenient to iterate over dimensions according to \c Order template Range1 dim_range(size_t ndim) { if (Order == blas::Layout::RowMajor) return Range1(ndim-1,-1,-1); if (Order == blas::Layout::ColMajor) return Range1(0,ndim,1); assert(false); // unreachable return Range1(); } /// BaseRangeNd is a CRTP /// base for implementations of N-dimensional Ranges. /** * BaseRangeNd defines a box in the index space, and the iteration order on it. * The iteration order depends on the blas::Layout parameter (ordering of dimensions). * It implements most of the \ref sec_TWG_Range_Concept_Range_Box "TWG.BoxRange" concept, except it does * not define ordinals. 
* * \tparam _Derived implementation of Range, to be derived from \c BaseRangeNd as \c public \c BaseRangeNd * */ template class BaseRangeNd { public: const static blas::Layout order = range_traits<_Derived>::order; typedef typename range_traits<_Derived>::index_type index_type; ///< index type typedef typename std::make_unsigned::type extent_type; ///< Range extent type typedef std::size_t size_type; ///< Size type typedef typename index_type::value_type index_element_type; typedef typename extent_type::value_type extent_element_type; typedef index_type value_type; ///< Range can be viewed as a Container of value_type typedef index_type& reference; typedef const value_type& const_reference; // index iterator typedef RangeIterator iterator; ///< Index iterator typedef iterator const_iterator; ///< Index interator = Index const_iterator friend class RangeIterator; friend _Derived; private: struct Enabler {}; template void init(const Index1& lobound, const Index2& upbound) { using btas::rank; auto n = rank(lobound); if (n == 0) { lobound_ = array_adaptor::construct(0); upbound_ = array_adaptor::construct(0); extent_ = array_adaptor::construct(0); return; } validate(lobound, upbound); lobound_ = array_adaptor::construct(n); std::copy(std::begin(lobound), std::end(lobound), std::begin(lobound_)); upbound_ = array_adaptor::construct(n); std::copy(std::begin(upbound), std::end(upbound), std::begin(upbound_)); extent_ = array_adaptor::construct(n); std::transform(std::begin(lobound), std::end(lobound), std::begin(upbound), std::begin(extent_), [](index_element_type l, index_element_type u) -> extent_element_type { return u - l; }); } template void validate(const Index1& lobound, const Index2& upbound) { #ifndef NDEBUG using btas::rank; auto n = rank(lobound); assert(n == rank(upbound)); typedef typename common_signed_type::type ctype; for(decltype(n) i = 0; i != n; ++i) { auto li = *(std::begin(lobound) + i); auto ui = *(std::begin(upbound) + i); assert(static_cast(li) <= static_cast(ui)); } #endif } protected: /// Default constructor /// Construct an uninitialized range /// \note this is a rank-0 range if \c rank(index_type) return value is /// non-constexpr BaseRangeNd() : lobound_(), upbound_(), extent_() {} /// Constructor defined by the upper and lower bounds /// \tparam Index1 An array type convertible to \c index_type /// \tparam Index2 An array type convertible to \c index_type /// \param lobound The lower bound of the N-dimensional range /// \param upbound The upper bound of the N-dimensional range template BaseRangeNd(const Index1& lobound, const Index2& upbound, typename std::enable_if::value && btas::is_index::value, Enabler>::type = Enabler()) { validate(lobound, upbound); init(lobound, upbound); } /// "Move" constructor defined by the upper and lower bounds /// \param lobound The lower bound of the N-dimensional range /// \param upbound The upper bound of the N-dimensional range BaseRangeNd(index_type&& lobound, index_type&& upbound) : lobound_(lobound), upbound_(upbound) { validate(lobound, upbound); extent_ = array_adaptor::construct(rank()); std::transform( std::begin(lobound), std::end(lobound), std::begin(upbound), std::begin(extent_), [](index_element_type l, index_element_type u) -> extent_element_type { return u - l; }); } /// Range constructor from a pack of extents for each dimension /// \tparam _extent0 An integer /// \tparam _extents A pack of integers /// \param extent0 The extent of first dimension (0) /// \param sizes A pack of sizes for dimensions 1+ 
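    ///
    /// For example (illustrative; shown through the derived btas::Range, which
    /// provides this constructor's behavior):
    /// \code
    /// btas::Range r(3, 4, 5);  // 0-based box [0,3) x [0,4) x [0,5)
    /// assert(r.rank() == 3 && r.area() == 60);
    /// \endcode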
template::value>::type> explicit BaseRangeNd(const _extent0& extent0, const _extents&... extents) { typedef typename std::common_type<_extent0, typename extent_type::value_type>::type common_type; // make initializer_list auto range_extent = {static_cast(extent0), static_cast(extents)...}; index_type lb = array_adaptor::construct(range_extent.size(), 0); init(lb, range_extent); } /// to construct from an initializer list give it as {extent0, extent1, ... extentN} template BaseRangeNd(std::initializer_list extents) { index_type lb = array_adaptor::construct(extents.size(), 0); init(lb, extents); } /// to construct from an initializer list give it as {extent0, extent1, ... extentN} template BaseRangeNd(std::initializer_list lobound, std::initializer_list upbound) { assert(lobound.size() == upbound.size()); init(lobound, upbound); } /// Copy Constructor /// \param other The range to be copied BaseRangeNd(const BaseRangeNd& other) : lobound_(other.lobound_), upbound_(other.upbound_), extent_(other.extent_) { } /// copy constructor from another instantiation of Range template BaseRangeNd (const BaseRangeNd& x) { init(x.lobound(), x.upbound()); } /// Move Constructor /// \param other The range to be moved BaseRangeNd(BaseRangeNd&& other) : lobound_(std::move(other.lobound_)), upbound_(std::move(other.upbound_)), extent_(std::move(other.extent_)) { } /// Destructor ~BaseRangeNd() = default; /// Copy assignment operator /// \param other The range to be copied /// \return A reference to this object /// \throw std::bad_alloc When memory allocation fails. BaseRangeNd& operator=(const BaseRangeNd& other) { lobound_ = other.lobound_; upbound_ = other.upbound_; extent_ = other.extent_; return *this; } /// Move assignment operator /// \param other The range to be moved /// \return A reference to this object BaseRangeNd& operator=(BaseRangeNd&& other) { lobound_ = std::move(other.lobound_); upbound_ = std::move(other.upbound_); extent_ = std::move(other.extent_); return *this; } /// swaps the contents of \c *this with \c other void swap(BaseRangeNd& other) noexcept { using std::swap; swap(lobound_, other.lobound_); swap(upbound_, other.upbound_); swap(extent_, other.extent_); } public: /// Access a particular subrange of Range /// returns the Range1 corresponding to the dimension \c d /// \param d the dimension index Range1d range(size_t d) const { return Range1d(*(std::begin(lobound_)+d), *(std::begin(upbound_)+d)); } /// Range lobound coordinate accessor /// \return A \c size_array that contains the lower bound of this range /// \throw nothing const_reference lobound() const { return lobound_; } const index_element_type* lobound_data() const { return std::data(lobound_); } /// Range lobound coordinate accessor /// \return A \c size_array that contains the first index in this range /// \throw nothing index_type front() const { return lobound_; } /// Range upbound coordinate accessor /// \return A \c size_array that contains the upper bound of this range /// \throw nothing const_reference upbound() const { return upbound_; } const index_element_type* upbound_data() const { return std::data(upbound_); } /// Rank accessor /// \return The rank (number of dimensions) of this range /// \throw nothing //constexpr auto rank() const -> decltype(btas::rank(this->lobound())) { constexpr size_t rank() const { using btas::rank; return rank(lobound_); } /// Range size accessor /// \return A \c extent_type that contains the extent of each dimension /// \throw nothing const extent_type& extent() const { return extent_; 
// extent_type ex = array_adaptor::construct(rank()); // for(size_t i=0; i()); } else return 0; } /// An alias for area() size_type volume() const { return area(); } /// Index iterator factory /// The iterator dereferences to an index. The order of iteration matches /// the data layout of a dense tensor. /// \return An iterator that holds the lobound element index of a tensor /// \throw nothing const_iterator begin() const { return const_iterator(lobound_, static_cast(this)); } /// Index iterator factory /// The iterator dereferences to an index. The order of iteration matches /// the data layout of a dense tensor. /// \return An iterator that holds the upbound element index of a tensor /// \throw nothing const_iterator end() const { return const_iterator(upbound_, static_cast(this)); } /// Increment index \c i in this range /// \param[in,out] i The coordinate index to be incremented void increment(index_type& i) const { for(auto d: dim_range(rank())) { // increment coordinate ++i[d]; // break if done if(i[d] < upbound_[d]) return; // Reset current index to lobound value. i[d] = lobound_[d]; } // if the current location is outside the range, make it equal to range end iterator std::copy(std::begin(upbound_), std::end(upbound_), std::begin(i)); } #if 0 /// Advance the coordinate index \c i by \c n in this range /// \param[in,out] i The coordinate index to be advanced /// \param n The distance to advance \c i void advance(index& i, std::ptrdiff_t n) const { const size_type o = ord(i) + n; i = idx(o); } /// Compute the distance between the coordinate indices \c first and \c last /// \param first The lobounding position in the range /// \param last The ending position in the range /// \return The difference between first and last, in terms of range positions std::ptrdiff_t distance_to(const index& first, const index& last) const { assert(includes(first)); assert(includes(last)); return ord(last) - ord(first); } #endif /// Check the index to make sure it is within the range. /// \tparam Index An array type /// \param index The index to check for inclusion in the range /// \return \c true when \c i \c >= \c lobound and \c i \c < \c f, otherwise /// \c false /// equal to the size of the index. template typename std::enable_if::value, bool>::type includes(const Index& index, typename std::enable_if::value>::type* = 0) const { using btas::rank; assert(rank(index) == this->rank()); const auto end = this->rank(); for(size_t i = 0; i < end; ++i) if((index[i] < lobound_[i]) || (index[i] >= upbound_[i])) return false; return true; } private: /// Validates that the index is in the Range /// \tparam Index A coordinate index type (array type) /// \param index The index to be converted to an ordinal index /// \return The ordinal index of \c index /// \throw When \c index is not included in this range. template typename std::enable_if::value, void>::type validate_index(const Index& index) const { using btas::rank; assert(rank(index) == this->rank()); assert(this->includes(index)); } private: index_type lobound_; ///< range lower bound index_type upbound_; ///< range upper bound extent_type extent_; ///< range extent }; // class BaseRangeNd /// RangeNd extends BaseRangeNd to compute ordinals, as specified by \c _Ordinal . /// It conforms to the \ref sec_TWG_Range_Concept_Range_Box "TWG.BoxRange" concept. 
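/// Example (an illustrative usage sketch):
/// \code
/// btas::Range r({1, 1}, {3, 4});   // box of indices {1,1} .. {2,3}, area() == 6
/// for (const auto& idx : r) {
///   auto o = r.ordinal(idx);       // ordinal value computed by the _Ordinal object
/// }
/// \endcode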
template class RangeNd : public BaseRangeNd< RangeNd<_Order,_Index, _Ordinal>> { private: struct Enabler {}; public: static_assert(btas::is_index<_Index>::value, "RangeNd<_Index> instantiated with an _Index type that does not meet the TWG.Index concept"); typedef RangeNd this_type; typedef _Index index_type; ///< index type typedef typename _Index::value_type index1_type; ///< 1-index type const static blas::Layout order = _Order; typedef typename _Ordinal::value_type ordinal_type; ///< Ordinal value type // ordinal iterator // to be efficient, implemented as iterator that updates index and ordinal at the same time typedef std::pair subiter_value_type; typedef RangeIterator ordinal_subiterator; typedef ::boost::transform_iterator< btas::second_of_pair, ordinal_subiterator > ordinal_iterator; ///< Ordinal iterator typedef ordinal_iterator const_ordinal_iterator; ///< Ordinal interator = Ordinal const_iterator typedef BaseRangeNd< RangeNd<_Order, _Index, _Ordinal> > base_type; ///< Parent type friend class BaseRangeNd< RangeNd<_Order, _Index, _Ordinal> >; template friend class RangeNd; typedef typename base_type::extent_type extent_type; /// Default constructor /// Construct a range with size and dimensions equal to zero. RangeNd() : base_type(), ordinal_() { } /// Constructor defined by the upper and lower bounds /// \tparam Index1 any type for which \c btas::is_index::value is true /// \tparam Index2 any type for which \c btas::is_index::value is true /// \param lobound The lower bound of the N-dimensional range /// \param upbound The upper bound of the N-dimensional range template RangeNd(const Index1& lobound, const Index2& upbound, typename std::enable_if::value && btas::is_index::value, Enabler>::type = Enabler()) : base_type(lobound, upbound), ordinal_(lobound, upbound) { } /// "Move" constructor defined by the upper and lower bounds /// \param lobound The lower bound of the N-dimensional range /// \param upbound The upper bound of the N-dimensional range RangeNd(index_type&& lobound, index_type&& upbound) : base_type(lobound, upbound), ordinal_(lobound, upbound) { } /// Constructor defined by the upper and lower bounds, and the axes strides /// \tparam Index1 any type for which \c btas::is_index::value is true /// \tparam Index2 any type for which \c btas::is_index::value is true /// \tparam Extent any type for which \c Ordinal(Index1,Index2,Extent) is a valid expression (similar to \c extent_type) /// \param lobound The lower bound of the N-dimensional range /// \param upbound The upper bound of the N-dimensional range /// \param stride The axes strides of the N-dimensional range template RangeNd(const Index1& lobound, const Index2& upbound, const Extent& stride, typename std::enable_if::value && btas::is_index::value && btas::is_index::value, Enabler>::type = Enabler()) : base_type(lobound, upbound), ordinal_(lobound, upbound, stride) { } /// "Move" constructor defined by the upper and lower bounds, and the axes strides /// \param lobound The lower bound of the N-dimensional range /// \param upbound The upper bound of the N-dimensional range /// \param stride The axes strides of the N-dimensional range RangeNd(index_type&& lobound, index_type&& upbound, extent_type&& stride) : base_type(lobound, upbound), ordinal_(lobound, upbound, stride) { } /// Constructor defined by the upper and lower bounds, and the ordinal object /// \tparam Index1 any type for which \c btas::is_index::value is true /// \tparam Index2 any type for which \c btas::is_index::value is true /// \param lobound The 
lower bound of the N-dimensional range /// \param upbound The upper bound of the N-dimensional range template RangeNd(const Index1& lobound, const Index2& upbound, _Ordinal&& ord, typename std::enable_if::value && btas::is_index::value, Enabler>::type = Enabler()) : base_type(lobound, upbound), ordinal_(ord) { } /// "Move" constructor defined by the upper and lower bounds, and the ordinal object /// \param lobound The lower bound of the N-dimensional range /// \param upbound The upper bound of the N-dimensional range /// \param ordinal The ordinal object RangeNd(index_type&& lobound, index_type&& upbound, _Ordinal&& ord) : base_type(lobound, upbound), ordinal_(ord) { } /// Range constructor from extent /// \tparam Extent An array type convertible to \c extent_type /// \param extent An array with the extent of each dimension template ::value>::type> RangeNd(const Extent& extent) : base_type() { index_type lb = array_adaptor::construct(extent.size(), 0); base_type::init(lb, extent); ordinal_ = _Ordinal(lb, extent); } /// Range constructor from a pack of extents for each dimension /// \tparam _extent0 An integer /// \tparam _extents A pack of integers /// \param extent0 The extent of first dimension (0) /// \param extents A pack of sizes for dimensions 1+ template::value>::type> explicit RangeNd(const _extent0& extent0, const _extents&... extents) : base_type() { typedef typename std::common_type<_extent0, typename extent_type::value_type>::type common_type; // make initializer_list auto range_extent = {static_cast(extent0), static_cast(extents)...}; index_type lb = array_adaptor::construct(range_extent.size(), 0); base_type::init(lb, range_extent); ordinal_ = _Ordinal(lb, range_extent); } /// to construct from an initializer list give it as {extent0, extent1, ... extentN} template RangeNd(std::initializer_list extents, typename std::enable_if::value>::type* = 0) : base_type() { index_type lb = array_adaptor::construct(extents.size(), 0); base_type::init(lb, extents); ordinal_ = _Ordinal(lb, extents); } /// to construct from an initializer list give it as {extent0, extent1, ... extentN} template RangeNd(std::initializer_list lobound, std::initializer_list upbound, typename std::enable_if::value && std::is_integral::value>::type* = 0) : base_type() { assert(lobound.size() == upbound.size()); base_type::init(lobound, upbound); ordinal_ = _Ordinal(lobound, upbound); } /// to construct from an initializer list give it as {Range1d_0, Range1d_1, ... Range1d_N} template RangeNd(std::initializer_list> range1s) : base_type() { for(auto i: range1s) assert(i.stride() == 1); std::vector lb(range1s.size()); std::vector ub(range1s.size()); int c=0; for(auto i: range1s) { lb[c] = i.lobound(); ub[c] = i.upbound(); ++c; } base_type::init(lb, ub); ordinal_ = _Ordinal(lb, ub); } /// to construct RangeNd from Range1d given N {Range1d, Range1d, ... 
Range1d} template RangeNd(Range1d range1, size_type n) : base_type() { assert(range1.stride() == 1); std::vector lb(n, range1.lobound()); std::vector ub(n, range1.upbound()); base_type::init(lb, ub); ordinal_ = _Ordinal(lb, ub); } /// Copy Constructor /// \param other The range to be copied RangeNd(const RangeNd& other) : base_type(static_cast(other)), ordinal_(other.ordinal_) { } /// copy constructor from another instantiation of Range template RangeNd (const RangeNd<_O,_I,_Ord>& x) : base_type(), ordinal_(x.ordinal_) { base_type::init(x.lobound(), x.upbound()); } /// Move Constructor /// \param other The range to be moved RangeNd(RangeNd&& other) : base_type(other), ordinal_(other.ordinal_) { } /// Destructor ~RangeNd() { } /// Copy assignment operator /// \param other The range to be copied /// \return A reference to this object /// \throw std::bad_alloc When memory allocation fails. RangeNd& operator=(const RangeNd& other) { this->base_type::operator=(static_cast(other)); ordinal_ = other.ordinal_; return *this; } /// Move assignment operator /// \param other The range to be moved /// \return A reference to this object /// \throw std::bad_alloc When memory allocation fails. RangeNd& operator=(RangeNd&& other) { this->base_type::operator=(static_cast(other)); ordinal_ = other.ordinal_; return *this; } /// \return a const reference to the ordinal object const _Ordinal& ordinal() const { return ordinal_; } /// swaps the contents of \c *this with \c other void swap(RangeNd& other) noexcept { base_type::swap(other); using std::swap; swap(ordinal_, other.ordinal_); } /// calculates the ordinal value of \c i /// Convert an index to its ordinal. /// \tparam Index A coordinate index type (array type) /// \param index The index to be converted to an ordinal index /// \return The ordinal index of \c index /// \throw When \c index is not included in this range. template ordinal_type ordinal(Index&& ... 
index) const { return ordinal_(std::forward(index)...); } /// Range stride accessor (just a shortcut to ordinal().stride() ) /// \return An Index type that contains the stride of each dimension /// \throw nothing auto stride() const -> decltype(this->ordinal().stride()) { return this->ordinal().stride(); } /// Range stride data accessor (just a shortcut to ordinal().stride_data() ) /// \return A pointer to a sequence of integers that contains the strides of each dimension /// \throw nothing auto stride_data() const -> decltype(this->ordinal().stride_data()) { return this->ordinal().stride_data(); } /// Constructs a Range slice defined by the upper and lower bounds within this Range /// \tparam Index1 An array type convertible to \c index_type /// \tparam Index2 An array type convertible to \c index_type /// \param lobound The lower bound of the new range /// \param upbound The upper bound of the new range template typename std::enable_if::value && btas::is_index::value, RangeNd>::type slice(const Index1& lobound, const Index2& upbound) const { return RangeNd(lobound, upbound, _Ordinal(this->lobound(), this->upbound(), this->ordinal().stride())); } /// Constructs a Range slice defined by a subrange for each dimension template RangeNd slice(std::initializer_list> range1s) const { for(auto i: range1s) assert(i.stride() == 1); btas::DEFAULT::index lb(range1s.size()); btas::DEFAULT::index ub(range1s.size()); int c=0; for(auto i: range1s) { lb[c] = i.lobound(); ub[c] = i.upbound(); ++c; } return RangeNd(std::move(lb), std::move(ub), _Ordinal(this->lobound(), this->upbound(), this->ordinal().stride())); } using base_type::includes; /// Check the index ordinal to make sure it is within the range. /// \tparam IndexOrdinal An integral type /// \param indexord The index ordinal to check for inclusion in the range /// equal to the size of the index. template typename std::enable_if::value, bool>::type includes(const IndexOrdinal& indexord) const { return ordinal_.includes(indexord); } using base_type::increment; /// Increments pair /// \param[in,out] pair to be incremented void increment(subiter_value_type& i) const { for(auto d: dim_range(this->rank())) { // increment subindex ++i.first[d]; // break if done if(i.first[d] < this->upbound_[d]) { i.second += ordinal_.stride()[d]; return; } // Reset current subindex to lobound value and move to the next i.second -= (this->upbound_[d] - this->lobound_[d] - 1) * ordinal_.stride()[d]; i.first[d] = this->lobound_[d]; } // if outside the range, point to the upper bound ... Range::end() will evaluate to upbound also! Range will use this std::copy(std::begin(this->upbound_), std::end(this->upbound_), std::begin(i.first)); i.second = ordinal(i.first); } private: /// The Ordinal object _Ordinal ordinal_; }; /// Range Traits template struct range_traits > { const static blas::Layout order = _Order; typedef _Index index_type; typedef typename _Ordinal::value_type ordinal_type; constexpr static const bool is_general_layout = true; }; using Range = RangeNd<>; /// Range output operator /// \param os The output stream that will be used to print \c r /// \param r The range to be printed /// \return A reference to the output stream template std::ostream& operator<<(std::ostream& os, const RangeNd<_Order,_Index, _Ordinal>& r) { os << "["; array_adaptor<_Index>::print(r.lobound(), os); os << ","; array_adaptor<_Index>::print(r.upbound(), os); os << ")_" << (_Order == blas::Layout::RowMajor ? 
"R" : "C"); os << ":" << r.ordinal(); return os; } /// swaps the contents of \c r0 with \c r1 template inline void swap(RangeNd<_Order,_Index,_Ordinal>& r0, RangeNd<_Order,_Index,_Ordinal>& r1) noexcept { r0.swap(r1); } /// Range equality comparison /// \param r1 The first range to be compared /// \param r2 The second range to be compared /// \return \c true when \c r1 represents the same range as \c r2, otherwise /// \c false. template inline bool operator ==(const RangeNd<_Order,_Index,_Ordinal>& r1, const RangeNd<_Order,_Index,_Ordinal>& r2) { return ((r1.lobound() == r2.lobound()) && (r1.extent() == r2.extent())); } /// Range inequality comparison /// \param r1 The first range to be compared /// \param r2 The second range to be compared /// \return \c true when \c r1 does not represent the same range as \c r2, /// otherwise \c false. template inline bool operator !=(const RangeNd<_Order,_Index,_Ordinal>& r1, const RangeNd<_Order,_Index,_Ordinal>& r2) { return ! operator ==(r1, r2); } /// Tests congruency of two Ranges /// Ranges are congruent if the have identical extents. The congruency of \c r1 and \c r2 of rank N is checked by the following code: /// \code /// if (_Order1 == _Order2) /// result = r1.extent()[0] == r2.extent()[0] && r1.extent()[1] == r2.extent()[1] && ... ; /// else /// result = r1.extent()[0] == r2.extent()[N-1] && r1.extent()[1] == r2.extent()[N-2] && ... ; /// \endcode /// \tparam _Order1 /// \tparam _Index1 /// \tparam _Ordinal1 /// \tparam _Order2 /// \tparam _Index2 /// \tparam _Ordinal2 /// \param r1 a RangeNd<_Order1,_Index1,_Ordinal1> object /// \param r2 a RangeNd<_Order2,_Index2,_Ordinal2> object /// \return \c true when \c r1 and \c r2 have same extents, otherwise \c false /// \note To compare also lobound (except when the ranges have diffferent Order) use Range::operator==() template inline bool congruent(const RangeNd<_Order1,_Index1,_Ordinal1>& r1, const RangeNd<_Order2,_Index2,_Ordinal2>& r2) { const auto r1_extent = r1.extent(); auto r2_extent = r2.extent(); // no std::crbegin even in C++14, hence no const here if (_Order1 == _Order2) // 7/15/2014: broken with clang++/libc++ (clang-503.0.40) on OS X //auto eq = std::equal(std::cbegin(r1.extent()), std::cend(r1.extent()), // std::cbegin(r2.extent())); return std::equal(std::cbegin(r1_extent), std::cend(r1_extent), std::cbegin(r2_extent)); else return std::equal(std::cbegin(r1_extent), std::cend(r1_extent), std::rbegin(r2_extent)); } /// Tests whether a range is contiguous, i.e. whether its ordinal values form a contiguous range /// \param range a Range /// \return true if \p range is contiguous template inline bool is_contiguous(const RangeNd<_Order, _Index, _Ordinal>& range) { return range.ordinal().contiguous(); } /// Permutes a Range /// permutes the dimensions using permutation \c p = {p[0], p[1], ... }; for example, if \c lobound() initially returned /// {lb[0], lb[1], ... }, after this call \c lobound() will return {lb[p[0]], lb[p[1]], ...}. 
/// \param perm an array specifying permutation of the dimensions template ::value>::type> RangeNd<_Order, _Index> permute(const RangeNd<_Order, _Index, _Ordinal>& r, const AxisPermutation& perm) { const auto rank = r.rank(); auto lb = r.lobound(); auto ub = r.upbound(); typedef typename RangeNd<_Order, _Index, _Ordinal>::index_type index_type; index_type lobound, upbound; lobound = array_adaptor::construct(rank); upbound = array_adaptor::construct(rank); std::for_each(std::begin(perm), std::end(perm), [&](const typename AxisPermutation::value_type& i){ const auto pi = *(std::begin(perm) + i); *(std::begin(lobound)+i) = *(std::begin(lb) + pi); *(std::begin(upbound)+i) = *(std::begin(ub) + pi); }); return RangeNd<_Order, _Index, _Ordinal>(std::move(lobound), std::move(upbound), permute(r.ordinal(), perm) ); } /// Permutes a Range /// permutes the axes using permutation \c p = {p[0], p[1], ... }; for example, if \c lobound() initially returned /// {lb[0], lb[1], ... }, after this call \c lobound() will return {lb[p[0]], lb[p[1]], ...} . /// \param perm an array specifying permutation of the axes template RangeNd<_Order, _Index, _Ordinal> permute(const RangeNd<_Order, _Index, _Ordinal>& r, std::initializer_list perm) { typename RangeNd<_Order, _Index, _Ordinal>::extent_type p = array_adaptor::extent_type>::construct(perm.size()); std::copy(std::begin(perm), std::end(perm), std::begin(p)); return permute(r, p); } /// Takes the diagonal part of a range /// Given a RangeNd, returns a new RangeNd whose indices increase in lock step. /// Requires \c lobound() to be uniform {n,n,n,...}. /// Iterating over the returned range yields: /// {n,n,n,...} /// {n+1,n+1,n+1,...} /// {n+2,n+2,n+2,...} /// up to \c upbound() template RangeNd<_Order, _Index> diag(const RangeNd<_Order, _Index, _Ordinal>& r) { if(r.rank() == 0ul) return r; using index_value = typename RangeNd<_Order,_Index>::index_type::value_type; index_value stride = 1, prod_extents = 1, extent = r.upbound()[0]; const auto dr = _Order == blas::Layout::RowMajor ? Range1(r.rank()-1,0,-1) : Range1(0,r.rank()-1,1); for(const auto i : dr) { assert(r.lobound()[0] == r.lobound()[i]); prod_extents *= (r.upbound()[i]-r.lobound()[i]); stride += prod_extents; extent = std::min(extent,r.upbound()[i]); } return RangeNd<_Order,_Index>({r.lobound()[0]},{extent},{static_cast::extent_type::value_type>(stride)}); } /// Group a set of adjacent indices of a Range /// Combine/group/flatten a set of adjacent indices into a single index. /// Groups the indices from [istart,iend) not including iend. /// If the original indices have extents e1,e2,e3,... the grouped index /// will have extent e1*e2*e3*... 
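/// Example (an illustrative sketch of the preceding permute and of group):
/// \code
/// btas::Range r(2, 3, 4);             // extents {2,3,4}
/// auto rp = permute(r, {2, 0, 1});    // rp has extents {4,2,3}
/// auto g  = group(r, 0, 2);           // fuses dimensions [0,2): g has extents {6,4}
/// \endcode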
template RangeNd<_Order, _Index,_Ordinal> group(const RangeNd<_Order, _Index, _Ordinal>& r, size_t istart, size_t iend) { using index_type = typename RangeNd<_Order,_Index,_Ordinal>::index_type; if(r.rank() == 0 || iend <= (istart+1)) return r; const auto ngroup = iend-istart; const auto newr = r.rank()-ngroup+1; assert(ngroup >= 2); assert(r.rank() >= ngroup); assert(iend > 0); index_type lobound(newr), upbound(newr); for(size_t i = 0; i < istart; ++i) { lobound[i] = r.lobound()[i]; upbound[i] = r.upbound()[i]; } lobound[istart] = 0; upbound[istart] = 1; for(size_t i = istart; i < iend; ++i) { upbound[istart] *= (r.upbound()[i]-r.lobound()[i]); } for(size_t i = iend, j = istart+1; i < r.rank(); ++i,++j) { lobound[j] = r.lobound()[i]; upbound[j] = r.upbound()[i]; } return RangeNd<_Order,_Index,_Ordinal>(lobound,upbound); } template RangeNd<_Order, _Index,_Ordinal> flatten(const RangeNd<_Order, _Index, _Ordinal>& r) { using index_value = typename RangeNd<_Order,_Index,_Ordinal>::index_type::value_type; index_value lobound = 0, upbound = 1; for(size_t i = 0; i < r.rank(); ++i) { upbound *= (r.upbound()[i]-r.lobound()[i]); } return RangeNd<_Order,_Index,_Ordinal>({lobound},{upbound}); } /// /// Tie (i.e. lock or fuse) N indices together, returning a range with (N-1) fewer indices. /// The position of the tied index is the position of the first index in the group. /// Example: /// std::vector inds = { 0, 2 }; /// tie(T,inds)(i,j) = T(i,j,i) /// template RangeNd<_Order, _Index,_Ordinal> tieIndex(const RangeNd<_Order, _Index, _Ordinal>& r, const ArrayType& inds) { using index_type = typename RangeNd<_Order,_Index,_Ordinal>::index_type; using index_value = typename index_type::value_type; if(inds.size() < 2) return r; assert(inds.size() <= r.rank()); auto newr = r.rank()-(inds.size()-1); auto ti = inds[0]; auto tbegin = r.lobound()[ti]; auto tend = r.upbound()[ti]; for(const auto i : inds) { assert(i < r.rank()); ti = std::min(ti,i); tbegin = std::max(tbegin,r.lobound()[i]); tend = std::min(tend,r.upbound()[i]); } if(ti >= newr) ti = newr-1; index_type lobound(newr), upbound(newr), stride(newr); stride[ti] = 0; lobound[ti] = tbegin; upbound[ti] = tend; const auto dr = (_Order == blas::Layout::RowMajor) ? Range1(r.rank()-1,-1,-1) : Range1(0,r.rank(),1); const auto nr = (_Order == blas::Layout::RowMajor) ? Range1(newr-1,-1,-1) : Range1(0,newr,1); index_value prod_extents = 1; auto it = nr.begin(); for(const auto i : dr) { bool is_tied = false; for(auto j : inds) if(i == j) { is_tied = true; break; } if(is_tied) { stride[ti] += prod_extents; } else { if(*it == ti) ++it; stride[*it] = prod_extents; lobound[*it] = r.lobound()[i]; upbound[*it] = r.upbound()[i]; ++it; } prod_extents *= (r.upbound()[i]-r.lobound()[i]); } return RangeNd<_Order,_Index,_Ordinal>(lobound,upbound,stride); } /// /// tieIndex wrapper taking a variadic list of integers /// template RangeNd<_Order, _Index,_Ordinal> tieIndex(const RangeNd<_Order, _Index, _Ordinal>& r, size_t i0, const _args&... rest) { const auto size = 1 + sizeof...(rest); std::array inds = { i0, static_cast(rest)...}; return tieIndex(r,inds); } template class boxrange_iteration_order< btas::RangeNd<_Order, _Index, _Ordinal> > { public: enum {row_major = boxrange_iteration_order::row_major, other = boxrange_iteration_order::other, column_major = boxrange_iteration_order::column_major}; static constexpr int value = (_Order == blas::Layout::RowMajor) ? 
row_major : column_major; }; } // namespace btas // // Default range type // namespace btas { namespace DEFAULT { using range = btas::Range; } // namespace DEFAULT } // namespace btas #ifdef BTAS_HAS_BOOST_SERIALIZATION namespace boost { namespace serialization { /// boost serialization template void serialize(Archive& ar, btas::RangeNd<_Order, _Index, _Ordinal>& t, const unsigned int version) { boost::serialization::split_free(ar, t, version); } template void save(Archive& ar, const btas::RangeNd<_Order, _Index, _Ordinal>& t, const unsigned int version) { auto lobound = t.lobound(); auto upbound = t.upbound(); auto ordinal = t.ordinal(); ar << BOOST_SERIALIZATION_NVP(lobound) << BOOST_SERIALIZATION_NVP(upbound) << BOOST_SERIALIZATION_NVP(ordinal); } template void load(Archive& ar, btas::RangeNd<_Order, _Index, _Ordinal>& t, const unsigned int version) { typedef typename btas::BaseRangeNd>::index_type index_type; index_type lobound, upbound; _Ordinal ordinal; ar >> BOOST_SERIALIZATION_NVP(lobound) >> BOOST_SERIALIZATION_NVP(upbound) >> BOOST_SERIALIZATION_NVP(ordinal); t = btas::RangeNd<_Order, _Index, _Ordinal>(std::move(lobound), std::move(upbound), std::move(ordinal)); } } } #endif // serialization to/fro MADNESS archive (github.com/m-a-d-n-e-s-s/madness) namespace madness { namespace archive { template struct ArchiveLoadImpl> { static inline void load(const Archive& ar, btas::RangeNd<_Order, _Index, _Ordinal>& r) { typedef typename btas::BaseRangeNd< btas::RangeNd<_Order, _Index, _Ordinal>>::index_type index_type; index_type lobound{}, upbound{}; _Ordinal ordinal{}; ar& lobound& upbound& ordinal; r = btas::RangeNd<_Order, _Index, _Ordinal>( std::move(lobound), std::move(upbound), std::move(ordinal)); } }; template struct ArchiveStoreImpl> { static inline void store(const Archive& ar, const btas::RangeNd<_Order, _Index, _Ordinal>& r) { ar& r.lobound() & r.upbound() & r.ordinal(); } }; } // namespace archive } // namespace madness #endif /* BTAS_RANGE_H_ */ BTAS-1.0.0/btas/range_iterator.h000066400000000000000000000146201476142407000163600ustar00rootroot00000000000000/* * This file is a part of TiledArray. * Copyright (C) 2013 Virginia Tech * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ #ifndef BTAS_RANGE_ITERATOR_H__INCLUDED #define BTAS_RANGE_ITERATOR_H__INCLUDED #include namespace btas { template class RangeIterator; } // namespace btas namespace std { template void advance(btas::RangeIterator&, typename btas::RangeIterator::difference_type ); template typename btas::RangeIterator::difference_type distance(const btas::RangeIterator&, const btas::RangeIterator&); } // namespace std namespace btas { /// Iterates over a Range of Values /// This is an input iterator that is used to iterate over elements of a \c Range. 
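/// Example (an illustrative sketch; iterators are normally obtained from a range
/// object rather than constructed directly):
/// \code
/// btas::Range1 r(0, 10, 2);                        // 1-d range with stride 2
/// for (auto it = r.begin(); it != r.end(); ++it) {
///   auto i = *it;                                  // visits 0, 2, 4, 6, 8
/// }
/// \endcode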
/// \tparam Value The value type of the iterator /// \tparam Range The range that the iterator references /// \note The range object must define the function /// \c Range::increment(Value&) \c const, and be accessible to /// \c RangeIterator. template class RangeIterator { public: // Standard iterator typedefs typedef Value value_type; ///< Iterator value type typedef const value_type& reference; ///< Iterator reference type typedef const value_type* pointer; ///< Iterator pointer type typedef std::input_iterator_tag iterator_category; ///< Iterator category tag typedef std::ptrdiff_t difference_type; ///< Iterator difference type // additional typedefs typedef Range range_type; ///< Range type /// Copy constructor /// \param other The other iterator to be copied RangeIterator(const RangeIterator& other) : range_(other.range_), current_(other.current_) { } /// Construct an index iterator /// \param v The initial value of the iterator index /// \param c The range that the iterator will reference RangeIterator(const value_type& v, const range_type* c) : range_(c), current_(v) { } /// Copy constructor /// \param other The other iterator to be copied /// \return A reference to this object RangeIterator& operator=(const RangeIterator& other) { current_ = other.current_; range_ = other.range_; return *this; } const range_type* range() const { return range_; } /// Dereference operator /// \return A \c reference to the current data reference operator*() const { return current_; } /// Increment operator /// Increment the iterator /// \return The modified iterator RangeIterator& operator++() { range_->increment(current_); return *this; } /// Increment operator /// Increment the iterator /// \return An unmodified copy of the iterator RangeIterator operator++(int) { RangeIterator temp(*this); range_->increment(current_); return temp; } /// Pointer operator /// \return A \c pointer to the current data pointer operator->() const { return & current_; } void advance(difference_type n) { range_->advance(current_, n); } difference_type distance_to(const RangeIterator& other) const { assert(range_ == other.range_); return range_->distance_to(current_, other.current_); } private: const range_type* range_; ///< The range that the iterator references value_type current_; ///< The current value of the iterator }; // class RangeIterator /// Equality operator /// Compares the iterators for equality. They must reference the same range /// object to be considered equal. /// \tparam Value The value type of the iterator /// \tparam Range The range that the iterator references /// \param left_it The left-hand iterator to be compared /// \param right_it The right-hand iterator to be compared /// \return \c true if the the value and range are equal for the \c left_it /// and \c right_it , otherwise \c false . template bool operator==(const RangeIterator& left_it, const RangeIterator& right_it) { return ((*left_it) == (*right_it)) && (left_it.range() == right_it.range()); } /// Inequality operator /// Compares the iterators for inequality. /// \tparam Value The value type of the iterator /// \tparam Range The range that the iterator references /// \param left_it The left-hand iterator to be compared /// \param right_it The right-hand iterator to be compared /// \return \c true if the the value or range are not equal for the /// \c left_it and \c right_it , otherwise \c false . 
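/// Example (an illustrative sketch):
/// \code
/// btas::Range1 r(0, 4);
/// assert(r.begin() == r.begin());   // same index value, same range object
/// assert(r.begin() != r.end());     // end() holds the upbound index
/// \endcode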
template bool operator!=(const RangeIterator& left_it, const RangeIterator& right_it) { return ((*left_it) != (*right_it)) || (left_it.range() != right_it.range()); } } // namespace btas namespace std { template void advance(btas::RangeIterator& it, typename btas::RangeIterator::difference_type n) { it.advance(n); } template typename btas::RangeIterator::difference_type distance(const btas::RangeIterator& first, const btas::RangeIterator& last) { return first.distance_to(last); } } // namespace std #endif // BTAS_RANGE_ITERATOR_H__INCLUDED BTAS-1.0.0/btas/range_traits.h000066400000000000000000000055751476142407000160460ustar00rootroot00000000000000#ifndef __BTAS_RANGE_TRAITS_H #define __BTAS_RANGE_TRAITS_H 1 #include #include #include namespace btas { /// test T has rank() member template class has_rank { /// true case template static auto __test(U* p) -> decltype(p->rank(), std::true_type()); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; /// test T has index_type template class has_index_type { /// true case template static std::true_type __test(typename U::index_type*); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; /// test T has ordinal_type template class has_ordinal_type { /// true case template static std::true_type __test(typename U::ordinal_type*); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; /// test _Range conforms the TWG.Range concept /// check only index_type, ordinal_type, and rank() member template class is_range { public: static constexpr const bool value = has_index_type<_Range>::value & has_ordinal_type<_Range>::value & has_rank<_Range>::value & has_begin<_Range>::value & has_end<_Range>::value; }; /// test T has extents() member template class has_extent { /// true case template static auto __test(U* p) -> decltype(p->extent(), std::true_type()); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; /// test T has range_size_type template class has_extent_type { /// true case template static std::true_type __test(typename U::extent_type*); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; /// test _Range conforms the TWG.BoxRange concept /// in addition to Range, check extent() member and extent_type template class is_boxrange { public: static constexpr const bool value = is_range<_Range>::value & has_extent<_Range>::value & has_extent_type<_Range>::value; }; template class boxrange_iteration_order { public: enum {row_major = -1, other = 0, column_major = 1}; // must specialize this trait static constexpr const int value = other; }; /// Range Traits template struct range_traits; } // namespace btas #endif // __BTAS_RANGE_TRAITS_H BTAS-1.0.0/btas/serialization.h000066400000000000000000000024671476142407000162360ustar00rootroot00000000000000#ifndef __BTAS_SERIALIZATION_H #define __BTAS_SERIALIZATION_H 1 #include ////// Boost serialization #ifdef BTAS_HAS_BOOST_SERIALIZATION #include #include #include #include #include namespace boost { namespace serialization { // this is needed to serialize efficiently corner cases, like std::vector>>. 
// since bitwise serialization is not portable anyway, this is OK in the context of btas template struct is_bitwise_serializable > : is_bitwise_serializable { }; }} #endif // BTAS_HAS_BOOST_SERIALIZATION ////// MADNESS serialization #if __has_include() # include #else // __has_include() namespace madness::archive { template struct ArchiveSerializeImpl; template struct ArchiveLoadImpl; template struct ArchiveStoreImpl; } // namespace madness::archive #endif // __has_include() #endif BTAS-1.0.0/btas/special/000077500000000000000000000000001476142407000146175ustar00rootroot00000000000000BTAS-1.0.0/btas/special/permute.h000066400000000000000000000015001476142407000164450ustar00rootroot00000000000000#ifndef __BTAS_TARRAY_PERMUTE_H #define __BTAS_TARRAY_PERMUTE_H 1 #include #include #include #include #include #include namespace btas { template std::array<_T, _N> __permute_index (const std::array<_T, _N>& x, const std::array& index) { std::array<_T, _N> y; for (size_type i = 0; i < _N; ++i) { y[i] = x[index[i]]; } return y; } template void permute (const TArray<_T, _N>& x, const std::array& index, TArray<_T, _N>& y) { y.resize(__permute_index(x.shape(), index)); Reindex(std::data(x), std::data(y), __permute_index(x.stride(), index), y.shape()); } } // namespace btas #endif // __BTAS_TARRAY_PERMUTE_H BTAS-1.0.0/btas/special/reindex.h000066400000000000000000000033321476142407000164270ustar00rootroot00000000000000#ifndef __BTAS_TARRAY_REINDEX_H #define __BTAS_TARRAY_REINDEX_H 1 #include #include namespace btas { /// NDloop class for Reindex template struct __NDloop_reindex { /// loop upon construction /// NOTE: pX and pY are passed as a reference of pointer to the next loop /// NOTE: on the other hand, addrX is passed as a value so that offset position (by addrX) is kept in this scope template::type> __NDloop_reindex (const _T*& pX, _T*& pY, size_type addrX, const std::array& strX, const std::array& shapeY) { for (size_type i = 0; i < shapeY[_I-1]; ++i) { __NDloop_reindex<_I+1, _N> loop(pX, pY, addrX+i*strX[_I-1], strX, shapeY); } } }; /// NDloop class for Reindex, specialized for the last index template struct __NDloop_reindex<_N, _N> { /// loop upon construction template __NDloop_reindex (const _T*& pX, _T*& pY, size_type addrX, const std::array& strX, const std::array& shapeY) { for (size_type i = 0; i < shapeY[_N-1]; ++i, ++pY) { *pY = pX[addrX+i*strX[_N-1]]; } } }; /// reindex (i.e. permute) for "any-rank" tensor /// multiple loop is expanded at compile time /// FIXME: how slower than explicit looping? /// if considerably slower, should be specialized for small ranks (_N = 1 ~ 8?) template void Reindex (const _T* pX, _T* pY, const std::array& strX, const std::array& shapeY) { __NDloop_reindex<1, _N> loop(pX, pY, 0, strX, shapeY); } } // namespace btas #endif // __BTAS_TARRAY_REINDEX_H BTAS-1.0.0/btas/storage_traits.h000066400000000000000000000137161476142407000164120ustar00rootroot00000000000000/* * storage_traits.h * * Created on: Dec 27, 2013 * Author: evaleev */ #ifndef BTAS_STORAGE_TRAITS_H_ #define BTAS_STORAGE_TRAITS_H_ #include #include #include namespace btas { /// describes storage traits; user must provide explicit specialization that defined the following types /// \code /// template /// struct storage_traits { /// typedef ... /* e.g., typename _Storage::value_type */ value_type; /// typedef ... /* e.g., typename _Storage::pointer */ pointer; /// typedef ... /* e.g., typename _Storage::const_pointer */ const_pointer; /// typedef ... 
/* e.g., typename _Storage::reference */ reference; /// typedef ... /* e.g., typename _Storage::const_reference */ const_reference; /// typedef ... /* e.g., typename _Storage::size_type */ size_type; /// typedef ... /* e.g., typename _Storage::difference_type */ difference_type; /// typedef ... /* e.g., typename _Storage::iterator */ iterator; /// typedef ... /* e.g., typename _Storage::const_iterator */ const_iterator; /// /// template rebind_t = ... ; // evaluates to _Storage counterpart storing objects of type U /// // e.g. if _Storage is std::vector this should be std::vector::rebind_alloc> /// }; /// \endcode template struct storage_traits; template struct storage_traits<_T*> { typedef typename std::remove_const<_T>::type value_type; typedef _T* pointer; typedef typename std::add_const::type const_pointer; typedef value_type& reference; typedef const value_type& const_reference; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef pointer iterator; typedef const_pointer const_iterator; template using rebind_t = U*; }; template struct storage_traits<_T* const> { typedef typename std::remove_const<_T>::type value_type; typedef _T* pointer; typedef typename std::add_const::type const_pointer; typedef value_type& reference; typedef const value_type& const_reference; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef pointer iterator; typedef const_pointer const_iterator; template using rebind_t = U* const; }; template struct storage_traits> { typedef _T value_type; typedef _T* pointer; typedef typename std::add_const::type const_pointer; typedef _T& reference; typedef const _T& const_reference; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef _T* iterator; typedef typename std::add_const<_T*>::type const_iterator; template using rebind_t = std::valarray; }; template struct storage_traits_base_container { using value_type = typename _Container::value_type; using pointer = typename _Container::pointer; using const_pointer = typename _Container::const_pointer; using reference = typename _Container::reference; using const_reference = typename _Container::const_reference; using iterator = typename _Container::iterator; using const_iterator = typename _Container::const_iterator; using size_type = typename _Container::size_type; using difference_type = typename _Container::difference_type; }; template struct storage_traits> : public storage_traits_base_container> { template using rebind_t = std::array; }; template struct storage_traits> : public storage_traits_base_container> { template using rebind_t = std::vector::template rebind_alloc>; }; template struct storage_traits> : public storage_traits_base_container> { template using rebind_t = varray::template rebind_alloc>; }; template struct storage_traits> : public storage_traits_base_container> { template using rebind_t = infinite_sequence_adaptor>; }; // specialize to const container; N.B. 
T* const is not consistent of container const since the latter passes constness onto values template struct storage_traits<_Storage const> { using value_type = typename storage_traits<_Storage>::value_type; using pointer = typename storage_traits<_Storage>::const_pointer; using const_pointer = typename storage_traits<_Storage>::const_pointer; using reference = typename storage_traits<_Storage>::const_reference; using const_reference = typename storage_traits<_Storage>::const_reference; using iterator = typename storage_traits<_Storage>::const_iterator; using const_iterator = typename storage_traits<_Storage>::const_iterator; using size_type = typename storage_traits<_Storage>::size_type; using difference_type = typename storage_traits<_Storage>::difference_type; template using rebind_t = std::add_const_t::template rebind_t>; }; /// test if _Storage conforms to the TWG.Storage concept /// in addition to Storage, check extent() member and extent_type template class is_storage { public: static constexpr const bool value = has_begin<_Storage>::value & has_end<_Storage>::value; }; } // namespace btas #endif /* BTAS_STORAGE_TRAITS_H_ */ BTAS-1.0.0/btas/tarray.h000066400000000000000000000006461476142407000146600ustar00rootroot00000000000000#ifndef __BTAS_TARRAY_H #define __BTAS_TARRAY_H 1 #include namespace btas { /// Fixed-rank version of TArray template> using TArray = Tensor<_T, RangeNd<_Order, std::array >, _Container >; } // namespace btas #endif // __BTAS_TARRAY_H BTAS-1.0.0/btas/tensor.h000066400000000000000000000717451476142407000147000ustar00rootroot00000000000000#ifndef __BTAS_TENSOR_H #define __BTAS_TENSOR_H 1 #include #include #include #include #include #include #include #include #include #include #include namespace btas { /** BTAS implementation of "dense" tensor class that models \ref labelTWGTensor "TWG.BoxTensor" concept @tparam _T element type, Tensor contains values of this type @tparam _Range Range type, models \ref labelTWGRange "TWG.Range" concept @tparam _Storage Storage type, models \ref labelTWGStorage "TWG.Storage" concept */ template class Tensor { public: static_assert(std::is_same<_T, typename _Storage::value_type>::value, "Tensor<_T,_Range,_Storage> instantiated but _T != _Storage::value_type"); /// type of underlying data storage typedef _Storage storage_type; /// type of Range typedef _Range range_type; /// type of ordinal typedef typename _Range::ordinal_type ordinal_type; /// type of 1-index typedef typename _Range::index1_type index1_type; /// type of index typedef typename _Range::index_type index_type; ///\name Container requirements (c++std:[container.requirements.general]). ///@{ /// type of an element typedef _T value_type; /// element pointer typedef typename storage_traits::pointer pointer; /// constant element pointer typedef typename storage_traits::const_pointer const_pointer; /// type of an lvalue reference to an element typedef value_type& reference; /// type of a const lvalue reference to an element typedef const value_type& const_reference; /// element iterator typedef typename storage_traits::iterator iterator; /// constant element iterator typedef typename storage_traits::const_iterator const_iterator; /// size type typedef typename storage_traits::size_type size_type; ///@} /// the numeric type supporting `value_type` /// \note this is `value_type` if this is a plain (non-recursive) Tensor, but differs from `value_type` for /// recursive Tensor's, e.g. 
for `Tensor>` this is `T` whereas `value_type` is `Tensor` typedef typename numeric_type::type numeric_type; /// compute type of Tensor with different element type template using rebind_t = Tensor::template rebind_t>; template struct rebind_numeric; template struct rebind_numeric::value>> { using VU = typename V::template rebind_numeric::type; using type = Tensor::template rebind_t>; }; template struct rebind_numeric::value>> { using type = Tensor::template rebind_t>; }; /// compute type of Tensor with different numeric type template using rebind_numeric_t = typename rebind_numeric::type; /// compute type of Tensor with different range type template using rebind_range_t = Tensor<_T, Range, _Storage>; /// compute type of Tensor with different storage type template using rebind_storage_t = Tensor<_T, _Range, Storage>; private: struct Enabler {}; public: Tensor() = default; ~Tensor() = default; /// constructor with index extent template explicit Tensor(const size_type& first, const _args&... rest) : range_(range_type(first, rest...)) { // TODO make this disableable in all constructors // assert(range_.ordinal(range_.lobound()) == 0); array_adaptor::resize(storage_, range_.area()); } /// construct from \c range, allocate data, but not initialized template explicit Tensor(const Range& range, typename std::enable_if::value>::type* = 0) : range_(range.lobound(), range.upbound()) { array_adaptor::resize(storage_, range_.area()); } /// construct from \c range object, set all elements to \c v template Tensor(const Range& range, value_type v, typename std::enable_if::value>::type* = 0) : range_(range.lobound(), range.upbound()) { array_adaptor::resize(storage_, range_.area()); std::fill(begin(), end(), v); } /// construct from \c range object, copy elements from \c vec template Tensor(const Range& range, U* vec, typename std::enable_if::value>::type* = 0) : range_(range.lobound(), range.upbound()) { const auto size = range_.area(); array_adaptor::resize(storage_, size); std::copy(vec, vec + size, begin()); } /// construct from \c range and \c storage template Tensor(const Range& range, const Storage& storage, typename std::enable_if::value & not std::is_same::value & not std::is_same::value>::type* = 0) : range_(range.lobound(), range.upbound()), storage_(storage) { using std::size; if (size(storage_) != range_.area()) array_adaptor::resize(storage_, range_.area()); } /// copy-copy-construct from \c range and \c storage Tensor(const range_type& range, const storage_type& storage) : range_(range.ordinal(*range.begin()) == 0 ? range : range_type(range.lobound(), range.upbound())) , storage_(storage) { using std::size; if (size(storage_) != range_.area()) array_adaptor::resize(storage_, range_.area()); } /// copy-move-construct from \c range and \c storage Tensor(const range_type& range, storage_type&& storage) : range_(range.ordinal(*range.begin()) == 0 ? range : range_type(range.lobound(), range.upbound())) , storage_(std::move(storage)) { using std::size; if (size(storage_) != range_.area()) array_adaptor::resize(storage_, range_.area()); } /// move-construct from \c range and \c storage Tensor(range_type&& range, storage_type&& storage) : range_(range.ordinal(*range.begin()) == 0 ? std::move(range) : range_type(range.lobound(), range.upbound())) , storage_(std::move(storage)) { using std::size; if (size(storage_) != range_.area()) array_adaptor::resize(storage_, range_.area()); } /// Construct an evaluated tensor /// This constructor will allocate memory for \c range.area() elements. 
Each element /// will be initialized as: /// \code /// for(auto&& idx: range) /// (*this)[idx] = op(*(it++)); /// \endcode /// \tparam Range An input Range type. /// \tparam InIter An input iterator type. /// \tparam Op A unary operation type /// \param range the input range type /// \param first An input iterator for the argument /// \param op The unary operation to be applied to the argument data template Tensor(const Range& range, InIter it, const Op& op, typename std::enable_if::value>::type* = 0) : range_(range.lobound(), range.upbound()) { auto size = range_.area(); array_adaptor::resize(storage_, size); std::transform(it, it + size, begin(), op); } /// copy constructor /// It will accept Tensors and TensorViews template ::value>::type> Tensor(const _Tensor& x) : range_(x.range().lobound(), x.range().upbound()){ auto size = range_.area(); array_adaptor::resize(storage_, size); std::copy(x.cbegin(), x.cend(), storage_.begin()); } /// copy constructor /// @note this makes a shallow copy of @п х if `storage_type` has shallow-copy semantics; if need a deep copy /// in that case use Tensor::clone() /// @sa Tensor::clone() Tensor(const Tensor& x) : range_(x.range()), storage_(x.storage_) {} /// move constructor Tensor(Tensor&& x) : range_(std::move(x.range())), storage_(std::move(x.storage_)) {} /// @return deep copy of `*this`, even if `storage_type` is shallow copy Tensor clone() const { return Tensor(range(), storage_type(storage().cbegin(), storage().cend())); } /// returns element-wise conjugate of a *this tensor if have complex value type std::conditional_t, Tensor, const Tensor&> conj() const { if constexpr (is_complex_type_v<_T>) { Tensor conjT = clone(); auto conj_ptr = conjT.data(); auto self_ptr = data(); auto self_size = size(); for (auto i = 0; i < self_size; ++i) { *(conj_ptr + i) = btas::impl::conj(*(self_ptr + i)); } return conjT; } else { return *this; } } /// copy assignment operator template ::value && not std::is_same::value>::type> Tensor& operator=(const _Tensor& x) { using std::begin; using std::cbegin; using std::cend; using std::end; range_ = range_type(x.range().lobound(), x.range().upbound()); array_adaptor::resize(storage_, range_.area()); std::copy(cbegin(x), cend(x), begin(storage_)); return *this; } /// copy assignment operator template ::value>::type, class = typename std::enable_if< std::is_same::value>::type> Tensor& operator=(const _Tensor& x) { using std::begin; using std::cbegin; using std::cend; using std::end; range_ = range_type(x.range().lobound(), x.range().upbound()); if (&x.storage() != &this->storage()) { // safe to copy immediately, unless copying into self array_adaptor::resize(storage_, range_.area()); std::copy(cbegin(x), cend(x), begin(storage_)); } else { // must use temporary if copying into self :( storage_type new_storage; array_adaptor::resize(new_storage, range_.area()); std::copy(cbegin(x), cend(x), begin(new_storage)); using std::swap; swap(storage_, new_storage); } return *this; } /// copy assignment Tensor& operator=(const Tensor& x) { range_ = x.range_; storage_ = x.storage_; return *this; } /// move assignment operator Tensor& operator=(Tensor&& x) { using std::swap; swap(range_, x.range_); swap(storage_, x.storage_); return *this; } /// conversion to value_type, asserts that \c rang().area()==1 explicit operator value_type() const { BTAS_ASSERT(range_.area() == 1); return *data(); } /// assign scalar to this (i.e. 
fill this with scalar) template < typename Scalar, typename = typename std::enable_if::type, Tensor>::value && not btas::is_boxtensor::type>::value>::type, typename = btas::void_t(std::declval()))>> Tensor& operator=(Scalar&& v) { using std::begin; using std::end; std::fill(begin(storage_), end(storage_), static_cast(v)); return *this; } /// number of indices (tensor rank) size_type rank() const { return range_.rank(); } /// \return range object const range_type& range() const { return range_; } /// \return range's extent object typename range_type::extent_type extent() const { return range_.extent(); } /// \return extent of range along dimension \c d typename range_type::extent_type::value_type extent(size_t d) const { return range_.extent(d); } /// \return storage object const storage_type& storage() const { return storage_; } /// \return storage object storage_type& storage() { return storage_; } ///\name Container requirements (c++std:[container.requirements.general]). ///@{ /// \return const iterator begin const_iterator begin() const { return cbegin(); } /// \return const iterator end const_iterator end() const { return cend(); } /// \return const iterator begin const_iterator cbegin() const { using std::cbegin; return cbegin(storage_); } /// \return const iterator end const_iterator cend() const { using std::cend; return cend(storage_); } /// \return iterator begin iterator begin() { using std::begin; return begin(storage_); } /// \return iterator end iterator end() { using std::end; return end(storage_); } /// \return number of elements size_type size() const { return range_.area(); } /// \return maximum number of elements that can be be contained Tensor size_type max_size() const { return std::numeric_limits::max(); } /// test whether Tensor is empty bool empty() const { return range_.area() == 0; } /// swap this and x void swap(Tensor& x) { using std::swap; swap(range_, x.range_); swap(storage_, x.storage_); } ///@} // container requirements /// @name Element accessors without range check /// @{ /// accesses element using its index, given as a pack of integers template > && ...)>> typename std::enable_if...>::value, const_reference>::type operator()( Index&&... 
  /// @name Element accessors without range check
  /// @{

  /// accesses element using its index, given as a pack of integers
  template <typename... Index,
            typename = std::enable_if_t<(std::is_convertible_v<std::decay_t<Index>, std::size_t> && ...)>>
  typename std::enable_if<btas::are_integral<Index...>::value, const_reference>::type operator()(
      Index&&... idx) const {
    return storage_[range_.ordinal(std::forward<Index>(idx)...)];
  }

  template <typename Index>
  typename std::enable_if<is_index<Index>::value, const_reference>::type operator()(const Index& index) const {
    return storage_[range_.ordinal(index)];
  }

  template <typename Index>
  typename std::enable_if<is_index<Index>::value, const_reference>::type operator[](const Index& index) const {
    return storage_[range_.ordinal(index)];
  }

  template <typename Index1>
  typename std::enable_if<std::is_integral<Index1>::value, const_reference>::type operator()(
      std::initializer_list<Index1> index) const {
    return this->operator()<std::initializer_list<Index1>>(index);
  }

  /// accesses element using its ordinal value
  /// \param indexord ordinal value of the index
  template <typename IndexOrdinal>
  typename std::enable_if<std::is_integral<IndexOrdinal>::value, const_reference>::type operator[](
      const IndexOrdinal& indexord) const {
    // can't distinguish between operator[](Index) and operator[](ordinal),
    // hence assume the ordinal interpretation unless this->rank()==1, in which case insist on at_ordinal()
    BTAS_ASSERT(this->range_.rank() != 1 &&
                "use btas::Tensor::operator[](index) or "
                "btas::Tensor::at_ordinal(index_ordinal) if this->range().rank()==1");
    return at_ordinal(indexord);
  }

  /// accesses element using its ordinal value
  /// \param indexord ordinal value of the index
  template <typename IndexOrdinal>
  typename std::enable_if<std::is_integral<IndexOrdinal>::value, const_reference>::type at_ordinal(
      const IndexOrdinal& indexord) const {
    return storage_[indexord];
  }

  template <typename... Index,
            typename = std::enable_if_t<(std::is_convertible_v<std::decay_t<Index>, std::size_t> && ...)>>
  typename std::enable_if<btas::are_integral<typename std::decay<Index>::type...>::value, reference>::type operator()(
      Index&&... idx) {
    return storage_[range_.ordinal(std::forward<Index>(idx)...)];
  }

  template <typename Index>
  typename std::enable_if<is_index<Index>::value, reference>::type operator()(const Index& index) {
    return storage_[range_.ordinal(index)];
  }

  template <typename Index>
  typename std::enable_if<is_index<Index>::value, reference>::type operator[](const Index& index) {
    return storage_[range_.ordinal(index)];
  }

  template <typename Index1>
  typename std::enable_if<std::is_integral<Index1>::value, reference>::type operator()(
      std::initializer_list<Index1> index) {
    return this->operator()<std::initializer_list<Index1>>(index);
  }

  /// accesses element using its ordinal value
  /// \param indexord ordinal value of the index
  template <typename IndexOrdinal>
  typename std::enable_if<std::is_integral<IndexOrdinal>::value, reference>::type operator[](
      const IndexOrdinal& indexord) {
    // can't distinguish between operator[](Index) and operator[](ordinal),
    // hence assume the ordinal interpretation unless this->rank()==1, in which case insist on at_ordinal()
    BTAS_ASSERT(this->range_.rank() != 1 &&
                "use btas::Tensor::operator[](index) or "
                "btas::Tensor::at_ordinal(index_ordinal) if this->range().rank()==1");
    return at_ordinal(indexord);
  }

  /// accesses element using its ordinal value
  /// \param indexord ordinal value of the index
  template <typename IndexOrdinal>
  typename std::enable_if<std::is_integral<IndexOrdinal>::value, reference>::type at_ordinal(
      const IndexOrdinal& indexord) {
    return storage_[indexord];
  }

  ///@}  // element accessors without range check

  /// @name Element accessors with range check
  /// @{

  /// accesses element using its index, given as a pack of integers
  template <typename... Index,
            typename = std::enable_if_t<(std::is_convertible_v<std::decay_t<Index>, std::size_t> && ...)>>
  const_reference at(Index&&... idx) const {
    assert(sizeof...(idx) == range_.rank());
    assert(range_.includes(std::array{std::forward<Index>(idx)...}));
    return storage_[range_.ordinal(std::forward<Index>(idx)...)];
  }

  template <typename Index>
  typename std::enable_if<is_index<Index>::value, const_reference>::type at(const Index& index) const {
    using std::size;
    assert(size(index) == range_.rank());
    assert(range_.includes(index));
    return storage_[range_.ordinal(index)];
  }

  //    /// accesses element using its ordinal value
  //    /// \param indexord ordinal value of the index
  //    template <typename IndexOrdinal>
  //    typename std::enable_if<std::is_integral<IndexOrdinal>::value, const_reference>::type
  //    at (const IndexOrdinal& indexord) const
  //    {
  //      assert( range_.includes(indexord) );
  //      return storage_[ indexord ];
  //    }
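  // Illustrative sketch of the accessors: an index can be an integer pack, an index container,
  // an initializer list, or (via at_ordinal) a plain ordinal; for rank-1 tensors operator[](integer)
  // is ambiguous and asserts, so use at_ordinal() there:
  //
  //   btas::Tensor<double> t(2, 3);            // row-major by default
  //   t(1, 2) = 7.0;                           // integer pack, no range check
  //   double x = t.at(1, 2);                   // same element, range-checked
  //   std::vector<std::size_t> idx{1, 2};
  //   double y = t[idx];                       // container index
  //   double z = t.at_ordinal(5);              // ordinal 1*3 + 2 == 5 for this range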
  /// accesses element using its index, given as a pack of integers
  template <typename... Index,
            typename = std::enable_if_t<(std::is_convertible_v<std::decay_t<Index>, std::size_t> && ...)>>
  reference at(Index&&... idx) {
    assert(sizeof...(idx) == range_.rank());
    assert(range_.includes(std::array{std::forward<Index>(idx)...}));
    return storage_[range_.ordinal(std::forward<Index>(idx)...)];
  }

  template <typename Index>
  typename std::enable_if<is_index<Index>::value, reference>::type at(const Index& index) {
    using std::size;
    assert(size(index) == range_.rank());
    assert(range_.includes(index));
    return storage_[range_.ordinal(index)];
  }

  //    /// accesses element using its ordinal value
  //    /// \param indexord ordinal value of the index
  //    template <typename IndexOrdinal>
  //    typename std::enable_if<std::is_integral<IndexOrdinal>::value, reference>::type
  //    at (const IndexOrdinal& indexord)
  //    {
  //      assert( range_.includes(indexord) );
  //      return storage_[ indexord ];
  //    }

  ///@}  // element accessors with range check

  /// resize array with range object
  template <typename Range>
  void resize(const Range& range, typename std::enable_if<is_boxrange<Range>::value, Enabler>::type = Enabler()) {
    range_ = range_type(range.lobound(), range.upbound());
    array_adaptor<storage_type>::resize(storage_, range_.area());
  }

  /// resize array with extent object
  template <typename Extent>
  void resize(const Extent& extent,
              typename std::enable_if<is_index<Extent>::value && not is_boxrange<Extent>::value, Enabler>::type =
                  Enabler()) {
    range_ = range_type(extent);
    array_adaptor<storage_type>::resize(storage_, range_.area());
  }

  /// clear all members
  void clear() {
    range_ = range_type();
    storage_ = storage_type();
  }

  //  ========== Finished Public Interface and Its Reference Implementations ==========

  //
  //  Here come Non-Standard members (to be discussed)
  //

  /// Constructs a Tensor slice defined by a subrange for each dimension
  template <typename U>
  TensorView<value_type, range_type, const storage_type> slice(std::initializer_list<Range1d<U>> range1s) const {
    return __make_cview<value_type, range_type, storage_type>(this->range().slice(range1s), this->storage());
  }

  /// addition assignment
  Tensor& operator+=(const Tensor& x) {
    using std::begin;
    using std::cbegin;
    using std::cend;
    using std::end;
    assert(std::equal(begin(range_), end(range_), begin(x.range_)));
    std::transform(cbegin(storage_), cend(storage_), cbegin(x.storage_), begin(storage_), std::plus<value_type>());
    return *this;
  }

  /// addition of tensors
  Tensor operator+(const Tensor& x) const {
    Tensor y = this->clone();
    y += x;
    return y; /* return-value optimization / move semantics apply automatically */
  }

  /// subtraction assignment
  Tensor& operator-=(const Tensor& x) {
    using std::begin;
    using std::cbegin;
    using std::cend;
    using std::end;
    assert(std::equal(begin(range_), end(range_), begin(x.range_)));
    std::transform(cbegin(storage_), cend(storage_), cbegin(x.storage_), begin(storage_), std::minus<value_type>());
    return *this;
  }

  /// subtraction of tensors
  Tensor operator-(const Tensor& x) const {
    Tensor y = this->clone();
    y -= x;
    return y; /* return-value optimization / move semantics apply automatically */
  }

  /// \return bare const pointer to the first element of \c storage_ ;
  /// this makes it possible to call BLAS functions directly
  const_pointer data() const {
    using std::data;
    return data(storage_);
  }

  /// \return bare pointer to the first element of \c storage_ ;
  /// this makes it possible to call BLAS functions directly
  pointer data() {
    using std::data;
    return data(storage_);
  }

  /// fill all elements by val
  void fill(const value_type& val) {
    using std::begin;
    using std::end;
    std::fill(begin(storage_), end(storage_), val);
  }

  /// generate all elements by gen()
  template <typename Generator>
  void generate(Generator&& gen) {
    using std::begin;
    using std::end;
    std::generate(begin(storage_), end(storage_), std::forward<Generator>(gen));
  }

 private:
  range_type range_;      ///< range object
  storage_type storage_;  ///< data
};  // end of Tensor

/// maps Tensor -> Range
template <class _Tensor, class = typename std::enable_if<is_boxtensor<_Tensor>::value>::type>
auto range(const _Tensor& t) -> decltype(t.range()) {
  return t.range();
}

/// maps Tensor -> Range extent
template <class _Tensor, class = typename std::enable_if<is_boxtensor<_Tensor>::value>::type>
auto extent(const _Tensor& t) ->
decltype(t.range().extent()) { return t.range().extent(); } /// maps Tensor -> Range rank template ::value>::type> auto rank(const _Tensor& t) -> decltype(t.rank()) { return t.rank(); } /// Tensor stream output operator /// prints Tensor in row-major form. To be implemented elsewhere using slices. /// \param os The output stream that will be used to print \c t /// \param t The Tensor to be printed /// \return A reference to the output stream template std::ostream& operator<<(std::ostream& os, const Tensor<_T, _Range, _Storage>& t) { os << t.range() << " { "; for (const auto& v : t) { os << v << " "; } os << "}"; return os; } /// The equality operator template ::value>::type, class = typename std::enable_if::value>::type> bool operator==(const _Tensor1& t1, const _Tensor2& t2) { using std::cbegin; using std::cend; if (btas::range_traits>::order == btas::range_traits>::order && is_contiguous(t1.range()) && is_contiguous(t2.range())) // plain Tensor return congruent(t1.range(), t2.range()) && std::equal(cbegin(t1.storage()), cend(t1.storage()), cbegin(t2.storage())); else { // not plain, or different orders auto cong = congruent(t1.range(), t2.range()); if (not cong) return false; typedef TensorView cview1; typedef TensorView cview2; cview1 vt1(t1); cview2 vt2(t2); return std::equal(cbegin(vt1), cend(vt1), cbegin(vt2)); } } /// The inequality operator template ::value>::type, class = typename std::enable_if::value>::type> bool operator!=(const _Tensor1& t1, const _Tensor2& t2) { return !(t1 == t2); } /// Tensor with const number of dimensions template , class = typename std::enable_if::value>::type> using TensorNd = Tensor<_T, RangeNd<_Order, std::array, btas::BoxOrdinal<_Order, std::array>>, _Storage>; } // namespace btas #ifdef BTAS_HAS_BOOST_SERIALIZATION namespace boost { namespace serialization { /// boost serialization template void serialize(Archive& ar, btas::Tensor<_T, _Range, _Storage>& t, const unsigned int version) { boost::serialization::split_free(ar, t, version); } template void save(Archive& ar, const btas::Tensor<_T, _Range, _Storage>& t, const unsigned int version) { const auto& range = t.range(); const auto& storage = t.storage(); ar << BOOST_SERIALIZATION_NVP(range) << BOOST_SERIALIZATION_NVP(storage); } template void load(Archive& ar, btas::Tensor<_T, _Range, _Storage>& t, const unsigned int version) { _Range range; _Storage storage; ar >> BOOST_SERIALIZATION_NVP(range) >> BOOST_SERIALIZATION_NVP(storage); t = btas::Tensor<_T, _Range, _Storage>(range, storage); } } // namespace serialization } // namespace boost #endif // BTAS_HAS_BOOST_SERIALIZATION // serialization to/fro MADNESS archive (github.com/m-a-d-n-e-s-s/madness) namespace madness { namespace archive { template struct ArchiveLoadImpl> { static inline void load(const Archive& ar, btas::Tensor<_T, _Range, _Store>& t) { _Range range{}; _Store store{}; ar& range& store; t = btas::Tensor<_T, _Range, _Store>(std::move(range), std::move(store)); } }; template struct ArchiveStoreImpl> { static inline void store(const Archive& ar, const btas::Tensor<_T, _Range, _Store>& t) { ar& t.range() & t.storage(); } }; } // namespace archive } // namespace madness #endif // __BTAS_TENSOR_H BTAS-1.0.0/btas/tensor_func.h000066400000000000000000000050671476142407000157050ustar00rootroot00000000000000/* * tensor_func.h * * Created on: Dec 30, 2013 * Author: evaleev */ #ifndef BTAS_TENSOR_FUNC_H_ #define BTAS_TENSOR_FUNC_H_ #include namespace btas { // Helper template for TensorViewOf template using Nref = typename 
std::remove_reference<_T>::type; // Maps Tensor -> TensorView, // TensorView -> TensorView // appropriately transferring constness of the storage, that is, // if _T is const, uses const _T::storage_type, otherwise just _T::storage_type template using TensorViewOf = TensorView::value_type, typename Nref<_T>::range_type, typename std::conditional>::value, const typename Nref<_T>::storage_type, typename Nref<_T>::storage_type >::type>; /// Permutes tensor \p t using permutation \p p specified in the preimage ("from") convention template TensorViewOf<_T> permute( _T&& t, _Permutation p) { return make_view( permute(t.range(), p), t.storage() ); } /// Permutes tensor \p t using permutation \p p specified in the preimage ("from") convention template TensorViewOf<_T> permute( _T&& t, std::initializer_list<_U> p) { return make_view( permute(t.range(), p), t.storage() ); } template TensorViewOf<_T> diag(_T&& T) { return make_view(diag(T.range()),T.storage()); } template TensorViewOf<_T> tieIndex(_T&& T, const ArrayType& inds) { return make_view(tieIndex(T.range(),inds),T.storage()); } template TensorViewOf<_T> tieIndex(_T&& T, size_t i0, const _args&... rest) { const auto size = 1 + sizeof...(rest); std::array inds = { i0, static_cast(rest)...}; return make_view(tieIndex(T.range(),inds),T.storage()); } template TensorViewOf<_T> group(_T&& T, size_t istart, size_t iend) { return make_view(group(T.range(),istart,iend),T.storage()); } template TensorViewOf<_T> flatten(_T&& T) { return make_view(flatten(T.range()),T.storage()); } } // namespace btas #endif /* BTAS_TENSOR_FUNC_H_ */ BTAS-1.0.0/btas/tensor_traits.h000066400000000000000000000043501476142407000162520ustar00rootroot00000000000000#ifndef __BTAS_TENSOR_TRAITS_H #define __BTAS_TENSOR_TRAITS_H 1 #include #include #include #include namespace btas { /// test T has range_type template class has_range_type { /// true case template static std::true_type __test(typename U::range_type*); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; /// test T has range_type && is_boxrange::value is true template class has_boxrange_range_type { /// true case template static std::true_type __test(typename U::range_type*, typename std::enable_if::value, void*>::type = 0); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; /// test T has storage_type template class has_storage_type { /// true case template static std::true_type __test(typename U::storage_type*); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; /// checks _Tensor meets the TWG.Tensor concept requirements /// checks only value_type, range_type, storage_type, and rank() member TODO check the rest template class is_tensor { public: static constexpr const bool value = has_value_type<_Tensor>::value & has_range_type<_Tensor>::value & has_storage_type<_Tensor>::value & has_rank<_Tensor>::value; }; /// checks _Tensor meets the TWG.BoxTensor concept requirements template class is_boxtensor { public: static constexpr const bool value = is_tensor<_Tensor>::value && has_boxrange_range_type<_Tensor>::value; }; /// checks _Tensor meets the TWG.BoxTensor concept requirements template class boxtensor_storage_order : public boxrange_iteration_order {}; } // namespace btas #endif // __BTAS_TENSOR_TRAITS_H 
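// Illustrative sketch (not part of the header above): the traits in btas/tensor_traits.h can be
// used to constrain generic code; e.g., btas::Tensor models TWG.BoxTensor, while a plain
// std::vector lacks range_type/storage_type and is rejected:
//
//   static_assert(btas::is_tensor<btas::Tensor<double>>::value, "Tensor models TWG.Tensor");
//   static_assert(btas::is_boxtensor<btas::Tensor<double>>::value, "Tensor models TWG.BoxTensor");
//   static_assert(!btas::is_tensor<std::vector<double>>::value, "std::vector is not a TWG.Tensor");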
BTAS-1.0.0/btas/tensorview.h000066400000000000000000001453331476142407000155660ustar00rootroot00000000000000/*
 * tensorview.h
 *
 *  Created on: Dec 28, 2013
 *      Author: evaleev
 */

#ifndef BTAS_TENSORVIEW_H_
#define BTAS_TENSORVIEW_H_

#include #include #include #include #include #include #include #include

namespace btas {

  // \internal btas::TensorView has a policy that configures whether mutability is constexpr (i.e. part of the
  // type) or tracked at runtime. Such a design reduces the code bloat caused by instantiating view code for
  // const and non-const type variants; it also makes views easier to use by avoiding the metaprogramming needed
  // when dealing with views with constexpr mutability. Runtime tracking of mutability incurs extra storage
  // overhead (TensorView gains an extra bool member) and extra performance overhead due to the extra runtime
  // logic; the runtime overhead can be avoided by disabling BTAS_ASSERT after testing. Expert users can avoid
  // the storage overhead by using constexpr mutability tracking.

  enum TensorViewPolicy_ConstnessPolicy {
    TensorViewPolicy_RuntimeConst = 1,
    TensorViewPolicy_CompiletimeConst = 0
  };

  /// TensorViewPolicy configures behavior of certain features of TensorView
  /// \tparam ConstnessPolicy If set to TensorViewPolicy_RuntimeConst, constness of data access is checked at
  /// runtime. This involves extra space overhead (enough to store a boolean readwrite flag). Non-const data
  /// access members will also check whether readwrite is set using BTAS_ASSERT (hence the runtime overhead can
  /// be eliminated after testing). This feature is needed if you want to use a single TensorView type
  /// for mutable (non-const) and immutable (const) views.
  template <TensorViewPolicy_ConstnessPolicy ConstnessPolicy = TensorViewPolicy_CompiletimeConst>
  struct TensorViewPolicy {
    /// true if constness tracked at runtime
    static constexpr bool runtimeconst = (ConstnessPolicy == TensorViewPolicy_RuntimeConst);
  };
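  // Illustrative sketch of choosing a policy (the make_* helpers are defined later in this header).
  // With the default compile-time policy, const and non-const views are distinct types; with the
  // runtime policy a single TensorRWView type carries a writability flag checked via BTAS_ASSERT:
  //
  //   btas::Tensor<double> t(2, 2);
  //   auto v  = btas::make_view(t.range(), t.storage());    // writable; mutability part of the type
  //   auto cv = btas::make_cview(t.range(), t.storage());   // read-only type
  //   auto rw = btas::make_rwview(t.range(), t.storage());  // writability tracked at runtime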
  namespace detail {

    struct bool_wrapper {
      bool value;
      bool_wrapper() = default;
      bool_wrapper(const bool_wrapper& other) = default;
      bool_wrapper(bool_wrapper&& other) = default;
      inline bool_wrapper(bool b) : value(b) {}
      inline operator bool() const noexcept { return value; }
      inline bool operator()() const noexcept { return value; }
    };
    inline bool operator==(const bool_wrapper& one, const bool_wrapper& two) { return one.value == two.value; }

    /// Helper class to implement the constness logic, as well as to guarantee empty
    /// base optimization for constexpr constness policy.
    template <typename Policy, typename Storage>
    struct TensorViewMutabilityImpl
        : public std::conditional<
              std::is_same<Policy, TensorViewPolicy<TensorViewPolicy_RuntimeConst>>::value &&
                  not std::is_const<Storage>::value,
              bool_wrapper, btas::detail::bool_type<not std::is_const<Storage>::value>>::type {
      using impl_type = typename std::conditional<
          std::is_same<Policy, TensorViewPolicy<TensorViewPolicy_RuntimeConst>>::value &&
              not std::is_const<Storage>::value,
          bool_wrapper, btas::detail::bool_type<not std::is_const<Storage>::value>>::type;

      /// By default, make TensorView mutable if Storage is mutable and Policy is constexpr;
      /// constructs a default TensorViewMutabilityImpl
      TensorViewMutabilityImpl() : TensorViewMutabilityImpl(make_default()) {}

      TensorViewMutabilityImpl(TensorViewMutabilityImpl& other) : impl_type(other) {}
      TensorViewMutabilityImpl(const TensorViewMutabilityImpl&) : impl_type(false) {}

      TensorViewMutabilityImpl& operator=(TensorViewMutabilityImpl& other) {
        // assign the base subobject; assigning *this here would recurse into this operator
        static_cast<impl_type&>(*this) = static_cast<impl_type&>(other);
        return *this;
      }
      TensorViewMutabilityImpl& operator=(const TensorViewMutabilityImpl& other) {
        static_cast<impl_type&>(*this) = impl_type(false);
        return *this;
      }

      TensorViewMutabilityImpl(TensorViewMutabilityImpl&&) = default;
      TensorViewMutabilityImpl& operator=(TensorViewMutabilityImpl&&) = default;

      TensorViewMutabilityImpl(bool is_mutable) : impl_type(is_mutable) {}

      static constexpr TensorViewMutabilityImpl make_default() {
        return TensorViewMutabilityImpl(std::is_same<Policy, TensorViewPolicy<TensorViewPolicy_RuntimeConst>>::value
                                            ? false
                                            : not std::is_const<Storage>::value);
      }
    };
  }  // namespace detail

  /// View (aka generalized slice) of a tensor

  /**
      @tparam _T apparent element type, TensorView will present tensor elements as values of this type
      @tparam _Range Range type
      @tparam _Storage Storage type
  */
  template <typename _T, class _Range = btas::DEFAULT::range, class _Storage = btas::DEFAULT::storage<_T>,
            class _Policy = btas::TensorViewPolicy<>>
  class TensorView : private detail::TensorViewMutabilityImpl<_Policy, _Storage> {
    typedef detail::TensorViewMutabilityImpl<_Policy, _Storage> mutability_impl_type;

   public:
    /// type of an element
    typedef _T value_type;

    /// type of a pointer to an element
    typedef value_type* pointer;

    /// type of a pointer to a const element
    typedef const value_type* const_pointer;

    /// type of an lvalue reference to an element
    typedef value_type& reference;

    /// type of a const lvalue reference to an element
    typedef const value_type& const_reference;

    /// type of Range
    typedef _Range range_type;

    /// type of ordinal
    typedef typename _Range::ordinal_type ordinal_type;

    /// type of 1-index
    typedef typename _Range::index1_type index1_type;

    /// type of index
    typedef typename _Range::index_type index_type;

    /// type of underlying data storage
    typedef _Storage storage_type;

    /// type of data storage reference
    typedef std::reference_wrapper<storage_type> storageref_type;

    /// size type
    typedef typename storage_traits<storage_type>::size_type size_type;

    /// element iterator
    typedef TensorViewIterator<range_type, storage_type> iterator;

    /// element iterator
    typedef TensorViewIterator<range_type, const storage_type> const_iterator;

    /// numeric type
    typedef typename numeric_type<value_type>::type numeric_type;

   private:
    struct Enabler {};

    /// use this to disable non-const members
    static constexpr bool constexpr_is_writable() {
      return _Policy::runtimeconst || not std::is_const<storage_type>::value;
    }

   public:
    /// default constructor creates an uninitialized view
    TensorView() : range_(), storageref_(*((storage_type*)nullptr)) {}

    /// destructor
    ~TensorView() = default;

    /// construct from \c range and \c storageref ; write access must be passed explicitly if \c _Policy requires
    template <typename Range, typename Storage,
              class = typename std::enable_if<std::is_same<typename std::decay<Range>::type, range_type>::value>::type>
    TensorView(Range&& range, Storage&& storageref, bool can_write = mutability_impl_type::make_default())
        : mutability_impl_type(can_write),
          range_(std::forward<Range>(range)),
          storageref_(std::forward<Storage>(storageref)) {}

    /// conversion from const Tensor into TensorConstView
    template <class _Tensor, class Storage = _Storage,
              class = typename std::enable_if<is_boxtensor<_Tensor>::value && std::is_const<Storage>::value &&
                                              std::is_same<typename std::decay<typename _Tensor::storage_type>::type,
                                                           typename std::decay<Storage>::type>::value>::type>
    TensorView(const _Tensor& x)
        : mutability_impl_type(false), range_(x.range()), storageref_(std::cref(x.storage())) {}

    /// conversion from const Tensor to non-const View only possible if \c
    /// Policy::runtimeconst is \c true
    /// \note this is not explicit to
allow simple assignments like \code TensorView view = tensor; \endcode template < class _Tensor, class Storage = _Storage, class Policy = _Policy, class = typename std::enable_if< is_boxtensor<_Tensor>::value && not std::is_const::value && Policy::runtimeconst && std::is_same< typename std::decay::type, typename std::decay::type>::value>::type> TensorView(const _Tensor& x) : mutability_impl_type(false), range_(x.range()), storageref_(std::ref(const_cast(x.storage()))) {} /// this constructor exists to generate a readable error upon conversion /// from const Tensor to compile-time non-const View template TensorView( const _Tensor& x, typename std::enable_if< is_boxtensor<_Tensor>::value && not std::is_const::value && not Policy::runtimeconst && std::is_same< typename std::decay::type, typename std::decay::type>::value>::type* = nullptr) : TensorView() { static_assert(!is_boxtensor<_Tensor>::value, "attempt to create a compile-time-const TensorView from " "a const Tensor"); } /// conversion from non-const Tensor template < class _Tensor, class Storage = _Storage, class = typename std::enable_if< is_boxtensor<_Tensor>::value && not std::is_const<_Tensor>::value && std::is_same< typename _Tensor::storage_type, Storage>::value>::type> TensorView(_Tensor& x) : mutability_impl_type(true), range_(x.range()), storageref_(std::ref(x.storage())) {} /// conversion from non-const TensorView template < class __T, class __Range, class __Storage, class __Policy, class = typename std::enable_if< not std::is_const<__Storage>::value && std::is_same< __Storage, _Storage>::value>::type> explicit TensorView(TensorView<__T, __Range, __Storage, __Policy>& x) : range_(x.range()), storageref_(std::ref(x.storage())), mutability_impl_type(x) {} TensorView (const TensorView& x) = default; TensorView& operator= (const TensorView& x) = default; TensorView (TensorView&& x) = default; TensorView& operator= (TensorView&& x) = default; /// number of indices (tensor rank) size_type rank () const { return range_.rank(); } /// \return number of elements size_type size () const { return range_.area(); } /// \return range object const range_type& range() const { return range_; } /// \param d dimension /// \return subrange for dimension \d const Range1d range(size_t d) const { return range_.range(d); } /// \return range's extent object typename range_type::extent_type extent() const { return range_.extent(); } /// \return extent of range along dimension \c d typename range_type::extent_type::value_type extent(size_t d) const { return range_.extent(d); } /// \return storage object const storage_type& storage() const { return storageref_.get(); } /// \return storage object storage_type& storage() { assert_writable(); return storageref_.get(); } /// test whether TensorView is empty bool empty() const { return range_.area() == 0; } /// \return const iterator begin const_iterator begin() const { return cbegin(); } /// \return begin iterator iterator begin() { assert_writable(); return iterator(range().begin(), storage()); } /// \return const end iterator const_iterator end() const { return cend(); } /// \return const end iterator iterator end() { assert_writable(); return iterator(range().end(), storageref_); } /// \return const iterator begin, even if this is not itself const const_iterator cbegin() const { return const_iterator(range().begin(), storage()); } /// \return const iterator end, even if this is not itself const const_iterator cend() const { return const_iterator(range().end(), storage()); } /// Immutable access to an 
element without range check. /// Available when \c value_type == \c storage_type::value_type. /// \return const reference to the element indexed by \c index template typename std::enable_if::type...>::value && std::is_same::value, const_reference>::type operator() (Index&& ... index) const { return storageref_.get()[ range_.ordinal(std::forward(index)...) ]; } /// Immutable access to an element without range check. /// Available when \c value_type == \c storage_type::value_type /// \return const reference to the element indexed by \c index template typename std::enable_if::value && std::is_same::value, const_reference >::type operator() (const Index& index) const { return storageref_.get()[range_.ordinal(index)]; } /// Mutable access to an element without range check. /// Available when \c value_type == \c storage_type::value_type. /// \return const reference to the element indexed by \c index template typename std::enable_if::type...>::value && std::is_same::value && TensorView::constexpr_is_writable(), reference>::type operator() (Index&& ... index) { assert_writable(); return storageref_.get()[ range_.ordinal(std::forward(index)...) ]; } /// Mutable access to an element without range check (rank() == general) /// Available when \c value_type == \c storag_type::value_type /// \return reference to the element indexed by \c index template typename std::enable_if::value && std::is_same::value && TensorView::constexpr_is_writable(), reference >::type operator() (const Index& index) { assert_writable(); return storageref_.get()[range_.ordinal(index)]; } /// Immutable access to an element without range check. /// Available when \c value_type != \c storage_type::value_type. /// \return value of the element indexed by \c index , converted to \c value_type template typename std::enable_if::type...>::value && not std::is_same::value, value_type>::type operator() (Index&& ... index) const { return static_cast(storageref_.get()[ range_.ordinal(std::forward(index)...) ]); } /// Immutable access to an element without range check (rank() == general) /// Available when \c value_type != \c storage_type::value_type /// \return value of the element indexed by \c index , converted to \c value_type template typename std::enable_if::value && not std::is_same::value, value_type >::type operator() (const Index& index) const { return static_cast(storageref_.get()[range_.ordinal(index)]); } /// Immutable access to an element with range check. /// Available when \c value_type == \c storage_type::value_type. /// \return const reference to the element indexed by \c index template typename std::enable_if::type...>::value && std::is_same::value, const_reference>::type at (Index&& ... index) const { BTAS_ASSERT( range_.includes(std::forward(index)...) ); return this->operator()(std::forward(index)...); } /// Immutable access to an element with range check. /// Available when \c value_type == \c storage_type::value_type. /// \return const reference to the element indexed by \c index template typename std::enable_if::value && std::is_same::value, const_reference>::type at (const Index& index) const { BTAS_ASSERT( range_.includes(index) ); return this->operator()(index); } /// Mutable access to an element with range check. /// Available when \c value_type == \c storage_type::value_type. /// \return reference to the element indexed by \c index template typename std::enable_if::type...>::value && std::is_same::value && TensorView::constexpr_is_writable(), reference>::type at (Index&& ... 
index) { assert_writable(); BTAS_ASSERT( range_.includes(std::forward(index)...) ); return this->operator()(std::forward(index)...); } /// Mutable access to an element with range check. /// Available when \c value_type == \c storage_type::value_type. /// \return reference to the element indexed by \c index template typename std::enable_if::value && std::is_same::value && TensorView::constexpr_is_writable(), reference>::type at (const Index& index) { assert_writable(); BTAS_ASSERT( range_.includes(index) ); return this->operator()(index); } /// Immutable access to an element with range check. /// Available when \c value_type != \c storage_type::value_type. /// \return the element value indexed by \c index , converted to \c value_type template typename std::enable_if::type...>::value && not std::is_same::value, value_type>::type at (Index&& ... index) const { BTAS_ASSERT( range_.includes(std::forward(index)...) ); return this->operator()(std::forward(index)...); } /// Immutable access to an element with range check. /// Available when \c value_type != \c storage_type::value_type. /// \return the element value indexed by \c index , converted to \c value_type template typename std::enable_if::value && not std::is_same::value, value_type>::type at (const Index& index) const { BTAS_ASSERT( range_.includes(index) ); return this->operator()(index); } /// swap this and x void swap (TensorView& x) noexcept { using std::swap; swap(range_, x.range_); swap(storageref_, x.storageref_); swap(static_cast(*this), static_cast(x)); } // ========== Finished Public Interface and Its Reference Implementations ========== // // Here come Non-Standard members (to be discussed) // #if 0 /// addition assignment TensorView& operator+= (const TensorView& x) { assert( std::equal(range_.begin(), range_.end(), x.range_.begin()) ); std::transform(storageref_.begin(), storageref_.end(), x.storageref_.begin(), storageref_.begin(), std::plus()); return *this; } /// addition of tensors TensorView operator+ (const TensorView& x) const { TensorView y(*this); y += x; return y; /* automatically called move semantics */ } /// subtraction assignment TensorView& operator-= (const TensorView& x) { assert( std::equal(range_.begin(), range_.end(), x.range_.begin())); std::transform(storageref_.begin(), storageref_.end(), x.storageref_.begin(), storageref_.begin(), std::minus()); return *this; } /// subtraction of tensors TensorView operator- (const TensorView& x) const { TensorView y(*this); y -= x; return y; /* automatically called move semantics */ } /// fill all elements by val void fill (const value_type& val) { std::fill(storageref_.begin(), storageref_.end(), val); } /// generate all elements by gen() template void generate (Generator gen) { std::generate(storageref_.begin(), storageref_.end(), gen); } #endif bool writable() const { return static_cast(static_cast(*this)); } private: range_type range_;///< range object storageref_type storageref_;///< dataref // typedef typename std::conditional<_Policy::runtimeconst, // bool, // btas::detail::bool_type::value> // >::type writable_type; // writable_type can_write_; /// use this in non-const members to assert writability if Policy calls for runtime const check void assert_writable() const { if (_Policy::runtimeconst) BTAS_ASSERT(writable()); } /// construct from \c range and \c storage; pass \c can_write explicitly if needed explicit TensorView (range_type&& range, storage_type& storage, bool can_write = mutability_impl_type::make_default()) : mutability_impl_type(can_write), 
range_(std::move(range)), storageref_(std::ref(storage)) { } template friend TensorView::value, Storage, typename std::add_const::type >::type, Policy> __make_view(Range&& range, Storage& storage, Policy, bool can_write); template friend TensorView __make_cview(Range&& range, const Storage& storage, Policy); template friend class TensorView; }; // end of TensorView // N.B. The equality and inequality operators are implemented by the generic ops in tensor.h /// TensorConstView is a read-only variant of TensorView template , class _Policy = btas::TensorViewPolicy<> > using TensorConstView = TensorView<_T, _Range, const _Storage, _Policy>; /// TensorRWView is a variant of TensorView with runtime write access check template , class _Policy = btas::TensorViewPolicy > using TensorRWView = TensorView<_T, _Range, typename std::remove_const<_Storage>::type, _Policy>; /// Helper function (friendly to TensorView) that constructs a view with an explicitly-specified element type of the view. Useful if need to /// view a tensor of floats as a tensor of complex floats. /// \tparam T the element type of the resulting view /// \tparam Range the range type /// \tparam Storage the storage type /// \tparam Policy the TensorViewPolicy type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range and policy \c Policy /// \attention use __make_cview if you must force a const view; this will provide const view, however, if \c storage is a const reference. template TensorView::value, Storage, typename std::add_const::type >::type, Policy> __make_view(Range&& range, Storage& storage, Policy = Policy(), bool can_write = not Policy::runtimeconst ? (not std::is_const::value && std::is_same::value) : false) { typedef TensorView::value, Storage, typename std::add_const::type >::type, Policy> result_type; return result_type(std::move(range), storage, can_write); } /// Helper function (friendly to TensorView) that constructs a view, with an explicitly-specified element type of the view. Useful if need to /// view a tensor of floats as a tensor of complex floats. \sa TensorConstView /// \tparam T the element type of the resulting view /// \tparam Range the range type /// \tparam Storage the storage type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range and policy \c Policy template TensorView __make_cview(Range&& range, const Storage& storage, Policy = Policy()) { return TensorView(std::move(range), storage, false); } /// Helper function that constructs TensorView. /// \tparam Range the range type /// \tparam Storage the storage type /// \tparam Policy the TensorViewPolicy type; if the Policy requires additional runtime parameters use __make_view instead /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range, with policy \c Policy /// \attention use make_cview if you must force a const view; this will provide const view, however, if \c storage is a const reference. template , class = typename std::enable_if::value>::type> TensorView make_view(const Range& range, Storage& storage, Policy = Policy()) { return make_view(range, storage); } /// Helper function that constructs TensorView. 
/// \tparam Range the range type /// \tparam Storage the storage type /// \tparam Policy the TensorViewPolicy type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range, with policy \c Policy /// \attention use make_cview if you must force a const view; this will provide const view, however, if \c storage is a const reference. template , class = typename std::enable_if::value>::type> TensorView make_view(Range&& range, Storage& storage, Policy = Policy()) { return make_view(range, storage); } /// Helper function that constructs TensorView, with an explicitly-specified element type of the view. Useful if need to /// view a tensor of floats as a tensor of complex floats. /// \tparam T the element type of the resulting view /// \tparam Range the range type /// \tparam Storage the storage type /// \tparam Policy the TensorViewPolicy type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range, with policy \c Policy /// \attention use make_cview if you must force a const view; this will provide const view, however, if \c storage is a const reference. template , class = typename std::enable_if::value>::type> auto make_view(const Range& range, Storage& storage, Policy = Policy()) -> decltype(__make_view(Range(range), storage)) { return __make_view(Range(range), storage); } /// Helper function that constructs TensorView, with an explicitly-specified element type of the view. Useful if need to /// view a tensor of floats as a tensor of complex floats. /// \tparam T the element type of the resulting view /// \tparam Range the range type /// \tparam Storage the storage type /// \tparam Policy the TensorViewPolicy type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range, with policy \c Policy /// \attention use make_cview if you must force a const view; this will provide const view, however, if \c storage is a const reference. template , class = typename std::enable_if::value>::type> auto make_view(Range&& range, Storage& storage, Policy = Policy()) -> decltype(__make_view(range, storage)) { return __make_view(range, storage); } /// Helper function that constructs a full TensorView of a Tensor. /// \tparam Tensor the tensor type /// \param tensor the Tensor object /// \return TensorView, a full view of the \c tensor /// \attention use make_cview if you must force a const view; this will provide const view, however, if \c tensor is a const reference. /// \note Provided for completeness. template , class = typename std::enable_if::value>::type> TensorView make_view(Tensor& tensor, Policy = Policy()) { return TensorView(tensor); } /// Helper function that constructs a full TensorView of a Tensor, /// with an explicitly-specified element type of the view. Useful if need to /// view a tensor of floats as a tensor of complex floats. /// \tparam T the element type of the resulting view /// \tparam Tensor the tensor type /// \param tensor the Tensor object /// \return TensorView, a full view of the \c tensor /// \attention use make_cview if you must force a const view; this will provide const view, however, if \c tensor is a const reference. /// \note Provided for completeness. 
template , class = typename std::enable_if::value>::type> TensorView::value, typename Tensor::storage_type, typename std::add_const::type >::type, Policy> make_view(Tensor& tensor, Policy = Policy()) { typedef TensorView::value, typename Tensor::storage_type, typename std::add_const::type >::type, Policy> result_type; return result_type(tensor); } /// Helper function that constructs a constant TensorView. \sa TensorConstView /// \tparam Range the range type /// \tparam Storage the storage type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range template , class = typename std::enable_if::value>::type> TensorView make_cview(const Range& range, const Storage& storage, Policy = Policy()) { return make_cview(range, storage); } /// Helper function that constructs a constant TensorView, with an explicitly-specified element type of the view. Useful if need to /// view a tensor of floats as a tensor of complex floats. \sa TensorConstView /// \tparam T the element type of the resulting view /// \tparam Range the range type /// \tparam Storage the storage type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range template , class = typename std::enable_if::value>::type> TensorView make_cview(const Range& range, const Storage& storage, Policy = Policy()) { return __make_cview(Range(range), storage); } /// Helper function that constructs a full constant TensorView of a Tensor. /// \tparam Tensor the tensor type /// \param tensor the Tensor object /// \return TensorView, a full view of the \c tensor /// \note Provided for completeness. template , class = typename std::enable_if::value>::type> TensorView make_cview(const Tensor& tensor) { return TensorView(tensor); } /// Helper function that constructs a full constant TensorView of a Tensor, /// with an explicitly-specified element type of the view. Useful if need to /// view a tensor of floats as a tensor of complex floats. /// \tparam T the element type of the resulting view /// \tparam Tensor the tensor type /// \param tensor the Tensor object /// \return TensorView, a full view of the \c tensor /// \note Provided for completeness. template , class = typename std::enable_if::value>::type> TensorView make_cview(const Tensor& tensor) { return TensorView(tensor); } /// Helper function that constructs writable TensorView. /// \tparam Range the range type /// \tparam Storage the storage type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range /// \attention use make_cview if you must force a const view; this will provide const view, however, if \c storage is a const reference. template ::value>::type> TensorRWView make_rwview(const Range& range, Storage& storage, bool can_write = not std::is_const::value) { // enforce mutability can_write = can_write && (not std::is_const::value); return make_rwview(Range(range), storage, can_write); } /// Helper function that constructs writable TensorView. 
/// \tparam Range the range type /// \tparam Storage the storage type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range /// \attention use make_cview if you must force a const view; this will provide const view, however, if \c storage is a const reference. template ::value>::type> TensorRWView make_rwview(Range&& range, Storage& storage, bool can_write = not std::is_const::value) { // enforce mutability can_write = can_write && (not std::is_const::value); return make_rwview(std::move(range), storage, can_write); } /// Helper function that constructs writable TensorView, with an explicitly-specified element type of the view. Useful if need to /// view a tensor of floats as a tensor of complex floats. /// \tparam T the element type of the resulting view /// \tparam Range the range type /// \tparam Storage the storage type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range /// \attention use make_cview if you must force a const view; this will provide const view, however, if \c storage is a const reference. template ::value>::type> TensorRWView make_rwview(const Range& range, Storage& storage, bool can_write = not std::is_const::value && std::is_same::value) { // enforce mutability can_write = can_write && (not std::is_const::value && std::is_same::value); return make_rwview(Range(range), storage, can_write); } /// Helper function that constructs writable TensorView, with an explicitly-specified element type of the view. Useful if need to /// view a tensor of floats as a tensor of complex floats. /// \tparam T the element type of the resulting view /// \tparam Range the range type /// \tparam Storage the storage type /// \param range the range object defining the view /// \param storage the storage object that will be viewed into /// \return TensorView into \c storage using \c range /// \attention use make_cview if you must force a const view; this will provide const view, however, if \c storage is a const reference. template ::value>::type> TensorRWView make_rwview(Range&& range, Storage& storage, bool can_write = not std::is_const::value && std::is_same::value) { // enforce mutability can_write = can_write && (not std::is_const::value && std::is_same::value); return __make_view::type, TensorViewPolicy >(std::move(range), const_cast::type&>(storage), TensorViewPolicy(), can_write); } /// Helper function that constructs a full writable TensorView of a Tensor. /// \tparam Tensor the tensor type /// \param tensor the Tensor object /// \return TensorView, a full view of the \c tensor /// \note Provided for completeness. template ::value>::type> TensorRWView make_rwview(Tensor& tensor, bool can_write = not std::is_const::value && not std::is_const::value) { // enforce mutability can_write = can_write && (not std::is_const::value && not std::is_const::value); return make_rwview(tensor.range(), tensor.storage(), can_write); } /// Helper function that constructs a full writable TensorView of a Tensor, /// with an explicitly-specified element type of the view. Useful if need to /// view a tensor of floats as a tensor of complex floats. /// \tparam T the element type of the resulting view /// \tparam Tensor the tensor type /// \param tensor the Tensor object /// \return TensorView, a full view of the \c tensor /// \note Provided for completeness. 
template ::value>::type> TensorRWView make_rwview(Tensor& tensor, bool can_write = not std::is_const::value && not std::is_const::value && std::is_same::value) { // enforce mutability can_write = can_write && (not std::is_const::value && not std::is_const::value && std::is_same::value); return make_rwview(tensor.range(), tensor.storage(), can_write); } template auto cbegin(const btas::TensorView<_T, _Range, _Storage>& x) -> decltype(x.cbegin()) { return x.cbegin(); } template auto cend(const btas::TensorView<_T, _Range, _Storage>& x) -> decltype(x.cbegin()) { return x.cend(); } /// maps TensorView -> Range template auto range (const btas::TensorView<_T, _Range, _Storage>& t) -> decltype(t.range()) { return t.range(); } /// maps TensorView -> Range extent template auto extent (const btas::TensorView<_T, _Range, _Storage>& t) -> decltype(t.range().extent()) { return t.range().extent(); } /// TensorView stream output operator /// prints TensorView in row-major form. To be implemented elsewhere using slices. /// \param os The output stream that will be used to print \c t /// \param t The TensorView to be printed /// \return A reference to the output stream template std::ostream& operator<<(std::ostream& os, const btas::TensorView<_T, _Range, _Storage>& t) { os << "TensorView:\n Range: " << t.range() << std::endl; return os; } /// TensorMap views a sequence of values as a Tensor template using TensorMap = TensorView<_T, _Range, btas::infinite_sequence_adaptor<_T*>>; /// TensorConstMap const-views a sequence of values as a Tensor template using TensorConstMap = TensorView>; /// Helper function that constructs TensorMap. /// \tparam T the element type returned by the view /// \tparam Range the range type /// \param range the range object defining the view /// \return TensorView into \c storage using \c range /// \attention use make_cmap if you must force a const view; this will provide const view, however, if \c storage is a const reference. template TensorMap::type> make_map(T* data, Range&& range) { return TensorMap::type>(std::forward(range), std::ref(btas::infinite_sequence_adaptor(data))); } /// Helper function that constructs TensorConstMap. /// \tparam T the element type returned by the view /// \tparam Range the range type /// \param range the range object defining the view /// \return TensorView into \c storage using \c range /// \attention use make_cmap if you must force a const view; this will provide const view, however, if \c storage is a const reference. template TensorConstMap::type> make_map(const T* data, Range&& range) { return TensorConstMap::type>(std::forward(range), std::cref(btas::infinite_sequence_adaptor(data))); } /// Helper function that constructs TensorConstMap. /// \tparam Range the range type /// \param range the range object defining the view /// \return TensorView into \c storage using \c range /// \attention use make_cmap if you must force a const view; this will provide const view, however, if \c storage is a const reference. 
template TensorConstMap::type, typename std::decay::type> make_cmap(T* data, Range&& range) { typedef typename std::remove_const::type value_type; typedef TensorConstMap::type> result_type; return result_type(std::forward(range), std::cref(btas::infinite_sequence_adaptor(const_cast(data)))); } } // namespace btas // serialization of TensorView is disabled #if 0 namespace boost { namespace serialization { /// boost serialization template void serialize(Archive& ar, btas::TensorView<_T,_Range,_Storage,_Policy>& tv, const unsigned int version) { boost::serialization::split_free(ar, tv, version); } template void save(Archive& ar, const btas::TensorView<_T,_Range,_Storage,_Policy>& tv, const unsigned int version) { const auto& range = tv.range(); const auto* storage_ptr = &tv.storage(); bool writable = tv.writable(); ar << BOOST_SERIALIZATION_NVP(range) << BOOST_SERIALIZATION_NVP(storage_ptr) << BOOST_SERIALIZATION_NVP(writable); } template void load(Archive& ar, btas::TensorView<_T,_Range,_Storage,_Policy>& tv, const unsigned int version) { _Range range; _Storage* storage_ptr; bool writable; ar >> BOOST_SERIALIZATION_NVP(range) >> BOOST_SERIALIZATION_NVP(storage_ptr) >> BOOST_SERIALIZATION_NVP(writable); std::reference_wrapper<_Storage> storage_ref(*storage_ptr); tv = btas::TensorView<_T,_Range,_Storage,_Policy>(std::move(range), std::move(storage_ref), writable); } } // namespace serialization } // namespace boost #endif // serialization of TensorView is disabled #endif /* TENSORVIEW_H_ */ BTAS-1.0.0/btas/tensorview_iterator.h000066400000000000000000000107351476142407000174740ustar00rootroot00000000000000/* * tensorview_iterator.h * * Created on: Dec 28, 2013 * Author: evaleev */ #ifndef BTAS_TENSORVIEW_ITERATOR_H_ #define BTAS_TENSORVIEW_ITERATOR_H_ #include #include namespace btas { /// Iterates over elements of \c Storage using ordinal values of indices in \c Range template class TensorViewIterator { struct Enabler {}; public: typedef Storage storage_type; typedef std::reference_wrapper storageref_type; typedef std::reference_wrapper ncstorageref_type; using iterator_category = std::conditional_t::value, std::forward_iterator_tag, std::output_iterator_tag>; using value_type = std::conditional_t::value, const typename storage_traits::value_type, typename storage_traits::value_type>; using difference_type = std::ptrdiff_t; using pointer = std::add_pointer_t; using reference = std::add_lvalue_reference_t; private: typedef typename Range::ordinal_subiterator subiterator; typedef typename Range::ordinal_iterator iterator; typedef typename iterator::value_type ordinal_type; typedef typename Range::index_type index_type; public: /// Default constructor TensorViewIterator() {} /// Destructor ~TensorViewIterator() {} TensorViewIterator(const typename Range::iterator& index_iter, Storage& storage) : iter_(subiterator(std::make_pair(*index_iter,index_iter.range()->ordinal(*index_iter)),index_iter.range())), storageref_(storage) {} TensorViewIterator(const typename Range::iterator& index_iter, const storageref_type& storage) : iter_(subiterator(std::make_pair(*index_iter,index_iter.range()->ordinal(*index_iter)),index_iter.range())), storageref_(storage) {} template TensorViewIterator(const typename Range::iterator& index_iter, const ncstorageref_type& storage, typename std::enable_if::value>::type* = 0) : iter_(subiterator(std::make_pair(*index_iter,index_iter.range()->ordinal(*index_iter)),index_iter.range())), // standard const_cast cannot "map" const into nontrivial structures, have to reinterpret 
here storageref_(reinterpret_cast(storage)) {} TensorViewIterator(const typename Range::iterator& index_iter, const ordinal_type& ord, Storage& storage) : iter_(subiterator(std::make_pair(*index_iter,ord),index_iter.range())), storageref_(storage) {} TensorViewIterator(const iterator& iter, Storage& storage) : iter_(iter), storageref_(storage) {} TensorViewIterator(iterator&& iter, Storage& storage) : iter_(iter), storageref_(storage) {} TensorViewIterator& operator++() { ++iter_; return *this; } const reference operator*() const { return *(cbegin(storageref_.get()) + *iter_); } //template ::value,Enabler>::type> template typename std::enable_if::value,reference>::type operator*() { return *(begin(storageref_.get()) + *iter_); } const index_type& index() const { return first(*iter_.base()); } template friend bool operator==(const TensorViewIterator&, const TensorViewIterator&); private: iterator iter_; storageref_type storageref_; }; template inline bool operator==(const TensorViewIterator& i1, const TensorViewIterator& i2) { return i1.iter_ == i2.iter_; } template inline bool operator!=(const TensorViewIterator& i1, const TensorViewIterator& i2) { return not (i1 == i2); } } #endif /* BTAS_TENSORVIEW_ITERATOR_H_ */ BTAS-1.0.0/btas/type_traits.h000066400000000000000000000235441476142407000157270ustar00rootroot00000000000000#ifndef __BTAS_TYPE_TRAITS_H #define __BTAS_TYPE_TRAITS_H 1 #include #include // C++20 extensions #if __cplusplus <= 201703L namespace std { template< class T > struct remove_cvref { typedef std::remove_cv_t> type; }; template< class T> using remove_cvref_t = typename remove_cvref::type; template< class T > struct type_identity { using type = T; }; template< class T > using type_identity_t = typename type_identity::type; } #endif namespace btas { template struct make_void { using type = void; }; template using void_t = typename make_void::type; /// extends std::common_type to yield a signed integer type if one of the arguments is a signed type template struct common_signed_type { typedef typename std::common_type::type common_type; typedef typename std::conditional< std::is_signed::value || std::is_signed::value, typename std::make_signed::type, common_type >::type type; }; // common_signed_type /// test T has begin() member template class has_begin { /// true case template static auto __test(U* p) -> decltype(p->begin(), std::true_type()); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; template constexpr inline bool has_begin_v = has_begin::value; /// test T has end() member template class has_end { /// true case template static auto __test(U* p) -> decltype(p->end(), std::true_type()); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; template constexpr inline bool has_end_v = has_end::value; /// test T has value_type template class has_value_type { /// true case template static std::true_type __test(typename U::value_type*); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; template constexpr inline bool has_value_type_v = has_value_type::value; /// test _C conforms to the standard Container concept; basic tests only template class is_container { public: static constexpr const bool value = has_value_type<_C>::value & has_begin<_C>::value & has_end<_C>::value; }; template constexpr inline bool is_container_v = 
is_container::value; /// test T has operator[] member template class has_squarebraket { /// true case template static auto __test( U* p, std::size_t i) -> decltype(p->operator[](i), std::true_type()); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0,std::size_t(0)))>::value; }; template constexpr inline bool has_squarebraket_v = has_squarebraket::value; /// test T has data() member /// this will be used to detect whether or not the storage is consecutive template class has_data { /// true case template static auto __test(U* p) -> decltype(p->data(), std::true_type()); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; template inline constexpr bool has_data_v = has_data::value; template inline constexpr bool has_size_v = false; template inline constexpr bool has_size_v().size())>> = true; template inline constexpr bool has_nonmember_begin_v = has_begin_v; // if have member begin, std::begin will apply template inline constexpr bool has_nonmember_begin_v()))>> = true; template inline constexpr bool has_nonmember_end_v = has_end_v; // if have member end, std::end will apply template inline constexpr bool has_nonmember_end_v()))>> = true; template inline constexpr bool has_nonmember_data_v = has_data_v; // if have member data, std::data will apply template inline constexpr bool has_nonmember_data_v()))>> = true; template inline constexpr bool has_nonmember_size_v = has_end_v; // if have member size, std::size will apply template inline constexpr bool has_nonmember_size_v()))>> = true; // Checks if an iterator is random access template struct is_random_access_iterator { private: using iterator_traits = std::iterator_traits<_Iterator>; public: static constexpr bool value = std::is_same_v< typename iterator_traits::iterator_category, std::random_access_iterator_tag >; }; template inline constexpr bool is_random_access_iterator_v = is_random_access_iterator<_Iterator>::value; // Checks whether a type is compatible with BLAS/LAPACK, i.e. 
// is is S (float) / D (double) / C (complex float) / Z (complex double) template struct is_blas_lapack_type { static constexpr bool value = false; }; template<> struct is_blas_lapack_type { static constexpr bool value = true; }; template<> struct is_blas_lapack_type { static constexpr bool value = true; }; template<> struct is_blas_lapack_type> { static constexpr bool value = true; }; template<> struct is_blas_lapack_type> { static constexpr bool value = true; }; template inline constexpr bool is_blas_lapack_type_v = is_blas_lapack_type::value; // Checks if an iterator decays to a BLAS/LAPACK compatible type // is_blas_lapack_type + is_random_access_iterator // TODO: Should be is_contiguous_iterator with C++20 template struct is_blas_lapack_compatible { private: using iterator_traits = std::iterator_traits<_Iterator>; using value_type = std::remove_cvref_t; static constexpr bool is_rai = is_random_access_iterator_v<_Iterator>; static constexpr bool is_blt = is_blas_lapack_type_v; public: static constexpr bool value = is_rai and is_blt; }; template inline constexpr bool is_blas_lapack_compatible_v = is_blas_lapack_compatible<_Iterator>::value; // Checks if a collection of iterators are all BLAS/LAPACK compatible template struct are_blas_lapack_compatible; template struct are_blas_lapack_compatible<_Iterator, Tail...> { private: static constexpr bool tail_is_compatible = are_blas_lapack_compatible::value; public: static constexpr bool value = is_blas_lapack_compatible_v<_Iterator> and tail_is_compatible; }; template struct are_blas_lapack_compatible<_Iterator> { static constexpr bool value = is_blas_lapack_compatible_v<_Iterator>; }; template inline constexpr bool are_blas_lapack_compatible_v = are_blas_lapack_compatible<_Iterators...>::value; template struct is_scalar_arithmetic { static constexpr bool value = std::is_arithmetic_v; }; template struct is_scalar_arithmetic< std::complex > { static constexpr bool value = std::is_arithmetic_v; }; template inline constexpr bool is_scalar_arithmetic_v = is_scalar_arithmetic::value; template struct real_type { using type = T; }; template struct real_type> { using type = T; }; template using real_type_t = typename real_type::type; // Convienience traits template using iterator_value_t = typename std::iterator_traits<_Iterator>::value_type; template using iterator_difference_t = typename std::iterator_traits<_Iterator>::difference_type; template inline constexpr bool is_complex_type_v = not std::is_same_v< T, real_type_t >; template > struct has_numeric_type : public std::false_type { }; template struct has_numeric_type< T, std::void_t > : public std::true_type { }; template > struct numeric_type; template struct numeric_type::value>> { using type = T; }; template struct numeric_type::value>> { using type = typename T::numeric_type; }; /// evalutes to true for types with deep-copy semantics, false for types that may not necessarily deep copy template constexpr inline bool is_deep_copy_v = false; } // namespace btas #endif // __BTAS_TYPE_TRAITS_H BTAS-1.0.0/btas/types.h000066400000000000000000000021271476142407000145160ustar00rootroot00000000000000#ifndef __BTAS_TYPES_H #define __BTAS_TYPES_H 1 // // BLAS types // #include #ifdef BTAS_HAS_BLAS_LAPACK #include #include #if defined(LAPACK_COMPLEX_CPP) BTAS_PRAGMA_CLANG(diagnostic push) BTAS_PRAGMA_CLANG(diagnostic ignored "-Wreturn-type-c-linkage") #endif // defined(LAPACK_COMPLEX_CPP) #include #if defined(LAPACK_COMPLEX_CPP) BTAS_PRAGMA_CLANG(diagnostic pop) #endif // defined(LAPACK_COMPLEX_CPP) 
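// without BTAS_HAS_BLAS_LAPACK, the #else branch below supplies minimal
// stand-ins for the blaspp/lapackpp enum types that appear in BTAS
// interfaces (blas::Layout, blas::Op, blas::Uplo, lapack::Job), so generic
// code can still name these types without the BLAS++/LAPACK++ headers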
#else namespace blas { enum class Layout : char { RowMajor = 'R', ColMajor = 'C' }; enum class Op : char { NoTrans = 'N', Trans = 'T', ConjTrans = 'C' }; enum class Uplo : char { Upper = 'U', Lower = 'L' }; } namespace lapack { enum class Job : char { Vec = 'V', NoVec = 'N', AllVec = 'A', OverwriteVec = 'O' }; typedef blas::Uplo Uplo; } #endif namespace btas { // // Other aliases for convenience // /// default size type typedef unsigned long size_type; /// null deleter struct nulldeleter { void operator()(void const*) {} }; } #endif // __BTAS_TYPES_H BTAS-1.0.0/btas/util/000077500000000000000000000000001476142407000141545ustar00rootroot00000000000000BTAS-1.0.0/btas/util/deprecated.h000066400000000000000000000015411476142407000164260ustar00rootroot00000000000000/* * deprecated.h * * Created on: Jul 9, 2017 * Author: evaleev */ #ifndef BTAS_UTIL_DEPRECATED_H_ #define BTAS_UTIL_DEPRECATED_H_ // mark functions as deprecated using this macro // will result in a warning #if __cplusplus >= 201402L #define DEPRECATED [[deprecated]] #elif defined(__GNUC__) #define DEPRECATED __attribute__((deprecated)) #else #pragma message("WARNING: You need to implement DEPRECATED for this compiler") #define DEPRECATED #endif // same as DEPRECATED, but annotated with a message // will result in a warning #if __cplusplus >= 201402L #define DEPRECATEDMSG(msg) [[deprecated(msg)]] #elif defined(__GNUC__) #define DEPRECATEDMSG(msg) __attribute__((deprecated(msg))) #else #pragma message("WARNING: You need to implement DEPRECATEDMSG for this compiler") #define DEPRECATEDMSG(msg) #endif #endif /* BTAS_UTIL_DEPRECATED_H_ */ BTAS-1.0.0/btas/util/dot.h000066400000000000000000000023171476142407000151160ustar00rootroot00000000000000#ifndef __BTAS_UTIL_DOT_H #define __BTAS_UTIL_DOT_H 1 #include #include #include namespace btas { template typename _Vector::value_type dot (const _Vector& x, const _Vector& y) { assert(x.size() == y.size()); auto ix = x.begin(); auto iy = y.begin(); typename _Vector::value_type value = (*ix) * (*iy); ++ix; ++iy; for(; ix != x.end(); ++ix, ++iy) { value += (*ix) * (*iy); } return value; } // // small overhead version for std::array // template struct __dot_helper { static _T multiply (const std::array<_T, _N>& x, const std::array<_T, _N>& y) { return x[_I-1]*y[_I-1]+__dot_helper<_T, _I+1, _N>::multiply(x, y); } }; template struct __dot_helper<_T, _N, _N> { static _T multiply (const std::array<_T, _N>& x, const std::array<_T, _N>& y) { return x[_N-1]*y[_N-1]; } }; template 0)>::type> _T dot (const std::array<_T, _N>& x, const std::array<_T, _N>& y) { return __dot_helper<_T, 1, _N>::multiply(x, y); } } // namespace btas #endif // __BTAS_UTIL_DOT_H BTAS-1.0.0/btas/util/functional.h000066400000000000000000000061011476142407000164650ustar00rootroot00000000000000/* * functional.h * * Created on: Dec 28, 2013 * Author: evaleev */ #ifndef BTAS_FUNCTIONAL_H_ #define BTAS_FUNCTIONAL_H_ #include #include #include #include namespace btas { /// Computes T -> T template struct identity { typedef T type; T& operator()(T& x) const { return x; } const T& operator()(const T& x) const { return x; } }; template struct first_of_pair; template struct second_of_pair; /// Computes pair -> T1 template struct first_of_pair< std::pair > { using argument_type = const std::pair&; using result_type = const T1&; typedef T1 type; const type& operator()(const std::pair& x) const { return x.first; } }; /// Computes pair -> T2 template struct second_of_pair< std::pair > { using argument_type = const std::pair&; using result_type = 
const T2&; typedef T2 type; const type& operator()(const std::pair& x) const { return x.second; } }; /// returns the first element of a pair template const T1& first(const std::pair& x) { return x.first; } /// returns the second element of a pair template const T2& second(const std::pair& x) { return x.second; } /// returns the first element of a tuple template auto first(const std::tuple& x) -> decltype(std::get<0>(x)) { return std::get<0>(x); } /// returns the second element of a tuple template auto second(const std::tuple& x) -> decltype(std::get<1>(x)) { return std::get<1>(x); } /// returns the third element of a tuple template auto third(const std::tuple& x) -> decltype(std::get<2>(x)) { return std::get<2>(x); } /// returns the fourth element of a tuple template auto fourth(const std::tuple& x) -> decltype(std::get<3>(x)) { return std::get<3>(x); } namespace detail { /// helper empty class designed to be used with bool in std::conditional<> template struct bool_type { static constexpr bool value = B; bool_type() = default; bool_type(const bool_type& other) = default; bool_type(bool_type&& other) = default; bool_type(bool b) { BTAS_ASSERT(b == value); } constexpr operator bool() const noexcept { return value; } constexpr bool operator()() const noexcept { return value; } }; template bool operator==(const bool_type& one, bool two) { BTAS_ASSERT(bool_type::value == two); return true; } } // namespace btas::detail } // namespace btas namespace std { template void swap(btas::detail::bool_type& a, btas::detail::bool_type& b) { } } #endif /* BTAS_FUNCTIONAL_H_ */ BTAS-1.0.0/btas/util/mohndle.h000066400000000000000000000373111476142407000157600ustar00rootroot00000000000000// // Created by Eduard Valeyev on 3/29/21. // #ifndef BTAS_UTIL_MOHNDLE_H #define BTAS_UTIL_MOHNDLE_H #include #include #include #ifdef BTAS_HAS_BOOST_SERIALIZATION # include # include # include # include #endif #include #include namespace btas { /// describes handle types that can be used for default/direct construction of mohndle enum class Handle { invalid, value, unique_ptr, shared_ptr, ptr }; /// @brief Maybe Owning HaNDLE (`mohndle`) to @c Storage /// @tparam Storage a type that meets the TWG.Storage concept /// @tparam DefaultHandle the handle type to use when default constructing, or constructing storage object directly from a pack /// Enacpsulates a value, a reference, or a pointer to (bare, unique, or shared) to a contiguous storage template ::value>> class mohndle : std::variant, std::shared_ptr, std::reference_wrapper, Storage*> { public: using base_type = std::variant, std::shared_ptr, std::reference_wrapper, Storage*>; // using base_type::base_type; typedef typename storage_traits::value_type value_type; typedef typename storage_traits::pointer pointer; typedef typename storage_traits::const_pointer const_pointer; typedef typename storage_traits::reference reference; typedef typename storage_traits::const_reference const_reference; typedef typename storage_traits::size_type size_type; typedef typename storage_traits::difference_type difference_type; typedef typename storage_traits::iterator iterator; typedef typename storage_traits::const_iterator const_iterator; mohndle() = default; /// constructs mohndle from a handle /// @param handle a handle object template >> explicit mohndle(Handle&& handle) : base_type(std::forward(handle)) {} mohndle(const mohndle& other) : base_type(std::visit( [](auto&& v) -> base_type { using v_t = std::remove_reference_t; if constexpr (std::is_same_v || std::is_same_v || 
std::is_same_v> || std::is_same_v const> || std::is_same_v || std::is_same_v || std::is_same_v> || std::is_same_v const>) { return v; } else if constexpr (std::is_same_v> || std::is_same_v const>) { return std::make_unique(*(v.get())); } else if constexpr (std::is_same_v || std::is_same_v) { return {}; } else abort(); }, other.base())) {} mohndle(mohndle&&) = default; mohndle& operator=(const mohndle& other) { std::swap(this->base(), mohndle(other).base()); return *this; } mohndle& operator=(mohndle&&) = default; ~mohndle() = default; /// constructs a mohndle of type given by DefaultHandle directly from zero or more arguments template >> explicit mohndle(Args&&... args) { if constexpr (DefaultHandle == Handle::value) this->base().template emplace(std::forward(args)...); else if constexpr (DefaultHandle == Handle::ptr) this->base().template emplace(new Storage(std::forward(args)...)); else if constexpr (DefaultHandle == Handle::unique_ptr) this->base().template emplace>(std::make_unique(std::forward(args)...)); else if constexpr (DefaultHandle == Handle::shared_ptr) this->base().template emplace>(std::make_shared(std::forward(args)...)); else // if constexpr (DefaultHandle == Handle::invalid) abort(); } explicit operator bool() const { return this->index() != 0; } bool is_owner() const { const auto idx = this->index(); return idx > 0 && idx < 4; } template std::enable_if_t && !std::is_const_v, iterator> begin() { using std::begin; return begin(*(this->get())); } template std::enable_if_t && !std::is_const_v, iterator> end() { using std::end; return end(*(this->get())); } template std::enable_if_t, const_iterator> begin() const { using std::begin; return begin(*(this->get())); } template std::enable_if_t, const_iterator> end() const { using std::end; return end(*(this->get())); } template std::enable_if_t, const_iterator> cbegin() const { return this->begin(); } template std::enable_if_t, const_iterator> cend() const { return this->end(); } template std::enable_if_t && !std::is_const_v, pointer> data() { using std::data; return data(*(this->get())); } template std::enable_if_t, const_pointer> data() const { using std::data; return data(*(this->get())); } template std::enable_if_t, std::size_t> size() const { using std::size; return size(*(this->get())); } template void resize(std::size_t new_size) { if (this->base().index() == 0 && new_size > 0) *this = mohndle(new_size); else array_adaptor::resize(*(this->get()), new_size); } template std::enable_if_t && !std::is_const_v, reference> operator[](std::size_t ord) { return (*(this->get()))[ord]; } template std::enable_if_t, const_reference> operator[](std::size_t ord) const { return (*(this->get()))[ord]; } template friend void swap(mohndle& first, mohndle& second); const Storage* get() const { return std::visit( [](auto&& v) -> const Storage* { using v_t = std::remove_reference_t; if constexpr (std::is_same_v || std::is_same_v) { return &null_storage(); } else if constexpr (std::is_same_v) { return &v; } else if constexpr (std::is_same_v) { return &v; } else if constexpr (std::is_same_v>) { return &(v.get()); } else if constexpr (std::is_same_v const>) { return &(v.get()); } else if constexpr (std::is_same_v) { assert(v); return v; } else if constexpr (std::is_same_v) { assert(v); return v; } else if constexpr (std::is_same_v>) { assert(v); return v.get(); } else if constexpr (std::is_same_v const>) { assert(v); return v.get(); } else if constexpr (std::is_same_v>) { assert(v); return v.get(); } else if constexpr (std::is_same_v const>) { 
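// owning smart-pointer alternatives must be non-null here; return the pointee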
assert(v); return v.get(); } else abort(); }, this->base()); } Storage* get() { return const_cast(const_cast(this)->get()); } #ifdef BTAS_HAS_BOOST_SERIALIZATION template void serialize(Archive& ar, const unsigned int /* version */) { constexpr bool writing = std::is_base_of_v; constexpr auto serializable_index = std::index_sequence<0, 1, 2, 3, 5>{}; auto index = this->base().index(); // abort if trying to store an unsupported case if constexpr (writing) { if (std::holds_alternative>(this->base())) abort(); } ar& BOOST_SERIALIZATION_NVP(index); if constexpr (writing) std::visit( [&ar](const auto& value) -> void { using v_t = std::decay_t; // - can't read reference_wrapper // - no need to write monostate if constexpr (!std::is_same_v> && !std::is_same_v) ar & BOOST_SERIALIZATION_NVP(value); }, this->base()); else variant_load_impl(ar, this->base(), index, serializable_index); } #endif auto& base() { return static_cast(*this); } const auto& base() const { return static_cast(*this); } bool operator==(const mohndle& other) const { return (*this && other) || (!*this && !other && *(this->get()) == *(other.get())); } private: template friend void swap(mohndle& first, mohndle& second); #ifdef BTAS_HAS_BOOST_SERIALIZATION // utility for serializing select members of variant template static Archive& variant_load_impl(Archive& ar, std::variant& v, std::size_t which, std::index_sequence) { constexpr bool writing = std::is_base_of_v; static_assert(!writing); if (which == I0) { using type = std::variant_alternative_t>; if constexpr (!std::is_same_v) { type value; ar& BOOST_SERIALIZATION_NVP(value); v.template emplace(std::move(value)); } else if constexpr (std::is_same_v) v = {}; } else { if constexpr (sizeof...(Is) == 0) throw std::logic_error("btas::mohndle::variant_load_impl(ar,v,idx,idxs): idx is not present in idxs"); else return variant_load_impl(ar, v, which, std::index_sequence{}); } return ar; } #endif /// delays construction of null storage object until first use static Storage& null_storage() { static Storage null = {}; return null; } }; template >> void swap(mohndle& first, mohndle& second) { using std::swap; swap(first.base(), second.base()); } /// mohndle can have shallow copy semantics template constexpr inline bool is_deep_copy_v> = false; template struct storage_traits> : public storage_traits_base_container> { template using rebind_t = mohndle::template rebind_t, H>; }; } // namespace btas // serialization to/fro MADNESS archive (github.com/m-a-d-n-e-s-s/madness) namespace madness::archive { template struct ArchiveLoadImpl> { static inline void load(const Archive& ar, btas::mohndle& t) { constexpr auto serializable_index = std::index_sequence<0, 1, 2, 3, 5>{}; auto index = t.base().index(); ar& index; variant_load_impl(ar, t.base(), index, serializable_index); } template struct value_type { using type = T; }; template struct value_type { using type = T; }; template struct value_type> { using type = T; }; template struct value_type> { using type = T; }; // utility for serializing select members of variant template static const Archive& variant_load_impl(const Archive& ar, std::variant& v, std::size_t which, std::index_sequence) { if (which == I0) { using type = std::variant_alternative_t>; if constexpr (!std::is_same_v) { type value; if constexpr (std::is_same_v) { ar& value; } else { // bare or smart ptr using v_t = typename value_type::type; std::allocator alloc; // instead use the allocator associated with the archive? 
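// pointer alternatives are deserialized in stages: obtain raw storage from
// the allocator, default-construct a value in place, stream its state from
// the archive, then hand ownership to the bare/smart pointer built below
// (n.b. assuming alloc is std::allocator<v_t>: allocate() counts objects,
// not bytes, so passing sizeof(v_t) reserves more than one object needs)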
auto* buf = alloc.allocate(sizeof(v_t)); v_t* ptr = new (buf) v_t; ar& *ptr; value = type(ptr); } v.template emplace(std::move(value)); } else if constexpr (std::is_same_v) v = {}; } else { if constexpr (sizeof...(Is) == 0) throw std::logic_error("btas::mohndle::variant_load_impl(ar,v,idx,idxs): idx is not present in idxs"); else return variant_load_impl(ar, v, which, std::index_sequence{}); } return ar; } }; template struct ArchiveStoreImpl> { static inline void store(const Archive& ar, const btas::mohndle& t) { constexpr auto serializable_index = std::index_sequence<0, 1, 2, 3, 5>{}; const auto index = t.base().index(); // abort if trying to store an unsupported case if (std::holds_alternative>(t.base())) abort(); ar& index; std::visit( [&ar](const auto& v) -> void { using v_t = std::decay_t; // - can't read reference_wrapper // - no need to write monostate if constexpr (!std::is_same_v> && !std::is_same_v) { if constexpr (std::is_same_v) { ar& v; } else { ar& *v; } } }, t.base()); } }; } // namespace madness::archive #endif // BTAS_UTIL_MOHNDLE_H BTAS-1.0.0/btas/util/optional_ptr.h000066400000000000000000000020151476142407000170350ustar00rootroot00000000000000#ifndef __BTAS_UTIL_OPTIONAL_PTR_H #define __BTAS_UTIL_OPTIONAL_PTR_H 1 #include namespace btas { /** optional_ptr functions either as a raw unmanaged pointer or as a smart pointer of type managed_ptr depending on whether it is initialized through the set_external method or the set_managed method */ template > class optional_ptr { public: using ptr = T*; optional_ptr() : p_(nullptr) { } optional_ptr(optional_ptr&& other) : p_(other.p_), up_(std::move(other.up_)) { } T& operator*() const { return *p_; } ptr operator->() const { return p_; } void set_managed(ptr new_p) { up_ = std::move(managed_ptr(new_p)); p_ = up_.get(); } void set_external(ptr ext_p) { p_ = ext_p; up_.reset(); } private: ptr p_; managed_ptr up_; }; } // namespace btas #endif // __BTAS_UTIL_OPTIONAL_PTR_H BTAS-1.0.0/btas/util/resize.h000066400000000000000000000026231476142407000156310ustar00rootroot00000000000000#ifndef __BTAS_RESIZE_H #define __BTAS_RESIZE_H 1 #include // // This provides generic wrapper to treat variable- and fixed-size vectors within the same interface // e.g. std::vector vs. std::array // namespace btas { /// test T has resize(size_type) member /// T is assumed to be a vector template class is_resizable { /// true case template static auto __test(U* p) -> decltype(p->resize(0), std::true_type()); /// false case template static std::false_type __test(...); public: static constexpr const bool value = std::is_same(0))>::value; }; /// decl. 
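/// primary template of the wrapper; specialized below for resizable
/// (true) and fixed-size (false) vector types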
template struct __resize_wrapper { }; /// wrapper for variable size vector template<> struct __resize_wrapper { /// resize x by n template static void resize (_Vector& x, const typename _Vector::size_type& n) { x.resize(n); } }; /// wrapper for fixed-size vector template<> struct __resize_wrapper { /// nothing to do template static void resize (_Vector& x, const typename _Vector::size_type& n) { } }; /// resize vector x /// if x is resizable, resize x by n, otherwise, do nothing template void resize (_Vector& x, const typename _Vector::size_type& n) { __resize_wrapper::value>::resize(x, n); } } // namespace btas #endif // __BTAS_RESIZE_H BTAS-1.0.0/btas/util/sequence_adaptor.h000066400000000000000000000104341476142407000176510ustar00rootroot00000000000000#ifndef __BTAS_UTIL_SEQUENCEADAPTOR_H_ #define __BTAS_UTIL_SEQUENCEADAPTOR_H_ #include namespace btas { /// infinite_sequence_adaptor represents pointer \c ptr as a \c ptr[0] , \c ptr[1] , .. sequence. /// Because the sequence is infinite, several attributes of sequence are not supported (end, cend, size, resize) /// \tparam _Ptr pointer type template class infinite_sequence_adaptor { public: typedef typename std::remove_pointer<_Ptr>::type value_type; typedef _Ptr pointer; typedef typename std::add_const::type const_pointer; typedef value_type& reference; typedef const value_type& const_reference; typedef size_t size_type; typedef ptrdiff_t difference_type; typedef pointer iterator; typedef const_pointer const_iterator; infinite_sequence_adaptor() : ptr_(nullptr) {} infinite_sequence_adaptor(pointer ptr) : ptr_(ptr) {} infinite_sequence_adaptor(const infinite_sequence_adaptor&) = default; infinite_sequence_adaptor(infinite_sequence_adaptor&&) = default; ~infinite_sequence_adaptor() {} const_iterator cbegin() const { return const_cast(ptr_); } const_iterator begin() const { return cbegin(); } iterator begin() { return ptr_; } const_reference operator[](size_type i) const { return ptr_[i]; } reference operator[](size_type i) { return ptr_[i]; } private: pointer ptr_; }; // class infinite_sequence_adaptor template infinite_sequence_adaptor<_T*> make_infinite_sequence_adaptor(_T* data) { return infinite_sequence_adaptor<_T*>(data); } } // namespace btas #include namespace std { // re-implement reference_wrapper> to act like Ptr template class reference_wrapper> : private btas::infinite_sequence_adaptor<_T*> { public: // types typedef btas::infinite_sequence_adaptor<_T*> type; // construct/copy/destroy reference_wrapper(type& x) noexcept : type(x) {} // DO bind to temps reference_wrapper(type&& x) noexcept : type(x) {} reference_wrapper(const reference_wrapper& x) noexcept : type(x) {} // assignment reference_wrapper& operator=(const reference_wrapper& x) noexcept { static_cast(*this) = static_cast(x); } // access operator type& () noexcept { return *this; } type& get() noexcept { return *this; } const type& get() const noexcept { return *this; } }; // re-implement reference_wrapper> to act like const Ptr template class reference_wrapper> : private btas::infinite_sequence_adaptor<_T*> { public: // types typedef const btas::infinite_sequence_adaptor<_T*> ctype; typedef btas::infinite_sequence_adaptor<_T*> nctype; // construct/copy/destroy reference_wrapper(ctype& x) noexcept : ctype(x) {} // DO bind to temps reference_wrapper(nctype&& x) noexcept : ctype(x) {} reference_wrapper(const reference_wrapper& x) noexcept : ctype(x) {} // assignment reference_wrapper& operator=(const reference_wrapper& x) noexcept { static_cast(*this) = static_cast(x); 
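return *this;  // operator= is declared to return reference_wrapper&, so a return is required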
} // access operator ctype& () const noexcept { return *this; } ctype& get() const noexcept { return *this; } }; // this is illegal // TODO move to btas namespace and rely on ADL to invoke (instead of calling std::ref directly do // \code // using std::ref; // ref(); // \endcode template reference_wrapper> ref(btas::infinite_sequence_adaptor<_T*>&& t) { return reference_wrapper>(t); } template reference_wrapper> cref(const btas::infinite_sequence_adaptor<_T*>&& t) { return reference_wrapper>(t); } } #endif /* __BTAS_UTIL_SEQUENCEADAPTOR_H_ */ BTAS-1.0.0/btas/varray/000077500000000000000000000000001476142407000145035ustar00rootroot00000000000000BTAS-1.0.0/btas/varray/allocators.h000066400000000000000000000063201476142407000170200ustar00rootroot00000000000000/* * allocators.h * * Created on: Jan 4, 2014 * Author: evaleev */ #ifndef BTAS_VARRAY_ALLOCATORS_H_ #define BTAS_VARRAY_ALLOCATORS_H_ #include #include #include #include #include #include #include namespace btas { struct stack_arena { size_t size; //!< in bytes char* const buffer; //!< buffer begin char* current; //!< ptr to the free space in buffer template stack_arena(T* b, size_t s) : size(s), buffer(reinterpret_cast(b)), current(reinterpret_cast(b)) { } void increment(const size_t n) { current += n; } void decrement(const size_t n) { current -= n; } }; /// This is a very simple allocator implementation that uses an externally managed memory stack. /// It's mostly for demonstration purposes. /// stack_allocator is a first-in-last-out allocator, /// i.e. deallocation of the memory must happen in the opposite order of allocation. template class stack_allocator { private: std::shared_ptr arena_; size_t size() const { return arena_->size / sizeof(T); } T* buffer() const { return reinterpret_cast(arena_->buffer); } T* current() const { return reinterpret_cast(arena_->current); } void increment(const size_t n) { arena_->increment(n * sizeof(T)); } void decrement(const size_t n) { arena_->decrement(n * sizeof(T)); } public: // do not allocate here stack_allocator(std::shared_ptr a) : arena_(a) { } stack_allocator(const stack_allocator& o) : arena_(o.arena_) { } stack_allocator& operator=(const stack_allocator& o) { arena_ = o.arena_; return *this; } struct rebind { typedef stack_allocator other; }; typedef T* pointer; typedef T& reference; typedef const T* const_pointer; typedef const T& const_reference; typedef T value_type; typedef size_t size_type; typedef ptrdiff_t difference_type; pointer allocate(size_type n, const void* = 0) { //std::cout << "Allocating " << std::setw(6) << sizeof(T)*n << " bytes. 
" // << "Available memory: " // << std::setw(6) << sizeof(T) * (size() - std::distance(buffer(), current())) << " bytes."<< std::endl; pointer out = current(); increment(n); if (std::distance(buffer(), current()) > size()) throw std::runtime_error("preallocated memory exhausted"); return out; } pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } void deallocate(pointer p, size_type n) { assert(p == current() - n); decrement(n); } void construct(pointer p, const T& val) { *p = val; } void destroy(pointer p) { } size_type max_size() const { return size(); } }; } #endif /* BTAS_VARRAY_ALLOCATORS_H_ */ BTAS-1.0.0/btas/varray/varray.h000066400000000000000000000325001476142407000161600ustar00rootroot00000000000000#ifndef __BTAS_VARRAY_H #define __BTAS_VARRAY_H 1 #include #include #include #ifdef BTAS_HAS_BOOST_SERIALIZATION #include #include #include #endif // BTAS_HAS_BOOST_SERIALIZATION #include #include namespace btas { /// variable size array class without capacity info template class varray : private _Allocator { public: typedef std::allocator_traits<_Allocator> allocator_traits; ///< Allocator traits typedef typename allocator_traits::allocator_type allocator_type; ///< Allocator type typedef typename allocator_traits::value_type value_type; typedef value_type& reference; typedef const value_type& const_reference; typedef typename allocator_traits::pointer pointer; typedef typename allocator_traits::const_pointer const_pointer; typedef typename allocator_traits::difference_type difference_type; typedef typename allocator_traits::size_type size_type; typedef pointer iterator; typedef const_pointer const_iterator; typedef iterator reverse_iterator; typedef const_iterator const_reverse_iterator; private: struct _M_impl { pointer _M_start; pointer _M_finish; _M_impl() : _M_start(nullptr), _M_finish(nullptr) { } _M_impl(pointer s, pointer f) : _M_start(s), _M_finish(f) { } _M_impl(_M_impl&& other) { _M_start = other._M_start; _M_finish = other._M_finish; other._M_start = nullptr; other._M_finish = nullptr; } size_type size() const { return _M_finish - _M_start; } bool empty() const { return _M_start == _M_finish; } pointer begin() { return _M_start; } pointer end() { return _M_finish; } const_pointer begin() const { return _M_start; } const_pointer end() const { return _M_finish; } const_pointer cbegin() const { return const_cast(_M_start); } const_pointer cend() const { return const_cast(_M_finish); } pointer rbegin() { return _M_finish==nullptr ? nullptr : _M_finish-1; } pointer rend() { return _M_start==nullptr ? nullptr : _M_start-1; } const_pointer rbegin() const { return _M_finish==nullptr ? nullptr : _M_finish-1; } const_pointer rend() const { return _M_start==nullptr ? nullptr : _M_start-1; } const_pointer crbegin() const { return const_cast(_M_finish==nullptr ? nullptr : _M_finish-1); } const_pointer crend() const { return const_cast(_M_start==nullptr ? 
nullptr : _M_start-1); } reference front() { BTAS_ASSERT(!empty()); return *begin(); } reference back() { BTAS_ASSERT(!empty()); return *rbegin(); } const_reference front() const { BTAS_ASSERT(!empty()); return *cbegin(); } const_reference back() const { BTAS_ASSERT(!empty()); return *crbegin(); } reference operator[](size_type i) { return _M_start[i]; } const_reference operator[](size_type i) const { return const_cast(_M_start[i]); } reference at(size_type i) { BTAS_ASSERT(i < size()); return _M_start[i]; } const_reference at(size_type i) const { BTAS_ASSERT(i < size()); return const_cast(_M_start[i]); } pointer data() { BTAS_ASSERT(!empty()); return _M_start; } const_pointer data() const { BTAS_ASSERT(!empty()); return const_cast(_M_start); } void swap (_M_impl& other) { _M_start = other._M_start; _M_finish = other._M_finish; other._M_start = nullptr; other._M_finish = nullptr; } }; _M_impl data_; [[deprecated("use varray::get_allocator()")]] allocator_type& alloc() { return static_cast(*this); } [[deprecated("use varray::get_allocator()")]] const allocator_type& alloc() const { return static_cast(*this); } #ifdef BTAS_HAS_BOOST_SERIALIZATION friend class boost::serialization::access; #endif public: varray () : allocator_type() { } explicit varray (const allocator_type& a) : allocator_type(a) { } ~varray () { deallocate(); } explicit varray (size_type n, const allocator_type& a = allocator_type()) : allocator_type(a) { if (n > 0) { // this ensures that if n == 0, pointers are null allocate(n); construct(n); } } varray (size_type n, const_reference val, const allocator_type& a = allocator_type()) : allocator_type(a) { if (n > 0) { allocate(n); construct(n, val); } } template varray (InputIterator first, InputIterator last) { const auto n = std::distance(first, last); if (n > 0) { allocate(n); construct(first, last); } } varray (const varray& x) : allocator_type(x) { const auto n = x.size(); if (n > 0) { allocate(n); construct(x.cbegin(), x.cend()); } } varray (const varray& x, const allocator_type& a) : allocator_type(a) { const auto n = x.size(); if (n > 0) { allocate(n); construct(x.cbegin(), x.cend()); } } varray (varray&& x) : allocator_type(std::move(static_cast(x))), data_(std::move(x.data_)) { } template ::value >::type > varray (std::initializer_list il) { size_type n = il.size(); if (n > 0) { allocate(n); construct(il.begin(), il.end()); } } varray& operator= (const varray& x) { const auto n = x.size(); if (n != data_.size()) { deallocate(); if (n > 0) allocate(n); } if (n > 0) { construct(x.cbegin(), x.cend()); } return *this; } varray& operator= (varray&& x) { swap (x); // if something in this object, it will be destructed by x return *this; } template ::value >::type > varray& operator= (std::initializer_list il) { const auto n = il.size(); if (n != data_.size()) { deallocate(); if (n > 0) allocate(n); } if (n > 0) { construct(il.cbegin(), il.cend()); } return *this; } constexpr allocator_type get_allocator() const noexcept { return static_cast(*this); } iterator begin () noexcept { return data_.begin(); } const_iterator begin () const noexcept { return cbegin(); } const_iterator cbegin () const noexcept { return data_.cbegin(); } iterator end () noexcept { return data_.end(); } const_iterator end () const noexcept { return cend(); } const_iterator cend () const noexcept { return data_.cend(); } reverse_iterator rbegin () noexcept { return data_.rbegin(); } const_reverse_iterator rbegin () const noexcept { return data_.rbegin(); } reverse_iterator rend () noexcept { return 
data_.rend(); } const_reverse_iterator rend () const noexcept { return data_.rend(); } size_type size () const noexcept { return data_.size(); } void resize (size_type n) { if (size() != n) { if (!empty()) { deallocate(); } if (n > 0) { allocate(n); } } } void resize (size_type n, const value_type& val) { resize(n); construct(n, val); } bool empty () const noexcept { return data_.empty(); } reference operator [] (size_type n) { return data_[n]; } const_reference operator [] (size_type n) const { return data_[n]; } reference at (size_type n) { return data_.at(n); } const_reference at (size_type n) const { return data_.at(n); } reference front () { return data_.front(); } const_reference front () const { return data_.front(); } reference back () { return data_.back(); } const_reference back () const { return data_.back(); } value_type* data () noexcept { return data_.data(); } const value_type* data () const noexcept { return data_.data(); } void swap (varray& x) { data_.swap(x.data_); } void clear () { if (!empty()) { deallocate(); } } private: void allocate(size_type n) { assert(n <= allocator_traits::max_size(get_allocator_reference())); data_._M_start = allocator_traits::allocate(get_allocator_reference(), n); data_._M_finish = data_._M_start + n; } void deallocate() { if (!data_.empty()) allocator_traits::deallocate(get_allocator_reference(), data_._M_start, data_.size()); data_._M_start = data_._M_finish = nullptr; } void construct(size_type n) { auto ptr = data_._M_start; do { allocator_traits::construct(get_allocator_reference(), ptr); ++ptr; --n; } while (n > 0); } void construct(size_type n, const_reference x) { auto ptr = data_._M_start; do { allocator_traits::construct(get_allocator_reference(), ptr, x); ++ptr; --n; } while (n > 0); } template void construct(const InputIterator& begin, const InputIterator& end) { auto ptr = data_._M_start; for(auto i = begin; i != end; ++i) { allocator_traits::construct(get_allocator_reference(), ptr, *i); ++ptr; } } allocator_type& get_allocator_reference() { return static_cast(*this); } const allocator_type& get_allocator_reference() const { return static_cast(*this); } }; template inline bool operator== (const btas::varray& a, const btas::varray& b) { return std::equal(a.begin(), a.end(), b.begin()); } template inline bool operator!= (const btas::varray& a, const btas::varray& b) { return not (a == b); } } // namespace btas #ifdef BTAS_HAS_BOOST_SERIALIZATION namespace boost { namespace serialization { /// boost serialization for varray /// @warning Boost.Serialization does not know how to serialize std::allocator so assume stateless allocators only here template void serialize (Archive& ar, btas::varray& x, const unsigned int version) { boost::serialization::split_free(ar, x, version); } template void save (Archive& ar, const btas::varray& x, const unsigned int version) { // TODO : implement allocator serialization // const auto alloc = x.get_allocator(); // ar << BOOST_SERIALIZATION_NVP(alloc); const boost::serialization::collection_size_type count(x.size()); ar << BOOST_SERIALIZATION_NVP(count); if (count != decltype(count)(0)) ar << boost::serialization::make_array(x.data(), count); } template void load (Archive& ar, btas::varray& x, const unsigned int version) { // TODO : implement allocator serialization // A allocator; // ar >> BOOST_SERIALIZATION_NVP(allocator); // x = btas::varray(allocator); boost::serialization::collection_size_type count; ar >> BOOST_SERIALIZATION_NVP(count); x.resize(count); if (count != decltype(count)(0)) ar >> 
boost::serialization::make_array(x.data(), count); } } // namespace serialization } // namespace boost #endif // BTAS_HAS_BOOST_SERIALIZATION // serialization to/fro MADNESS archive (github.com/m-a-d-n-e-s-s/madness) namespace madness { namespace archive { template struct ArchiveLoadImpl> { static inline void load(const Archive& ar, btas::varray& x) { if constexpr (!std::allocator_traits::is_always_equal::value) { A allocator; ar & allocator; x = btas::varray(allocator); } typename btas::varray::size_type n{}; ar& n; x.resize(n); for (typename btas::varray::value_type& xi : x) ar& xi; } }; template struct ArchiveStoreImpl> { static inline void store(const Archive& ar, const btas::varray& x) { if constexpr (!std::allocator_traits::is_always_equal::value) { ar & x.get_allocator(); } ar & x.size(); for (const typename btas::varray::value_type& xi : x) ar& xi; } }; } // namespace archive } // namespace madness template inline bool operator== (const btas::varray& a, const btas::varray& b) { return std::equal(a.begin(), a.end(), b.begin()); } template inline bool operator!= (const btas::varray& a, const btas::varray& b) { return not (a == b); } #endif // __BTAS_VARRAY_H BTAS-1.0.0/btas/version.h.in000066400000000000000000000007231476142407000154440ustar00rootroot00000000000000#ifndef BTAS_VERSION_H__INCLUDED #define BTAS_VERSION_H__INCLUDED /* BTAS version X.Y.Z-id */ #define BTAS_VERSION "@BTAS_VERSION@" /* BTAS major version */ #define BTAS_MAJOR_VERSION @BTAS_MAJOR_VERSION@ /* BTAS minor version */ #define BTAS_MINOR_VERSION @BTAS_MINOR_VERSION@ /* BTAS micro version */ #define BTAS_MICRO_VERSION @BTAS_MICRO_VERSION@ /* BTAS prerelease id */ #define BTAS_PRERELEASE_ID "@BTAS_PRERELEASE_ID@" #endif // BTAS_VERSION_H__INCLUDED BTAS-1.0.0/cmake/000077500000000000000000000000001476142407000133265ustar00rootroot00000000000000BTAS-1.0.0/cmake/btas-config.cmake.in000066400000000000000000000053301476142407000171320ustar00rootroot00000000000000# - CMAKE Config file for the BTAS package # This will define the following CMake cache variables # # BTAS_FOUND - true if BTAS library were found # BTAS_VERSION - the BTAS version # BTAS_EXT_VERSION - the BTAS version including the (optional) buildid, such as beta.3 # # and the following imported targets # # BTAS::BTAS - the BTAS library # # Set package version set(BTAS_VERSION "@BTAS_VERSION@") set(BTAS_EXT_VERSION "@BTAS_EXT_VERSION@") @PACKAGE_INIT@ @Boost_CONFIG_FILE_CONTENTS@ # find linalgpp dependencies if(NOT TARGET blaspp) include( CMakeFindDependencyMacro ) get_filename_component(blaspp_DIR "@blaspp_CONFIG@" DIRECTORY) if (NOT DEFINED BLAS_LIBRARIES AND NOT DEFINED blaspp_defs_) if (NOT "@BLAS_LIBRARIES@" STREQUAL "") set(BLAS_LIBRARIES "@BLAS_LIBRARIES@" CACHE STRING "BLAS_LIBRARIES used during BTAS configuration") endif() if (NOT "@blaspp_defs_@" STREQUAL "") set(blaspp_defs_ "@blaspp_defs_@" CACHE STRING "blaspp_defs_ used during BTAS configuration") endif() endif() find_dependency( blaspp CONFIG REQUIRED HINTS "${blaspp_DIR}" ) # if need Threads::Threads, load it get_target_property(blaspp_LINK_LIBRARIES blaspp INTERFACE_LINK_LIBRARIES) if (Threads::Threads IN_LIST blaspp_LINK_LIBRARIES AND NOT TARGET Threads::Threads) find_dependency(Threads) # Threads::Threads by default is not GLOBAL, so to allow users of LINALG_LIBRARIES to safely use it we need to make it global # more discussion here: https://gitlab.kitware.com/cmake/cmake/-/issues/17256 set_target_properties(Threads::Threads PROPERTIES IMPORTED_GLOBAL TRUE) endif() endif() if(NOT TARGET 
lapackpp) include( CMakeFindDependencyMacro ) get_filename_component(lapackpp_DIR "@lapackpp_CONFIG@" DIRECTORY) if (NOT DEFINED LAPACK_LIBRARIES AND NOT DEFINED lapackpp_defs_) if (NOT "@LAPACK_LIBRARIES@" STREQUAL "") set(LAPACK_LIBRARIES "@LAPACK_LIBRARIES@" CACHE STRING "LAPACK_LIBRARIES used during BTAS configuration") endif() if (NOT "@lapackpp_defs_@" STREQUAL "") set(lapackpp_defs_ "@lapackpp_defs_@" CACHE STRING "lapackpp_defs_ used during BTAS configuration") endif() endif() find_dependency( lapackpp CONFIG REQUIRED HINTS "${lapackpp_DIR}" ) endif() # Include library IMPORT targets if(NOT TARGET blaspp_headers) include("${blaspp_DIR}/blaspp_headers-targets.cmake") if(NOT TARGET blaspp_headers) message(FATAL_ERROR "expected blaspp_headers among imported BTAS targets") endif() endif() if(NOT TARGET BTAS::BTAS) include("${CMAKE_CURRENT_LIST_DIR}/btas-targets.cmake") if(NOT TARGET BTAS::BTAS) message(FATAL_ERROR "expected BTAS::BTAS among imported BTAS targets") endif() endif() set(BTAS_FOUND TRUE) BTAS-1.0.0/cmake/modules/000077500000000000000000000000001476142407000147765ustar00rootroot00000000000000BTAS-1.0.0/cmake/modules/AddCustomTargetSubproject.cmake000066400000000000000000000034051476142407000230750ustar00rootroot00000000000000# Copyright 2020 Eduard F Valeyev # Distributed under the OSI-approved BSD 3-Clause License. # See https://opensource.org/licenses/BSD-3-Clause for details. # # add_custom_target_subproject(proj X ...) defines custom target X-proj and # - if target X already exists, makes it depend on X-proj # - else creates target X depending on X-proj # # use case: if custom target names (e.g. "check", "doc", etc.) clash # with other project's target when used as a subproject # # example: add_custom_target_subproject(myproject check USES_TERMINAL COMMAND ${CMAKE_CTEST_COMMAND} -V) # macro(add_custom_target_subproject _subproj _name) set(extra_args "${ARGN}") add_custom_target(${_name}-${_subproj} ${extra_args}) # does the newly-created target compiled by default? list(FIND extra_args "ALL" extra_args_has_all) if (NOT (extra_args_has_all EQUAL -1)) set (target_built_by_default ON) endif() if (TARGET ${_name}) # is existing target ${_name} also compiled by default? 
# warn if not, but this project's target is since that # may indicate inconsistent creation of generic targets get_target_property(supertarget_not_built_by_default ${_name} EXCLUDE_FROM_ALL) if (target_built_by_default AND supertarget_not_built_by_default) message(WARNING "Created target ${_name}-${_subproj} is built by default but \"super\"-target ${_name} is not; perhaps it should be?") endif() add_dependencies(${_name} ${_name}-${_subproj}) else (TARGET ${_name}) # use ALL if given if (target_built_by_default) add_custom_target(${_name} ALL DEPENDS ${_name}-${_subproj}) else (target_built_by_default) add_custom_target(${_name} DEPENDS ${_name}-${_subproj}) endif(target_built_by_default) endif (TARGET ${_name}) endmacro() BTAS-1.0.0/cmake/modules/AppendFlags.cmake000066400000000000000000000002751476142407000201700ustar00rootroot00000000000000macro(append_flags _flags _append_flag) string(STRIP "${_append_flag}" _append_flag ) set(${_flags} "${${_flags}} ${_append_flag}") string(STRIP "${${_flags}}" ${_flags}) endmacro()BTAS-1.0.0/cmake/modules/CheckCFortranFunctionExists.cmake000066400000000000000000000007511476142407000233650ustar00rootroot00000000000000include(CheckFunctionExists) macro(check_c_fortran_function_exists _func_base _result) string(TOLOWER "${_func_base}" _func_base_lower) string(TOUPPER "${_func_base}" _func_base_upper) set(${_result} FALSE) foreach(_func ${_func_base_lower}_;${_func_base_lower};${_func_base_lower}__;${_func_base_upper};${_func_base_upper}_) check_function_exists(${_func} _${_func}_found) if(_${_func}_found) set(${_result} TRUE) break() endif() endforeach() endmacro()BTAS-1.0.0/cmake/modules/CheckLibraryList.cmake000066400000000000000000000100311476142407000211710ustar00rootroot00000000000000 INCLUDE(CheckFunctionExists) INCLUDE(CheckIncludeFile) # This macro checks for the presence of the combination of libraries # given by _list. If the combination is found, this macro checks (using the # Check_Function_Exists macro) whether can link against that library # combination using the name of a routine given by _name using the linker # flags given by _flags. If the combination of libraries is found and passes # the link test, LIBRARIES is set to the list of complete library paths that # have been found. Otherwise, LIBRARIES is set to FALSE. # # If _search_include is set, also checks for presence of include file given by _include # using Check_Include_File macro # # N.B. _prefix is the prefix applied to the names of all cached variables that # are generated internally and marked advanced by this macro. MACRO(CHECK_LIBRARY_LIST LIBRARIES _prefix _name _flags _list _include _search_include) SET(__list) FOREACH(_elem ${_list}) IF(__list) SET(__list "${__list} - ${_elem}") ELSE(__list) SET(__list "${_elem}") ENDIF(__list) ENDFOREACH(_elem) IF(_verbose) MESSAGE(STATUS "Checking for [${__list}]") ENDIF(_verbose) SET(_libraries_work TRUE) SET(${LIBRARIES}) SET(_combined_name) SET(_paths) FOREACH(_library ${_list}) SET(_combined_name ${_combined_name}_${_library}) # did we find all the libraries in the _list until now? 
# (we stop at the first unfound one) IF(_libraries_work) IF(APPLE) FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library} PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV DYLD_LIBRARY_PATH ) ELSE(APPLE) FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library} PATHS /usr/local/lib /usr/lib /usr/lib/libblas /usr/local/lib64 /usr/lib64 ENV LD_LIBRARY_PATH ) ENDIF(APPLE) MARK_AS_ADVANCED(${_prefix}_${_library}_LIBRARY) IF(${_prefix}_${_library}_LIBRARY) GET_FILENAME_COMPONENT(_path ${${_prefix}_${_library}_LIBRARY} PATH) LIST(APPEND _paths ${_path}/../include ${_path}/../../include) ENDIF(${_prefix}_${_library}_LIBRARY) SET(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) SET(_libraries_work ${${_prefix}_${_library}_LIBRARY}) IF(_verbose) MESSAGE(STATUS "Searched for library ${_library}: result=${${_prefix}_${_library}_LIBRARY}") ENDIF(_verbose) ENDIF(_libraries_work) ENDFOREACH(_library ${_list}) # Test include SET(_bug_search_include ${_search_include}) #CMAKE BUG!!! SHOULD NOT BE THAT IF(_bug_search_include) FIND_PATH(${_prefix}${_combined_name}_INCLUDE ${_include} ${_paths}) MARK_AS_ADVANCED(${_prefix}${_combined_name}_INCLUDE) IF(${_prefix}${_combined_name}_INCLUDE) IF (_verbose) MESSAGE(STATUS "Includes found: ${_prefix}${_combined_name}_INCLUDE=${${_prefix}${_combined_name}_INCLUDE} ${_prefix}_INCLUDE_FILE=${${_prefix}_INCLUDE_FILE}") ENDIF (_verbose) SET(${_prefix}_INCLUDE_DIR ${${_prefix}${_combined_name}_INCLUDE}) SET(${_prefix}_INCLUDE_FILE ${_include}) ELSE(${_prefix}${_combined_name}_INCLUDE) SET(_libraries_work FALSE) ENDIF(${_prefix}${_combined_name}_INCLUDE) ELSE(_bug_search_include) SET(${_prefix}_INCLUDE_DIR) SET(${_prefix}_INCLUDE_FILE ${_include}) ENDIF(_bug_search_include) # Test this combination of libraries. IF(_libraries_work) SET(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) CHECK_FUNCTION_EXISTS(${_name} ${_prefix}${_combined_name}_WORKS) SET(CMAKE_REQUIRED_LIBRARIES) MARK_AS_ADVANCED(${_prefix}${_combined_name}_WORKS) SET(_libraries_work ${${_prefix}${_combined_name}_WORKS}) IF(_verbose AND _libraries_work) MESSAGE(STATUS "Libraries found: ${_prefix}_${_library}_LIBRARY = ${${_prefix}_${_library}_LIBRARY}") ENDIF(_verbose AND _libraries_work) ENDIF(_libraries_work) # Fin IF(NOT _libraries_work) SET(${LIBRARIES} NOTFOUND) ENDIF(NOT _libraries_work) ENDMACRO(CHECK_LIBRARY_LIST) BTAS-1.0.0/cmake/modules/ConvertIncludesListToCompilerArgs.cmake000066400000000000000000000023331476142407000245570ustar00rootroot00000000000000# # This file is a part of TiledArray. # Copyright (C) 2013 Virginia Tech # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . # # Justus Calvin # Department of Chemistry, Virginia Tech # # ConvertIncludesListToCompileArgs.cmake # Sep 4, 2013 # # # converts a list of include paths (second argument, don't forget to enclose the # list in quotes) into a list of command-line parameters to the compiler/. 
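# A brief usage sketch (the variable names here are hypothetical):
#   set(_cxx_flags "")
#   convert_incs_to_compargs(_cxx_flags "${BTAS_INCLUDE_DIRS}")
#   # _cxx_flags now carries " -I<dir>" for each directory in the list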
#
macro(convert_incs_to_compargs _args _inc_paths )
  # transform include path list into compiler args
  # Add include paths to _args
  foreach(_inc_path ${_inc_paths})
    set(${_args} "${${_args}} -I${_inc_path}")
  endforeach()
endmacro()
BTAS-1.0.0/cmake/modules/ConvertLibrariesListToCompilerArgs.cmake000066400000000000000000000032561476142407000247320ustar00rootroot00000000000000#
# This file is a part of TiledArray.
# Copyright (C) 2013 Virginia Tech
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Justus Calvin
# Department of Chemistry, Virginia Tech
#
# ConvertLibrariesListToCompilerArgs.cmake
# Jul 19, 2013
#
#
# Converts a list of libraries (second argument, don't forget to enclose the
# list in quotes) into a list of command-line parameters to the compiler/linker.
#
macro(convert_libs_to_compargs _args _libs )
  # transform library list into compiler args
  foreach (_lib ${_libs})
    get_filename_component(_ext ${_lib} EXT)
    get_filename_component(_libname ${_lib} NAME_WE)
    if(APPLE AND "${_ext}" STREQUAL ".framework")
      # Handle Apple Frameworks
      get_filename_component(_path ${_lib} PATH)
      if(${_path} STREQUAL "/System/Library/Frameworks")
        set(${_args} "${${_args}} -F${_path} -framework ${_libname}")
      else()
        set(${_args} "${${_args}} -framework ${_libname}")
      endif()
    else()
      # Handle the general case
      set(${_args} "${${_args}} ${_lib}")
    endif()
  endforeach()
endmacro()
BTAS-1.0.0/cmake/modules/LibFindMacros.cmake000066400000000000000000000144151476142407000204610ustar00rootroot00000000000000# Version 1.0 (2013-04-12)
# Public Domain, originally written by Lasse Karkkainen
# Published at http://www.cmake.org/Wiki/CMake:How_To_Find_Libraries
# If you improve the script, please modify the aforementioned wiki page because
# I no longer maintain my scripts (hosted as static files at zi.fi). Feel free
# to remove this entire header if you use real version control instead.
# Changelog:
# 2013-04-12 Added version number (1.0) and this header, no other changes
# 2009-10-08 Originally published

# Works the same as find_package, but forwards the "REQUIRED" and "QUIET" arguments
# used for the current package. For this to work, the first parameter must be the
# prefix of the current package, then the prefix of the new package etc, which are
# passed to find_package.
macro (libfind_package PREFIX)
  set (LIBFIND_PACKAGE_ARGS ${ARGN})
  if (${PREFIX}_FIND_QUIETLY)
    set (LIBFIND_PACKAGE_ARGS ${LIBFIND_PACKAGE_ARGS} QUIET)
  endif (${PREFIX}_FIND_QUIETLY)
  if (${PREFIX}_FIND_REQUIRED)
    set (LIBFIND_PACKAGE_ARGS ${LIBFIND_PACKAGE_ARGS} REQUIRED)
  endif (${PREFIX}_FIND_REQUIRED)
  find_package(${LIBFIND_PACKAGE_ARGS})
endmacro (libfind_package)

# CMake developers made the UsePkgConfig system deprecated in the same release (2.6)
# where they added pkg_check_modules. Consequently I need to support both in my scripts
# to avoid those deprecated warnings. Here's a helper that does just that.
# Works identically to pkg_check_modules, except that no checks are needed prior to use.
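# A brief usage sketch (the package prefix and module name are hypothetical):
#   libfind_pkg_check_modules(Magick_PKGCONF ImageMagick)
#   # on success, pkg-config hints appear in Magick_PKGCONF_INCLUDE_DIRS,
#   # Magick_PKGCONF_LIBRARY_DIRS, Magick_PKGCONF_LDFLAGS and Magick_PKGCONF_CFLAGS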
macro (libfind_pkg_check_modules PREFIX PKGNAME)
  if (${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4)
    include(UsePkgConfig)
    pkgconfig(${PKGNAME} ${PREFIX}_INCLUDE_DIRS ${PREFIX}_LIBRARY_DIRS ${PREFIX}_LDFLAGS ${PREFIX}_CFLAGS)
  else (${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4)
    find_package(PkgConfig)
    if (PKG_CONFIG_FOUND)
      pkg_check_modules(${PREFIX} ${PKGNAME})
    endif (PKG_CONFIG_FOUND)
  endif (${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4)
endmacro (libfind_pkg_check_modules)

# This macro searches the _lib_list list for _lib, and, if found, adds any
# missing dependencies in the _deps list. Also, the ${PREFIX}_FIND_REQUIRED_${_dep}
# variable is set to true if the ${PREFIX}_FIND_REQUIRED_${_lib} is true
macro(libfind_add_dep PREFIX _lib_list _lib _deps)
  list(FIND ${_lib_list} ${_lib} _lib_find)
  if(NOT _lib_find EQUAL -1)
    foreach(_dep ${_deps})
      # Add missing dependencies to the list.
      list(FIND ${_lib_list} ${_dep} _dep_find)
      if(_dep_find EQUAL -1)
        list(APPEND ${_lib_list} ${_dep})
      endif()
      # Set the find required flag for the dependency
      if(${PREFIX}_FIND_REQUIRED_${_lib})
        set(${PREFIX}_FIND_REQUIRED_${_dep} TRUE)
      else()
        set(${PREFIX}_FIND_REQUIRED_${_dep} FALSE)
      endif()
    endforeach()
  endif()
endmacro()

# Do the final processing once the paths have been detected.
# If include dirs are needed, ${PREFIX}_PROCESS_INCLUDES should be set to contain
# all the variables, each of which contains one include directory.
# Ditto for ${PREFIX}_PROCESS_LIBS and library files.
# Will set ${PREFIX}_FOUND, ${PREFIX}_INCLUDE_DIRS and ${PREFIX}_LIBRARIES.
# Also handles errors in case library detection was required, etc.
macro (libfind_process PREFIX)
  # Skip processing if already processed during this run
  if (NOT ${PREFIX}_FOUND)
    # Start with the assumption that the library was found
    set (${PREFIX}_FOUND TRUE)

    # Process all includes and set _FOUND to false if any are missing
    foreach (i ${${PREFIX}_PROCESS_INCLUDES})
      if (${i})
        set (${PREFIX}_INCLUDE_DIRS ${${PREFIX}_INCLUDE_DIRS} ${${i}})
        mark_as_advanced(${i})
      else (${i})
        set (${PREFIX}_FOUND FALSE)
      endif (${i})
    endforeach (i)

    # Process all libraries and set _FOUND to false if any are missing
    foreach (i ${${PREFIX}_PROCESS_LIBS})
      if (${i})
        set (${PREFIX}_LIBRARIES ${${PREFIX}_LIBRARIES} ${${i}})
        mark_as_advanced(${i})
      else (${i})
        set (${PREFIX}_FOUND FALSE)
      endif (${i})
    endforeach (i)

    # Print message and/or exit on fatal error
    if (${PREFIX}_FOUND)
      if (NOT ${PREFIX}_FIND_QUIETLY)
        message (STATUS "Found ${PREFIX} ${${PREFIX}_VERSION}")
      endif (NOT ${PREFIX}_FIND_QUIETLY)
    else (${PREFIX}_FOUND)
      if (${PREFIX}_FIND_REQUIRED)
        foreach (i ${${PREFIX}_PROCESS_INCLUDES} ${${PREFIX}_PROCESS_LIBS})
          message("${i}=${${i}}")
        endforeach (i)
        message (FATAL_ERROR "Required library ${PREFIX} NOT FOUND.\nInstall the library (dev version) and try again.
If the library is already installed, use ccmake to set the missing variables manually.") endif (${PREFIX}_FIND_REQUIRED) endif (${PREFIX}_FOUND) endif (NOT ${PREFIX}_FOUND) endmacro (libfind_process) macro(libfind_header PREFIX _var _header) set(${PREFIX}_INCLUDE_SEARCH_DIR) if(${PREFIX}_ROOT_DIR) set(${PREFIX}_INCLUDE_SEARCH_DIR "${${PREFIX}_ROOT_DIR}/include") endif() find_path(${_var} ${_header} HINTS ${${PREFIX}_INCLUDE_DIR} ${${PREFIX}_INCLUDE_SEARCH_DIR} NO_CMAKE_SYSTEM_PATH) endmacro() macro(libfind_library PREFIX _name) if(NOT ${PREFIX}_${_name}_LIBRARY) if(${PREFIX}_LIBRARY) # Search the user provided libraries for _name foreach(_lib ${${PREFIX}_LIBRARY}) get_filename_component(_lib_name ${_lib} NAME) string(FIND ${_lib_name} ${_name} _lib_found) if(NOT _lib_found EQUAL -1) # Set the component library list set(${PREFIX}_${_name}_LIBRARY ${_lib}) break() endif() endforeach() else() set(${PREFIX}_LIB_SERACH_DIRS) if(${PREFIX}_ROOT_DIR) set(${PREFIX}_LIB_SERACH_DIRS "${${PREFIX}_ROOT_DIR}/lib") endif() # Search for the library find_library(${PREFIX}_${_name}_LIBRARY ${_name} HINTS ${${PREFIX}_PKGCONF_LIBRARY_DIRS} ${${PREFIX}_LIB_SERACH_DIRS} NO_CMAKE_SYSTEM_PATH) endif() endif() # Check that it exists and set the found variable if(${PREFIX}_${_name}_LIBRARY AND EXISTS ${${PREFIX}_${_name}_LIBRARY}) set(${PREFIX}_${_name}_FOUND TRUE) else() set(${PREFIX}_${_name}_FOUND FALSE) endif() mark_as_advanced(${PREFIX}_${_name}_LIBRARY) endmacro() BTAS-1.0.0/cmake/modules/RedefaultableOption.cmake000066400000000000000000000006021476142407000217260ustar00rootroot00000000000000# if local variable is defined, use its value as the default, otherwise use _default # this is consistent with cmake 3.13 and later (see policy CMP0077) macro(redefaultable_option _name _descr _default) if (DEFINED ${_name}) set(${_name}_DEFAULT ${${_name}}) else() set(${_name}_DEFAULT ${_default}) endif() option(${_name} "${_descr}" ${${_name}_DEFAULT}) endmacro() BTAS-1.0.0/doc/000077500000000000000000000000001476142407000130135ustar00rootroot00000000000000BTAS-1.0.0/doc/CMakeLists.txt000066400000000000000000000005001476142407000155460ustar00rootroot00000000000000find_package(Doxygen) if(DOXYGEN_FOUND) configure_file(Doxyfile.in Doxyfile @ONLY IMMEDIATE) add_custom_target_subproject(btas html COMMAND ${DOXYGEN_EXECUTABLE} ${PROJECT_BINARY_DIR}/doc/Doxyfile SOURCES ${PROJECT_BINARY_DIR}/doc/Doxyfile) add_custom_target_subproject(btas doc DEPENDS html-btas) endif() BTAS-1.0.0/doc/Doxyfile.in000066400000000000000000003217021476142407000151330ustar00rootroot00000000000000# Doxyfile 1.8.13 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. 
Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. See http://www.gnu.org/software/libiconv # for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = @PROJECT_NAME@ # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = @BTAS_VERSION@ # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = "Basic Tensor Algebra System" # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = @PROJECT_BINARY_DIR@/doc # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. # Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. 
BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = @PROJECT_SOURCE_DIR@/btas # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. 
JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. 
Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: # FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: # Fortran. In the later case the parser tries to guess whether the code is fixed # or free formatted code, this is the default for Fortran type files), VHDL. For # instance to make doxygen treat .inc files as Fortran files (default is PHP), # and .f files as C (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 0. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. TOC_INCLUDE_HEADINGS = 0 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. 
# The default value is: YES. IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = YES # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. 
LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO, these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. 
Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent. CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. 
# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = YES # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if <section_label> ... \endif and \cond <section_label> # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen.
Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. # The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. 
Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = @PROJECT_SOURCE_DIR@/doc \ @PROJECT_SOURCE_DIR@/btas \ @PROJECT_SOURCE_DIR@/README.md # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: http://www.gnu.org/software/libiconv) for the list of # possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, # *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf. FILE_PATTERNS = *.c \ *.cc \ *.cxx \ *.cpp \ *.c++ \ *.C \ *.d \ *.java \ *.ii \ *.ixx \ *.ipp \ *.i++ \ *.inl \ *.h \ *.hh \ *.hxx \ *.hpp \ *.h++ \ *.idl \ *.odl \ *.cs \ *.php \ *.php3 \ *.inc \ *.m \ *.mm \ *.dox \ *.py \ *.f90 \ *.f \ *.vhd \ *.vhdl # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = @PROJECT_SOURCE_DIR@/doc/devsamp # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. 
# # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = @PROJECT_SOURCE_DIR@/doc/examples \ @PROJECT_BINARY_DIR@/doc/examples # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern.
A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = README.md #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = YES # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see http://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). 
# # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = NO # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file that includes any scripts and style sheets # that doxygen needs, which is dependent on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # http://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. 
Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries 1 will produce a full collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a full expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: http://developer.apple.com/tools/xcode/), introduced with # OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. 
# This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. # The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on # Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the master .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. 
QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual # Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- # folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- # filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location of Qt's # qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the # generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. # After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. 
DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = YES # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. # Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes take effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # http://www.mathjax.org) which uses client side Javascript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want the formulas to look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output.
See the MathJax site (see: # http://docs.mathjax.org/en/latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from http://www.mathjax.org before deployment. # The default value is: http://cdn.mathjax.org/mathjax/latest. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to # search using the keyboard; to jump to the search box use <access key> + S # (what the <access key> is depends on the OS and browser, but it is typically # <CTRL>, <ALT>/<option>, or both). To be a valid Index, a type must be a regular type, define types, and support expressions specified below. \subsubsection sec_TWG_Index_Concept_Index_Description_Types Associated Types
<table>
 <tr> <th> type </th> <th> description </th> <th> comments </th> </tr>
 <tr> <td> \c value_type </td> <td> the type of indices that compose the Index </td> <td> usually a signed integer type </td> </tr>
 <tr> <td> \c const_iterator_type </td> <td> the type of iterators over the indices that compose the Index; dereferences to const value_type </td> <td> this type models the constant Random-Access Iterator concept defined in the C++ Standard, Section 24.2 </td> </tr>
</table>
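For illustration only, a minimal user-defined type could expose these associated types as follows; the name \c MyIndex and the fixed rank of 3 are hypothetical, and the member functions required by the next subsection are elided:
@code
#include <array>
#include <type_traits>

// hypothetical user-defined Index exposing the required associated types
struct MyIndex {
  typedef long value_type;  // usually a signed integer type
  typedef std::array<value_type, 3>::const_iterator const_iterator_type;
  // constructors, operator==, begin(), end(), size() would follow (see the next subsection)
};

static_assert(std::is_signed<MyIndex::value_type>::value,
              "value_type is usually a signed integer type");
@endcode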
\subsubsection sec_TWG_Index_Concept_Index_Description_Expressions Valid Expressions
To be a valid Index, a type must support the following expressions.
<table>
 <tr> <th> expression </th> <th> return type </th> <th> specification </th> <th> comments </th> </tr>
 <tr> <td> \c Index() </td> <td> </td> <td> @code Index i0; @endcode </td> <td> creates a default Index; the value is undefined, and the rank may also be undefined </td> </tr>
 <tr> <td> \c Index(const Index&) </td> <td> </td> <td> @code Index i1(i0); assert(i1 == i0); @endcode </td> <td> </td> </tr>
 <tr> <td> \c operator=(const Index&) </td> <td> \c Index& </td> <td> @code Index i2 = i0; assert(i2 == i0); @endcode </td> <td> </td> </tr>
 <tr> <td> \c operator==(const Index&, const Index&) </td> <td> \c bool </td> <td> @code
assert(i0 == i0);
i1 = i0;
mutate(i1);  // changes the state of i1
assert(!(i1 == i0));
@endcode </td> <td> member or non-member </td> </tr>
 <tr> <td> \c begin() </td> <td> \c const_iterator </td> <td> @code assert(i0.begin() == std::begin(i0)); @endcode </td> <td> returns the iterator to the first element in the Index; if the Index is empty (rank-0), begin() == end() is true </td> </tr>
 <tr> <td> \c end() </td> <td> \c const_iterator </td> <td> @code assert(i0.end() == std::end(i0)); @endcode </td> <td> returns the iterator past the last element in the Index; if the Index is empty (rank-0), begin() == end() is true </td> </tr>
 <tr> <td> \c size() </td> <td> \c uint </td> <td> </td> <td> returns the rank of the Index; const </td> </tr>
</table>
where we used the following notation: \li \c uint is an unsigned integral type, i.e. @code std::is_integral<uint>::value && std::is_unsigned<uint>::value @endcode is true. The following standard containers meet the Index requirements (a usage sketch follows this list):
\li \c std::vector
\li \c std::array
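For concreteness, here is a minimal sketch (not part of the specification) that exercises the expressions above using \c std::vector<long> as the Index; the \c mutate helper is hypothetical and stands for any operation that changes the state of an Index:
@code
#include <cassert>
#include <iterator>
#include <vector>

typedef std::vector<long> Index;  // a standard container that models Index

// hypothetical helper: any operation that changes the state of an Index
void mutate(Index& i) { i.push_back(0); }

int main() {
  const Index i0 = {0, 1, 2};            // an Index of rank 3
  Index i1(i0);                          // copy construction
  assert(i1 == i0);
  Index i2 = i0;                         // copy initialization
  assert(i2 == i0);
  mutate(i1);                            // changes the state of i1
  assert(!(i1 == i0));
  assert(i0.begin() == std::begin(i0));  // const_iterator to the first element
  assert(i0.end() == std::end(i0));      // const_iterator past the last element
  assert(i0.size() == 3);                // size() reports the rank
  return 0;
}
@endcode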
Also, \c std::initializer_list partially satisfies the Index concept. While it is not a regular type, it can be viewed as a constant (immutable) Index. */ BTAS-1.0.0/doc/main.dox000066400000000000000000000020001476142407000144500ustar00rootroot00000000000000 /** \mainpage Basic Tensor Algebra Software (BTAS) and Tensor Working Group (TWG) specification BTAS is a modern C++ software framework for general computation on tensor data. It is a reference implementation of the Tensor Working Group (TWG) specification of concepts. It is possible to customize BTAS by replacing the reference implementations of TWG concepts with custom implementations, as demonstrated below. \section synopsis Synopsis Here's a short C++ example of what is possible with BTAS (see doc/examples/synopsis.cxx ): \snippet synopsis.cxx Synopsis \section twg Tensor Working Group Specification
\li \ref labelTWGIndex "TWG.Index"
\li \ref labelTWGRange "TWG.Range"
\li \ref labelTWGStorage "TWG.Storage"
\li \ref labelTWGTensor "TWG.Tensor"
\section btas Basic Tensor Algebra Software
\li \ref labelBTASRange "BTAS.Range"
\li \ref labelBTASTensor "BTAS.Tensor"
*/BTAS-1.0.0/doc/manual.dox000066400000000000000000003017241476142407000150130ustar00rootroot00000000000000# Doxyfile 1.8.5 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file # that follow. The default is UTF-8 which is also the encoding used for all text # before the first occurrence of this tag. Doxygen uses libiconv (or the iconv # built into libc) for the transcoding. See http://www.gnu.org/software/libiconv # for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = "Basic Tensor Algebra Software" # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = 0.0.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give the viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = "Modern Tensor Algebra Framework in C++" # With the PROJECT_LOGO tag one can specify a logo or icon that is included in # the documentation. The maximum height of the logo should not exceed 55 pixels # and the maximum width should not exceed 200 pixels. Doxygen will copy the logo # to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise cause # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language.
# Possible values are: Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese- # Traditional, Croatian, Czech, Danish, Dutch, English, Esperanto, Farsi, # Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en, # Korean, Korean-en, Latvian, Norwegian, Macedonian, Persian, Polish, # Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, # Turkish, Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. 
STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful if your file system doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that Rational Rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a # new page for each member. If set to NO, the documentation of a member will be # part of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. ALIASES = # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = NO # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO.
OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, Javascript, # C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make # doxygen treat .inc files as Fortran files (default is PHP), and .f files as C # (default is Fortran), use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See http://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibility issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word # or globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match function declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also makes the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = YES # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES.
IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. 
# Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PACKAGE tag is set to YES all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined # locally in source files will be included in the documentation. If set to NO # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local methods, # which are defined in the implementation section but not in the interface, are # included in the documentation. If set to NO only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespaces # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO these classes will be included in the various overviews. This option has # no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # (class|struct|union) declarations. If set to NO these declarations will be # included in the documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO these # blocks will be appended to the function's detailed documentation block. # The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. # The default value is: system dependent.
CASE_SENSE_NAMES = NO # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. SHOW_INCLUDE_FILES = YES # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO the members will appear in declaration order. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the # todo list. This list is created by putting \todo commands in the # documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the # test list. This list is created by putting \test commands in the # documentation. # The default value is: YES. 
GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES the list # will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. 
# For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. Do not use file names with spaces, bibtex cannot handle them. See # also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. # The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO doxygen will only warn about wrong or incomplete parameter # documentation, but not about the absence of documentation. # The default value is: NO. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. # Note: If this tag is empty the current directory is searched. INPUT = . ../btas # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: http://www.gnu.org/software/libiconv) for the list of # possible encodings. # The default value is: UTF-8. 
INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank the # following patterns are tested: *.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, # *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, # *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, # *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, # *.qsf, *.as and *.js. FILE_PATTERNS = # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = examples # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # <filter> <input-file> # # where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored.
# # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER ) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # function all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES, then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. 
REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen's built-in # source browser. The htags tool is part of GNU's global source tagging system # (see http://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the config file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in # which the alphabetical index list will be split. # Minimum value: 1, maximum value: 20, default value: 5. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES doxygen will generate HTML output. # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES.
HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML, the header file must include any scripts and style sheets # that doxygen needs, which depend on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user- # defined cascading style sheet that is included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet file to the output directory. For an example # see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES.
HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the stylesheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # http://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to NO can help when comparing the output of multiple runs. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries 1 will produce a full collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a full expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: http://developer.apple.com/tools/xcode/), introduced with # OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a # Makefile in the HTML output directory. Running make will produce the docset in # that directory and running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html # for more information. # The default value is: NO. 
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_DOCSET        = NO

# This tag determines the name of the docset feed. A documentation feed
# provides an umbrella under which multiple documentation sets from a single
# provider (such as a company or product suite) can be grouped.
# The default value is: Doxygen generated docs.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_FEEDNAME        = "Doxygen generated docs"

# This tag specifies a string that should uniquely identify the documentation
# set bundle. This should be a reverse domain-name style string, e.g.
# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_BUNDLE_ID       = org.doxygen.Project

# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
# the documentation publisher. This should be a reverse domain-name style
# string, e.g. com.mycompany.MyDocSet.documentation.
# The default value is: org.doxygen.Publisher.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_PUBLISHER_ID    = org.doxygen.Publisher

# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
# The default value is: Publisher.
# This tag requires that the tag GENERATE_DOCSET is set to YES.

DOCSET_PUBLISHER_NAME  = Publisher

# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
# index.hhp is a project file that can be read by Microsoft's HTML Help
# Workshop (see:
# http://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows.
#
# The HTML Help Workshop contains a compiler that can convert all HTML output
# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
# files are now used as the Windows 98 help format, and will replace the old
# Windows help format (.hlp) on all Windows platforms in the future.
# Compressed HTML files also contain an index, a table of contents, and you
# can search for words in the documentation. The HTML workshop also contains a
# viewer for compressed HTML files.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_HTMLHELP      = NO

# The CHM_FILE tag can be used to specify the file name of the resulting .chm
# file. You can add a path in front of the file if the result should not be
# written to the html output directory.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

CHM_FILE               =

# The HHC_LOCATION tag can be used to specify the location (absolute path
# including file name) of the HTML help compiler (hhc.exe). If non-empty
# doxygen will try to run the HTML help compiler on the generated index.hhp.
# The file has to be specified with full path.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

HHC_LOCATION           =

# The GENERATE_CHI flag controls whether a separate .chi index file is
# generated (YES) or whether it should be included in the master .chm file
# (NO).
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

GENERATE_CHI           = NO

# The CHM_INDEX_ENCODING is used to encode the HtmlHelp index (hhk), content
# (hhc) and project file content.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

CHM_INDEX_ENCODING     =
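# Illustrative sketch only (the file name and compiler path below are
# hypothetical): to build a single .chm file on a Windows machine with HTML
# Help Workshop installed, one could set
#   GENERATE_HTMLHELP = YES
#   CHM_FILE          = btas.chm
#   HHC_LOCATION      = "C:/Program Files (x86)/HTML Help Workshop/hhc.exe"
# after which doxygen runs the help compiler on the generated index.hhp to
# produce the compiled help file.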
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

BINARY_TOC             = NO

# The TOC_EXPAND flag can be set to YES to add extra items for group members
# to the table of contents of the HTML help documentation and to the tree
# view.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTMLHELP is set to YES.

TOC_EXPAND             = NO

# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed
# Help (.qch) of the generated HTML documentation.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_QHP           = NO

# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to
# specify the file name of the resulting .qch file. The path specified is
# relative to the HTML output folder.
# This tag requires that the tag GENERATE_QHP is set to YES.

QCH_FILE               =

# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
# Project output. For more information please see Qt Help Project / Namespace
# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_NAMESPACE          = org.doxygen.Project

# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
# Help Project output. For more information please see Qt Help Project /
# Virtual Folders (see:
# http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-folders).
# The default value is: doc.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_VIRTUAL_FOLDER     = doc

# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
# filter to add. For more information please see Qt Help Project / Custom
# Filters (see:
# http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_CUST_FILTER_NAME   =

# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
# custom filter to add. For more information please see Qt Help Project /
# Custom Filters (see:
# http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_CUST_FILTER_ATTRS  =

# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
# project's filter section matches. Qt Help Project / Filter Attributes (see:
# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
# This tag requires that the tag GENERATE_QHP is set to YES.

QHP_SECT_FILTER_ATTRS  =

# The QHG_LOCATION tag can be used to specify the location of Qt's
# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
# generated .qhp file.
# This tag requires that the tag GENERATE_QHP is set to YES.

QHG_LOCATION           =
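# Illustrative sketch only (namespace, folder, file name, and path below are
# hypothetical): to produce a Qt Compressed Help file for use in Qt Assistant
# or Qt Creator, one could set
#   GENERATE_QHP       = YES
#   QHP_NAMESPACE      = org.btas.doc
#   QHP_VIRTUAL_FOLDER = btas
#   QCH_FILE           = ../btas.qch
#   QHG_LOCATION       = /usr/bin/qhelpgenerator
# after which doxygen runs qhelpgenerator on the generated .qhp file; note
# that QCH_FILE is interpreted relative to the HTML output folder.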
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_ECLIPSEHELP   = NO

# A unique identifier for the Eclipse help plugin. When installing the plugin
# the directory name containing the HTML and XML files should also have this
# name. Each documentation set should have its own identifier.
# The default value is: org.doxygen.Project.
# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.

ECLIPSE_DOC_ID         = org.doxygen.Project

# If you want full control over the layout of the generated HTML pages it
# might be necessary to disable the index and replace it with your own. The
# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at
# the top of each HTML page. A value of NO enables the index and the value YES
# disables it. Since the tabs in the index contain the same information as the
# navigation tree, you can set this option to YES if you also set
# GENERATE_TREEVIEW to YES.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

DISABLE_INDEX          = NO

# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
# structure should be generated to display hierarchical information. If the
# tag value is set to YES, a side panel will be generated containing a
# tree-like index structure (just like the one that is generated for HTML
# Help). For this to work a browser that supports JavaScript, DHTML, CSS and
# frames is required (i.e. any modern browser). Windows users are probably
# better off using the HTML help feature. Via custom stylesheets (see
# HTML_EXTRA_STYLESHEET) one can further fine-tune the look of the index. As
# an example, the default style sheet generated by doxygen has an example that
# shows how to put an image at the root of the tree instead of the
# PROJECT_NAME. Since the tree basically has the same information as the tab
# index, you could consider setting DISABLE_INDEX to YES when enabling this
# option; an example configuration is sketched below.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

GENERATE_TREEVIEW      = NO

# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
# that doxygen will group on one line in the generated HTML documentation.
#
# Note that a value of 0 will completely suppress the enum values from
# appearing in the overview section.
# Minimum value: 0, maximum value: 20, default value: 4.
# This tag requires that the tag GENERATE_HTML is set to YES.

ENUM_VALUES_PER_LINE   = 4

# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
# to set the initial width (in pixels) of the frame in which the tree is
# shown.
# Minimum value: 0, maximum value: 1500, default value: 250.
# This tag requires that the tag GENERATE_HTML is set to YES.

TREEVIEW_WIDTH         = 250

# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to
# external symbols imported via tag files in a separate window.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

EXT_LINKS_IN_WINDOW    = NO
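# Illustrative sketch only (the values below are examples, not the settings
# used here): following the advice above, a layout driven entirely by the
# navigation tree could be obtained with
#   GENERATE_TREEVIEW = YES
#   DISABLE_INDEX     = YES
#   TREEVIEW_WIDTH    = 300
# which drops the redundant tab index and widens the side panel slightly.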
# Use this tag to change the font size of LaTeX formulas included as images in
# the HTML documentation. When you change the font size after a successful
# doxygen run you need to manually remove any form_*.png images from the HTML
# output directory to force them to be regenerated.
# Minimum value: 8, maximum value: 50, default value: 10.
# This tag requires that the tag GENERATE_HTML is set to YES.

FORMULA_FONTSIZE       = 10

# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
# generated for formulas are transparent PNGs. Transparent PNGs are not
# supported properly by IE 6.0, but are supported by all modern browsers.
#
# Note that when changing this option you need to delete any form_*.png files
# in the HTML output directory before the changes take effect.
# The default value is: YES.
# This tag requires that the tag GENERATE_HTML is set to YES.

FORMULA_TRANSPARENT    = YES

# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
# http://www.mathjax.org) which uses client side Javascript for the rendering
# instead of using prerendered bitmaps. Use this if you do not have LaTeX
# installed or if you want the formulas to look prettier in the HTML output.
# When enabled you may also need to install MathJax separately and configure
# the path to it using the MATHJAX_RELPATH option.
# The default value is: NO.
# This tag requires that the tag GENERATE_HTML is set to YES.

USE_MATHJAX            = NO

# When MathJax is enabled you can set the default output format to be used for
# the MathJax output. See the MathJax site (see:
# http://docs.mathjax.org/en/latest/output.html) for more details.
# Possible values are: HTML-CSS (which is slower, but has the best
# compatibility), NativeMML (i.e. MathML) and SVG.
# The default value is: HTML-CSS.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_FORMAT         = HTML-CSS

# When MathJax is enabled you need to specify the location relative to the
# HTML output directory using the MATHJAX_RELPATH option. The destination
# directory should contain the MathJax.js script. For instance, if the mathjax
# directory is located at the same level as the HTML output directory, then
# MATHJAX_RELPATH should be ../mathjax. The default value points to the
# MathJax Content Delivery Network so you can quickly see the result without
# installing MathJax. However, it is strongly recommended to install a local
# copy of MathJax from http://www.mathjax.org before deployment.
# The default value is: http://cdn.mathjax.org/mathjax/latest.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest

# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
# extension names that should be enabled during MathJax rendering. For example
# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_EXTENSIONS     =

# The MATHJAX_CODEFILE tag can be used to specify a file with javascript
# pieces of code that will be used on startup of the MathJax code. See the
# MathJax site (see: http://docs.mathjax.org/en/latest/output.html) for more
# details. For an example see the documentation.
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_CODEFILE       =

# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
# the HTML output. The underlying search engine uses javascript and DHTML and
# should work on any modern browser. Note that when using HTML help
# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
# there is already a search function so this one should typically be disabled.
# For large projects the javascript based search engine can be slow, then
# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible
# to search using the keyboard; to jump to the search box use <access key> + S
# (what the <access key> is depends on the OS and browser, but it is typically
# <CTRL>, <ALT>/