vc-intrinsics-0.22.1/.github/workflows/on-pr-update.yml

#=========================== begin_copyright_notice ============================
#
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
#============================ end_copyright_notice =============================

name: Notify

on:
  status:
  pull_request_review:

permissions:
  contents: read

jobs:
  notify:
    runs-on: [self-hosted, linux, gfx]
    steps:
      - name: Get branch name if status changed
        if: github.event_name == 'status'
        env:
          PR_BR_NAME: ${{ github.event.branches[0].name }}
        run: >
          echo "os_branch=$PR_BR_NAME" >> $GITHUB_ENV
      - name: Get branch name if pr review state changed
        if: github.event_name == 'pull_request_review'
        env:
          PR_HEAD_REF: ${{ github.event.pull_request.head.ref }}
        run: >
          echo "os_branch=$PR_HEAD_REF" >> $GITHUB_ENV
      - name: Notify
        run: ${{ secrets.NOTIFY }}

vc-intrinsics-0.22.1/.gitignore

#==============================================================================#
# This file specifies intentionally untracked files that git should ignore.
# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html
#
# This file is intentionally different from the output of `git svn show-ignore`,
# as most of those are useless.
#==============================================================================#

#==============================================================================#
# File extensions to be ignored anywhere in the tree.
#==============================================================================#

# Temp files created by most text editors.
*~
# Merge files created by git.
*.orig
# Byte compiled python modules.
*.pyc
# vim swap files
.*.sw?
.sw?
# OS X specific files.
.DS_store

# Nested build directory
/build

#==============================================================================#
# Explicit files to ignore (only matches one).
#==============================================================================#

# Various tag programs
/tags
/TAGS
/GPATH
/GRTAGS
/GSYMS
/GTAGS
.gitusers
autom4te.cache
cscope.files
cscope.out
autoconf/aclocal.m4
autoconf/autom4te.cache
/compile_commands.json
tags

# Visual Studio built-in CMake configuration
/CMakeSettings.json

# CLion project configuration
/.idea

#==============================================================================#
# Directories to ignore (do not add trailing '/'s, they skip symlinks).
#==============================================================================#

# Sphinx build tree, if building in-source dir.
GenXIntrinsics/docs/_build
GenXIntrinsics/docs/autogenerated

# VS2017 and VSCode config files.
.vscode
.vs

# clangd index
.clangd

vc-intrinsics-0.22.1/CMakeLists.txt

#=========================== begin_copyright_notice ============================
#
# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
#============================ end_copyright_notice =============================

cmake_minimum_required(VERSION 3.13.4)

set(LLVM_GENX_INTRINSICS_VERSION 1.0)
set(LLVM_GENX_INTRINSICS_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR})

option(LLVM_GENX_INTRINSICS_IN_TREE_INSTALL "Do install after in-tree build" FALSE)

# Check whether we are building inside LLVM or not.
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
  get_filename_component(LLVM_CMAKE_PATH ${LLVM_DIR} ABSOLUTE BASE_DIR ${CMAKE_BINARY_DIR})
  set(BUILD_EXTERNAL YES)
  project(LLVM_GenXIntrinsics
    VERSION ${LLVM_GENX_INTRINSICS_VERSION}
    LANGUAGES C CXX
  )
  set(CMAKE_CXX_STANDARD_REQUIRED OFF)

  find_package(LLVM REQUIRED HINTS "${LLVM_CMAKE_PATH}")

  set(CMAKE_MODULE_PATH
    ${CMAKE_MODULE_PATH}
    ${LLVM_CMAKE_DIR}
  )
  include(AddLLVM)
  include(HandleLLVMOptions)

  if (LLVM_PACKAGE_VERSION GREATER_EQUAL 14)
    set(CMAKE_CXX_STANDARD 17)
  else ()
    set(CMAKE_CXX_STANDARD 14)
  endif()

  include_directories("${LLVM_INCLUDE_DIR}")
  link_directories("${LLVM_LIBRARY_DIR}")
  message(STATUS "Found LLVM: ${LLVM_VERSION}")
else(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
  set(BUILD_EXTERNAL NO)

  # LLVM_CMAKE_DIR is not set for non-standalone builds. Use LLVM_CMAKE_PATH
  # instead. (see clang/CMakeLists.txt)
  if(NOT LLVM_CMAKE_DIR)
    set(LLVM_CMAKE_DIR ${LLVM_CMAKE_PATH})
  endif()
endif(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)

if(LLVM_VERSION_MAJOR VERSION_LESS 8)
  message(FATAL_ERROR "VC-intrinsics does not support LLVM version ${LLVM_VERSION}")
endif()

if (DEFINED PYTHON_EXECUTABLE)
  get_filename_component(PYTHON_EXECUTABLE "${PYTHON_EXECUTABLE}" ABSOLUTE)
else()
  find_package(Python REQUIRED COMPONENTS Interpreter)
  if("${Python_VERSION}" VERSION_LESS 2.7)
    message(FATAL_ERROR "Python 2.7 or newer is required")
  endif()
  set(PYTHON_EXECUTABLE "${Python_EXECUTABLE}")
endif()

add_subdirectory(GenXIntrinsics)

vc-intrinsics-0.22.1/GenXIntrinsics/CMakeLists.txt

#=========================== begin_copyright_notice ============================
#
# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
#============================ end_copyright_notice =============================

cmake_minimum_required(VERSION 3.13.4)

include(GNUInstallDirs)

set(GENX_INTRINSICS_MAIN_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
set(GENX_INTRINSICS_MAIN_DIR ${CMAKE_CURRENT_SOURCE_DIR})
if(IGC_INFRA)
  set(GENX_INTRINSICS_MAIN_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include PARENT_SCOPE)
endif()

include(cmake/utils.cmake)

# Global config.
add_compile_definitions(VC_INTR_LLVM_VERSION_MAJOR=${LLVM_VERSION_MAJOR})

include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)

set(GENX_INTRINSICS_DESCRIPTION "GenXIntrinsicDescription.gen")

add_subdirectory(include/llvm)
add_subdirectory(lib)

# LLVM doesn't install gtest, which is required for our tests,
# so we cannot simply enable the current unittests with prebuilt LLVM.
if(NOT BUILD_EXTERNAL)
  add_subdirectory(unittests)
endif()

# Experimental lit tests for intrinsic passes. They require plugin support,
# so they are only available with LLVM dylib (for stability).
if(VC_INTR_ENABLE_LIT_TESTS)
  if(LLVM_LINK_LLVM_DYLIB)
    message(STATUS "VC intrinsics lit tests are enabled")
    add_subdirectory(test)
  else()
    message(STATUS "VC intrinsics lit tests require dynamic LLVM, skipping")
  endif()
endif()

install(DIRECTORY include/llvm
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  COMPONENT genx-intrinsics-headers
  FILES_MATCHING
  PATTERN "*.h"
)

# cmake creates too many subdirectories in the build directory, and then
# "install(DIRECTORY" installs them even if they are empty, so the generated
# file has to be installed separately.
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/llvm/GenXIntrinsics/${GENX_INTRINSICS_DESCRIPTION}
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/llvm/GenXIntrinsics
  COMPONENT genx-intrinsics-headers
)

install(TARGETS LLVMGenXIntrinsics EXPORT LLVMGenXIntrinsicsTargets
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)

# Install Config-file only for external build
if(BUILD_EXTERNAL OR LLVM_GENX_INTRINSICS_IN_TREE_INSTALL)
  # Legacy export. To remove when all clients switch to the new name.
  install(EXPORT LLVMGenXIntrinsicsTargets
    FILE LLVMGenXIntrinsicsConfig.cmake
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/LLVMGenXIntrinsics
  )

  set(PACKAGE_NAME VCIntrinsics${LLVM_VERSION_MAJOR})
  install(EXPORT LLVMGenXIntrinsicsTargets
    FILE ${PACKAGE_NAME}Config.cmake
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PACKAGE_NAME}
  )
endif() # BUILD_EXTERNAL OR LLVM_GENX_INTRINSICS_IN_TREE_INSTALL

vc-intrinsics-0.22.1/GenXIntrinsics/cmake/utils.cmake

#=========================== begin_copyright_notice ============================
#
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
#============================ end_copyright_notice =============================

# Convenience function to get the list of LLVM components for
# target_link_libraries. If LLVM was configured with the LLVM dylib, then
# LLVM targets included in the dylib are replaced with the single LLVM
# library. Otherwise, just return the passed libraries.
# RET -- name of the variable with the returned targets list. All other
# arguments are components to process.
function(vc_get_llvm_targets RET)
  set(TARGETS ${ARGN})

  if (LLVM_LINK_LLVM_DYLIB)
    # Drop all components, though it is probably not right
    # and llvm_map_components_to_libnames should be used as filter.
    # However, in external build it returns empty list for "all"
    # so filtering is not really done.
    if ("${LLVM_DYLIB_COMPONENTS}" STREQUAL "all")
      set(TARGETS "")
    else()
      list(REMOVE_ITEM TARGETS ${LLVM_DYLIB_COMPONENTS})
    endif()
  endif()

  # Expand the rest of the component names to target names.
  llvm_map_components_to_libnames(TARGETS ${TARGETS})

  if (LLVM_LINK_LLVM_DYLIB)
    set(TARGETS ${TARGETS} LLVM)
  endif()

  set(${RET} ${TARGETS} PARENT_SCOPE)
endfunction()
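# Illustrative usage of vc_get_llvm_targets (a sketch, not part of this file;
# "MyVCPass" is a hypothetical target name):
#
#   vc_get_llvm_targets(VC_LLVM_LIBS Core Support)
#   target_link_libraries(MyVCPass PRIVATE ${VC_LLVM_LIBS})
#
# With a monolithic LLVM dylib this resolves to the single LLVM library;
# otherwise it expands Core and Support to their per-component library names.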
vc-intrinsics-0.22.1/GenXIntrinsics/docs/GenXLangRef.rst

..  ========================= begin_copyright_notice ============================

    Copyright (C) 2015-2021 Intel Corporation

    SPDX-License-Identifier: MIT

    =========================== end_copyright_notice =============================

============================
LLVM IR for the GenX backend
============================

.. contents::
   :local:

Introduction
============

The GenX backend accepts `LLVM intermediate representation
<https://llvm.org/docs/LangRef.html>`_ with certain restrictions, and with
additional GenX-specific intrinsics.

LLVM IR representation of EU code
=================================

Whole thread representation
---------------------------

When using LLVM with the GenX backend, the LLVM IR represents execution on a
whole EU thread. This is distinct from IGC and Beignet (the OpenCL compiler
for the Intel open source driver), in which the LLVM IR represents just a
single work item, and a later stage of the compiler after LLVM IR parallelizes
that into simd4, 8, 16 or 32.

The GenX backend thus gives more flexibility for a client that needs full
control over what is executed in the EU thread for one of these reasons:

#. the compiler needs to expose that control in the language (like CM);

#. the compiler wants to do some parallelization, but in a more flexible way
   (e.g. different SIMD width for different parts of the code). This could be
   done as an LLVM pass before reaching the GenX backend, or it could be done
   even before reaching LLVM;

#. the compiler wants to expose "cross lane" functionality, where an algorithm
   can be executed in parallel within a single EU thread, but the separate
   lanes need to access each other's data at some points.

Linkage
-------

A kernel is represented by a function with ``dllexport`` linkage. A non-kernel
function is represented by a function with ``public`` linkage. A subroutine is
represented by a function with ``internal`` linkage. A subroutine is allowed
to be accessed from multiple kernels and non-kernel functions; the GenX
backend clones such a subroutine so it appears with each kernel and function
that uses it in the vISA.

No other linkage is supported. Global variables are not supported. (The CM
compiler has its own CMABI pass that works around this by passing such a
variable into and out of any subroutine that uses it.)

Kernel information
------------------

The genx.kernels named metadata node contains a metadata node for each kernel,
containing:

* 0: reference to Function
* 1: kernel name
* 2: asm name
* 3: kernel argument kinds (i32 for each kernel argument)
* 4: slm size in bytes
* 5: kernel argument offsets (i32 for each kernel argument)

Types
-----

Only fundamental types that correspond to Gen types are allowed:

* i1 for predicate
* i8 for b/ub
* i16 for w/uw
* half for hf
* i32 for d/ud
* float for f
* i64 for q/uq
* double for df

Arbitrary size vectors of these types are allowed.

Arithmetic/logic operator
-------------------------

There is no vector width restriction on operands and result of an
arithmetic/logic operator.

Where the operands and result have the same type, and no saturation is
required, the corresponding LLVM IR instruction can be used, for example
``add``.

A floating point operation where saturation is required is represented by the
LLVM IR instruction followed by the ``llvm.genx.sat`` intrinsic.

For an integer operation, vISA allows the operands to have one type and the
result to have a different type. This is represented by an intrinsic,
typically with signed/unsigned variants and variants with saturation.
Saturation cannot be represented by a separate intrinsic as for floating
point, because the intermediate result in the EU's ALU has one more bit than
the execution size.

An intrinsic is also required where the operator does not have an LLVM IR
instruction equivalent, such as ``min``.
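For illustration, a minimal sketch of a saturating floating point add (the
``.v8f32`` overload suffix is assumed here; it follows the mangling rules
described below):

.. code-block:: text

  %t = fadd <8 x float> %a, %b
  %sat = call <8 x float> @llvm.genx.sat.v8f32(<8 x float> %t)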
Load and store
--------------

Load and store instructions are allowed only to load/store from/to a static
alloca, i.e. ones that are removed by a mem2reg pass.
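For clarity, this is the kind of access that is accepted, a temporary that
mem2reg can promote back to SSA values (a minimal sketch):

.. code-block:: text

  %p = alloca <8 x i32>
  store <8 x i32> %v, <8 x i32>* %p
  %w = load <8 x i32>, <8 x i32>* %p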
LLVM IR mangling rules
----------------------

An overloaded intrinsic has the names of its overloaded argument types encoded
into its function name, each preceded by a period. Only those types which are
overloaded result in a name suffix. Arguments whose type is matched against
another type do not.

.. code-block:: text

  @llvm.genx.name.OverloadedType1.OverloadedType2(...)

Overloading attributes:

* "p" - pointer, followed by the number of the addrspace
* "v" - vector, followed by the number of elements
* "a" - array, followed by the number of elements
* "s" - struct, followed by what the structure consists of
* "f" - floating point type, followed by the size of the type in bits
* "i" - integer type, followed by the size of the type in bits

Example:

.. code-block:: text

  call void @llvm.genx.vstore.v16i8.p0v16i8(<16 x i8> %conv2, <16 x i8>* %ir)

* v16i8 - vector of 16 8-bit ints, first overloaded argument
* p0v16i8 - pointer to vector of 16 8-bit ints, second overloaded argument

More info about the names of intrinsics:
https://llvm.org/docs/LangRef.html#intrinsic-functions

Vector regions
==============

Introduction to region-based addressing
---------------------------------------

The Gen hardware, and thus vISA, provide the ability for a vector operand of
an instruction to be a region within a register.

1D region
^^^^^^^^^

A 1D region has the following parameters:

* The execution size is the number of elements in the region. This is
  determined by the instruction in which the operand appears.

* The horizontal stride (sometimes called just the stride) is the number of
  elements to step between each element of the region. This is 1 for a
  contiguous region, but can take other values, including 0 (in a source
  operand only) to splat the same scalar value across the whole operand.

* The start index indicates which element within the register is the start of
  the region.

The stride must be a constant. The start index can be a variable (giving an
indirect operand).

Here is a simple contiguous 1D region (yellow), with execution size 4, stride
1 and start index 3, in a register with 8 elements:

.. image:: GenXLangRef_region_example1.png

Here is a non-contiguous 1D region, with execution size 4, stride 2 and start
index 3, in a register with 16 elements:

.. image:: GenXLangRef_region_example2.png

2D region
^^^^^^^^^

A 2D region has multiple rows where each row is a 1D region. It has the
following parameters:

* The execution size is the number of elements in the region. This is
  determined by the instruction in which the operand appears.

* The vertical stride (or vstride) is the number of elements to step between
  the start of one row and the start of the next row. It can be 0 (in a
  source operand only) to repeat the same row multiple times.

* The width is the number of elements per row.

* The horizontal stride (or stride) is the number of elements to step between
  each element of the region within a row. This is 1 for a contiguous row,
  but can take other values, including 0 (in a source operand only) to splat
  the same scalar value across the whole row.

* The start index indicates which element within the register is the start of
  the region.

The vstride, width and stride must be constants. The start index can be a
scalar variable (giving an indirect operand) or a vector variable with an
element per row of the region (giving a multi-indirect operand).

Here is a 2D region with contiguous rows, with:

* execution size 8 (the number of elements in the region)
* vstride 8 (the step between the start of one row (3) and the start of the
  next (11))
* width 4 (the number of elements in a row)
* stride 1 (the step between each element in a row)
* start index 3

.. image:: GenXLangRef_region_example3.png

Here is a 2D region with:

* execution size 9 (the number of elements in the region)
* vstride 7 (the step between the start of one row (8) and the start of the
  next (15))
* width 3 (the number of elements in a row)
* stride 3 (the step between each element in a row)
* start index 8

.. image:: GenXLangRef_region_example4.png
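Putting the parameters together: element ``c`` of row ``r`` of a region is
read from element index ``start + r * vstride + c * stride`` of the register.
In the last example, row 2, column 1 is element 8 + 2*7 + 1*3 = 25.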
Notes
^^^^^

Some points that arise from these examples:

* The execution size must be a multiple of the width. Execution size divided
  by width is the number of rows in a 2D region. If the number of rows is 1,
  then it is a 1D region.

* Gen and vISA only support powers of two within certain limits for the
  region parameters other than start index. Also 2D regions are allowed only
  in a source operand. But source languages like CM using regions do not have
  these restrictions, and the compiler needs to allow for the more general
  case.

* The matrix representation shown in the last two examples is not a property
  of the register from/into which the region is read/written. Rather, it is a
  property of the region parameters. We show a matrix whose width is the
  vstride of the region. In the last example, the register is not even a
  multiple of vstride number of elements, so we have some left-over elements
  at the bottom.

Region access in LLVM IR
------------------------

Region access is represented in LLVM IR by intrinsics with the same region
parameters as above. The representation is close to the hardware
capabilities, but:

* The vISA/hardware restrictions on the region parameters being powers of 2
  within certain ranges are not initially imposed. The GenX backend includes
  a legalization pass that imposes these restrictions, and other gen-specific
  ones such as not being allowed to cross 2 GRF boundaries and not being
  allowed a 2D region as a destination, by splitting up region accesses.

* There is an extra *parent width* region parameter used for optimizations
  when the GenX backend collapses and legalizes region accesses.

* To make the parent width parameter effective when a variable start index is
  involved, a compiler frontend should compile a 2D region access as two
  separate accesses, one for the rows and one for the columns within the
  rows.

The restriction still needs to be imposed that the region is entirely
contained within the vector it is being read from or written to, otherwise
undefined behavior ensues at runtime.

Reading a region
^^^^^^^^^^^^^^^^

Reading a region, that is extracting certain elements from a vector to make a
new smaller vector, is represented by the ``llvm.genx.rdregioni`` or
``llvm.genx.rdregionf`` intrinsic. (There are integer and fp variants simply
because the tablegen language for declaring an overloaded intrinsic does not
allow an "any scalar or vector type".)

The operands to this intrinsic are:

* the vector being read from;
* vstride (ignored for a 1D region, that is width == execution size);
* width;
* stride;
* start index;
* parent width (see below).

The execution size is implied by the vector width of the return value of the
intrinsic call.

The vstride, width and stride are expressed in elements. But the start index
is expressed in bytes, as this is what the hardware does in the variable
index case.

A read from the first example region from above:

.. image:: GenXLangRef_region_example1.png

is represented by the following LLVM IR (assuming the start index is
constant, and the element type is i32):

.. code-block:: text

  %v1 = <8 x i32> something
  %region1 = call <4 x i32> @llvm.genx.rdregioni.v4i32.v8i32(<8 x i32> %v1,
      i32 0, i32 4, i32 1, i16 12, i32 undef)

The vstride is set to 0, but is ignored because it is a 1D region. The width
is 4 (elements) and the stride is 1. The start index is 12, but remember this
is in bytes, so it means 3 elements. (The elements have type i32.)

A read from the fourth example region from above:

.. image:: GenXLangRef_region_example4.png

is represented by this LLVM IR (assuming constant start index and i32 element
type):

.. code-block:: text

  %v2 = <30 x i32> something
  %region2 = call <9 x i32> @llvm.genx.rdregioni.v9i32.v30i32(<30 x i32> %v2,
      i32 7, i32 3, i32 3, i16 32, i32 undef)

With:

* execution size 9 (the number of elements in the region)
* vstride 7 (the step between the start of one row (8) and the start of the
  next (15))
* width 3 (the number of elements in a row)
* stride 3 (the step between each element in a row)
* start index 32 bytes, which is 8 elements.

The diagram above shows the input vector %v2 as a matrix of width 7 with two
elements left over in a partial row. This 7 is not a property of the input
vector value, which is just a vector (LLVM IR does not represent matrices).
Instead it is the vstride of the region we are reading.
Writing a region
^^^^^^^^^^^^^^^^

Writing a region, that is inserting the elements of a vector into certain
positions of another vector, yielding a new value for the latter vector, is
represented by the ``llvm.genx.wrregioni`` or ``llvm.genx.wrregionf``
intrinsic. (There are integer and fp variants simply because the tablegen
language for declaring an overloaded intrinsic does not allow an "any scalar
or vector type".)

In SSA, each value is defined exactly once. Since we are representing a
vector value as an LLVM IR value, the only way of representing a write to a
region, which is a partial write, is for the operation to take the old value
of the vector as an input, and to return the updated value of the vector. It
is then up to the GenX backend to ensure that the two values are allocated to
the same register.

The operands to this intrinsic are:

* the "old value" of the vector being written into;
* the "new value", that is, the vector or scalar value to write into the
  region;
* vstride;
* width;
* stride;
* start index;
* parent width (see below);
* mask.

The execution size is the vector width of the "new value" input. For a 1D
region (width == execution size), vstride is ignored.

As above in llvm.genx.rdregion, the vstride, width and stride are expressed
in elements, but the start index is expressed in bytes.

Using the same two example regions as above in llvm.genx.rdregion:

.. image:: GenXLangRef_region_example1.png

Writing the elements of %region3 into the region in %v3, generating a new
value %v3.new, is represented by:

.. code-block:: text

  %v3 = <8 x i32> something
  %region3 = <4 x i32> something
  %v3.new = call <8 x i32> @llvm.genx.wrregioni.v8i32.v4i32.i1(<8 x i32> %v3,
      <4 x i32> %region3, i32 0, i32 4, i32 1, i16 12, i32 undef, i1 1)

The .v8i32.v4i32.i1 decoration on the intrinsic name arises from LLVM's
intrinsic overloading mechanism. The v8i32 is the type of the return value,
and the v4i32 is the type of the value being written in to the region. The i1
is the type of the mask operand; see below.

The vstride is set to 0, but is ignored because it is a 1D region. The width
is 4 (elements) and the stride is 1. The start index is 12, but remember this
is in bytes, so it means 3 elements. (The elements have type i32.)

.. image:: GenXLangRef_region_example4.png

Writing the elements of %region4 into the region in %v4, generating a new
value %v4.new, is represented by:

.. code-block:: text

  %v4 = <30 x i32> something
  %region4 = <9 x i32> something
  %v4.new = call <30 x i32> @llvm.genx.wrregioni.v30i32.v9i32.i1(<30 x i32> %v4,
      <9 x i32> %region4, i32 7, i32 3, i32 3, i16 32, i32 undef, i1 1)

With:

* execution size 9 (the number of elements in the region)
* vstride 7 (the step between the start of one row (8) and the start of the
  next (15))
* width 3 (the number of elements in a row)
* stride 3 (the step between each element in a row)
* start index 32 bytes, which is 8 elements.

The mask operand
^^^^^^^^^^^^^^^^

The wrregion* intrinsics have an extra mask operand. This is used to control
which elements in the region are actually written, for use in predication and
SIMD control flow.

Most generally, the mask operand is a vector of i1 with the same vector width
as the value being written in to the region, and it is variable. If any
element of the mask is 0, the corresponding element of the value is not
written in to the region, leaving that element unchanged.

The most common case, used when there is no predication, is that the mask is
all ones. As a shorthand, this is represented by a single constant i1 value
of 1, rather than the whole vector.

Single element region
^^^^^^^^^^^^^^^^^^^^^

A single element could be a scalar value or a 1-vector. It is convenient to
allow both in LLVM IR, because CM allows both as distinct types. The rdregion
and wrregion intrinsics are defined such that a single element region can be
represented as either a scalar or a 1-vector.

However, for the scalar case, it is recommended to use the LLVM IR
instructions extractelement and insertelement instead, as core LLVM
optimizations understand them.

The parent width operand
^^^^^^^^^^^^^^^^^^^^^^^^

For a 2D region, certain parts of the GenX backend can optimize better if it
is known that a row of the region cannot cross certain boundaries:

* Collapsing two 2D regions is possible only if it is known that a row of the
  inner 2D region cannot cross a row boundary of the outer 2D region.

* Knowing that a row of a 2D region cannot cross a GRF boundary can help to
  avoid splitting it up so much in legalization.

For a region with a constant start index, this can all be calculated from the
start index and region parameters. For a region with a variable start index,
the *parent width* operand is set to value N to make a statement that the
semantics of the language being compiled say that a row of the region cannot
cross a multiple of N boundary.
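As an illustrative sketch (the values and names here are assumptions, not
from the vISA spec): reading four contiguous 8-element rows starting at a
variable byte offset %idx, where the source language guarantees that each row
stays within a 16-element row of the parent vector, can set the parent width
to 16:

.. code-block:: text

  %region5 = call <32 x i32> @llvm.genx.rdregioni.v32i32.v64i32(<64 x i32> %v5,
      i32 16, i32 8, i32 1, i16 %idx, i32 16)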
Predicates and predication
==========================

Certain vector operations can be *predicated*, that is, a vector of i1
predicate controls whether the corresponding element of the operation is
written into its result element.

LLVM IR already has ``select`` with a vector condition.

As outlined above, the wrregion intrinsics have a mask operand. If a bit is
not set in the mask, then the corresponding element of the "new value" input
is not written into the result.

The intrinsics corresponding to predicated send messages (e.g. gather), and
the raw send intrinsics, have a predicate operand.
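For example, a minimal sketch of a predicated region write, where only the
lanes of %new enabled by %pred are written (the mangling suffix and value
names are assumed for illustration):

.. code-block:: text

  %r = call <8 x i32> @llvm.genx.wrregioni.v8i32.v8i32.v8i1(<8 x i32> %old,
      <8 x i32> %new, i32 0, i32 8, i32 1, i16 0, i32 undef, <8 x i1> %pred)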
Non-GenX intrinsics
===================

In general the GenX backend does not implement non-GenX intrinsics, with the
following exceptions:

* ``llvm.uadd.with.overflow`` -- the GenX backend does not implement the
  other arithmetic-with-overflow intrinsics, but there is a case where LLVM
  was transforming a CM program to use this one, so it is implemented.

* ``llvm.fma`` -- this intrinsic is generated in the GenXPatternMatch pass,
  but the GenX backend would be able to cope with it being in the IR already.

GenX intrinsics
===============

.. include:: _build/autogenerated/GenXLangRef.rstinc

vc-intrinsics-0.22.1/GenXIntrinsics/docs/GenXLangRef_region_example1.png
[binary PNG image data omitted]

vc-intrinsics-0.22.1/GenXIntrinsics/docs/GenXLangRef_region_example2.png
[binary PNG image data omitted]

vc-intrinsics-0.22.1/GenXIntrinsics/docs/GenXLangRef_region_example3.png
[binary PNG image data omitted]

vc-intrinsics-0.22.1/GenXIntrinsics/docs/GenXLangRef_region_example4.png
[binary PNG image data omitted]
vc-intrinsics-0.22.1/GenXIntrinsics/docs/LangRefIndex.rst

..  ========================= begin_copyright_notice ============================

    Copyright (C) 2019-2021 Intel Corporation

    SPDX-License-Identifier: MIT

    =========================== end_copyright_notice =============================

VC Intrinsics documentation (language reference)
==================================================
.. toctree::
   :hidden:

   GenXLangRef

:doc:`GenXLangRef`
  Defines the restrictions on and additions to LLVM intermediate
  representation for the GenX backend for Intel HD Graphics.

vc-intrinsics-0.22.1/GenXIntrinsics/docs/Makefile.sphinx

#=========================== begin_copyright_notice ============================
#
# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
#============================ end_copyright_notice =============================

# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS  =
SPHINXBUILD = sphinx-build
SPHINXPROJ  = VCIntrinsics
SOURCEDIR   = .
BUILDDIR    = _build
AUTOGENDIR  = _build/autogenerated

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile.sphinx

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile.sphinx autogen
	echo "$@"
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# AUTOGENSED : auto-generation of docs from source files
# 1. Remove initial "/// ", and blank any line that does not have it.
# 2. Change ".. include:: <name>.<ext>" into ".. include:: $(AUTOGENDIR)/<name>.rstinc"
AUTOGENSEDCXX='/^ *\/\/\/ /!s/.*//; s/^ *\/\/\/ //; s/\(\.\. include:: \)\(.*\)\.[a-z]*/\1$(AUTOGENDIR)\/\2.rstinc/'
AUTOGENSEDPY='/^ *\#\#\# /!s/.*//; s/^ *\#\#\# //'

autogen:
	@mkdir -p $(AUTOGENDIR)
	@rm -f $(AUTOGENDIR)/*
	@sed $(AUTOGENSEDPY) ../include/llvm/GenXIntrinsics/Intrinsic_definitions.py > $(AUTOGENDIR)/GenXLangRef.rstinc
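# For illustration (not part of the build): with AUTOGENSEDPY, a line in
# Intrinsic_definitions.py such as
#     ### This sentence is emitted to GenXLangRef.rstinc verbatim.
# has its leading "### " stripped, while any line without that marker is
# replaced by a blank line, so only doc-comment lines survive in the
# generated reST include file.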
vc-intrinsics-0.22.1/GenXIntrinsics/docs/ReadMe.txt
[text content not recoverable from this dump]

vc-intrinsics-0.22.1/GenXIntrinsics/docs/conf.py

# ========================== begin_copyright_notice ============================
#
# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# =========================== end_copyright_notice =============================

# -*- coding: utf-8 -*-
#
# VC Intrinsics documentation build configuration file.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))

from datetime import date

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.5'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = []

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'LangRefIndex'

# General information about the project.
project = 'VC Intrinsics'
copyright = '%d, Intel' % date.today().year
author = 'Intel'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = ''
# The full version, including alpha/beta/rc tags.
release = ''

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False

# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'haiku'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'VCIntrinsicsdoc'

# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'VCIntrinsics.tex', 'VC Intrinsics Documentation',
     'Intel', 'manual'),
]

# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'vcintrinsics', 'VC Intrinsics Documentation',
     [author], 1)
]
# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'VCIntrinsics', 'VC Intrinsics Documentation',
     author, 'VCIntrinsics', 'One line description of project.',
     'Miscellaneous'),
]

vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/CMakeLists.txt

#=========================== begin_copyright_notice ============================
#
# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
#============================ end_copyright_notice =============================

add_subdirectory(GenXIntrinsics)

vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/CMakeLists.txt

#=========================== begin_copyright_notice ============================
#
# Copyright (C) 2019-2024 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
#============================ end_copyright_notice =============================

add_custom_command(
  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${GENX_INTRINSICS_DESCRIPTION}
  COMMAND ${PYTHON_EXECUTABLE} -B ${CMAKE_CURRENT_SOURCE_DIR}/Intrinsics.py
    ${CMAKE_CURRENT_SOURCE_DIR}/Intrinsic_definitions.py
    ${CMAKE_CURRENT_BINARY_DIR}/${GENX_INTRINSICS_DESCRIPTION}
  DEPENDS
    ${CMAKE_CURRENT_SOURCE_DIR}/Intrinsics.py
    ${CMAKE_CURRENT_SOURCE_DIR}/Intrinsic_definitions.py
  COMMENT "Building ${GENX_INTRINSICS_DESCRIPTION}..."
)

find_first_existing_vc_file(intrinsics_vc "${GENX_INTRINSICS_MAIN_DIR}")

set(version_inc ${CMAKE_CURRENT_BINARY_DIR}/GenXVersion.inc)

if(LLVM_GENX_INTRINSICS_ROOT_DIR)
  set(LLVM_GENX_INTRINSICS_REPO_DIR ${LLVM_GENX_INTRINSICS_ROOT_DIR})
else()
  set(LLVM_GENX_INTRINSICS_REPO_DIR ${GENX_INTRINSICS_MAIN_DIR}/..)
endif()

if(${LLVM_VERSION_MAJOR} LESS 9)
  find_file(FOUND_VCS GetSVN.cmake PATHS ${LLVM_CMAKE_DIR}
    REQUIRED NO_CMAKE_FIND_ROOT_PATH)
  add_custom_command(
    OUTPUT "${version_inc}"
    COMMAND ${CMAKE_COMMAND}
      "-DSOURCE_DIRS=${LLVM_GENX_INTRINSICS_REPO_DIR}"
      "-DNAMES=VCI"
      "-DHEADER_FILE=${version_inc}"
      -P "${FOUND_VCS}")
else()
  find_file(FOUND_VCS VersionFromVCS.cmake PATHS ${LLVM_CMAKE_DIR}
    REQUIRED NO_CMAKE_FIND_ROOT_PATH)
  add_custom_command(
    OUTPUT "${version_inc}"
    COMMAND ${CMAKE_COMMAND}
      "-DSOURCE_DIR=${LLVM_GENX_INTRINSICS_REPO_DIR}"
      "-DNAME=VCI"
      "-DHEADER_FILE=${version_inc}"
      "-DVCS_SCRIPT=${FOUND_VCS}"
      -P "${CMAKE_CURRENT_SOURCE_DIR}/ConfigureVersionFile.cmake")
endif()

set_source_files_properties("${version_inc}"
  PROPERTIES GENERATED TRUE
  HEADER_FILE_ONLY TRUE)

add_custom_target(GenXIntrinsicDescriptionGen
  DEPENDS
    ${CMAKE_CURRENT_BINARY_DIR}/${GENX_INTRINSICS_DESCRIPTION}
    ${version_inc}
)

add_custom_target(GenXIntrinsicsGen)
add_dependencies(GenXIntrinsicsGen GenXIntrinsicDescriptionGen)

vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/ConfigureVersionFile.cmake

#=========================== begin_copyright_notice ============================
#
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
#============================ end_copyright_notice =============================

include(${VCS_SCRIPT})

function(generate_version_file output_file)
  get_source_info(${SOURCE_DIR} rev repo)
  file(APPEND "${output_file}.txt" "#define ${NAME}_REVISION \"${rev}\"\n")
  file(APPEND "${output_file}.txt" "#define ${NAME}_REPOSITORY \"${repo}\"\n")
  execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
    "${output_file}.txt" "${output_file}")
  file(REMOVE "${output_file}.txt")
endfunction()

generate_version_file(${HEADER_FILE})

vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/GenXIntrOpts.h

/*========================== begin_copyright_notice ============================

Copyright (C) 2020-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

/*========================== begin_copyright_notice ============================

This file is distributed under the University of Illinois Open Source License.
See LICENSE.TXT for details.

============================= end_copyright_notice ===========================*/

// This header file defines prototypes for accessor functions that expose
// passes in the GenX Intrinsics transformations library.
#ifndef LLVM_GENX_INTR_OPTS_H
#define LLVM_GENX_INTR_OPTS_H

namespace llvm {

class FunctionPass;
class ModulePass;
class Pass;

//===----------------------------------------------------------------------===//
//
// CMSimdCFLowering - Lower CM SIMD control flow
//
Pass *createCMSimdCFLoweringPass();
Pass *createISPCSimdCFLoweringPass();

//===----------------------------------------------------------------------===//
//
// GenXRestoreIntrAttr - Restore Intrinsics' Attributes
//
Pass *createGenXRestoreIntrAttrPass();

} // End llvm namespace

#endif

vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/GenXIntrinsicInst.h

/*========================== begin_copyright_notice ============================

Copyright (C) 2019-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

//===----------------------------------------------------------------------===//
//
// This file defines classes that make it really easy to deal with intrinsic
// functions with the isa/dyncast family of functions. In particular, this
// allows you to do things like:
//
//     if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(Inst))
//        ... MCI->getDest() ... MCI->getSource() ...
//
// All intrinsic function calls are instances of the call instruction, so these
// are all subclasses of the CallInst class. Note that none of these classes
// has state or virtual methods, which is an important part of this gross/neat
// hack working.
//
//===----------------------------------------------------------------------===//

#ifndef GENX_INTRINSIC_INST_H
#define GENX_INTRINSIC_INST_H

#include "llvm/GenXIntrinsics/GenXIntrinsics.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

namespace llvm {

/// GenXIntrinsicInst - A useful wrapper class for inspecting calls to
/// intrinsic functions. This allows the standard isa/dyncast/cast
/// functionality to work with calls to intrinsic functions.
class GenXIntrinsicInst : public CallInst {
public:
  GenXIntrinsicInst() = delete;
  GenXIntrinsicInst(const GenXIntrinsicInst &) = delete;
  void operator=(const GenXIntrinsicInst &) = delete;

  /// getIntrinsicID - Return the intrinsic ID of this intrinsic.
  ///
  GenXIntrinsic::ID getIntrinsicID() const {
    return GenXIntrinsic::getGenXIntrinsicID(getCalledFunction());
  }

  // Methods for support type inquiry through isa, cast, and dyn_cast:
  static bool classof(const CallInst *I);
  static bool classof(const Value *V) {
    return isa<CallInst>(V) && classof(cast<CallInst>(V));
  }
};
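// Example (an illustrative sketch, not part of the original header) of the
// dyn_cast pattern this class enables; "Inst" is an assumed llvm::Instruction:
//
//   if (auto *GII = dyn_cast<GenXIntrinsicInst>(&Inst))
//     if (GII->getIntrinsicID() == GenXIntrinsic::genx_rdregioni)
//       Value *Idx = GII->getArgOperand(
//           GenXIntrinsic::GenXRegion::RdIndexOperandNum);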
// TODO: add more classes to make our intrinsics easier to use

} // namespace llvm

#endif

vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/GenXIntrinsics.h

/*========================== begin_copyright_notice ============================

Copyright (C) 2019-2023 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

//===----------------------------------------------------------------------===//
//
// This file defines a set of enums which allow processing of intrinsic
// functions. Values of these enum types are returned by
// GenXIntrinsic::getGenXIntrinsicID.
//
//===----------------------------------------------------------------------===//

#ifndef GENX_INTRINSIC_INTERFACE_H
#define GENX_INTRINSIC_INTERFACE_H

#include "llvm/IR/Module.h"

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Instructions.h"

#include "llvm/GenXIntrinsics/GenXVersion.h"

#if LLVM_VERSION_MAJOR >= 16
#include "llvm/Support/ModRef.h"
#endif

namespace llvm {

namespace GenXIntrinsic {

enum ID : unsigned {
  not_genx_intrinsic = Intrinsic::num_intrinsics,
#define GET_INTRINSIC_ENUM_VALUES
#include "llvm/GenXIntrinsics/GenXIntrinsicDescription.gen"
#undef GET_INTRINSIC_ENUM_VALUES
  num_genx_intrinsics,
  // note that Intrinsic::not_intrinsic means that it is not a LLVM intrinsic
  not_any_intrinsic
};

namespace GenXResult {
enum ResultIndexes {
  IdxAddc_Add = 1,
  IdxAddc_Carry = 0,
  IdxSubb_Sub = 1,
  IdxSubb_Borrow = 0,
  IdxAdd3c_Add = 1,
  IdxAdd3c_Carry = 0
};
}

// The number of elements to load per address (vector size)
// NOTE: taken from cmc/support
enum class LSCVectorSize : uint8_t {
  N0 = 0,
  N1 = 1,  // 1 element
  N2 = 2,  // 2 element
  N3 = 3,  // 3 element
  N4 = 4,  // 4 element
  N8 = 5,  // 8 element
  N16 = 6, // 16 element
  N32 = 7, // 32 element
  N64 = 8  // 64 element
};

enum class LSCDataSize : uint8_t {
  Invalid,
  D8,
  D16,
  D32,
  D64,
  D8U32,
  D16U32,
  D16U32H,
};

enum class LSCDataOrder : uint8_t {
  Invalid,
  NonTranspose,
  Transpose
};

enum class LSCCategory : uint8_t {
  Load,
  Load2D,
  Prefetch,
  Prefetch2D,
  Store,
  Store2D,
  Load2DTyped,
  Store2DTyped,
  Fence,
  LegacyAtomic,
  Atomic,
  NotLSC
};

namespace GenXRegion {
enum {
  // Operands in both rdregion and wrregion:
  OldValueOperandNum = 0,
  // Operands in rdregion:
  RdVStrideOperandNum = 1,
  RdWidthOperandNum = 2,
  RdStrideOperandNum = 3,
  RdIndexOperandNum = 4,
  // Operands in wrregion:
  NewValueOperandNum = 1,
  WrVStrideOperandNum = 2,
  WrWidthOperandNum = 3,
  WrStrideOperandNum = 4,
  WrIndexOperandNum = 5,
  PredicateOperandNum = 7
};
} // namespace GenXRegion

inline const char *getGenXIntrinsicPrefix() { return "llvm.genx."; }

ID getGenXIntrinsicID(const Function *F);

/// Utility function to get the genx_intrinsic ID if V is a GenXIntrinsic call.
/// V is allowed to be 0.
inline ID getGenXIntrinsicID(const Value *V) {
  if (V)
    if (const CallInst *CI = dyn_cast<CallInst>(V))
      if (Function *Callee = CI->getCalledFunction())
        return getGenXIntrinsicID(Callee);
  return GenXIntrinsic::not_genx_intrinsic;
}

/// GenXIntrinsic::isGenXIntrinsic(ID) - Is GenX intrinsic.
/// NOTE that this does include not_genx_intrinsic
/// BUT DOES NOT include not_any_intrinsic
inline bool isGenXIntrinsic(unsigned ID) {
  return ID >= not_genx_intrinsic && ID < num_genx_intrinsics;
}

/// GenXIntrinsic::isGenXIntrinsic(CF) - Returns true if
/// the function's name starts with "llvm.genx.".
/// It's possible for this function to return true while getGenXIntrinsicID()
/// returns GenXIntrinsic::not_genx_intrinsic!
bool isGenXIntrinsic(const Function *CF);

/// GenXIntrinsic::isGenXIntrinsic(V) - Returns true if
/// the function's name starts with "llvm.genx.".
/// It's possible for this function to return true while getGenXIntrinsicID()
/// returns GenXIntrinsic::not_genx_intrinsic!
inline bool isGenXIntrinsic(const Value *V) {
  if (V)
    if (const CallInst *CI = dyn_cast<CallInst>(V))
      if (Function *Callee = CI->getCalledFunction())
        return isGenXIntrinsic(Callee);
  return false;
}

/// GenXIntrinsic::isGenXNonTrivialIntrinsic(ID) - Is GenX intrinsic,
/// which is not equal to not_genx_intrinsic or not_any_intrinsic
inline bool isGenXNonTrivialIntrinsic(unsigned ID) {
  return ID > not_genx_intrinsic && ID < num_genx_intrinsics;
}

/// GenXIntrinsic::isGenXNonTrivialIntrinsic(CF) - Returns true if
/// CF is a genx intrinsic, not equal to not_any_intrinsic or
/// not_genx_intrinsic
inline bool isGenXNonTrivialIntrinsic(const Function *CF) {
  return isGenXNonTrivialIntrinsic(getGenXIntrinsicID(CF));
}

/// GenXIntrinsic::isGenXNonTrivialIntrinsic(V) - Returns true if
/// V is a genx intrinsic, not equal to not_any_intrinsic or
/// not_genx_intrinsic
inline bool isGenXNonTrivialIntrinsic(const Value *V) {
  return isGenXNonTrivialIntrinsic(getGenXIntrinsicID(V));
}

/// GenXIntrinsic::getGenXName(ID) - Return the LLVM name for a GenX intrinsic,
/// such as "llvm.genx.lane.id".
std::string getGenXName(ID id, ArrayRef<Type *> Tys = {});

ID lookupGenXIntrinsicID(StringRef Name);

AttributeList getAttributes(LLVMContext &C, ID id);

/// GenXIntrinsic::getGenXType(ID) - Return the function type for an intrinsic.
FunctionType *getGenXType(LLVMContext &Context, GenXIntrinsic::ID id,
                          ArrayRef<Type *> Tys = {});

/// GenXIntrinsic::getGenXDeclaration(M, ID) - Create or insert a GenX LLVM
/// Function declaration for an intrinsic, and return it.
///
/// The Tys parameter is for intrinsics with overloaded types (e.g., those
/// using iAny, fAny, vAny, or iPTRAny). For a declaration of an overloaded
/// intrinsic, Tys must provide exactly one type for each overloaded type in
/// the intrinsic.
Function *getGenXDeclaration(Module *M, ID id, ArrayRef<Type *> Tys = {});
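/// For example (an illustrative sketch, not part of the original header):
/// obtaining a declaration of llvm.genx.rdregioni.v4i32.v8i32.i16, where
/// I32Ty, Ctx and M are assumed to be in scope:
///
///   Type *Tys[] = {FixedVectorType::get(I32Ty, 4), // result type
///                  FixedVectorType::get(I32Ty, 8), // input vector type
///                  Type::getInt16Ty(Ctx)};         // index type
///   Function *Decl = GenXIntrinsic::getGenXDeclaration(
///       M, GenXIntrinsic::genx_rdregioni, Tys);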
inline unsigned getAnyIntrinsicID(const Value *V) { if (V) if (const CallInst *CI = dyn_cast<CallInst>(V)) if (Function *Callee = CI->getCalledFunction()) return getAnyIntrinsicID(Callee); return GenXIntrinsic::not_any_intrinsic; } /// GenXIntrinsic::isAnyIntrinsic(ID) - Is any intrinsic /// including not_any_intrinsic inline bool isAnyIntrinsic(unsigned id) { assert(id != not_genx_intrinsic && id != Intrinsic::not_intrinsic && "Do not use this method with getGenXIntrinsicID or getIntrinsicID!"); return id < num_genx_intrinsics || id == not_any_intrinsic; } /// GenXIntrinsic::isAnyNonTrivialIntrinsic(id) - Is GenX or LLVM intrinsic, /// which is not equal to not_any_intrinsic inline bool isAnyNonTrivialIntrinsic(unsigned id) { assert(id != not_genx_intrinsic && id != Intrinsic::not_intrinsic && "Do not use this method with getGenXIntrinsicID or getIntrinsicID!"); return id < num_genx_intrinsics && id != not_any_intrinsic; } /// GenXIntrinsic::isAnyNonTrivialIntrinsic(ID) - Is GenX or LLVM intrinsic, /// which is not equal to not_genx_intrinsic, not_any_intrinsic or not_intrinsic inline bool isAnyNonTrivialIntrinsic(const Function *CF) { return isAnyNonTrivialIntrinsic(getAnyIntrinsicID(CF)); } /// Utility function to check if V is LLVM or GenX intrinsic call, /// which is not not_intrinsic, not_genx_intrinsic or not_any_intrinsic /// V is allowed to be 0. inline bool isAnyNonTrivialIntrinsic(const Value *V) { return isAnyNonTrivialIntrinsic(getAnyIntrinsicID(V)); } /// GenXIntrinsic::getAnyName(ID) - Return the LLVM name for LLVM or GenX /// intrinsic, such as "llvm.genx.lane.id". std::string getAnyName(unsigned id, ArrayRef<Type *> Tys = {}); /// GenXIntrinsic::getAnyType(ID) - Return the function type for an intrinsic. inline FunctionType *getAnyType(LLVMContext &Context, unsigned id, ArrayRef<Type *> Tys = {}) { assert(isAnyNonTrivialIntrinsic(id)); if (isGenXIntrinsic(id)) return getGenXType(Context, (ID)id, Tys); else return Intrinsic::getType(Context, (Intrinsic::ID)id, Tys); } /// GenXIntrinsic::isSupportedPlatform(CPU, ID) - Return true if GenxIntrinsic /// is supported by the current platform bool isSupportedPlatform(const std::string &CPU, unsigned id); /// GenXIntrinsic::isOverloadedArg(ID, ArgNum) - Return true if ArgNum /// in intrinsic is overloaded bool isOverloadedArg(unsigned IntrinID, unsigned ArgNum); /// GenXIntrinsic::isOverloadedRet(ID) - Return true if return type /// in intrinsic is overloaded bool isOverloadedRet(unsigned IntrinID); /// GenXIntrinsic::getAnyDeclaration(M, ID) - Create or insert an LLVM /// Function declaration for an intrinsic, and return it. /// /// The Tys parameter is for intrinsics with overloaded types (e.g., those /// using iAny, fAny, vAny, or iPTRAny). For a declaration of an overloaded /// intrinsic, Tys must provide exactly one type for each overloaded type in /// the intrinsic. Function *getAnyDeclaration(Module *M, unsigned id, ArrayRef<Type *> Tys = {}); /// GenXIntrinsic::getGenXMulIID(S1, S2) - returns GenXIntrinsic::ID for /// the genx_XXmul operation, where XX is defined by the input arguments /// which represent signs of the operands inline GenXIntrinsic::ID getGenXMulIID(bool LHSign, bool RHSign) { return LHSign ? (RHSign ? GenXIntrinsic::genx_ssmul : GenXIntrinsic::genx_sumul) : (RHSign ?
GenXIntrinsic::genx_usmul : GenXIntrinsic::genx_uumul); } inline bool isRdRegion(unsigned IntrinID) { switch (IntrinID) { case GenXIntrinsic::genx_rdregioni: case GenXIntrinsic::genx_rdregionf: return true; default: return false; } } inline bool isRdRegion(const Function *F) { return isRdRegion(getGenXIntrinsicID(F)); } inline bool isRdRegion(const Value *V) { return isRdRegion(getGenXIntrinsicID(V)); } inline bool isWrRegion(unsigned IntrinID) { switch (IntrinID) { case GenXIntrinsic::genx_wrregioni: case GenXIntrinsic::genx_wrregionf: case GenXIntrinsic::genx_wrconstregion: return true; default: return false; } } inline bool isWrRegion(const Function *F) { return isWrRegion(getGenXIntrinsicID(F)); } inline bool isWrRegion(const Value *V) { return isWrRegion(getGenXIntrinsicID(V)); } inline bool isAbs(unsigned IntrinID) { if (IntrinID == GenXIntrinsic::genx_absf || IntrinID == GenXIntrinsic::genx_absi) return true; return false; } inline bool isAbs(const Function *F) { return isAbs(getGenXIntrinsicID(F)); } inline bool isAbs(const Value *V) { return isAbs(getGenXIntrinsicID(V)); } inline bool isIntegerSat(unsigned IID) { switch (IID) { case GenXIntrinsic::genx_sstrunc_sat: case GenXIntrinsic::genx_sutrunc_sat: case GenXIntrinsic::genx_ustrunc_sat: case GenXIntrinsic::genx_uutrunc_sat: return true; default: return false; } } inline bool isIntegerSat(const Function *F) { return isIntegerSat(getGenXIntrinsicID(F)); } inline bool isIntegerSat(const Value *V) { return isIntegerSat(getGenXIntrinsicID(V)); } inline bool isVLoad(unsigned IntrinID) { return IntrinID == GenXIntrinsic::genx_vload; } inline bool isVLoad(const Function *F) { return isVLoad(getGenXIntrinsicID(F)); } inline bool isVLoad(const Value *V) { return isVLoad(getGenXIntrinsicID(V)); } inline bool isVStore(unsigned IntrinID) { return IntrinID == GenXIntrinsic::genx_vstore; } inline bool isVStore(const Function *F) { return isVStore(getGenXIntrinsicID(F)); } inline bool isVStore(const Value *V) { return isVStore(getGenXIntrinsicID(V)); } inline bool isVLoadStore(unsigned IntrinID) { return isVLoad(IntrinID) || isVStore(IntrinID); } inline bool isVLoadStore(const Function *F) { return isVLoadStore(getGenXIntrinsicID(F)); } inline bool isVLoadStore(const Value *V) { return isVLoadStore(getGenXIntrinsicID(V)); } inline bool isReadPredefReg(unsigned IntrinID) { return IntrinID == GenXIntrinsic::genx_read_predef_reg; } inline bool isReadPredefReg(const Function *F) { return isReadPredefReg(getGenXIntrinsicID(F)); } inline bool isReadPredefReg(const Value *V) { return isReadPredefReg(getGenXIntrinsicID(V)); } inline bool isWritePredefReg(unsigned IntrinID) { return IntrinID == GenXIntrinsic::genx_write_predef_reg; } inline bool isWritePredefReg(const Function *F) { return isWritePredefReg(getGenXIntrinsicID(F)); } inline bool isWritePredefReg(const Value *V) { return isWritePredefReg(getGenXIntrinsicID(V)); } inline bool isReadWritePredefReg(unsigned IntrinID) { return isWritePredefReg(IntrinID) || isReadPredefReg(IntrinID); } inline bool isReadWritePredefReg(const Value *V) { return isWritePredefReg(getGenXIntrinsicID(V)) || isReadPredefReg(getGenXIntrinsicID(V)); } inline bool isReadWritePredefReg(const Function *F) { return isWritePredefReg(getGenXIntrinsicID(F)) || isReadPredefReg(getGenXIntrinsicID(F)); } inline LSCCategory getLSCCategory(unsigned IntrinID) { switch(IntrinID) { case GenXIntrinsic::genx_lsc_load_bti: case GenXIntrinsic::genx_lsc_load_stateless: case GenXIntrinsic::genx_lsc_load_slm: case 
GenXIntrinsic::genx_lsc_load_bindless: case GenXIntrinsic::genx_lsc_load_quad_bti: case GenXIntrinsic::genx_lsc_load_quad_slm: case GenXIntrinsic::genx_lsc_load_quad_stateless: case GenXIntrinsic::genx_lsc_load_merge_bti: case GenXIntrinsic::genx_lsc_load_merge_stateless: case GenXIntrinsic::genx_lsc_load_merge_slm: case GenXIntrinsic::genx_lsc_load_merge_bindless: case GenXIntrinsic::genx_lsc_load_merge_quad_bti: case GenXIntrinsic::genx_lsc_load_merge_quad_slm: case GenXIntrinsic::genx_lsc_load_merge_quad_stateless: return LSCCategory::Load; case GenXIntrinsic::genx_lsc_load2d_stateless: return LSCCategory::Load2D; case GenXIntrinsic::genx_lsc_load2d_typed_bti: return LSCCategory::Load2DTyped; case GenXIntrinsic::genx_lsc_prefetch_bti: case GenXIntrinsic::genx_lsc_prefetch_stateless: return LSCCategory::Prefetch; case GenXIntrinsic::genx_lsc_prefetch2d_stateless: return LSCCategory::Prefetch2D; case GenXIntrinsic::genx_lsc_store_bti: case GenXIntrinsic::genx_lsc_store_stateless: case GenXIntrinsic::genx_lsc_store_slm: case GenXIntrinsic::genx_lsc_store_bindless: case GenXIntrinsic::genx_lsc_store_quad_bti: case GenXIntrinsic::genx_lsc_store_quad_slm: case GenXIntrinsic::genx_lsc_store_quad_stateless: return LSCCategory::Store; case GenXIntrinsic::genx_lsc_store2d_stateless: return LSCCategory::Store2D; case GenXIntrinsic::genx_lsc_store2d_typed_bti: return LSCCategory::Store2DTyped; case GenXIntrinsic::genx_lsc_fence: return LSCCategory::Fence; case GenXIntrinsic::genx_lsc_atomic_bti: case GenXIntrinsic::genx_lsc_atomic_stateless: case GenXIntrinsic::genx_lsc_atomic_slm: case GenXIntrinsic::genx_lsc_atomic_bindless: return LSCCategory::LegacyAtomic; case GenXIntrinsic::genx_lsc_xatomic_bti: case GenXIntrinsic::genx_lsc_xatomic_stateless: case GenXIntrinsic::genx_lsc_xatomic_slm: case GenXIntrinsic::genx_lsc_xatomic_bindless: return LSCCategory::Atomic; default: return LSCCategory::NotLSC; } } inline LSCCategory getLSCCategory(const Value *V) { return getLSCCategory(getGenXIntrinsicID(V)); } inline LSCCategory getLSCCategory(const Function *F) { return getLSCCategory(getGenXIntrinsicID(F)); } inline bool isLSCLoad(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::Load; } inline bool isLSCLoad(const Value *V) { return isLSCLoad(getGenXIntrinsicID(V)); } inline bool isLSCLoad(const Function *F) { return isLSCLoad(getGenXIntrinsicID(F)); } inline bool isLSCLoad2D(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::Load2D; } inline bool isLSCLoad2D(const Value *V) { return isLSCLoad2D(getGenXIntrinsicID(V)); } inline bool isLSCLoad2D(const Function *F) { return isLSCLoad2D(getGenXIntrinsicID(F)); } inline bool isLSCLoad2DTyped(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::Load2DTyped; } inline bool isLSCLoad2DTyped(const Value *V) { return isLSCLoad2DTyped(getGenXIntrinsicID(V)); } inline bool isLSCLoad2DTyped(const Function *F) { return isLSCLoad2DTyped(getGenXIntrinsicID(F)); } inline bool isLSCPrefetch(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::Prefetch; } inline bool isLSCPrefetch(const Value *V) { return isLSCPrefetch(getGenXIntrinsicID(V)); } inline bool isLSCPrefetch(const Function *F) { return isLSCPrefetch(getGenXIntrinsicID(F)); } inline bool isLSCPrefetch2D(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::Prefetch2D; } inline bool isLSCPrefetch2D(const Value *V) { return isLSCPrefetch2D(getGenXIntrinsicID(V)); } inline bool isLSCPrefetch2D(const Function *F) { return 
isLSCPrefetch2D(getGenXIntrinsicID(F)); } inline bool isLSCStore(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::Store; } inline bool isLSCStore(const Value *V) { return isLSCStore(getGenXIntrinsicID(V)); } inline bool isLSCStore(const Function *F) { return isLSCStore(getGenXIntrinsicID(F)); } inline bool isLSCStore2D(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::Store2D; } inline bool isLSCStore2D(const Value *V) { return isLSCStore2D(getGenXIntrinsicID(V)); } inline bool isLSCStore2D(const Function *F) { return isLSCStore2D(getGenXIntrinsicID(F)); } inline bool isLSCStore2DTyped(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::Store2DTyped; } inline bool isLSCStore2DTyped(const Value *V) { return isLSCStore2DTyped(getGenXIntrinsicID(V)); } inline bool isLSCStore2DTyped(const Function *F) { return isLSCStore2DTyped(getGenXIntrinsicID(F)); } inline bool isLSCFence(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::Fence; } inline bool isLSCFence(const Value *V) { return isLSCFence(getGenXIntrinsicID(V)); } inline bool isLSCFence(const Function *F) { return isLSCFence(getGenXIntrinsicID(F)); } inline bool isLSCLegacyAtomic(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::LegacyAtomic; } inline bool isLSCLegacyAtomic(const Value *V) { return isLSCLegacyAtomic(getGenXIntrinsicID(V)); } inline bool isLSCLegacyAtomic(const Function *F) { return isLSCLegacyAtomic(getGenXIntrinsicID(F)); } inline bool isLSCAtomic(unsigned IntrinID) { return getLSCCategory(IntrinID) == LSCCategory::Atomic; } inline bool isLSCAtomic(const Value *V) { return isLSCAtomic(getGenXIntrinsicID(V)); } inline bool isLSCAtomic(const Function *F) { return isLSCAtomic(getGenXIntrinsicID(F)); } inline bool isLSC(unsigned IntrinID) { return getLSCCategory(IntrinID) != LSCCategory::NotLSC; } inline bool isLSC(const Value *V) { return isLSC(getGenXIntrinsicID(V)); } inline bool isLSC(const Function *F) { return isLSC(getGenXIntrinsicID(F)); } inline bool isLSC2D(unsigned IntrinID) { switch (getLSCCategory(IntrinID)) { case LSCCategory::Load2D: case LSCCategory::Prefetch2D: case LSCCategory::Store2D: case LSCCategory::Load2DTyped: case LSCCategory::Store2DTyped: return true; case LSCCategory::Load: case LSCCategory::Prefetch: case LSCCategory::Store: case LSCCategory::Fence: case LSCCategory::LegacyAtomic: case LSCCategory::Atomic: case LSCCategory::NotLSC: return false; } llvm_unreachable("Unknown LSC category"); } inline bool isLSC2D(const Value *V) { return isLSC2D(getGenXIntrinsicID(V)); } inline bool isLSC2D(const Function *F) { return isLSC2D(getGenXIntrinsicID(F)); } inline bool isLSCTyped(unsigned IntrinID) { switch (getLSCCategory(IntrinID)) { case LSCCategory::Load2DTyped: case LSCCategory::Store2DTyped: return true; case LSCCategory::Store2D: case LSCCategory::Load: case LSCCategory::Load2D: case LSCCategory::Prefetch: case LSCCategory::Prefetch2D: case LSCCategory::Store: case LSCCategory::Fence: case LSCCategory::LegacyAtomic: case LSCCategory::Atomic: case LSCCategory::NotLSC: return false; } llvm_unreachable("Unknown LSC category"); } inline bool isLSCTyped(const Value *V) { return isLSCTyped(getGenXIntrinsicID(V)); } inline bool isLSCTyped(const Function *F) { return isLSCTyped(getGenXIntrinsicID(F)); } // Dependency from visa_igc_common_header. // Converts vector size into LSC-appropriate code. 
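// As a quick reference for the switch below: 0 -> N0 (0), 1 -> N1 (1),
// 2 -> N2 (2), 3 -> N3 (3), 4 -> N4 (4), 8 -> N8 (5), 16 -> N16 (6),
// 32 -> N32 (7), 64 -> N64 (8); any other element count hits llvm_unreachable.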
inline LSCVectorSize getLSCVectorSize(unsigned N) { switch (N) { case 0: return LSCVectorSize::N0; case 1: return LSCVectorSize::N1; case 2: return LSCVectorSize::N2; case 3: return LSCVectorSize::N3; case 4: return LSCVectorSize::N4; case 8: return LSCVectorSize::N8; case 16: return LSCVectorSize::N16; case 32: return LSCVectorSize::N32; case 64: return LSCVectorSize::N64; } llvm_unreachable("Unknown vector size"); } // Gets encoded vector size for LSC instruction. inline uint8_t getEncodedLSCVectorSize(unsigned N) { return static_cast<uint8_t>(getLSCVectorSize(N)); } // Functions in this namespace return argument index for LSC instruction. namespace LSCArgIdx { constexpr int Invalid = -1; // Returns VectorSize index. inline int getLSCVectorSize(LSCCategory Cat) { switch (Cat) { case LSCCategory::Load: case LSCCategory::Prefetch: case LSCCategory::Store: case LSCCategory::Atomic: return 7; case LSCCategory::LegacyAtomic: return 8; case LSCCategory::Prefetch2D: case LSCCategory::Load2D: case LSCCategory::Store2D: case LSCCategory::Load2DTyped: case LSCCategory::Store2DTyped: case LSCCategory::Fence: case LSCCategory::NotLSC: llvm_unreachable("no such argument"); return Invalid; } return Invalid; } // Returns VectorSize index. inline int getLSCVectorSize(unsigned IID) { return LSCArgIdx::getLSCVectorSize(getLSCCategory(IID)); } // Returns DataSize index. inline int getLSCDataSize(LSCCategory Cat) { switch (Cat) { case LSCCategory::Load: case LSCCategory::Prefetch: case LSCCategory::Store: case LSCCategory::LegacyAtomic: case LSCCategory::Atomic: return 6; case LSCCategory::Load2D: case LSCCategory::Prefetch2D: case LSCCategory::Store2D: return 3; case LSCCategory::Fence: case LSCCategory::Load2DTyped: case LSCCategory::Store2DTyped: case LSCCategory::NotLSC: llvm_unreachable("no such argument"); return Invalid; } return Invalid; } // Returns DataSize index. inline int getLSCDataSize(unsigned IID) { return LSCArgIdx::getLSCDataSize(getLSCCategory(IID)); } // Returns immediate offset index. inline int getLSCImmOffset(LSCCategory Cat) { switch (Cat) { case LSCCategory::Load: case LSCCategory::Prefetch: case LSCCategory::Store: case LSCCategory::LegacyAtomic: case LSCCategory::Atomic: return 5; case LSCCategory::Prefetch2D: case LSCCategory::Load2D: case LSCCategory::Store2D: case LSCCategory::Load2DTyped: case LSCCategory::Store2DTyped: case LSCCategory::Fence: case LSCCategory::NotLSC: llvm_unreachable("no such argument"); return Invalid; } return Invalid; } // Returns immediate offset index. inline int getLSCImmOffset(unsigned IID) { return LSCArgIdx::getLSCImmOffset(getLSCCategory(IID)); } // Returns data order index. inline int getLSCDataOrder(LSCCategory Cat) { switch (Cat) { case LSCCategory::Load: case LSCCategory::Prefetch: case LSCCategory::Store: case LSCCategory::Atomic: return 8; case LSCCategory::LegacyAtomic: return 7; case LSCCategory::Load2D: case LSCCategory::Prefetch2D: case LSCCategory::Store2D: return 4; case LSCCategory::Fence: case LSCCategory::Load2DTyped: case LSCCategory::Store2DTyped: case LSCCategory::NotLSC: llvm_unreachable("no such argument"); return Invalid; } return Invalid; } // Returns data order index. inline int getLSCDataOrder(unsigned IID) { return LSCArgIdx::getLSCDataOrder(getLSCCategory(IID)); } // Returns width index.
inline int getLSCWidth(LSCCategory Cat) { switch (Cat) { case LSCCategory::Load: case LSCCategory::Prefetch: case LSCCategory::Store: case LSCCategory::Fence: case LSCCategory::LegacyAtomic: case LSCCategory::Atomic: case LSCCategory::Load2D: case LSCCategory::Prefetch2D: case LSCCategory::Store2D: case LSCCategory::Load2DTyped: case LSCCategory::Store2DTyped: return 0; case LSCCategory::NotLSC: llvm_unreachable("no such argument"); return Invalid; } return Invalid; } // Returns width index. inline int getLSCWidth(unsigned IID) { return LSCArgIdx::getLSCWidth(getLSCCategory(IID)); } } // namespace LSCArgIdx inline unsigned getLSCNumVectorElements(LSCVectorSize VS) { switch (VS) { case LSCVectorSize::N0: break; case LSCVectorSize::N1: return 1; case LSCVectorSize::N2: return 2; case LSCVectorSize::N3: return 3; case LSCVectorSize::N4: return 4; case LSCVectorSize::N8: return 8; case LSCVectorSize::N16: return 16; case LSCVectorSize::N32: return 32; case LSCVectorSize::N64: return 64; } llvm_unreachable("Unknown vector size"); } LSCVectorSize getLSCVectorSize(const Instruction *I); inline unsigned getLSCNumVectorElements(const Instruction *I) { return GenXIntrinsic::getLSCNumVectorElements(getLSCVectorSize(I)); } inline unsigned getLSCDataBitsRegister(LSCDataSize DS) { switch(DS) { case LSCDataSize::Invalid: break; case LSCDataSize::D8: return 8; case LSCDataSize::D16: return 16; case LSCDataSize::D32: case LSCDataSize::D8U32: case LSCDataSize::D16U32: case LSCDataSize::D16U32H: return 32; case LSCDataSize::D64: return 64; } llvm_unreachable("Unknown data size"); } inline unsigned getLSCDataBitsMemory(LSCDataSize DS) { switch(DS) { case LSCDataSize::Invalid: break; case LSCDataSize::D8: case LSCDataSize::D8U32: return 8; case LSCDataSize::D16: case LSCDataSize::D16U32: case LSCDataSize::D16U32H: return 16; case LSCDataSize::D32: return 32; case LSCDataSize::D64: return 64; } llvm_unreachable("Unknown data size"); } LSCDataSize getLSCDataSize(const Instruction *I); inline unsigned getLSCDataBitsRegister(const Instruction *I) { return getLSCDataBitsRegister(getLSCDataSize(I)); } inline unsigned getLSCDataBitsMemory(const Instruction *I) { return getLSCDataBitsMemory(getLSCDataSize(I)); } LSCDataOrder getLSCDataOrder(const Instruction *I); inline bool isLSCNonTransposed(const Instruction *I) { return getLSCDataOrder(I) == LSCDataOrder::NonTranspose; } inline bool isLSCTransposed(const Instruction *I) { return getLSCDataOrder(I) == LSCDataOrder::Transpose; } unsigned getLSCWidth(const Instruction *I); } // namespace GenXIntrinsic // todo: delete this namespace GenXIntrinsic { AttributeList getAttributes(LLVMContext &C, ID id); } // namespace GenXIntrinsic } // namespace llvm #endif vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/GenXMetadata.h000066400000000000000000000060261475147027500272750ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2021 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ //===----------------------------------------------------------------------===// // // This file defines GenX kernel metadata operand numbers and other module // metadata. 
// //===----------------------------------------------------------------------===// #ifndef GENX_METADATA_H #define GENX_METADATA_H namespace llvm { class MDNode; class Function; namespace genx { namespace FunctionMD { static constexpr const char GenXKernels[] = "genx.kernels"; static constexpr const char GenXByteOffset[] = "genx_byte_offset"; static constexpr const char GenXVolatile[] = "genx_volatile"; static constexpr const char CMGenXMain[] = "CMGenxMain"; static constexpr const char CMStackCall[] = "CMStackCall"; static constexpr const char CMCallable[] = "CMCallable"; static constexpr const char CMEntry[] = "CMEntry"; static constexpr const char CMFloatControl[] = "CMFloatControl"; static constexpr const char CMGenxSIMT[] = "CMGenxSIMT"; static constexpr const char CMGenxReplicateMask[] = "CMGenxReplicateMask"; static constexpr const char OCLRuntime[] = "oclrt"; static constexpr const char ReferencedIndirectly[] = "referenced-indirectly"; } // namespace FunctionMD namespace VCModuleMD { static constexpr const char VCGlobalVariable[] = "VCGlobalVariable"; static constexpr const char VCVolatile[] = "VCVolatile"; static constexpr const char VCByteOffset[] = "VCByteOffset"; static constexpr const char VCSingleElementVector[] = "VCSingleElementVector"; } // namespace VCModuleMD namespace VCFunctionMD { static constexpr const char VCFunction[] = "VCFunction"; static constexpr const char VCStackCall[] = "VCStackCall"; static constexpr const char VCCallable[] = "VCCallable"; static constexpr const char VCFCEntry[] = "VCFCEntry"; static constexpr const char VCArgumentIOKind[] = "VCArgumentIOKind"; static constexpr const char VCFloatControl[] = "VCFloatControl"; static constexpr const char VCSLMSize[] = "VCSLMSize"; static constexpr const char VCArgumentKind[] = "VCArgumentKind"; static constexpr const char VCArgumentDesc[] = "VCArgumentDesc"; static constexpr const char VCSIMTCall[] = "VCSIMTCall"; static constexpr const char VCNamedBarrierCount[] = "VCNamedBarrierCount"; static constexpr const char VCMediaBlockIO[] = "VCMediaBlockIO"; } // namespace VCFunctionMD enum KernelMDOp { FunctionRef, // Reference to Function Name, // Kernel name ArgKinds, // Reference to metadata node containing kernel arg kinds SLMSize, // SLM-size in bytes ArgOffsets, // Kernel argument offsets ArgIOKinds, // Reference to metadata node containing kernel argument // input/output kinds ArgTypeDescs, // Kernel argument type descriptors NBarrierCnt, // Named barrier count BarrierCnt // Barrier count }; MDNode *GetOldStyleKernelMD(const Function &F); } // namespace genx } // namespace llvm #endif vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/GenXSPIRVReaderAdaptor.h000066400000000000000000000023141475147027500311120ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2022 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ /// /// GenXSPIRVReaderAdaptor /// --------------------------- /// This pass converts metadata from SPIRV format to whichever is used in the /// backend #include "llvm/IR/PassManager.h" namespace llvm { class ModulePass; class PassRegistry; //----------------------------------------------------------------------------- // New PM support //----------------------------------------------------------------------------- // Reader adaptor for new PM.
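// A usage sketch (assuming the usual new-PM setup elsewhere; the pass-manager
// variable is illustrative):
//   ModulePassManager MPM;
//   MPM.addPass(GenXSPIRVReaderAdaptor());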
class GenXSPIRVReaderAdaptor final : public PassInfoMixin<GenXSPIRVReaderAdaptor> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static StringRef getArgString() { return "GenXSPIRVReaderAdaptor"; } }; //----------------------------------------------------------------------------- // Legacy PM support //----------------------------------------------------------------------------- void initializeGenXSPIRVReaderAdaptorLegacyPass(PassRegistry &); ModulePass *createGenXSPIRVReaderAdaptorPass(); } // namespace llvm vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/GenXSPIRVWriterAdaptor.h000066400000000000000000000035011475147027500311630ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2021 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ /// /// GenXSPIRVWriterAdaptor /// --------------------------- /// This pass converts metadata to SPIRV format from whichever is used in the /// frontend #include "llvm/IR/PassManager.h" namespace llvm { class ModulePass; class PassRegistry; //----------------------------------------------------------------------------- // New PM support //----------------------------------------------------------------------------- // Writer adaptor for new PM. class GenXSPIRVWriterAdaptor final : public PassInfoMixin<GenXSPIRVWriterAdaptor> { bool RewriteTypes = true; bool RewriteSingleElementVectors = true; public: GenXSPIRVWriterAdaptor(bool RewriteTypesIn, bool RewriteSingleElementVectorsIn) : RewriteTypes(RewriteTypesIn), RewriteSingleElementVectors(RewriteSingleElementVectorsIn) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static StringRef getArgString() { return "GenXSPIRVWriterAdaptor"; } }; //----------------------------------------------------------------------------- // Legacy PM support //----------------------------------------------------------------------------- void initializeGenXSPIRVWriterAdaptorLegacyPass(PassRegistry &); // Create spirv writer adaptor pass. // RewriteTypes -- whether plain types with decorations should be // rewritten with native SPIRV types. Defaults to false for // compatibility reasons until the backend is able to handle new // types. ModulePass * createGenXSPIRVWriterAdaptorPass(bool RewriteTypes = false, bool RewriteSingleElementVectors = false); } // namespace llvm vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/GenXSimdCFLowering.h000066400000000000000000000106501475147027500303670ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2019-2022 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ // This is the worker class that lowers CM SIMD control flow into a form where // the IR reflects the semantics. See CMSimdCFLowering.cpp for details. #ifndef CMSIMDCF_LOWER_H #define CMSIMDCF_LOWER_H #include "llvm/ADT/MapVector.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include <algorithm> #include <map> #include <set> namespace llvm { // The worker class for lowering CM SIMD CF class CMSimdCFLower { Function *F = {}; // A map giving the basic blocks ending with a simd branch, and the simd // width of each one. MapVector<BasicBlock *, unsigned> SimdBranches; // A map giving the basic blocks to be predicated, and the simd width of // each one.
MapVector<BasicBlock *, unsigned> PredicatedBlocks; // The join points, together with the simd width of each one. MapVector<BasicBlock *, unsigned> JoinPoints; // Mapping of join points to their corresponding goto BBs std::map<BasicBlock *, BasicBlock *> JoinToGoto; // The JIP for each simd branch and join point. std::map<BasicBlock *, BasicBlock *> JIPs; // Subroutines that are predicated, mapping to the simd width. std::map<Function *, unsigned> PredicatedSubroutines; // Execution mask variable. GlobalVariable *EMVar; // Resume mask for each join point. std::map<BasicBlock *, Value *> RMAddrs; // Set of intrinsic calls (other than wrregion) that have been predicated. std::set<Instruction *> AlreadyPredicated; // Mask for shufflevector to extract part of EM. SmallVector<Constant *, 32> ShuffleMask; // Original predicate for an instruction (if it was changed by ANDing with // EM) std::map<Instruction *, Value *> OriginalPred; // Replicate mask for provided number of channels Value *replicateMask(Value *EM, Instruction *InsertBefore, unsigned SimdWidth, unsigned NumChannels = 1); void eraseInstruction(Instruction *I) { assert(!AlreadyPredicated.count(I) && "Shouldn't erase this instruction as it's predicated"); I->eraseFromParent(); } public: static const unsigned MAX_SIMD_CF_WIDTH = 32; CMSimdCFLower(GlobalVariable *EMask) : EMVar(EMask) {} static CallInst *isSimdCFAny(Value *V); static Use *getSimdConditionUse(Value *Cond); void processFunction(Function *F); private: bool findSimdBranches(unsigned CMWidth); void determinePredicatedBlocks(); void markPredicatedBranches(); void fixSimdBranches(); void findAndSplitJoinPoints(); void determineJIPs(); void determineJIP(BasicBlock *BB, std::map<BasicBlock *, int> *Numbers, bool IsJoin); // Methods to add predication to the code void predicateCode(unsigned CMWidth); void predicateBlock(BasicBlock *BB, unsigned SimdWidth); void predicateInst(Instruction *Inst, unsigned SimdWidth); void rewritePredication(CallInst *CI, unsigned SimdWidth); void predicateStore(Instruction *SI, unsigned SimdWidth); void predicateSend(CallInst *CI, unsigned IntrinsicID, unsigned SimdWidth); void predicateScatterGather(CallInst *CI, unsigned SimdWidth, unsigned PredOperandNum); CallInst *predicateWrRegion(CallInst *WrR, unsigned SimdWidth); void predicateCall(CallInst *CI, unsigned SimdWidth); void lowerSimdCF(); void lowerUnmaskOps(); unsigned deduceNumChannels(Instruction *SI); Instruction *loadExecutionMask(Instruction *InsertBefore, unsigned SimdWidth); Value *getRMAddr(BasicBlock *JP, unsigned SimdWidth); }; //----------------------------------------------------------------------------- // New PM support //----------------------------------------------------------------------------- // CMSimdCFLowering adaptor for new PM. class CMSimdCFLowering final : public PassInfoMixin<CMSimdCFLowering> { public: CMSimdCFLowering() {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static StringRef getArgString() { return "cmsimdcflowering"; } }; //----------------------------------------------------------------------------- // Legacy PM support //----------------------------------------------------------------------------- void initializeCMSimdCFLoweringLegacyPass(PassRegistry &); Pass *createCMSimdCFLoweringPass(); } // namespace llvm #endif vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/GenXVersion.h000066400000000000000000000010521475147027500271740ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2021 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ // This file declares interface functions used to acquire version info.
#ifndef GENX_VERSION #define GENX_VERSION #include <string> namespace llvm { namespace GenXIntrinsic { std::string getVCIntrinsicsRevision(); std::string getVCIntrinsicsRepository(); } // namespace GenXIntrinsic } // namespace llvm #endif vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/Intrinsic_definitions.py000066400000000000000000010037411475147027500315330ustar00rootroot00000000000000# ========================== begin_copyright_notice ============================ # # Copyright (C) 2019-2024 Intel Corporation # # SPDX-License-Identifier: MIT # # =========================== end_copyright_notice ============================= #===----------------------------------------------------------------------===// # # This file defines all of the GenX-specific intrinsics, which correspond to # vISA instructions. # # Comment lines with a triple hash ### introduction are extracted and # appended to docs/Targets/GenX/GenXLangRef.rst to give the GenX backend # language reference in docs/autogenerated/Targets/GenX/GenXLangRef.rst. # #===------------------------------------------------------------------------===# #------------ Currently Supported Types ---------------------- #PointerTypes = ["ptr_private", "ptr_global", "ptr_constant", "ptr_local", "ptr_generic"] #FloatingPointTypes = ["half", "float", "double"] #IntegerTypes = ["bool", "char", "short", "int", "long"] #AdditionalTypes = ["vararg"] #IntrinsicsProperties = ["None", "NoMem", "ReadArgMem", "ReadMem", "ReadWriteArgMem", "NoReturn", "NoDuplicate", "Convergent"] #IntrinsicsProperties may be specified as a comma separated list (e.g., "Convergent,NoMem") # # EX. "blah": {"result" : {return_type}, "arguments" : [arg1_type, arg2_type.....], "attributes" : Property } # # The "any" type can be followed by a default type if a type is not explicitly specified : Ex. "any:int" # # 0 - LLVMMatchType<0> # 1 - LLVMMatchType<1> # {int} - LLVMMatchType<{int}> #------------ Supported platforms ---------------------- # Every intrinsic has an optional field "platforms" : "CPU" # CPU can be any entry from "platforms" in Intrinsics.py or "ALL" # when the field is absent - ALL by default # additional commands : # "CPU" = "-Gen9" - unsupported since Gen9 # "CPU" = "Gen11+" - supported from Gen11 # "CPU" = "~XeLP" - unsupported on XeLP # CPU can be a list: # ["XeLP+", "Gen9"] - supported on Gen9 and on all platforms starting from XeLP # ["ALL", "~XeLP"] - supported everywhere except XeLP Imported_Intrinsics = \ { ##-------------------------------------------------------------------- ## Start and end markers of the genx intrinsic enum values. This relies on ## tablegen outputting the intrinsics in sorted by name order. "aaaabegin" : { "result" : "anyvector", "arguments" : [], "attributes" : "None" }, "zzzzend" : { "result" : "anyvector", "arguments" : [], "attributes" : "None" }, ### ``llvm.genx.alloca.`` : CMC internal, no VISA ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### Indicates memory allocation in thread-private memory ### ### * arg0: type to allocate in thread-private memory ### ### * Return value: offset in stack surface ### "alloca" : { "result" : "anyint", "arguments" : ["any"], "attributes" : "None" }, ### ``llvm.genx.faddr.`` : take an address of the function provided ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### Takes an address of the provided function which then may be used ### in a VISA indirect call instruction.
### ### * arg0: function to take address of ### ### * Return value: i64 address ready to be consumed by an indirect call ### "faddr" : { "result" : "long", "arguments" : ["any"], "attributes" : "NoMem" }, ## -------------------------------- ### Region/element access intrinsics ### -------------------------------- ### ### ``llvm.genx.rdregion*...`` : read a region, direct or single-indirect ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.rdregioni`` : integer element type (not i1) ### * ``llvm.genx.rdregionf`` : fp element type ### ### * arg0: vector to read region out of (overloaded) ### * arg1: i32 vstride in elements, constant ### * arg2: i32 width in elements, constant ### * arg3: i32 stride in elements, constant ### * arg4: i16 or vXi16 offset in bytes (overloaded) ### * arg5: i32 parent width, constant, ignored if offset is constant ### ### * Return value: the region extracted ### ### The return type must be a vector with the same element type as the input ### vector, and number of elements giving the total size of the region. ### A scalar can be used instead of a 1-vector. ### ### There are two variants, an integer one and an fp one, because the ### intrinsic declaration language does not let us declare the return type ### as any scalar or vector int or fp type. ### ### The element type must be an integral power of two number of bytes up to ### and including 8 bytes in size, thus one of i8, i16, i32, i64, half, ### float, double. In particular i1 is not allowed. ### The width must be non-zero and must divide the total size evenly. ### ### There is no requirement on vstride, width, stride or total size being ### a power of two or having any maximum. ### ### The offset in bytes arg can be i16 or vector of i16. If a vector, then ### its vector width must be the height of the region, i.e. the total ### size of the region divided by the width. ### ### The parent width arg is ignored if the offset arg is constant. If the ### offset arg is variable, then a non-undef parent width is a statement ### that the value of offset is such that a row of the region does not ### cross a multiple of parent width boundary. This is used by the backend ### to determine whether the region can be collapsed into another region. ### "rdregioni" : { "result" : "anyint", "arguments" : ["anyvector","int","int","int","anyint","int"], "attributes" : "NoMem" }, "rdregionf" : { "result" : "anyfloat", "arguments" : ["anyvector","int","int","int","anyint","int"], "attributes" : "NoMem" }, ### ``llvm.genx.wrregion*`` : write a region, direct or single-indirect ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.wrregioni....`` : integer element type (not i1) ### * ``llvm.genx.wrregionf....`` : fp element type ### ### * arg0: vector to write region in to ### * arg1: subvector or scalar to write into the region (overloaded) ### * arg2: i32 vstride in elements, constant ### * arg3: i32 width in elements, constant ### * arg4: i32 stride in elements, constant ### * arg5: i16 or vXi16 offset in bytes (overloaded) ### * arg6: i32 parent width, constant, ignored if offset is constant ### * arg7: vector of i1 mask, or scalar i1 (overloaded) ### ### * Return value: the updated vector with the region modified ### ### The return type must be a vector with the same type as the arg0 vector. ### The arg1 subvector must have the same element type as the arg0 vector ### and be no larger. 
Arg1 can be a scalar if the number of elements in ### the subregion is 1. ### ### There are two variants, an integer one and an fp one, because the ### intrinsic declaration language does not let us declare the arg1 type ### as any scalar or vector int or fp type. ### ### The element type must be an integral power of two number of bytes up to ### and including 8 bytes in size, thus one of i8, i16, i32, i64, half, ### float, double. In particular i1 is not allowed. ### The width must be non-zero and must divide the total size evenly. ### ### The arg7 mask is a vector of booleans, exactly as wide as the ### arg1 subvector, such that an element of the subvector is written into ### its place in the vector only if the corresponding element of the mask ### is true. ### Alternatively, arg7 can be a single i1 constant with value 1, ### meaning that the wrregion is unconditional. ### ### There is no requirement on vstride, width, stride or total size being ### a power of two or having any maximum. ### ### The offset in bytes arg can be i16 or vector of i16. If a vector, then ### its vector width must be the height of the region, i.e. the total ### size of the region divided by the width. ### ### After lowering, the arg1 subvector to write can be a scalar of the same ### type as an element of arg0, indicating that the region has one element. ### (Lowering lowers an insertelement to this type of wrregion.) ### ### The parent width arg is ignored if the offset arg is constant. If the ### offset arg is variable, then a non-undef parent width is a statement ### that the value of offset is such that a row of the region does not ### cross a multiple of parent width boundary. This is used by the backend ### to determine whether the region can be collapsed into another region. ### "wrregioni" : { "result" : "anyvector", "arguments" : [0,"anyint","int","int","int","anyint","int","anyint"], "attributes" : "NoMem" }, "wrregionf" : { "result" : "anyvector", "arguments" : [0,"anyfloat","int","int","int","anyint","int","anyint"], "attributes" : "NoMem" }, ### ``llvm.genx.vstore..`` : store a vector value into memory ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### This intrinsic has the exact semantics of an llvm store instruction. ### It is designed for reading and writing a pass-by-reference argument ### and it stops llvm optimizations from optimizing away accesses to the ### pass-by-reference arguments. ### ### * arg0: the vector to read from ### * arg1: the memory to be accessed ### "vstore" : { "result" : "void", "arguments" : ["anyvector","anyptr"], "attributes" : "None" }, ### ``llvm.genx.vload..`` : load a vector value from memory ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### This intrinsic has the exact semantics of an llvm load instruction. ### It is designed for reading and writing a pass-by-reference argument ### and it stops llvm optimizations from optimizing away accesses to the ### pass-by-reference arguments. 
### ### * arg0: the memory to be accessed (overloaded) ### * Return value: the vector value read ### "vload" : { "result" : "anyvector", "arguments" : ["anyptr"], "attributes" : "None" }, ## ------------------------------ ### ALU type conversion intrinsics ### ------------------------------ ### ``llvm.genx.fptosi.sat..`` : convert floating point to signed integer with saturate ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: value to saturate, any scalar or vector floating point type (overloaded) ### ### * Return value: converted value, any scalar or vector integer type ### (treated as signed) with same vector width as arg0 ### "fptosi_sat" : { "result" : "anyint", "arguments" : ["anyfloat"], "attributes" : "NoMem" }, ### ``llvm.genx.fptoui.sat..`` : convert floating point to unsigned integer with saturate ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: value to saturate, any scalar or vector floating point type (overloaded) ### ### * Return value: converted value, any scalar or vector integer type ### (treated as unsigned) with same vector width as arg0 ### "fptoui_sat" : { "result" : "anyint", "arguments" : ["anyfloat"], "attributes" : "NoMem" }, ### ``llvm.genx.sat..`` : floating point saturate ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: value to saturate, any scalar or vector floating point type ### ### * Return value: saturated value, same type as arg0 ### ### We represent floating point saturation by simply calling this intrinsic ### on the result of a floating point operation. This works because the ### value before saturation fits in the same type. ### ### We do not have an equivalent for integer saturation, because the ### before-saturation value needs a bigger integer type than the result. ### Instead, any integer operation that supports saturation needs an ### intrinsic for the saturating variant. ### "sat" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.*trunc.sat..`` : integer truncation with saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.sstrunc.sat`` : signed result, signed operand ### * ``llvm.genx.sutrunc.sat`` : signed result, unsigned operand ### * ``llvm.genx.ustrunc.sat`` : unsigned result, signed operand ### * ``llvm.genx.uutrunc.sat`` : unsigned result, unsigned operand ### ### * arg0: value to truncate, any scalar or vector integer type (overloaded) ### ### * Return value: truncated value, any scalar or vector integer type ### with same vector width as arg0 ### "sstrunc_sat" : { "result" : "anyint", "arguments" : ["anyint"], "attributes" : "NoMem" }, "sutrunc_sat" : { "result" : "anyint", "arguments" : ["anyint"], "attributes" : "NoMem" }, "ustrunc_sat" : { "result" : "anyint", "arguments" : ["anyint"], "attributes" : "NoMem" }, "uutrunc_sat" : { "result" : "anyint", "arguments" : ["anyint"], "attributes" : "NoMem" }, ## ------------------- ### Modifier intrinsics ### ------------------- ### ### Abs is the only source modifier that is represented ### by an intrinsic; neg(x) uses 0-x, and not(x) uses x^-1. 
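###
### For example (an illustrative sketch, not normative IR), with v8i32
### operands neg(x) is written as ``%neg = sub <8 x i32> zeroinitializer, %x``
### and not(x) as ``%not = xor <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>``,
### while abs uses the intrinsics below.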
### ### ``llvm.genx.abs*.`` : take absolute value ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.absf`` : abs modifier for fp ### * ``llvm.genx.absi`` : abs modifier for integer ### ### * arg0: input value, scalar/vector ### ### * Return value: result, same type ### "absf" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, "absi" : { "result" : "anyint", "arguments" : [0], "attributes" : "NoMem" }, ## ---------------------------- ### Boolean reduction intrinsics ### ---------------------------- ### ``llvm.genx.all.`` : true if all input elements are true ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value: v*i1 (overloaded) ### ### * Return value: i1 result ### "all" : { "result" : "bool", "arguments" : ["anyint"], "attributes" : "NoMem" }, ### ``llvm.genx.any.`` : true if any input element is true ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value: v*i1 (overloaded) ### ### * Return value: i1 result ### "any" : { "result" : "bool", "arguments" : ["anyint"], "attributes" : "NoMem" }, ## ---------------------------- ### SIMD control flow intrinsics ### ---------------------------- ### ### ``goto`` and ``join`` instructions are represented by ``llvm.genx.simdcf.goto`` ### and ``llvm.genx.simdcf.join`` intrinsics. ### ### The architectural model ### ^^^^^^^^^^^^^^^^^^^^^^^ ### ### The architectural model defines SIMD control flow in terms of each of the 32 channels ### having a PcIP (per-channel instruction pointer), which determines where a ### disabled channel will be re-enabled: ### ### * A goto has two targets, UIP (update IP) and JIP (join IP). ### ### - A (forward) goto evaluates its vector condition, and, for each channel ### that is enabled and the condition is true, it sets the channel's PcIP to ### UIP, to mark that the channel is disabled until execution reaches the ### join instruction at UIP. If, after disabling channels in this way, no ### channels are left enabled, then execution jumps to JIP. ### ### UIP and JIP may be different, as there may be channels already disabled ### from an earlier goto with their PcIPs set to an earlier point than the ### present goto's UIP. So JIP needs to be set to the earliest point that ### a channel could have its PcIP pointing at. ### ### - There is also a backward goto variant for use in a conditional loop ### back edge (end of a do..while loop). It works the same as a forward goto ### over an unconditional jump back to the top of the loop. ### ### * A join has one target, JIP. It reenables all channels that have PcIP set ### to this join. If there are still no channels enabled, it jumps to JIP. ### ### * Each instruction's register write-back is gated by which channels are ### enabled, unless the instruction has a nomask bit set. This is in addition ### to optionally being gated by a predicate. ### ### * The action of the channel enable mask (and predicate) in a send depends ### on the shared function. Some (e.g. gather and scatter) have the expected ### semantics where disabled channels do not participate in the memory read/write, ### and (in the case of a read) do not update that channel's result. ### ### This scheme allows arbitrarily unstructured SIMD control flow.
For it to work ### and guarantee convergence, it is sufficient (not sure if it is necessary) ### for there to be a linear chain of join points, with each goto/join's UIP and ### JIP forward in the chain, and JIPs set correctly so that it is not possible ### for execution to "miss out" a join point where a channel should have been ### enabled. (As above, a backward goto is handled in this ### model by being considered a forward goto over a backward unconditional jump.) ### ### In Gen code, this linear chain of join points does not actually have to be in ### program order, as long as the join point order with forward UIP and JIP is ### derivable. ### ### In vISA, the linear chain of join points does have to be in program order. ### vISA does not encode the JIP of a goto/join; instead it derives it itself. ### Also, vISA uses whether a goto's target is before or after the goto itself to encode whether ### it is a conditional loop backedge branch. ### ### The LLVM IR model ### ^^^^^^^^^^^^^^^^^ ### ### The model we use in LLVM IR is very similar to the above. ### ### The PcIP (per-channel instruction pointer) is replaced by: ### ### * a global (in the function) EM (execution mask), with each channel having a ### bit that is 1 when the channel is enabled; ### ### * each join point has an RM (resume mask), with each channel having a bit ### that is 1 if the channel is disabled and due to be re-enabled when execution ### reaches that join point. ### ### A goto is represented by the ``llvm.genx.simdcf.goto`` intrinsic. Its ### inputs are the current EM value, the current RM value for its UIP, and the ### vector condition. Its results are the updated EM value, the updated RM ### value for its UIP, and a scalar bool that says whether all channels are now ### disabled and execution should branch to the JIP. This last result is then ### (usually) used in a standard LLVM conditional ``br`` instruction. ### ### A goto is implicitly attached to its UIP join by the input and output RM ### values being part of a web of RM values connected by goto and phi nodes ### and used in that join. ### ### A join is represented by the ``llvm.genx.simdcf.join`` intrinsic. Its ### inputs are the current EM value and the current RM value for this join. ### Its results are the updated EM value (this join's RM value is now effectively ### all zeros so it is not returned as a result), and a scalar bool that says whether ### all channels are still disabled and execution should branch to the JIP. ### This last result is then (optionally) used in a standard LLVM conditional ### ``br`` instruction. ### ### An instruction's register write-back being gated by which channels are enabled ### is modeled by the current EM value (or the appropriate size left slice of it) ### being used as the predicate in a select or wrregion or shared function ### intrinsic. ### ### Note that EM is always 32 bit, but a join's RM may be smaller as it has the same ### vector width as the condition on all gotos that update it.
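###
### As a shape-only sketch (the type suffixes here follow the usual
### overloaded-name mangling and are illustrative, not normative), a goto
### might appear as
###
### ``%g = call {<32 x i1>, <16 x i1>, i1} @llvm.genx.simdcf.goto.v32i1.v16i1(<32 x i1> %EM, <16 x i1> %RM, <16 x i1> %cond)``
###
### with the three members recovered by ``extractvalue`` and the i1 member
### used in a conditional ``br`` whose "true" successor is the JIP.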
### ### This model is equivalent to the architectural model, as long as: ### ### * there is only ever one EM value live at a time with an initial value in a ### function of either all ones or the passed in call mask; ### ### * for each join point, there is only ever one RM value live at a time with an ### initial value in a function of all zeros, and a value after the join point of ### all zeros; ### ### * it is possible to re-order the code such that the "false" target of a ### conditional branch that a goto or join is attached to is fall-through, and ### all JIPs and UIPs are forward. ### ### Like any other variable with multiple values transformed to SSA, different ### EM values may be joined with a phi node. Similarly, for a particular join point's ### RM, different RM values may be joined with a phi node. ### ### The ``llvm.genx.simdcf.goto`` and ``llvm.genx.simdcf.join`` intrinsics can ### only be generated to ``goto`` and ``join`` instructions if the GenX backend ### deems them to be used in a way that is equivalent to the architectural model. Otherwise, ### they are lowered to equivalent but slower code that implements the semantics ### of the spec of the intrinsics below. ### ### There are more detailed requirements on the use of these intrinsics to be able ### to generate them to ``goto`` and ``join`` instructions documented in the ### GenXSimdCFConformance pass. ### ### ``llvm.genx.simdcf.goto...`` : goto instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: OldEM (old execution mask): v32i1 (overloaded) ### * arg1: OldRM (old resume mask): vector of i1 (overloaded) ### * arg2: SimdCond (the SIMD control flow condition): same type as arg1 ### ### Return value: struct with the following elements: ### ### * ret0: NewEM (updated execution mask): v32i1 ### * ret1: NewRM (updated resume mask): same type as arg1 ### * ret2: BranchCond: i1 ### ### The elements of the returned struct are calculated as follows: ### ### * NewEM = OldEM & ~(SimdCond zero extended to v32i1) ### * NewRM = OldRM | ((OldEM truncated to size of SimdCond) & SimdCond) ### * BranchCond = !any(NewEM truncated to size of SimdCond) ### ### ``llvm.genx.simdcf.goto`` represents a Gen goto instruction, taking a ### vector condition, modifying the global EM and the UIP's RM, and ### resulting in a scalar condition to be used in a conditional branch whose ### "true" successor is the goto's JIP. ### ### If the BranchCond result is not used, then the goto's JIP is set to the ### join immediately after. ### ### If the BranchCond result is used in a conditional branch, and JIP is ### later than the earliest join point ### where a channel would be re-enabled, then it is undefined whether the ### resulting goto instruction's JIP is as specified here, or an earlier join ### point. (This rule is to allow for the vISA finalizer re-deriving the JIPs.) ### ### If the goto intrinsic's conditional branch simply branches over an empty block ### with an unconditional branch, then the GenX backend takes the intrinsic and ### the two branches to be a do..while back edge, giving a Gen ``goto`` ### instruction with BranchCtrl=1, UIP set to the successor of the unconditional ### branch (the top of the do..while loop), and JIP set to the following join ### instruction. ### ### Channels already disabled in EM remain disabled.
For enabled channels, ### any channel whose element in SimdCond is true becomes disabled in EM, and ### the corresponding bit in RM is set such that the channel becomes re-enabled ### upon reaching the RM's join point. If all channels in EM are then disabled, ### then BranchCond is true and the conditional branch in which it is used ### branches to the next join point in sequence. ### ### Note that SimdCond has the same sense as in the Gen goto instruction, but ### the opposite sense to that in a vISA forward goto instruction. ### "simdcf_goto" : { "result" : ["anyvector","anyvector","bool"], "arguments" : [0,1,1], "attributes" : "NoMem" }, ### ``llvm.genx.simdcf.join..`` : join instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: OldEM (old execution mask): v32i1 (overloaded) ### * arg1: RM (resume mask): vector of i1 ### ### Return value: struct with the following elements: ### ### * ret0: NewEM (updated execution mask): v32i1 ### * ret1: BranchCond: i1 ### ### The elements of the returned struct are calculated as follows: ### ### * NewEM = OldEM | (RM zero extended to v32i1) ### * BranchCond = !any(NewEM truncated to size of RM) ### ### This is marked as having side effects to stop LLVM removing an otherwise ### unused join at an outer endif. ### ### ``llvm.genx.simdcf.join`` represents a Gen join instruction, using the join ### point's RM, modifying the global EM, and resulting in a scalar condition to ### be used (optionally) in a conditional branch whose "true" successor is ### the join's JIP. ### ### If the BranchCond result is not used, then the join's JIP is undefined; this ### case is used when it is known that at least one channel is enabled after ### the join so JIP will never be used. ### ### If the BranchCond result is used in a conditional branch, and JIP is ### later than the earliest join point ### where a channel would be re-enabled, then it is undefined whether the ### resulting goto instruction's JIP is as specified here, or an earlier join ### point. (This rule is to allow for the vISA finalizer re-deriving the JIPs.) ### ### Note that vISA does not have a join instruction; the vISA finalizer ### recovers the join points from the goto instructions assuming a linear order. ### ### Channels with a set bit in RM become enabled in EM. If all channels in EM are ### still disabled, then BranchCond is true and the conditional branch in which it ### is used branches to the next join point in sequence. 
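###
### As an illustrative sketch (type suffixes assumed, matching the goto
### example above), a join might appear as
### ``%j = call {<32 x i1>, i1} @llvm.genx.simdcf.join.v32i1.v16i1(<32 x i1> %EM, <16 x i1> %RM)``,
### with the i1 member optionally feeding a conditional ``br`` to the JIP.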
### "simdcf_join" : { "result" : ["anyvector","bool"], "arguments" : [0,"anyvector"], "attributes" : "None" }, ### ``llvm.genx.simdcf.savemask.`` : ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: OldEM (old execution mask): v32i1 (overloaded) ### * ret: temp i32 for saving the oldEM "simdcf_savemask" : { "result" : "int", "arguments" : ["anyvector"], "attributes" : "WriteMem,SideEffects" }, ### ``llvm.genx.simdcf.unmask.`` : ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: temp i32 from savemask ### * arg1: i32 constant, should be all-one ### * ret: NewEM (updated execution mask, all-one): v32i1 "simdcf_unmask" : { "result" : "anyvector", "arguments" : ["int","int"], "attributes" : "WriteMem,SideEffects" }, ### ``llvm.genx.simdcf.remask.`` : ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: OldEM (old execution mask): v32i1 ### * arg1: temp i32 for restoring the EM ### ### Return value: NewEM (updated execution mask): v32i1 ### "simdcf_remask" : { "result" : "anyvector", "arguments" : [0,"int"], "attributes" : "WriteMem,SideEffects" }, ### ``llvm.genx.simdcf.get.em`` : ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: EM (execution mask): v32i1 ### ### * Return value: temp v32i1 to store EM ### ### This intrinsic prevents manipulations on EM usage ### and allows CM to create explicit value from EM. ### No masks are modified by this intrinsic. ### ### The WriteMem and SideEffects markers are used to ### prevent this instruction from being moved: in fact, ### EM is different in different locations even when the ### dominance of DF is not corrupted. ### "simdcf_get_em" : { "result" : "anyvector", "arguments" : [0], "attributes" : "WriteMem,SideEffects" }, ### -------------- ### ALU intrinsics ### -------------- ### add ### ^^^ ### Non-saturating add intrinsic is not needed. A vISA non-saturating add ### where the result type is different to the operand type is represented ### by trunc/zext/sext of each operand and then an LLVM IR Add instruction. ### ### ``llvm.genx.*add.sat..`` : add instruction with saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssadd.sat`` : result signed, operands signed ### * ``llvm.genx.suadd.sat`` : result signed, operands unsigned ### * ``llvm.genx.usadd.sat`` : result unsigned, operands signed ### * ``llvm.genx.uuadd.sat`` : result unsigned, operands unsigned ### ### * arg0: first input, any scalar/vector integer type, even i64 (overloaded) ### * arg1: second input, same type as arg0 ### ### * Return value: result, any scalar or vector integer type with same ### vector width ### ### For an fp add, use the LLVM IR FAdd instruction, followed by ### llvm.genx.sat if saturation is required. ### "ssadd_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "suadd_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "usadd_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "uuadd_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, ### asr ### ^^^ ### asr intrinsic is not needed. Because asr cannot overflow, an asr that ### saturates with a smaller result type than the execution type can be ### represented by an LLVM IR Asr instruction then an llvm.genx.sstrunc.sat. 
### ### ``llvm.genx.*avg..`` : integer averaging, no saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssavg`` : result signed, operands signed ### * ``llvm.genx.suavg`` : result signed, operands unsigned ### * ``llvm.genx.usavg`` : result unsigned, operands signed ### * ``llvm.genx.uuavg`` : result unsigned, operands unsigned ### ### * arg0: first input, any scalar/vector integer type (not i64) (overloaded) ### * arg1: second input, same type as arg0 ### ### * Return value: result, any scalar/vector integer type (not i64) ### with same vector width ### "ssavg" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "suavg" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "usavg" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "uuavg" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, ### ``llvm.genx.*avg.sat..`` : integer averaging with saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssavg.sat`` : result signed, operands signed ### * ``llvm.genx.suavg.sat`` : result signed, operands unsigned ### * ``llvm.genx.usavg.sat`` : result unsigned, operands signed ### * ``llvm.genx.uuavg.sat`` : result unsigned, operands unsigned ### ### * arg0: first input, any scalar/vector integer type (not i64) (overloaded) ### * arg1: second input, same type as arg0 ### ### * Return value: result, any scalar/vector integer type (not i64) ### with same vector width ### "ssavg_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "suavg_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "usavg_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "uuavg_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, ### ``llvm.genx.*bfe.`` : bitfield extract ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.sbfe`` : bitfield extract, signed result ### * ``llvm.genx.ubfe`` : bitfield extract, unsigned result ### ### * arg0: first input, any scalar/vector i32 type ### * arg1: second input, same type as arg0 ### * arg2: third input, same type as arg0 ### ### * Return value: result, same type as arg0 ### "sbfe" : { "result" : "anyint", "arguments" : [0,0,0], "attributes" : "NoMem" }, "ubfe" : { "result" : "anyint", "arguments" : [0,0,0], "attributes" : "NoMem" }, ### ``llvm.genx.bfi.`` : bitfield insert ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input, any scalar/vector i32 type ### * arg1: second input, same type as arg0 ### * arg2: third input, same type as arg0 ### * arg3: fourth input, same type as arg0 ### ### * Return value: result, same type as arg0 ### "bfi" : { "result" : "anyint", "arguments" : [0,0,0,0], "attributes" : "NoMem" }, ### ``llvm.genx.bfrev.`` : reverse bits ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input, any scalar/vector i32 type ### ### * Return value: result, same type as arg0 ### "bfrev" : { "result" : "anyint", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.cbit..`` : count set bits ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input, any scalar/vector integer type (overloaded) ### ### * Return value: result, int32 of same width as arg0 ### "cbit" : { "result" : "anyint", "arguments" : ["anyint"], 
"attributes" : "NoMem" }, ### cmp ### ^^^ ### No intrinsic needed as the LLVM IR ICmp and FCmp instructions cover ### vISA functionality ### ### ``llvm.genx.cos.`` : cos instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector half/float type ### ### * Return value: result, same type ### "cos" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### div ### ^^^ ### No intrinsic needed as the LLVM IR SDiv, UDiv and FDiv instructions ### cover vISA functionality ### ### ``llvm.genx.ieee.div.`` : Divide, IEEE variant ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input, any scalar/vector float/double type ### * arg1: second input, same type ### ### * Return value: result, same type ### "ieee_div" : { "result" : "anyfloat", "arguments" : [0,0], "attributes" : "NoMem" }, ### ``llvm.genx.dp2.`` : dp2 instruction (dot product on groups of 4 elements) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input value, any vector float with a multiple of 4 elements ### * arg1: second input value, same type as arg0 ### ### * Return value: result, same type ### "dp2" : { "result" : "anyfloat", "arguments" : [0,0], "attributes" : "NoMem" }, ### ``llvm.genx.dp3.`` : dp3 instruction (dot product on groups of 3 elements) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input value, any vector float with a multiple of 4 elements ### * arg1: second input value, same type as arg0 ### ### * Return value: result, same type ### "dp3" : { "result" : "anyfloat", "arguments" : [0,0], "attributes" : "NoMem" }, ### ``llvm.genx.dp4.`` : dp4 instruction (dot product on groups of 4 elements) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input value, any vector float with a multiple of 4 elements ### * arg1: second input value, same type as arg0 ### ### * Return value: result, same type ### "dp4" : { "result" : "anyfloat", "arguments" : [0,0], "attributes" : "NoMem" }, ### ``llvm.genx.dph.`` : dph instruction (dot product homogenous) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input value, any vector float with a multiple of 4 elements ### * arg1: second input value, same type as arg0 ### ### * Return value: result, same type ### "dph" : { "result" : "anyfloat", "arguments" : [0,0], "attributes" : "NoMem" }, ### ``llvm.genx.exp.`` : base 2 exponent ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector half/float type ### ### * Return value: result, same type ### "exp" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.*fbh.`` : find bit high ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.sfbh`` : find bit high, signed operand ### * ``llvm.genx.ufbh`` : find bit high, unsigned operand ### ### * arg0: input value, any scalar/vector i32 type ### ### * Return value: result, same type ### "sfbh" : { "result" : "anyint", "arguments" : [0], "attributes" : "NoMem" }, "ufbh" : { "result" : "anyint", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.fbl.`` : find bit low ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector i32 type ### ### * Return value: result, same type ### "fbl" : { "result" : "anyint", 
"arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.frc.`` : fractional part ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector float type ### ### * Return value: result, same type ### "frc" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.inv.`` : reciprocal ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector half/float type ### ### * Return value: result, same type ### "inv" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.line.`` : linear equation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input value, vector float with exactly 4 elements ### * arg1: second input value, vector float with a multiple of 4 elements ### ### * Return value: result, same type as arg1 ### "line" : { "result" : "anyfloat", "arguments" : ["float4",0], "attributes" : "NoMem" }, ### ``llvm.genx.log.`` : base 2 logarithm ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector half/float type ### ### * Return value: result, same type ### "log" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.lrp.`` : linear interpolation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input value, any vector float with a multiple of 4 elements ### * arg1: second input value, same type as arg0 ### * arg2: third input value, same type as arg0 ### ### * Return value: result, same type ### "lrp" : { "result" : "anyfloat", "arguments" : [0,0,0], "attributes" : "NoMem" }, ### ``llvm.genx.lzd.`` : leading zero detection ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector i32 type ### ### * Return value: result, same type ### "lzd" : { "result" : "anyint", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.*mad..`` : mad instruction, no saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssmad`` : result signed, operands signed ### * ``llvm.genx.sumad`` : result signed, operands unsigned ### * ``llvm.genx.usmad`` : result unsigned, operands signed ### * ``llvm.genx.uumad`` : result unsigned, operands unsigned ### ### result := arg0 * arg1 + arg2 ### ### * Return value: result, any scalar or vector integer type with same ### vector width ### ### * arg0: first input, any scalar/vector integer type (not i64) (overloaded) ### * arg1: second input, same type as arg0 ### * arg2: third input, same type as result ### "ssmad" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "sumad" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "usmad" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "uumad" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, ### ``llvm.genx.*mad.sat..`` : mad instruction with saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssmad.sat`` : result signed, operands signed ### * ``llvm.genx.sumad.sat`` : result signed, operands unsigned ### * ``llvm.genx.usmad.sat`` : result unsigned, operands signed ### * ``llvm.genx.uumad.sat`` : result unsigned, operands unsigned ### ### result := sat(arg0 * arg1 + arg2) ### ### * Return value: result, any scalar or vector integer type with same ### vector width 
### ### * arg0: first input, any scalar/vector integer type (not i64) (overloaded) ### * arg1: second input, same type as arg0 ### * arg2: third input, same type as result ### "ssmad_sat" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "sumad_sat" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "usmad_sat" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "uumad_sat" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, ### ``llvm.genx.*max..`` : max instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.smax`` : result and operands signed ### * ``llvm.genx.umax`` : result and operands unsigned ### * ``llvm.genx.fmax`` : result and operands float ### ### * arg0: first input, any scalar/vector integer/float type, even i64 (overloaded) ### * arg1: second input, same type as arg0 ### ### * Return value: result, any scalar or vector integer/float type with same ### vector width ### ### There is no need for a saturating variant of this intrinsic. ### Because max cannot overflow, a saturating max can be represented ### by this non-saturating max followed by the applicable one of the ### saturating trunc intrinsics. ### "smax" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "umax" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "fmax" : { "result" : "anyfloat", "arguments" : ["anyfloat",1], "attributes" : "NoMem" }, ### ``llvm.genx.*min.`` : min instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.smin.`` : result and operands signed ### * ``llvm.genx.umin.`` : result and operands unsigned ### * ``llvm.genx.fmin.`` : result and operands float ### ### * arg0: first input, any scalar/vector integer/float type, even i64 (overloaded) ### * arg1: second input, same type as arg0 ### ### * Return value: result, any scalar or vector integer/float type with same ### vector width ### ### There is no need for a saturating variant of this intrinsic. ### Because min cannot overflow, a saturating min can be represented ### by this non-saturating min followed by the applicable one of the ### saturating trunc intrinsics. ### "smin" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "umin" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "fmin" : { "result" : "anyfloat", "arguments" : ["anyfloat",1], "attributes" : "NoMem" }, ### mod ### ^^^ ### No intrinsic needed as the LLVM IR SRem, URem and FRem instructions ### cover vISA functionality ### ### imad ### ^^^^ ### ### ``llvm.genx.*imad.<{hi, lo}>.`` : imad instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.simad`` : result and operands signed ### * ``llvm.genx.uimad`` : result and operands unsigned ### ### result := {hi, lo} = arg0 * arg1 + arg2 ### ### * arg0: first input, i32 scalar/vector integer type ### * arg1: second input, same type as arg0 ### * arg2: third input, same type as arg0 ### "simad" : { "result" : ["anyint", "anyint"], "arguments" : [0, 0, 0], "attributes" : "NoMem" }, "uimad" : { "result" : ["anyint", "anyint"], "arguments" : [0, 0, 0], "attributes" : "NoMem" }, ### mul ### ^^^ ### Still need the non-saturating mul intrinsic, as def-hoist/copy-prop in the jitter ### cannot fully remove the trunc/zext/sext on each operand.
### ### ``llvm.genx.*mul..`` : mul instruction, no saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssmul`` : result signed, operands signed ### * ``llvm.genx.sumul`` : result signed, operands unsigned ### * ``llvm.genx.usmul`` : result unsigned, operands signed ### * ``llvm.genx.uumul`` : result unsigned, operands unsigned ### ### result := arg0 * arg1 ### ### * Return value: result, any scalar or vector integer type with same ### vector width ### ### * arg0: first input, any scalar/vector integer type (not i64) (overloaded) ### * arg1: second input, same type as arg0 ### "ssmul" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "sumul" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "usmul" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "uumul" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, ### ``llvm.genx.*mul.sat..`` : mul instruction with saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssmul.sat`` : result signed, operands signed ### * ``llvm.genx.sumul.sat`` : result signed, operands unsigned ### * ``llvm.genx.usmul.sat`` : result unsigned, operands signed ### * ``llvm.genx.uumul.sat`` : result unsigned, operands unsigned ### ### * arg0: first input, any scalar/vector integer type (not i64) (overloaded) ### * arg1: second input, same type as arg0 ### ### * Return value: result, any scalar/vector integer type with same ### vector width, even i64 ### ### For an fp mul, use the LLVM IR FMul instruction, followed by ### llvm.genx.sat if saturation is required. ### "ssmul_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "sumul_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "usmul_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "uumul_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, ### ``llvm.genx.*mulh..`` : mulh instruction, no saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.smulh`` : signed ### * ``llvm.genx.umulh`` : unsigned ### ### * arg0: first input, any scalar/vector i32 type (overloaded) ### * arg1: second input, same type as arg0 ### ### * Return value: result, same type as arg0 ### "smulh" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "umulh" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, ### not ### ^^^ ### Intrinsic not needed; use LLVM IR Xor instruction with -1 ### ### or ### ^^ ### Intrinsic not needed; use LLVM IR Or instruction ### ### ``llvm.genx.pln..`` : plane equation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input value, vector float with exactly 4 elements ### * arg1: second input value, vector float with a multiple of 16 elements (overloaded) ### ### * Return value: result, vector float with half as many elements as arg1 ### "pln" : { "result" : "anyfloat", "arguments" : ["float4","anyfloat"], "attributes" : "NoMem" }, ### ``llvm.genx.pow.`` : power ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input, any scalar/vector half/float type ### * arg1: second input, same type ### ### * Return value: result, same type ### "pow" : { "result" : "anyfloat",
"arguments" : [0,0], "attributes" : "NoMem" }, ### ``llvm.genx.rndd.`` : round down ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector float type ### ### * Return value: result, same type ### "rndd" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.rnde.`` : round to even ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector float type ### ### * Return value: result, same type ### "rnde" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.rndu.`` : round up ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector float type ### ### * Return value: result, same type ### "rndu" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.rndz.`` : round to zero ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector float type ### ### * Return value: result, same type ### "rndz" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.rsqrt.`` : reciprocal square root ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector half/float type ### ### * Return value: result, same type ### "rsqrt" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.*sad2..`` : two-wide sum of absolute differences ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssad2`` : signed argument and result ### * ``llvm.genx.usad2`` : unsigned argument and result ### ### * arg0: first input, vector of i8, multiple of 2 wide (overloaded) ### * arg1: second input, same type ### ### * Return value: result, vector of i16 of same vector width ### "ssad2" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "usad2" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, ### ``llvm.genx.*sad2add..`` : two-wide sum of absolute differences and add ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.sssad2add`` : signed result and args ### * ``llvm.genx.uusad2add`` : unsigned result and args ### * ``llvm.genx.ussad2add`` : unsigned result and signed args ### * ``llvm.genx.susad2add`` : signed result and unsigned args ### ### * arg0: first input, vector of i8, multiple of 2 wide (overloaded) ### * arg1: second input, same type ### * arg2: third input, vector of i16 of same vector width ### ### * Return value: result, same type as arg2 ### "sssad2add" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "uusad2add" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "ussad2add" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "susad2add" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, ### ``llvm.genx.*sad2add.sat..`` : two-wide sum of absolute differences and add, saturated ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.sssad2add.sat`` : signed result and args ### * ``llvm.genx.uusad2add.sat`` : unsigned result and args ### * ``llvm.genx.ussad2add.sat`` : unsigned result and signed args ### * ``llvm.genx.susad2add.sat`` : signed result and unsigned args ### ### * arg0: first 
input, vector of i8, multiple of 2 wide (overloaded) ### * arg1: second input, same type ### * arg2: third input, vector of i16 of same vector width ### ### * Return value: result, same type as arg2 ### "sssad2add_sat" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "uusad2add_sat" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "ussad2add_sat" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, "susad2add_sat" : { "result" : "anyint", "arguments" : ["anyint",1,0], "attributes" : "NoMem" }, ### ``llvm.genx.*shl..`` : shl instruction, no saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssshl`` : result signed, operands signed ### * ``llvm.genx.sushl`` : result signed, operands unsigned ### * ``llvm.genx.usshl`` : result unsigned, operands signed ### * ``llvm.genx.uushl`` : result unsigned, operands unsigned ### ### * arg0: first input, any scalar/vector integer type, even i64 (overloaded) ### * arg1: second input, same type as arg0 ### ### * Return value: result, any scalar or vector integer type with same ### vector width, even i64 ### "ssshl" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "sushl" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "usshl" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "uushl" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, ### ``llvm.genx.*shl.sat..`` : shl instruction with saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssshl.sat`` : result signed, operands signed ### * ``llvm.genx.sushl.sat`` : result signed, operands unsigned ### * ``llvm.genx.usshl.sat`` : result unsigned, operands signed ### * ``llvm.genx.uushl.sat`` : result unsigned, operands unsigned ### ### * arg0: first input, any scalar/vector integer type, even i64 (overloaded) ### * arg1: second input, same type as arg0 ### ### * Return value: result, any scalar/vector integer type with same ### vector width, even i64 ### "ssshl_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "sushl_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "usshl_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "uushl_sat" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, ### shr ### ^^^ ### Intrinsic is not needed. Because shr cannot overflow, an shr that ### saturates with a smaller result type than the execution type can be ### represented by an LLVM IR Shr instruction then an llvm.genx.sstrunc.sat. 
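### As a sketch of that composition (overload suffixes illustrative), a
### saturating arithmetic shift right from v8i32 down to a v8i8 result could
### be written::
###
###   %sh = ashr <8 x i32> %val, %amt
###   %r = call <8 x i8> @llvm.genx.sstrunc.sat.v8i8.v8i32(<8 x i32> %sh)
###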
### ### ``llvm.genx.ro*..`` : rol and ror instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.rol`` : rotate left ### * ``llvm.genx.ror`` : rotate right ### ### * arg0: first input, any scalar/vector integer type (even i64) (overloaded) ### * arg1: second input, same type as arg0 ### ### * Return value: result, any scalar or vector integer type with same ### vector width (even i64) ### "rol" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, "ror" : { "result" : "anyint", "arguments" : ["anyint",1], "attributes" : "NoMem" }, ### ``llvm.genx.sin.`` : sine ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector half/float type ### ### * Return value: result, same type ### "sin" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.sqrt.`` : square root ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector half/float type ### ### * Return value: result, same type ### "sqrt" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.ieee.sqrt.`` : square root, IEEE variant ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input value, any scalar/vector float/double type ### ### * Return value: result, same type ### "ieee_sqrt" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" }, ### ``llvm.genx.dpas...`` : dpas instruction (Dot Product Accumulate Systolic) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: accumulator, vector integer/float type ### * arg1: src1 (W), vector integer/float type (overloaded) ### * arg2: src2 (A), vector integer/float type (overloaded) ### * arg3: integer, encodes information about the operation type ### ### * Return value: result, same type as arg0 ### "dpas" : { "result" : "anyvector", "arguments" : [0,"anyvector","anyvector","int"], "attributes" : "NoMem", "platforms" : [ "XeHP+", "~XeLPG", "~XeHPCVG" ], }, ### ``llvm.genx.dpas2....`` : dpas instruction (Dot Product Accumulate Systolic) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: accumulator input value, vector integer/float type ### * arg1: src1 input value, vector integer/float type ### * arg2: src2 input value, vector integer/float type ### * arg3: int information of src1 PrecisionType ### * arg4: int information of src2 PrecisionType ### * arg5: int SystolicDepth, must be a constant, the only supported value is 8 ### * arg6: int RepeatCount, must be a constant in range [1, 8] ### * arg7: int sign dst (0 = unsigned, 1 = signed) ### * arg8: int sign src0 ### ### * Return value: result ### ### The src1 and src2 PrecisionType arguments should be enum values defined as follows: ### ### +---------------+-------+-------------------------------------------------+ ### | PrecisionType | Value | Description                                     | ### +---------------+-------+-------------------------------------------------+ ### | S2            | 3     | 2-bit signed integer                            | ### | U2            | 4     | 2-bit unsigned integer                          | ### | S4            | 5     | 4-bit signed integer                            | ### | U4            | 6     | 4-bit unsigned integer                          | ### | S8            | 7     | 8-bit signed integer                            | ### | U8            | 8     | 8-bit unsigned integer                          | ### | BF16          | 9     | bfloat16 (S1E8M7) floating point                | ### | HF16          | 10    | half-precision (S1E5M10) floating point         | ### | TF32          | 12    | tensorfloat32 (S1E8M10) floating point          | ### +---------------+-------+-------------------------------------------------+ ### ###
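### As an illustrative sketch only (the overload suffixes, vector widths and
### operand shapes below are example values for one target configuration and
### are not prescribed here), a bf16 dpas2 with SystolicDepth 8 and
### RepeatCount 8 might look like::
###
###   %r = call <128 x float> @llvm.genx.dpas2.v128f32.v128f32.v128i32.v64i32(<128 x float> %acc, <128 x i32> %src1, <64 x i32> %src2, i32 9, i32 9, i32 8, i32 8, i32 0, i32 0)
###
### where 9 is the BF16 PrecisionType value from the table above.
###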
"dpas2" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","anyvector","int","int", "int", "int", "int", "int"], "attributes" : "NoMem", "platforms" : [ "XeHP+", "~XeLPG", "~XeHPCVG" ], }, ### ``llvm.genx.dpas.nosrc0...`` : dpas instruction (Dot Product Accumulate Systolic) with no src0 ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: second input value, vector integer/float type (overloaded) ### * arg1: third input value, vector integer/float type (overloaded) ### * arg2: fourth input value, integer type ### ### * Return value: result ### "dpas_nosrc0" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","int"], "attributes" : "NoMem", "platforms" : [ "XeHP+", "~XeLPG", "~XeHPCVG" ], }, ### ``llvm.genx.dpasw...`` : dpasw instruction (Dot Product Accumulate Systolic) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: first input value, vector integer/float type ### * arg1: second input value, vector integer/float type (overloaded) ### * arg2: third input value, vector integer/float type (overloaded) ### * arg3: fourth input value, integer type ### ### * Return value: result, same type as arg0 ### "dpasw" : { "result" : "anyvector", "arguments" : [0,"anyvector","anyvector","int"], "attributes" : "NoMem", "platforms" : [ "XeHP", "XeHPG", "XeLPGPlus" ], }, ### ``llvm.genx.dpasw.nosrc0...`` : dpasw instruction (Dot Product Accumulate Systolic) with no src0 ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: second input value, vector integer/float type (overloaded) ### * arg1: third input value, vector integer/float type (overloaded) ### * arg2: fourth input value, integer type ### ### * Return value: result ### "dpasw_nosrc0" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","int"], "attributes" : "NoMem", "platforms" : [ "XeHP", "XeHPG", "XeLPGPlus" ], }, ### ``llvm.genx.*dp4a*....`` : dp4a instruction (Dot Product 4 Accumulate) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssdp4a`` : result signed, operands signed ### * ``llvm.genx.sudp4a`` : result signed, operands unsigned ### * ``llvm.genx.usdp4a`` : result unsigned, operands signed ### * ``llvm.genx.uudp4a`` : result unsigned, operands unsigned ### * ``llvm.genx.ssdp4a_sat`` : result signed saturated, operands signed ### * ``llvm.genx.sudp4a_sat`` : result signed saturated, operands unsigned ### * ``llvm.genx.usdp4a_sat`` : result unsigned saturated, operands signed ### * ``llvm.genx.uudp4a_sat`` : result unsigned saturated, operands unsigned ### ### ### * arg0: first input value, vector integer type (overloaded) ### * arg1: second input value, vector integer type (overloaded) ### * arg2: third input value, vector integer type (overloaded) ### ### * Return value: result, vector integer type ### "ssdp4a" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","anyvector"], "attributes" : "NoMem" }, "sudp4a" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","anyvector"], "attributes"
: "NoMem" }, "usdp4a" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","anyvector"], "attributes" : "NoMem" }, "uudp4a" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","anyvector"], "attributes" : "NoMem" }, "ssdp4a_sat" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","anyvector"], "attributes" : "NoMem" }, "sudp4a_sat" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","anyvector"], "attributes" : "NoMem" }, "usdp4a_sat" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","anyvector"], "attributes" : "NoMem" }, "uudp4a_sat" : { "result" : "anyvector", "arguments" : ["anyvector","anyvector","anyvector"], "attributes" : "NoMem" }, ### addc ### ^^^^ ### ### ``llvm.genx.addc.<{carry, add}>.`` : add with carry ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.addc`` : ### ### * arg0: first input, i32 scalar/vector integer type ### * arg1: second input, same type as arg0 "addc" : { "result" : ["anyint", "anyint"], "arguments" : [0, 0], "attributes" : "NoMem" }, ### subb ### ^^^^ ### ### ``llvm.genx.subb.<{borrow, sub}>.`` : sub with borrow ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.subb`` : ### ### * arg0: first input, i32 scalar/vector integer type ### * arg1: second input, same type as arg0 "subb" : { "result" : ["anyint", "anyint"], "arguments" : [0, 0], "attributes" : "NoMem" }, ### add3 ### ^^^^ ### ### ``llvm.genx.*add3..`` : add3 instruction without saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.add3`` : ### ### * arg0: first input, any scalar/vector integer type, i16/i32 (overloaded) ### * arg1: second input, same type as arg0 ### * arg2: third input, same type as arg0 "add3" : { "result" : "anyint", "arguments" : ["anyint",1,1], "attributes" : "NoMem" }, ### ``llvm.genx.*add3.sat..`` : add3 instruction with saturation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.ssadd3.sat`` : result signed, operands signed ### * ``llvm.genx.suadd3.sat`` : result signed, operands unsigned ### * ``llvm.genx.usadd3.sat`` : result unsigned, operands signed ### * ``llvm.genx.uuadd3.sat`` : result unsigned, operands unsigned ### ### * arg0: first input, any scalar/vector integer type, i16/i32 (overloaded) ### * arg1: second input, same type as arg0 ### * arg2: third input, same type as arg0 ### ### * Return value: result, any scalar or vector integer type with same ### vector width ### "ssadd3_sat" : { "result" : "anyint", "arguments" : ["anyint",1,1], "attributes" : "NoMem" }, "suadd3_sat" : { "result" : "anyint", "arguments" : ["anyint",1,1], "attributes" : "NoMem" }, "usadd3_sat" : { "result" : "anyint", "arguments" : ["anyint",1,1], "attributes" : "NoMem" }, "uuadd3_sat" : { "result" : "anyint", "arguments" : ["anyint",1,1], "attributes" : "NoMem" }, ### add3c ### ^^^^^ ### ### ``llvm.genx.add3c.<{carry, add3}>.`` : add3 with carry ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.add3c`` : ### ### * arg0: first input, i32 scalar/vector integer type ### * arg1: second input, same type as arg0 ### * arg2: third input, same type as arg0 "add3c" : { "result" : ["anyint", "intvector"], "arguments" : ["anyint",1,1,1], "attributes" : "NoMem" }, ### bfn ### ^^^ ### ### ``llvm.genx.bfn..`` : bfn instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * 
``llvm.genx.bfn`` : ### ### * arg0: first input, any scalar/vector integer type, i16/i32 (overloaded) ### * arg1: second input, same type as arg0 ### * arg2: third input, same type as arg0 ### * arg3: fourth input, byte, constant "bfn" : { "result" : "anyint", "arguments" : ["anyint",1,1,"char"], "attributes" : "NoMem" }, ### srnd ### ^^^ ### ### ``llvm.genx.srnd...`` : srnd instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.srnd`` : ### ### * arg0: first input, any vector f32/hf16 type ### * arg1: second input, same type as arg0 ### * Return value: result, must be half if arg0 is f32, or ub if arg0 is half. "srnd" : { "result" : "anyvector", "arguments" : ["anyvector", "anyvector"], "attributes" : "NoMem" }, ### ``llvm.genx.biased.rounding.bf8.*`` : biased rounding instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: input, vNf16 ### * arg1: bias, vNi8 ### ### * Return value: result, vNi8 ### ### "biased_rounding_bf8" : { "result" : "anyint", "arguments" : ["anyvector", 0], "attributes" : "NoMem", "platforms" : "Xe3+" }, ### bf_cvt ### ^^^^^^ ### ### ``llvm.genx.bf.cvt..`` : bf_cvt instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.bf.cvt`` : ### ### * arg0: first input, any scalar/vector bf/float type (overloaded) ### ### * Return value: result, must be float if arg0 is half, or half if arg0 is float. ### "bf_cvt" : { "result" : "anyfloat", "arguments" : ["anyfloat"], "attributes" : "NoMem" }, ### tf32_cvt ### ^^^^^^ ### ### ``llvm.genx.tf32.cvt..`` : tf32_cvt instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.tf32.cvt`` : ### ### * arg0: first input, vector float type fp32/hf16 ### ### * Return value: result, must be ud( Unsigned Doubleword) ### "tf32_cvt" : { "result" : "anyvector", "arguments" : ["anyvector"], "attributes" : "NoMem" }, ### qf_cvt ### ^^^^^^ ### ### ``llvm.genx.qf.cvt..`` : qf_cvt instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.qf.cvt`` : ### ### * arg0: first input, any scalar/vector i8/half type (overloaded) ### ### * Return value: result, must be i8 if arg0 is half, or half if arg0 is i8. ### "qf_cvt" : { "result" : "anyvector", "arguments" : ["anyvector"], "attributes" : "NoMem", "platforms" : "Xe3+" }, ### hf8_cvt ### ^^^^^^ ### ### ``llvm.genx.hf8.cvt..`` : hf8_cvt instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.hf8.cvt`` : hf8<->half conversion ### ### * arg0: first input, any scalar/vector i8/half type (overloaded) ### ### * Return value: result, must be i8 if arg0 is half, or half if arg0 is i8. 
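### For example, converting 16 half values to their 8-bit hf8 encoding might
### be written as follows (overload suffixes illustrative)::
###
###   %r = call <16 x i8> @llvm.genx.hf8.cvt.v16i8.v16f16(<16 x half> %src)
###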
### "hf8_cvt" : { "result" : "anyvector", "arguments" : ["anyvector"], "attributes" : "NoMem", "platforms" : "Xe3+" }, ### ``llvm.genx.lsc.load.*...`` : lsc_load instructions ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * ``llvm.genx.lsc.load.slm`` : ### * ``llvm.genx.lsc.load.bti`` : ### * ``llvm.genx.lsc.load.stateless`` : ### * ``llvm.genx.lsc.prefetch.bti`` : ### * ``llvm.genx.lsc.prefetch.stateless`` : ### ### * Exec_size ignored unless operation is transposed (DataOrder == Tranpose) ### * arg0: {1,32}Xi1 predicate (overloaded) ### * arg1: i8 Subopcode, [MBZ] ### * arg2: i8 Caching behavior for L1, [MBC] ### * arg3: i8 Caching behavior for L3, [MBC] ### * arg4: i16 Address scale, [MBC] ### * arg5: i32 Immediate offset added to each address, [MBC] ### * arg6: i8 The dataum size, [MBC] ### * arg7: i8 Number of elements to load per address (vector size), [MBC] ### * arg8: i8 Indicates if the data is transposed during the transfer, [MBC] ### * arg9: i8 Channel mask for quad versions, [MBC] ### * arg10: {1,32}Xi{16,32,64} The vector register holding offsets (overloaded) ### for flat version Base Address + Offset[i] goes here ### * arg11: i32 surface to use for this operation. This can be an immediate or a register ### for flat and bindless version pass zero here ### ### * Return value: the value read or void for prefetch ### ### Cache mappings are: ### ### - 0 -> .df (default) ### - 1 -> .uc (uncached) ### - 2 -> .ca (cached) ### - 3 -> .wb (writeback) ### - 4 -> .wt (writethrough) ### - 5 -> .st (streaming) ### - 6 -> .ri (read-invalidate) ### - 7 -> .cc (const-cached) ### ### Only certain combinations of CachingL1 with CachingL3 are valid on hardware. ### ### +---------+-----+-----------------------------------------------------------------------+ ### | L1 | L3 | Notes | ### +---------+-----+-----------------------------------------------------------------------+ ### | .df | .df | default behavior on both L1 and L3 (L3 uses MOCS settings) | ### +---------+-----+-----------------------------------------------------------------------+ ### | .uc | .uc | uncached (bypass) both L1 and L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .st | .uc | streaming L1 / bypass L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .uc | .ca | bypass L1 / cache in L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ca | .uc | cache in L1 / bypass L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ca | .ca | cache in both L1 and L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .st | .ca | streaming L1 / cache in L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ri | .ca | read-invalidate (e.g. 
last-use) on L1 loads / cache in L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### ### Xe2 caching combinations ### ### +---------+-----+-----------------------------------------------------------------------+ ### | L1 | L3 | Notes | ### +---------+-----+-----------------------------------------------------------------------+ ### | .df | .df | default behavior on both L1 and L3 (L3 uses MOCS settings) | ### +---------+-----+-----------------------------------------------------------------------+ ### | .uc | .uc | uncached (bypass) both L1 and L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .st | .uc | streaming L1 / bypass L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .uc | .ca | bypass L1 / cache in L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ca | .uc | cache in L1 / bypass L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ca | .ca | cache in both L1 and L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .st | .ca | streaming L1 / cache in L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ca | .cc | cache in L1 / L3 cached as constant | ### +---------+-----+-----------------------------------------------------------------------+ ### | .uc | .cc | bypass L1 / L3 cached as constant | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ri | .ri | read-invalidate both L1 and L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### ### Immediate offset. The compiler may be able to fuse this add into the message, otherwise ### additional instructions are generated to honor the semantics. 
### ### Datum size mapping is ### ### - 1 = :u8 ### - 2 = :u16 ### - 3 = :u32 ### - 4 = :u64 ### - 5 = :u8u32 (load 8b, zero extend to 32b; store the opposite), ### - 6 = :u16u32 (load 16b, zero extend to 32b; store the opposite), ### - 7 = :u16u32h (load 16b into high 16 of each 32b; store the high 16) ### "lsc_load_slm" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "ReadMem" }, "lsc_load_stateless" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "ReadMem" }, "lsc_load_bindless" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "ReadMem" }, "lsc_load_bti" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "ReadMem" }, "lsc_prefetch_slm" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "None" }, "lsc_prefetch_bti" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "None" }, "lsc_prefetch_stateless" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "None" }, "lsc_prefetch_bindless" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "None" }, "lsc_load_quad_slm" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "ReadMem" }, "lsc_load_quad_stateless" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "ReadMem" }, "lsc_load_quad_bindless" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "ReadMem" }, "lsc_load_quad_bti" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int"], "attributes" : "ReadMem" },
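### As an illustrative sketch (the predicate/address widths are examples, and
### the immediate operands follow the encodings described above: subopcode 0,
### default caching, address scale 1, immediate offset 0, datum size 3 = :u32,
### vector size 1, DataOrder 1 assumed here to mean non-transposed, no channel
### mask), a 16-channel stateless dword gather might look like::
###
###   %v = call <16 x i32> @llvm.genx.lsc.load.stateless.v16i32.v16i1.v16i64(<16 x i1> %pred, i8 0, i8 0, i8 0, i16 1, i32 0, i8 3, i8 1, i8 1, i8 0, <16 x i64> %addrs, i32 0)
###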
### ``llvm.genx.lsc.load.merge.*...`` : lsc_load merge instructions ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * ``llvm.genx.lsc.load.merge.slm`` : ### * ``llvm.genx.lsc.load.merge.bti`` : ### * ``llvm.genx.lsc.load.merge.stateless`` : ### ### * Exec_size ignored unless operation is transposed (DataOrder == Transpose) ### * arg0: {1,32}Xi1 predicate (overloaded) ### * arg1: i8 Subopcode, [MBZ] ### * arg2: i8 Caching behavior for L1, [MBC] ### * arg3: i8 Caching behavior for L3, [MBC] ### * arg4: i16 Address scale, [MBC] ### * arg5: i32 Immediate offset added to each address, [MBC] ### * arg6: i8 The datum size, [MBC] ### * arg7: i8 Number of elements to load per address (vector size), [MBC] ### * arg8: i8 Indicates if the data is transposed during the transfer, [MBC] ### * arg9: i8 Channel mask for quad versions, [MBC] ### * arg10: {1,32}Xi{16,32,64} The vector register holding offsets (overloaded) ### for flat version Base Address + Offset[i] goes here ### * arg11: i32 surface to use for this operation. This can be an immediate or a register ### for flat and bindless version pass zero here ### * arg12: The data to merge into disabled channels ### ### * Return value: the value read, merged with arg12 according to the predicate ### ### Cache mappings are: ### ### - 0 -> .df (default) ### - 1 -> .uc (uncached) ### - 2 -> .ca (cached) ### - 3 -> .wb (writeback) ### - 4 -> .wt (writethrough) ### - 5 -> .st (streaming) ### - 6 -> .ri (read-invalidate) ### - 7 -> .cc (const-cached) ### ### Only certain combinations of CachingL1 with CachingL3 are valid on hardware. ### ### +---------+-----+-----------------------------------------------------------------------+ ### | L1      | L3  | Notes                                                                 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .df     | .df | default behavior on both L1 and L3 (L3 uses MOCS settings)            | ### +---------+-----+-----------------------------------------------------------------------+ ### | .uc     | .uc | uncached (bypass) both L1 and L3                                      | ### +---------+-----+-----------------------------------------------------------------------+ ### | .st     | .uc | streaming L1 / bypass L3                                              | ### +---------+-----+-----------------------------------------------------------------------+ ### | .uc     | .ca | bypass L1 / cache in L3                                               | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ca     | .uc | cache in L1 / bypass L3                                               | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ca     | .ca | cache in both L1 and L3                                               | ### +---------+-----+-----------------------------------------------------------------------+ ### | .st     | .ca | streaming L1 / cache in L3                                            | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ri     | .ca | read-invalidate (e.g. last-use) on L1 loads / cache in L3             | ### +---------+-----+-----------------------------------------------------------------------+ ### ### Xe2 caching combinations - same as above without (".ri", ".ca") plus the following variants: ### ### +---------+-----+-----------------------------------------------------------------------+ ### | .ca     | .cc | cache in L1 / L3 cached as constant                                   | ### +---------+-----+-----------------------------------------------------------------------+ ### | .uc     | .cc | bypass L1 / L3 cached as constant                                     | ### +---------+-----+-----------------------------------------------------------------------+ ### | .ri     | .ri | read-invalidate both L1 and L3                                        | ### +---------+-----+-----------------------------------------------------------------------+ ### ### Immediate offset. The compiler may be able to fuse this add into the message, otherwise ### additional instructions are generated to honor the semantics.
### Alternative variant of the predicated loads - merges the destination for disabled ### lanes with values from the additional input (arg12) ### ### Datum size mapping is ### ### - 1 = :u8 ### - 2 = :u16 ### - 3 = :u32 ### - 4 = :u64 ### - 5 = :u8u32 (load 8b, zero extend to 32b; store the opposite), ### - 6 = :u16u32 (load 16b, zero extend to 32b; store the opposite), ### - 7 = :u16u32h (load 16b into high 16 of each 32b; store the high 16) ### "lsc_load_merge_slm" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int",0], "attributes" : "ReadMem" }, "lsc_load_merge_stateless" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int",0], "attributes" : "ReadMem" }, "lsc_load_merge_bindless" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int",0], "attributes" : "ReadMem" }, "lsc_load_merge_bti" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int",0], "attributes" : "ReadMem" }, "lsc_load_merge_quad_slm" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int",0], "attributes" : "ReadMem" }, "lsc_load_merge_quad_stateless" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int",0], "attributes" : "ReadMem" }, "lsc_load_merge_quad_bindless" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int",0], "attributes" : "ReadMem" }, "lsc_load_merge_quad_bti" : { "result" : "anyvector", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","int",0], "attributes" : "ReadMem" }, ### ``llvm.genx.lsc.store.*...`` : lsc_store instructions ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * ``llvm.genx.lsc.store.slm`` : ### * ``llvm.genx.lsc.store.bti`` : ### * ``llvm.genx.lsc.store.stateless`` : ### ### * Exec_size ignored unless operation is transposed (DataOrder == Transpose) ### * arg0: {1,32}Xi1 predicate (overloaded) ### * arg1: i8 Subopcode, [MBZ] ### * arg2: i8 Caching behavior for L1, [MBC] ### * arg3: i8 Caching behavior for L3, [MBC] ### * arg4: i16 Address scale, [MBC] ### * arg5: i32 Immediate offset added to each address, [MBC] ### * arg6: i8 The datum size, [MBC] ### * arg7: i8 Number of elements to store per address (vector size), [MBC] ### * arg8: i8 Indicates if the data is transposed during the transfer, [MBC] ### * arg9: i8 Channel mask for quad version, [MBC] ### * arg10: {1,32}Xi{16,32,64} The vector register holding offsets (overloaded) ### for flat version Base Address + Offset[i] goes here ### * arg11: VXi{16,32,64} The data to write (overloaded) ### * arg12: i32 surface to use for this operation. This can be an immediate or a register ### for flat and bindless version pass zero here ### ### * Return value: void ### ### Cache mappings are: ### ### - 0 -> .df (default) ### - 1 -> .uc (uncached) ### - 2 -> .ca (cached) ### - 3 -> .wb (writeback) ### - 4 -> .wt (writethrough) ### - 5 -> .st (streaming) ### - 6 -> .ri (read-invalidate) ### ### Only certain combinations of CachingL1 with CachingL3 are valid on hardware.
### ### +---------+-----+-----------------------------------------------------------------------+ ### | L1 | L3 | Notes | ### +---------+-----+-----------------------------------------------------------------------+ ### | .df | .df | default behavior on both L1 and L3 (L3 uses MOCS settings) | ### +---------+-----+-----------------------------------------------------------------------+ ### | .uc | .uc | uncached (bypass) both L1 and L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .st | .uc | streaming L1 / bypass L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .uc | .wb | bypass L1/ writeback L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .wt | .uc | writethrough L1 / bypass L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .wt | .wb | writethrough L1 / writeback L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .st | .wb | streaming L1 / writeback L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### | .wb | .wb | writeback both L1 and L3 | ### +---------+-----+-----------------------------------------------------------------------+ ### "lsc_store_slm" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","anyvector","int"], "attributes" : "None" }, "lsc_store_stateless" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","anyvector","int"], "attributes" : "None" }, "lsc_store_bindless" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","anyvector","int"], "attributes" : "None" }, "lsc_store_bti" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","anyvector","int"], "attributes" : "None" }, "lsc_store_quad_slm" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","anyvector","int"], "attributes" : "None" }, "lsc_store_quad_stateless" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","anyvector","int"], "attributes" : "None" }, "lsc_store_quad_bindless" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","anyvector","int"], "attributes" : "None" }, "lsc_store_quad_bti" : { "result" : "void", "arguments" : ["any","char","char","char","short","int","char","char","char","char","any","anyvector","int"], "attributes" : "None" }, ### ``llvm.genx.lsc.*2d.stateless.[return type]..
`` : 2d stateless load/prefetch instructions ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * ``llvm.genx.lsc.load2d.stateless...
`` : ### * ``llvm.genx.lsc.prefetch2d.stateless..
`` : ### ### * Exec_size ignored unless operation is transposed (DataOrder == Transpose) ### * arg0: {1,32}Xi1 predicate (overloaded) ### * arg1: i8 Caching behavior for L1, [MBC] ### * arg2: i8 Caching behavior for L3, [MBC] ### * arg3: i8 The datum size, [MBC] ### * arg4: i8 Indicates if the data is transposed during the transfer, [MBC] ### * arg5: i8 number of blocks, [MBC] ### * arg6: i16 BlockWidth, [MBC] ### * arg7: i16 BlockHeight, [MBC] ### * arg8: i8 VNNI. This performs a VNNI transform during the access. ### * arg9: i32/i64 surface base address for this operation. ### * arg10: i32 surface width minus 1. ### * arg11: i32 surface height minus 1. ### * arg12: i32 surface pitch minus 1. ### * arg13: i32 Src0AddrX, the base X position of the 2D region to load or store. ### * arg14: i32 Src0AddrY, the base Y position of the 2D region to load or store. ### ### * Return value: the value read or void for prefetch ### "lsc_load2d_stateless" : { "result" : "anyvector", "arguments" : ["anyvector","char","char","char","char","char","short","short","char","anyint","int","int","int","int","int"], "attributes" : "ReadMem" }, "lsc_prefetch2d_stateless" : { "result" : "void", "arguments" : ["anyvector","char","char","char","char","char","short","short","char","anyint","int","int","int","int","int"], "attributes" : "None" },
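### For illustration (the overload suffixes, block geometry and enum-coded
### operands below are example values only; datum size 3 = :u32 follows the
### mapping above, and the width/height are i16 per the argument list), a
### single-block non-VNNI 16x8 dword 2d block load might look like::
###
###   %blk = call <128 x i32> @llvm.genx.lsc.load2d.stateless.v128i32.v1i1.i64(<1 x i1> %pred, i8 0, i8 0, i8 3, i8 1, i8 1, i16 16, i16 8, i8 0, i64 %base, i32 %widthm1, i32 %heightm1, i32 %pitchm1, i32 %x, i32 %y)
###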
### ``llvm.genx.lsc.store2d.stateless..`` : 2d stateless store ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * Exec_size ignored unless operation is transposed (DataOrder == Transpose) ### * arg0: {1,32}Xi1 predicate (overloaded) ### * arg1: i8 Caching behavior for L1, [MBC] ### * arg2: i8 Caching behavior for L3, [MBC] ### * arg3: i8 The datum size, [MBC] ### * arg4: i8 Indicates if the data is transposed during the transfer, [MBC] ### * arg5: i8 number of blocks, [MBC] ### * arg6: i16 BlockWidth, [MBC] ### * arg7: i16 BlockHeight, [MBC] ### * arg8: i8 VNNI. This performs a VNNI transform during the access. ### * arg9: i32/i64 surface base address for this operation. ### * arg10: i32 surface width minus 1. ### * arg11: i32 surface height minus 1. ### * arg12: i32 surface pitch minus 1. ### * arg13: i32 Src0AddrX, the base X position of the 2D region to load or store. ### * arg14: i32 Src0AddrY, the base Y position of the 2D region to load or store. ### * arg15: data to write (overloaded) ### ### * Return value: void ### "lsc_store2d_stateless" : { "result" : "void", "arguments" : ["anyvector","char","char","char","char","char","short","short","char","anyint","int","int","int","int","int","anyvector"], "attributes" : "None" }, ### ``llvm.genx.lsc.*.2d.ugm.desc.*`` : 2d block load/store/prefetch instructions ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i1, Predicate ### * arg1: vNi8, Cache controls, where N is the number of supported cache levels [MBC] ### * arg2: i8, Number of blocks [MBC] ### * arg3: i16, Block width (in elements) [MBC] ### * arg4: i16, Block height [MBC] ### * arg5: v16i32 Matrix descriptor ### * arg6: i32, Memory block X immediate offset (in elements) [MBC] ### * arg7: i32, Memory block Y immediate offset [MBC] ### * arg8: value to passthru when predicate is false on load, ### or value to write on store, ### or dummy value for prefetch to deduce the matrix element type ### ### * Return value: the value read or void ### ### The matrix descriptor is a 16-element vector that describes the 2D block layout in memory.
### ``llvm.genx.lsc.*.2d.ugm.desc.*`` : 2d block load/store/prefetch instructions
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: i1, Predicate
### * arg1: vNi8, Cache controls, where N is the number of supported cache levels [MBC]
### * arg2: i8, Number of blocks [MBC]
### * arg3: i16, Block width (in elements) [MBC]
### * arg4: i16, Block height [MBC]
### * arg5: v16i32, Matrix descriptor
### * arg6: i32, Memory block X immediate offset (in elements) [MBC]
### * arg7: i32, Memory block Y immediate offset [MBC]
### * arg8: value to passthru when predicate is false on load,
###         or value to write on store,
###         or dummy value for prefetch to deduce the matrix element type
###
### * Return value: the value read or void
###
### The matrix descriptor is a 16-element vector that describes the 2D block
### layout in memory. The descriptor layout is as follows:
###   desc[0]: low 32 bits of the base address
###   desc[1]: high 32 bits of the base address
###   desc[2]: matrix width in bytes, minus 1
###   desc[3]: matrix height, minus 1
###   desc[4]: matrix pitch in bytes, minus 1
###   desc[5]: block start X in elements, signed
###   desc[6]: block start Y in rows, signed
###   desc[7]: block size encoded as follows:
###     (block_width - 1) | ((block_height - 1) << 8) | ((number_of_blocks - 1) << 16)
###   desc[8-15]: reserved
###
"lsc_load_2d_ugm_desc" : {
    "result" : "anyvector",
    "arguments" : [
        "bool",      # i1, predicate
        "anyvector", # cache controls
        "char",      # number of blocks
        "short",     # block width
        "short",     # block height
        "int16",     # matrix descriptor
        "int",       # X offset
        "int",       # Y offset
        0,           # value to passthru when predicate is false
    ],
    "attributes" : "ReadMem",
    "platforms" : "XeHPC+",
},
"lsc_load_2d_ugm_desc_transpose" : {
    "result" : "anyvector",
    "arguments" : [
        "bool",      # i1, predicate
        "anyvector", # cache controls
        "char",      # number of blocks
        "short",     # block width
        "short",     # block height
        "int16",     # matrix descriptor
        "int",       # X offset
        "int",       # Y offset
        0,           # value to passthru when predicate is false
    ],
    "attributes" : "ReadMem",
    "platforms" : "XeHPC+",
},
"lsc_load_2d_ugm_desc_vnni" : {
    "result" : "anyvector",
    "arguments" : [
        "bool",      # i1, predicate
        "anyvector", # cache controls
        "char",      # number of blocks
        "short",     # block width
        "short",     # block height
        "int16",     # matrix descriptor
        "int",       # X offset
        "int",       # Y offset
        0,           # value to passthru when predicate is false
    ],
    "attributes" : "ReadMem",
    "platforms" : "XeHPC+",
},
"lsc_prefetch_2d_ugm_desc" : {
    "result" : "void",
    "arguments" : [
        "bool",      # i1, predicate
        "anyvector", # cache controls
        "char",      # number of blocks
        "short",     # block width
        "short",     # block height
        "int16",     # matrix descriptor
        "int",       # X offset
        "int",       # Y offset
        "anyvector", # dummy value, only element type is used
    ],
    "attributes" : "SideEffects",
    "platforms" : "XeHPC+",
},
"lsc_store_2d_ugm_desc" : {
    "result" : "void",
    "arguments" : [
        "bool",      # i1, predicate
        "anyvector", # cache controls
        "char",      # number of blocks
        "short",     # block width
        "short",     # block height
        "int16",     # matrix descriptor
        "int",       # X offset
        "int",       # Y offset
        "anyvector", # value to store
    ],
    "attributes" : "WriteMem",
    "platforms" : "XeHPC+",
},
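### For example (illustrative only): for a block 16 elements wide, 8 rows
### high, with 2 blocks per message, the block-size encoding above gives
### desc[7] = (16 - 1) | ((8 - 1) << 8) | ((2 - 1) << 16) = 0x0001070F.
###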
### ``llvm.genx.lsc.*.quad.typed.bti...`` : LSC typed load/store/prefetch
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: vXi1 predicate (overloaded)
### * arg1: i8 Caching behavior for L1, [MBC]
### * arg2: i8 Caching behavior for L3, [MBC]
### * arg3: i8 Channel mask for quad versions, [MBC]
### * arg4: i32 Surface index (BTI)
### * arg5: vXi32 U pixel address (overloaded)
### * arg6: vXi32 V pixel address
### * arg7: vXi32 R pixel address
### * arg8: vXi32 LOD pixel address
### * arg9: vXi32 or vXf32 old value of the data read (load) or data to write (store)
###
### * Return value: the value read or void
###
"lsc_load_merge_quad_typed_bti" : { "result" : "anyvector", "arguments" : ["anyvector", "char", "char", "char", "int", "anyvector", 2, 2, 2, 0], "attributes" : "ReadMem", "platforms" : "Xe2+", },
"lsc_store_quad_typed_bti" : { "result" : "void", "arguments" : ["anyvector", "char", "char", "char", "int", "anyvector", 1, 1, 1, "anyvector"], "attributes" : "WriteMem", "platforms" : "Xe2+", },
"lsc_prefetch_quad_typed_bti" : { "result" : "void", "arguments" : ["anyvector", "char", "char", "char", "int", "anyvector", 1, 1, 1], "attributes" : "None", "platforms" : "Xe2+", },
### ``llvm.genx.lsc.*2d.typed.bti.[return type/vector type]`` : LSC typed 2d block load/store/prefetch
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: i8 Caching behavior for L1
### * arg1: i8 Caching behavior for L3
### * arg2: i32 Surface
### * arg3: i32 BlockHeight
### * arg4: i32 BlockWidth
### * arg5: i32 XOff
### * arg6: i32 YOff
### * arg7: data to write (store only)
###
### * Return value: the value read or void
###
"lsc_load2d_typed_bti" : { "result" : "anyvector", "arguments" : ["char", "char", "int", "int", "int", "int", "int"], "attributes" : "ReadMem", "platforms" : "Xe2+", },
"lsc_store2d_typed_bti" : { "result" : "void", "arguments" : ["char", "char", "int", "int", "int", "int", "int", "anyvector"], "attributes" : "WriteMem", "platforms" : "Xe2+", },
"lsc_prefetch2d_typed_bti" : { "result" : "void", "arguments" : ["char", "char", "int", "int", "int", "int", "int"], "attributes" : "ReadMem", "platforms" : "Xe2+", },
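### For illustration only: a typed 2d block load of an 8x8 dword tile might
### be emitted like this in LLVM IR (a sketch; only the return type is
### overloaded here, and the suffix below is one possible instance):
###
###   %tile = call <64 x i32> @llvm.genx.lsc.load2d.typed.bti.v64i32(
###               i8 %l1, i8 %l3, i32 %bti, i32 8, i32 8, i32 %xoff, i32 %yoff)
###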
### ``llvm.genx.lsc.atomic.*...`` : lsc_atomic instructions
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### !!! These are legacy; use the xatomic versions instead !!!
###
### * ``llvm.genx.lsc.atomic.bti`` :
### * ``llvm.genx.lsc.atomic.slm`` :
### * ``llvm.genx.lsc.atomic.stateless`` :
###
### * arg0: {1,32}Xi1 predicate (overloaded)
### * arg1: i8 Subopcode, [MBZ]
### * arg2: i8 Caching behavior for L1, [MBC]
### * arg3: i8 Caching behavior for L3, [MBC]
### * arg4: i16 Address scale, [MBC]
### * arg5: {1,32}Xi32 Immediate offset added to each address, [MBC]
### * arg6: i8 The datum size, [MBC]
### * arg7: i8 Indicates if the data is transposed during the transfer, [MBC]
### * arg8: i8 Number of elements to load per address (vector size), [MBC]
### * arg9: i8 Channel mask, currently ignored, [MBC]
### * arg10: i32/i64 surface base address for this operation.
### * arg11: {1,32}Xi{16,32,64} The vector register holding addresses (overloaded)
### * arg12: {1,32}Xi32 Src0 or undef (same vector size as predicate)
### * arg13: {1,32}Xi32 Src1 or undef (same vector size as predicate)
### * arg14: {1,32}Xi32 Old value of destination (same vector size as predicate), now always undef
###
"lsc_atomic_bti" : { "result" : "any", "arguments" : ["any","char","char","char","short","int","char","char","char","char","int","anyvector",0,0,0], "attributes" : "None" },
"lsc_atomic_slm" : { "result" : "any", "arguments" : ["any","char","char","char","short","int","char","char","char","char","int","anyvector",0,0,0], "attributes" : "None" },
"lsc_atomic_stateless" : { "result" : "any", "arguments" : ["any","char","char","char","short","int","char","char","char","char","int","anyvector",0,0,0], "attributes" : "None" },
"lsc_atomic_bindless" : { "result" : "any", "arguments" : ["any","char","char","char","short","int","char","char","char","char","int","anyvector",0,0,0], "attributes" : "None" },
### ``llvm.genx.lsc.xatomic.*...`` : lsc_atomic instructions
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * ``llvm.genx.lsc.xatomic.bti`` :
### * ``llvm.genx.lsc.xatomic.slm`` :
### * ``llvm.genx.lsc.xatomic.stateless`` :
### * ``llvm.genx.lsc.xatomic.bindless`` :
###
### * arg0: {1,32}Xi1 predicate (overloaded)
### * arg1: i8 Subopcode, [MBZ]
### * arg2: i8 Caching behavior for L1, [MBC]
### * arg3: i8 Caching behavior for L3, [MBC]
### * arg4: i16 Address scale, [MBC]
### * arg5: {1,32}Xi32 Immediate offset added to each address, [MBC]
### * arg6: i8 Data size, [MBC]
### * arg7: i8 Number of elements to load per address (vector size), [MBC]
### * arg8: i8 Indicates if the data is transposed during the transfer, [MBC]
### * arg9: i8 Channel mask, currently ignored, [MBC]
### * arg10: {1,32}Xi{16,32,64} The vector register holding offsets (overloaded);
###          for the flat version Base Address + Offset[i] goes here
### * arg11: {1,32}Xi32 Src0 or undef (same vector size as predicate)
### * arg12: {1,32}Xi32 Src1 or undef (same vector size as predicate)
### * arg13: i32 surface to use for this operation. This can be an immediate or a
###          register; for the flat and bindless versions pass zero here
### * arg14: {1,32}Xi32 Old value of destination (same vector size as predicate), now always undef
###
"lsc_xatomic_bti" : { "result" : "any", "arguments" : ["any","char","char","char","short","int","char","char","char","char","anyvector",0,0,"int",0], "attributes" : "None" },
"lsc_xatomic_slm" : { "result" : "any", "arguments" : ["any","char","char","char","short","int","char","char","char","char","anyvector",0,0,"int",0], "attributes" : "None" },
"lsc_xatomic_stateless" : { "result" : "any", "arguments" : ["any","char","char","char","short","int","char","char","char","char","anyvector",0,0,"int",0], "attributes" : "None" },
"lsc_xatomic_bindless" : { "result" : "any", "arguments" : ["any","char","char","char","short","int","char","char","char","char","anyvector",0,0,"int",0], "attributes" : "None" },
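### For illustration only: an integer atomic add through the stateless form
### might look like this in LLVM IR. This is a sketch: the subopcode value is
### left symbolic, and the overloaded-type suffix is assumed to cover the
### return, predicate and offset-vector types:
###
###   %old = call <8 x i32>
###       @llvm.genx.lsc.xatomic.stateless.v8i32.v8i1.v8i64(
###           <8 x i1> %pred, i8 %subop, i8 %l1, i8 %l3, i16 1, i32 0,
###           i8 %dsize, i8 1, i8 %order, i8 0, <8 x i64> %addrs,
###           <8 x i32> %src0, <8 x i32> undef, i32 0, <8 x i32> undef)
###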
"NoMem" }, ### ``llvm.genx.timestamp.`` : read vISA v11 (%timestamp) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * Return value: vxi32 the value read ### ### The vector width must be power of 2 and no larger than 4. ### "timestamp" : { "result" : "anyint", "arguments" : [], "attributes" : "None" }, ### ``llvm.genx.r0.`` : read vISA v12 (%r0) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * Return value: vxi32 or i32 the value read ### ### The vector width must be power of 2 and no larger than 8. ### "r0" : { "result" : "anyint", "arguments" : [], "attributes" : "ReadMem" }, ### ``llvm.genx.sr0.`` : read vISA v13 (%sr0) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * Return value: vxi32 the value read ### ### The vector width must be 4 ### ### "sr0" : { "result" : "anyint", "arguments" : [], "attributes" : "ReadMem" }, ### ``llvm.genx.set.sr0.2`` : write vISA v13(0, 2) (%sr0.2) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### arg0: data to write (1 dword) ### ### * Return value: void ### "set_sr0_2" : { "result" : "void", "arguments" : ["int"], "attributes" : "None" }, ### ``llvm.genx.get.color`` : read color value of the thread origin ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### Return Value: i16 the value read ### ### This may not be the most appropriate way to access this value, ### but is a stop-gap solution. ### "get_color" : { "result" : "short", "arguments" : [], "attributes" : "NoMem" }, ### ``llvm.genx.get.hwid`` : read hw_id value ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### Return Value: i32 the value read ### "get_hwid" : { "result" : "int", "arguments" : [], "attributes" : "NoMem" }, ### ``llvm.genx.ce0`` : read channel-enable register ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### Return Value: i32 the value read ### "ce0" : { "result" : "int", "arguments" : [], "attributes" : "ReadMem" }, ### ``llvm.genx.set.pause`` : set the pause register (v11.4) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### arg0: length of pause 10 bits (0-4 must be 0) ### ### Return Value: none ### ### ### Set the pause value - this pauses instruction issue until the value has been ### decremented to 0 (decrements every 32 clocks) ### ### We set this intrinsic to have side-effects (last field empty) to stop it being removed as it ### otherwise looks dead "set_pause" : { "result" : "void", "arguments" : ["short"], "attributes" : "None" }, ### ``llvm.genx.dummy.mov`` : insert a dummy mov to v0 ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### arg0: a value that we want to mov to v0 (usually to trigger a scoreboard dependency) ### ### Return Value: none ### ### ### This is primarily used to set up scoreboard dependencies. If a value is mov'ed to v0 then it ### will trigger a scoreboard dependency check. 
### ``llvm.genx.dummy.mov`` : insert a dummy mov to v0
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: a value that we want to mov to v0 (usually to trigger a scoreboard dependency)
###
### * Return value: none
###
### This is primarily used to set up scoreboard dependencies. If a value is
### mov'ed to v0 then it will trigger a scoreboard dependency check.
### As a word (16 bits) is usually the basic type of value that is worked with,
### you only need to dummy mov one of these from any payload to correctly
### trigger the dependency.
###
### We set this intrinsic to have side-effects to stop it being removed, as it
### otherwise looks dead, and also to prevent any kind of code motion optimisation.
###
"dummy_mov" : { "result" : "void", "arguments" : ["short"], "attributes" : "None" },
### The following 2 predef.reg intrinsics aren't translated directly to
### reads/writes of the register; instead they're baled together with
### rd/wrregions and in fact indicate that those rdr/wrrs should use predefined
### vISA registers as their sources/dests.
###
### ``llvm.genx.read.predef.reg.`` : read predefined vISA reg
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: ID of the reg (1 dword)
### * arg1: value that could affect the reg being read (e.g. stackcall), may be undef
###
### * Return value: the value read
###
"read_predef_reg" : { "result" : "any", "arguments" : ["int", "any"], "attributes" : "ReadMem" },
### ``llvm.genx.write.predef.reg..`` : write value to predefined vISA reg
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: ID of the reg (1 dword)
### * arg1: data to write (1 dword)
###
### * Return value: the value written
###
"write_predef_reg" : { "result" : "any", "arguments" : ["int", "any"], "attributes" : "WriteMem" },
### "dword_atomic_add" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_sub" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_min" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_max" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_xchg" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_and" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_or" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_xor" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_imin" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_imax" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.dword.atomic2.*..`` : dword atomic with binary operator (variant with no oldval) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.dword.atomic2.add`` : vISA DWORD_ATOMIC ADD instruction ### * ``llvm.genx.dword.atomic2.sub`` : vISA DWORD_ATOMIC SUB instruction ### * ``llvm.genx.dword.atomic2.min`` : vISA DWORD_ATOMIC MIN instruction ### * ``llvm.genx.dword.atomic2.max`` : vISA DWORD_ATOMIC MAX instruction ### * ``llvm.genx.dword.atomic2.xchg`` : vISA DWORD_ATOMIC XCHG instruction ### * ``llvm.genx.dword.atomic2.and`` : vISA DWORD_ATOMIC AND instruction ### * ``llvm.genx.dword.atomic2.or`` : vISA DWORD_ATOMIC OR instruction ### * ``llvm.genx.dword.atomic2.xor`` : vISA DWORD_ATOMIC XOR instruction ### * ``llvm.genx.dword.atomic2.imin`` : vISA DWORD_ATOMIC IMIN instruction ### * ``llvm.genx.dword.atomic2.imax`` : vISA DWORD_ATOMIC IMAX instruction ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXi32 element offset in bytes (overloaded) ### * arg3: vXi32 src ### ### * Return value: vXi32 the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 8 or 16. 
### "dword_atomic2_add" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_sub" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_min" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_max" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_xchg" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_and" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_or" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_xor" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_imin" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_imax" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.dword.atomic.*...`` : dword atomic with fmin/fmax operation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.dword.atomic.fmin`` : vISA DWORD_ATOMIC FMIN instruction ### * ``llvm.genx.dword.atomic.fmax`` : vISA DWORD_ATOMIC FMAX instruction ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate(overloaded) ### * arg1: i32 surface index ### * arg2: vXi32 element offset in bytes (overloaded) ### * arg3: vXfloat src ### * arg4: vXfloat original value of the register that the data is read into ### ### * Return value: vXfloat the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 8 or 16. 
### "dword_atomic_fmin" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_fmax" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_fadd" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_fsub" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.dword.atomic2.*...`` : dword atomic with fmin/fmax operation (variant with no oldval) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.dword.atomic2.fmin`` : vISA DWORD_ATOMIC FMIN instruction ### * ``llvm.genx.dword.atomic2.fmax`` : vISA DWORD_ATOMIC FMAX instruction ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate(overloaded) ### * arg1: i32 surface index ### * arg2: vXi32 element offset in bytes (overloaded) ### * arg3: vXfloat src ### ### * Return value: vXfloat the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 8 or 16. ### "dword_atomic2_fmin" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_fmax" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_fadd" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_fsub" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.dword.atomic.*..`` : dword atomic with inc/dec operation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.dword.atomic.inc`` : vISA DWORD_ATOMIC INC instruction ### * ``llvm.genx.dword.atomic.dec`` : vISA DWORD_ATOMIC DEC instruction ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXi32 element offset in bytes ### * arg3: vXi32 original value of the register that the data is read into ### ### * Return value: vXi32 the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 8 or 16. 
### "dword_atomic_inc" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic_dec" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,0], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.dword.atomic2.*..`` : dword atomic with inc/dec operation (variant with no oldval) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.dword.atomic2.inc`` : vISA DWORD_ATOMIC INC instruction ### * ``llvm.genx.dword.atomic2.dec`` : vISA DWORD_ATOMIC DEC instruction ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXi32 element offset in bytes ### ### * Return value: vXi32 the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 8 or 16. ### "dword_atomic2_inc" : { "result" : "anyvector", "arguments" : ["anyvector","int",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_dec" : { "result" : "anyvector", "arguments" : ["anyvector","int",0], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.dword.atomic.cmpxchg..`` : vISA DWORD_ATOMIC CMPXCHG instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXi32 element offset in bytes ### * arg3: vXi32 src0 ### * arg4: vXi32 src1 ### * arg5: vXi32 original value of the register that the data is read into ### ### * Return value: vXi32 the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 8 or 16. ### "dword_atomic_cmpxchg" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,0,0,0], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.dword.atomic2.cmpxchg..`` : vISA DWORD_ATOMIC CMPXCHG instruction (variant with no oldval) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXi32 element offset in bytes ### * arg3: vXi32 src0 ### * arg4: vXi32 src1 ### ### * Return value: vXi32 the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 8 or 16. ### "dword_atomic2_cmpxchg" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,0,0], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.dword.atomic.fcmpwr...`` : vISA DWORD_ATOMIC FCMPWR instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXi32 element offset in bytes (overloaded) ### * arg3: vXfloat src0 ### * arg4: vXfloat src1 ### * arg5: vXfloat original value of the register that the data is read into ### ### * Return value: vXfloat the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 8 or 16. 
### "dword_atomic_fcmpwr" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0,0], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.dword.atomic2.fcmpwr...`` : vISA DWORD_ATOMIC FCMPWR instruction (variant with no oldval) ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXi32 element offset in bytes (overloaded) ### * arg3: vXfloat src0 ### * arg4: vXfloat src1 ### ### * Return value: vXfloat the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 8 or 16. ### "dword_atomic2_fcmpwr" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.typed.atomic.*...`` : atomic typed with binary operator ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.typed.atomic.add`` : vISA TYPED_ATOMIC ADD instruction ### * ``llvm.genx.typed.atomic.sub`` : vISA TYPED_ATOMIC SUB instruction ### * ``llvm.genx.typed.atomic.min`` : vISA TYPED_ATOMIC MIN instruction ### * ``llvm.genx.typed.atomic.max`` : vISA TYPED_ATOMIC MAX instruction ### * ``llvm.genx.typed.atomic.xchg`` : vISA TYPED_ATOMIC XCHG instruction ### * ``llvm.genx.typed.atomic.and`` : vISA TYPED_ATOMIC AND instruction ### * ``llvm.genx.typed.atomic.or`` : vISA TYPED_ATOMIC OR instruction ### * ``llvm.genx.typed.atomic.xor`` : vISA TYPED_ATOMIC XOR instruction ### * ``llvm.genx.typed.atomic.imin`` : vISA TYPED_ATOMIC IMIN instruction ### * ``llvm.genx.typed.atomic.imax`` : vISA TYPED_ATOMIC IMAX instruction ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXT src ### * arg3: vXi32 u (overloaded) ### * arg4: vXi32 v - can be a constant 0 and becomes undef in lowering ### * arg5: vXi32 r - can be a constant 0 and becomes undef in lowering ### * arg6: vXi32 LOD - can be constant 0 and becomes undef in lowering ### ### * Return value: vXi32 the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width (which in reality must be 8) ### "typed_atomic_add" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_sub" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_min" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_max" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_xchg" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_and" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_or" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_xor" : { "result" : "anyvector", "arguments" : 
["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_imin" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_imax" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.typed.atomic.*...`` : atomic typed with fmin/fmax operation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.typed.atomic.fmin`` : vISA TYPED_ATOMIC FMIN instruction ### * ``llvm.genx.typed.atomic.fmax`` : vISA TYPED_ATOMIC FMAX instruction ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXfloat src ### * arg3: vXi32 u (overloaded) ### * arg4: vXi32 v - can be a constant 0 and becomes undef in lowering ### * arg5: vXi32 r - can be a constant 0 and becomes undef in lowering ### * arg6: vXi32 LOD - can be a constant 0 and becomes undef in lowering ### ### * Return value: vXfloat the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width (which in reality must be 8) ### "typed_atomic_fmin" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_fmax" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_fadd" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_fsub" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.typed.atomic.*...`` : atomic typed with inc/dec operation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.typed.atomic.inc`` : vISA TYPED_ATOMIC INC instruction ### * ``llvm.genx.typed.atomic.dec`` : vISA TYPED_ATOMIC DEC instruction ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXi32 u (overloaded) ### * arg3: vXi32 v - can be a constant 0 and becomes undef in lowering ### * arg4: vXi32 r - can be a constant 0 and becomes undef in lowering ### * arg5: vXi32 LOD - can be a constant 0 and becomes undef in lowering ### ### * Return value: vXi32 the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width (which in reality must be 8) ### "typed_atomic_inc" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, "typed_atomic_dec" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.typed.atomic.cmpxchg...`` : vISA TYPED_ATOMIC CMPXCHG instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 surface index ### * arg2: vXT src0 ### * arg3: vXT src1 ### * arg4: vXi32 u (overloaded) ### * arg5: vXi32 v - can be a constant 0 and becomes undef in lowering ### * arg6: 
### ``llvm.genx.typed.atomic.cmpxchg...`` : vISA TYPED_ATOMIC CMPXCHG instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * (Exec_size inferred from element offset type)
### * arg0: vXi1 predicate (overloaded)
### * arg1: i32 surface index
### * arg2: vXT src0
### * arg3: vXT src1
### * arg4: vXi32 u (overloaded)
### * arg5: vXi32 v - can be a constant 0 and becomes undef in lowering
### * arg6: vXi32 r - can be a constant 0 and becomes undef in lowering
### * arg7: vXi32 LOD - can be a constant 0 and becomes undef in lowering
###
### * Return value: vXi32 the old value read
###
### Predicate, element offset, src, and the return value must all have the
### same vector width (which in reality must be 8)
###
"typed_atomic_cmpxchg" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" },
### ``llvm.genx.typed.atomic.fcmpwr...`` : vISA TYPED_ATOMIC FCMPWR instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * (Exec_size inferred from element offset type)
### * arg0: vXi1 predicate (overloaded)
### * arg1: i32 surface index
### * arg2: vXfloat src0
### * arg3: vXfloat src1
### * arg4: vXi32 u (overloaded)
### * arg5: vXi32 v - can be a constant 0 and becomes undef in lowering
### * arg6: vXi32 r - can be a constant 0 and becomes undef in lowering
### * arg7: vXi32 LOD - can be a constant 0 and becomes undef in lowering
###
### * Return value: vXfloat the old value read
###
### Predicate, element offset, src, and the return value must all have the
### same vector width (which in reality must be 8)
###
"typed_atomic_fcmpwr" : { "result" : "anyvector", "arguments" : ["anyvector","int",0,0,"anyint",2,2,2], "platforms" : "-Xe2", "attributes" : "None" },
### ``llvm.genx.gather.private...`` : CMC internal, no vISA
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: v-by-i1 predicate (overloaded)
### * (Num_elts inferred from data type)
### * arg1: base pointer
### * arg2: vXi32 element offset in elements (overloaded)
### * arg3: old value of the data read
###
### * Return value: the data read
###
### The vector width of the return value is the number of elements to read,
### which must be 1, 8 or 16.
###
### The element offset arg must have the same vector width.
###
"gather_private" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "ReadMem" },
### ``llvm.genx.gather.scaled...`` : vISA GATHER_SCALED instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * (Exec_size inferred from element offset type)
### * arg0: vXi1 predicate (overloaded)
### * arg1: i32 log2 num blocks, constant (0/1/2 for num blocks 1/2/4)
### * arg2: i16 scale, constant
### * arg3: i32 surface index
### * arg4: i32 global offset in bytes
### * arg5: vXi32 element offset in bytes (overloaded)
### * arg6: old value of the data read
###
### * Return value: the data read
###
### The vector width of the element offset arg is the number of elements to
### read, which must be a power of 2 and less than or equal to 32.
###
### The predicate arg must have the same vector width.
###
### The old value of the data read (the return value) must have UD, D or
### F type. For 1 and 2 byte (1 x num blocks) reads the upper bytes have
### undefined values in the returned value.
###
"gather_scaled" : { "result" : "anyvector", "arguments" : ["anyvector","int","short","int","int","anyint",0], "attributes" : "ReadMem", "platforms" : "-Xe2", },
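### For illustration only: a 16-lane dword gather might look like this in
### LLVM IR (a sketch; the suffix is assumed to be the return, predicate and
### offset types, and the scalar operands are left symbolic):
###
###   %data = call <16 x i32> @llvm.genx.gather.scaled.v16i32.v16i1.v16i32(
###               <16 x i1> %pred, i32 %nblks_log2, i16 %scale, i32 %bti,
###               i32 %goff, <16 x i32> %byte_offsets, <16 x i32> undef)
###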
### ``llvm.genx.gather.scaled2`` : vISA GATHER_SCALED instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### This intrinsic doesn't have the redundant predicate and old value that can
### be inferred from the resulting wrregion.
###
### * (Exec_size inferred from element offset type)
### * arg0: i32 log2 num blocks, constant (0/1/2 for num blocks 1/2/4)
### * arg1: i16 scale, constant
### * arg2: i32 surface index
### * arg3: i32 global offset in bytes
### * arg4: vXi32 element offset in bytes (overloaded)
###
### * Return value: the data read
###
### The vector width of the element offset arg is the number of elements to
### read, which must be a power of 2 and less than or equal to 32.
###
### For 1 and 2 byte (1 x num blocks) reads the upper bytes have
### undefined values in the returned value.
###
"gather_scaled2" : { "result" : "anyvector", "arguments" : ["int","short","int","int","anyint"], "attributes" : "ReadMem", "platforms" : "-Xe2", },
### ``llvm.genx.gather.masked.scaled2`` : vISA GATHER_SCALED instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * (Exec_size inferred from element offset type)
### * arg0: i32 log2 num blocks, constant (0/1/2 for num blocks 1/2/4)
### * arg1: i16 scale, constant
### * arg2: i32 surface index
### * arg3: i32 global offset in bytes
### * arg4: vXi32 element offset in bytes (overloaded)
### * arg5: vXi1 predicate (overloaded)
###
### * Return value: the data read
###
"gather_masked_scaled2" : { "result" : "anyvector", "arguments" : ["int","short","int","int","anyint","anyvector"], "attributes" : "ReadMem", "platforms" : "-Xe2", },
### ``llvm.genx.gather4.scaled...`` : vISA GATHER4_SCALED instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * (Exec_size inferred from element offset type)
### * arg0: vXi1 predicate (overloaded)
### * arg1: i32 channel mask, constant
### * arg2: i16 scale, constant
### * arg3: i32 surface index
### * arg4: i32 global offset in bytes
### * arg5: vXi32 element offset in bytes (overloaded)
### * arg6: old value of the data read
###
### * Return value: the data read
###
### The vector width of the element offset arg is the number of elements to
### read, which must be 8 or 16.
### The predicate arg must have the same vector width.
### The instruction reads up to 4 channels per element, with the lowest 4
### bits of the channel mask arg giving the mask of channels _not_ to read.
### The number of 0 bits in that lower 4 bits of the channel mask arg is the
### number of channels to read per element.
### The vector width of the return value must be the number of elements
### times the number of channels to read per element.
### The element type of the return value must be i32 or float.
###
"gather4_scaled" : { "result" : "anyvector", "arguments" : ["anyvector","int","short","int","int","anyint",0], "attributes" : "ReadMem", "platforms" : "-Xe2", },
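### For example (illustrative only): with 16 element offsets and a channel
### mask whose low 4 bits are 0b1100, two channels are read per element
### (the two 0 bits), so the return value must be a 32-element vector of
### i32 or float.
###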
### ``llvm.genx.gather4.scaled2`` : vISA GATHER4_SCALED instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### This intrinsic doesn't have the redundant predicate and old value that can
### be inferred from the resulting wrregion.
###
### * (Exec_size inferred from element offset type)
### * arg0: i32 channel mask, constant
### * arg1: i16 scale, constant
### * arg2: i32 surface index
### * arg3: i32 global offset in bytes
### * arg4: vXi32 element offset in bytes
###
### * Return value: the data read
###
### The vector width of the element offset arg is the number of elements to
### read, which must be 8 or 16.
### The instruction reads up to 4 channels per element, with the lowest 4
### bits of the channel mask arg giving the mask of channels _not_ to read.
### The number of 0 bits in that lower 4 bits of the channel mask arg is the
### number of channels to read per element.
### The vector width of the return value must be the number of elements
### times the number of channels to read per element.
### The element type of the return value must be i32 or float.
###
"gather4_scaled2" : { "result" : "anyvector", "arguments" : ["int","short","int","int","anyint"], "attributes" : "ReadMem", "platforms" : "-Xe2", },
### ``llvm.genx.gather4.masked.scaled2`` : vISA GATHER4_SCALED instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * (Exec_size inferred from element offset type)
### * arg0: i32 channel mask, constant
### * arg1: i16 scale, constant
### * arg2: i32 surface index
### * arg3: i32 global offset in bytes
### * arg4: vXi32 element offset in bytes
### * arg5: vXi1 predicate (overloaded)
###
### * Return value: the data read
###
"gather4_masked_scaled2" : { "result" : "anyvector", "arguments" : ["int","short","int","int","anyint","anyvector"], "attributes" : "ReadMem", "platforms" : "-Xe2", },
### ``llvm.genx.gather4.typed...`` : vISA GATHER4_TYPED instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: i32 channel mask, constant
### * arg1: vXi1 predicate (Num_elts inferred from element offset type) (overloaded)
### * arg2: i32 surface index
### * arg3: vXi32 U pixel address (overloaded)
### * arg4: vXi32 V pixel address
### * arg5: vXi32 R pixel address
### * arg6: old value of the data read
###
### * Return value: the data read
###
### The vector widths of the U pixel address, V pixel address and R pixel
### address args must be equal and are the number of elements to read, which
### must be 8 or 16. (16 is split into 2x 8 by the GenX backend.)
### The predicate arg must have the same vector width.
### The instruction reads up to 4 channels per element, with the lowest 4
### bits of the channel mask arg giving the mask of channels to read.
### The number of 1 bits in that lower 4 bits of the channel mask arg is the
### number of channels to read per element. Mask "0000" is not allowed.
### The vector width of the return value must be the number of elements
### times the number of channels to read per element.
### The element type of the return value must be i32 or float.
###
"gather4_typed" : { "result" : "anyvector", "arguments" : ["int","anyvector","int","anyvector",2,2,0], "attributes" : "ReadMem", "platforms" : "-Xe2", },
### "media_ld" : { "result" : "anyvector", "arguments" : ["int","int","int","int","int","int"], "attributes" : "ReadMem", "platforms" : "-Xe2", }, ### ``llvm.genx.media.st.`` : vISA MEDIA_ST instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 modifiers, constant ### * arg1: i32 surface index ### * arg2: i32 plane, constant ### * arg3: i32 block width in bytes, constant ### * (block height inferred from data type size and block width) ### * arg4: i32 x byte offset ### * arg5: i32 y byte offset ### * arg6: data to write (overloaded) ### ### The number of bytes taken by a row in the return value, the "rounded ### block width", is the block width rounded up to the next power of two ### no less than 4. The size of the data to write type must be a multiple of ### this rounded block width, and the multiplier is the block height. ### ### The block width has a maximum of 32 (64 on BDW+). The maxmimum byte ### size of the data to write is 256. ### "media_st" : { "result" : "void", "arguments" : ["int","int","int","int","int","int","anyvector"], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.oword.ld*.`` : oword load instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.oword.ld`` : vISA OWORD_LD instruction ### * ``llvm.genx.oword.ld.unaligned`` : vISA OWORD_LD_UNALIGNED instruction ### ### * (log2 number of owords inferred from return type) ### * arg0: i32 is_modified, constant ### * arg1: i32 surface index ### * arg2: i32 offset (in owords for .ld / in bytes for .ld.unaligned) ### ### * Return value: the data read. ### ### The byte size of the return type must be 16, 32, 64, or 128. ### "oword_ld" : { "result" : "anyvector", "arguments" : ["int","int","int"], "attributes" : "ReadMem", "platforms" : "-Xe2", }, "oword_ld_unaligned" : { "result" : "anyvector", "arguments" : ["int","int","int"], "attributes" : "ReadMem", "platforms" : "-Xe2", }, ### ``llvm.genx.oword.st.`` : vISA OWORD_ST instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (log2 number of owords inferred from return type) ### * arg0: i32 surface index ### * arg1: i32 offset (in owords) ### * arg2: data to write (overloaded) ### ### The byte size of the data to write must be 16, 32, 64, or 128. ### "oword_st" : { "result" : "void", "arguments" : ["int","int","anyvector"], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.scatter.private....`` : CM internal, no VISA ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: v-by-i1 predicate (overloaded) ### * arg1: base pointer (overloaded) ### * arg2 vXi32 element offset in elements (overloaded) ### * arg3: the data to write. The first elements will be used. (overloaded) ### ### The vector width of the data to write is the number of elements to write, ### which must be 1, 8 or 16. ### The element offset arg must have the same vector width. 
### "scatter_private" : { "result" : "void", "arguments" : ["anyvector","anyptr","anyint","anyvector"], "attributes" : "None" }, ### ``llvm.genx.scatter.scaled...`` : vISA SCATTER_SCALED instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 log2 num blocks, constant (0/1/2 for num blocks 1/2/4) ### * arg2: i16 scale, constant ### * arg3: i32 surface index ### * arg4: i32 global offset in bytes ### * arg5: vXi32 element offset (overloaded) ### * arg6: data to write (overloaded) ### ### The vector width of the element offset arg is the number of elements to ### write, which must be power of 2 and less than or equal to 32. ### ### The predicate arg must have the same vector width. ### ### The data type to write must have UD, D or F type. For 1 and 2 byte (1 x num ### blocks) accesses the upper bytes will be ignored. ### "scatter_scaled" : { "result" : "void", "arguments" : ["anyvector","int","short","int","int","anyint","anyvector"], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.scatter4.scaled...`` : vISA SCATTER4_SCALED instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (Exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 channel mask, constant ### * arg2: i16 scale, constant ### * arg3: i32 surface index ### * arg4: i32 global offset in bytes ### * arg5: vXi32 element offset in bytes (overloaded) ### * arg6: data to write (overloaded) ### ### The vector width of the element offset arg is the number of elements to ### write, which must be 8 or 16. ### The predicate arg must have the same vector width. ### The instruction writes up to 4 channels per element, with the lowest 4 ### bits of the channel mask arg giving the mask of channels _not_ to read. ### The number of 0 bits in that lower 4 bits of the channel mask arg is the ### number of channels to write per element. ### The channels to write must be contiguous and starting at channel 0. ### The vector width of the data to write must be the number of elements ### times the number of channels to write per element. ### The element type of the data to write must be i32 or float. ### "scatter4_scaled" : { "result" : "void", "arguments" : ["anyvector","int","short","int","int","anyint","anyvector"], "attributes" : "None", "platforms" : "-Xe2", }, ### ``llvm.genx.scatter4.typed...`` : vISA SCATTER4_TYPED instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 channel mask, constant ### * arg1: vXi1 predicate (Num_elts inferred from U pixel address type) (overloaded) ### * arg2: i32 surface index ### * arg3: v8Xi32 U pixel address (overloaded) ### * arg4: v8Xi32 V pixel address ### * arg5: v8Xi32 R pixel address ### * arg6: data to write (overloaded) ### ### The vector widths of the U pixel address, V pixel address and R pixel ### address args must be equal and are the number of elements to write, which ### must be 8 or 16. (16 is split into 2x 8 by the GenX backend.) ### The predicate arg must have the same vector width. ### The instruction writes up to 4 channels per element, with the lowest 4 ### bits of the channel mask arg giving the mask of channels to write. 
### ``llvm.genx.scatter4.typed...`` : vISA SCATTER4_TYPED instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: i32 channel mask, constant
### * arg1: vXi1 predicate (Num_elts inferred from U pixel address type) (overloaded)
### * arg2: i32 surface index
### * arg3: v8Xi32 U pixel address (overloaded)
### * arg4: v8Xi32 V pixel address
### * arg5: v8Xi32 R pixel address
### * arg6: data to write (overloaded)
###
### The vector widths of the U pixel address, V pixel address and R pixel
### address args must be equal and are the number of elements to write, which
### must be 8 or 16. (16 is split into 2x 8 by the GenX backend.)
### The predicate arg must have the same vector width.
### The instruction writes up to 4 channels per element, with the lowest 4
### bits of the channel mask arg giving the mask of channels to write.
### The number of 1 bits in that lower 4 bits of the channel mask arg is the
### number of channels to write per element. Mask "0000" is not allowed.
### The vector width of the data to write must be the number of elements
### times the number of channels to write per element.
### The element type of the source value must be i32 or float.
###
"scatter4_typed" : { "result" : "void", "arguments" : ["int","anyvector","int","anyvector",1,1,"anyvector"], "attributes" : "None", "platforms" : "-Xe2", },
### ``llvm.genx.transpose.ld.`` : vISA TRANSPOSE_LD instruction
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: i32 surface index
### * arg1: i32 log2 block width in i32s, constant (0-3)
### * (log2 block height inferred from block width and data type, 0-3)
### * arg2: i32 X offset
### * arg3: i32 Y offset
###
### * Return value: the data read
###
### The vector width of the return value is the number of elements to read.
### This must be a multiple of the block width. The block height is then
### inferred from those values.
### The element type of the return value must be i32 or float.
###
"transpose_ld" : { "result" : "anyvector", "arguments" : ["int","int","int","int"], "platforms" : "-Xe2", "attributes" : "ReadMem" },
### "untyped_atomic_add" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, "untyped_atomic_sub" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, "untyped_atomic_min" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, "untyped_atomic_max" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, "untyped_atomic_xchg" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, "untyped_atomic_and" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, "untyped_atomic_or" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, "untyped_atomic_xor" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, "untyped_atomic_imin" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, "untyped_atomic_imax" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.untyped.atomic.*..`` : vISA UNTYPED_ATOMIC with inc/dec ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.untyped.atomic.inc`` : vISA UNTYPED_ATOMIC INC instruction ### * ``llvm.genx.untyped.atomic.dec`` : vISA UNTYPED_ATOMIC DEC instruction ### ### * arg0: vXi1 predicate (Num_elts inferred from element offset type) (overloaded) ### * arg1: i32 surface index ### * arg2: i32 global offset in i32s ### * arg3: vXi32 element offset in i32s ### * arg4: vXi32 original value of the register that the data is read into ### ### * Return value: vXi32 the old value read ### ### Predicate, element offset and the return value must have the same vector ### width, which must be 8 or 16. ### "untyped_atomic_inc" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "untyped_atomic_dec" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.untyped.atomic.cmpxchg..`` : vISA UNTYPED_ATOMIC CMPXCHG instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: vXi1 predicate (Num_elts inferred from element offset type) (overloaded) ### * arg1: i32 surface index ### * arg2: i32 global offset in i32s ### * arg3: vXi32 element offset in i32s ### * arg4: vXi32 src0 ### * arg5: vXi32 src1 ### * arg6: vXi32 original value of the register that the data is read into ### ### * Return value: vXi32 the old value read ### ### Predicate, element offset, src0, src1, and the return value must all have ### the same vector width, which must be 8 or 16. ### "untyped_atomic_cmpxchg" : { "result" : "anyvector", "arguments" : ["anyvector","int","int",0,0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.svm.block.ld*..
`` : vISA SVM BLOCK_LD instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * ``llvm.genx.svm.block.ld`` : vISA SVM BLOCK_LD instruction with oword alignment ### * ``llvm.genx.svm.block.ld.unaligned`` : vISA SVM BLOCK_LD instruction with ### dword alignment ### ### * (log2 number of oword inferred from data type) ### * arg0: i32/i64 address ### ### * Return value: data read ### ### The data read must have a size that is a power of two from 16 to 128 ### bytes. ### "svm_block_ld" : { "result" : "anyvector", "arguments" : ["anyint"], "platforms" : "-Xe2", "attributes" : "ReadMem" }, "svm_block_ld_unaligned" : { "result" : "anyvector", "arguments" : ["anyint"], "platforms" : "-Xe2", "attributes" : "ReadMem" }, ### ``llvm.genx.svm.block.st.
`` : vISA SVM BLOCK_ST instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (log2 number of oword inferred from data type) ### * arg0: i32/i64 address ### * arg1: data to write (overloaded) ### ### The data to write must have a size that is a power of two from 16 to 128 ### bytes. ### "svm_block_st" : { "result" : "void", "arguments" : ["anyint","anyvector"], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.svm.gather...`` : vISA SVM GATHER instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (exec size inferred from address vector width) ### * arg0: vXi1 predicate (Num_elts inferred from this arg) (overloaded) ### * (block size inferred from data element type) ### * arg1: i32 log2 num blocks, constant (0/1/2/3 for num blocks 1/2/4/8) ### * arg2: vXi64 address (X = 8 or 16) (overloaded) ### * arg3: old value of the data read ### ### * Return value: data read ### ### The return value element type is i8 for block size 1, i32/float for ### block size 4, or i64/double for block size 8. ### The return value vector width is the address vector width times ### number of blocks (rounded up to 4 if block size is 1). ### "svm_gather" : { "result" : "anyvector", "arguments" : ["anyvector","int","anyint",0], "platforms" : "-Xe2", "attributes" : "ReadMem" }, ### ``llvm.genx.svm.gather4.scaled...`` : vISA SVM GATHER4_SCALED instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 channel mask, constant ### * arg2: i16 scale, constant ### * arg3: i64 global address in bytes ### * arg4: vXi64 element offset in bytes (overloaded) ### * arg5: old value of the data read ### ### * Return value: the data read ### ### The vector width of the element offset arg is the number of elements to ### read, which must be 8 or 16. ### The predicate arg must either have the same vector width, or be a scalar ### i1 constant with value 1. ### The instruction reads up to 4 channels per element, with the lowest 4 ### bits of the channel mask arg giving the mask of channels _not_ to read. ### The number of 0 bits in that lower 4 bits of the channel mask arg is the ### number of channels to read per element. ### The vector width of the return value must be the number of elements ### times the number of channels to read per element. ### The element type of the return value must be i32 or float. ### "svm_gather4_scaled" : { "result" : "anyvector", "arguments" : ["anyvector","int","short","long","anyint",0], "platforms" : "-Xe2", "attributes" : "ReadMem" }, ### ``llvm.genx.svm.scatter...`` : vISA SVM SCATTER instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (exec size inferred from address vector width) ### * arg0: vXi1 predicate (Num_elts inferred from element offset type) (overloaded) ### * (block size inferred from data element type) ### * arg1: i32 log2 num blocks, constant (0/1/2/3 for num blocks 1/2/4/8) ### * arg2: vXi64 address (X = 8 or 16) (overloaded) ### * arg3: data to write (overloaded) ### ### The data to write element type is i8 for block size 1, i32/float for ### block size 4, or i64/double for block size 8. ### The data vector width is the address vector width times ### number of blocks (rounded up to 4 if block size is 1). 
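###
### A minimal SIMD8 sketch (hypothetical values; block size 4 is inferred
### from the i32 element type, and the suffix order is assumed to be
### predicate, address, then data types):
###
###   call void @llvm.genx.svm.scatter.v8i1.v8i64.v8i32(
###            <8 x i1> %pred, i32 0, <8 x i64> %addrs, <8 x i32> %data)
###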
### "svm_scatter" : { "result" : "void", "arguments" : ["anyvector","int","anyint","anyvector"], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.svm.scatter4.scaled...`` : vISA SVM SCATTER4_SCALED instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * (exec_size inferred from element offset type) ### * arg0: vXi1 predicate (overloaded) ### * arg1: i32 channel mask, constant ### * arg2: i16 scale, constant ### * arg3: i64 global address in bytes ### * arg4: vXi64 element offset in bytes (overloaded) ### * arg5: data to write (overloaded) ### ### The vector width of the element offset arg is the number of elements to ### read, which must be 8 or 16. ### The predicate arg must either have the same vector width, or be a scalar ### i1 constant with value 1. ### The instruction writes up to 4 channels per element, with the lowest 4 ### bits of the channel mask arg giving the mask of channels _not_ to read. ### The number of 0 bits in that lower 4 bits of the channel mask arg is the ### number of channels to write per element. ### The vector width of the data to write arg must be the number of elements ### times the number of channels to read per element. ### The element type of the data to write arg must be i32 or float. ### "svm_scatter4_scaled" : { "result" : "void", "arguments" : ["anyvector","int","short","long","anyint","anyvector"], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.svm.atomic.*...`` : vISA SVM_ATOMIC with binary operator ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.svm.atomic.add`` : vISA SVM_ATOMIC ADD instruction ### * ``llvm.genx.svm.atomic.sub`` : vISA SVM_ATOMIC SUB instruction ### * ``llvm.genx.svm.atomic.min`` : vISA SVM_ATOMIC MIN instruction ### * ``llvm.genx.svm.atomic.max`` : vISA SVM_ATOMIC MAX instruction ### * ``llvm.genx.svm.atomic.xchg`` : vISA SVM_ATOMIC XCHG instruction ### * ``llvm.genx.svm.atomic.and`` : vISA SVM_ATOMIC AND instruction ### * ``llvm.genx.svm.atomic.or`` : vISA SVM_ATOMIC OR instruction ### * ``llvm.genx.svm.atomic.xor`` : vISA SVM_ATOMIC XOR instruction ### * ``llvm.genx.svm.atomic.imin`` : vISA SVM_ATOMIC IMIN instruction ### * ``llvm.genx.svm.atomic.imax`` : vISA SVM_ATOMIC IMAX instruction ### ### * arg0: vXi1 predicate (Num_elts inferred from this arg) (overloaded) ### * arg1: vXi64 element addresses in bytes (overloaded) ### * arg2: vXi32/vXi64 src ### * arg3: original value of the register that the data is read into ### ### * Return value: vXi32/vXi64 the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 2, 4, or 8. 
### "svm_atomic_add" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_sub" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_min" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_max" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_xchg" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_and" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_or" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_xor" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_imin" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_imax" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.svm.atomic.*...`` : vISA SVM_ATOMIC with inc/dec ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.svm.atomic.inc`` : vISA SVM_ATOMIC INC instruction ### * ``llvm.genx.svm.atomic.dec`` : vISA SVM_ATOMIC DEC instruction ### ### * arg0: vXi1 predicate (Num_elts inferred from this arg) (overloaded) ### * arg1: vXi64 element addresses in bytes (overloaded) ### * arg2: original value of the register that the data is read into ### ### * Return value: vXi32/vXi64 the old value read ### ### Predicate, element offset and the return value must have the same vector ### width, which must be 1, 2, 4 or 8. ### "svm_atomic_inc" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_dec" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.svm.atomic.cmpxchg...`` : vISA SVM_ATOMIC CMPXCHG instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: vXi1 predicate (Num_elts inferred from element offset type) (overloaded) ### * arg1: vXi64 element addresses in bytes (overloaded) ### * arg2: vXi32/vXi64 src0 ### * arg3: vXi32/vXi64 src1 ### * arg4: original value of the register that the data is read into ### ### * Return value: vXi32/vXi64 the old value read ### ### Predicate, element offset, src0, src1, and the return value must all have ### the same vector width, which must be 1, 2, 4 or 8. 
### "svm_atomic_cmpxchg" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.svm.atomic.*...`` : vISA SVM_ATOMIC with binary operator ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * ``llvm.genx.svm.atomic.fmin`` : vISA SVM_ATOMIC FMIN instruction ### * ``llvm.genx.svm.atomic.fmax`` : vISA SVM_ATOMIC FMAX instruction ### ### * arg0: vXi1 predicate (Num_elts inferred from this arg) (overloaded) ### * arg1: vXi64 element addresses in bytes (overloaded) ### * arg2: vXf32 src ### * arg3: original value of the register that the data is read into ### ### * Return value: vXf32 the old value read ### ### Predicate, element offset, src, and the return value must all have the ### same vector width, which must be 1, 2, 4, or 8. ### "svm_atomic_fmin" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, "svm_atomic_fmax" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.svm.atomic.fcmpwr...`` : vISA SVM_ATOMIC FCMPWR instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: vXi1 predicate (Num_elts inferred from element offset type) (overloaded) ### * arg1: vXi64 element addresses in bytes (overloaded) ### * arg2: vXf32 src0 ### * arg3: vXf32 src1 ### * arg4: original value of the register that the data is read into ### ### * Return value: vXf32 the old value read ### ### Predicate, element offset, src0, src1, and the return value must all have ### the same vector width, which must be 1, 2, 4 or 8. ### "svm_atomic_fcmpwr" : { "result" : "anyvector", "arguments" : ["anyvector","anyint",0,0,0], "platforms" : "-Xe2", "attributes" : "None" }, ### ``llvm.genx.load..`` : vISA LOAD (sampler load) instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 channel mask, constant (simd_mode inferred from pixel address operands) ### * arg1: i32 surface index ### * arg2: vXi32 U pixel address (overloaded) ### * arg3: vXi32 V pixel address ### * arg4: vXi32 R pixel address ### ### * Return value: the data read ### ### The vector widths of the U pixel address, V pixel address and R pixel ### address args must be equal, and either 8 or 16. ### ### The instruction reads up to 4 channels per element, with the lowest 4 ### bits of the channel mask arg giving the mask of channels _not_ to read. ### The number of 0 bits in that lower 4 bits of the channel mask arg is the ### number of channels to read per element. ### ### For SIMD8 pre-BDW, the vector width of the data read must be 32. ### For SIMD8 BDW+, or for SIMD16, the vector width of the data read must be ### the SIMD width times the number of enabled channels. ### ### The element type of the return value must be i32 or float. 
### "load" : { "result" : "anyvector", "arguments" : ["int","int","anyint",1,1], "attributes" : "ReadMem" }, ### ``llvm.genx.sample..`` : vISA SAMPLE instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 channel mask, constant (simd_mode inferred from pixel address operands) ### * arg1: i32 sampler index ### * arg2: i32 surface index ### * arg3: vXfloat U pixel address(overloaded) ### * arg4: vXfloat V pixel address ### * arg5: vXfloat R pixel address ### ### * Return value: the data read ### ### The vector widths of the U pixel address, V pixel address and R pixel ### address args must be equal, and either 8 or 16. ### ### The instruction reads up to 4 channels per element, with the lowest 4 ### bits of the channel mask arg giving the mask of channels _not_ to read. ### The number of 0 bits in that lower 4 bits of the channel mask arg is the ### number of channels to read per element. ### ### For SIMD8 pre-BDW, the vector width of the data read must be 32. ### For SIMD8 BDW+, or for SIMD16, the vector width of the data read must be ### the SIMD width times the number of enabled channels. ### ### The element type of the return value must be i32 or float. ### "sample" : { "result" : "anyvector", "arguments" : ["int","int","int","anyfloat",1,1], "attributes" : "ReadMem" }, ### ``llvm.genx.sample..unorm`` : vISA SAMPLE_UNORM instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 channel mask, constant ### * arg1: i32 sampler index ### * arg2: i32 surface index ### * arg3: float U pixel address ### * arg4: float V pixel address ### * arg5: float DeltaU ### * arg6: float DeltaV ### ### * Return value: v8i16 the data read ### ### The instruction reads up to 4 channels per element, with the lowest 4 ### bits of the channel mask arg giving the mask of channels _not_ to read. ### The number of 0 bits in that lower 4 bits of the channel mask arg is the ### number of channels to read per element. 
### "sample_unorm" : { "result" : "anyvector", "arguments" : ["int","int","int","float","float","float","float"], "attributes" : "ReadMem" }, ### ``llvm.genx.3d.sample......`` : vISA 3D_SAMPLE instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 opcode, constant ### * arg1: vXi1 predicate mask, used to determine execution size (overloaded) ### * arg2: i32 channel mask, constant ### * arg3: i16 aoffimmi ### * arg4: i32 sampler index ### * arg5: i32 surface index ### * argN: vXf or vXhf operand, for 6 <= N <= 20 (all overloaded) ### ### * Return value: the data read ### "3d_sample" : { "result" : "anyvector", "arguments" : ["int","anyvector","int","short","int","int","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector"], "attributes" : "ReadMem" }, ### ``llvm.genx.3d.load......`` : vISA 3D_LOAD instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 opcode, constant ### * arg1: vXi1 predicate mask, used to determine execution size(overloaded) ### * arg2: i32 channel mask, constant ### * arg3: i16 aoffimmi ### * arg4: i32 surface index ### * argN: vXf or vXhf operand, for 5 <= N <= 19 (all overloaded) ### ### * Return value: the data read ### "3d_load" : { "result" : "anyvector", "arguments" : ["int","anyvector","int","short","int","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector","anyvector"], "attributes" : "ReadMem" }, ### ``llvm.genx.avs.`` : vISA AVS instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 channel mask, constant ### * arg1: i32 sampler index ### * arg2: i32 surface index ### * arg3: float U offset ### * arg4: float V offset ### * arg5: float deltaU ### * arg6: float deltaV ### * arg7: float u2d ### * arg8: i32 groupID ### * arg9: i32 verticalBlockNumber ### * arg10: i32 Output format control, constant ### * arg11: float v2d ### * arg12: i32 execMode, constant ### * arg13: i8 IEFBypass ### ### * Return value: the data read. ### ### The actual data returned is determined by a combination of , ### , , as well as whether output shuffle is enabled in the ### sampler state. ### ### SIMD Control Flow: channel enable is ignored. 
### "avs" : { "result" : "anyvector", "arguments" : ["int","int","int","float","float","float","float","float","int","int","int","float","int","char"], "attributes" : "ReadMem" }, ### ``llvm.genx.barrier`` : vISA BARRIER instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### "barrier" : { "result" : "void", "arguments" : [], "attributes" : "Convergent" }, ### ``llvm.genx.sbarrier`` : vISA SBARRIER instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i8 signal flag, constant ### "sbarrier" : { "result" : "void", "arguments" : ["char"], "attributes" : "Convergent" }, ### ``llvm.genx.nbarrier`` : vISA NBARRIER instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i8 signal flag, constant ### * arg1: i8 barrier id ### * arg2: i8 number of threads ### "nbarrier" : { "result" : "void", "arguments" : ["char","char","char"], "attributes" : "SideEffects", "platforms" : "XeHPC+", }, ### ``llvm.genx.nbarrier.arrive`` : Named barrier arrive operation ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i8 barrier id ### * arg1: i8 thread role ### * arg2: i8 number of producers ### * arg3: i8 number of consumers ### ### Thread roles are the following: ### - 0: the thread is a barrier producer and consumer ### - 1: the thread is only a barrier producer ### - 2: the thread is only a barrier consumer ### - other values are invalid ### "nbarrier_arrive" : { "result" : "void", "arguments" : ["char", "char", "char", "char"], "attributes" : "SideEffects", "platforms" : "XeHPC+", }, ### ``llvm.genx.cache.flush`` : vISA CACHE_FLUSH instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### "cache_flush" : { "result" : "void", "arguments" : [], "attributes" : "None" }, ### ``llvm.genx.fence`` : vISA FENCE instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i8 mask, constant ### "fence" : { "result" : "void", "arguments" : ["char"], "attributes" : "None" }, ### ``llvm.genx.wait`` : vISA WAIT instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i8 thread mask ### "wait" : { "result" : "void", "arguments" : ["char"], "attributes" : "None" }, ### ``llvm.genx.yield`` : vISA YIELD instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### "yield" : { "result" : "void", "arguments" : [], "attributes" : "None" }, ### ``llvm.genx.raw.send...`` : vISA RAW_SEND instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0 i32 modifier whether it is send or sendc, constant ### * (exec_size inferred from predicate vector width, defaulting to 16 ### if predicate is i1) ### * arg1: i1/vXi1 predicate (overloaded) ### * arg2: i32 extended message descriptor, constant ### * (numsrc inferred from src size) ### * (numdst inferred from dst size) ### * arg3: i32 desc ### * arg4: src (overloaded) ### * arg5: old_dst ### ### * Return value: dst ### ### The SEND instruction has a field for the size of each of src ### and dst. These are inferred by rounding the size of each of src and ### dst up to the next whole GRF. ### ### If the send writes to the whole of dst, or the program does not care what ### was in those registers before, then set old_dst to UndefValue (of the same ### type as dst). 
If on the other hand the send is predicated and the program ### needs to see what was in the parts of destination registers not written ### by the send, then use old_dst as the "old value of destination registers" ### input. ### ### The predicate must be constant i1 with value 1 for a message that is not ### predicatable. For a predicatable message, it must be a vector of i1 with ### width determining the execution size. ### "raw_send" : { "result" : "anyvector", "arguments" : ["int","anyint","int","int","anyvector",0], "attributes" : "SideEffects" }, ### ``llvm.genx.raw.send.noresult..`` : vISA RAW_SEND instruction with no result ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0 i32 modifier whether it is send or sendc, constant ### * (exec_size inferred from predicate vector width, defaulting to 16 ### if predicate is i1) ### * arg1: i1/vXi1 predicate (overloaded) ### * arg2: i32 extended message descriptor, constant ### * (numsrc inferred from src size) ### (numdst is 0) ### * arg3: i32 desc ### * arg4: src (overloaded) ### ### The SEND instruction has a field for the size of src. This is inferred by ### rounding the size of src up to the next whole GRF. ### ### The predicate must be constant i1 with value 1 for a message that is not ### predicatable. For a predicatable message, it must be a vector of i1 with ### width determining the execution size. ### "raw_send_noresult" : { "result" : "void", "arguments" : ["int","anyint","int","int","anyvector"], "attributes" : "SideEffects" }, ### ``llvm.genx.raw.sends....`` : vISA RAW_SENDS instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0 i32 modifier whether it is send or sendc, constant ### * (exec_size inferred from predicate vector width, defaulting to 16 ### if predicate is i1) ### * arg1: i1/vXi1 predicate (overloaded) ### * arg2: i8 sfid ### * arg3: i32 extended message descriptor, constant ### * (numsrc inferred from src size) ### * (numsrc2 inferred from src2 size) ### * (numdst inferred from dst size) ### * arg4: i32 desc ### * arg5: src (overloaded) ### * arg6: src2 (overloaded) ### * arg7: old_dst ### ### * Return value: dst ### ### The SENDS instruction has a field for the size of each of src, src2 ### and dst. These are inferred by rounding the size of each of src, src2 and ### dst up to the next whole GRF. ### ### If the send writes to the whole of dst, or the program does not care what ### was in those registers before, then set old_dst to UndefValue (of the same ### type as dst). If on the other hand the send is predicated and the program ### needs to see what was in the parts of destination registers not written ### by the send, then use old_dst as the "old value of destination registers" ### input. ### ### The predicate must be constant i1 with value 1 for a message that is not ### predicatable. For a predicatable message, it must be a vector of i1 with ### width determining the execution size. 
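###
### A minimal sketch (hypothetical descriptor values; the suffix is assumed
### to be return, predicate, src and src2 types in that order):
###
###   %dst = call <8 x i32> @llvm.genx.raw.sends.v8i32.v16i1.v8i32.v8i32(
###              i32 0, <16 x i1> %pred, i8 %sfid, i32 %exdesc, i32 %desc,
###              <8 x i32> %src, <8 x i32> %src2, <8 x i32> undef)
###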
### "raw_sends" : { "result" : "anyvector", "arguments" : ["int","anyint","char","int","int","anyvector","anyvector",0], "attributes" : "None" }, ### ``llvm.genx.raw.sends.noresult...`` : vISA RAW_SENDS instruction with no result ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0 i32 modifier whether it is send or sendc, constant ### * (exec_size inferred from predicate vector width, defaulting to 16 ### if predicate is i1) ### * arg1: i1/vXi1 predicate (overloaded) ### * arg2: i8 sfid ### * arg3: i32 extended message descriptor ### * (numsrc inferred from src size) ### * (numsrc2 inferred from src2 size) ### * (numdst is 0) ### * arg4: i32 desc ### * arg5: src (overloaded) ### * arg6: src2 (overloaded) ### ### The SENDS instruction has a field for the size of each of src and src2. ### These are inferred by rounding the size of each of src and src2 up to the ### next whole GRF. ### ### The predicate must be constant i1 with value 1 for a message that is not ### predicatable. For a predicatable message, it must be a vector of i1 with ### width determining the execution size. ### "raw_sends_noresult" : { "result" : "void", "arguments" : ["int","anyint","char","int","int","anyvector","anyvector"], "attributes" : "None" }, ### ``llvm.genx.raw.send2...`` : vISA RAW_SEND instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### * arg0 i8 modifier, bit-0 represents sendc, bit-1 repersents EOT ### * arg1 i8 exec_size ### * arg2: i1/vXi1 predicate (overloaded) ### * arg3: i8 numsrc1 ### * (numsrc2 is 0) ### * arg4: i8 numdst ### * arg5: i8 sfid ### * arg6: i32 extended message descriptor ### * arg7: i32 message descriptor ### * arg8: src (overloaded) ### * (src2 is NULL) ### * arg9: old_ds ### ### * Return value: dst ### ### This intrinsic supports full encoding of the vISA raw_send instruction. ### "raw_send2" : { "result" : "anyvector", "arguments" : ["char","char","anyvector","char","char","char","int","int","anyvector",0], "attributes" : "None" }, ### ``llvm.genx.raw.send2.noresult..`` : vISA RAW_SEND instruction with no result ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0 i8 modifier, bit-0 represents sendc, bit-1 repersents EOT ### * arg1 i8 exec_size ### * arg2: i1/vXi1 predicate (overloaded) ### * arg3: i8 numsrc1 ### * (numsrc2 is 0) ### * (numdst is 0) ### * arg4: i8 sfid ### * arg5: i32 extended message descriptor ### * arg6: i32 message descriptor ### * arg7: src (overloaded) ### * (src2 is NULL) ### ### This intrinsic supports full encoding of the vISA raw_send instruction with no result. 
### "raw_send2_noresult" : { "result" : "void", "arguments" : ["char","char","anyvector","char","char","int","int","anyvector"], "attributes" : "None" }, ### ``llvm.genx.raw.sends2....`` : vISA RAW_SENDS instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0 i8 modifier, bit-0 represents sendc, bit-1 repersents EOT ### * arg1 i8 exec_size ### * arg2: i1/vXi1 predicate (overloaded) ### * arg3: i8 numsrc1 ### * arg4: i8 numsrc2 ### * arg5: i8 numdst ### * arg6: i8 sfid ### * arg7: i32 extended message descriptor ### * arg8: i32 message descriptor ### * arg9: src (overloaded) ### * arg10: src2 (overloaded) ### * arg11: old_dst ### ### * Return value: dst ### ### This intrinsic supports full encoding of the vISA raw_sends instruction. ### "raw_sends2" : { "result" : "anyvector", "arguments" : ["char","char","anyvector","char","char","char","char","int","int","anyvector","anyvector",0], "attributes" : "None" }, ### ``llvm.genx.raw.sends2.noresult...`` : vISA RAW_SENDS instruction with no result ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0 i8 modifier, bit-0 represents sendc, bit-1 repersents EOT ### * arg1 i8 exec_size ### * arg2: i1/vXi1 predicate (overloaded) ### * arg3: i8 numsrc1 ### * arg4: i8 numsrc2 ### * (numdst is 0) ### * arg5: i8 sfid ### * arg6: i32 extended message descriptor ### * arg7: i32 message descriptor ### * arg8: src (overloaded) ### * arg9: src2 (overloaded) ### ### This intrinsic supports full encoding of the vISA raw_sends instruction with no result. ### "raw_sends2_noresult" : { "result" : "void", "arguments" : ["char","char","anyvector","char","char","char","int","int","anyvector","anyvector"], "attributes" : "None" }, ## --------------------------- ### Video Analytics Instrinsics ### --------------------------- ### ### ``llvm.genx.va.convolve2d.`` vISA VA 2d Convolve instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 properties value specifying return data format and input region size, constant ### ### * Return value: v64i16 or v16i16 matrix, depending on properties value ### "va_convolve2d" : { "result" : "anyint", "arguments" : ["int","int","float","float","int"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.hdc.convolve2d`` vISA VA HDC 2d Convolve instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 properties value specifying return data format and input region size, constant ### * arg5: i32 destination surface ### * arg6: i16 destination surface x-offset ### * arg7: i16 destination surface y-offset ### "va_hdc_convolve2d" : { "result" : "void", "arguments" : ["int","int","float","float","int","int","short","short"], "attributes" : "None" }, ### ``llvm.genx.va.erode.`` vISA VA Erode instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 properties value specifying return data format, constant ### ### * Return 
value: vXi32 ### "va_erode" : { "result" : "anyint", "arguments" : ["int","int","float","float","int"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.hdc.erode`` vISA VA HDC Erode instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 destination surface ### * arg5: i16 destination surface x-offset ### * arg6: i16 destination surface y-offset ### "va_hdc_erode" : { "result" : "void", "arguments" : ["int","int","float","float","int","short","short"], "attributes" : "None" }, ### ``llvm.genx.va.dilate.`` vISA VA Dilate instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 properties value specifying return data format, constant ### ### * Return value: vXi32 ### "va_dilate" : { "result" : "anyint", "arguments" : ["int","int","float","float","int"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.hdc.dilate`` vISA VA HDC Dilate instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 destination surface ### * arg5: i16 destination surface x-offset ### * arg6: i16 destination surface y-offset ### "va_hdc_dilate" : { "result" : "void", "arguments" : ["int","int","float","float","int","short","short"], "attributes" : "None" }, ### ``llvm.genx.va.minmax.`` vISA MinMax instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 surface index ### * arg1: float normalized x-coordinate ### * arg2: float normalized y-coordinate ### * arg3: i32 enable specific minmax functionality ### ### * Return: v32i8 or v16i16 depending on the surface format ### "va_minmax" : { "result" : "anyint", "arguments" : ["int","float","float","int"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.minmax.filter.`` vISA MinMax Filter instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 specifies the size of the minmax value returned, constant ### * arg5: i32 specifies the return data format, constant ### * arg6: i32 enable specific minmax functionality ### ### * Return: vXi8 or vXi16 depending on return data size and format ### "va_minmax_filter" : { "result" : "anyint", "arguments" : ["int","int","float","float","int","int","int"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.hdc.minmax.filter`` vISA HDC MinMax Filter instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 return data format, constant ### * arg5: i32 enable the specific minmax functionality, constant ### * arg6: i32 destination surface index ### * arg7: i16 destination surface x-offset ### * arg8: i16 destination surface y-offset ### "va_hdc_minmax_filter" : { "result" : "void", "arguments" : ["int","int","float","float","int","int","int","short","short"], "attributes" : "None" }, ### 
``llvm.genx.va.bool.centroid.`` vISA Boolean Centroid instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: float normalized x-coordinate ### * arg2: float normalized y-coordinate ### * arg3: i8 vertical size ### * arg4: i8 horizontal size ### ### * Return: v16i8 or v16i16 depending on surface format ### "va_bool_centroid" : { "result" : "anyint", "arguments" : ["int","float","float","char","char"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.centroid.`` vISA Centroid instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: float normalized x-coordinate ### * arg2: float normalized y-coordinate ### * arg3: i8 vertical size ### ### * Return: v32i32 ### "va_centroid" : { "result" : "anyint", "arguments" : ["int","float","float","char"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.1d.convolve.horizontal.`` vISA 1d convolve horizontal instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 mode, constant ### ### * Return: v16i16 or v64i16 depending on mode ### "va_1d_convolve_horizontal" : { "result" : "anyint", "arguments" : ["int","int","float","float","int"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.hdc.1d.convolve.horizontal`` vISA HDC 1d convolve horizontal instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 pixel size, constant ### * arg5: i32 destination surface index ### * arg6: i16 destination surface x-offset ### * arg7: i16 destination surface y-offset ### "va_hdc_1d_convolve_horizontal" : { "result" : "void", "arguments" : ["int","int","float","float","int","int","short","short"], "attributes" : "None" }, ### ``llvm.genx.va.1d.convolve.vertical.`` vISA 1d convolve vertical instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 mode, constant ### ### * Return: v16i16 or v64i16 depending on mode ### "va_1d_convolve_vertical" : { "result" : "anyint", "arguments" : ["int","int","float","float","int"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.hdc.1d.convolve.vertical`` vISA HDC 1d convolve vertical instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 pixel size, constant ### * arg5: i32 destination surface index ### * arg6: i16 destination surface x-offset ### * arg7: i16 destination surface y-offset ### "va_hdc_1d_convolve_vertical" : { "result" : "void", "arguments" : ["int","int","float","float","int","int","short","short"], "attributes" : "None" }, ### ``llvm.genx.va.1pixel.convolve..`` vISA 1 Pixel Convolve instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 
surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 mode, constant ### * arg5: v32i16 offsets (overloaded) ### ### * Return: v64i16 or v16i16 depending on mode. ### "va_1pixel_convolve" : { "result" : "anyint", "arguments" : ["int","int","float","float","int","anyint"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.hdc.1pixel.convolve`` vISA HDC 1 Pixel Convolve instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### * arg4: i32 pixel size, constant ### * arg5: v32i16 offsets ### * arg6: i32 destination surface index ### * arg7: i16 destination surface x-offset ### * arg8: i16 destination surface y-offset ### "va_hdc_1pixel_convolve" : { "result" : "void", "arguments" : ["int","int","float","float","int","anyint","int","short","short"], "attributes" : "None" }, ### ``llvm.genx.va.1pixel.convolve.1x1mode.`` vISA 1 Pixel Convolve (1x1 mode) instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 sampler index ### * arg1: i32 surface index ### * arg2: float normalized x-coordinate ### * arg3: float normalized y-coordinate ### ### * Return: v64i16 or v16i16 depending on mode. ### "va_1pixel_convolve_1x1mode" : { "result" : "anyint", "arguments" : ["int","int","float","float"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.lbp.creation.`` vISA LBP Creation instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 surface index ### * arg1: float normalized x-coordinate ### * arg2: float normalized y-coordinate ### * arg3: mode, constant ### ### * Return: v64i8 or v128i8 depending on mode ### "va_lbp_creation" : { "result" : "anyint", "arguments" : ["int","float","float","int"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.hdc.lbp.creation`` vISA HDC LBP Creation instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 surface index ### * arg1: float normalized x-coordinate ### * arg2: float normalized y-coordinate ### * arg3: mode, constant ### * arg4: i32 destination surface index ### * arg5: i16 destination surface x-offset ### * arg6: i16 destination surface y-offset ### "va_hdc_lbp_creation" : { "result" : "void", "arguments" : ["int","float","float","int","int","short","short"], "attributes" : "None" }, ### ``llvm.genx.va.lbp.correlation.`` vISA LBP Correlation instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 surface index ### * arg1: float normalized x-coordinate ### * arg2: float normalized y-coordinate ### * arg3: i16 horizontal disparity ### ### * Return: v64i8 ### "va_lbp_correlation" : { "result" : "anyint", "arguments" : ["int","float","float","short"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.hdc.lbp.correlation`` vISA HDC LBP Correlation instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 surface index ### * arg1: float normalized x-coordinate ### * arg2: float normalized y-coordinate ### * arg3: i16 horizontal disparity ### * arg4: i32 destination surface index ### * arg5: i16 destination surface x-offset ### * arg6: i16 destination surface y-offset ### "va_hdc_lbp_correlation" : { "result" : "void", "arguments" : 
["int","float","float","short","int","short","short"], "attributes" : "None" }, ### ``llvm.genx.va.correlation.search.`` vISA Correlation Search instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i32 surface index ### * arg1: float normalized x-coordinate ### * arg2: float normalized y-coordinate ### * arg3: float normalized vertical origin ### * arg4: float normalized horizontal origin ### * arg5: i8 x-direction size ### * arg6: i8 y-direction size ### * arg7: i8 x-direction search size ### * arg8: i8 y-direction search size ### ### * Return: vXi32 ### "va_correlation_search" : { "result" : "anyint", "arguments" : ["int","float","float","float","float","char","char","char","char"], "attributes" : "ReadMem" }, ### ``llvm.genx.va.flood.fill..`` vISA Flood Fill instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: i8 Is8Connect, constant (valid values 0 or 1). ### * arg1: v10i16 pixel mask horizontal direction (overloaded) ### * arg2: i16 pixel mask vertical direction left ### * arg3: i16 pixel mask vertical direction right ### * arg4: i16 loop count ### ### * Return: v8i16 ### "va_flood_fill" : { "result" : "anyint", "arguments" : ["char","anyint","short","short","short"], "attributes" : "ReadMem" }, ##-------------------------------------------------------------------- ### CM codegen internal intrinsics ### ------------------------------S ### ``llvm.genx.simdcf.predicate.`` : simd cf predication marker intrinsic. ### ### * arg0: vector with any element type ### * arg1: vector constant, same size as arg0 ### ### * Return value: a vector composed of elements selected from arg0 or arg1 ### according to the implied SIMD CF predication mask. ### ### This is generated by clang codegen in the implementation of the CM ### reduction functions (cm_sum etc) whose behavior is sensitive to the ### surrounding SIMD CF context. It is lowered by the CMSimdCFLowering pass. ### "simdcf_predicate" : { "result" : "anyvector", "arguments" : [0,0], "attributes" : "None" }, ### llvm.genx.simdcf.any. : simd cf marker intrinsic. ### ### arg0: vector of i1 (overloaded) ### ### Return value: i1 value as condition for a scalar control flow. ### ### This intrinsic is used to mark a simd cf that takes a predicate vector and ### returns a scalar value for scalar cf. ### ### This is generated by clang codegen in the implementation of SIMD control ### flow, and lowered by the CMSimdCFLowering pass. ### "simdcf_any" : { "result" : "bool", "arguments" : ["anyvector"], "attributes" : "None" }, ### ``llvm.genx.unmask.begin`` : simd-unmask region begin ### ### * Return value: i32 old execution mask ### ### This intrinsic is used by front-end to mark the beginning of ### an unmask region, sets execution mask to all-active, and return ### the old mask in a temp. ### this intrinsic will be replaced by genx.simdcf.unmask by SimdCFLowering ### "unmask_begin" : { "result" : "int", "arguments" : [], "attributes" : "WriteMem,SideEffects" }, ### ``llvm.genx.unmask.end`` : simd-unmask region end ### ### arg0: temp to restore the execution-mask (1 dword) ### ### * Return value: void ### This intrinsic is used by front-end to mark the end of an unmask ### region, set execution mask back using the temp value from unmask-begin. 
### This intrinsic will be replaced by genx.simdcf.remask by SimdCFLowering.
###
"unmask_end" : { "result" : "void", "arguments" : ["int"], "attributes" : "WriteMem,SideEffects" },

### ``llvm.genx.lane.id`` : implicit lane-id in the simd-fork statement
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
### * ``llvm.genx.lane.id`` : read implicit lane_id
###
### * Return value: i32
###
"lane_id" : { "result" : "int", "arguments" : [], "attributes" : "NoMem" },

### ``llvm.genx.local.*.`` : read local ID register
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
### * ``llvm.genx.local.id`` : read implicit arg local_id
### * ``llvm.genx.local.id16`` : read implicit arg local_id16
### * ``llvm.genx.local.size`` : read implicit arg local_size
###
### * Return value: v3i32 - allows for x, y and z components
###   v3i16 local ids in 16 bits
###
### This is generated by clang codegen and lowered by CMImpParam.
###
"local_id" : { "result" : "anyvector", "arguments" : [], "attributes" : "NoMem" },
"local_id16" : { "result" : "anyvector", "arguments" : [], "attributes" : "NoMem" },
"local_size" : { "result" : "anyvector", "arguments" : [], "attributes" : "NoMem" },

### ``llvm.genx.group.count.`` : read group count register
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
### ``llvm.genx.group.count`` : read vISA v9 (%group_count_x)
###
### * Return value: 3xi32 the value read (allows for x, y and z components)
###
### This is generated by clang codegen and lowered by CMImpParam.
###
"group_count" : { "result" : "anyvector", "arguments" : [], "attributes" : "NoMem" },

### ``llvm.genx.get.scoreboard.bti`` : get scoreboard surface implicit
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### Return Value: i32 the surface index of the scoreboard bti
###
### This is generated by clang codegen and lowered by CMImpParam.
###
"get_scoreboard_bti" : { "result" : "int", "arguments" : [], "attributes" : "NoMem" },

### ``llvm.genx.get.scoreboard.deltas`` : get scoreboard deltas
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### Return Value: vector of 16 i8 values (8 x and 8 y)
###
### This is generated by clang codegen and lowered by CMImpParam.
###
"get_scoreboard_deltas" : { "result" : "char16", "arguments" : [], "attributes" : "NoMem" },

### ``llvm.genx.get.scoreboard.depcnt`` : get the maximal scoreboard dependency count
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### Return Value: i32
###
### This is generated by clang codegen and lowered by CMImpParam.
###
"get_scoreboard_depcnt" : { "result" : "int", "arguments" : [], "attributes" : "NoMem" },

### ``llvm.genx.predefined.surface`` : get predefined surface
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### Return Value: surface index of the specified id.
###
### This is generated by clang codegen when predefined surface is accessed.
###
"predefined_surface" : { "result" : "int", "arguments" : ["int"], "attributes" : "NoMem" },

##--------------------------------------------------------------------
### GenX backend internal intrinsics
### --------------------------------
### llvm.genx.constanti. : copy constant to register
### llvm.genx.constantf.
: copy constant to register
###
### arg0: input value (constant, any scalar or vector type other than i1 or
### vector of i1)
###
### Return value: same type
###
### This intrinsic is inserted by the GenXLowering pass
### to load a constant in a way that stops the subsequent CSE pass
### from propagating it back into the operand using it.
###
### There are two variants simply because there is no way of saying here
### that an argument can have any scalar or vector type.
###
"constanti" : { "result" : "anyint", "arguments" : [0], "attributes" : "NoMem" },
"constantf" : { "result" : "anyfloat", "arguments" : [0], "attributes" : "NoMem" },

### llvm.genx.convert. : convert register category (non address)
###
### arg0: input value (i32 or vector of i32)
###
### Return value: converted value (same type)
###
### This intrinsic is inserted by the GenXCategory pass to represent
### a value being converted between two register categories. The input and
### result categories are not represented; they are implied by the other
### def/uses of the value. Address conversion is not covered by this
### intrinsic.
###
### The intrinsic is also inserted by GenXCoalescing to represent a copy
### of a value of category other than general. Thus the input and output
### might be both the same category, but not both general.
###
"convert" : { "result" : "anyint", "arguments" : [0], "attributes" : "NoMem" },

### llvm.genx.convert.addr. : convert to address register category
###
### arg0: input value (i16 or vector of i16)
### arg1: constant offset (i16)
###
### Return value: converted value (same type)
###
### This intrinsic is inserted by the GenXCategoryConversion pass to represent
### a value being converted from a general value to an address, used as the
### variable index in an element or region access. There it is created with
### offset set to 0; GenXAddressCommoning may adjust that offset to try and
### stop the address conversion falling outside of the register into which it
### points to avoid going out of spec (bug 4395).
###
"convert_addr" : { "result" : "anyint", "arguments" : [0,"short"], "attributes" : "NoMem" },

### llvm.genx.constantpred. : load constant predicate (i1 or vector of i1)
###
### arg0: constant i1 or vector of i1
###
### Return value: loaded value, same type
###
### This intrinsic is inserted by GenXLowering to load a predicate constant.
### We could just use a bitcast, except that EarlyCSE follows
### GenXConstantMaterialization and it has a habit of putting the constant
### back in the wrregion.
"constantpred" : { "result" : "anyint", "arguments" : [0], "attributes" : "NoMem" },

### llvm.genx.add.addr.. : add an offset onto an address register
###
### arg0: lhs input (i16 or vector of i16) (overloaded)
### arg1: rhs input (i16 or vector of i16)
###
### Return value: result of add (same type as arg1)
###
### When the result of a constant add/sub is used as a variable index in
### a region access, GenXCategoryConversion converts it into this intrinsic
### so that it will be considered an add to an address register.
###
"add_addr" : { "result" : "anyint", "arguments" : ["anyint",0], "attributes" : "NoMem" },

### llvm.genx.rdpredregion.. : read region at specified offset from a predicate
###
### arg0: i1 vector (overloaded)
### arg1: constant i32 offset (in elements)
###
### Return value: v4i1/v8i1/v16i1 result of region read
###
### The number of elements to read is determined from the number of elements
### in the return type, and must be 4, 8 or 16.
### The offset must be a multiple of the number of elements.
###
"rdpredregion" : { "result" : "anyint", "arguments" : ["anyint","int"], "attributes" : "NoMem" },

### llvm.genx.wrpredregion.. : write region at specified offset into a predicate
###
### arg0: i1 old value of vector
### arg1: i1 subvector to write into region (overloaded)
### arg2: constant i32 offset (in elements)
###
### Return value: v4i1/v8i1/v16i1 result of region write
###
### The number of elements to write is determined from the number of elements
### in the "subvector to write" arg, and must be 4, 8 or 16.
### The offset must be a multiple of the number of elements.
###
"wrpredregion" : { "result" : "anyint", "arguments" : [0,"anyint","int"], "attributes" : "NoMem" },

### llvm.genx.wrpredpredregion.. : predicated write region at specified offset
### into a predicate
###
### arg0: vXi1 old value of vector
### arg1: vYi1 subvector to write into region (overloaded)
### arg2: constant i32 offset (in elements)
### arg3: vXi1 predicate
###
### Return value: vXi1 result of region write
###
### The number of elements to write is determined from the number of elements
### in the "subvector to write" arg, and must be 4, 8 or 16.
### The offset must be a multiple of the number of elements.
###
### The constant offset indexes both the vector itself and the predicate. This
### intrinsic is valid only if the predicate is an EM value, and the subvector
### operand is the result of a cmp (which is then baled in).
###
"wrpredpredregion" : { "result" : "anyint", "arguments" : [0,"anyint","int",0], "attributes" : "NoMem" },

### ``llvm.genx.wrconstregion....`` : write a constant region
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * arg0: vector to write region in to
### * arg1: subvector to write into the region, constant (overloaded)
### * arg2: i32 vstride in elements, constant
### * arg3: i32 width in elements, constant
### * arg4: i32 stride in elements, constant
### * arg5: i16 or vXi16 offset in bytes, constant (overloaded)
### * arg6: i32 parent width, constant, ignored
### * arg7: constant scalar i1 predicate value 1 (overloaded)
###
### * Return value: the updated vector with the region modified
###
### This is the same as llvm.genx.wrregion, but with the following restrictions:
###
### * the subvector to write is constant;
### * the offset is constant;
### * the predicate is 1.
###
### It is used by GenXConstants when inserting code to load a constant, and
### specifically does not participate in simplification or constant
### propagation so we do not lose that constant loading code.
###
### The operands are the same as llvm.genx.wrregion so it can mostly be handled
### by the same code as llvm.genx.wrregion.
###
"wrconstregion" : { "result" : "anyvector", "arguments" : [0,"anyvector","int","int","int","anyint","int","anyint"], "attributes" : "NoMem" },

### ``llvm.genx.output`` : Mark output arguments
### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
###
### * Return value: void
###
### This implementation intrinsic is to mark a list of output arguments.
### This intrinsic call only extends the live range of marked arguments and
### emits no code.
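###
### A minimal sketch (hypothetical values; note the vararg call syntax):
###
###   call void (...) @llvm.genx.output(<8 x float> %res, <8 x i32> %res2)
###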
### "output" : { "result" : "void", "arguments" : ["vararg"], "attributes" : "None" }, ### ``llvm.genx.output.1.`` : Mark output argument ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * Return value: void ### ### SPIRV does not support functions with variable-length argument number, ### so output_1 is output analog with single argument ### This implementation intrinsic is to mark output argument. ### This intrinsic call only extends the live range of marked argument and ### emits no code. ### "output_1" : { "result" : "void", "arguments" : ["any"], "attributes" : "None" }, ## ``llvm.genx.print.buffer`` : read stateless pointer to print buffer ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ``llvm.genx.print.buffer`` : read implicit arg print buffer ptr ## ## * return value: i64 address of print buffer ## ## this is generated by clang codegen and lowered by cmimpparam. ## "print_buffer" : { "result" : "long", "arguments" : [], "attributes" : "None" }, ## ``llvm.genx.print.format.index`` : add printf format string to collection ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ``llvm.genx.print.format.index`` : return index of printf format string ## ## * arg0: pointer for printf format string ## ## * Return value: the vector value read ## "print_format_index" : { "result" : "int", "arguments" : ["anyptr"], "attributes" : "NoMem" }, ## ``llvm.genx.address.convert`` : convert dataport address to integer ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## * arg0: pointer kernel argument (svmptr_t or state pointer like image) ## ## * Return value: i32/i64, surface/sampler index or stateless address ## ## Intrinsic is used as a temporary SPIRV instruction to convert ## distinct address arguments into simple format (i32/i64) that is ## used across all memory instructions. This is needed to encode ## SPIRV with appropriate types for kernel arguments. ## "address_convert" : { "result" : "anyint", "arguments" : ["anyptr"], "attributes" : "NoMem" }, ## ``llvm.genx.gaddr`` : take an address of a global variable ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## The semantics of this intrinsic is equal to ptrtoint instruction. ## Only global variable can be an argument of this intrinsic. ## ## * arg0: global variable ## ## * Return value: i64/i32 (depending on data layout) value of pointer ## "gaddr" : { "result" : "anyint", "arguments" : ["anyptr"], "attributes" : "NoMem" }, ## ``llvm.genx.jump.table`` : CMC internal, no VISA ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## * arg0: integer BasicBlock index in the full set of destinations ## * arg1-N: the full set of switch labels ## ## * Return value: selected label ## ## The intrinsic is a helper for switch jump tables generation. Arg0 ## will be used by visa switchjmp as index. Return value and arg1-N are ## used to make ir semantically legal. ## "jump_table" : { "result" : "anyptr", "arguments" : ["anyint", "vararg"], "attributes" : "NoMem" }, ## ``llvm.genx.write.predef.surface`` : write predefined surface variable ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## * arg0: ptr predefined surface variable ## * arg1: i32 value to write ## ## This corresponds to MOVS visa instruction and utilizes technique of using ## global variable in LLVM IR for predefined surfaces. 
## "write_predef_surface" : { "result": "void", "arguments" : ["anyptr", "int"], "attributes" : "WriteMem", }, ## Internal VC memory intrinsics. ## These versions are supposed to use predefined visa variables like %bss. ## Intrinsics are supposed to be internal to VC backend. ## ``llvm.genx.dword.atomic2.*.predef.surface`` : dword atomic with binary operator with predefined surface ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## * ``llvm.genx.dword.atomic2.add.predef.surface`` : vISA DWORD_ATOMIC ADD instruction ## * ``llvm.genx.dword.atomic2.sub.predef.surface`` : vISA DWORD_ATOMIC SUB instruction ## * ``llvm.genx.dword.atomic2.min.predef.surface`` : vISA DWORD_ATOMIC MIN instruction ## * ``llvm.genx.dword.atomic2.max.predef.surface`` : vISA DWORD_ATOMIC MAX instruction ## * ``llvm.genx.dword.atomic2.xchg.predef.surface`` : vISA DWORD_ATOMIC XCHG instruction ## * ``llvm.genx.dword.atomic2.and.predef.surface`` : vISA DWORD_ATOMIC AND instruction ## * ``llvm.genx.dword.atomic2.or.predef.surface`` : vISA DWORD_ATOMIC OR instruction ## * ``llvm.genx.dword.atomic2.xor.predef.surface`` : vISA DWORD_ATOMIC XOR instruction ## * ``llvm.genx.dword.atomic2.imin.predef.surface`` : vISA DWORD_ATOMIC IMIN instruction ## * ``llvm.genx.dword.atomic2.imax.predef.surface`` : vISA DWORD_ATOMIC IMAX instruction ## ## * (Exec_size inferred from element offset type) ## * arg0: vXi1 predicate (overloaded) ## * arg1: ptr predefined surface (overloaded) ## * arg2: vXi32 element offset in bytes (overloaded) ## * arg3: vXi32 src ## ## * Return value: vXi32 the old value read ## ## Predicate, element offset, src, and the return value must all have the ## same vector width, which must be 1, 8 or 16. ## "dword_atomic2_add_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_sub_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_min_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_max_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_xchg_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_and_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_or_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_xor_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_imin_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_imax_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, ## ``llvm.genx.dword.atomic2.*.predef.surface`` : dword atomic with fmin/fmax operation with predefined surface ## 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## * ``llvm.genx.dword.atomic2.fmin.predef.surface`` : vISA DWORD_ATOMIC FMIN instruction ## * ``llvm.genx.dword.atomic2.fmax.predef.surface`` : vISA DWORD_ATOMIC FMAX instruction ## * ``llvm.genx.dword.atomic2.fadd.predef.surface`` : vISA DWORD_ATOMIC FADD instruction ## * ``llvm.genx.dword.atomic2.fsub.predef.surface`` : vISA DWORD_ATOMIC FSUB instruction ## ## * (Exec_size inferred from element offset type) ## * arg0: vXi1 predicate (overloaded) ## * arg1: ptr predefined surface (overloaded) ## * arg2: vXi32 element offset in bytes (overloaded) ## * arg3: vXfloat src ## ## * Return value: vXfloat the old value read ## ## Predicate, element offset, src, and the return value must all have the ## same vector width, which must be 1, 8 or 16. ## "dword_atomic2_fmin_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_fmax_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_fadd_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_fsub_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0], "attributes" : "None", "platforms" : "-Xe2", }, ## ``llvm.genx.dword.atomic2.*.predef.surface`` : dword atomic with inc/dec operation with predefined surface ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## * ``llvm.genx.dword.atomic2.inc.predef.surface`` : vISA DWORD_ATOMIC INC instruction ## * ``llvm.genx.dword.atomic2.dec.predef.surface`` : vISA DWORD_ATOMIC DEC instruction ## ## * (Exec_size inferred from element offset type) ## * arg0: vXi1 predicate (overloaded) ## * arg1: ptr predefined surface (overloaded) ## * arg2: vXi32 element offset in bytes (overloaded) ## ## * Return value: vXi32 the old value read ## ## Predicate, element offset, src, and the return value must all have the ## same vector width, which must be 1, 8 or 16. ## "dword_atomic2_inc_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint"], "attributes" : "None", "platforms" : "-Xe2", }, "dword_atomic2_dec_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint"], "attributes" : "None", "platforms" : "-Xe2", }, ## ``llvm.genx.dword.atomic2.cmpxchg.predef.surface`` : vISA DWORD_ATOMIC CMPXCHG instruction with predefined surface ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## * (Exec_size inferred from element offset type) ## * arg0: vXi1 predicate (overloaded) ## * arg1: ptr predefined surface (overloaded) ## * arg2: vXi32 element offset in bytes (overloaded) ## * arg3: vXi32 src0 ## * arg4: vXi32 src1 ## ## * Return value: vXi32 the old value read ## ## Predicate, element offset, src, and the return value must all have the ## same vector width, which must be 1, 8 or 16. 
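## For illustration only, an assumed 8-wide call shape (%pred, %off, %cmp and
## %swap are invented names; the exact suffix follows the overloaded return,
## predicate, surface-pointer and offset types):
##
##   %old = call <8 x i32>
##       @llvm.genx.dword.atomic2.cmpxchg.predef.surface.v8i32.v8i1.p0.v8i32(
##           <8 x i1> %pred, ptr @bss, <8 x i32> %off,
##           <8 x i32> %cmp, <8 x i32> %swap)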
## "dword_atomic2_cmpxchg_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, ## ``llvm.genx.dword.atomic2.fcmpwr.predef.surface`` : vISA DWORD_ATOMIC FCMPWR instruction with predefined surface ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## * (Exec_size inferred from element offset type) ## * arg0: vXi1 predicate (overloaded) ## * arg1: ptr predefined surface (overloaded) ## * arg2: vXi32 element offset in bytes (overloaded) ## * arg3: vXfloat src0 ## * arg4: vXfloat src1 ## ## * Return value: vXfloat the old value read ## ## Predicate, element offset, src, and the return value must all have the ## same vector width, which must be 1, 8 or 16. ## "dword_atomic2_fcmpwr_predef_surface" : { "result" : "anyvector", "arguments" : ["anyvector","anyptr","anyint",0,0], "attributes" : "None", "platforms" : "-Xe2", }, ## ``llvm.genx.gather.masked.scaled2.predef.surface`` : vISA GATHER_SCALED instruction with predefined surface ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## * (Exec_size inferred from element offset type) ## * arg0: i32 log2 num blocks, constant (0/1/2 for num blocks 1/2/4) ## * arg1: i16 scale, constant ## * arg2: ptr predefined surface (overloaded) ## * arg3: i32 global offset in bytes ## * arg4: vXi32 element offset in bytes (overloaded) ## * arg5: vXi1 predicate (overloaded) ## ## * Return value: vXi32/float the data read ## "gather_masked_scaled2_predef_surface" : { "result" : "anyvector", "arguments" : ["int","short","anyptr","int","anyint","anyvector"], "attributes" : "ReadMem", "platforms" : "-Xe2", }, ## ``llvm.genx.gather4.masked.scaled2.predef.surface`` : vISA GATHER4_SCALED instruction with predefined surface ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## * (Exec_size inferred from element offset type) ## * arg0: i32 channel mask, constant ## * arg1: i16 scale, constant ## * arg2: ptr predefined surface (overloaded) ## * arg3: i32 global offset in bytes ## * arg4: vXi32 element offset in bytes ## * arg5: vXi1 predicate (overloaded) ## ## * Return value: vXi32/float the data read ## "gather4_masked_scaled2_predef_surface" : { "result" : "anyvector", "arguments" : ["int","short","anyptr","int","anyint","anyvector"], "attributes" : "ReadMem", "platforms" : "-Xe2", }, ## ``llvm.genx.scatter.scaled.predef.surface`` : vISA SCATTER_SCALED instruction with predefined surface ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## * (Exec_size inferred from element offset type) ## * arg0: vXi1 predicate (overloaded) ## * arg1: i32 log2 num blocks, constant (0/1/2 for num blocks 1/2/4) ## * arg2: i16 scale, constant ## * arg3: ptr predefined surface (overloaded) ## * arg4: i32 global offset in bytes ## * arg5: vXi32 element offset (overloaded) ## * arg6: data to write (overloaded) ## ## The vector width of the element offset arg is the number of elements to ## write, which must be power of 2 and less than or equal to 32. ## ## The predicate arg must have the same vector width. ## ## The data type to write must have UD, D or F type. For 1 and 2 byte (1 x num ## blocks) accesses the upper bytes will be ignored. 
## "scatter_scaled_predef_surface" : { "result" : "void", "arguments" : ["anyvector","int","short","anyptr","int","anyint","anyvector"], "attributes" : "None", "platforms" : "-Xe2", }, ## ``llvm.genx.scatter4.scaled.predef.surface`` : vISA SCATTER4_SCALED instruction with predefined surface ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## * (Exec_size inferred from element offset type) ## * arg0: vXi1 predicate (overloaded) ## * arg1: i32 channel mask, constant ## * arg2: i16 scale, constant ## * arg3: ptr predefined surface (overloaded) ## * arg4: i32 global offset in bytes ## * arg5: vXi32 element offset in bytes (overloaded) ## * arg6: data to write (overloaded) ## ## The vector width of the element offset arg is the number of elements to ## write, which must be 8 or 16. ## The predicate arg must have the same vector width. ## The instruction writes up to 4 channels per element, with the lowest 4 ## bits of the channel mask arg giving the mask of channels _not_ to read. ## The number of 0 bits in that lower 4 bits of the channel mask arg is the ## number of channels to write per element. ## The channels to write must be contiguous and starting at channel 0. ## The vector width of the data to write must be the number of elements ## times the number of channels to write per element. ## The element type of the data to write must be i32 or float. ## "scatter4_scaled_predef_surface" : { "result" : "void", "arguments" : ["anyvector","int","short","anyptr","int","anyint","anyvector"], "attributes" : "None", "platforms" : "-Xe2", }, ## ``llvm.genx.oword.ld*.predef.surface`` : oword load instruction with predefined surface ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## * ``llvm.genx.oword.ld.predef.surface`` : vISA OWORD_LD instruction ## * ``llvm.genx.oword.ld.unaligned.predef.surface`` : vISA OWORD_LD_UNALIGNED instruction ## ## * (log2 number of owords inferred from return type) ## * arg0: i32 is_modified, constant ## * arg1: ptr predefined surface variable (overloaded) ## * arg2: i32 offset (in owords for .ld / in bytes for .ld.unaligned) ## ## * Return value: vXiN the data read. ## ## The byte size of the return type must be 16, 32, 64, or 128. ## "oword_ld_predef_surface" : { "result" : "anyvector", "arguments" : ["int", "anyptr", "int"], "attributes": "ReadMem", "platforms" : "-Xe2", }, "oword_ld_unaligned_predef_surface" : { "result" : "anyvector", "arguments": ["int", "anyptr", "int"], "attributes" : "ReadMem", "platforms" : "-Xe2", }, ## ``llvm.genx.oword.st.predef.surface`` : vISA OWORD_ST instruction with predefined surface ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## ## * (log2 number of owords inferred from return type) ## * arg0: ptr predefined surface variable (overloaded) ## * arg1: i32 offset (in owords) ## * arg2: data to write (overloaded) ## ## The byte size of the data to write must be 16, 32, 64, or 128. 
## "oword_st_predef_surface" : { "result" : "void", "arguments" : ["anyptr", "int", "anyvector"], "attributes" : "None", "platforms" : "-Xe2", }, ## ``llvm.genx.*madw..`` : madw instruction, no saturation ## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ## * ``llvm.genx.smadw`` : result signed ## * ``llvm.genx.umadw`` : result unsigned ## ## result := arg0 * arg1 + arg2 ## ## * Return value: result, the full 64-bit of the results of multiplying two 32-bit ## integers and adding 32-bit integer(32b*32b+32b->64b). ## The low 32b of results are stored in the lower GRF and ## the high 32b of results are stored in the high GRF. ## ## Return width must be 2*GRF/sizeof(i32) ## Args width must be no more than GRF/sizeof(i32) and must be a power of two ## ## * arg0: first input, same element type as result ## * arg1: second input, same type as arg0 ## * arg2: third input, same type as arg0 ## "umadw" : { "result" : "anyint", "arguments" : ["anyint", 1, 1], "attributes" : "NoMem" }, "smadw" : { "result" : "anyint", "arguments" : ["anyint", 1, 1], "attributes" : "NoMem" }, ### ``llvm.genx.slm.init`` : slm_init instruction ### ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ### ### * arg0: slm size, i32 scalar integer type ### "slm_init" : { "result" : "void", "arguments" : ["int"], "attributes" : "None" }, } vc-intrinsics-0.22.1/GenXIntrinsics/include/llvm/GenXIntrinsics/Intrinsics.py000077500000000000000000000572301475147027500273270ustar00rootroot00000000000000#!/usr/bin/env python # ========================== begin_copyright_notice ============================ # # Copyright (C) 2019-2024 Intel Corporation # # SPDX-License-Identifier: MIT # # =========================== end_copyright_notice ============================= import os import sys import re import importlib import functools # Compatibility with Python 3.X if sys.version_info[0] >= 3: global reduce reduce = functools.reduce OverloadedTypes = ["any","anyint","anyfloat","anyptr","anyvector"] VectorTypes = ["2","4","8","16"] DestSizes = ["","","21","22","23","24"] type_map = \ { "void":"0", "bool":"1", "char":"2", "short":"3", "int":"4", "long":"5", "half":"6", "float":"7", "double":"8", "2":"9", "4":"A", "8":"B", "16":"C", "32":"D", } pointerTypesi8_map = \ { "ptr_private":"E2", "ptr_global":"<27>12", "ptr_constant":"<27>22", "ptr_local":"<27>32", "ptr_generic":"<27>42", } any_map = \ { "any":0, "anyint":1, "anyfloat":2, "anyvector":3, "anyptr":4, } vararg_val = "<29>" attribute_map = { "None": set(["NoUnwind"]), "NoMem": set(["NoUnwind","ReadNone"]), "ReadMem": set(["NoUnwind","ReadOnly"]), "ReadArgMem": set(["NoUnwind","ReadOnly","ArgMemOnly"]), "ReadWriteArgMem": set(["NoUnwind","ArgMemOnly"]), "NoReturn": set(["NoUnwind","NoReturn"]), "NoDuplicate": set(["NoUnwind","NoDuplicate"]), "Convergent": set(["NoUnwind","Convergent"]), "InaccessibleMemOnly": set(["NoUnwind","InaccessibleMemOnly"]), "WriteMem": set(["NoUnwind","WriteOnly"]), "SideEffects": set(["NoUnwind"]), } modref_map = { "ReadNone": "none", "ReadOnly": "readOnly", "WriteOnly": "writeOnly" } # order does really matter. 
# It is used to define ordering between the respected platforms platform_list = [ "Gen8", "Gen9", "Gen9LP", "Gen11", "XeLP", "XeHP", "XeHPG", "XeLPG", "XeLPGPlus", "XeHPC", "XeHPCVG", "Xe2", "Xe3", ] def getAttributeList(Attrs): """ Takes a list of attribute names, calculates the union, and returns a list of the the given attributes """ s = reduce(lambda acc, v: attribute_map[v] | acc, Attrs, set()) return ['Attribute::'+x for x in sorted(s)] def getAttributeListModRef(Attrs): """ Takes a list of attribute names, calculates the union, and returns a list of the the given attributes """ s = reduce(lambda acc, v: attribute_map[v] | acc, Attrs, set()) attr = [] isReturn = True for x in sorted(s): if x in modref_map: attr += ['addMemoryAttr(MemoryEffects::' + modref_map[x] + '())'] else: attr += ['addAttribute(Attribute::'+x+')'] if x == "NoReturn" : isReturn = False if isReturn : attr += ['addAttribute(Attribute::WillReturn)'] return attr Intrinsics = dict() parse = sys.argv for i in range(len(parse)): #Populate the dictionary with the appropriate Intrinsics if i != 0: if (".py" in parse[i]): module = importlib.import_module(os.path.split(parse[i])[1].replace(".py","")) Intrinsics.update(module.Imported_Intrinsics) # Output file is always last outputFile = parse[-1] def ik_compare(ikl, ikr): ikl = ikl.replace("_",".") ikr = ikr.replace("_",".") if ikl < ikr: return -1 elif ikl > ikr: return 1 else: return 0 # NOTE: the ordering does matter here as lookupLLVMIntrinsicByName depend on it ID_array = sorted(Intrinsics, key = functools.cmp_to_key(ik_compare)) def emitPrefix(): f = open(outputFile,"w") f.write("// VisualStudio defines setjmp as _setjmp\n" "#if defined(_MSC_VER) && defined(setjmp) && \\\n" " !defined(setjmp_undefined_for_msvc)\n" "# pragma push_macro(\"setjmp\")\n" "# undef setjmp\n" "# define setjmp_undefined_for_msvc\n" "#endif\n\n") f.close() def createTargetData(): f = open(outputFile,"a") f.write("// Target mapping\n" "#ifdef GET_INTRINSIC_TARGET_DATA\n") f.write( "struct IntrinsicTargetInfo {\n" " llvm::StringLiteral Name;\n" " size_t Offset;\n" " size_t Count;\n" "};\n" "static constexpr IntrinsicTargetInfo TargetInfos[] = {\n" " {llvm::StringLiteral(\"\"), 0, 0},\n" " {llvm::StringLiteral(\"genx\"), 0, " + str(len(ID_array)) + "},\n" "};\n") f.write("#endif\n\n") f.close() def generateEnums(): f = open(outputFile,"a") f.write("// Enum values for Intrinsics.h\n" "#ifdef GET_INTRINSIC_ENUM_VALUES\n") for i in range(len(ID_array)): pretty_indent = 40 - len(ID_array[i]) f.write(" genx_" + ID_array[i]+",") f.write((" "*pretty_indent)+'// llvm.genx.'+ID_array[i].replace("_",".")+'\n') f.write("#endif\n\n") f.close() def generateIDArray(): f = open(outputFile,"a") f.write("// Intrinsic ID to name table\n" "#ifdef GET_INTRINSIC_NAME_TABLE\n") for i in range(len(ID_array)): f.write(' MANGLE("llvm.genx.'+ID_array[i].replace("_",".")+'"),\n') f.write("#endif\n\n") f.close() def numberofCharacterMatches(array_of_strings): other_array = [] if isinstance(array_of_strings,list): for i in range(len(array_of_strings)): final_num = 0 char_string = str() for j in range(len(array_of_strings[i])): char_string += array_of_strings[i][j] matching = [s for s in array_of_strings if char_string == s[:len(char_string)]] if len(matching) <= 1: break else: final_num += 1 other_array.append([final_num-7,array_of_strings[i][7:]]) #Subtract 7 because of GenISA_ return other_array def sortedIntrinsicsOnLenth(): final_array = [] special_array = numberofCharacterMatches(ID_array) for i in 
range(len(special_array)): pair = [special_array[i][1] + "@","GenISAIntrinsic::GenISA_"+special_array[i][1]] final_array.append([special_array[i][0],pair]) f = open(outputFile,"a") f.write("// Sorted by length table\n" "#ifdef GET_FUNCTION_RECOGNIZER\n\n" "struct IntrinsicEntry\n" "{\n" " unsigned num;\n" " GenISAIntrinsic::ID id;\n" " const char* str;\n};\n\n" "static const std::array LengthTable = {{\n") for i in range(len(final_array)): #Go through and write each element f.write("{ "+str(final_array[i][0])+", "+str(final_array[i][1][1])+", MANGLE(\""+str(final_array[i][1][0])+"\")}") if i != len(final_array) - 1: f.write(", ") if i%2 == 0: f.write("\n") f.write("}};\n\n") #Now to write the algorithm to search f.write("std::string input_name(Name);\n" "unsigned start = 0;\n" "unsigned end = "+str(len(final_array))+";\n" "unsigned initial_size = end;\n" "unsigned cur_pos = (start + end) / 2;\n" "char letter;\n" "char input_letter;\n" "bool isError = false;\n" "bool bump = false;\n" "unsigned start_index = std::string(\"llvm.genx.GenISA.\").size();\n" "for (unsigned i = 0; i < Len; i++)\n" "{\n" " input_letter = input_name[start_index + i];\n" " unsigned counter = 0;\n" " while (1)\n" " {\n" " if (counter == initial_size || cur_pos >= initial_size)\n" " {\n" " isError = true;\n" " break;\n" " }\n" " counter++;\n" " letter = LengthTable[cur_pos].str[i];\n" " if (letter == input_letter)\n" " {\n" " if (LengthTable[cur_pos].num == i)\n" " return LengthTable[cur_pos].id;\n" " bump = true;\n" " break;\n" " }\n" " else if (input_letter == '\\0' && letter == '@')\n" " return LengthTable[cur_pos].id;\n" " else if (input_letter == '.' && letter == '_')\n" " break;\n" " else if (input_letter == '.' && letter == '@')\n" " {\n" " unsigned original_cur_pos = cur_pos;\n" " while (1)\n" " {\n" " if (cur_pos >= initial_size || LengthTable[cur_pos].num < i)\n" " return LengthTable[original_cur_pos].id;\n" " if (LengthTable[cur_pos].str[i] == '_')\n" " break;\n" " cur_pos += 1;\n" " }\n" " break;\n" " }\n" " else if ((bump && letter < input_letter) || letter == '@')\n" " {\n" " cur_pos += 1;\n" " continue;\n" " }\n" " else if (bump && letter > input_letter)\n" " {\n" " cur_pos -= 1;\n" " continue;\n" " }\n" " else if (letter < input_letter)\n" " start = cur_pos;\n" " else\n" " end = cur_pos;\n" " cur_pos = (start + end) / 2;\n" " }\n" " if (isError)\n" " break;\n" "}\n") f.write("\n#endif\n\n") f.close() def createOverloadTable(): f = open(outputFile,"a") f.write("// Intrinsic ID to overload bitset\n" "#ifdef GET_INTRINSIC_OVERLOAD_TABLE\n" "static const uint8_t OTable[] = {\n 0") for i in range(len(ID_array)): if ((i+1)%8 == 0): f.write(",\n 0") isOverloadable = False genISA_Intrinsic = Intrinsics[ID_array[i]] for key in genISA_Intrinsic: val = genISA_Intrinsic[key] if isinstance(val,list): for z in range(len(val)): if isinstance(val[z],int): continue elif "any" in val[z]: isOverloadable = True break else: if "any" in val: isOverloadable = True break if isOverloadable: f.write(" | (1U<<" + str((i+1)%8) + ")") f.write("\n};\n\n") f.write("assert( ((id / 8) < (sizeof(OTable) / sizeof(OTable[0]))) && " "\"Overload Table index overflow\");\n"); f.write("return (OTable[id/8] & (1 << (id%8))) != 0;\n") f.write("#endif\n\n") f.close() def createOverloadRetTable(): f = open(outputFile,"a") f.write("// Is ret overloaded\n" "#ifdef GET_INTRINSIC_OVERLOAD_RET_TABLE\n" "switch(IntrinID) {\n" "default:\n" " return false;\n") for i in range(len(ID_array)): genISA_Intrinsic = Intrinsics[ID_array[i]] isOverloadable = 
False ret = genISA_Intrinsic["result"] if "any" in ret: isOverloadable = True elif isinstance(ret, list): for j in range(len(ret)): if "any" in ret[j]: isOverloadable = True if isOverloadable: f.write("case GenXIntrinsic::genx_" + ID_array[i] + ":\n") isOverloadable = False f.write(" return true;\n") f.write("}\n") f.write("#endif\n\n") f.close() def createOverloadArgsTable(): f = open(outputFile,"a") f.write("// Is arg overloaded\n" "#ifdef GET_INTRINSIC_OVERLOAD_ARGS_TABLE\n" "switch(IntrinID) {\n" "default: llvm_unreachable(\"Unknown intrinsic ID\");\n") for i in range(len(ID_array)): f.write("case GenXIntrinsic::genx_" + ID_array[i]+": ") argNums = [] genISA_Intrinsic = Intrinsics[ID_array[i]] args = genISA_Intrinsic["arguments"] if isinstance(args,list): for z in range(len(args)): if isinstance(args[z],int): continue elif "any" in args[z]: argNums.append(z) else: if "any" in args: argNums.append(0) if not argNums: f.write("\n return false;\n") else: f.write("{\n switch(ArgNum) {\n" " default: return false;\n") for arg in argNums: f.write(" case " + str(arg) + ": return true;\n") f.write(" }\n}\n") #info for llvm.fma f.write("case Intrinsic::fma:\n" " return false;\n") f.write("}\n") f.write("#endif\n\n") f.close() def addAnyTypes(value,argNum): return_val = str() default_value = str() if "any:" in value: default_value = value[4:] #get the default value encoded after the "any" type value = "any" calculated_num = (argNum << 3) | any_map[value] if calculated_num < 16: return_val = hex(calculated_num).upper()[2:] else: return_val = "<" + str(calculated_num) + ">" #Can't represent in hex we will need to use long table return_val = "F" + return_val if default_value: encoded_default_value = encodeTypeString([default_value], str(), [])[0] #encode the default value return_val = return_val + encoded_default_value return return_val def addVectorTypes(source): vec_str = str() for vec in range(len(VectorTypes)): if VectorTypes[vec] in source: vec_str = type_map[source.split(VectorTypes[vec])[0]] vec_str = type_map[VectorTypes[vec]] + vec_str break return vec_str def encodeTypeString(array_of_types,type_string,array_of_anys): for j in range(len(array_of_types)): if isinstance(array_of_types[j],int): type_string += array_of_anys[array_of_types[j]] elif array_of_types[j] in type_map: type_string += type_map[array_of_types[j]] elif array_of_types[j] == "vararg": type_string += vararg_val else: #vector or any case if "any" in array_of_types[j]: new_string = addAnyTypes(array_of_types[j], len(array_of_anys)) type_string += new_string array_of_anys.append(new_string) elif "ptr_" in array_of_types[j]: type_string += pointerTypesi8_map[array_of_types[j]] else: type_string += addVectorTypes(array_of_types[j]) return [type_string,array_of_anys] def createTypeTable(): IIT_Basic = [] IIT_Long = [] # For the first part we will create the basic type table for i in range(len(ID_array)): genISA_Intrinsic = Intrinsics[ID_array[i]] # This is our array of types dest = genISA_Intrinsic['result'] source_list = genISA_Intrinsic['arguments'] anyArgs_array = [] type_string = str() #Start with Destination if isinstance(dest,str): dest = [dest] else: if len(dest) > 1: type_string = "<" + DestSizes[len(dest)] + ">" dest_result = encodeTypeString(dest,type_string,anyArgs_array) type_string = dest_result[0] anyArgs_array = dest_result[1] #Next we go over the Source source_result = encodeTypeString(source_list,type_string,anyArgs_array) type_string = source_result[0] array_of_longs = 
re.findall(r"(?<=\<)(.*?)(?=\>)",type_string) #Search for my long values <> type_string = re.sub(r"(<)(.*?)(>)",".",type_string) #Replace long_nums for now with . IIT_Basic.append(["0x"+type_string[::-1],array_of_longs]) #Reverse the string before appending and add array of longs # Now we will create the table for entries that take up more than 4 bytes pos_counter = 0 #Keeps track of the position in the Long Encoding table for i in range(len(IIT_Basic)): isGreaterThan10 = len(IIT_Basic[i][0]) >= 10 isLongArrayUsed = len(IIT_Basic[i][1]) > 0 if isGreaterThan10 or isLongArrayUsed: hex_list = list(reversed(IIT_Basic[i][0][2:])) #remove "0x" if len(hex_list) == 8 and not isLongArrayUsed and int(hex_list[-1],16) < 8: #checks if bit 32 is used continue; IIT_Basic[i][0] = "(1U<<31) | " + str(pos_counter) long_counter = 0 for j in range(len(hex_list)): if hex_list[j] == ".": #Now to replace the "." with an actual number IIT_Long.append(int(IIT_Basic[i][1][long_counter])) long_counter += 1 else: IIT_Long.append(int(hex_list[j],16)) # convert hex to int pos_counter += 1 IIT_Long.append(-1) #keeps track of new line add at the end pos_counter += 1 #Write the IIT_Table f = open(outputFile,"a") f.write("// Global intrinsic function declaration type table.\n" "#ifdef GET_INTRINSIC_GENERATOR_GLOBAL\n" "static const unsigned IIT_Table[] = {\n ") for i in range(len(IIT_Basic)): #write out the IIT_Table f.write(str(IIT_Basic[i][0]) + ", ") if i%8 == 7: f.write("\n ") f.write("\n};\n\n") #Write the IIT_LongEncodingTable f.write("static const unsigned char IIT_LongEncodingTable[] = {\n /* 0 */ ") for i in range(len(IIT_Long)): newline = False if IIT_Long[i] == -1: IIT_Long[i] = 0 newline = True f.write(str(IIT_Long[i]) + ", ") if newline and i != len(IIT_Long)-1: f.write("\n /* "+ str(i+1) + " */ ") f.write("\n 255\n};\n\n#endif\n\n") f.close() def createAttributeTable(): f = open(outputFile,"a") f.write("// Add parameter attributes that are not common to all intrinsics.\n" "#ifdef GET_INTRINSIC_ATTRIBUTES\n" "AttributeList GenXIntrinsic::getAttributes(LLVMContext &C, GenXIntrinsic::ID id) {\n" " static const uint8_t IntrinsicsToAttributesMap[] = {\n") attribute_Array = [] for i in range(len(ID_array)): found = False intrinsic_attribute = Intrinsics[ID_array[i]]['attributes'] #This is the location of that attribute for j in range(len(attribute_Array)): if intrinsic_attribute == attribute_Array[j]: found = True f.write(" " + str(j+1) + ", // llvm.genx." + ID_array[i].replace("_",".") + "\n") break if not found: f.write(" " + str(len(attribute_Array)+1) + ", // llvm.genx." 
+ ID_array[i].replace("_",".") + "\n") attribute_Array.append(intrinsic_attribute) f.write(" };\n\n") f.write(" unsigned AttrIdx = id - 1 - GenXIntrinsic::not_genx_intrinsic;\n" " #ifndef NDEBUG\n" " const size_t AttrMapNum = sizeof(IntrinsicsToAttributesMap)/sizeof(IntrinsicsToAttributesMap[0]);\n" " assert(AttrIdx < AttrMapNum && \"invalid attribute index\");\n" " #endif // NDEBUG\n") f.write(" AttributeList AS[1];\n" #Currently only allowed to have one attribute per instrinsic " unsigned NumAttrs = 0;\n" " if (id != 0) {\n" " switch(IntrinsicsToAttributesMap[AttrIdx]) {\n" " default: llvm_unreachable(\"Invalid attribute number\");\n") for i in range(len(attribute_Array)): #Building case statements Attrs = getAttributeList([x.strip() for x in attribute_Array[i].split(',')]) AttrModRef = getAttributeListModRef([x.strip() for x in attribute_Array[i].split(',')]) f.write(""" case {num}: {{ #if LLVM_VERSION_MAJOR >= 16 AttrBuilder Atts(C); Atts.{attrs_mod}; #else const Attribute::AttrKind Atts[] = {{{attrs}}}; #endif AS[0] = AttributeList::get(C, AttributeList::FunctionIndex, Atts); NumAttrs = 1; break; }}\n""".format(num=i+1, attrs_mod='.'.join(AttrModRef), attrs=','.join(Attrs))) f.write(" }\n" " }\n" " return AttributeList::get(C, ArrayRef(AS, NumAttrs));\n" "}\n" "#endif // GET_INTRINSIC_ATTRIBUTES\n\n") f.close() def platformExprProcess(curr_line,platf_expr,platforms): platf_expr = platf_expr.strip() # simple case platf_id = platforms.get(platf_expr) if platf_id is not None: curr_line[platf_id] = 1; # "platform+" case: elif platf_expr[-1] == "+": platf_id = platforms.get(platf_expr[:-1]) if platf_id is None: raise NameError("Error in platf in " + str(Intrinsics[ID_array[i]])) for j in range(platf_id,len(platforms)): curr_line[j] = 1; # "-platform" case: elif platf_expr[0] == "-": platf_id = platforms.get(platf_expr[1:]) if platf_id is None: raise NameError("Error in platf in " + str(Intrinsics[ID_array[i]])) for j in range(platf_id): curr_line[j] = 1; # "~platform" case elif platf_expr[0] == "~": platf_id = platforms.get(platf_expr[1:]) if platf_id is None: raise NameError("Error in platf in " + str(Intrinsics[ID_array[i]])) curr_line[platf_id] = 0; elif platf_expr == "ALL": curr_line = [1]*len(platforms) else: raise NameError("Error in platf in " + str(Intrinsics[ID_array[i]])) return curr_line def override_platform_name(platform): return platform def createPlatformTable(): f = open(outputFile,"a") # platforms dict "platform" : number platforms = { platform_list[i] : i for i in range(len(platform_list)) } # by default all platfroms are supported support_matrix = [ [1]*len(platforms) for i in range(len(ID_array))] # fill support matrix for i in range(len(ID_array)): platf_expr = Intrinsics[ID_array[i]].get('platforms') if platf_expr is None: continue curr_line = [0]*len(platforms) if not isinstance(platf_expr,list): platf_expr = [platf_expr] for expr in platf_expr: curr_line = platformExprProcess(curr_line,expr,platforms) # swope line support_matrix[i] = curr_line f.write("// Add list of supported intrinsics for each platform.\n" "#ifdef GET_INTRINSIC_PLATFORMS\n" "static const std::map> SupportedIntrinsics {\n") transformed_matrix = [list(x) for x in zip(*support_matrix)] for pl,ar in zip(platforms,transformed_matrix): dump_ar = str(ar).replace("[", "{" ,1).replace("]", "}" ,1) name = override_platform_name(pl) wrstring = "{MANGLE(\"" + str(name) + "\") , " + str(dump_ar) + " },\n" f.write(wrstring) f.write("};\n") f.write("#endif // GET_INTRINSIC_PLATFORMS\n") f.close() def 
emitSuffix(): f = open(outputFile,"a") f.write("#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)\n" "// let's return it to _setjmp state\n" "# pragma pop_macro(\"setjmp\")\n" "# undef setjmp_undefined_for_msvc\n" "#endif\n\n") f.close() # main functions in order emitPrefix() createTargetData() generateEnums() generateIDArray() createOverloadTable() createOverloadArgsTable() createOverloadRetTable() sortedIntrinsicsOnLenth() createTypeTable() createAttributeTable() createPlatformTable() emitSuffix() vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/000077500000000000000000000000001475147027500235015ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/ADT/000077500000000000000000000000001475147027500241115ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/ADT/Optional.h000066400000000000000000000010611475147027500260450ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2022 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #ifndef VCINTR_ADT_OPTIONAL_H #define VCINTR_ADT_OPTIONAL_H #if VC_INTR_LLVM_VERSION_MAJOR < 16 #include namespace VCINTR { template using Optional = llvm::Optional; } #else #include namespace VCINTR { template using Optional = std::optional; } #endif #endif vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/ADT/StringRef.h000066400000000000000000000011511475147027500261630ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2022 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #ifndef VCINTR_ADT_STRINGREF_H #define VCINTR_ADT_STRINGREF_H #include namespace VCINTR { namespace StringRef { inline bool starts_with(llvm::StringRef S, llvm::StringRef Prefix) { #if VC_INTR_LLVM_VERSION_MAJOR >= 16 return S.starts_with(Prefix); #else return S.startswith(Prefix); #endif } } // namespace StringRef } // namespace VCINTR #endif vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/Analysis/000077500000000000000000000000001475147027500252645ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/Analysis/InstructionSimplify.h000066400000000000000000000031421475147027500314730ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2022 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #ifndef VCINTR_ANALYSIS_INSTRUCTIONSIMPLIFY_H #define VCINTR_ANALYSIS_INSTRUCTIONSIMPLIFY_H #include namespace VCINTR { inline llvm::Value *SimplifyInsertElementInst(llvm::Value *Vec, llvm::Value *Elt, llvm::Value *Idx, const llvm::SimplifyQuery &Q) { #if VC_INTR_LLVM_VERSION_MAJOR <= 14 return llvm::SimplifyInsertElementInst(Vec, Elt, Idx, Q); #else return llvm::simplifyInsertElementInst(Vec, Elt, Idx, Q); #endif } inline llvm::Value *SimplifyExtractElementInst(llvm::Value *Vec, llvm::Value *Idx, const llvm::SimplifyQuery &Q) { #if VC_INTR_LLVM_VERSION_MAJOR <= 14 return llvm::SimplifyExtractElementInst(Vec, Idx, Q); #else return llvm::simplifyExtractElementInst(Vec, Idx, Q); #endif } inline llvm::Value *SimplifyCastInst(unsigned CastOpc, llvm::Value *Op, llvm::Type *Ty, const llvm::SimplifyQuery &Q) { #if 
VC_INTR_LLVM_VERSION_MAJOR <= 14 return llvm::SimplifyCastInst(CastOpc, Op, Ty, Q); #else return llvm::simplifyCastInst(CastOpc, Op, Ty, Q); #endif } } // namespace VCINTR #endif // VCINTR_ANALYSIS_INSTRUCTIONSIMPLIFY_H vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/IR/000077500000000000000000000000001475147027500240135ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/IR/Attributes.h000066400000000000000000000056771475147027500263310ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2021 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #ifndef VCINTR_IR_ATTRIBUTES_H #define VCINTR_IR_ATTRIBUTES_H #include namespace VCINTR { namespace AttributeList { inline bool hasFnAttr(const llvm::AttributeList &AttrList, llvm::Attribute::AttrKind Kind) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 return AttrList.hasFnAttr(Kind); #else return AttrList.hasFnAttribute(Kind); #endif } inline bool hasFnAttr(const llvm::AttributeList &AttrList, llvm::StringRef Kind) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 return AttrList.hasFnAttr(Kind); #else return AttrList.hasFnAttribute(Kind); #endif } inline bool hasAttributeAtIndex(const llvm::AttributeList &AttrList, unsigned Index, llvm::Attribute::AttrKind Kind) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 return AttrList.hasAttributeAtIndex(Index, Kind); #else return AttrList.hasAttribute(Index, Kind); #endif } inline bool hasAttributeAtIndex(const llvm::AttributeList &AttrList, unsigned Index, llvm::StringRef Kind) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 return AttrList.hasAttributeAtIndex(Index, Kind); #else return AttrList.hasAttribute(Index, Kind); #endif } inline llvm::Attribute getAttributeAtIndex(const llvm::AttributeList &AttrList, unsigned Index, llvm::Attribute::AttrKind Kind) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 return AttrList.getAttributeAtIndex(Index, Kind); #else return AttrList.getAttribute(Index, Kind); #endif } inline llvm::Attribute getAttributeAtIndex(const llvm::AttributeList &AttrList, unsigned Index, llvm::StringRef Kind) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 return AttrList.getAttributeAtIndex(Index, Kind); #else return AttrList.getAttribute(Index, Kind); #endif } inline llvm::AttributeList removeAttributeAtIndex(llvm::LLVMContext &C, const llvm::AttributeList &AttrList, unsigned Index, llvm::Attribute::AttrKind Kind) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 return AttrList.removeAttributeAtIndex(C, Index, Kind); #else return AttrList.removeAttribute(C, Index, Kind); #endif } inline llvm::AttributeList removeAttributeAtIndex(llvm::LLVMContext &C, const llvm::AttributeList &AttrList, unsigned Index, llvm::StringRef Kind) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 return AttrList.removeAttributeAtIndex(C, Index, Kind); #else return AttrList.removeAttribute(C, Index, Kind); #endif } } // namespace AttributeList } // namespace VCINTR #endif // VCINTR_IR_ATTRIBUTES_H vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/IR/DerivedTypes.h000066400000000000000000000041501475147027500265730ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2021 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #ifndef VCINTR_IR_DERIVEDYPES_H #define VCINTR_IR_DERIVEDYPES_H #include #include namespace VCINTR { // TODO: move this to 
namespace VectorType and rename to "get" #if VC_INTR_LLVM_VERSION_MAJOR >= 9 inline llvm::VectorType *getVectorType(llvm::Type *ElementType, llvm::ElementCount EC) { return llvm::VectorType::get(ElementType, EC); } #endif inline llvm::VectorType *getVectorType(llvm::Type *ElementType, unsigned NumElements) { #if VC_INTR_LLVM_VERSION_MAJOR >= 11 return llvm::VectorType::get(ElementType, NumElements, false /*Scalable*/); #else return llvm::VectorType::get(ElementType, NumElements); #endif } inline llvm::StructType *getTypeByName(llvm::Module *M, llvm::StringRef Name) { #if VC_INTR_LLVM_VERSION_MAJOR >= 12 return llvm::StructType::getTypeByName(M->getContext(), Name); #else return M->getTypeByName(Name); #endif } namespace VectorType { inline unsigned getNumElements(llvm::VectorType *VecType) { using namespace llvm; #if VC_INTR_LLVM_VERSION_MAJOR <= 10 return VecType->getNumElements(); #else auto *FixedVecType = cast(VecType); return FixedVecType->getNumElements(); #endif } } // namespace VectorType namespace PointerType { inline llvm::PointerType *getWithSamePointeeType(llvm::PointerType *PT, unsigned AddressSpace) { #if VC_INTR_LLVM_VERSION_MAJOR < 14 return llvm::PointerType::get(PT->getElementType(), AddressSpace); #elif VC_INTR_LLVM_VERSION_MAJOR < 17 return llvm::PointerType::getWithSamePointeeType(PT, AddressSpace); #else return llvm::PointerType::get(PT->getContext(), AddressSpace); #endif } } // namespace PointerType } // namespace VCINTR #endif // VCINTR_IR_DERIVEDYPES_H vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/IR/Function.h000066400000000000000000000031061475147027500257510ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2024 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #ifndef VCINTR_IR_FUNCTION_H #define VCINTR_IR_FUNCTION_H #include namespace VCINTR { namespace Function { inline llvm::Argument *getArg(const llvm::Function &F, unsigned ArgNo) { assert(F.arg_size() > ArgNo); llvm::Argument *Arg = nullptr; #if LLVM_VERSION_MAJOR < 10 // similar to lvm::Function::getArg implementation auto ArgIt = F.arg_begin(); std::advance(ArgIt, ArgNo); Arg = const_cast(&*ArgIt); #else Arg = F.getArg(ArgNo); #endif return Arg; } inline void addAttributeAtIndex(llvm::Function &F, unsigned Index, llvm::Attribute Attr) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 F.addAttributeAtIndex(Index, Attr); #else F.addAttribute(Index, Attr); #endif } inline void removeAttributeAtIndex(llvm::Function &F, unsigned Index, llvm::Attribute::AttrKind Kind) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 F.removeAttributeAtIndex(Index, Kind); #else F.removeAttribute(Index, Kind); #endif } inline void removeAttributeAtIndex(llvm::Function &F, unsigned Index, llvm::StringRef Kind) { #if VC_INTR_LLVM_VERSION_MAJOR >= 14 F.removeAttributeAtIndex(Index, Kind); #else F.removeAttribute(Index, Kind); #endif } } // namespace Function } // namespace VCINTR #endif // VCINTR_IR_GLOBALVARIABLE_H vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/IR/Instructions.h000066400000000000000000000036411475147027500266740ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2022 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #ifndef VCINTR_IR_INSTRUCTIONS_H #define VCINTR_IR_INSTRUCTIONS_H #include #include 
#include namespace VCINTR { namespace ShuffleVectorInst { auto static constexpr UndefMaskElem = -1; // LLVM <= 10 does not have ShuffleVectorInst ctor which accepts ArrayRef // This method returns mask with appropriate type for ShuffleVectorInst ctor #if VC_INTR_LLVM_VERSION_MAJOR <= 10 inline llvm::Constant *getShuffleMask(llvm::ArrayRef Mask, llvm::LLVMContext &Context) { using namespace llvm; auto Indices = SmallVector{}; auto *Int32Ty = IntegerType::getInt32Ty(Context); std::transform(Mask.begin(), Mask.end(), std::back_inserter(Indices), [&](int El) -> llvm::Constant * { if (El == UndefMaskElem) return UndefValue::get(Int32Ty); else return ConstantInt::get(Int32Ty, El); }); return ConstantVector::get(Indices); } #else inline llvm::ArrayRef getShuffleMask(llvm::ArrayRef Mask, llvm::LLVMContext &Context) { return Mask; } #endif } // namespace ShuffleVectorInst template inline ArgKind &getValue(VCINTR::Optional &opt) { #if VC_INTR_LLVM_VERSION_MAJOR < 15 return opt.getValue(); #else return opt.value(); #endif } template inline const ArgKind &getValue(const VCINTR::Optional &opt) { #if VC_INTR_LLVM_VERSION_MAJOR < 15 return opt.getValue(); #else return opt.value(); #endif } } // namespace VCINTR #endif // VCINTR_IR_INSTRUCTIONS_H vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/IR/Intrinsics.h000066400000000000000000000013251475147027500263120ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2021-2021 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #ifndef VCINTR_IR_INTRINSICS_H #define VCINTR_IR_INTRINSICS_H #include namespace VCINTR { namespace Intrinsic { inline std::string getName(llvm::Intrinsic::ID Id, llvm::ArrayRef Tys) { #if VC_INTR_LLVM_VERSION_MAJOR >= 13 return llvm::Intrinsic::getNameNoUnnamedTypes(Id, Tys); #else return llvm::Intrinsic::getName(Id, Tys); #endif } } // namespace Intrinsic } // namespace VCINTR #endif // VCINTR_IR_INTRINSICS_H vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/IR/Type.h000066400000000000000000000016631475147027500251130ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2022-2024 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #ifndef VCINTR_IR_TYPE_H #define VCINTR_IR_TYPE_H #include namespace VCINTR { namespace Type { inline llvm::Type *getNonOpaquePtrEltTy(const llvm::Type *PTy) { #if VC_INTR_LLVM_VERSION_MAJOR < 14 return PTy->getPointerElementType(); #elif VC_INTR_LLVM_VERSION_MAJOR < 17 return PTy->getNonOpaquePointerElementType(); #else llvm_unreachable("Pointers no longer have element types"); #endif } inline bool isOpaquePointerTy(const llvm::Type *Ty) { #if VC_INTR_LLVM_VERSION_MAJOR < 14 return false; #elif VC_INTR_LLVM_VERSION_MAJOR < 17 return Ty->isOpaquePointerTy(); #else return Ty->isPointerTy(); #endif } } // namespace Type } // namespace VCINTR #endif // VCINTR_IR_TYPE_H vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/Support/000077500000000000000000000000001475147027500251555ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/include/llvmVCWrapper/Support/Alignment.h000066400000000000000000000016171475147027500272510ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2021 Intel Corporation 
SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #ifndef VCINTR_IR_ALIGNMENT_H #define VCINTR_IR_ALIGNMENT_H #include namespace VCINTR { namespace Align { #if VC_INTR_LLVM_VERSION_MAJOR <= 9 template unsigned getAlign(TValue *Val) { return Val->getAlignment(); } #elif VC_INTR_LLVM_VERSION_MAJOR <= 10 template llvm::MaybeAlign getAlign(TValue *Val) { // LLVM 10 instructions accept MaybeAlign but do not provide // getMaybeAlignMethod return llvm::MaybeAlign(Val->getAlignment()); } #else template auto getAlign(TValue *Val) { return Val->getAlign(); } #endif } // namespace Align } // namespace VCINTR #endif // VCINTR_IR_ALIGNMENT_H vc-intrinsics-0.22.1/GenXIntrinsics/lib/000077500000000000000000000000001475147027500200605ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/lib/CMakeLists.txt000066400000000000000000000004251475147027500226210ustar00rootroot00000000000000#=========================== begin_copyright_notice ============================ # # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # #============================ end_copyright_notice ============================= add_subdirectory(GenXIntrinsics) vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/000077500000000000000000000000001475147027500227675ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/AdaptorsCommon.cpp000066400000000000000000000035621475147027500264270ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2021 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #include "AdaptorsCommon.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvmVCWrapper/IR/Type.h" namespace llvm { namespace genx { #if VC_INTR_LLVM_VERSION_MAJOR >= 9 static void legalizeAttribute(Argument &Arg, Type *NewType, Attribute::AttrKind Kind) { if (!Arg.hasAttribute(Kind) || Arg.getAttribute(Kind).getValueAsType() == NewType) return; Arg.removeAttr(Kind); Arg.addAttr(Attribute::get(Arg.getParent()->getContext(), Kind, NewType)); } #endif void legalizeParamAttributes(Function *F) { assert(F && "Valid function ptr must be passed"); #if VC_INTR_LLVM_VERSION_MAJOR >= 9 for (auto &Arg : F->args()) { auto *PTy = dyn_cast(Arg.getType()); if (!PTy) continue; #if VC_INTR_LLVM_VERSION_MAJOR >= 13 #if VC_INTR_LLVM_VERSION_MAJOR < 17 if (PTy->isOpaque()) #endif // VC_INTR_LLVM_VERSION_MAJOR < 18 continue; #endif // VC_INTR_LLVM_VERSION_MAJOR >= 13 auto *ElemType = VCINTR::Type::getNonOpaquePtrEltTy(PTy); legalizeAttribute(Arg, ElemType, Attribute::ByVal); #if VC_INTR_LLVM_VERSION_MAJOR >= 11 legalizeAttribute(Arg, ElemType, Attribute::Preallocated); #if VC_INTR_LLVM_VERSION_MAJOR >= 12 legalizeAttribute(Arg, ElemType, Attribute::ByRef); #if VC_INTR_LLVM_VERSION_MAJOR >= 13 legalizeAttribute(Arg, ElemType, Attribute::InAlloca); legalizeAttribute(Arg, ElemType, Attribute::ElementType); #endif // VC_INTR_LLVM_VERSION_MAJOR >= 13 #endif // VC_INTR_LLVM_VERSION_MAJOR >= 12 #endif // VC_INTR_LLVM_VERSION_MAJOR >= 11 } #endif // VC_INTR_LLVM_VERSION_MAJOR >= 9 } } // namespace genx } // namespace llvm vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/AdaptorsCommon.h000066400000000000000000000126621475147027500260750ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2021 Intel 
Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ // This file defines common constants for writer/reader spirv adaptors. namespace llvm { class Function; namespace genx { enum class ArgKind { General = 0, Sampler = 1, Surface = 2, }; namespace ArgDesc { static constexpr const char ReadOnly[] = "read_only"; static constexpr const char WriteOnly[] = "write_only"; static constexpr const char ReadWrite[] = "read_write"; static constexpr const char Buffer[] = "buffer_t"; static constexpr const char SVM[] = "svmptr_t"; static constexpr const char Sampler[] = "sampler_t"; static constexpr const char Image1d[] = "image1d_t"; static constexpr const char Image1dArray[] = "image1d_array_t"; static constexpr const char Image1dBuffer[] = "image1d_buffer_t"; static constexpr const char Image2d[] = "image2d_t"; static constexpr const char Image2dArray[] = "image2d_array_t"; static constexpr const char Image2dMediaBlock[] = "image2d_media_block_t"; static constexpr const char Image3d[] = "image3d_t"; } // namespace ArgDesc // Separate kinds of SPIRV types. // Each of these kinds has different representation // in terms of arg kind and arg desc. enum class SPIRVType { // Surfaces + corresponding desc. Buffer, Image1d, Image1dArray, Image1dBuffer, Image2d, Image2dArray, Image2dMediaBlock, Image3d, // Sampler + sampler_t. Sampler, // General + smvptr_t. Pointer, // Other general types (no arg desc). Other, // Old-style decorated types or already SPIRV type. None, }; // Access type used by surfaces. enum class AccessType { ReadOnly, WriteOnly, ReadWrite, }; struct SPIRVArgDesc { SPIRVType Ty; AccessType Acc = AccessType::ReadWrite; SPIRVArgDesc(SPIRVType T) : Ty(T) {} SPIRVArgDesc(SPIRVType T, AccessType A) : Ty(T), Acc(A) {} }; namespace OCLTypes { // Common type prefix for ocl types in llvm IR. static constexpr const char TypePrefix[] = "opencl."; // Main types. // Currently used image types. static constexpr const char Image[] = "image"; static constexpr const char Dim1d[] = "1d"; static constexpr const char Dim1dArray[] = "1d_array"; static constexpr const char Dim1dBuffer[] = "1d_buffer"; static constexpr const char Dim2d[] = "2d"; static constexpr const char Dim2dArray[] = "2d_array"; static constexpr const char Dim3d[] = "3d"; // Sampler type. static constexpr const char Sampler[] = "sampler"; } // namespace OCLTypes // SPIRV friendly IR types. May be generated by SPIRV-LLVM-Translator. namespace SPIRVIRTypes { static constexpr const char TypePrefix[] = "spirv."; enum Dim { Dim1D = 0, Dim2D = 1, Dim3D = 2, DimBuffer = 5 }; enum ImageIntParam { Dimension = 0, Arrayed = 2, Access = 6 }; static constexpr const char Buffer[] = "BufferSurfaceINTEL"; static constexpr const char Image[] = "Image"; static constexpr const char Sampler[] = "Sampler"; } // namespace SPIRVIRTypes // These are not really standardized names. // Just something for POC implementation. namespace IntelTypes { // Type prefix for custom types. static constexpr const char TypePrefix[] = "intel."; // Stateful buffer type. static constexpr const char Buffer[] = "buffer"; // Media block image. static constexpr const char MediaBlockImage[] = "image2d_media_block"; } // namespace IntelTypes namespace CommonTypes { // Access qualifiers. Should come after image type. static constexpr const char ReadOnly[] = "_ro"; static constexpr const char WriteOnly[] = "_wo"; static constexpr const char ReadWrite[] = "_rw"; // Common type suffix for ocl types in llvm IR. 
static constexpr const char TypeSuffix[] = "_t"; } // namespace CommonTypes namespace SPIRVParams { static constexpr const char SPIRVMemoryModel[] = "spirv.MemoryModel"; static constexpr const char SPIRVSIMDSubgroupSize[] = "intel_reqd_sub_group_size"; static constexpr unsigned SPIRVMemoryModelSimple = 0; static constexpr unsigned SPIRVMemoryModelOCL = 2; static constexpr unsigned SPIRVAddressingModel32 = 1; static constexpr unsigned SPIRVAddressingModel64 = 2; // Has to correspond to spir address space encoding. static constexpr unsigned SPIRVPrivateAS = 0; static constexpr unsigned SPIRVGlobalAS = 1; static constexpr unsigned SPIRVConstantAS = 2; static constexpr unsigned SPIRVLocalAS = 3; static constexpr unsigned SPIRVGenericAS = 4; } // namespace SPIRVParams inline unsigned getOpaqueTypeAddressSpace(SPIRVType Ty) { switch (Ty) { case SPIRVType::Sampler: return SPIRVParams::SPIRVConstantAS; case SPIRVType::Buffer: case SPIRVType::Image1d: case SPIRVType::Image1dArray: case SPIRVType::Image1dBuffer: case SPIRVType::Image2d: case SPIRVType::Image2dArray: case SPIRVType::Image2dMediaBlock: case SPIRVType::Image3d: return SPIRVParams::SPIRVGlobalAS; default: // Default to zero for other types. return 0; } } // Overrides specific attributes of function parameters. // // Function arguments of PointerType can have specific // attributes like ByVal, ByRef, Preallocated, InAlloca // that contain Pointee Type of that pointer as parameter. // SPIRV Adaptor passes may change Pointee type, so we must // explicitly change this type in corresponding attributes // in order to construct valid llvm-IR. // // (see more here: https://llvm.org/docs/LangRef.html#parameter-attributes) void legalizeParamAttributes(Function* F); } // namespace genx } // namespace llvm vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/CMakeLists.txt000077500000000000000000000033731475147027500255400ustar00rootroot00000000000000#=========================== begin_copyright_notice ============================ # # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # #============================ end_copyright_notice ============================= set(LLVM_COMPONENTS CodeGen Support Core Analysis ) set(SRC_LIST GenXIntrinsics.cpp GenXRestoreIntrAttr.cpp GenXSimdCFLowering.cpp GenXSingleElementVectorUtil.cpp GenXSPIRVReaderAdaptor.cpp GenXSPIRVWriterAdaptor.cpp GenXVersion.cpp AdaptorsCommon.cpp GenXMetadata.cpp ) if(BUILD_EXTERNAL) add_library(LLVMGenXIntrinsics ${SRC_LIST}) llvm_update_compile_flags(LLVMGenXIntrinsics) add_dependencies(LLVMGenXIntrinsics GenXIntrinsicsGen) vc_get_llvm_targets(LLVM_LIBS ${LLVM_COMPONENTS}) target_link_libraries(LLVMGenXIntrinsics ${LLVM_LIBS}) else() # when we are building in LLVM infra, we need to conform set(LLVM_LINK_COMPONENTS ${LLVM_COMPONENTS} ) if(LLVM_LINK_LLVM_DYLIB) add_llvm_library(LLVMGenXIntrinsics STATIC DISABLE_LLVM_LINK_LLVM_DYLIB ${SRC_LIST} ADDITIONAL_HEADER_DIRS ${GENX_INTRINSICS_MAIN_INCLUDE_DIR}/llvm/GenXIntrinsics DEPENDS GenXIntrinsicsGen intrinsics_gen LLVMCodeGen LLVMSupport LLVMCore LLVMAnalysis LLVMSPIRVLib ) else() add_llvm_library(LLVMGenXIntrinsics ${SRC_LIST} ADDITIONAL_HEADER_DIRS ${GENX_INTRINSICS_MAIN_INCLUDE_DIR}/llvm/GenXIntrinsics DEPENDS GenXIntrinsicsGen intrinsics_gen ) endif() endif() target_include_directories(LLVMGenXIntrinsics PUBLIC $ $ $ ) vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/GenXIntrinsics.cpp000066400000000000000000000726351475147027500264170ustar00rootroot00000000000000/*========================== 
begin_copyright_notice ============================ Copyright (C) 2019-2024 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ /*========================== begin_copyright_notice ============================ This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. ============================= end_copyright_notice ===========================*/ // Originated from llvm source lib/IR/Function.cpp // Function.cpp - Implement the Global object classes // Implementation of methods declared in llvm/GenXIntrinsics/GenXIntrinsics.h #include "llvm/GenXIntrinsics/GenXIntrinsics.h" #include "llvm/GenXIntrinsics/GenXIntrinsicInst.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include <algorithm> #include <cassert> #include <cstring> #include "llvmVCWrapper/ADT/StringRef.h" #include "llvmVCWrapper/IR/DerivedTypes.h" #include "llvmVCWrapper/IR/Intrinsics.h" #include "llvmVCWrapper/IR/Type.h" #include <map> #include <string> using namespace llvm; static cl::opt<bool> EnableGenXIntrinsicsCache( "enable-genx-intrinsics-cache", cl::init(true), cl::Hidden, cl::desc("Enable metadata caching of genx intrinsics")); #define MANGLE(STR) (STR) /// Intrinsic::isOverloaded(ID) - Returns true if the intrinsic can be /// overloaded. static bool isOverloaded(GenXIntrinsic::ID id); /// getIntrinsicInfoTableEntries - Return the IIT table descriptor for the /// specified intrinsic into an array of IITDescriptors. /// void getIntrinsicInfoTableEntries(GenXIntrinsic::ID id, SmallVectorImpl<Intrinsic::IITDescriptor> &T); /// IIT_Info - These are enumerators that describe the entries returned by the /// getIntrinsicInfoTableEntries function. /// /// NOTE: This must be kept in sync with the copy in TblGen/IntrinsicEmitter! enum IIT_Info { // Common values should be encoded with 0-15. IIT_Done = 0, IIT_I1 = 1, IIT_I8 = 2, IIT_I16 = 3, IIT_I32 = 4, IIT_I64 = 5, IIT_F16 = 6, IIT_F32 = 7, IIT_F64 = 8, IIT_V2 = 9, IIT_V4 = 10, IIT_V8 = 11, IIT_V16 = 12, IIT_V32 = 13, IIT_PTR = 14, IIT_ARG = 15, // Values from 16+ are only encodable with the inefficient encoding.
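// For example, a signature of <2 x i64> with a single i32 operand needs only // IIT_V2 (9), IIT_I64 (5) and IIT_I32 (4), which all fit into the 4-bit // nibbles of one 32-bit IIT_Table entry; signatures that mention the values // below spill into the long encoding table instead.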
IIT_V64 = 16, IIT_MMX = 17, IIT_TOKEN = 18, IIT_METADATA = 19, IIT_EMPTYSTRUCT = 20, IIT_STRUCT2 = 21, IIT_STRUCT3 = 22, IIT_STRUCT4 = 23, IIT_STRUCT5 = 24, IIT_EXTEND_ARG = 25, IIT_TRUNC_ARG = 26, IIT_ANYPTR = 27, IIT_V1 = 28, IIT_VARARG = 29, IIT_HALF_VEC_ARG = 30, IIT_SAME_VEC_WIDTH_ARG = 31, #if VC_INTR_LLVM_VERSION_MAJOR < 18 IIT_PTR_TO_ARG = 32, IIT_PTR_TO_ELT = 33, #endif IIT_VEC_OF_ANYPTRS_TO_ELT = 34, IIT_I128 = 35, IIT_V512 = 36, IIT_V1024 = 37, IIT_STRUCT6 = 38, IIT_STRUCT7 = 39, IIT_STRUCT8 = 40, IIT_F128 = 41 }; static Intrinsic::IITDescriptor getVector(unsigned Width) { using namespace Intrinsic; #if VC_INTR_LLVM_VERSION_MAJOR >= 11 return IITDescriptor::getVector(Width, false); #else return IITDescriptor::get(IITDescriptor::Vector, Width); #endif } static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, SmallVectorImpl &OutputTable) { using namespace Intrinsic; IIT_Info Info = IIT_Info(Infos[NextElt++]); unsigned StructElts = 2; switch (Info) { case IIT_Done: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Void, 0)); return; case IIT_VARARG: OutputTable.push_back(IITDescriptor::get(IITDescriptor::VarArg, 0)); return; case IIT_MMX: OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0)); return; case IIT_TOKEN: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Token, 0)); return; case IIT_METADATA: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Metadata, 0)); return; case IIT_F16: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Half, 0)); return; case IIT_F32: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Float, 0)); return; case IIT_F64: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Double, 0)); return; case IIT_F128: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Quad, 0)); return; case IIT_I1: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1)); return; case IIT_I8: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8)); return; case IIT_I16: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer,16)); return; case IIT_I32: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 32)); return; case IIT_I64: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64)); return; case IIT_I128: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 128)); return; case IIT_V1: OutputTable.push_back(getVector(1)); DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_V2: OutputTable.push_back(getVector(2)); DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_V4: OutputTable.push_back(getVector(4)); DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_V8: OutputTable.push_back(getVector(8)); DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_V16: OutputTable.push_back(getVector(16)); DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_V32: OutputTable.push_back(getVector(32)); DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_V64: OutputTable.push_back(getVector(64)); DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_V512: OutputTable.push_back(getVector(512)); DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_V1024: OutputTable.push_back(getVector(1024)); DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_PTR: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0)); DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_ANYPTR: { // [ANYPTR addrspace, subtype] OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 
Infos[NextElt++])); DecodeIITType(NextElt, Infos, OutputTable); return; } case IIT_ARG: { unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back(IITDescriptor::get(IITDescriptor::Argument, ArgInfo)); return; } case IIT_EXTEND_ARG: { unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back(IITDescriptor::get(IITDescriptor::ExtendArgument, ArgInfo)); return; } case IIT_TRUNC_ARG: { unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back(IITDescriptor::get(IITDescriptor::TruncArgument, ArgInfo)); return; } case IIT_HALF_VEC_ARG: { unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back(IITDescriptor::get(IITDescriptor::HalfVecArgument, ArgInfo)); return; } case IIT_SAME_VEC_WIDTH_ARG: { unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back(IITDescriptor::get(IITDescriptor::SameVecWidthArgument, ArgInfo)); return; } #if VC_INTR_LLVM_VERSION_MAJOR < 17 case IIT_PTR_TO_ARG: { unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToArgument, ArgInfo)); return; } case IIT_PTR_TO_ELT: { unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo)); return; } #endif case IIT_VEC_OF_ANYPTRS_TO_ELT: { unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); OutputTable.push_back( IITDescriptor::get(IITDescriptor::VecOfAnyPtrsToElt, ArgNo, RefNo)); return; } case IIT_EMPTYSTRUCT: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0)); return; case IIT_STRUCT8: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT7: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT6: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT5: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT4: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT3: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT2: { OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, StructElts)); for (unsigned i = 0; i != StructElts; ++i) DecodeIITType(NextElt, Infos, OutputTable); return; } } llvm_unreachable("unhandled"); } static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos, ArrayRef<Type *> Tys, LLVMContext &Context) { using namespace Intrinsic; IITDescriptor D = Infos.front(); Infos = Infos.slice(1); switch (D.Kind) { case IITDescriptor::Void: return Type::getVoidTy(Context); case IITDescriptor::VarArg: return Type::getVoidTy(Context); case IITDescriptor::MMX: #if VC_INTR_LLVM_VERSION_MAJOR >= 20 return FixedVectorType::get(Type::getInt64Ty(Context), 1); #else // VC_INTR_LLVM_VERSION_MAJOR >= 20 return Type::getX86_MMXTy(Context); #endif // VC_INTR_LLVM_VERSION_MAJOR >= 20 case IITDescriptor::Token: return Type::getTokenTy(Context); case IITDescriptor::Metadata: return Type::getMetadataTy(Context); case IITDescriptor::Half: return Type::getHalfTy(Context); case IITDescriptor::Float: return Type::getFloatTy(Context); case IITDescriptor::Double: return Type::getDoubleTy(Context); case IITDescriptor::Quad: return Type::getFP128Ty(Context); case IITDescriptor::Integer: return IntegerType::get(Context, D.Integer_Width); case IITDescriptor::Vector: return VCINTR::getVectorType(DecodeFixedType(Infos, Tys, Context), D.Vector_Width); case IITDescriptor::Pointer: return PointerType::get(DecodeFixedType(Infos, Tys, Context), D.Pointer_AddressSpace); case
IITDescriptor::Struct: { SmallVector<Type *, 8> Elts; for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i) Elts.push_back(DecodeFixedType(Infos, Tys, Context)); return StructType::get(Context, Elts); } case IITDescriptor::Argument: return Tys[D.getArgumentNumber()]; case IITDescriptor::ExtendArgument: { Type *Ty = Tys[D.getArgumentNumber()]; if (VectorType *VTy = dyn_cast<VectorType>(Ty)) return VectorType::getExtendedElementVectorType(VTy); return IntegerType::get(Context, 2 * cast<IntegerType>(Ty)->getBitWidth()); } case IITDescriptor::TruncArgument: { Type *Ty = Tys[D.getArgumentNumber()]; if (VectorType *VTy = dyn_cast<VectorType>(Ty)) return VectorType::getTruncatedElementVectorType(VTy); IntegerType *ITy = cast<IntegerType>(Ty); assert(ITy->getBitWidth() % 2 == 0); return IntegerType::get(Context, ITy->getBitWidth() / 2); } case IITDescriptor::HalfVecArgument: return VectorType::getHalfElementsVectorType(cast<VectorType>( Tys[D.getArgumentNumber()])); case IITDescriptor::SameVecWidthArgument: { Type *EltTy = DecodeFixedType(Infos, Tys, Context); Type *Ty = Tys[D.getArgumentNumber()]; if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { return VCINTR::getVectorType(EltTy, VCINTR::VectorType::getNumElements(VTy)); } llvm_unreachable("unhandled"); } #if VC_INTR_LLVM_VERSION_MAJOR < 17 case IITDescriptor::PtrToArgument: { Type *Ty = Tys[D.getArgumentNumber()]; return PointerType::getUnqual(Ty); } case IITDescriptor::PtrToElt: { Type *Ty = Tys[D.getArgumentNumber()]; VectorType *VTy = dyn_cast<VectorType>(Ty); if (!VTy) llvm_unreachable("Expected an argument of Vector Type"); Type *EltTy = cast<VectorType>(VTy)->getElementType(); return PointerType::getUnqual(EltTy); } #endif case IITDescriptor::VecOfAnyPtrsToElt: // Return the overloaded type (which determines the pointers address space) return Tys[D.getOverloadArgNumber()]; default: break; } llvm_unreachable("unhandled"); } #define GET_INTRINSIC_GENERATOR_GLOBAL #include "llvm/GenXIntrinsics/GenXIntrinsicDescription.gen" #undef GET_INTRINSIC_GENERATOR_GLOBAL void GenXIntrinsic::getIntrinsicInfoTableEntries( GenXIntrinsic::ID id, SmallVectorImpl<Intrinsic::IITDescriptor> &T) { assert(id > GenXIntrinsic::not_genx_intrinsic); id = static_cast<GenXIntrinsic::ID>(id - GenXIntrinsic::not_genx_intrinsic); assert(id < sizeof(IIT_Table) / sizeof(*IIT_Table)); // Check to see if the intrinsic's type was expressible by the table. unsigned TableVal = IIT_Table[id - 1]; // Decode the TableVal into an array of IITValues. SmallVector<unsigned char, 8> IITValues; ArrayRef<unsigned char> IITEntries; unsigned NextElt = 0; if ((TableVal >> 31) != 0) { // This is an offset into the IIT_LongEncodingTable. IITEntries = IIT_LongEncodingTable; // Strip sentinel bit. NextElt = (TableVal << 1) >> 1; } else { // Decode the TableVal into an array of IITValues. If the entry was encoded // into a single word in the table itself, decode it now. do { IITValues.push_back(TableVal & 0xF); TableVal >>= 4; } while (TableVal); IITEntries = IITValues; NextElt = 0; } // Okay, decode the table into the output vector of IITDescriptors. DecodeIITType(NextElt, IITEntries, T); while (NextElt != IITEntries.size() && IITEntries[NextElt] != 0) DecodeIITType(NextElt, IITEntries, T); } /// Returns a stable mangling for the type specified for use in the name /// mangling scheme used by 'any' types in intrinsic signatures. The mangling /// of named types is simply their name. Manglings for unnamed types consist /// of a prefix ('p' for pointers, 'a' for arrays, 'f_' for functions) /// combined with the mangling of their component types. A vararg function /// type will have a suffix of 'vararg'.
Since function types can contain /// other function types, we close a function type mangling with suffix 'f' /// which can't be confused with its prefix. This ensures we don't have /// collisions between two unrelated function types. Otherwise, you might /// parse ffXX as f(fXX) or f(fX)X. (X is a placeholder for any other type.) static std::string getMangledTypeStr(Type *Ty) { std::string Result; if (!Ty) return Result; if (PointerType *PTyp = dyn_cast<PointerType>(Ty)) { Result += "p" + llvm::utostr(PTyp->getAddressSpace()); #if VC_INTR_LLVM_VERSION_MAJOR >= 13 #if VC_INTR_LLVM_VERSION_MAJOR < 17 if (PTyp->isOpaque()) #endif // VC_INTR_LLVM_VERSION_MAJOR < 17 return Result; #endif // VC_INTR_LLVM_VERSION_MAJOR >= 13 Result += getMangledTypeStr(VCINTR::Type::getNonOpaquePtrEltTy(PTyp)); } else if (ArrayType *ATyp = dyn_cast<ArrayType>(Ty)) { Result += "a" + llvm::utostr(ATyp->getNumElements()) + getMangledTypeStr(ATyp->getElementType()); } else if (StructType *STyp = dyn_cast<StructType>(Ty)) { if (!STyp->isLiteral()) Result += STyp->getName(); else { Result += "s" + llvm::utostr(STyp->getNumElements()); for (unsigned int i = 0; i < STyp->getNumElements(); i++) Result += getMangledTypeStr(STyp->getElementType(i)); } } else if (FunctionType *FT = dyn_cast<FunctionType>(Ty)) { Result += "f_" + getMangledTypeStr(FT->getReturnType()); for (size_t i = 0; i < FT->getNumParams(); i++) Result += getMangledTypeStr(FT->getParamType(i)); if (FT->isVarArg()) Result += "vararg"; // Ensure nested function types are distinguishable. Result += "f"; } else if (auto *VTy = dyn_cast<VectorType>(Ty)) { Result += "v" + utostr(VCINTR::VectorType::getNumElements(VTy)) + getMangledTypeStr(VTy->getElementType()); #if VC_INTR_LLVM_VERSION_MAJOR >= 16 } else if (auto *TargetTy = dyn_cast<TargetExtType>(Ty)) { Result += "t_" + TargetTy->getName().str(); for (auto *PTy : TargetTy->type_params()) Result += "_" + getMangledTypeStr(PTy); for (auto I : TargetTy->int_params()) Result += "_" + llvm::utostr(I); #endif // VC_INTR_LLVM_VERSION_MAJOR >= 16 } else { Result += EVT::getEVT(Ty).getEVTString(); } return Result; } static const char * const GenXIntrinsicNameTable[] = { "not_genx_intrinsic", #define GET_INTRINSIC_NAME_TABLE #include "llvm/GenXIntrinsics/GenXIntrinsicDescription.gen" #undef GET_INTRINSIC_NAME_TABLE }; bool isOverloaded(GenXIntrinsic::ID id) { assert(isGenXIntrinsic(id) && "Invalid intrinsic ID!"); id = static_cast<GenXIntrinsic::ID>(id - GenXIntrinsic::not_genx_intrinsic); #define GET_INTRINSIC_OVERLOAD_TABLE #include "llvm/GenXIntrinsics/GenXIntrinsicDescription.gen" #undef GET_INTRINSIC_OVERLOAD_TABLE } /// This defines the "getAttributes(ID id)" method. #define GET_INTRINSIC_ATTRIBUTES #include "llvm/GenXIntrinsics/GenXIntrinsicDescription.gen" #undef GET_INTRINSIC_ATTRIBUTES static StringRef GenXIntrinsicMDName{ "genx_intrinsic_id" }; bool GenXIntrinsic::isSupportedPlatform(const std::string &CPU, unsigned id) { #define GET_INTRINSIC_PLATFORMS #include "llvm/GenXIntrinsics/GenXIntrinsicDescription.gen" #undef GET_INTRINSIC_PLATFORMS assert(SupportedIntrinsics.find(CPU) != SupportedIntrinsics.end() && "Unknown Platform"); assert(GenXIntrinsic::isGenXIntrinsic(id) && "this function should be used only for GenXIntrinsics"); auto PlatformInfoIt = SupportedIntrinsics.find(CPU); if (PlatformInfoIt == SupportedIntrinsics.end()) return false; const auto &IntrinsicInfo = PlatformInfoIt->second; size_t IntrinsicIdx = id - GenXIntrinsic::ID::not_genx_intrinsic - 1; if (IntrinsicIdx < IntrinsicInfo.size()) return IntrinsicInfo[IntrinsicIdx]; return false; } /// Table of per-target intrinsic name tables.
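/// An overloaded entry from this table is completed with mangled type /// suffixes by getGenXName; e.g. "llvm.genx.rdregioni" instantiated for a /// <8 x i32> result, <16 x i32> input and i16 offset becomes /// "llvm.genx.rdregioni.v8i32.v16i32.i16".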
#define GET_INTRINSIC_TARGET_DATA #include "llvm/GenXIntrinsics/GenXIntrinsicDescription.gen" #undef GET_INTRINSIC_TARGET_DATA bool GenXIntrinsic::isOverloadedArg(unsigned IntrinID, unsigned ArgNum) { #define GET_INTRINSIC_OVERLOAD_ARGS_TABLE #include "llvm/GenXIntrinsics/GenXIntrinsicDescription.gen" #undef GET_INTRINSIC_OVERLOAD_ARGS_TABLE } bool GenXIntrinsic::isOverloadedRet(unsigned IntrinID) { #define GET_INTRINSIC_OVERLOAD_RET_TABLE #include "llvm/GenXIntrinsics/GenXIntrinsicDescription.gen" #undef GET_INTRINSIC_OVERLOAD_RET_TABLE } Function *GenXIntrinsic::getAnyDeclaration(Module *M, unsigned id, ArrayRef Tys) { assert(isAnyNonTrivialIntrinsic(id)); if (isGenXIntrinsic(id)) { return getGenXDeclaration(M, (ID)id, Tys); } else { #if VC_INTR_LLVM_VERSION_MAJOR < 20 return Intrinsic::getDeclaration(M, (Intrinsic::ID)id, Tys); #else return Intrinsic::getOrInsertDeclaration(M, (Intrinsic::ID)id, Tys); #endif } } /// Find the segment of \c IntrinsicNameTable for intrinsics with the same /// target as \c Name, or the generic table if \c Name is not target specific. /// /// Returns the relevant slice of \c IntrinsicNameTable static ArrayRef findTargetSubtable(StringRef Name) { assert(VCINTR::StringRef::starts_with(Name, "llvm.genx.")); ArrayRef Targets(TargetInfos); // Drop "llvm." and take the first dotted component. That will be the target // if this is target specific. StringRef Target = Name.drop_front(5).split('.').first; auto It = std::lower_bound(Targets.begin(), Targets.end(), Target, [](const IntrinsicTargetInfo &TI, StringRef Target) { return TI.Name < Target; }); // We've either found the target or just fall back to the generic set, which // is always first. const auto &TI = It != Targets.end() && It->Name == Target ? *It : Targets[0]; return ArrayRef(&GenXIntrinsicNameTable[1] + TI.Offset, TI.Count); } GenXIntrinsic::ID GenXIntrinsic::getGenXIntrinsicID(const Function *F) { assert(F); llvm::StringRef Name = F->getName(); if (!VCINTR::StringRef::starts_with(Name, getGenXIntrinsicPrefix())) return GenXIntrinsic::not_genx_intrinsic; // Check metadata cache. if (auto *MD = F->getMetadata(GenXIntrinsicMDName)) { assert(MD->getNumOperands() == 1 && "Invalid intrinsic metadata"); auto Val = cast(MD->getOperand(0))->getValue(); GenXIntrinsic::ID Id = static_cast(cast(Val)->getZExtValue()); // we need to check that metadata is correct and can be actually used if (isGenXIntrinsic(Id)) { const char *NamePrefix = GenXIntrinsicNameTable[Id - GenXIntrinsic::not_genx_intrinsic]; if (VCINTR::StringRef::starts_with(Name, NamePrefix)) return Id; } } // Fallback to string lookup. auto ID = lookupGenXIntrinsicID(Name); assert(ID != GenXIntrinsic::not_genx_intrinsic && "Intrinsic not found!"); return ID; } std::string GenXIntrinsic::getGenXName(GenXIntrinsic::ID id, ArrayRef Tys) { assert(isGenXIntrinsic(id) && "Invalid intrinsic ID!"); assert(Tys.empty() || (isOverloaded(id) && "Non-overloadable intrinsic was overloaded!")); id = static_cast(id - GenXIntrinsic::not_genx_intrinsic); std::string Result(GenXIntrinsicNameTable[id]); for (Type *Ty : Tys) { Result += "." + getMangledTypeStr(Ty); } return Result; } static int lookupGenXIntrinsicByName(ArrayRef NameTable, StringRef Name) { #if VC_INTR_LLVM_VERSION_MAJOR < 20 return Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name); #else // VC_INTR_LLVM_VERSION_MAJOR < 20 assert(Name.starts_with("llvm.genx.") && "Unexpected intrinsic prefix"); size_t CmpEnd = 4; // Skip the "llvm" component. 
const char *const *Low = NameTable.begin(); const char *const *High = NameTable.end(); const char *const *LastLow = Low; while (CmpEnd < Name.size() && High - Low > 0) { size_t CmpStart = CmpEnd; CmpEnd = Name.find('.', CmpStart + 1); CmpEnd = CmpEnd == StringRef::npos ? Name.size() : CmpEnd; auto Cmp = [CmpStart, CmpEnd](const char *LHS, const char *RHS) { return strncmp(LHS + CmpStart, RHS + CmpStart, CmpEnd - CmpStart) < 0; }; LastLow = Low; std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp); } if (High - Low > 0) LastLow = Low; if (LastLow == NameTable.end()) return -1; StringRef NameFound = *LastLow; if (Name == NameFound || (Name.starts_with(NameFound) && Name[NameFound.size()] == '.')) return LastLow - NameTable.begin(); return -1; #endif // VC_INTR_LLVM_VERSION_MAJOR < 20 } GenXIntrinsic::ID GenXIntrinsic::lookupGenXIntrinsicID(StringRef Name) { ArrayRef NameTable = findTargetSubtable(Name); int Idx = lookupGenXIntrinsicByName(NameTable, Name); if (Idx == -1) return GenXIntrinsic::not_genx_intrinsic; // Intrinsic IDs correspond to the location in IntrinsicNameTable, but we have // an index into a sub-table. int Adjust = NameTable.data() - GenXIntrinsicNameTable; auto ID = static_cast(Idx + Adjust + GenXIntrinsic::not_genx_intrinsic); // If the intrinsic is not overloaded, require an exact match. If it is // overloaded, require either exact or prefix match. assert(Name.size() >= strlen(NameTable[Idx]) && "Expected either exact or prefix match"); assert((Name.size() == strlen(NameTable[Idx])) || (isOverloaded(ID) && "Non-overloadable intrinsic was overloaded!")); return ID; } FunctionType *GenXIntrinsic::getGenXType(LLVMContext &Context, GenXIntrinsic::ID id, ArrayRef Tys) { SmallVector Table; getIntrinsicInfoTableEntries(id, Table); ArrayRef TableRef = Table; Type *ResultTy = DecodeFixedType(TableRef, Tys, Context); SmallVector ArgTys; while (!TableRef.empty()) ArgTys.push_back(DecodeFixedType(TableRef, Tys, Context)); // DecodeFixedType returns Void for IITDescriptor::Void and // IITDescriptor::VarArg If we see void type as the type of the last argument, // it is vararg intrinsic if (!ArgTys.empty() && ArgTys.back()->isVoidTy()) { ArgTys.pop_back(); return FunctionType::get(ResultTy, ArgTys, true); } return FunctionType::get(ResultTy, ArgTys, false); } #ifndef NDEBUG // Sanity check for intrinsic types. // After translation from SPIRV literal structures become identified. // However, if intrinsic returns multiple values, then it returns // literal structure. // Having this, compatible intrinsics will have same argument types // and either same return types or layout identical structure types. static bool isCompatibleIntrinsicSignature(FunctionType *DecodedType, FunctionType *FoundType) { if (DecodedType == FoundType) return true; if (DecodedType->params() != FoundType->params()) return false; // Return types are different. Check for structures. 
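// An identified struct that is layout-identical to the decoded literal // struct is accepted, since that is exactly what a SPIRV round trip // produces for multiple-return-value intrinsics.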
auto *DecStrTy = dyn_cast<StructType>(DecodedType->getReturnType()); auto *FoundStrTy = dyn_cast<StructType>(FoundType->getReturnType()); if (!DecStrTy || !FoundStrTy) return false; return DecStrTy->isLayoutIdentical(FoundStrTy); } #endif Function *GenXIntrinsic::getGenXDeclaration(Module *M, GenXIntrinsic::ID id, ArrayRef<Type *> Tys) { assert(isGenXNonTrivialIntrinsic(id)); assert(Tys.empty() || (isOverloaded(id) && "Non-overloadable intrinsic was overloaded!")); auto GenXName = getGenXName(id, Tys); FunctionType *FTy = getGenXType(M->getContext(), id, Tys); Function *F = M->getFunction(GenXName); if (!F) F = Function::Create(FTy, GlobalVariable::ExternalLinkage, GenXName, M); assert(isCompatibleIntrinsicSignature(FTy, F->getFunctionType()) && "Module contains intrinsic declaration with incompatible type!"); resetGenXAttributes(F); return F; } void GenXIntrinsic::resetGenXAttributes(Function *F) { assert(F); GenXIntrinsic::ID GXID = getGenXIntrinsicID(F); assert(GXID != GenXIntrinsic::not_genx_intrinsic); // Since Function::isIntrinsic() will return true due to the llvm. prefix, // Module::getOrInsertFunction fails to add the attributes, so add them // explicitly here. Since this is set up on the function declaration, // attribute assignment is global and hence this approach suffices. F->setAttributes(GenXIntrinsic::getAttributes(F->getContext(), GXID)); // Cache intrinsic ID in metadata. if (EnableGenXIntrinsicsCache && !F->hasMetadata(GenXIntrinsicMDName)) { LLVMContext &Ctx = F->getContext(); auto *Ty = IntegerType::getInt32Ty(Ctx); auto *Cached = ConstantInt::get(Ty, GXID); auto *MD = MDNode::get(Ctx, {ConstantAsMetadata::get(Cached)}); F->addMetadata(GenXIntrinsicMDName, *MD); } } std::string GenXIntrinsic::getAnyName(unsigned id, ArrayRef<Type *> Tys) { assert(isAnyIntrinsic(id)); if (id == not_any_intrinsic) { std::string Result("not_any_intrinsic"); for (Type *Ty : Tys) { Result += "."
+ getMangledTypeStr(Ty); } return Result; } else if (isGenXIntrinsic(id)) return getGenXName((GenXIntrinsic::ID)id, Tys); else return VCINTR::Intrinsic::getName((Intrinsic::ID)id, Tys); } GenXIntrinsic::LSCVectorSize GenXIntrinsic::getLSCVectorSize( const Instruction *I) { assert(isLSC(I)); const int VectorSizeIdx = LSCArgIdx::getLSCVectorSize(getLSCCategory(I)); if (VectorSizeIdx == LSCArgIdx::Invalid) return LSCVectorSize::N0; return static_cast( cast(I->getOperand(VectorSizeIdx))->getZExtValue()); } GenXIntrinsic::LSCDataSize GenXIntrinsic::getLSCDataSize( const Instruction *I) { assert(isLSC(I)); const int DataSizeIdx = LSCArgIdx::getLSCDataSize(getLSCCategory(I)); if (DataSizeIdx == LSCArgIdx::Invalid) return LSCDataSize::Invalid; return static_cast( cast(I->getOperand(DataSizeIdx))->getZExtValue()); } GenXIntrinsic::LSCDataOrder GenXIntrinsic::getLSCDataOrder( const Instruction *I) { assert(isLSC(I)); const int DataOrderIdx = LSCArgIdx::getLSCDataOrder(getLSCCategory(I)); if (DataOrderIdx == LSCArgIdx::Invalid) return LSCDataOrder::Invalid; return static_cast( cast(I->getOperand(DataOrderIdx))->getZExtValue()); } unsigned GenXIntrinsic::getLSCWidth(const Instruction *I) { assert(isLSC(I)); const int WidthIdx = LSCArgIdx::getLSCWidth(getLSCCategory(I)); if (WidthIdx == LSCArgIdx::Invalid) return 1; if (auto VT = dyn_cast(I->getOperand(WidthIdx)->getType())) return VCINTR::VectorType::getNumElements(VT); return 1; } bool GenXIntrinsic::isGenXIntrinsic(const Function *CF) { return VCINTR::StringRef::starts_with(CF->getName(), getGenXIntrinsicPrefix()); } bool GenXIntrinsicInst::classof(const CallInst *I) { if (const Function *CF = I->getCalledFunction()) { return VCINTR::StringRef::starts_with( CF->getName(), GenXIntrinsic::getGenXIntrinsicPrefix()); } return false; } vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/GenXMetadata.cpp000066400000000000000000000016351475147027500260020ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2021 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #include "llvm/GenXIntrinsics/GenXMetadata.h" #include #include #include using namespace llvm; MDNode *llvm::genx::GetOldStyleKernelMD(Function const &F) { auto *KernelMD = static_cast(nullptr); auto *KernelMDs = F.getParent()->getNamedMetadata(FunctionMD::GenXKernels); if (!KernelMDs) return KernelMD; for (unsigned I = 0, E = KernelMDs->getNumOperands(); I < E; ++I) { auto *Kernel = mdconst::dyn_extract( KernelMDs->getOperand(I)->getOperand(KernelMDOp::FunctionRef)); if (Kernel == &F) { KernelMD = KernelMDs->getOperand(I); break; } } return KernelMD; } vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/GenXRestoreIntrAttr.cpp000066400000000000000000000051141475147027500273710ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2021 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ /*========================== begin_copyright_notice ============================ This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. 
============================= end_copyright_notice ===========================*/ //===----------------------------------------------------------------------===// // /// GenXRestoreIntrAttr /// ------------------- /// /// This is a module pass that restores attributes for intrinsics: /// /// * Since SPIR-V doesn't save intrinsics' attributes, some important /// information can be lost. This pass restores it. /// /// * Only GenX intrinsics are handled. /// //===----------------------------------------------------------------------===// #include "llvm/GenXIntrinsics/GenXIntrOpts.h" #include "llvm/GenXIntrinsics/GenXIntrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Pass.h" #define DEBUG_TYPE "GENX_RESTOREINTRATTR" using namespace llvm; namespace { // GenXRestoreIntrAttr : restore intrinsics' attributes class GenXRestoreIntrAttr : public ModulePass { public: GenXRestoreIntrAttr(); StringRef getPassName() const override { return "GenX Restore Intrinsics' Attributes"; } bool runOnModule(Module &M) override; private: bool restoreAttributes(Function *F); public: static char ID; }; } // namespace namespace llvm { void initializeGenXRestoreIntrAttrPass(PassRegistry &); } INITIALIZE_PASS_BEGIN(GenXRestoreIntrAttr, "GenXRestoreIntrAttr", "GenXRestoreIntrAttr", false, false) INITIALIZE_PASS_END(GenXRestoreIntrAttr, "GenXRestoreIntrAttr", "GenXRestoreIntrAttr", false, false) char GenXRestoreIntrAttr::ID = 0; Pass *llvm::createGenXRestoreIntrAttrPass() { return new GenXRestoreIntrAttr; } GenXRestoreIntrAttr::GenXRestoreIntrAttr() : ModulePass(ID) { initializeGenXRestoreIntrAttrPass(*PassRegistry::getPassRegistry()); } bool GenXRestoreIntrAttr::restoreAttributes(Function *F) { LLVM_DEBUG(dbgs() << "Restoring attributes for: " << F->getName() << "\n"); F->setAttributes(GenXIntrinsic::getAttributes(F->getContext(), GenXIntrinsic::getGenXIntrinsicID(F))); return true; } bool GenXRestoreIntrAttr::runOnModule(Module &M) { bool Modified = false; for (auto &F : M.getFunctionList()) { if (GenXIntrinsic::isGenXIntrinsic(&F)) Modified |= restoreAttributes(&F); } return Modified; } vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/GenXSPIRVReaderAdaptor.cpp000066400000000000000000000702111475147027500276170ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2025 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ // This pass converts metadata from SPIRV format to whichever used in backend. 
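// For example, a kernel parameter of type %opencl.image2d_rw_t // addrspace(1)* coming from SPIRV is turned back into an i32 parameter // annotated with the "VCArgumentKind" and "VCArgumentDesc" attributes (see // rewriteKernelArguments below).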
#include "AdaptorsCommon.h" #include "GenXSingleElementVectorUtil.h" #include "llvm/GenXIntrinsics/GenXIntrinsics.h" #include "llvm/GenXIntrinsics/GenXMetadata.h" #include "llvm/GenXIntrinsics/GenXSPIRVReaderAdaptor.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvmVCWrapper/IR/Attributes.h" #include "llvmVCWrapper/IR/Function.h" #include "llvmVCWrapper/IR/Instructions.h" #include "llvmVCWrapper/IR/Type.h" using namespace llvm; using namespace genx; namespace { class GenXSPIRVReaderAdaptorImpl final { public: explicit GenXSPIRVReaderAdaptorImpl() {} bool run(Module &M); private: bool runOnFunction(Function &F); bool processVCFunctionAttributes(Function &F); bool processVCKernelAttributes(Function &F); void dropAttributeAtIndex(Function &F, unsigned Index, StringRef Kind) { auto NewAttributes = VCINTR::AttributeList::removeAttributeAtIndex( F.getContext(), F.getAttributes(), Index, Kind); F.setAttributes(NewAttributes); } void dropFnAttribute(Function &F, StringRef Kind) { dropAttributeAtIndex(F, AttributeList::FunctionIndex, Kind); } }; } // namespace static std::pair parseImageDim(StringRef TyName) { // Greedy match: 1d_buffer first. if (TyName.consume_front(OCLTypes::Dim1dBuffer)) return {SPIRVType::Image1dBuffer, TyName}; if (TyName.consume_front(OCLTypes::Dim1dArray)) return {SPIRVType::Image1dArray, TyName}; if (TyName.consume_front(OCLTypes::Dim1d)) return {SPIRVType::Image1d, TyName}; if (TyName.consume_front(OCLTypes::Dim2dArray)) return {SPIRVType::Image2dArray, TyName}; if (TyName.consume_front(OCLTypes::Dim2d)) return {SPIRVType::Image2d, TyName}; if (TyName.consume_front(OCLTypes::Dim3d)) return {SPIRVType::Image3d, TyName}; llvm_unreachable("Unexpected image dimensionality"); } static std::pair parseAccessQualifier(StringRef TyName) { if (TyName.consume_front(CommonTypes::ReadOnly)) return {AccessType::ReadOnly, TyName}; if (TyName.consume_front(CommonTypes::WriteOnly)) return {AccessType::WriteOnly, TyName}; if (TyName.consume_front(CommonTypes::ReadWrite)) return {AccessType::ReadWrite, TyName}; llvm_unreachable("Unexpected image access modifier"); } static SPIRVArgDesc parseImageType(StringRef TyName) { const bool Consumed = TyName.consume_front(OCLTypes::Image); assert(Consumed && "Unexpected opencl type"); (void)Consumed; SPIRVType ImageType; std::tie(ImageType, TyName) = parseImageDim(TyName); AccessType AccType; std::tie(AccType, TyName) = parseAccessQualifier(TyName); #if VC_INTR_LLVM_VERSION_MAJOR >= 16 assert(TyName.starts_with(CommonTypes::TypeSuffix) && "Bad image type"); #else assert(TyName.startswith(CommonTypes::TypeSuffix) && "Bad image type"); #endif return {ImageType, AccType}; } static std::pair parseIntelMainType(StringRef TyName) { if (TyName.consume_front(IntelTypes::Buffer)) return {SPIRVType::Buffer, TyName}; if (TyName.consume_front(IntelTypes::MediaBlockImage)) return {SPIRVType::Image2dMediaBlock, TyName}; llvm_unreachable("Unexpected intel extension type"); } template T consumeIntegerLiteral(StringRef TyName) { int Literal; auto ProperlyConsumed = !TyName.consumeInteger(0, Literal); assert(ProperlyConsumed && "Expected string to rpresent integer literal"); (void)ProperlyConsumed; return static_cast(Literal); } static SPIRVType evaluateImageTypeFromSPVIR(SPIRVIRTypes::Dim Dim, bool Arrayed) { SPIRVType ResultType; if (!Arrayed) { switch (Dim) { case SPIRVIRTypes::Dim1D: ResultType 
= SPIRVType::Image1d; break; case SPIRVIRTypes::Dim2D: ResultType = SPIRVType::Image2d; break; case SPIRVIRTypes::Dim3D: ResultType = SPIRVType::Image3d; break; case SPIRVIRTypes::DimBuffer: ResultType = SPIRVType::Image1dBuffer; break; } } else { switch (Dim) { case SPIRVIRTypes::Dim1D: ResultType = SPIRVType::Image1dArray; break; case SPIRVIRTypes::Dim2D: ResultType = SPIRVType::Image2dArray; break; default: llvm_unreachable("Bad Image Type"); } } return ResultType; } static StringRef skipUnderscores(StringRef StrRef, int Count) { for (int i = 0; i < Count; ++i) { StrRef = StrRef.drop_while([](char C) { return C != '_'; }); StrRef = StrRef.drop_front(1); } return StrRef; } static SPIRVArgDesc parseSPIRVIRImageType(StringRef TyName) { const bool Consumed = TyName.consume_front(SPIRVIRTypes::Image); assert(Consumed && "Unexpected SPIRV friendly IR type"); (void)Consumed; // SPIRV friendly IR image type looks like this: // spirv.Image._{Sampled T}_{Dim}_{Depth}_{Arrayed}_{MS}_{Fmt}_{Acc} // skip dot TyName = TyName.drop_front(1); // skip Sampled Type. TyName = skipUnderscores(TyName, 2); auto Dim = consumeIntegerLiteral<SPIRVIRTypes::Dim>(TyName); // Skip Depth. TyName = skipUnderscores(TyName, 2); auto Arrayed = consumeIntegerLiteral<bool>(TyName); // Skip Multisampling and Format. TyName = skipUnderscores(TyName, 4); AccessType AccessTy = AccessType::ReadOnly; if (!TyName.empty()) AccessTy = consumeIntegerLiteral<AccessType>(TyName); auto ResultType = evaluateImageTypeFromSPVIR(Dim, Arrayed); return {ResultType, AccessTy}; } static VCINTR::Optional<SPIRVArgDesc> parseIntelType(StringRef TyName) { if (!TyName.consume_front(IntelTypes::TypePrefix)) return {}; SPIRVType MainType; std::tie(MainType, TyName) = parseIntelMainType(TyName); AccessType AccType; std::tie(AccType, TyName) = parseAccessQualifier(TyName); #if VC_INTR_LLVM_VERSION_MAJOR >= 16 assert(TyName.starts_with(CommonTypes::TypeSuffix) && "Bad intel type"); #else assert(TyName.startswith(CommonTypes::TypeSuffix) && "Bad intel type"); #endif return SPIRVArgDesc{MainType, AccType}; } static VCINTR::Optional<SPIRVArgDesc> parseOCLType(StringRef TyName) { if (!TyName.consume_front(OCLTypes::TypePrefix)) return {}; // Sampler type. if (TyName.consume_front(OCLTypes::Sampler)) { #if VC_INTR_LLVM_VERSION_MAJOR >= 16 assert(TyName.starts_with(CommonTypes::TypeSuffix) && "Bad sampler type"); #else assert(TyName.startswith(CommonTypes::TypeSuffix) && "Bad sampler type"); #endif return {SPIRVType::Sampler}; } // Images are the rest. return parseImageType(TyName); } static VCINTR::Optional<SPIRVArgDesc> parseSPIRVIRType(StringRef TyName) { if (!TyName.consume_front(SPIRVIRTypes::TypePrefix)) return {}; if (TyName.consume_front(SPIRVIRTypes::Sampler)) return {SPIRVType::Sampler}; return parseSPIRVIRImageType(TyName); } // Parse opaque type name. // Ty -> "opencl." OCLTy | "spirv." SPVIRTy | "intel." IntelTy // OCLTy -> "sampler_t" | ImageTy // IntelTy -> MainIntelTy Acc "_t" // MainIntelTy -> "buffer" | "image2d_media_block" // ImageTy -> "image" Dim Acc "_t" // Dim -> "1d" | "1d_array" | "1d_buffer" | "2d" | "2d_array" | "3d" // Acc -> "_ro" | "_wo" | "_rw" // SPVIRTy -> "Sampler" | SPVImageTy // SPVImageTy -> "Image." _..._{Dim}_..._{Arrayed}_..._{Acc} // Dim, Arrayed, Acc - literal operands matching OpTypeImage operands in SPIRV // Assume that "opencl." "spirv." and "intel.buffer" types are well-formed.
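// For example, "opencl.image2d_array_wo_t" parses to {Image2dArray, // WriteOnly}, "intel.buffer_rw_t" to {Buffer, ReadWrite} and // "opencl.sampler_t" to {Sampler}.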
static VCINTR::Optional parseOpaqueType(StringRef TyName) { if (auto MaybeIntelTy = parseIntelType(TyName)) return VCINTR::getValue(MaybeIntelTy); if (auto MaybeOCL = parseOCLType(TyName)) return VCINTR::getValue(MaybeOCL); return parseSPIRVIRType(TyName); } #if VC_INTR_LLVM_VERSION_MAJOR >= 16 static SPIRVArgDesc analyzeTargetExtTypeArg(const Argument &Arg, TargetExtType *TET) { auto TyName = TET->getName(); if (TyName.consume_front(SPIRVIRTypes::TypePrefix)) { if (TyName.consume_front(SPIRVIRTypes::Sampler)) return {SPIRVType::Sampler}; if (TyName.consume_front(SPIRVIRTypes::Buffer)) { assert(TET->getNumIntParameters() == 1); auto Acc = static_cast(TET->getIntParameter(0)); return SPIRVArgDesc(SPIRVType::Buffer, Acc); } if (TyName.consume_front(SPIRVIRTypes::Image)) { auto Dim = static_cast( TET->getIntParameter(SPIRVIRTypes::Dimension)); auto Arr = static_cast(TET->getIntParameter(SPIRVIRTypes::Arrayed)); auto Acc = static_cast(TET->getIntParameter(SPIRVIRTypes::Access)); auto SpvTy = evaluateImageTypeFromSPVIR(Dim, Arr); if (SpvTy == SPIRVType::Image2d && Arg.getParent()->getAttributes().hasParamAttr( Arg.getArgNo(), VCFunctionMD::VCMediaBlockIO)) SpvTy = SPIRVType::Image2dMediaBlock; return SPIRVArgDesc(SpvTy, Acc); } llvm_unreachable("Unexpected spirv target extension type"); } llvm_unreachable("Unexpected target extension type"); } #endif //VC_INTR_LLVM_VERSION_MAJOR >= 16 static SPIRVArgDesc analyzeKernelArg(const Argument &Arg) { const Function *F = Arg.getParent(); // If there is vc attribute, then no conversion is needed. if (F->getAttributes().hasParamAttr(Arg.getArgNo(), VCFunctionMD::VCArgumentKind)) return {SPIRVType::None}; Type *Ty = Arg.getType(); #if VC_INTR_LLVM_VERSION_MAJOR >= 16 if (auto *TET = dyn_cast(Ty)) return analyzeTargetExtTypeArg(Arg, TET); #endif //VC_INTR_LLVM_VERSION_MAJOR >= 16 // Not a pointer means that it is general argument without annotation. if (!isa(Ty)) return {SPIRVType::Other}; auto *PointerTy = cast(Ty); // Annotated things are converted to global and constant pointers. const unsigned AddressSpace = PointerTy->getAddressSpace(); if (AddressSpace != SPIRVParams::SPIRVGlobalAS && AddressSpace != SPIRVParams::SPIRVConstantAS) return {SPIRVType::Other}; if (VCINTR::Type::isOpaquePointerTy(Ty)) return {SPIRVType::Pointer}; Type *PointeeTy = VCINTR::Type::getNonOpaquePtrEltTy(PointerTy); // Not a pointer to struct, cannot be sampler or image. if (!isa(PointeeTy)) return {SPIRVType::Pointer}; auto *StrTy = cast(PointeeTy); // Pointer to literal structure, cannot be sampler or image. // (is this case possible in SPIRV translator?) if (!StrTy->hasName()) return {SPIRVType::Pointer}; if (auto MaybeDesc = parseOpaqueType(StrTy->getName())) { SPIRVArgDesc Desc = VCINTR::getValue(MaybeDesc); assert(getOpaqueTypeAddressSpace(Desc.Ty) == AddressSpace && "Mismatching address space for type"); return Desc; } // If nothing was matched then it is simple pointer. return {SPIRVType::Pointer}; } static std::vector analyzeKernelArguments(Function &F) { std::vector Descs; std::transform(F.arg_begin(), F.arg_end(), std::back_inserter(Descs), [](const Argument &Arg) { return analyzeKernelArg(Arg); }); return Descs; } static bool isArgConvIntrinsic(const Value *V) { return GenXIntrinsic::getGenXIntrinsicID(V) == GenXIntrinsic::genx_address_convert; } // Get original value that should be used in restored kernel. 
// SPIRV arguments converted to old style with address convert intrinsic // so if intrinsic is present, then its type should be used instead of // current argument. Otherwise argument was not changed. static Value *getOriginalValue(Argument &Arg) { if (Arg.hasOneUse()) { User *U = Arg.user_back(); if (isArgConvIntrinsic(U) || isa(U) || isa(U) || isa(U)) return U; } assert(llvm::none_of(Arg.users(), isArgConvIntrinsic) && "Arg convert can occur as the only user of argument"); return &Arg; } static ArgKind mapSPIRVTypeToArgKind(SPIRVType Ty) { switch (Ty) { case SPIRVType::Buffer: case SPIRVType::Image1d: case SPIRVType::Image1dArray: case SPIRVType::Image1dBuffer: case SPIRVType::Image2d: case SPIRVType::Image2dArray: case SPIRVType::Image2dMediaBlock: case SPIRVType::Image3d: return ArgKind::Surface; case SPIRVType::Sampler: return ArgKind::Sampler; case SPIRVType::Pointer: case SPIRVType::Other: return ArgKind::General; case SPIRVType::None: break; } llvm_unreachable("Unexpected spirv type"); } static std::string mapSPIRVDescToArgDesc(SPIRVArgDesc SPIRVDesc) { std::string Desc; switch (SPIRVDesc.Ty) { case SPIRVType::Buffer: Desc += ArgDesc::Buffer; break; case SPIRVType::Image1d: Desc += ArgDesc::Image1d; break; case SPIRVType::Image1dArray: Desc += ArgDesc::Image1dArray; break; case SPIRVType::Image1dBuffer: Desc += ArgDesc::Image1dBuffer; break; case SPIRVType::Image2d: Desc += ArgDesc::Image2d; break; case SPIRVType::Image2dArray: Desc += ArgDesc::Image2dArray; break; case SPIRVType::Image2dMediaBlock: Desc += ArgDesc::Image2dMediaBlock; break; case SPIRVType::Image3d: Desc += ArgDesc::Image3d; break; case SPIRVType::Sampler: return ArgDesc::Sampler; case SPIRVType::Pointer: return ArgDesc::SVM; case SPIRVType::Other: return {}; default: llvm_unreachable("Unexpected spirv type"); } Desc += ' '; // Surface arg kinds also have access modifier. switch (SPIRVDesc.Acc) { case AccessType::ReadOnly: Desc += ArgDesc::ReadOnly; break; case AccessType::WriteOnly: Desc += ArgDesc::WriteOnly; break; case AccessType::ReadWrite: Desc += ArgDesc::ReadWrite; break; } return Desc; } static PointerType *getKernelArgPointerType(PointerType *ConvertTy, PointerType *ArgTy) { auto AddressSpace = ConvertTy->getPointerAddressSpace(); if (VCINTR::Type::isOpaquePointerTy(ArgTy)) return VCINTR::PointerType::getWithSamePointeeType(ArgTy, AddressSpace); auto *ConvertPointeeTy = VCINTR::Type::getNonOpaquePtrEltTy(ConvertTy); auto *ArgPointeeTy = VCINTR::Type::getNonOpaquePtrEltTy(ArgTy); if (ConvertPointeeTy->isAggregateType()) return ConvertTy; return ArgPointeeTy->getPointerTo(AddressSpace); } // Create new empty function with restored types based on old function and // arguments descriptors. static Function * transformKernelSignature(Function &F, const std::vector &Descs) { // Collect new kernel argument types. std::vector NewTypes; std::transform(F.arg_begin(), F.arg_end(), std::back_inserter(NewTypes), [](Argument &Arg) { auto *Ty = getOriginalValue(Arg)->getType(); auto *ArgTy = Arg.getType(); if (Ty->isPointerTy() && ArgTy->isPointerTy()) Ty = getKernelArgPointerType(cast(Ty), cast(ArgTy)); return Ty; }); auto *NewFTy = FunctionType::get(F.getReturnType(), NewTypes, false); auto *NewF = Function::Create(NewFTy, F.getLinkage(), F.getAddressSpace()); // Copy function info. LLVMContext &Ctx = F.getContext(); NewF->copyAttributesFrom(&F); NewF->takeName(&F); NewF->copyMetadata(&F, 0); NewF->setComdat(F.getComdat()); // Set appropriate argument attributes related to kind and desc. 
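// For instance, a read-write 2D image argument gets "VCArgumentKind"="2" // (ArgKind::Surface) and "VCArgumentDesc"="image2d_t read_write".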
std::string ArgDesc; for (int i = 0, e = Descs.size(); i != e; ++i) { SPIRVArgDesc SPVDesc = Descs[i]; // No need to set things, old style argument attributes were copied before. if (SPVDesc.Ty == SPIRVType::None) continue; // Add needed attributes to newly created function argument. ArgKind AK = mapSPIRVTypeToArgKind(SPVDesc.Ty); ArgDesc = mapSPIRVDescToArgDesc(SPVDesc); Attribute Attr = Attribute::get(Ctx, VCFunctionMD::VCArgumentKind, std::to_string(static_cast<unsigned>(AK))); NewF->addParamAttr(i, Attr); Attr = Attribute::get(Ctx, VCFunctionMD::VCArgumentDesc, ArgDesc); NewF->addParamAttr(i, Attr); } legalizeParamAttributes(NewF); return NewF; } // Rewrite function if it has SPIRV types as parameters. // Function // define spir_kernel @foo(%opencl.image2d_rw_t addrspace(1)* %im) { // %conv = ptrtoint %opencl.image2d_rw_t addrspace(1)* %im to i32 // ... // } // will be changed to // define spir_kernel @foo(i32 "VCArgumentKind"="2" "VCArgumentDesc"="image2d_t // read_write" %im) { // ... // } // If a parameter has at least the "VCArgumentKind" attribute then it is not // converted. static void rewriteKernelArguments(Function &F) { std::vector<SPIRVArgDesc> ArgDescs = analyzeKernelArguments(F); if (std::all_of( ArgDescs.begin(), ArgDescs.end(), [](const SPIRVArgDesc Desc) { return Desc.Ty == SPIRVType::None; })) // All arguments are in old style. return; // At the moment there are only two cases when a kernel function with // converted parameters can have users: // 1. Kernel is called from another function via fast composite. // For such kernels we just don't rewrite arguments on SPIRV write, so // they should not be present on read. // 2. Kernel is referenced in @llvm.global.annotations. // We have to replace the original function with the new one. if (!F.use_empty()) { Value *Ptr = &F; assert(Ptr->hasOneUse()); if (isa<ConstantExpr>(Ptr->user_back())) { Ptr = Ptr->user_back(); assert(Ptr->hasOneUse()); } auto *Struct = Ptr->user_back(); assert(Struct->hasOneUse()); auto *Array = Struct->user_back(); assert(Array->hasOneUse()); auto *GV = dyn_cast<GlobalVariable>(Array->user_back()); assert(GV && GV->getName() == "llvm.global.annotations"); } Function *NewF = transformKernelSignature(F, ArgDescs); F.getParent()->getFunctionList().insert(F.getIterator(), NewF); #if VC_INTR_LLVM_VERSION_MAJOR > 15 NewF->splice(NewF->begin(), &F); #else NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList()); #endif // Rewrite uses and delete conversion intrinsics. for (int i = 0, e = ArgDescs.size(); i != e; ++i) { Argument &OldArg = *std::next(F.arg_begin(), i); Argument &NewArg = *std::next(NewF->arg_begin(), i); Value *Orig = getOriginalValue(OldArg); NewArg.takeName(&OldArg); auto *OrigTy = Orig->getType(); auto *NewTy = NewArg.getType(); Value *NewVal = &NewArg; if (isa<Instruction>(Orig) && OrigTy != NewTy) { IRBuilder<> Builder(cast<Instruction>(Orig)); NewVal = Builder.CreatePointerBitCastOrAddrSpaceCast(NewVal, OrigTy); } Orig->replaceAllUsesWith(NewVal); if (Orig != &OldArg) { cast<Instruction>(Orig)->eraseFromParent(); } } F.mutateType(NewF->getType()); F.replaceAllUsesWith(NewF); F.eraseFromParent(); } // Rewrite kernels from SPIRV representation to old style VC // integers with attributes as incoming parameters. static void rewriteKernelsTypes(Module &M) { SmallVector<Function *, 4> Kernels; std::transform(M.begin(), M.end(), std::back_inserter(Kernels), [](Function &F) { return &F; }); for (auto *F : Kernels) { // Skip things that are not VC kernels.
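// A VC kernel must both use the SPIR_KERNEL calling convention and carry // the "VCFunction" attribute; everything else is left untouched.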
if (F->getCallingConv() != CallingConv::SPIR_KERNEL) continue; if (!VCINTR::AttributeList::hasFnAttr(F->getAttributes(), VCFunctionMD::VCFunction)) continue; rewriteKernelArguments(*F); } } bool GenXSPIRVReaderAdaptorImpl::run(Module &M) { auto *KernelMDs = M.getNamedMetadata(FunctionMD::GenXKernels); if (KernelMDs) return false; for (auto &&GV : M.globals()) { if (!GV.hasAttribute(VCModuleMD::VCGlobalVariable)) continue; if (GV.hasAttribute(VCModuleMD::VCVolatile)) GV.addAttribute(FunctionMD::GenXVolatile); if (GV.hasAttribute(VCModuleMD::VCByteOffset)) { auto Offset = GV.getAttribute(VCModuleMD::VCByteOffset).getValueAsString(); GV.addAttribute(FunctionMD::GenXByteOffset, Offset); } } rewriteKernelsTypes(M); SEVUtil(M).restoreSEVs(); for (auto &&F : M) runOnFunction(F); return true; } bool GenXSPIRVReaderAdaptorImpl::processVCFunctionAttributes(Function &F) { auto Attrs = F.getAttributes(); if (!VCINTR::AttributeList::hasFnAttr(Attrs, VCFunctionMD::VCFunction)) return false; dropFnAttribute(F, VCFunctionMD::VCFunction); if (VCINTR::AttributeList::hasFnAttr(Attrs, VCFunctionMD::VCStackCall)) { F.addFnAttr(FunctionMD::CMStackCall); dropFnAttribute(F, VCFunctionMD::VCStackCall); } if (VCINTR::AttributeList::hasFnAttr(Attrs, VCFunctionMD::VCCallable)) { F.addFnAttr(FunctionMD::CMCallable); dropFnAttribute(F, VCFunctionMD::VCCallable); } if (VCINTR::AttributeList::hasFnAttr(Attrs, VCFunctionMD::VCFCEntry)) { F.addFnAttr(FunctionMD::CMEntry); dropFnAttribute(F, VCFunctionMD::VCFCEntry); } if (VCINTR::AttributeList::hasFnAttr(Attrs, VCFunctionMD::VCSIMTCall)) { auto SIMTMode = StringRef(); SIMTMode = VCINTR::AttributeList::getAttributeAtIndex( Attrs, AttributeList::FunctionIndex, VCFunctionMD::VCSIMTCall) .getValueAsString(); F.addFnAttr(FunctionMD::CMGenxSIMT, SIMTMode); dropFnAttribute(F, VCFunctionMD::VCSIMTCall); } auto &&Context = F.getContext(); if (VCINTR::AttributeList::hasFnAttr(Attrs, VCFunctionMD::VCFloatControl)) { auto FloatControl = unsigned(0); VCINTR::AttributeList::getAttributeAtIndex( Attrs, AttributeList::FunctionIndex, VCFunctionMD::VCFloatControl) .getValueAsString() .getAsInteger(0, FloatControl); auto Attr = Attribute::get(Context, FunctionMD::CMFloatControl, std::to_string(FloatControl)); VCINTR::Function::addAttributeAtIndex(F, AttributeList::FunctionIndex, Attr); dropFnAttribute(F, VCFunctionMD::VCFloatControl); } if (auto *ReqdSubgroupSize = F.getMetadata(SPIRVParams::SPIRVSIMDSubgroupSize)) { auto SIMDSize = mdconst::extract(ReqdSubgroupSize->getOperand(0)) ->getZExtValue(); Attribute Attr = Attribute::get(Context, FunctionMD::OCLRuntime, std::to_string(SIMDSize)); VCINTR::Function::addAttributeAtIndex(F, AttributeList::FunctionIndex, Attr); } return true; } bool GenXSPIRVReaderAdaptorImpl::processVCKernelAttributes(Function &F) { if (!(F.getCallingConv() == CallingConv::SPIR_KERNEL)) return false; F.addFnAttr(FunctionMD::CMGenXMain); F.setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); auto Attrs = F.getAttributes(); auto *FunctionRef = ValueAsMetadata::get(&F); auto KernelName = F.getName(); auto ArgKinds = llvm::SmallVector(); auto SLMSize = unsigned(0); auto ArgOffset = unsigned(0); auto ArgIOKinds = llvm::SmallVector(); auto ArgDescs = llvm::SmallVector(); auto NBarrierCnt = unsigned(0); auto &&Context = F.getContext(); llvm::Type *I32Ty = llvm::Type::getInt32Ty(Context); for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { auto ArgNo = I->getArgNo(); auto ArgKind = unsigned(0); auto ArgIOKind = unsigned(0); auto ArgDesc = 
std::string(); auto AttrIndex = ArgNo + 1; if (VCINTR::AttributeList::hasAttributeAtIndex( Attrs, AttrIndex, VCFunctionMD::VCArgumentKind)) { VCINTR::AttributeList::getAttributeAtIndex(Attrs, AttrIndex, VCFunctionMD::VCArgumentKind) .getValueAsString() .getAsInteger(0, ArgKind); dropAttributeAtIndex(F, AttrIndex, VCFunctionMD::VCArgumentKind); } if (VCINTR::AttributeList::hasAttributeAtIndex( Attrs, AttrIndex, VCFunctionMD::VCArgumentIOKind)) { VCINTR::AttributeList::getAttributeAtIndex(Attrs, AttrIndex, VCFunctionMD::VCArgumentIOKind) .getValueAsString() .getAsInteger(0, ArgIOKind); dropAttributeAtIndex(F, AttrIndex, VCFunctionMD::VCArgumentIOKind); } if (VCINTR::AttributeList::hasAttributeAtIndex( Attrs, AttrIndex, VCFunctionMD::VCArgumentDesc)) { ArgDesc = VCINTR::AttributeList::getAttributeAtIndex( Attrs, AttrIndex, VCFunctionMD::VCArgumentDesc) .getValueAsString() .str(); dropAttributeAtIndex(F, AttrIndex, VCFunctionMD::VCArgumentDesc); } ArgKinds.push_back( llvm::ValueAsMetadata::get(llvm::ConstantInt::get(I32Ty, ArgKind))); ArgIOKinds.push_back( llvm::ValueAsMetadata::get(llvm::ConstantInt::get(I32Ty, ArgIOKind))); ArgDescs.push_back(llvm::MDString::get(Context, ArgDesc)); } if (VCINTR::AttributeList::hasFnAttr(Attrs, VCFunctionMD::VCSLMSize)) { VCINTR::AttributeList::getAttributeAtIndex( Attrs, AttributeList::FunctionIndex, VCFunctionMD::VCSLMSize) .getValueAsString() .getAsInteger(0, SLMSize); dropFnAttribute(F, VCFunctionMD::VCSLMSize); } if (VCINTR::AttributeList::hasFnAttr(Attrs, VCFunctionMD::VCNamedBarrierCount)) { VCINTR::AttributeList::getAttributeAtIndex( Attrs, AttributeList::FunctionIndex, VCFunctionMD::VCNamedBarrierCount) .getValueAsString() .getAsInteger(0, NBarrierCnt); dropFnAttribute(F, VCFunctionMD::VCNamedBarrierCount); } auto KernelMD = std::vector(); KernelMD.push_back(FunctionRef); KernelMD.push_back(llvm::MDString::get(Context, KernelName)); KernelMD.push_back(llvm::MDNode::get(Context, ArgKinds)); KernelMD.push_back(ConstantAsMetadata::get(ConstantInt::get(I32Ty, SLMSize))); KernelMD.push_back( ConstantAsMetadata::get(ConstantInt::get(I32Ty, ArgOffset))); KernelMD.push_back(llvm::MDNode::get(Context, ArgIOKinds)); KernelMD.push_back(llvm::MDNode::get(Context, ArgDescs)); KernelMD.push_back( ConstantAsMetadata::get(ConstantInt::get(I32Ty, NBarrierCnt))); NamedMDNode *KernelMDs = F.getParent()->getOrInsertNamedMetadata(FunctionMD::GenXKernels); llvm::MDNode *Node = MDNode::get(F.getContext(), KernelMD); KernelMDs->addOperand(Node); return true; } bool GenXSPIRVReaderAdaptorImpl::runOnFunction(Function &F) { if (!processVCFunctionAttributes(F)) return true; processVCKernelAttributes(F); return true; } //----------------------------------------------------------------------------- // New PM support //----------------------------------------------------------------------------- PreservedAnalyses llvm::GenXSPIRVReaderAdaptor::run(Module &M, ModuleAnalysisManager &) { GenXSPIRVReaderAdaptorImpl Impl; if (!Impl.run(M)) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserveSet(); return PA; } //----------------------------------------------------------------------------- // Legacy PM support //----------------------------------------------------------------------------- namespace { class GenXSPIRVReaderAdaptorLegacy final : public ModulePass { public: static char ID; public: explicit GenXSPIRVReaderAdaptorLegacy() : ModulePass(ID) {} llvm::StringRef getPassName() const override { return GenXSPIRVReaderAdaptor::getArgString(); } void 
getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnModule(Module &M) override; }; // class GenXSPIRVReaderAdaptorLegacy } // namespace char GenXSPIRVReaderAdaptorLegacy::ID = 0; INITIALIZE_PASS(GenXSPIRVReaderAdaptorLegacy, GenXSPIRVReaderAdaptor::getArgString(), GenXSPIRVReaderAdaptor::getArgString(), false, false) ModulePass *llvm::createGenXSPIRVReaderAdaptorPass() { return new GenXSPIRVReaderAdaptorLegacy(); } void GenXSPIRVReaderAdaptorLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); } bool GenXSPIRVReaderAdaptorLegacy::runOnModule(Module &M) { GenXSPIRVReaderAdaptorImpl impl; return impl.run(M); } vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/GenXSPIRVWriterAdaptor.cpp000066400000000000000000000676711475147027500277110ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2024 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ // This pass converts metadata to SPIRV format from whichever used in frontend. #include "AdaptorsCommon.h" #include "GenXSingleElementVectorUtil.h" #include "llvm/GenXIntrinsics/GenXIntrinsics.h" #include "llvm/GenXIntrinsics/GenXMetadata.h" #include "llvm/GenXIntrinsics/GenXSPIRVWriterAdaptor.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Metadata.h" #if VC_INTR_LLVM_VERSION_MAJOR >= 16 #include #endif #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Process.h" #include "llvmVCWrapper/ADT/StringRef.h" #include "llvmVCWrapper/IR/Attributes.h" #include "llvmVCWrapper/IR/DerivedTypes.h" #include "llvmVCWrapper/IR/Function.h" #include "llvmVCWrapper/IR/Instructions.h" using namespace llvm; using namespace genx; namespace { class GenXSPIRVWriterAdaptorImpl final { private: bool RewriteTypes = true; bool RewriteSingleElementVectors = true; public: explicit GenXSPIRVWriterAdaptorImpl(bool RewriteTypesIn, bool RewriteSingleElementVectorsIn) : RewriteTypes(RewriteTypesIn), RewriteSingleElementVectors(RewriteSingleElementVectorsIn) { overrideOptionsWithEnv(); } bool run(Module &M); private: // This function overrides options with environment variables // It is used for debugging. void overrideOptionsWithEnv() { auto RewriteSEVOpt = llvm::sys::Process::GetEnv("GENX_REWRITE_SEV"); if (RewriteSEVOpt) RewriteSingleElementVectors = VCINTR::getValue(RewriteSEVOpt) == "1"; } bool runOnFunction(Function &F); }; } // namespace // Get some pointer to global address space. static Type *getGlobalPtrType(LLVMContext &Ctx) { return PointerType::get(Type::getInt8Ty(Ctx), SPIRVParams::SPIRVGlobalAS); } // Get some opaque structure pointer to global address space. This is // how OCL/SPIRV types are implemented in clang/SPIRV Translator. static Type *getOpaquePtrType(Module *M, StringRef Name, unsigned AddressSpace) { StructType *STy = VCINTR::getTypeByName(M, Name); if (!STy) STy = StructType::create(M->getContext(), Name); return PointerType::get(STy, AddressSpace); } static Type *getSamplerType(Module *M) { std::string Name = OCLTypes::TypePrefix; Name += OCLTypes::Sampler; Name += CommonTypes::TypeSuffix; return getOpaquePtrType(M, Name, getOpaqueTypeAddressSpace(SPIRVType::Sampler)); } // Add access qualifiers and type suffix to type name. 
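// For example, "opencl.image2d" with AccessType::ReadWrite becomes // "opencl.image2d_rw_t".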
static void addCommonTypesPostfix(std::string &Name, AccessType Acc) { switch (Acc) { case AccessType::ReadOnly: Name += CommonTypes::ReadOnly; break; case AccessType::WriteOnly: Name += CommonTypes::WriteOnly; break; case AccessType::ReadWrite: Name += CommonTypes::ReadWrite; break; } Name += CommonTypes::TypeSuffix; } // Get or create image type from spirv type descriptor. Name encoding // is the same as in clang and it is required by SPIRV translator. static Type *getImageType(SPIRVArgDesc Desc, Module *M) { std::string Name = OCLTypes::TypePrefix; Name += OCLTypes::Image; switch (Desc.Ty) { case SPIRVType::Image1d: Name += OCLTypes::Dim1d; break; case SPIRVType::Image1dArray: Name += OCLTypes::Dim1dArray; break; case SPIRVType::Image1dBuffer: Name += OCLTypes::Dim1dBuffer; break; case SPIRVType::Image2d: Name += OCLTypes::Dim2d; break; case SPIRVType::Image2dArray: Name += OCLTypes::Dim2dArray; break; case SPIRVType::Image3d: Name += OCLTypes::Dim3d; break; default: llvm_unreachable("Unexpected spirv type for image"); } addCommonTypesPostfix(Name, Desc.Acc); return getOpaquePtrType(M, Name, getOpaqueTypeAddressSpace(Desc.Ty)); } // Get or create vector compute extension type with given access qualifier. static Type *getIntelExtType(SPIRVArgDesc Desc, Module *M) { std::string Name = IntelTypes::TypePrefix; switch (Desc.Ty) { case SPIRVType::Buffer: Name += IntelTypes::Buffer; break; case SPIRVType::Image2dMediaBlock: Name += IntelTypes::MediaBlockImage; break; default: llvm_unreachable("Unexpected spirv type for intel extensions"); } addCommonTypesPostfix(Name, Desc.Acc); return getOpaquePtrType(M, Name, getOpaqueTypeAddressSpace(Desc.Ty)); } // Sampler and surface arguments require opaque types that will be // translated in native SPIRV types. static Type *getOpaqueType(SPIRVArgDesc Desc, Module *M) { switch (Desc.Ty) { case SPIRVType::Sampler: return getSamplerType(M); case SPIRVType::Buffer: case SPIRVType::Image2dMediaBlock: return getIntelExtType(Desc, M); default: return getImageType(Desc, M); } } // Convert spirv type descriptor to LLVM type that later will be // handled by SPIRV translator. Mostly relying on implementation of // clang/SPIRV translator to handle image/sampler types. 
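// As a rough sketch (the address spaces are illustrative; the exact values
// come from getOpaqueTypeAddressSpace):
//   {Image2d, ReadWrite} -> %opencl.image2d_rw_t addrspace(1)*
//   {Sampler, ReadWrite} -> %opencl.sampler_t addrspace(2)*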
static Type *getArgTypeFromDesc(SPIRVArgDesc Desc, Argument &Arg) {
  std::string TypeName;
  switch (Desc.Ty) {
  case SPIRVType::Pointer:
    if (!Arg.hasByValAttr())
      return getGlobalPtrType(Arg.getContext());
    LLVM_FALLTHROUGH;
  case SPIRVType::Other:
  case SPIRVType::None:
    return Arg.getType();
  default:
    return getOpaqueType(Desc, Arg.getParent()->getParent());
  }
}

#if VC_INTR_LLVM_VERSION_MAJOR >= 16
static Type *getImageTargetType(SPIRVArgDesc Desc, Argument &Arg) {
  auto &Ctx = Arg.getContext();
  auto *VoidTy = Type::getVoidTy(Ctx);

  SmallVector<unsigned, 8> IntParams(7, 0);
  IntParams[SPIRVIRTypes::Access] = static_cast<unsigned>(Desc.Acc);

  switch (Desc.Ty) {
  case SPIRVType::Image1d:
    IntParams[SPIRVIRTypes::Dimension] = SPIRVIRTypes::Dim1D;
    break;
  case SPIRVType::Image1dArray:
    IntParams[SPIRVIRTypes::Dimension] = SPIRVIRTypes::Dim1D;
    IntParams[SPIRVIRTypes::Arrayed] = 1;
    break;
  case SPIRVType::Image1dBuffer:
    IntParams[SPIRVIRTypes::Dimension] = SPIRVIRTypes::DimBuffer;
    break;
  case SPIRVType::Image2d:
    IntParams[SPIRVIRTypes::Dimension] = SPIRVIRTypes::Dim2D;
    break;
  case SPIRVType::Image2dArray:
    IntParams[SPIRVIRTypes::Dimension] = SPIRVIRTypes::Dim2D;
    IntParams[SPIRVIRTypes::Arrayed] = 1;
    break;
  case SPIRVType::Image3d:
    IntParams[SPIRVIRTypes::Dimension] = SPIRVIRTypes::Dim3D;
    break;
  default:
    llvm_unreachable("Only images are supported here");
  }

  std::string NamePrefix = SPIRVIRTypes::TypePrefix;
  return TargetExtType::get(Ctx, NamePrefix + SPIRVIRTypes::Image, {VoidTy},
                            IntParams);
}

static Type *getArgTargetTypeFromDesc(SPIRVArgDesc Desc, Argument &Arg) {
  std::string NamePrefix = SPIRVIRTypes::TypePrefix;
  auto &Ctx = Arg.getContext();
  SmallVector<unsigned, 1> Acc = {static_cast<unsigned>(Desc.Acc)};

  switch (Desc.Ty) {
  default:
    return getImageTargetType(Desc, Arg);
  case SPIRVType::Sampler:
    return TargetExtType::get(Ctx, NamePrefix + SPIRVIRTypes::Sampler);
  case SPIRVType::Pointer:
    if (!Arg.hasByValAttr())
      return getGlobalPtrType(Ctx);
    LLVM_FALLTHROUGH;
  case SPIRVType::Other:
  case SPIRVType::None:
    return Arg.getType();
  case SPIRVType::Buffer:
    return TargetExtType::get(Ctx, NamePrefix + SPIRVIRTypes::Buffer, {}, Acc);
  case SPIRVType::Image2dMediaBlock:
    return getImageTargetType(SPIRVArgDesc(SPIRVType::Image2d, Desc.Acc), Arg);
  }
}
#endif // VC_INTR_LLVM_VERSION_MAJOR >= 16

static Function *
transformKernelSignature(Function &F, const std::vector<SPIRVArgDesc> &Descs) {
  SmallVector<Type *, 8> NewParams;

  // Before LLVM 16, we don't want to use target types. After LLVM 16, typed
  // pointers are always disabled, so we must use target types.
#if VC_INTR_LLVM_VERSION_MAJOR == 16
  bool UseTargetTypes = !F.getContext().supportsTypedPointers();
#elif VC_INTR_LLVM_VERSION_MAJOR > 16
  constexpr bool UseTargetTypes = true;
#endif

  auto GetArgType = [&](SPIRVArgDesc Desc, Argument &Arg) {
#if VC_INTR_LLVM_VERSION_MAJOR == 16
    if (UseTargetTypes)
      return getArgTargetTypeFromDesc(Desc, Arg);
#elif VC_INTR_LLVM_VERSION_MAJOR > 16
    return getArgTargetTypeFromDesc(Desc, Arg);
#endif
    return getArgTypeFromDesc(Desc, Arg);
  };

  std::transform(Descs.begin(), Descs.end(), F.arg_begin(),
                 std::back_inserter(NewParams), GetArgType);

  assert(!F.isVarArg() && "Kernel cannot be vararg");
  auto *NewFTy = FunctionType::get(F.getReturnType(), NewParams, false);
  auto *NewF = Function::Create(NewFTy, F.getLinkage(), F.getAddressSpace());
  NewF->copyAttributesFrom(&F);
  NewF->takeName(&F);
  NewF->copyMetadata(&F, 0);
  NewF->setComdat(F.getComdat());

  // Remove attributes that are no longer needed.
  for (int i = 0, e = Descs.size(); i != e; ++i) {
    if (Descs[i].Ty == SPIRVType::None)
      continue;
#if VC_INTR_LLVM_VERSION_MAJOR >= 16
    if (UseTargetTypes && Descs[i].Ty == SPIRVType::Image2dMediaBlock) {
      AttrBuilder AttrBuilder(NewF->getContext());
      AttrBuilder.addAttribute(VCFunctionMD::VCMediaBlockIO);
      NewF->addParamAttrs(i, AttrBuilder);
    }
#endif // VC_INTR_LLVM_VERSION_MAJOR >= 16
    NewF->removeParamAttr(i, VCFunctionMD::VCArgumentKind);
    NewF->removeParamAttr(i, VCFunctionMD::VCArgumentDesc);
  }

  legalizeParamAttributes(NewF);
  return NewF;
}

// Replace old arguments with new ones, generating conversion
// intrinsics for types that were changed.
static void rewriteArgumentUses(Instruction *InsertBefore, Argument &OldArg,
                                Argument &NewArg) {
  NewArg.takeName(&OldArg);
  Type *OldTy = OldArg.getType();
  Type *NewTy = NewArg.getType();
  if (OldTy == NewTy) {
    OldArg.replaceAllUsesWith(&NewArg);
    return;
  }

  IRBuilder<> Builder(InsertBefore);
  Value *Cast = nullptr;
  if (OldTy->isPointerTy() && NewTy->isPointerTy()) {
    auto OldAS = OldTy->getPointerAddressSpace();
    auto NewAS = NewTy->getPointerAddressSpace();
    // Some frontends mix private and global pointers which is not allowed by
    // SPIR-V. Using ptr->i64->ptr cast in this case to avoid failures until
    // the frontends are fixed.
    if (OldAS == NewAS || OldAS == SPIRVParams::SPIRVGenericAS ||
        NewAS == SPIRVParams::SPIRVGenericAS) {
      Cast = Builder.CreatePointerBitCastOrAddrSpaceCast(&NewArg, OldTy);
    } else {
      auto *Int64Ty = Builder.getInt64Ty();
      auto *PToI = Builder.CreatePtrToInt(&NewArg, Int64Ty);
      Cast = Builder.CreateIntToPtr(PToI, OldTy);
    }
  } else if (OldTy->isPointerTy() && NewTy->isIntegerTy()) {
    Cast = Builder.CreateIntToPtr(&NewArg, OldTy);
  } else if (OldTy->isIntegerTy() && NewTy->isPointerTy()) {
    Cast = Builder.CreatePtrToInt(&NewArg, OldTy);
  } else {
    auto *M = OldArg.getParent()->getParent();
    auto *ConvFn = GenXIntrinsic::getGenXDeclaration(
        M, GenXIntrinsic::genx_address_convert, {OldTy, NewTy});
    ConvFn->addFnAttr(VCFunctionMD::VCFunction);
    Cast = Builder.CreateCall(ConvFn, {&NewArg});
  }
  if (Cast)
    OldArg.replaceAllUsesWith(Cast);
}

// Parse argument desc.
// The string can contain arbitrary words, some of which have special meaning.
// Special words are listed in the ArgDesc namespace and correspond to
// SPIRVType and AccessType.
// If no special words were encountered, default to other general types.
static SPIRVArgDesc parseArgDesc(StringRef Desc) {
  SmallVector<StringRef, 4> Tokens;
  Desc.split(Tokens, /*Separator=*/' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);

  // Scan tokens until end or required info is found.
  VCINTR::Optional<AccessType> AccTy;
  VCINTR::Optional<SPIRVType> Ty;
  for (StringRef Tok : Tokens) {
    if (!Ty) {
      Ty = StringSwitch<VCINTR::Optional<SPIRVType>>(Tok)
               .Case(ArgDesc::Buffer, SPIRVType::Buffer)
               .Case(ArgDesc::Image1d, SPIRVType::Image1d)
               .Case(ArgDesc::Image1dArray, SPIRVType::Image1dArray)
               .Case(ArgDesc::Image1dBuffer, SPIRVType::Image1dBuffer)
               .Case(ArgDesc::Image2d, SPIRVType::Image2d)
               .Case(ArgDesc::Image2dArray, SPIRVType::Image2dArray)
               .Case(ArgDesc::Image2dMediaBlock, SPIRVType::Image2dMediaBlock)
               .Case(ArgDesc::Image3d, SPIRVType::Image3d)
               .Case(ArgDesc::SVM, SPIRVType::Pointer)
               .Case(ArgDesc::Sampler, SPIRVType::Sampler)
               .Default({});
    }
    if (!AccTy) {
      AccTy = StringSwitch<VCINTR::Optional<AccessType>>(Tok)
                  .Case(ArgDesc::ReadOnly, AccessType::ReadOnly)
                  .Case(ArgDesc::WriteOnly, AccessType::WriteOnly)
                  .Case(ArgDesc::ReadWrite, AccessType::ReadWrite)
                  .Default({});
    }
    if (Ty && AccTy)
      break;
  }

  // Default to other types.
  if (!Ty)
    return {SPIRVType::Other};

  // Default to read write access qualifier.
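  // For example (the special-word spellings are the ones listed in the
  // ArgDesc namespace, e.g. "image2d_t" and "read_only"):
  //   "image2d_t read_only" -> {Image2d, ReadOnly}
  //   "buffer_t"            -> {Buffer} with access defaulted below
  //   "foo bar"             -> {Other}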
  if (!AccTy)
    AccTy = AccessType::ReadWrite;
  return {VCINTR::getValue(Ty), VCINTR::getValue(AccTy)};
}

// General arguments can be either pointers or any other types.
static SPIRVArgDesc analyzeGeneralArg(StringRef Desc) {
  SPIRVArgDesc SPVDesc = parseArgDesc(Desc);
  switch (SPVDesc.Ty) {
  case SPIRVType::Other:
  case SPIRVType::Pointer:
    return SPVDesc;
  // Default to other types since there are cases where people write
  // strange things.
  default:
    return {SPIRVType::Other};
  }
}

static SPIRVArgDesc analyzeSurfaceArg(StringRef Desc) {
  SPIRVArgDesc SPVDesc = parseArgDesc(Desc);
  switch (SPVDesc.Ty) {
  case SPIRVType::Buffer:
  case SPIRVType::Image1d:
  case SPIRVType::Image1dArray:
  case SPIRVType::Image1dBuffer:
  case SPIRVType::Image2d:
  case SPIRVType::Image2dArray:
  case SPIRVType::Image2dMediaBlock:
  case SPIRVType::Image3d:
    return SPVDesc;
  // CMRT does not require annotating arguments.
  // Default to read_write buffer_t currently.
  case SPIRVType::Other:
    return {SPIRVType::Buffer};
  default:
    llvm_unreachable("Unexpected descs on surface argument");
  }
}

// Redundant analysis for sampler. Sampler arg kind can
// have "sampler_t" annotation.
static SPIRVArgDesc analyzeSamplerArg(StringRef Desc) {
  SPIRVArgDesc SPVDesc = parseArgDesc(Desc);
  switch (SPVDesc.Ty) {
  // sampler_t annotation.
  case SPIRVType::Sampler:
  // CMRT does not require annotating arguments.
  case SPIRVType::Other:
    return {SPIRVType::Sampler};
  default:
    llvm_unreachable("Unexpected descs on sampler argument");
  }
}

// Convert arg kind and arg desc to a spirv type descriptor. Requires
// parsing of arg desc.
static SPIRVArgDesc analyzeArgumentAttributes(ArgKind Kind, StringRef Desc) {
  switch (Kind) {
  case ArgKind::General:
    return analyzeGeneralArg(Desc);
  case ArgKind::Sampler:
    return analyzeSamplerArg(Desc);
  case ArgKind::Surface:
    return analyzeSurfaceArg(Desc);
  }
  return {SPIRVType::None};
}

// Extract ArgKind from the VCArgumentKind attribute.
// In the presence of implicit arguments (which is temporary), the value
// can be outside those listed in the ArgKind enum.
// Such values are not processed later.
// Return None if there is no such attribute.
static VCINTR::Optional<ArgKind> extractArgumentKind(const Argument &Arg) {
  const Function *F = Arg.getParent();
  const AttributeList Attrs = F->getAttributes();
  if (!Attrs.hasParamAttr(Arg.getArgNo(), VCFunctionMD::VCArgumentKind))
    return {};

  const Attribute Attr =
      Attrs.getParamAttr(Arg.getArgNo(), VCFunctionMD::VCArgumentKind);
  unsigned AttrVal = {};
  const bool Conv = Attr.getValueAsString().getAsInteger(0, AttrVal);
  assert(!Conv && "Expected integer value as arg kind");
  // TODO: add some sanity check that the value can be casted to ArgKind
  return static_cast<ArgKind>(AttrVal);
}

// Extract string desc from the VCArgumentDesc attribute.
static StringRef extractArgumentDesc(const Argument &Arg) {
  const Function *F = Arg.getParent();
  const AttributeList Attrs = F->getAttributes();
  return Attrs.getParamAttr(Arg.getArgNo(), VCFunctionMD::VCArgumentDesc)
      .getValueAsString();
}

// Get SPIRV type and access qualifier of a kernel argument
// using its corresponding attributes.
// Default to None if no information is available.
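// E.g. an argument annotated "VCArgumentKind"="2" (a surface) with
// "VCArgumentDesc"="image2d_t read_write", as in the rewrite example below,
// yields {Image2d, ReadWrite}.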
static SPIRVArgDesc analyzeKernelArg(const Argument &Arg) {
  if (auto Kind = extractArgumentKind(Arg)) {
    const StringRef Desc = extractArgumentDesc(Arg);
    return analyzeArgumentAttributes(VCINTR::getValue(Kind), Desc);
  }
  return {SPIRVType::None};
}

static std::vector<SPIRVArgDesc> analyzeKernelArguments(const Function &F) {
  std::vector<SPIRVArgDesc> Descs;
  std::transform(F.arg_begin(), F.arg_end(), std::back_inserter(Descs),
                 [](const Argument &Arg) { return analyzeKernelArg(Arg); });
  return Descs;
}

// Rewrite function if it has SPIRV types as parameters.
// Function
// define spir_kernel @foo(i32 "VCArgumentKind"="2" "VCArgumentDesc"="image2d_t
// read_write" %im) {
// ...
// }
// will be changed to
// define spir_kernel @foo(%opencl.image2d_rw_t addrspace(1)* %im) {
//   %conv = ptrtoint %opencl.image2d_rw_t addrspace(1)* %im to i32
// ...
// }
// Parameters that are not part of the public interface (implicit arguments)
// are not converted. Currently these are generated by the old cmc. They are
// not needed for the IGC VC backend.
static void rewriteKernelArguments(Function &F) {
  // Do not touch callable kernels at this moment. Other kernels
  // should have no uses.
  if (!F.use_empty())
    return;

  std::vector<SPIRVArgDesc> ArgDescs = analyzeKernelArguments(F);
  Function *NewF = transformKernelSignature(F, ArgDescs);
  F.getParent()->getFunctionList().insert(F.getIterator(), NewF);
#if VC_INTR_LLVM_VERSION_MAJOR > 15
  NewF->splice(NewF->begin(), &F);
#else
  NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
#endif

  Instruction *InsPt = &NewF->getEntryBlock().front();
  for (auto &&ArgPair : llvm::zip(F.args(), NewF->args()))
    rewriteArgumentUses(InsPt, std::get<0>(ArgPair), std::get<1>(ArgPair));

#if VC_INTR_LLVM_VERSION_MAJOR >= 17
  // There might be module-level named metadata referencing the old function,
  // so replace those usages with the new function. This can be done safely
  // (it will not cause a type mismatch) when only opaque pointers are used
  // (since LLVM 17).
  F.replaceAllUsesWith(NewF);
#endif
  F.eraseFromParent();
}

// Rewrite kernels from VC representation to SPIRV
// with different types as incoming parameters.
static void rewriteKernelsTypes(Module &M) {
  SmallVector<Function *, 8> Kernels;
  std::transform(M.begin(), M.end(), std::back_inserter(Kernels),
                 [](Function &F) { return &F; });

  for (auto *F : Kernels)
    if (F->getCallingConv() == CallingConv::SPIR_KERNEL)
      rewriteKernelArguments(*F);
}

#if VC_INTR_LLVM_VERSION_MAJOR >= 16
static inline void FixAttributes(Function &F, Attribute::AttrKind Attr,
                                 MemoryEffects MemEf) {
  if (F.getFnAttribute(Attr).isValid()) {
    for (auto &U : F.uses()) {
      if (auto *Call = dyn_cast<CallInst>(U.getUser())) {
        Call->setMemoryEffects(MemEf);
      }
    }
    F.removeFnAttr(Attr);
  }
}
#endif

bool GenXSPIRVWriterAdaptorImpl::run(Module &M) {
  auto TargetTriple = StringRef(M.getTargetTriple());
  if (VCINTR::StringRef::starts_with(TargetTriple, "genx")) {
    if (VCINTR::StringRef::starts_with(TargetTriple, "genx32"))
      M.setTargetTriple("spir");
    else
      M.setTargetTriple("spir64");
  }

  for (auto &&GV : M.globals()) {
    GV.addAttribute(VCModuleMD::VCGlobalVariable);
    if (GV.hasAttribute(FunctionMD::GenXVolatile))
      GV.addAttribute(VCModuleMD::VCVolatile);
    if (GV.hasAttribute(FunctionMD::GenXByteOffset)) {
      auto Offset =
          GV.getAttribute(FunctionMD::GenXByteOffset).getValueAsString();
      GV.addAttribute(VCModuleMD::VCByteOffset, Offset);
    }
  }

  for (auto &&F : M)
    runOnFunction(F);

  // Old metadata is not needed anymore at this point.
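  // A rough sketch of the node being dropped; the operand order follows
  // KernelMDOp (function, name, arg kinds, SLM size, arg offset, arg IO
  // kinds, arg descs, named barrier count) as emitted by the reader adaptor:
  //   !genx.kernels = !{!0}
  //   !0 = !{void (i32)* @foo, !"foo", !1, i32 0, i32 0, !2, !3, i32 0}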
  if (auto *MD = M.getNamedMetadata(FunctionMD::GenXKernels))
    M.eraseNamedMetadata(MD);

  if (RewriteTypes)
    rewriteKernelsTypes(M);

  if (RewriteSingleElementVectors)
    SEVUtil(M).rewriteSEVs();

#if VC_INTR_LLVM_VERSION_MAJOR >= 16
  // ReadNone and ReadOnly are no longer supported for intrinsics:
  // https://reviews.llvm.org/D135780
  for (auto &&F : M) {
    FixAttributes(F, llvm::Attribute::ReadNone, llvm::MemoryEffects::none());
    FixAttributes(F, llvm::Attribute::ReadOnly,
                  llvm::MemoryEffects::readOnly());
    FixAttributes(F, llvm::Attribute::WriteOnly,
                  llvm::MemoryEffects::writeOnly());
  }
#endif

  return true;
}

bool GenXSPIRVWriterAdaptorImpl::runOnFunction(Function &F) {
  if (F.isIntrinsic() && !GenXIntrinsic::isGenXIntrinsic(&F))
    return true;
  F.addFnAttr(VCFunctionMD::VCFunction);

  auto Attrs = F.getAttributes();
  if (VCINTR::AttributeList::hasFnAttr(Attrs, FunctionMD::CMStackCall)) {
    F.addFnAttr(VCFunctionMD::VCStackCall);
  }

  if (VCINTR::AttributeList::hasFnAttr(Attrs, FunctionMD::CMCallable)) {
    F.addFnAttr(VCFunctionMD::VCCallable);
  }

  if (VCINTR::AttributeList::hasFnAttr(Attrs, FunctionMD::CMEntry)) {
    F.addFnAttr(VCFunctionMD::VCFCEntry);
  }

  if (VCINTR::AttributeList::hasFnAttr(Attrs, FunctionMD::CMGenxSIMT)) {
    auto SIMTMode = StringRef();
    SIMTMode = VCINTR::AttributeList::getAttributeAtIndex(
                   Attrs, AttributeList::FunctionIndex, FunctionMD::CMGenxSIMT)
                   .getValueAsString();
    F.addFnAttr(VCFunctionMD::VCSIMTCall, SIMTMode);
  }

  auto &&Context = F.getContext();
  if (VCINTR::AttributeList::hasFnAttr(Attrs, FunctionMD::CMFloatControl)) {
    auto FloatControl = unsigned(0);
    VCINTR::AttributeList::getAttributeAtIndex(Attrs,
                                               AttributeList::FunctionIndex,
                                               FunctionMD::CMFloatControl)
        .getValueAsString()
        .getAsInteger(0, FloatControl);

    auto Attr = Attribute::get(Context, VCFunctionMD::VCFloatControl,
                               std::to_string(FloatControl));
    VCINTR::Function::addAttributeAtIndex(F, AttributeList::FunctionIndex,
                                          Attr);
  }

  auto *KernelMDs = F.getParent()->getNamedMetadata(FunctionMD::GenXKernels);
  if (!KernelMDs)
    return true;

  if (VCINTR::AttributeList::hasFnAttr(Attrs, FunctionMD::OCLRuntime)) {
    auto SIMDSize = unsigned(0);
    VCINTR::AttributeList::getAttributeAtIndex(
        Attrs, AttributeList::FunctionIndex, FunctionMD::OCLRuntime)
        .getValueAsString()
        .getAsInteger(0, SIMDSize);
    auto SizeMD = ConstantAsMetadata::get(
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(Context), SIMDSize));
    F.setMetadata(SPIRVParams::SPIRVSIMDSubgroupSize,
                  MDNode::get(Context, SizeMD));
  }

  auto *KernelMD = GetOldStyleKernelMD(F);
  if (!KernelMD)
    return true;

  F.setCallingConv(CallingConv::SPIR_KERNEL);

  auto MDName =
      cast<MDString>(KernelMD->getOperand(KernelMDOp::Name).get())->getString();
  if (MDName != F.getName())
    F.setName(MDName);

  if (KernelMD->getNumOperands() > KernelMDOp::ArgKinds) {
    if (auto *KindsNode =
            dyn_cast<MDNode>(KernelMD->getOperand(KernelMDOp::ArgKinds))) {
      for (unsigned ArgNo = 0, e = KindsNode->getNumOperands(); ArgNo != e;
           ++ArgNo) {
        if (auto *VM = dyn_cast<ValueAsMetadata>(KindsNode->getOperand(ArgNo)))
          if (auto *V = dyn_cast<ConstantInt>(VM->getValue())) {
            auto ArgKind = V->getZExtValue();
            auto Attr = Attribute::get(Context, VCFunctionMD::VCArgumentKind,
                                       std::to_string(ArgKind));
            VCINTR::Function::addAttributeAtIndex(F, ArgNo + 1, Attr);
          }
      }
    }
  }

  if (KernelMD->getNumOperands() > KernelMDOp::SLMSize) {
    if (auto *VM = dyn_cast<ValueAsMetadata>(
            KernelMD->getOperand(KernelMDOp::SLMSize)))
      if (auto *V = dyn_cast<ConstantInt>(VM->getValue())) {
        auto SLMSize = V->getZExtValue();
        auto Attr = Attribute::get(Context, VCFunctionMD::VCSLMSize,
                                   std::to_string(SLMSize));
        VCINTR::Function::addAttributeAtIndex(F, AttributeList::FunctionIndex,
                                              Attr);
      }
  }

  if (KernelMD->getNumOperands() > KernelMDOp::ArgIOKinds) {
    if (auto *KindsNode =
            dyn_cast<MDNode>(KernelMD->getOperand(KernelMDOp::ArgIOKinds))) {
      for (unsigned ArgNo = 0, e = KindsNode->getNumOperands(); ArgNo != e;
           ++ArgNo) {
        if (auto *VM = dyn_cast<ValueAsMetadata>(KindsNode->getOperand(ArgNo)))
          if (auto *V = dyn_cast<ConstantInt>(VM->getValue())) {
            auto ArgKind = V->getZExtValue();
            auto Attr = Attribute::get(Context, VCFunctionMD::VCArgumentIOKind,
                                       std::to_string(ArgKind));
            VCINTR::Function::addAttributeAtIndex(F, ArgNo + 1, Attr);
          }
      }
    }
  }

  if (KernelMD->getNumOperands() > KernelMDOp::ArgTypeDescs) {
    if (auto *Node =
            dyn_cast<MDNode>(KernelMD->getOperand(KernelMDOp::ArgTypeDescs))) {
      for (unsigned ArgNo = 0, e = Node->getNumOperands(); ArgNo != e;
           ++ArgNo) {
        if (auto *MS = dyn_cast<MDString>(Node->getOperand(ArgNo))) {
          auto &&Desc = MS->getString();
          auto Attr =
              Attribute::get(Context, VCFunctionMD::VCArgumentDesc, Desc);
          VCINTR::Function::addAttributeAtIndex(F, ArgNo + 1, Attr);
        }
      }
    }
  }

  if (KernelMD->getNumOperands() > KernelMDOp::NBarrierCnt) {
    if (auto *VM = dyn_cast<ValueAsMetadata>(
            KernelMD->getOperand(KernelMDOp::NBarrierCnt)))
      if (auto *V = dyn_cast<ConstantInt>(VM->getValue())) {
        auto NBarrierCnt = V->getZExtValue();
        auto Attr = Attribute::get(Context, VCFunctionMD::VCNamedBarrierCount,
                                   std::to_string(NBarrierCnt));
        VCINTR::Function::addAttributeAtIndex(F, AttributeList::FunctionIndex,
                                              Attr);
      }
  }

  return true;
}

//-----------------------------------------------------------------------------
// New PM support
//-----------------------------------------------------------------------------
PreservedAnalyses llvm::GenXSPIRVWriterAdaptor::run(Module &M,
                                                    ModuleAnalysisManager &) {
  GenXSPIRVWriterAdaptorImpl Impl(RewriteTypes, RewriteSingleElementVectors);
  if (!Impl.run(M))
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

//-----------------------------------------------------------------------------
// Legacy PM support
//-----------------------------------------------------------------------------
namespace {

class GenXSPIRVWriterAdaptorLegacy final : public ModulePass {
public:
  static char ID;
  bool RewriteTypes = true;
  bool RewriteSingleElementVectors = true;

public:
  explicit GenXSPIRVWriterAdaptorLegacy() : ModulePass(ID) {}
  explicit GenXSPIRVWriterAdaptorLegacy(bool RewriteTypesIn,
                                        bool RewriteSingleElementVectorsIn)
      : ModulePass(ID), RewriteTypes(RewriteTypesIn),
        RewriteSingleElementVectors(RewriteSingleElementVectorsIn) {}

  llvm::StringRef getPassName() const override {
    return GenXSPIRVWriterAdaptor::getArgString();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnModule(Module &M) override;
};

} // namespace

char GenXSPIRVWriterAdaptorLegacy::ID = 0;

INITIALIZE_PASS(GenXSPIRVWriterAdaptorLegacy,
                GenXSPIRVWriterAdaptor::getArgString(),
                GenXSPIRVWriterAdaptor::getArgString(), false, false)

ModulePass *
llvm::createGenXSPIRVWriterAdaptorPass(bool RewriteTypes,
                                       bool RewriteSingleElementVectors) {
  return new GenXSPIRVWriterAdaptorLegacy(RewriteTypes,
                                          RewriteSingleElementVectors);
}

void GenXSPIRVWriterAdaptorLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
}

bool GenXSPIRVWriterAdaptorLegacy::runOnModule(Module &M) {
  GenXSPIRVWriterAdaptorImpl Impl(RewriteTypes, RewriteSingleElementVectors);
  return Impl.run(M);
}
vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/GenXSimdCFLowering.cpp000066400000000000000000002341441475147027500271010ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================
Copyright (C) 2015-2024 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ // Lower CM SIMD control flow /// CMSimdCFLowering /// ---------------- /// /// This pass lowers CM SIMD control flow into a form where the IR reflects /// the semantics. /// /// On entry, SIMD control flow is as it was generated by Clang codegen: /// /// * Any SIMD control flow conditional branch is a ``br`` instruction with a /// scalar condition that is the result of an ``llvm.genx.simdcf.any`` /// intrinsic, which has a predicate (vector bool) operand. /// /// * Any reducing intrinsic (cm_sum, cm_prod) has its input gated by a /// ``llvm.genx.simdcf.predicate`` intrinsic. The semantics of the CM language /// are that, in a reducing function such as cm_sum, elements corresponding /// to a disabled channel take a default value (0 for cm_sum and 1 for cm_prod) /// such that the disabled elements do not affect the result. /// /// * There is no other masking of instructions by SIMD control flow condition(s). /// /// In this state, the IR does not reflect the semantics of SIMD control flow. /// No existing LLVM analysis or transformation understands what we really mean /// by these intrinsics, so we are in danger of incorrect changes being made to /// the IR by LLVM optimizations. /// /// This CMSimdCFLowering pass runs very early, pretty much straight after Clang /// codegen, so no other LLVM pass has had a chance to make an incorrect change. /// This pass fixes the IR to reflect the semantics, so that subsequent passes /// do not make an incorrect change. After this lowering, the IR uses: /// /// * ``llvm.genx.simdcf.goto`` and ``llvm.genx.simdcf.join`` intrinsics for the /// control flow, using and generating explicit EM (execution mask) values; /// /// * predication of certain SIMD-CF-controlled instructions by the current EM /// value, using select or the predicate operand of wrregion. /// /// See the GenX language reference for details of these intrinsics and the model /// for representing SIMD control flow semantics in LLVM IR. /// /// Language semantics /// ^^^^^^^^^^^^^^^^^^ /// /// The algorithm that this pass uses allows more general semantics than is /// currently defined to work in the CM language. The SIMD control flow can be /// arbitrarily unstructured, and it can be mixed with scalar control flow in /// an arbitrarily unstructured way. It also allows up to 32 channels. /// /// Algorithm /// ^^^^^^^^^ /// /// 1. Find the SIMD branches, ones where Clang codegen has used /// ``llvm.genx.simdcf.any``. /// /// 2. Determine which basic blocks need to be predicated. Any block that is /// *control dependent* on a SIMD branch needs to be predicated. See Muchnick /// section 9.5 *Program-Dependence Graphs*. For each edge m->n in the /// control flow graph where n does not post-dominate m, find l, the closest /// common ancestor in the post-dominance tree of m and n. All nodes in the /// post-dominance tree from l to n except l itself are control dependent on /// m. /// /// This step also issues an error if any block is found to be control /// dependent on multiple SIMD branches that have different SIMD widths. /// /// 3. Mark the branch at the end of any to-be-predicated block as a SIMD /// branch. This is what allows the arbitrarily unstructured mixing of SIMD /// and scalar control flow; any scalar control flow that is control dependent /// on SIMD control flow is itself converted to SIMD control flow with a /// splatted predicate. 
(It will get converted back into scalar control flow /// if it turns out to be strictly nested inside SIMD control flow.) /// /// 4. Fix SIMD branches: /// /// a. Convert a backward SIMD branch into a forward one over a backward /// unconditional branch, as required by the GenX backend's IR model of /// SIMD CF. /// /// b. For a SIMD branch, ensure that the false leg is fallthrough, also as /// required by the GenX backend's IR model. /// /// Both these changes are likely to be broken by subsequent LLVM passes, but /// leaving the IR in a state that can be recovered to re-impose these /// restrictions. /// /// 5. Find the join points, any point that is the non-fallthrough target of a /// SIMD branch. Split out any join point into its own basic block (so we /// have somewhere to put the join intrinsic and its conditional branch). /// /// 6. Determine the JIP for each conditional branch that will be a goto (a SIMD /// branch) or join (a join point). Starting at the goto or join, the JIP is /// the closest point lower down where a channel could become re-enabled. /// /// 7. Predicate the code. Code within blocks identified in step 2 are /// predicated. Code in other blocks is also predicated, but without /// predicating stores, in a subroutine that is called with a predicated call /// and thus takes a call mask. /// /// a. A store to a vector local variable (an alloca) is predicated. /// /// i. If it is the same vector width as the controlling SIMD branch width, /// then the store is predicated by turning it into a load, select, /// store sequence. /// /// ii. If it is a wider vector than the controlling SIMD branch width, /// then we look back through the chain of wrregions whose result is /// stored to find one of the right width, and predicate that. /// /// iii. If no wrregion of the correct width is found, then an error is /// issued, as the original CM code must be incorrect. /// /// b. Shared function intrinsics with a predicate operand are predicated /// (for example gather). /// /// c. Any ``llvm.genx.simdcf.predicate`` intrinsic is changed to a select. /// /// d. A subroutine call is predicated by adding an extra argument for the /// predicate, which acts as the *call mask*. /// /// e. There is a single 32 bit EM (execution mask) global variable created /// for the whole function, statically initialized to all ones. In /// implementing predication in the items above, the EM value is loaded from /// the variable. If a narrower-than-32-bit EM value is required, it is /// sliced using a ``shufflevector``. /// /// Like any other global variable, the EM global variable is transformed by /// the CMABI pass into a local variable that is passed in and out of /// subroutines as required. Where it is passed into a subroutine, this is /// the *call mask* of the subroutine call. /// /// 8. Lower the control flow. This is where the ``llvm.genx.simdcf.goto`` and /// ``llvm.genx.simdcf.join`` intrinsic calls are inserted. Also each join /// has a RM (resume mask) local variable created for the whole function, /// initialized to all zeros. The width of the RM for a particular join is /// the same as the width of the SIMD branches that modify that join's RM. /// For multiple SIMD branches modifying the same join's RM, the widths are /// guaranteed to be the same because of the check carried out in 2. /// /// The inserted goto and join intrinsics manipulate the EM and RM values. 
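/// As a rough sketch (the operand and result shapes follow the GenX language
/// reference; the names and the 16-wide width are illustrative), a lowered
/// goto and its join look like:
///
///   %goto = call {<32 x i1>, <16 x i1>, i1}
///       @llvm.genx.simdcf.goto.v32i1.v16i1(<32 x i1> %EM, <16 x i1> %RM,
///                                          <16 x i1> %cond)
///   ; ... extract the new EM, the new RM and the scalar branch condition ...
///   %join = call {<32 x i1>, i1}
///       @llvm.genx.simdcf.join.v32i1.v16i1(<32 x i1> %EM.new, <16 x i1> %RM.new)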
///
//===----------------------------------------------------------------------===//

#include "llvm/GenXIntrinsics/GenXSimdCFLowering.h"

#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/GenXIntrinsics/GenXIntrOpts.h"
#include "llvm/GenXIntrinsics/GenXIntrinsics.h"
#include "llvm/GenXIntrinsics/GenXMetadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#if VC_INTR_LLVM_VERSION_MAJOR >= 16
#include "llvm/Support/ModRef.h"
#endif

#include <map>
#include <set>

#include "llvmVCWrapper/IR/DerivedTypes.h"
#include "llvmVCWrapper/IR/Type.h"

#define DEBUG_TYPE "cmsimdcflowering"

using namespace llvm;

namespace {

// Grouping : utility class to maintain a grouping, a partition of a set of
// items into disjoint groups. The initial state is that each item is in its
// own group, then you call joinGroups to join two groups together.
template <typename T> class Grouping {
  std::map<T, T> Group;

public:
  // joinGroups : join the groups that Block1 and Block2 are in
  void joinGroups(T Block1, T Block2) {
    auto G1 = getGroup(Block1);
    auto G2 = getGroup(Block2);
    if (G1 != G2)
      Group[G2] = G1;
  }
  // getGroup : get the group for Block
  // The chain of blocks between Block and its group are modified to point
  // directly to the group at the end of the chain.
  T getGroup(T Block) {
    SmallVector<T, 4> Chain;
    T G;
    for (;;) {
      G = Group[Block];
      if (!G)
        Group[Block] = G = Block; // never seen before, initialize
      if (G == Block)
        break;
      Chain.push_back(Block);
      Block = G;
    }
    for (auto i = Chain.begin(), e = Chain.end(); i != e; ++i)
      Group[*i] = G;
    return G;
  }
};

// Diagnostic information for error/warning relating to SIMD control flow.
class DiagnosticInfoSimdCF : public DiagnosticInfoOptimizationBase {
private:
  static int KindID;
  static int getKindID() {
    if (KindID == 0)
      KindID = llvm::getNextAvailablePluginDiagnosticKind();
    return KindID;
  }

public:
  static void emit(Instruction *Inst, StringRef Msg,
                   DiagnosticSeverity Severity = DS_Error);
  DiagnosticInfoSimdCF(DiagnosticSeverity Severity, const Function &Fn,
                       const DebugLoc &DLoc, StringRef Msg)
      : DiagnosticInfoOptimizationBase((DiagnosticKind)getKindID(), Severity,
                                       /*PassName=*/nullptr, Msg, Fn, DLoc) {}
  // This kind of message is always enabled, and not affected by -rpass.
  virtual bool isEnabled() const override { return true; }
  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == getKindID();
  }
  void print(DiagnosticPrinter &DP) const override {
    DP << "CMSimdCFLowering: " << RemarkName;
  }
};
int DiagnosticInfoSimdCF::KindID = 0;

// Call graph node
struct CGNode {
  Function *F;
  std::set<CGNode *> UnvisitedCallers;
  std::set<CGNode *> Callees;
};

// The ISPC SIMD CF lowering pass (a module pass)
class ISPCSimdCFLowering : public ModulePass {
public:
  static char ID;

  ISPCSimdCFLowering() : ModulePass(ID) {}
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    ModulePass::getAnalysisUsage(AU);
  }
  bool runOnModule(Module &M) override;
};

// The CM SIMD CF lowering pass (a function pass)
class CMSimdCFLoweringImpl final {
  using GListType = std::vector<GlobalVariable *>;
  std::map<Function *, DominatorTree *> DTs;
  GListType VolList;

public:
  CMSimdCFLoweringImpl() {
    initializeCMSimdCFLoweringLegacyPass(*PassRegistry::getPassRegistry());
  }
  bool run(Module &M);

private:
  DominatorTree *getDomTree(Function *F);
  bool isGlobalInterseptVol(GlobalVariable &G, const GListType &VolList);
  void initializeVolatileGlobals(Module &M);
  void calculateVisitOrder(Module *M, std::vector<Function *> *VisitOrder);
};

} // namespace

char ISPCSimdCFLowering::ID = 0;
namespace llvm {
void initializeISPCSimdCFLoweringPass(PassRegistry &);
}
INITIALIZE_PASS_BEGIN(ISPCSimdCFLowering, "ispcsimdcflowering",
                      "Lower ISPC SIMD control flow", false, false)
INITIALIZE_PASS_END(ISPCSimdCFLowering, "ispcsimdcflowering",
                    "Lower ISPC SIMD control flow", false, false)

Pass *llvm::createISPCSimdCFLoweringPass() {
  initializeISPCSimdCFLoweringPass(*PassRegistry::getPassRegistry());
  return new ISPCSimdCFLowering();
}

bool ISPCSimdCFLowering::runOnModule(Module &M) {
  return CMSimdCFLoweringImpl().run(M);
}

DominatorTree *CMSimdCFLoweringImpl::getDomTree(Function *F) {
  if (!DTs[F]) {
    auto DT = new DominatorTree;
    DT->recalculate(*F);
    DTs[F] = DT;
  }
  return DTs[F];
}

#if VC_INTR_LLVM_VERSION_MAJOR >= 16
static inline void updateFnAttr(Function *Fn) {
  // Modify the ReadNone attribute to support LLVM 16.
  if (Fn->getFnAttribute(llvm::Attribute::ReadNone).isValid()) {
    Fn->removeFnAttr(llvm::Attribute::ReadNone);
    for (auto ui = Fn->use_begin(), ue = Fn->use_end(); ui != ue; ++ui) {
      if (auto I = dyn_cast<CallInst>(ui->getUser()))
        I->setMemoryEffects(llvm::MemoryEffects::none());
    }
  }
}
#else
static inline void updateFnAttr(Function *) {}
#endif

/***********************************************************************
 * isGlobalInterseptVol : check for an intersection between a global
 * variable and a list of global volatile variables
 */
bool CMSimdCFLoweringImpl::isGlobalInterseptVol(GlobalVariable &G,
                                                const GListType &VolList) {
  for (auto UI = G.user_begin(), UE = G.user_end(); UI != UE; UI++) {
    llvm::Instruction *U = dyn_cast<Instruction>(*UI);
    if (!U)
      continue;
    auto *F = U->getParent()->getParent();
    auto *DT = getDomTree(F);
    for (auto &VG : VolList) {
      for (auto SUI = VG->user_begin(), SUIE = VG->user_end(); SUI != SUIE;
           SUI++) {
        auto *I = dyn_cast<Instruction>(*SUI);
        if (I && DT->dominates(I, &*U)) {
          return true;
        }
      }
    }
  }
  return false;
}

//-----------------------------------------------------------------------------
// New PM support
//-----------------------------------------------------------------------------
PreservedAnalyses llvm::CMSimdCFLowering::run(Module &M,
                                              ModuleAnalysisManager &) {
  CMSimdCFLoweringImpl Impl;
  if (!Impl.run(M))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}

//-----------------------------------------------------------------------------
// Legacy PM support
//-----------------------------------------------------------------------------
namespace {

class CMSimdCFLoweringLegacy : public FunctionPass {
public:
  static char ID;
  CMSimdCFLoweringLegacy() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    FunctionPass::getAnalysisUsage(AU);
  }

  /***********************************************************************
   * doInitialization : per-module initialization for CM simd CF lowering
   *
   * Really we want a module pass for CM simd CF lowering. But, without
   * modifying llvm's PassManagerBuilder, the earliest place to insert a pass
   * is EP_EarlyAsPossible, which must be a function pass. So, we do our
   * per-module processing here in doInitialization.
   */
  bool doInitialization(Module &M) override {
    CMSimdCFLoweringImpl Impl;
    return Impl.run(M);
  }

  bool runOnFunction(Function &F) override { return false; }
};
} // namespace

char CMSimdCFLoweringLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(CMSimdCFLoweringLegacy, CMSimdCFLowering::getArgString(),
                      "Lower CM SIMD control flow", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(CMSimdCFLoweringLegacy, CMSimdCFLowering::getArgString(),
                    "Lower CM SIMD control flow", false, false)

Pass *llvm::createCMSimdCFLoweringPass() {
  return new CMSimdCFLoweringLegacy();
}

bool CMSimdCFLoweringImpl::run(Module &M) {
  VolList.clear();
  DTs.clear();
  initializeVolatileGlobals(M);

  // See if simd CF is used anywhere in this module.
  // We have to try each overload of llvm.genx.simdcf.any separately.
  bool HasSimdCF = false;
  for (unsigned Width = 2; Width <= CMSimdCFLower::MAX_SIMD_CF_WIDTH;
       Width <<= 1) {
    auto VT = VCINTR::getVectorType(Type::getInt1Ty(M.getContext()), Width);
    Function *SimdCFAny = GenXIntrinsic::getGenXDeclaration(
        &M, GenXIntrinsic::genx_simdcf_any, VT);
    if (!SimdCFAny->use_empty()) {
      updateFnAttr(SimdCFAny);
      HasSimdCF = true;
      break;
    }
  }

  if (HasSimdCF) {
    // Create the global variable for the execution mask.
    auto EMTy = VCINTR::getVectorType(Type::getInt1Ty(M.getContext()),
                                      CMSimdCFLower::MAX_SIMD_CF_WIDTH);
    auto EMVar = new GlobalVariable(M, EMTy, false /*isConstant*/,
                                    GlobalValue::InternalLinkage,
                                    Constant::getAllOnesValue(EMTy), "EM");
    // Derive an order to process functions such that a function is visited
    // after anything that calls it.
    std::vector<Function *> VisitOrder;
    calculateVisitOrder(&M, &VisitOrder);
    // Process functions in that order.
    CMSimdCFLower CFL(EMVar);
    for (auto i = VisitOrder.begin(), e = VisitOrder.end(); i != e; ++i) {
      Function *Fn = *i;
      if (Fn->hasFnAttribute("CMGenxNoSIMDPred"))
        continue;
      CFL.processFunction(Fn);
    }
  }

  // Any predication calls which remain are not in SIMD CF regions,
  // so can be deleted.
  for (auto mi = M.begin(), me = M.end(); mi != me; ++mi) {
    Function *F = &*mi;
    if (GenXIntrinsic::getGenXIntrinsicID(F) !=
        GenXIntrinsic::genx_simdcf_predicate)
      continue;
    while (!F->use_empty()) {
      auto CI = cast<CallInst>(F->use_begin()->getUser());
      auto EnabledValues = CI->getArgOperand(0);
      CI->replaceAllUsesWith(EnabledValues);
      CI->eraseFromParent();
    }
  }
  return HasSimdCF;
}

/***********************************************************************
 * initializeVolatileGlobals : check and modify global variables for vc
 *
 * Special case for volatile globals: by agreement they are placed in a
 * hardware register (this agreement gives a lot of performance). That is
 * why we need to keep their load and store instructions until the end of
 * the vc pipeline. To reach that goal they are replaced by
 * genx.vload/vstore instructions.
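 * As a rough sketch (names illustrative), a load of a volatile global @G
 * becomes
 *   %gload = call <8 x float> @llvm.genx.vload.v8f32.p0v8f32(<8 x float>* @G)
 * and a store becomes
 *   call void @llvm.genx.vstore.v8f32.p0v8f32(<8 x float> %v, <8 x float>* @G)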
 * But if a volatile global overlaps another, non-volatile global variable,
 * that generates an issue in the register allocator, because both will be
 * put into the same register. It is a special case in coalescing, which is
 * why we mark the overlapping globals as volatile too.
 */
void CMSimdCFLoweringImpl::initializeVolatileGlobals(Module &M) {
  // Analyse intersection between globals
  for (auto &G : M.globals()) {
    if (G.hasAttribute(genx::FunctionMD::GenXVolatile)) {
      VolList.push_back(&G);
    }
  }

  // If a non-volatile global vector intersects with a volatile global, mark
  // it volatile as well.
  for (auto &G : M.globals()) {
    // No vstores are generated for non-vectors - ignore them
    if (G.getValueType()->isVectorTy() &&
        !G.hasAttribute(genx::FunctionMD::GenXVolatile) &&
        isGlobalInterseptVol(G, VolList)) {
      G.addAttribute(genx::FunctionMD::GenXVolatile);
    }
  }

  // Replace instructions to keep them until the end of vc
  for (auto &G : M.globals()) {
    if (!G.hasAttribute(genx::FunctionMD::GenXVolatile))
      continue;
    // Transform all load/store on volatile globals to vload/vstore to disable
    // optimizations on this global (no PHI will be produced).
    auto AS0 = G.getAddressSpace();
    std::vector<Value *> WL;
    for (auto UI = G.user_begin(); UI != G.user_end();) {
      auto U = *UI++;
      WL.push_back(U);
    }

    while (!WL.empty()) {
      auto Inst = WL.back();
      WL.pop_back();
      if (auto CE = dyn_cast<ConstantExpr>(Inst)) {
        for (auto UI = CE->user_begin(); UI != CE->user_end();) {
          auto U = *UI++;
          WL.push_back(U);
        }
      } else if (auto CI = dyn_cast<CastInst>(Inst)) {
        for (auto UI = CI->user_begin(); UI != CI->user_end();) {
          auto U = *UI++;
          WL.push_back(U);
        }
      } else if (auto GEP = dyn_cast<GetElementPtrInst>(Inst)) {
        for (auto UI = GEP->user_begin(); UI != GEP->user_end();) {
          auto U = *UI++;
          WL.push_back(U);
        }
      } else if (auto LI = dyn_cast<LoadInst>(Inst)) {
        IRBuilder<> Builder(LI);
        auto Ptr = LI->getPointerOperand();
        auto AS1 = LI->getPointerAddressSpace();
        if (AS1 != AS0) {
          auto PtrTy = cast<PointerType>(Ptr->getType());
          PtrTy = VCINTR::PointerType::getWithSamePointeeType(PtrTy, AS0);
          Ptr = Builder.CreateAddrSpaceCast(Ptr, PtrTy);
        }
        Type *Tys[] = {LI->getType(), Ptr->getType()};
        Function *Fn =
            GenXIntrinsic::getGenXDeclaration(&M, GenXIntrinsic::genx_vload,
                                              Tys);
        Value *VLoad = Builder.CreateCall(Fn, Ptr, "gload");
        LI->replaceAllUsesWith(VLoad);
        LI->eraseFromParent();
        updateFnAttr(Fn);
      } else if (auto SI = dyn_cast<StoreInst>(Inst)) {
        if (!SI->getValueOperand()->getType()->isVectorTy())
          continue;
        IRBuilder<> Builder(SI);
        auto Ptr = SI->getPointerOperand();
        auto AS1 = SI->getPointerAddressSpace();
        if (AS1 != AS0) {
          auto PtrTy = cast<PointerType>(Ptr->getType());
          PtrTy = VCINTR::PointerType::getWithSamePointeeType(PtrTy, AS0);
          Ptr = Builder.CreateAddrSpaceCast(Ptr, PtrTy);
        }
        Type *Tys[] = {SI->getValueOperand()->getType(), Ptr->getType()};
        Value *Args[] = {SI->getValueOperand(), Ptr};
        Function *Fn =
            GenXIntrinsic::getGenXDeclaration(&M, GenXIntrinsic::genx_vstore,
                                              Tys);
        Builder.CreateCall(Fn, Args);
        SI->eraseFromParent();
        updateFnAttr(Fn);
      }
    }
  }
}

/***********************************************************************
 * calculateVisitOrder : calculate the order we want to visit functions,
 * such that a function is not visited until all its callers have been
 */
void CMSimdCFLoweringImpl::calculateVisitOrder(
    Module *M, std::vector<Function *> *VisitOrder) {
  // First build the call graph.
  // We roll our own call graph here, because it is simpler than the general
  // case supported by LLVM's call graph analysis (CM does not support
  // recursion or function pointers), and we want to modify it (using the
  // UnvisitedCallers set) when we traverse it.
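  // For example (illustrative): with a kernel K calling subroutines A and B,
  // where both A and B call C, the order computed below is K, A, B, C - C is
  // only appended once both of its callers have been visited.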
  std::map<Function *, CGNode> CallGraph;
  for (auto mi = M->begin(), me = M->end(); mi != me; ++mi) {
    Function *F = &*mi;
    if (F->empty())
      continue;
    // For each defined function: for each use (a call), add it to our
    // UnvisitedCallers set, and add us to its Callees set.
    // We are ignoring an illegal non-call use of a function; someone
    // else can spot and diagnose that later.
    // If the function has no callers, then add it straight in to VisitOrder.
    CGNode *CGN = &CallGraph[F];
    CGN->F = F;
    if (F->use_empty()) {
      VisitOrder->push_back(F);
      continue;
    }
    for (auto ui = F->use_begin(), ue = F->use_end(); ui != ue; ++ui) {
      if (auto I = dyn_cast<Instruction>(ui->getUser())) {
        Function *Caller = I->getFunction();
        // do not add a recursive call edge to the UnvisitedCallers
        if (Caller == F) {
          if (F->hasFnAttribute(genx::FunctionMD::CMStackCall))
            DiagnosticInfoSimdCF::emit(I, "SIMD recursive call", DS_Warning);
          else
            DiagnosticInfoSimdCF::emit(
                I, "Recursive function doesn't have CMStackCall attribute");
        } else {
          CGNode *CallerNode = &CallGraph[Caller];
          CallerNode->F = Caller;
          CGN->UnvisitedCallers.insert(CallerNode);
          CallerNode->Callees.insert(CGN);
        }
      }
    }
  }
  // Run through the visit order. For each function, remove it from each
  // callee's UnvisitedCallers set, and, if now empty, add the callee to
  // the end of the visit order.
  for (unsigned i = 0; i != VisitOrder->size(); ++i) {
    CGNode *CGN = &CallGraph[(*VisitOrder)[i]];
    for (auto ci = CGN->Callees.begin(), ce = CGN->Callees.end(); ci != ce;
         ++ci) {
      CGNode *Callee = *ci;
      Callee->UnvisitedCallers.erase(CGN);
      if (Callee->UnvisitedCallers.empty())
        VisitOrder->push_back(Callee->F);
    }
  }
}

/***********************************************************************
 * processFunction : process CM SIMD CF in a function
 */
void CMSimdCFLower::processFunction(Function *ArgF) {
  F = ArgF;
  LLVM_DEBUG(dbgs() << "CMSimdCFLoweringImpl::processFunction:\n"
                    << *F << "\n");
  LLVM_DEBUG(F->print(dbgs()));
  unsigned CMWidth = PredicatedSubroutines[F];
  // Find the simd branches.
  bool FoundSIMD = findSimdBranches(CMWidth);
  // Create shuffle mask for EM adjustment
  if (ShuffleMask.empty()) {
    auto I32Ty = Type::getInt32Ty(F->getContext());
    for (unsigned i = 0; i != 32; ++i)
      ShuffleMask.push_back(ConstantInt::get(I32Ty, i));
  }
  if (CMWidth > 0 || FoundSIMD) {
    // Determine which basic blocks need to be predicated.
    determinePredicatedBlocks();
    // Mark the branch at the end of any to-be-predicated block as a simd
    // branch.
    markPredicatedBranches();
    // Fix simd branches:
    //  - remove backward simd branches
    //  - ensure that the false leg is fallthrough
    fixSimdBranches();
    // Find the join points, and split out any join point into its own basic
    // block.
    findAndSplitJoinPoints();
    // Determine the JIPs for the gotos and joins.
    determineJIPs();
    // Predicate the code.
    predicateCode(CMWidth);
    // Lower the control flow.
    lowerSimdCF();
    lowerUnmaskOps();
  }
  ShuffleMask.clear();
  SimdBranches.clear();
  PredicatedBlocks.clear();
  JoinPoints.clear();
  RMAddrs.clear();
  OriginalPred.clear();
  AlreadyPredicated.clear();
}

/***********************************************************************
 * findSimdBranches : find the simd branches in the function
 *
 * Enter:   CMWidth = 0 normally, or call mask width if in predicated
 *          subroutine
 *
 * This adds blocks to SimdBranches.
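 * For example, a simd branch produced by the frontend looks roughly like
 * (names and width illustrative):
 *   %any = call i1 @llvm.genx.simdcf.any.v16i1(<16 x i1> %pred)
 *   br i1 %any, label %if.then, label %if.end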
 */
bool CMSimdCFLower::findSimdBranches(unsigned CMWidth) {
  bool found = false;
  for (auto fi = F->begin(), fe = F->end(); fi != fe; ++fi) {
    BasicBlock *BB = &*fi;
    auto Br = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Br || !Br->isConditional())
      continue;
    if (auto SimdCondUse = getSimdConditionUse(Br->getCondition())) {
      unsigned SimdWidth = VCINTR::VectorType::getNumElements(
          cast<VectorType>((*SimdCondUse)->getType()));
      if (CMWidth && SimdWidth != CMWidth)
        DiagnosticInfoSimdCF::emit(
            Br, "mismatching SIMD CF width inside SIMD call");
      SimdBranches[BB] = SimdWidth;
      found = true;
    }
  }
  return found;
}

/***********************************************************************
 * determinePredicatedBlocks : determine which blocks need to be predicated
 *
 * We need to find blocks that are control dependent on a simd branch.
 *
 * This adds blocks to PredicatedBlocks. It also errors when a block is
 * control dependent on more than one simd branch with disagreeing simd
 * width.
 *
 * See Muchnick section 9.5 Program-Dependence Graphs. For each edge m->n in
 * the control flow graph where n does not post-dominate m, find l, the
 * closest common ancestor in the post-dominance tree of m and n. All nodes
 * in the post-dominance tree from l to n except l itself are control
 * dependent on m.
 */
void CMSimdCFLower::determinePredicatedBlocks() {
  PostDominatorTree PDT;
  PDT.recalculate(*F);
  for (auto sbi = SimdBranches.begin(), sbe = SimdBranches.end(); sbi != sbe;
       ++sbi) {
    BasicBlock *BlockM = sbi->first;
    auto Br = cast<BranchInst>(BlockM->getTerminator());
    unsigned SimdWidth = sbi->second;
    LLVM_DEBUG(dbgs() << "simd branch (width " << SimdWidth << ") at "
                      << BlockM->getName() << "\n");
    if (SimdWidth < 2 || SimdWidth > MAX_SIMD_CF_WIDTH ||
        !isPowerOf2_32(SimdWidth))
      DiagnosticInfoSimdCF::emit(Br, "illegal SIMD CF width");
    // For each successor BlockN of BlockM...
    for (unsigned si = 0, se = Br->getNumSuccessors(); si != se; ++si) {
      auto BlockN = Br->getSuccessor(si);
      // Get BlockL, the closest common postdominator.
      auto BlockL = PDT.findNearestCommonDominator(BlockM, BlockN);
      if (BlockL == BlockM) {
        // need to include BlockM into the chain
        // if the branch is the do-while back-edge
        if (auto ParentNode = PDT.getNode(BlockM))
          if (auto IDom = ParentNode->getIDom())
            BlockL = IDom->getBlock();
      }
      // Trace up the postdominator tree from BlockN (inclusive) to BlockL
      // (exclusive) to find blocks control dependent on BlockM. This also
      // handles the case that BlockN does postdominate BlockM; no blocks
      // are control dependent on BlockM.
      for (auto Node = PDT.getNode(BlockN); Node && Node->getBlock() != BlockL;
           Node = Node->getIDom()) {
        auto BB = Node->getBlock();
        LLVM_DEBUG(dbgs() << "  " << BB->getName() << " needs predicating\n");
        auto PBEntry = &PredicatedBlocks[BB];
        if (*PBEntry && *PBEntry != SimdWidth)
          DiagnosticInfoSimdCF::emit(Br, "mismatching SIMD CF width");
        *PBEntry = SimdWidth;
      }
    }
  }
}

/***********************************************************************
 * markPredicatedBranches : mark the branch in any to-be-predicated block
 *    as a simd branch, even if it is unconditional
 *
 * This errors if it finds anything other than a BranchInst. Using switch or
 * return inside simd control flow is not allowed.
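 * For example, an unconditional 'br label %exit' inside a block that is
 * control dependent on a 16-wide simd branch is itself recorded in
 * SimdBranches with width 16, so it takes part in goto/join lowering.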
 */
void CMSimdCFLower::markPredicatedBranches() {
  for (auto pbi = PredicatedBlocks.begin(), pbe = PredicatedBlocks.end();
       pbi != pbe; ++pbi) {
    auto BB = pbi->first;
    unsigned SimdWidth = pbi->second;
    auto Term = BB->getTerminator();
    if (!isa<BranchInst>(Term))
      DiagnosticInfoSimdCF::emit(
          Term, "return or switch not allowed in SIMD control flow");
    if (!SimdBranches[BB])
      LLVM_DEBUG(dbgs() << "branch at " << BB->getName() << " becomes simd\n");
    SimdBranches[BB] = SimdWidth;
  }
}

static void fixPHIInput(BasicBlock *Succ, BasicBlock *OldPred,
                        BasicBlock *NewPred) {
  for (BasicBlock::iterator SBI = Succ->begin(), SBE = Succ->end(); SBI != SBE;
       ++SBI) {
    PHINode *phi = dyn_cast<PHINode>(&*SBI);
    if (!phi)
      break;
    Value *SrcV = phi->getIncomingValueForBlock(OldPred);
    phi->removeIncomingValue(OldPred);
    phi->addIncoming(SrcV, NewPred);
  }
}

/***********************************************************************
 * fixSimdBranches : fix simd branches ready for JIP determination
 *
 * - remove backward simd branches
 * - ensure that the false leg is fallthrough
 */
void CMSimdCFLower::fixSimdBranches() {
  // Scan through all basic blocks, remembering which ones we have seen.
  std::set<BasicBlock *> Seen;
  for (auto fi = F->begin(), fe = F->end(); fi != fe; ++fi) {
    BasicBlock *BB = &*fi;
    Seen.insert(BB);
    if (!SimdBranches.count(BB))
      continue;
    // This is a simd branch.
    auto Br = cast<BranchInst>(BB->getTerminator());
    bool IsBackward = false;
    // Check for backward branch in either leg.
    for (unsigned si = 0, se = Br->getNumSuccessors(); si != se; ++si) {
      BasicBlock *Succ = Br->getSuccessor(si);
      if (Seen.find(Succ) != Seen.end()) {
        LLVM_DEBUG(dbgs() << "simd branch at " << BB->getName() << " succ "
                          << si << " is backward\n");
        if (!Br->isConditional()) {
          // Unconditional simd backward branch. We can just remove its
          // simdness.
          LLVM_DEBUG(dbgs() << "  unconditional, so unsimding\n");
          SimdBranches.erase(SimdBranches.find(BB));
        } else {
          // Conditional simd branch where a leg is backward. Insert an extra
          // block.
          IsBackward = true;
          auto NextBB = BB->getNextNode();
          auto NewBB = BasicBlock::Create(BB->getContext(),
                                          BB->getName() + ".backward",
                                          BB->getParent(), NextBB);
          BranchInst::Create(Succ, NewBB)->setDebugLoc(Br->getDebugLoc());
          Br->setSuccessor(si, NewBB);
          fixPHIInput(Succ, BB, NewBB);
        }
      }
    }
    // fix loop-end critical edge
    if (IsBackward) {
      for (unsigned si = 0, se = Br->getNumSuccessors(); si != se; ++si) {
        BasicBlock *Succ = Br->getSuccessor(si);
        if (Seen.find(Succ) == Seen.end() &&
            Succ->getUniquePredecessor() == nullptr) {
          auto NewBB = BasicBlock::Create(BB->getContext(),
                                          BB->getName() + ".loopend",
                                          BB->getParent(), Succ);
          BranchInst::Create(Succ, NewBB)->setDebugLoc(Br->getDebugLoc());
          Br->setSuccessor(si, NewBB);
        }
      }
    }
    if (Br->isConditional()) {
      // Ensure that the false leg is fallthrough.
      auto NextBB = BB->getNextNode();
      if (Br->getSuccessor(1) != NextBB) {
        if (Br->getSuccessor(0) != NextBB) {
          // Neither leg is fallthrough. Add an extra basic block to make the
          // false one fallthrough.
          LLVM_DEBUG(dbgs() << "simd branch at " << BB->getName()
                            << ": inserted fallthrough\n");
          auto Succ = Br->getSuccessor(1);
          auto NewBB = BasicBlock::Create(BB->getContext(),
                                          BB->getName() + ".fallthrough",
                                          BB->getParent(), NextBB);
          PredicatedBlocks[NewBB] = PredicatedBlocks[Br->getSuccessor(0)];
          BranchInst::Create(Succ, NewBB)->setDebugLoc(Br->getDebugLoc());
          Br->setSuccessor(1, NewBB);
          fixPHIInput(Succ, BB, NewBB);
        } else {
          // The true leg is fallthrough. Invert the branch.
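          // E.g. 'br i1 %c, label %next, label %other' with %next the
          // fallthrough becomes 'br i1 %invert, label %other, label %next',
          // where %invert is the xor of %c with all-ones built below.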
LLVM_DEBUG(dbgs() << "simd branch at " << BB->getName() << ": inverting\n"); Use *U = getSimdConditionUse(Br->getCondition()); if (!U) U = &Br->getOperandUse(0); Value *Cond = *U; auto Xor = BinaryOperator::Create(Instruction::Xor, *U, Constant::getAllOnesValue(Cond->getType()), "invert", cast(U->getUser())); Xor->setDebugLoc(Br->getDebugLoc()); *U = Xor; Br->setSuccessor(0, Br->getSuccessor(1)); Br->setSuccessor(1, NextBB); } } } } } /*********************************************************************** * findAndSplitJoinPoints : find the join points, and split out any join point * into its own basic block */ void CMSimdCFLower::findAndSplitJoinPoints() { // cannot iterate the simd-branch blocks directly because some blocks may // be splitted in the loop, and the owner-block of a simd-branch may be // changed. So we collect the simd-branches first. SmallVector Jumps; for (auto sbi = SimdBranches.begin(), sbe = SimdBranches.end(); sbi != sbe; ++sbi) { auto Br = sbi->first->getTerminator(); Jumps.push_back(Br); } for (auto sji = Jumps.begin(), sje = Jumps.end(); sji != sje; ++sji) { assert((*sji)->isTerminator() && "Expected terminator inst"); auto *Br = *sji; unsigned SimdWidth = SimdBranches[Br->getParent()]; LLVM_DEBUG(dbgs() << *Br << "\n"); auto JP = Br->getSuccessor(0); if (JoinPoints.count(JP)) continue; JoinToGoto[JP] = Br->getParent(); // This is a new join point. LLVM_DEBUG(dbgs() << "new join point " << JP->getName() << "\n"); auto SplitBB = JP->splitBasicBlock(JP->getFirstNonPHI(), ".afterjoin"); // We need to split it into its own basic block, so later we can modify // the join to do a branch to its JIP. if (PredicatedBlocks.find(JP) != PredicatedBlocks.end()) PredicatedBlocks.insert(std::make_pair(SplitBB, PredicatedBlocks[JP])); if (SimdBranches.find(JP) != SimdBranches.end()) { assert( SimdBranches[JP] == SimdWidth); SimdBranches[SplitBB] = SimdWidth; SimdBranches.erase(JP); } LLVM_DEBUG(dbgs() << "split join point " << JP->getName() << " out to " << SplitBB->getName() << "\n"); JoinPoints[JP] = SimdWidth; } } /*********************************************************************** * determineJIPs : determine the JIPs for the gotos and joins */ void CMSimdCFLower::determineJIPs() { LLVM_DEBUG(dbgs() << "determineJIPs: " << F->getName() << "\n"); // Number the basic blocks. std::map Numbers; unsigned Num = 0; for (auto fi = F->begin(), fe = F->end(); fi != fe; ++fi) { BasicBlock *BB = &*fi; Numbers[BB] = Num++; } // Work out which joins do not need a JIP at all. Doing that helps avoid // problems in the GenX backend where a join that turns out to be a branching // join label needs to be in a basic block by itself, so other code has to be // moved out, which is not always possible. // // A join does not need a JIP if we can guarantee that any path reaching the // join will result in at least one channel being enabled. // // As a proxy for that, which is sufficient but maybe not necessary, we // divide the control flow up into groups. Two groups are either disjoint, or // one is nested inside the other. Then the join at the end of a group does // not need a JIP. // // We find the groups as follows: any edge that is not a fallthrough edge // causes the target block and the block after the branch block to be in the // same group. Grouping Groups; for (auto NextBB = &F->front(), EndBB = &F->back(); NextBB;) { auto BB = NextBB; NextBB = BB == EndBB ? 
nullptr : BB->getNextNode(); auto *Term = BB->getTerminator(); for (unsigned si = 0, se = Term->getNumSuccessors(); si != se; ++si) { BasicBlock *Succ = Term->getSuccessor(si); if (Succ == NextBB) continue; // We have a non-fallthrough edge BB -> Succ. Thus NextBB and Succ need // to be in the same group. if (NextBB && Succ) { LLVM_DEBUG(dbgs() << "joinGroups " << NextBB->getName() << " " << Succ->getName() << "\n"); } else { LLVM_DEBUG(dbgs() << "Warning: NextBB or Succ is nullptr\n"); } Groups.joinGroups(NextBB, Succ); } } // Repeat until we stop un-simding branches... for (;;) { // Determine the JIPs for the SIMD branches. for (auto sbi = SimdBranches.begin(), sbe = SimdBranches.end(); sbi != sbe; ++sbi) determineJIP(sbi->first, &Numbers, /*IsJoin=*/false); // Determine the JIPs for the joins. A join does not need a JIP if it is the // last block in its group. std::set SeenGroup; for (auto BB = &F->back();;) { LLVM_DEBUG(dbgs() << " " << BB->getName() << " is group " << Groups.getGroup(BB)->getName() << "\n"); if (JoinPoints.count(BB)) { if (!SeenGroup.insert(Groups.getGroup(BB)).second) determineJIP(BB, &Numbers, /*IsJoin=*/true); else LLVM_DEBUG(dbgs() << BB->getName() << " does not need JIP\n"); } if (BB == &F->front()) break; BB = BB->getPrevNode(); } // See if we have any unconditional branch with UIP == JIP or no JIP. If so, // it can stay as a scalar unconditional branch. SmallVector BranchesToUnsimd; std::set UIPs; for (auto sbi = SimdBranches.begin(), sbe = SimdBranches.end(); sbi != sbe; ++sbi) { BasicBlock *BB = sbi->first; auto Br = cast(BB->getTerminator()); BasicBlock *UIP = Br->getSuccessor(0); BasicBlock *JIP = JIPs[BB]; if (!Br->isConditional() && (!JIP || UIP == JIP)) { LLVM_DEBUG(dbgs() << BB->getName() << ": converting back to unconditional branch to " << UIP->getName() << "\n"); BranchesToUnsimd.push_back(BB); } else UIPs.insert(UIP); } // If we did not un-simd any branch, we are done. if (BranchesToUnsimd.empty()) break; for (auto i = BranchesToUnsimd.begin(), e = BranchesToUnsimd.end(); i != e; ++i) SimdBranches.erase(SimdBranches.find(*i)); // For each join, see if it is still the UIP of any goto. If not, remove it. SmallVector JoinsToRemove; for (auto i = JoinPoints.begin(), e = JoinPoints.end(); i != e; ++i) if (UIPs.find(i->first) == UIPs.end()) JoinsToRemove.push_back(i->first); for (auto i = JoinsToRemove.begin(), e = JoinsToRemove.end(); i != e; ++i) { LLVM_DEBUG(dbgs() << (*i)->getName() << ": removing now unreferenced join\n"); JoinPoints.erase(JoinPoints.find(*i)); } } } /*********************************************************************** * determineJIP : determine the JIP for a goto or join */ void CMSimdCFLower::determineJIP(BasicBlock *BB, std::map *Numbers, bool IsJoin) { BasicBlock *UIP = nullptr; auto Br = cast(BB->getTerminator()); if (!IsJoin) UIP = Br->getSuccessor(0); // this is a goto with a UIP, not a join LLVM_DEBUG(dbgs() << BB->getName() << ": UIP is " << (UIP ? UIP->getName() : "(none)") << "\n"); // Scan forwards to find the next join point that could be resumed by any // code before or at BB. unsigned BBNum = (*Numbers)[BB]; bool NeedNextJoin = false; BasicBlock *JP = BB->getNextNode(); unsigned JPNum = BBNum + 1; for (;; JP = JP->getNextNode(), ++JPNum) { assert(JP); if ((*Numbers)[JP] != JPNum) LLVM_DEBUG(dbgs() << JP->getName() << " number " << (*Numbers)[JP] << " does not match " << JPNum << " for " << JP->getName() << "\n"); assert((*Numbers)[JP] == JPNum); // If we have reached UIP, then that is also JIP. 
if (JP == UIP) break; // See if JP is a basic block with a branch from before BB. for (auto ui = JP->use_begin(), ue = JP->use_end(); ui != ue; ++ui) { auto BranchBlock = cast(ui->getUser())->getParent(); if ((*Numbers)[BranchBlock] < BBNum) { NeedNextJoin = true; break; } } if (NeedNextJoin && JoinPoints.count(JP)) break; // found join point // See if JP finishes with a branch to BB or before. auto *Term = JP->getTerminator(); for (unsigned si = 0, se = Term->getNumSuccessors(); si != se; ++si) { auto Succ = Term->getSuccessor(si); if ((*Numbers)[Succ] <= BBNum) { NeedNextJoin = true; break; } } assert(JP != &BB->getParent()->back() && "reached end"); } LLVM_DEBUG(dbgs() << BB->getName() << ": JIP is " << JP->getName() << "\n"); JIPs[BB] = JP; } /*********************************************************************** * predicateCode : predicate the instructions in the code */ void CMSimdCFLower::predicateCode(unsigned CMWidth) { if (CMWidth) { // Inside a predicated call, also predicate all other blocks. We do this // first so the entry block gets done before any other block, avoiding a // problem that code we insert to set up the EMs and RMs accidentally gets // predicated. for (auto fi = F->begin(), fe = F->end(); fi != fe; ++fi) { BasicBlock *BB = &*fi; if (PredicatedBlocks.find(BB) == PredicatedBlocks.end()) predicateBlock(BB, CMWidth); } } // Predicate all basic blocks that need it. for (auto pbi = PredicatedBlocks.begin(), pbe = PredicatedBlocks.end(); pbi != pbe; ++pbi) { BasicBlock *BB = pbi->first; unsigned SimdWidth = pbi->second; predicateBlock(BB, SimdWidth); } } /*********************************************************************** * predicateBlock : add predication to a basic block * * Enter: BB = basic block * SimdWidth = simd width of controlling simd branch or call mask */ void CMSimdCFLower::predicateBlock(BasicBlock *BB, unsigned SimdWidth) { for (auto bi = BB->begin(), be = BB->end(); bi != be; ) { Instruction *Inst = &*bi; ++bi; // Increment here in case Inst is removed predicateInst(Inst, SimdWidth); } } /*********************************************************************** * createWrRegion : create wrregion instruction * * Enter: Args = the args for wrregion * Name * InsertBefore */ static CallInst *createWrRegion(ArrayRef Args, const Twine &Name, Instruction *InsertBefore) { Type *OverloadedTypes[] = { Args[0]->getType(), Args[1]->getType(), Args[5]->getType(), Args[7]->getType() }; Module *M = InsertBefore->getParent()->getParent()->getParent(); Function *Decl = GenXIntrinsic::getGenXDeclaration(M, OverloadedTypes[0]->isFPOrFPVectorTy() ? 
GenXIntrinsic::genx_wrregionf : GenXIntrinsic::genx_wrregioni, OverloadedTypes); auto WrRegion = CallInst::Create(Decl, Args, Name, InsertBefore); WrRegion->setDebugLoc(InsertBefore->getDebugLoc()); updateFnAttr(Decl); return WrRegion; } /*********************************************************************** * predicateInst : add predication to an Instruction if necessary * * Enter: Inst = the instruction * SimdWidth = simd cf width in force */ void CMSimdCFLower::predicateInst(Instruction *Inst, unsigned SimdWidth) { if (isa(Inst) || GenXIntrinsic::isVStore(Inst)) { predicateStore(Inst, SimdWidth); return; } if (auto CI = dyn_cast(Inst)) { unsigned IntrinsicID = GenXIntrinsic::getAnyIntrinsicID(Inst); auto Callee = CI->getCalledFunction(); switch (IntrinsicID) { case GenXIntrinsic::genx_rdregioni: case GenXIntrinsic::genx_rdregionf: case GenXIntrinsic::genx_wrregioni: case GenXIntrinsic::genx_wrregionf: case GenXIntrinsic::genx_simdcf_any: case GenXIntrinsic::genx_vload: case GenXIntrinsic::genx_vstore: case GenXIntrinsic::genx_simdcf_savemask: case GenXIntrinsic::genx_simdcf_unmask: case GenXIntrinsic::genx_simdcf_remask: case GenXIntrinsic::genx_unmask_begin: case GenXIntrinsic::genx_unmask_end: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: return; // ignore these intrinsics // These intrinsics can be predicated but they do not have // explicit predicate operand: they use predicate of wrregion. case GenXIntrinsic::genx_gather_scaled2: case GenXIntrinsic::genx_gather4_scaled2: return; case GenXIntrinsic::genx_simdcf_predicate: rewritePredication(CI, SimdWidth); return; case GenXIntrinsic::genx_raw_send: case GenXIntrinsic::genx_raw_send_noresult: case GenXIntrinsic::genx_raw_sends: case GenXIntrinsic::genx_raw_sends_noresult: predicateSend(CI, IntrinsicID, SimdWidth); return; case GenXIntrinsic::not_any_intrinsic: // Call to a real subroutine. // Ignore those SIMT entry function for direct calls, for indirect ones // conservatively allow everything for now. if (!Callee || (!Callee->hasFnAttribute("CMGenxSIMT") && !Callee->hasFnAttribute("CMGenxNoSIMDPred"))) { predicateCall(CI, SimdWidth); } return; } // An IntrNoMem intrinsic is an ALU intrinsic and can be ignored. if (Callee->doesNotAccessMemory() || CI->arg_size() == 0) return; // no predication for intrinsic marked as ISPC uniform, // for example, atomic and oword_store used in printf if (CI->getMetadata("ISPC-Uniform") != nullptr) return; // Look for a predicate operand in operand 2, 1 or 0. unsigned PredNum = CI->arg_size() - 1; for (;;) { if (auto VT = dyn_cast(CI->getArgOperand(PredNum)->getType())) { if (VT->getElementType()->isIntegerTy(1)) { // We have a predicate operand. predicateScatterGather(CI, SimdWidth, PredNum); return; } } if (!PredNum) break; --PredNum; } DiagnosticInfoSimdCF::emit(CI, "illegal instruction inside SIMD control flow"); return; } } /*********************************************************************** * rewritePredication : convert a predication intrinsic call into a * selection based on the region's SIMD predicate mask. 
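 *
 * Illustrative sketch (value names and the exact type mangling are invented
 * here, not taken from the source); for a 16-wide region:
 *
 *   %r = call <16 x i32> @llvm.genx.simdcf.predicate.v16i32(
 *            <16 x i32> %enabled, <16 x i32> %defaults)
 *
 * becomes a select on the current execution mask:
 *
 *   %r = select <16 x i1> %EM16, <16 x i32> %enabled, <16 x i32> %defaults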
 *
 * Enter:   Inst = the predication intrinsic call instruction
 *          SimdWidth = simd cf width in force
 */
void CMSimdCFLower::rewritePredication(CallInst *CI, unsigned SimdWidth) {
  auto EnabledValues = CI->getArgOperand(0);
  auto DisabledDefaults = CI->getArgOperand(1);
  assert(isa<VectorType>(EnabledValues->getType()) &&
         EnabledValues->getType() == DisabledDefaults->getType() &&
         "malformed predication intrinsic");
  if (VCINTR::VectorType::getNumElements(
          cast<VectorType>(EnabledValues->getType())) != SimdWidth) {
    DiagnosticInfoSimdCF::emit(
        CI, "mismatching SIMD width inside SIMD control flow");
    return;
  }
  auto EM = loadExecutionMask(CI, SimdWidth);
  auto Select = SelectInst::Create(EM, EnabledValues, DisabledDefaults,
                                   EnabledValues->getName() + ".simdcfpred",
                                   CI);
  Select->setDebugLoc(CI->getDebugLoc());
  CI->replaceAllUsesWith(Select);
  eraseInstruction(CI);
}

static bool IsBitCastForLifetimeMark(const Value *V) {
  if (!V || !isa<BitCastInst>(V)) {
    return false;
  }
  for (auto U : V->users()) {
    unsigned IntrinsicID = GenXIntrinsic::getAnyIntrinsicID(U);
    if (IntrinsicID != Intrinsic::lifetime_start &&
        IntrinsicID != Intrinsic::lifetime_end) {
      return false;
    }
  }
  return true;
}

static bool isSingleBlockLocalStore(const Instruction *SI) {
  const Value *P = SI->getOperand(1);
  // The pointer has to be an alloca.
  if (isa<AllocaInst>(P)) {
    // Check every use of P: each has to be either a lifetime intrinsic or a
    // load/store in the same basic block.
    auto BLK = SI->getParent();
    for (auto U : P->users()) {
      if (isa<LoadInst>(U) || isa<StoreInst>(U) ||
          GenXIntrinsic::isVLoadStore(U)) {
        if (cast<Instruction>(U)->getParent() != BLK)
          return false;
      } else if (!IsBitCastForLifetimeMark(U))
        return false;
    }
    return true;
  }
  return false;
}

static bool replicatesMask(Function *F) {
  return F->hasFnAttribute(genx::FunctionMD::CMGenxReplicateMask);
}

static unsigned getNumChannelsReplicated(Function *F) {
  assert(replicatesMask(F) &&
         "Expected function with 'genx_replicate_mask' attribute");
  uint32_t NumChannels = 0;
  F->getFnAttribute(genx::FunctionMD::CMGenxReplicateMask)
      .getValueAsString()
      .getAsInteger(0, NumChannels);
  return NumChannels;
}

// Instructions like gather4, or functions that use gather4 (marked with the
// genx_replicate_mask attribute), produce more output elements than the
// execution size. In that case the subsequent store is wider; handle that here.
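// Worked example (illustrative numbers, not from the source): a 16-wide
// gather4 reading 4 channels (R,G,B,A) returns 64 elements, so the store that
// consumes it is 64 elements wide while the controlling SIMD CF width is
// still 16. deduceNumChannels() below returns 4 in that case, letting the
// caller replicate the 16-wide execution mask to cover all 64 stored elements.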
unsigned CMSimdCFLower::deduceNumChannels(Instruction *SI) { assert((isa(SI) || GenXIntrinsic::isVStore(SI)) && "Store inst expected"); unsigned NumChannels = 1; Value *StoredValue = SI->getOperand(0); // TODO: handle cases when stored value is used in cast instructions // If it's not a call there can't be possible replication of the mask if (!isa(StoredValue)) return NumChannels; auto *CI = cast(StoredValue); if (Function *F = CI->getCalledFunction()) { if (!GenXIntrinsic::isGenXIntrinsic(F)) { if (replicatesMask(F)) return getNumChannelsReplicated(F); return NumChannels; } } // If it's not a function call then check for a specific instruction unsigned IID = GenXIntrinsic::getGenXIntrinsicID(CI); switch (IID) { case GenXIntrinsic::genx_gather4_masked_scaled2: case GenXIntrinsic::genx_gather4_scaled2: { unsigned AddrElems = VCINTR::VectorType::getNumElements( cast(CI->getOperand(4)->getType())); unsigned ResultElems = VCINTR::VectorType::getNumElements(cast(CI->getType())); NumChannels = ResultElems / AddrElems; break; } case GenXIntrinsic::genx_lsc_load_slm: case GenXIntrinsic::genx_lsc_load_stateless: case GenXIntrinsic::genx_lsc_load_bindless: case GenXIntrinsic::genx_lsc_load_bti: case GenXIntrinsic::genx_lsc_prefetch_bti: case GenXIntrinsic::genx_lsc_prefetch_stateless: case GenXIntrinsic::genx_lsc_prefetch_bindless: NumChannels = GenXIntrinsic::getLSCNumVectorElements( static_cast( cast(CI->getOperand(7))->getZExtValue())); break; default: break; } return NumChannels; } /*********************************************************************** * predicateStore : add predication to a StoreInst * * Enter: Inst = the instruction * SimdWidth = simd cf width in force * * This code avoids using the utility functions and classes for the wrregion * intrinsic that are in the GenX backend because this pass is not part of the * GenX backend. */ void CMSimdCFLower::predicateStore(Instruction *SI, unsigned SimdWidth) { auto V = SI->getOperand(0); auto StoreVT = dyn_cast(V->getType()); // Scalar store not predicated if (!StoreVT || VCINTR::VectorType::getNumElements(StoreVT) == 1) return; // no predication for ISPC uniform store if (SI->getMetadata("ISPC-Uniform") != nullptr) return; // local-variable store that is only used within the same basic block // do not need predicate if (isSingleBlockLocalStore(SI)) return; // See if the value to store is a wrregion (possibly predicated) of the // right width. If so, we predicate that instead. This also handles // the case that the value to store is wider than the simd CF width, // but there is a wrregion with the right width. // Also allow for a chain of multiple wrregions, as clang can generate // two, one for the columns and one for the rows. // Also skip any bitcasts. CallInst *WrRegionToPredicate = nullptr; Use *U = &SI->getOperandUse(0); Use *UseNeedsUpdate = nullptr; Value *ExistingPred = nullptr; for (;;) { if (auto BC = dyn_cast(V)) { U = &BC->getOperandUse(0); V = *U; continue; } auto WrRegion = dyn_cast(V); if (!WrRegion) break; auto Callee = WrRegion->getCalledFunction(); if (!Callee) break; unsigned IID = GenXIntrinsic::getGenXIntrinsicID(WrRegion); if (IID != GenXIntrinsic::genx_wrregioni && IID != GenXIntrinsic::genx_wrregionf) { // genx_gather4_masked_scaled2 and genx_gather_masked_scaled2 are slightly // different: they have predicate operand and their users have to be // predicated as well. 
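      // (Sketch of the flow as implemented in this pass: predicateInst()
      // already routed those two gathers through predicateScatterGather(),
      // which ANDed our EM into their predicate operand and recorded the
      // original predicate in OriginalPred; here we only pick that original
      // predicate up so the final store select can honour it as well.)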
      if (IID == GenXIntrinsic::genx_gather4_masked_scaled2 ||
          IID == GenXIntrinsic::genx_gather_masked_scaled2) {
        assert(AlreadyPredicated.find(WrRegion) != AlreadyPredicated.end());
        if (OriginalPred.count(WrRegion))
          ExistingPred = OriginalPred[WrRegion];
        break;
      }
      // Not a wrregion. See if it is an intrinsic that has already been
      // predicated; if so, do not attempt to predicate the store.
      if (AlreadyPredicated.find(WrRegion) != AlreadyPredicated.end())
        return;
      // Otherwise break out of the wrregion-and-bitcast-traversing loop.
      break;
    }
    // We have a wrregion. Check its input width.
    unsigned Width = 0;
    Value *Input =
        WrRegion->getArgOperand(GenXIntrinsic::GenXRegion::NewValueOperandNum);
    if (auto VT = dyn_cast<VectorType>(Input->getType()))
      Width = VCINTR::VectorType::getNumElements(VT);
    if (Width == SimdWidth) {
      // This wrregion has the right width input. We could predicate it.
      if (WrRegionToPredicate) {
        UseNeedsUpdate = &WrRegionToPredicate->getOperandUse(
            GenXIntrinsic::GenXRegion::NewValueOperandNum);
        // If there is a bitcast in between, replace the bitcast's operand.
        if (auto BC = dyn_cast<BitCastInst>(UseNeedsUpdate->get()))
          UseNeedsUpdate = &BC->getOperandUse(0);
      } else {
        UseNeedsUpdate = U;
      }
      WrRegionToPredicate = WrRegion;
      V = WrRegionToPredicate->getArgOperand(
          GenXIntrinsic::GenXRegion::NewValueOperandNum);
      // See if it is already predicated, other than by an all-true constant.
      Value *Pred = WrRegion->getArgOperand(
          GenXIntrinsic::GenXRegion::PredicateOperandNum);
      if (auto C = dyn_cast<Constant>(Pred)) {
        if (C->isAllOnesValue())
          Pred = nullptr;
      }
      if (Pred) {
        // Yes, it is predicated. Stop here and predicate it further.
        break;
      }
    } else if (Width == 1) {
      // Single element wrregion. This is a scalar operation, so we do not
      // want to predicate it at all.
      return;
    } else if (Width < SimdWidth) {
      // Too narrow. Predicate the last correctly sized wrregion or the store.
      break;
    } else {
      assert(false && "unexpected data size inside SIMD control flow");
    }
  }
  if (WrRegionToPredicate) {
    // We found a wrregion to predicate. Replace it with a predicated one.
    assert(UseNeedsUpdate);
    *UseNeedsUpdate = predicateWrRegion(WrRegionToPredicate, SimdWidth);
    if (WrRegionToPredicate->use_empty())
      eraseInstruction(WrRegionToPredicate);
    return;
  }
  // Try to deduce the number of channels needed to fit the current SIMD width
  // (check whether a known multi-channel instruction is used or the
  // 'genx_replicate_mask' attribute is provided).
  unsigned NumChannels = deduceNumChannels(SI);
  if (VCINTR::VectorType::getNumElements(StoreVT) != SimdWidth * NumChannels) {
    DiagnosticInfoSimdCF::emit(
        SI, "mismatching SIMD width inside SIMD control flow");
    return;
  }
  // Predicate the store by creating a select.
  Instruction *Load = nullptr;
  if (auto SInst = dyn_cast<StoreInst>(SI)) {
    auto *PtrOp = SInst->getPointerOperand();
    Load = new LoadInst(SInst->getValueOperand()->getType(), PtrOp,
                        PtrOp->getName() + ".simdcfpred.load",
                        false /* isVolatile */, SI);
  } else {
    auto ID = GenXIntrinsic::genx_vload;
    Value *Data = SI->getOperand(0);
    Value *Addr = SI->getOperand(1);
    Type *Tys[] = {Data->getType(), Addr->getType()};
    auto Fn = GenXIntrinsic::getGenXDeclaration(
        SI->getParent()->getParent()->getParent(), ID, Tys);
    Load = CallInst::Create(Fn, Addr, ".simdcfpred.vload", SI);
    updateFnAttr(Fn);
  }
  Load->setDebugLoc(SI->getDebugLoc());
  Value *EM = loadExecutionMask(SI, SimdWidth);
  // If there was a predicate already, AND it with the current EM.
  if (ExistingPred) {
    EM = BinaryOperator::Create(
        Instruction::And, ExistingPred, EM,
        ExistingPred->getName() + ".and."
+ EM->getName(), SI); cast(EM)->setDebugLoc(SI->getDebugLoc()); } // Replicate mask for each channel if needed EM = replicateMask(EM, SI, SimdWidth, NumChannels); auto Select = SelectInst::Create(EM, SI->getOperand(0), Load, SI->getOperand(0)->getName() + ".simdcfpred", SI); SI->setOperand(0, Select); } /*********************************************************************** * predicateSend : predicate a raw send * * This has to cope with the case that the predicate is a scalar i1 constant * 1. We first convert the predicate to whatever width matches current simd * control flow. */ void CMSimdCFLower::predicateSend(CallInst *CI, unsigned IntrinsicID, unsigned SimdWidth) { unsigned PredOperandNum = 1; if (isa(CI->getOperand(PredOperandNum)->getType())) { // We already have a vector predicate. predicateScatterGather(CI, SimdWidth, PredOperandNum); return; } IRBuilder<> Builder(CI); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); // Need to convert scalar predicate to vector. We need to get a new intrinsic // declaration from an array of overloaded types, and that depends on exactly // which send intrinsic we have. auto Pred = Builder.CreateVectorSplat( SimdWidth, cast(CI->getOperand(PredOperandNum))); Function *Decl = nullptr; switch (IntrinsicID) { case GenXIntrinsic::genx_raw_send: { Type *Tys[] = { CI->getType(), Pred->getType(), CI->getOperand(4)->getType() }; Decl = GenXIntrinsic::getGenXDeclaration(CI->getParent()->getParent()->getParent(), (GenXIntrinsic::ID)IntrinsicID, Tys); break; } case GenXIntrinsic::genx_raw_send_noresult: { Type *Tys[] = { Pred->getType(), CI->getOperand(4)->getType() }; Decl = GenXIntrinsic::getGenXDeclaration( CI->getParent()->getParent()->getParent(), (GenXIntrinsic::ID)IntrinsicID, Tys); break; } case GenXIntrinsic::genx_raw_sends: { Type *Tys[] = { CI->getType(), Pred->getType(), CI->getOperand(4)->getType(), CI->getOperand(5)->getType() }; Decl = GenXIntrinsic::getGenXDeclaration( CI->getParent()->getParent()->getParent(), (GenXIntrinsic::ID)IntrinsicID, Tys); break; } case GenXIntrinsic::genx_raw_sends_noresult: { Type *Tys[] = { Pred->getType(), CI->getOperand(4)->getType(), CI->getOperand(5)->getType() }; Decl = GenXIntrinsic::getGenXDeclaration( CI->getParent()->getParent()->getParent(), (GenXIntrinsic::ID)IntrinsicID, Tys); break; } default: llvm_unreachable("unexpected send intrinsic"); break; } SmallVector Args; for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) if (i == PredOperandNum) Args.push_back(Pred); else Args.push_back(CI->getOperand(i)); auto NewCI = Builder.CreateCall(Decl, Args, CI->getName()); CI->replaceAllUsesWith(NewCI); eraseInstruction(CI); // Now we can predicate the new send instruction. predicateScatterGather(NewCI, SimdWidth, PredOperandNum); updateFnAttr(Decl); } /*********************************************************************** * predicateScatterGather : predicate a scatter/gather intrinsic call * * This works on the scatter/gather intrinsics with a predicate operand. 
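 *
 * Illustrative effect (operand names invented, not from the source), for a
 * 16-wide gather already predicated by %p:
 *
 *   call ... (<16 x i1> %p, ...)
 * becomes
 *   %p.and.EM16 = and <16 x i1> %p, %EM16
 *   call ... (<16 x i1> %p.and.EM16, ...)
 *
 * and %p is remembered in OriginalPred so that a later store of the result
 * can apply it too.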
*/ void CMSimdCFLower::predicateScatterGather(CallInst *CI, unsigned SimdWidth, unsigned PredOperandNum) { Value *OldPred = CI->getArgOperand(PredOperandNum); assert(OldPred->getType()->getScalarType()->isIntegerTy(1)); if (SimdWidth != VCINTR::VectorType::getNumElements( cast(OldPred->getType()))) { DiagnosticInfoSimdCF::emit( CI, "mismatching SIMD width of scatter/gather inside SIMD control flow"); return; } Instruction *NewPred = loadExecutionMask(CI, SimdWidth); if (auto C = dyn_cast(OldPred)) if (C->isAllOnesValue()) OldPred = nullptr; if (OldPred) { OriginalPred[CI] = OldPred; auto And = BinaryOperator::Create(Instruction::And, OldPred, NewPred, OldPred->getName() + ".and." + NewPred->getName(), CI); And->setDebugLoc(CI->getDebugLoc()); NewPred = And; } CI->setArgOperand(PredOperandNum, NewPred); AlreadyPredicated.insert(CI); } /*********************************************************************** * predicateWrRegion : create a predicated version of a wrregion * * Enter: WrR = the wrregion, whose value width must be equal to the * simd CF width * SimdWidth = simd cf width in force * * Return: the new predicated wrregion * * If the wrregion is already predicated, the new one has a predicated that * is an "and" of the original predicate and our EM. */ CallInst *CMSimdCFLower::predicateWrRegion(CallInst *WrR, unsigned SimdWidth) { // First gather the args of the original wrregion. SmallVector Args; for (unsigned i = 0, e = WrR->arg_size(); i != e; ++i) Args.push_back(WrR->getArgOperand(i)); // Modify the predicate in Args. Value *Pred = Args[GenXIntrinsic::GenXRegion::PredicateOperandNum]; if (auto C = dyn_cast(Pred)) if (C->isAllOnesValue()) Pred = nullptr; auto EM = loadExecutionMask(WrR, SimdWidth); if (!Pred) Pred = EM; else { OriginalPred[WrR] = Pred; auto And = BinaryOperator::Create(Instruction::And, EM, Pred, Pred->getName() + ".and." + EM->getName(), WrR); And->setDebugLoc(WrR->getDebugLoc()); Pred = And; } Args[GenXIntrinsic::GenXRegion::PredicateOperandNum] = Pred; return createWrRegion(Args, WrR->getName(), WrR); } /*********************************************************************** * predicateCall : predicate a real call to a subroutine */ void CMSimdCFLower::predicateCall(CallInst *CI, unsigned SimdWidth) { Function *F = CI->getCalledFunction(); // TODO: support width warnings for indirect calls, // now PSEntry for them is actually fake as F=nullptr for such cases auto PSEntry = &PredicatedSubroutines[F]; // Skip predicating recursive function if (CI->getFunction() == F) return; if (!*PSEntry) *PSEntry = SimdWidth; else if (*PSEntry != SimdWidth) DiagnosticInfoSimdCF::emit(CI, "mismatching SIMD width of called subroutine"); } /*********************************************************************** * lowerSimdCF : lower the simd control flow */ void CMSimdCFLower::lowerSimdCF() { IRBuilder<> Builder(F->getContext()); // First lower the simd branches. for (auto sbi = SimdBranches.begin(), sbe = SimdBranches.end(); sbi != sbe; ++sbi) { BasicBlock *BB = sbi->first; auto Br = cast(BB->getTerminator()); Builder.SetInsertPoint(Br); BasicBlock *UIP = Br->getSuccessor(0); BasicBlock *JIP = JIPs[BB]; LLVM_DEBUG(dbgs() << "lower branch at " << BB->getName() << ", UIP=" << UIP->getName() << ", JIP=" << JIP->getName() << "\n"); if (!Br->isConditional()) { // Unconditional branch. Turn it into a conditional branch on true, // adding a fallthrough on false. 
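      // That is (illustrative):  br label %UIP
      //       becomes:           br i1 true, label %UIP, label %fallthrough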
auto NewBr = Builder.CreateCondBr( Constant::getAllOnesValue(Type::getInt1Ty(BB->getContext())), UIP, BB->getNextNode()); eraseInstruction(Br); Br = NewBr; } Value *Cond = Br->getCondition(); Use *CondUse = getSimdConditionUse(Cond); const auto &DL = Br->getDebugLoc(); if (CondUse) Cond = *CondUse; else { // Branch is currently scalar. Splat to a vector condition. unsigned SimdWidth = PredicatedBlocks[BB]; if (auto C = dyn_cast(Cond)) Cond = Builder.CreateVectorSplat(SimdWidth, C); else { Cond = Br->getCondition(); Type *VecTy = VCINTR::getVectorType(Cond->getType(), 1); Value *Undef = UndefValue::get(VecTy); Type *I32Ty = Type::getInt32Ty(Cond->getContext()); auto Insert = Builder.CreateInsertElement(Undef, Cond, Constant::getNullValue(I32Ty), Cond->getName() + ".splat"); auto Splat = Builder.CreateShuffleVector( Insert, Undef, Constant::getNullValue(VCINTR::getVectorType(I32Ty, SimdWidth)), Insert->getName()); Cond = Splat; } } // Insert {NewEM,NewRM,BranchCond} = llvm.genx.simdcf.goto(OldEM,OldRM,~Cond) // TODO: rewrite everything below using IRBuilder unsigned SimdWidth = VCINTR::VectorType::getNumElements(cast(Cond->getType())); auto NotCond = BinaryOperator::Create(Instruction::Xor, Cond, Constant::getAllOnesValue(Cond->getType()), Cond->getName() + ".not", Br); Value *RMAddr = getRMAddr(UIP, SimdWidth); Instruction *OldEM = new LoadInst(EMVar->getValueType(), EMVar, EMVar->getName(), false /* isVolatile */, Br); OldEM->setDebugLoc(DL); auto OldRM = new LoadInst(cast(RMAddr)->getAllocatedType(), RMAddr, RMAddr->getName(), false /* isVolatile */, Br); OldRM->setDebugLoc(DL); Type *Tys[] = { OldEM->getType(), OldRM->getType() }; auto GotoFunc = GenXIntrinsic::getGenXDeclaration(BB->getParent()->getParent(), GenXIntrinsic::genx_simdcf_goto, Tys); Value *Args[] = { OldEM, OldRM, NotCond }; auto Goto = CallInst::Create(GotoFunc, Args, "goto", Br); Goto->setDebugLoc(DL); Goto->setConvergent(); Instruction *NewEM = ExtractValueInst::Create(Goto, 0, "goto.extractem", Br); (new StoreInst(NewEM, EMVar, false /* isVolatile */, Br))->setDebugLoc(DL); auto NewRM = ExtractValueInst::Create(Goto, 1, "goto.extractrm", Br); (new StoreInst(NewRM, RMAddr, false /* isVolatile */, Br))->setDebugLoc(DL); auto BranchCond = ExtractValueInst::Create(Goto, 2, "goto.extractcond", Br); // Change the branch condition. auto OldCond = dyn_cast(Br->getCondition()); Br->setCondition(BranchCond); // Change the branch target to JIP. Br->setSuccessor(0, JIP); // Erase the old llvm.genx.simdcf.any. if (OldCond && OldCond->use_empty()) eraseInstruction(OldCond); updateFnAttr(GotoFunc); } // Then lower the join points. 
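  // Sketch of what each join point receives at its head (value names are
  // invented; see the code below):
  //   %EM.old = load EMVar
  //   %RM     = load RM.<JP>
  //   %join   = call @llvm.genx.simdcf.join(%EM.old, %RM)  ; {NewEM, cond}
  //   store (extractvalue %join, 0) to EMVar               ; new EM
  //   store zeroinitializer to RM.<JP>                     ; reset RM
  // plus, if the join has a JIP, a conditional branch on
  // (extractvalue %join, 1) to that JIP.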
for (auto jpi = JoinPoints.begin(), jpe = JoinPoints.end(); jpi != jpe; ++jpi) { BasicBlock *JP = jpi->first; unsigned SimdWidth = jpi->second; LLVM_DEBUG(dbgs() << "lower join point " << JP->getName() << "\n"); DebugLoc DL = JP->front().getDebugLoc(); Instruction *InsertBefore = JP->getFirstNonPHI(); // Insert {NewEM,BranchCond} = llvm.genx.simdcf.join(OldEM,RM) Value *RMAddr = getRMAddr(JP, SimdWidth); Instruction *OldEM = new LoadInst(EMVar->getValueType(), EMVar, EMVar->getName(), false /* isVolatile */, InsertBefore); OldEM->setDebugLoc(DL); auto RM = new LoadInst(cast(RMAddr)->getAllocatedType(), RMAddr, RMAddr->getName(), false /* isVolatile */, InsertBefore); RM->setDebugLoc(DL); Type *Tys[] = { OldEM->getType(), RM->getType() }; auto JoinFunc = GenXIntrinsic::getGenXDeclaration( JP->getParent()->getParent(), GenXIntrinsic::genx_simdcf_join, Tys); Value *Args[] = { OldEM, RM }; auto Join = CallInst::Create(JoinFunc, Args, "join", InsertBefore); Join->setDebugLoc(DL); Join->setConvergent(); auto NewEM = ExtractValueInst::Create(Join, 0, "join.extractem", InsertBefore); (new StoreInst(NewEM, EMVar, false /* isVolatile */, InsertBefore)) ->setDebugLoc(DL); auto BranchCond = ExtractValueInst::Create(Join, 1, "join.extractcond", InsertBefore); // Zero RM. (new StoreInst(Constant::getNullValue(RM->getType()), RMAddr, false /* isVolatile */, InsertBefore)) ->setDebugLoc(DL); BasicBlock *JIP = JIPs[JP]; if (JIP) { // This join point is in predicated code, so it was separated into its // own block. It needs to be turned into a conditional branch to JIP, // with the condition from llvm.genx.simdcf.join. auto Br = cast(JP->getTerminator()); assert(!Br->isConditional()); auto NewBr = BranchInst::Create(JIP, JP->getNextNode(), BranchCond, Br); assert(JoinToGoto.count(JP)); NewBr->setDebugLoc(DL); eraseInstruction(Br); auto *OrigBranch = cast(JoinToGoto.at(JP)->getTerminator()); if (OrigBranch->isConditional()) fixPHIInput(JIP, (OrigBranch->getSuccessor(0) == JP ? OrigBranch->getSuccessor(1) : OrigBranch->getSuccessor(0)), NewBr->getParent()); // Get the JIP's RM, just to ensure that it knows its SIMD width in case // nothing else references it. 
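      // (getRMAddr() lazily creates the RM alloca on first use, so this call
      // alone is enough to fix the RM width for JIP even if no goto or join
      // ever reads it.)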
getRMAddr(JIP, VCINTR::VectorType::getNumElements( cast(RM->getType()))); } updateFnAttr(JoinFunc); } } /*********************************************************************** * lowerUnmaskOps : lower the simd unmask begins and ends */ void CMSimdCFLower::lowerUnmaskOps() { std::vector MaskBegins; std::vector MaskEnds; for (auto fi = F->begin(), fe = F->end(); fi != fe; ++fi) { BasicBlock *BB = &*fi; for (auto bi = BB->begin(), be = BB->end(); bi != be; ++bi) { Instruction *Inst = &*bi; // doing the work if (auto *CIE = dyn_cast(Inst)) { if (GenXIntrinsic::getGenXIntrinsicID(CIE) == GenXIntrinsic::genx_unmask_end) { auto LoadV = dyn_cast(CIE->getArgOperand(0)); assert(LoadV); auto PtrV = dyn_cast(LoadV->getPointerOperand()); assert(PtrV); StoreInst *StoreV = nullptr; // search uses of PtrV for (auto ui = PtrV->use_begin(), ue = PtrV->use_end(); ui != ue; ++ui) { if (auto SI = dyn_cast(ui->getUser())) { StoreV = SI; break; } } assert(StoreV); auto *CIB = cast(StoreV->getValueOperand()); assert(GenXIntrinsic::getGenXIntrinsicID(CIB) == GenXIntrinsic::genx_unmask_begin); MaskBegins.push_back(CIB); MaskEnds.push_back(CIE); // put in genx_simdcf_savemask and genx_simdcf_remask const auto &DL = CIB->getDebugLoc(); Instruction *OldEM = new LoadInst(EMVar->getValueType(), EMVar, EMVar->getName(), false /* isVolatile */, CIB); OldEM->setDebugLoc(DL); Type *Tys[] = {OldEM->getType()}; auto SavemaskFunc = GenXIntrinsic::getGenXDeclaration( BB->getParent()->getParent(), GenXIntrinsic::genx_simdcf_savemask, Tys); Value *Args[] = {OldEM}; auto Savemask = CallInst::Create(SavemaskFunc, Args, "savemask", CIB); Savemask->setDebugLoc(DL); // the use should be the store for savemask CIB->replaceAllUsesWith(Savemask); Type *Ty1s[] = {OldEM->getType()}; auto UnmaskFunc = GenXIntrinsic::getGenXDeclaration( BB->getParent()->getParent(), GenXIntrinsic::genx_simdcf_unmask, Ty1s); Value *Arg1s[] = {Savemask, ConstantInt::get(Savemask->getType(), 0xFFFFFFFF) }; auto Unmask = CallInst::Create(UnmaskFunc, Arg1s, "unmask", CIB); Unmask->setDebugLoc(DL); (new StoreInst(Unmask, EMVar, false /* isVolatile */, CIB)) ->setDebugLoc(DL); // put in genx_simdcf_remask const auto &DLCIE = CIE->getDebugLoc(); OldEM = new LoadInst(EMVar->getValueType(), EMVar, EMVar->getName(), false /* isVolatile */, CIE); OldEM->setDebugLoc(DLCIE); Type *Ty2s[] = {OldEM->getType()}; auto RemaskFunc = GenXIntrinsic::getGenXDeclaration( BB->getParent()->getParent(), GenXIntrinsic::genx_simdcf_remask, Ty2s); Value *Arg2s[] = {OldEM, LoadV}; auto Remask = CallInst::Create(RemaskFunc, Arg2s, "remask", CIE); Remask->setDebugLoc(DLCIE); (new StoreInst(Remask, EMVar, false /* isVolatile */, CIE)) ->setDebugLoc(DLCIE); updateFnAttr(SavemaskFunc); updateFnAttr(UnmaskFunc); updateFnAttr(RemaskFunc); } } } } // erase Mask Ends for (auto CIE : MaskEnds) { eraseInstruction(CIE); } // erase Mask Begins for (auto CIB : MaskBegins) { eraseInstruction(CIB); } } /*********************************************************************** * getSimdConditionUse : given a branch condition, if it is * llvm.genx.simdcf.any, get the vector condition */ Use *CMSimdCFLower::getSimdConditionUse(Value *Cond) { if (auto CI = isSimdCFAny(Cond)) return &CI->getOperandUse(0); return nullptr; } /*********************************************************************** * isSimdCFAny : given a value (or nullptr), see if it is a call to * llvm.genx.simdcf.any * * Return: the instruction (cast to CallInst) if it is such a call * else nullptr */ CallInst *CMSimdCFLower::isSimdCFAny(Value 
*V) { if (GenXIntrinsic::getGenXIntrinsicID(V) == GenXIntrinsic::genx_simdcf_any) return cast(V); return nullptr; } /*********************************************************************** * replicateMask : copy mask for provided number of channels using shufflevector */ Value *CMSimdCFLower::replicateMask(Value *EM, Instruction *InsertBefore, unsigned SimdWidth, unsigned NumChannels) { // No need to replicate the mask for one channel if (NumChannels == 1) return EM; SmallVector ChannelMask{SimdWidth * NumChannels}; for (unsigned i = 0; i < NumChannels; ++i) std::copy(ShuffleMask.begin(), ShuffleMask.begin() + SimdWidth, ChannelMask.begin() + SimdWidth * i); EM = new ShuffleVectorInst( EM, UndefValue::get(EM->getType()), ConstantVector::get(ChannelMask), Twine("ChannelEM") + Twine(SimdWidth), InsertBefore); return EM; } /*********************************************************************** * loadExecutionMask : create instruction to load EM */ Instruction *CMSimdCFLower::loadExecutionMask(Instruction *InsertBefore, unsigned SimdWidth) { Instruction *EM = new LoadInst(EMVar->getValueType(), EMVar, EMVar->getName(), false /* isVolatile */, InsertBefore); // If the simd width is not MAX_SIMD_CF_WIDTH, extract the part of EM we want. if (SimdWidth == MAX_SIMD_CF_WIDTH) return EM; ArrayRef Mask = ShuffleMask; EM = new ShuffleVectorInst(EM, UndefValue::get(EM->getType()), ConstantVector::get(Mask.take_front(SimdWidth)), Twine("EM") + Twine(SimdWidth), InsertBefore); EM->setDebugLoc(InsertBefore->getDebugLoc()); return EM; } /*********************************************************************** * getRMAddr : get address of resume mask variable for a particular join * point, creating the variable if necessary * * Enter: JP = the join point * SimdWidth = the simd width for the join point, used for creating * the RM variable. Can be 0 as long as the RM variable already * exists. */ Value *CMSimdCFLower::getRMAddr(BasicBlock *JP, unsigned SimdWidth) { LLVM_DEBUG(dbgs() << "getRMAddr(" << JP->getName() << ", " << SimdWidth << ")\n"); auto RMAddr = &RMAddrs[JP]; if (!*RMAddr) { assert(SimdWidth); // Create an RM variable for this join point. Insert an alloca at the start // of the function. Type *RMTy = VCINTR::getVectorType(Type::getInt1Ty(F->getContext()), SimdWidth); Instruction *InsertBefore = &F->front().front(); *RMAddr = new AllocaInst(RMTy, /*AddrSpace*/ 0, Twine("RM.") + JP->getName(), InsertBefore); // Initialize to all zeros. 
new StoreInst(Constant::getNullValue(RMTy), *RMAddr, false /* isVolatile */, InsertBefore); } assert(!SimdWidth || VCINTR::VectorType::getNumElements(cast( cast(*RMAddr)->getAllocatedType())) == SimdWidth); return *RMAddr; } /*********************************************************************** * DiagnosticInfoSimdCF::emit : emit an error or warning */ void DiagnosticInfoSimdCF::emit(Instruction *Inst, StringRef Msg, DiagnosticSeverity Severity) { DiagnosticInfoSimdCF Err(Severity, *Inst->getParent()->getParent(), Inst->getDebugLoc(), Msg); Inst->getContext().diagnose(Err); } vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/GenXSingleElementVectorUtil.cpp000066400000000000000000001214611475147027500310360ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2024 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ // This file defines common functions for rewriting single element vectors // in GenXSPIRV adaptors. #include "GenXSingleElementVectorUtil.h" #include "llvm/GenXIntrinsics/GenXIntrinsics.h" #include "llvm/GenXIntrinsics/GenXMetadata.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvmVCWrapper/Analysis/InstructionSimplify.h" #include "llvmVCWrapper/IR/Attributes.h" #include "llvmVCWrapper/Support/Alignment.h" namespace llvm { namespace genx { /// This section contains some arbitrary constants // Default size for arguments of SEV-free version ShuffleVector instruction static unsigned constexpr ShuffleVectorSize = 2; /// This section contains general utils: /// * For safe iteration over functions and instructions /// * For vectors operations such as converting constant vector element from /// llvm::Value to int /// * For examining pointer types /// These utils are used across this module but they do not contain /// any design solutions for removing Single Element Vectors (SEVs) // Functions with SEVs are deleted from module // This util allows to continue iteration even after deletion std::vector SEVUtil::getFunctions() { auto Functions = std::vector{}; std::transform(M.begin(), M.end(), std::back_inserter(Functions), [](Function &F) { return &F; }); return Functions; } // Globals with SEVs are deleted from module // This util allows to continue iteration even after deletion std::vector SEVUtil::getGlobalVariables() { auto Globals = std::vector{}; std::transform(M.global_begin(), M.global_end(), std::back_inserter(Globals), [](GlobalVariable &GV) { return &GV; }); return Globals; } // Instructions with SEVs are deleted from module // This util allows to continue iteration even after deletion std::vector SEVUtil::getInstructions(Function &F) { auto Instructions = std::vector{}; for (auto &&BB : F) { std::transform(BB.begin(), BB.end(), std::back_inserter(Instructions), [](Instruction &I) { return &I; }); } return Instructions; } // Returns requested vector index as Value* // It is helpful for creating ExtractElementInst and InsertElementInst ConstantInt *SEVUtil::getVectorIndex(size_t idx) { auto *ITy = IntegerType::getIntNTy(M.getContext(), M.getDataLayout().getPointerSizeInBits(0)); return ConstantInt::get(ITy, idx, false); } // Returns underlying int from Value* int64_t SEVUtil::getConstantElement(ConstantInt *Const) { assert(!isa(Const)); return Const->getSExtValue(); } // For type U***** returns number of stars and type U in the second 
argument size_t SEVUtil::getPointerNesting(Type *Ty, Type **ReturnNested) { auto NPtrs = size_t{0}; auto *NestedTy = Ty; if (!VCINTR::Type::isOpaquePointerTy(Ty)) { while (isa(NestedTy)) { NestedTy = VCINTR::Type::getNonOpaquePtrEltTy(NestedTy); ++NPtrs; } } if (ReturnNested) *ReturnNested = NestedTy; return NPtrs; } // For type **** returns total number of stars and type U in the // second argument size_t SEVUtil::getPointerVectorNesting(Type *Ty, Type **ReturnNested) { Type *NestedTy = nullptr; auto Outer = getPointerNesting(Ty, &NestedTy); auto VTy = dyn_cast(NestedTy); if (!VTy) { if (ReturnNested) *ReturnNested = NestedTy; return Outer; } auto Inner = getPointerNesting(VTy->getElementType(), &NestedTy); if (ReturnNested) *ReturnNested = NestedTy; return Outer + Inner; } // For type **** returns number of stars inside vector size_t SEVUtil::getInnerPointerVectorNesting(Type *Ty) { auto Total = getPointerVectorNesting(Ty); auto Outer = getPointerNesting(Ty); assert(Total >= Outer); return Total - Outer; } /// This section contains core utils for Single Element Vectors: /// * Convertion of types from SEV-rich to SEV-free and vice versa /// * Detecting types which contain SEVs /// * Creating intermediate instructions for conversion of SEV-rich and SEV-free /// values /// * Finalizing replacement of SEV-rich or SEV-free instruction with its /// antipod // Returns SEV-free analogue of Type Ty accordingly to the following scheme: // <1 x U>**...* ---> U**...* Type *SEVUtil::getTypeFreeFromSEV(Type *Ty) { if (VCINTR::Type::isOpaquePointerTy(Ty)) return Ty; // Pointer types should be "undressed" first if (auto *Ptr = dyn_cast(Ty)) { auto UTy = getTypeFreeFromSEV(VCINTR::Type::getNonOpaquePtrEltTy(Ptr)); if (UTy == VCINTR::Type::getNonOpaquePtrEltTy(Ptr)) return Ptr; return PointerType::get(UTy, Ptr->getAddressSpace()); } else if (auto *VecTy = dyn_cast(Ty)) { if (VCINTR::VectorType::getNumElements(VecTy) == 1) return VecTy->getElementType(); } else if (auto *StructTy = dyn_cast(Ty)) { // If there is a key for this struct type is in SEV-Free to SEV-Rich map it // means that the type is already SEV-Free if (SEVRichStructMap.find(StructTy) != SEVRichStructMap.end()) return Ty; if (SEVFreeStructTypes.find(StructTy) != SEVFreeStructTypes.end()) return Ty; auto It = SEVFreeStructMap.find(StructTy); if (It != SEVFreeStructMap.end()) return It->second; // To handle circle dependencies we create opaque struct type and add it to // the map. If this struct or any nested one contains a pointer to the type // we are rewriting it will be automatically changed to this incomplete type // and traversing will stop StructType *NewStructTy = StructType::create(Ty->getContext()); It = SEVFreeStructMap.insert(std::make_pair(StructTy, NewStructTy)).first; bool HasSEV = false; std::vector NewElements; for (auto *ElemTy : StructTy->elements()) { Type *NewElemTy = getTypeFreeFromSEV(ElemTy); NewElements.push_back(NewElemTy); if (!HasSEV && NewElemTy != ElemTy) { // If new type is not equal to the old one it doesn't always mean that // there is a SEV element in the struct. 
It could be also temporary // unfininished (opaque) struct type or a pointer to it auto *TempTy = NewElemTy; while (auto *Ptr = dyn_cast(TempTy)) TempTy = VCINTR::Type::getNonOpaquePtrEltTy(Ptr); if (auto *NestedStructTy = dyn_cast(TempTy)) HasSEV = !NestedStructTy->isOpaque(); else HasSEV = true; } } if (HasSEV) { NewStructTy->setBody(NewElements); SEVRichStructMap.insert(std::make_pair(NewStructTy, StructTy)); return NewStructTy; } SEVFreeStructMap.erase(It); SEVFreeStructTypes.insert(StructTy); } return Ty; } // Returns SEV-rich analogue of Type Ty accordingly to the following scheme: // U*...**...* ---> <1 x U*...*>*...* Type *SEVUtil::getTypeWithSEV(Type *Ty, size_t InnerPointers) { if (auto *VecTy = dyn_cast(Ty)) { (void)VecTy; assert(InnerPointers == 0); assert(VCINTR::VectorType::getNumElements(VecTy) == 1 && "Cannot put vector type inside another vector!"); return Ty; } else if (auto *StructTy = dyn_cast(Ty)) { auto It = SEVRichStructMap.find(StructTy); if (It == SEVRichStructMap.end()) llvm_unreachable("Unexpected SEV StructType"); return It->second; } auto NPtrs = getPointerNesting(Ty); assert(InnerPointers <= NPtrs); if (InnerPointers == NPtrs) return VCINTR::getVectorType(Ty, 1); auto *Ptr = cast(Ty); auto *UTy = getTypeWithSEV(VCINTR::Type::getNonOpaquePtrEltTy(Ptr), InnerPointers); return PointerType::get(UTy, Ptr->getAddressSpace()); } // Returns true if Ty is SEV or it is a pointer to SEV bool SEVUtil::hasSEV(Type *Ty) { return Ty != getTypeFreeFromSEV(Ty); } // Returns true if Instruction type or type of any of its arguments has SEV bool SEVUtil::hasSEV(Instruction *I) { if (hasSEV(I->getType())) return true; if (auto *AI = dyn_cast(I)) if (hasSEV(AI->getAllocatedType())) return true; if (auto *GEPI = dyn_cast(I)) if (hasSEV(GEPI->getSourceElementType())) return true; return std::find_if(I->op_begin(), I->op_end(), [this](Use &Op) { return this->hasSEV(Op.get()->getType()); }) != I->op_end(); } // Returns true if return value or any of arguments have SEV bool SEVUtil::doesSignatureHaveSEV(Function &F) { if (hasSEV(F.getReturnType())) return true; return std::find_if(F.arg_begin(), F.arg_end(), [this](Argument &Arg) { return this->hasSEV(Arg.getType()); }) != F.arg_end(); } // This util accepts SEV-rich Value and returns new, SEV-free one // For pointer types it returns BitCastInst // For constant vector it returns element of Vector // For non-constant vectors it ExtractElementInst Value *SEVUtil::createVectorToScalarValue(Value *Vector, Instruction *InsertBefore, size_t idx) { assert(hasSEV(Vector->getType())); Instruction *Val = nullptr; if (isa(Vector)) return UndefValue::get(getTypeFreeFromSEV(Vector->getType())); else if (isa(Vector->getType())) Val = new BitCastInst(Vector, getTypeFreeFromSEV(Vector->getType()), "sev.cast.", InsertBefore); else if (auto *Const = dyn_cast(Vector)) return Const->getAggregateElement(idx); else { Val = ExtractElementInst::Create(Vector, getVectorIndex(idx), "sev.cast.", InsertBefore); } if (auto *InVector = dyn_cast(Vector)) Val->setDebugLoc(InVector->getDebugLoc()); return Val; } // This util accepts SEV-rich Value and returns new, SEV-free one // For pointer types it returns BitCastInst // For constant vector it returns element of Vector // For non-constant vectors it returns ExtractElementInst Value *SEVUtil::createVectorToScalarValue(Value *Vector, BasicBlock *BB, size_t idx) { assert(hasSEV(Vector->getType())); Instruction *Val = nullptr; if (isa(Vector)) return UndefValue::get(getTypeFreeFromSEV(Vector->getType())); else if 
(isa(Vector->getType())) Val = new BitCastInst(Vector, getTypeFreeFromSEV(Vector->getType()), "sev.cast.", BB); else if (auto *Const = dyn_cast(Vector)) return Const->getAggregateElement(idx); else { Val = ExtractElementInst::Create(Vector, getVectorIndex(idx), "sev.cast.", BB); } if (auto *InVector = dyn_cast(Vector)) Val->setDebugLoc(InVector->getDebugLoc()); return Val; } // This util accepts Scalar Value and returns new SEV-rich Value // For pointer types it returns BitCastInst // For constant elements it returns constant vector // For non-constant vectors it returns InsertElementInst Value *SEVUtil::createScalarToVectorValue(Value *Scalar, Type *RefTy, Instruction *InsertBefore) { if (isa(Scalar)) return UndefValue::get(RefTy); else if (isa(Scalar->getType()) && isa(RefTy)) { auto Inner = getInnerPointerVectorNesting(RefTy); return new BitCastInst(Scalar, getTypeWithSEV(Scalar->getType(), Inner), "sev.cast.", InsertBefore); } else if (auto *Const = dyn_cast(Scalar)) return ConstantInt::getSigned(RefTy, getConstantElement(Const)); else { return InsertElementInst::Create(UndefValue::get(RefTy), Scalar, getVectorIndex(0), "sev.cast.", InsertBefore); } } // Returns Old Value if it is already SEV-free // Creates SEV-free value otherwise Value *SEVUtil::getValueFreeFromSEV(Value *OldV, Instruction *InsertBefore) { if (!hasSEV(OldV->getType())) return OldV; return createVectorToScalarValue(OldV, InsertBefore); } // Returns Old Value if it is already SEV free // Creates SEV-free value otherwise Value *SEVUtil::getValueFreeFromSEV(Value *OldV, BasicBlock *BB) { if (!hasSEV(OldV->getType())) return OldV; return createVectorToScalarValue(OldV, BB); } // Returns Old Value if it is already SEV-rich // Creates SEV-rich value otherwise Value *SEVUtil::getValueWithSEV(Value *OldV, Type *RefTy, Instruction *InsertBefore) { if (hasSEV(OldV->getType())) { assert(RefTy == OldV->getType()); return OldV; } return createScalarToVectorValue(OldV, RefTy, InsertBefore); } // Returns SEV-free type of new instruction in the first parameter // Returns SEV-free analogues of old instruction parameteres in the second // parameter std::pair SEVUtil::getOperandsFreeFromSEV(Instruction &OldInst) { auto Values = ValueCont{}; auto *NewRetTy = getTypeFreeFromSEV(OldInst.getType()); for (auto I = size_t{0}; I < OldInst.getNumOperands(); ++I) { auto *Op = OldInst.getOperand(I); auto *NewOp = getValueFreeFromSEV(Op, &OldInst); Values.push_back(NewOp); } return {NewRetTy, std::move(Values)}; } // This util accepts SEV value and inserts its only element to the new // empty vector of size 2 // Returns this new vector as a result // For undef vectors it returns new undefs directly without any insertions // // Because this function may cause regressions, // it is used only in specific case of shufflevector instruction Value *SEVUtil::getTwoElementVectorFromOneElement(Value *V, Instruction *InsertBefore) { auto *VTy = cast(V->getType()); auto *NewVTy = VCINTR::getVectorType(VTy->getElementType(), ShuffleVectorSize); if (isa(V)) return UndefValue::get(NewVTy); auto *Extract = createVectorToScalarValue(V, InsertBefore); auto *Insert = createScalarToVectorValue(Extract, NewVTy, InsertBefore); return Insert; } // This function finalizes replacement of old instruction with the new one // After all arguments of OldInst were converted to SEV-rich/free form // this util moves all properties of OldInst to NewInst and inserts // a convertion instruction if type of OldInst is not the same as of NewInst void 
SEVUtil::replaceAllUsesWith(Instruction *OldInst, Instruction *NewInst) {
  NewInst->takeName(OldInst);
  NewInst->copyMetadata(*OldInst);
  NewInst->copyIRFlags(OldInst);
  auto *ReplaceInst = cast<Value>(NewInst);
  if (!hasSEV(NewInst->getType()) && hasSEV(OldInst->getType()))
    ReplaceInst =
        createScalarToVectorValue(NewInst, OldInst->getType(), OldInst);
  else if (hasSEV(NewInst->getType()) && !hasSEV(OldInst->getType()))
    ReplaceInst = createVectorToScalarValue(NewInst, OldInst);
  OldInst->replaceAllUsesWith(ReplaceInst);
  OldInst->eraseFromParent();
}

/// This section contains utilities for rewriting function signatures:
/// * Generating the SEV-rich/free version of a given function signature
/// * Fixing return instructions so that their types match the new signature
/// * Replacing old function arguments with new ones
/// * Replacing call instructions of the old function
/// * For conversions from SEV-free to SEV-rich, determining whether a specific
///   argument should be an SEV or was originally a scalar

// This is the final step of replacing an old function.
// After the new function was generated, this util finds all uses of the old
// function and replaces them with uses of the new one.
void SEVUtil::replaceAllUsesWith(Function &OldF, Function &NewF) {
  assert(OldF.getFunctionType() != NewF.getFunctionType());
  auto Users = SmallVector<User *, 8>{};
  std::transform(OldF.user_begin(), OldF.user_end(), std::back_inserter(Users),
                 [](User *U) { return U; });
  auto IsScalarToVector = doesSignatureHaveSEV(NewF);
  assert(IsScalarToVector == !doesSignatureHaveSEV(OldF));
  for (auto *U : Users) {
    auto *OldInst = cast<CallInst>(U);
    assert(OldInst);
    auto NewParams = SmallVector<Value *, 8>{};
    for (auto &&ArgPair : llvm::zip(OldF.args(), NewF.args())) {
      auto &&OldArg = std::get<0>(ArgPair);
      auto &&NewArg = std::get<1>(ArgPair);
      auto ArgNo = OldArg.getArgNo();
      auto *Op = OldInst->getOperand(ArgNo);
      auto *Conv = Op;
      if (!IsScalarToVector)
        Conv = getValueFreeFromSEV(Op, OldInst);
      else {
        if (OldArg.getType() != NewArg.getType())
          Conv = getValueWithSEV(Op, NewArg.getType(), OldInst);
      }
      NewParams.push_back(Conv);
    }
    auto *NewCall = CallInst::Create(&NewF, NewParams, "", OldInst);
    NewCall->setCallingConv(OldInst->getCallingConv());
    NewCall->setTailCallKind(OldInst->getTailCallKind());
    NewCall->copyIRFlags(OldInst);
    NewCall->copyMetadata(*OldInst);
    NewCall->setAttributes(OldInst->getAttributes());
    replaceAllUsesWith(OldInst, NewCall);
  }
}

// After the new function was generated, it still uses arguments of the old
// function inside its body. This util moves all properties of the old
// argument to the new one and inserts convert instructions if needed.
// After that it replaces the old argument with the new one.
void SEVUtil::replaceAllUsesWith(Argument &OldArg, Argument &NewArg,
                                 Function &NewF) {
  NewArg.takeName(&OldArg);
  auto *OldTy = OldArg.getType();
  auto *NewTy = NewArg.getType();
  if (OldTy == NewTy) {
    OldArg.replaceAllUsesWith(&NewArg);
    return;
  }
  Value *Conv = nullptr;
  auto &&InsPt = NewF.getEntryBlock().front();
  if (hasSEV(OldTy)) {
    assert(!hasSEV(NewTy));
    Conv = createScalarToVectorValue(&NewArg, OldTy, &InsPt);
  } else {
    assert(hasSEV(NewTy));
    assert(!hasSEV(OldTy));
    Conv = createVectorToScalarValue(&NewArg, &InsPt);
  }
  OldArg.replaceAllUsesWith(Conv);
}

// After the new function was generated, its return instructions might not
// match the
This util inserts convert instructions for returns if needed void SEVUtil::rewriteSEVReturns(Function &NewF) { auto &&Context = NewF.getContext(); auto Instructions = getInstructions(NewF); auto *NewRetTy = NewF.getReturnType(); bool IsVectorReturn = hasSEV(NewRetTy); for (auto *Inst : Instructions) { auto *RetInst = dyn_cast(Inst); if (!RetInst) continue; auto *RetV = RetInst->getReturnValue(); Value *Conv = nullptr; if (IsVectorReturn) { assert(!hasSEV(RetV->getType())); Conv = createScalarToVectorValue(RetV, NewRetTy, RetInst); } else { assert(hasSEV(RetV->getType())); Conv = createVectorToScalarValue(RetV, RetInst); } auto *NewRet = ReturnInst::Create(Context, Conv, RetInst); NewRet->takeName(RetInst); RetInst->eraseFromParent(); } } // For conversion in SEV-rich to SEV-free direction // this function adds VCSingleElementVector attribute to argument or function // if theirs types were modified. // It is needed for telling reader part that they should be converted back // For conversion in SEV-free to SEV-rich direction // this function removes VCSingleElementVector attributes void SEVUtil::manageSEVAttribute(Function &NewF, Type *OldTy, Type *NewTy, size_t AttrNo) { if (hasSEV(OldTy)) { assert(!hasSEV(NewTy)); auto InnerPtrs = std::to_string(getInnerPointerVectorNesting(OldTy)); auto Attr = Attribute::get(NewF.getContext(), VCModuleMD::VCSingleElementVector, InnerPtrs); VCINTR::Function::addAttributeAtIndex(NewF, AttrNo, Attr); } else if (hasSEV(NewTy)) { assert(!hasSEV(OldTy)); VCINTR::Function::removeAttributeAtIndex(NewF, AttrNo, VCModuleMD::VCSingleElementVector); } } void SEVUtil::manageSEVAttributes(Function &OldF, Function &NewF) { for (Function::arg_iterator ArgIt = NewF.arg_begin(), E = NewF.arg_end(); ArgIt != E; ++ArgIt) { auto ArgNo = ArgIt->getArgNo(); auto *OldTy = VCINTR::Function::getArg(OldF, ArgNo)->getType(); auto *NewTy = ArgIt->getType(); manageSEVAttribute(NewF, OldTy, NewTy, ArgNo + 1); } manageSEVAttribute(NewF, OldF.getReturnType(), NewF.getReturnType(), AttributeList::ReturnIndex); } // For conversion in SEV-free to SEV-rich direction // this function determines whether return value or argument of function // should be converted to single element vector // If true it returns type to convert to. Otherwise it returns currently // presented type in Function. Type *SEVUtil::getOriginalType(Function &F, size_t AttrNo) { using namespace llvm::GenXIntrinsic; auto *FuncTy = F.getFunctionType(); auto *Ty = AttrNo == 0 ? FuncTy->getReturnType() : FuncTy->getParamType(AttrNo - 1); auto Attrs = F.getAttributes(); if (!VCINTR::AttributeList::hasAttributeAtIndex( Attrs, AttrNo, VCModuleMD::VCSingleElementVector)) return Ty; NeedCollapse = true; auto InnerPtrsStr = VCINTR::AttributeList::getAttributeAtIndex( Attrs, AttrNo, VCModuleMD::VCSingleElementVector) .getValueAsString(); auto InnerPtrs = InnerPtrsStr.empty() ? 
0 : std::stoull(InnerPtrsStr.str()); return getTypeWithSEV(Ty, InnerPtrs); } // Returns function with SEV-rich or SEV-free signature depending on // IsScalarToVector parameter // If signature did not change it returns the same function // This is the first step of rewriting a function Function &SEVUtil::getSEVSignature(Function &F, bool IsScalarToVector) { auto NewParams = SmallVector{}; for (Function::arg_iterator ArgIt = F.arg_begin(), E = F.arg_end(); ArgIt != E; ++ArgIt) { auto ArgNo = ArgIt->getArgNo(); Type *NewTy = nullptr; if (!IsScalarToVector) NewTy = getTypeFreeFromSEV(ArgIt->getType()); else NewTy = getOriginalType(F, size_t(ArgNo) + 1); NewParams.push_back(NewTy); } Type *NewRetTy = nullptr; if (!IsScalarToVector) NewRetTy = getTypeFreeFromSEV(F.getReturnType()); else NewRetTy = getOriginalType(F, AttributeList::ReturnIndex); auto *NewFuncTy = FunctionType::get(NewRetTy, NewParams, F.isVarArg()); if (NewFuncTy == F.getFunctionType()) return F; auto &&NewF = *Function::Create(NewFuncTy, F.getLinkage(), F.getAddressSpace()); assert(doesSignatureHaveSEV(F) || doesSignatureHaveSEV(NewF)); return NewF; } // Completely rewrites function in the entire module to its SEV-rich or SEV-free // analogue depending on IsScalarToVector parameter // This is a main util in this section void SEVUtil::rewriteSEVSignature(Function &F, bool IsScalarToVector) { auto &&NewF = getSEVSignature(F, IsScalarToVector); if (&NewF == &F) return; NewF.copyAttributesFrom(&F); NewF.takeName(&F); NewF.copyMetadata(&F, 0); #if VC_INTR_LLVM_VERSION_MAJOR >= 18 NewF.updateAfterNameChange(); #else // VC_INTR_LLVM_VERSION_MAJOR >= 18 NewF.recalculateIntrinsicID(); #endif // VC_INTR_LLVM_VERSION_MAJOR >= 18 F.getParent()->getFunctionList().insert(F.getIterator(), &NewF); #if VC_INTR_LLVM_VERSION_MAJOR > 15 NewF.splice(NewF.begin(), &F); #else NewF.getBasicBlockList().splice(NewF.begin(), F.getBasicBlockList()); #endif manageSEVAttributes(F, NewF); if (NewF.size() > 0) { for (auto &&ArgPair : llvm::zip(F.args(), NewF.args())) replaceAllUsesWith(std::get<0>(ArgPair), std::get<1>(ArgPair), NewF); if (NewF.getReturnType() != F.getReturnType()) rewriteSEVReturns(NewF); } replaceAllUsesWith(F, NewF); F.eraseFromParent(); } /// This section contains class for rewriting different types of /// instructions in SEV-rich to SEV-free direction. /// Each instruction is rewritten to its SEV-free analogue and /// guarded with convert instructions for its arguments and uses /// Convert instructions (BitCastInst, InsertElementInst, ExtractElementInst) /// are not covered in this section. 
Instead they are managed with collapsing /// utils in the next section void SEVUtil::visit(Function &F) { auto Instructions = getInstructions(F); for (auto *OldInst : Instructions) { if (!hasSEV(OldInst)) continue; auto *NewInst = visit(*OldInst); if (NewInst) replaceAllUsesWith(OldInst, NewInst); } } Instruction *SEVUtil::visitStoreInst(StoreInst &OldInst) { Type *NewTy = nullptr; auto NewVals = ValueCont{}; std::tie(NewTy, NewVals) = getOperandsFreeFromSEV(OldInst); return new llvm::StoreInst(NewVals[0], NewVals[1], OldInst.isVolatile(), VCINTR::Align::getAlign(&OldInst), OldInst.getOrdering(), OldInst.getSyncScopeID(), &OldInst); } Instruction *SEVUtil::visitBinaryOperator(BinaryOperator &OldInst) { Type *NewTy = nullptr; auto NewVals = ValueCont{}; std::tie(NewTy, NewVals) = getOperandsFreeFromSEV(OldInst); return BinaryOperator::Create(OldInst.getOpcode(), NewVals[0], NewVals[1], "", &OldInst); } Instruction *SEVUtil::visitCmpInst(CmpInst &OldInst) { Type *NewTy = nullptr; auto NewVals = ValueCont{}; std::tie(NewTy, NewVals) = getOperandsFreeFromSEV(OldInst); return CmpInst::Create(OldInst.getOpcode(), OldInst.getPredicate(), NewVals[0], NewVals[1], "", &OldInst); } Instruction *SEVUtil::visitShuffleVectorInst(ShuffleVectorInst &OldInst) { auto Mask = SmallVector{}; // Ensures copy OldInst.getShuffleMask(Mask); auto *Op0 = OldInst.getOperand(0); auto *Op1 = OldInst.getOperand(1); auto *Op0Ty = cast(Op0->getType()); auto *Op1Ty = cast(Op1->getType()); auto &&Context = OldInst.getContext(); auto *Int32Ty = IntegerType::getInt32Ty(Context); if (Mask.size() == 1) { Value *VectorOp = nullptr; Value *Idx = nullptr; auto IsUndef = Mask[0] == VCINTR::ShuffleVectorInst::UndefMaskElem; if (IsUndef) VectorOp = UndefValue::get( VCINTR::getVectorType(Op0Ty->getElementType(), ShuffleVectorSize)); else { auto IsUsedFirstOperand = static_cast(Mask[0]) < VCINTR::VectorType::getNumElements(Op0Ty); VectorOp = IsUsedFirstOperand ? 
Op0 : Op1; } if (IsUndef) Idx = UndefValue::get(Int32Ty); else Idx = ConstantInt::get(Int32Ty, Mask[0]); return ExtractElementInst::Create(VectorOp, Idx, "", &OldInst); } auto *NewOp0 = Op0; auto *NewOp1 = Op1; if (hasSEV(Op0Ty)) { NewOp0 = getTwoElementVectorFromOneElement(Op0, &OldInst); std::transform(Mask.begin(), Mask.end(), Mask.begin(), [](int El) { if (El > 0 && El != VCINTR::ShuffleVectorInst::UndefMaskElem) return El + 1; return El; }); } if (hasSEV(Op1Ty)) NewOp1 = getTwoElementVectorFromOneElement(Op1, &OldInst); return new ShuffleVectorInst( NewOp0, NewOp1, VCINTR::ShuffleVectorInst::getShuffleMask(Mask, Context), "", &OldInst); } Instruction *SEVUtil::visitSelectInst(SelectInst &OldInst) { Type *NewTy = nullptr; auto NewVals = ValueCont{}; std::tie(NewTy, NewVals) = getOperandsFreeFromSEV(OldInst); return SelectInst::Create(NewVals[0], NewVals[1], NewVals[2], "", &OldInst, &OldInst); } Instruction *SEVUtil::visitPHINode(PHINode &OldInst) { auto NewTy = getTypeFreeFromSEV(OldInst.getType()); auto Phi = PHINode::Create(NewTy, OldInst.getNumIncomingValues(), "", &OldInst); for (auto I = size_t{0}; I < OldInst.getNumIncomingValues(); ++I) { auto *V = OldInst.getIncomingValue(I); auto *BB = OldInst.getIncomingBlock(I); auto *NewV = getValueFreeFromSEV(V, BB); Phi->addIncoming(NewV, BB); } return Phi; } Instruction *SEVUtil::visitAllocaInst(AllocaInst &OldInst) { auto *NewTy = getTypeFreeFromSEV(OldInst.getAllocatedType()); return new llvm::AllocaInst(NewTy, OldInst.getType()->getAddressSpace(), OldInst.getArraySize(), VCINTR::Align::getAlign(&OldInst), "", &OldInst); } Instruction *SEVUtil::visitCastInst(CastInst &OldInst) { Type *NewTy = nullptr; auto NewVals = ValueCont{}; std::tie(NewTy, NewVals) = getOperandsFreeFromSEV(OldInst); return CastInst::Create(OldInst.getOpcode(), NewVals[0], NewTy, "", &OldInst); } Instruction *SEVUtil::visitLoadInst(LoadInst &OldInst) { Type *NewTy = nullptr; auto NewVals = ValueCont{}; std::tie(NewTy, NewVals) = getOperandsFreeFromSEV(OldInst); return new llvm::LoadInst(NewTy, NewVals[0], "", OldInst.isVolatile(), VCINTR::Align::getAlign(&OldInst), OldInst.getOrdering(), OldInst.getSyncScopeID(), &OldInst); } Instruction *SEVUtil::visitUnaryOperator(UnaryOperator &OldInst) { Type *NewTy = nullptr; auto NewVals = ValueCont{}; std::tie(NewTy, NewVals) = getOperandsFreeFromSEV(OldInst); return UnaryOperator::Create(OldInst.getOpcode(), NewVals[0], "", &OldInst); } Instruction *SEVUtil::visitVAArgInst(VAArgInst &OldInst) { Type *NewTy = nullptr; auto NewVals = ValueCont{}; std::tie(NewTy, NewVals) = getOperandsFreeFromSEV(OldInst); return new VAArgInst(NewVals[0], NewTy, "", &OldInst); } Instruction *SEVUtil::visitExtractValueInst(ExtractValueInst &OldInst) { Type *NewTy = nullptr; auto NewVals = ValueCont{}; std::tie(NewTy, NewVals) = getOperandsFreeFromSEV(OldInst); return ExtractValueInst::Create(NewVals[0], OldInst.getIndices(), "", &OldInst); } Instruction *SEVUtil::visitGetElementPtrInst(GetElementPtrInst &OldInst) { Type *NewTy = nullptr; auto NewVals = ValueCont{}; std::tie(NewTy, NewVals) = getOperandsFreeFromSEV(OldInst); std::vector IdxList; std::transform(NewVals.begin() + 1, NewVals.end(), std::back_inserter(IdxList), [](Value *V) { return V; }); auto *PointeeTy = getTypeFreeFromSEV(OldInst.getSourceElementType()); return GetElementPtrInst::Create(PointeeTy, NewVals[0], IdxList, "", &OldInst); } Instruction *SEVUtil::visitExtractElementInst(ExtractElementInst &OldInst) { // No processing required // Extracts and Inserts will be collapsed 
later return nullptr; } Instruction *SEVUtil::visitInsertElementInst(InsertElementInst &OldInst) { // No processing required // Extracts and Inserts will be collapsed later return nullptr; } Instruction *SEVUtil::visitInstruction(Instruction &I) { // For CallInst this is a bug, because // Calls have been processed in rewriteSEVUses function // For ReturnInst this is a bug, because // Returns have been processed in rewriteSEVReturn function // For other cases the assert fires because the corresponding "visit" method is not implemented assert(false && "Oops... Cannot rewrite instruction!"); return nullptr; } /// This section contains utils for rewriting global variables // For conversion in SEV-rich to SEV-free direction // this function adds VCSingleElementVector attribute to global var void SEVUtil::manageSEVAttribute(GlobalVariable &GV, Type *OldTy, Type *NewTy) { if (hasSEV(OldTy)) { assert(!hasSEV(NewTy)); auto InnerPtrs = std::to_string(getInnerPointerVectorNesting(OldTy)); GV.addAttribute(VCModuleMD::VCSingleElementVector, InnerPtrs); } } GlobalVariable &SEVUtil::createAndTakeFrom(GlobalVariable &GV, Type *NewTy, Constant *Initializer) { auto *NewGV = new GlobalVariable( *GV.getParent(), NewTy, GV.isConstant(), GV.getLinkage(), Initializer, "sev.global.", &GV, GV.getThreadLocalMode(), GV.getAddressSpace(), GV.isExternallyInitialized()); auto DebugInfoVec = SmallVector<DIGlobalVariableExpression *, 2>{}; GV.getDebugInfo(DebugInfoVec); NewGV->takeName(&GV); NewGV->setAttributes(GV.getAttributes()); NewGV->copyMetadata(&GV, 0); NewGV->setComdat(GV.getComdat()); NewGV->setAlignment(VCINTR::Align::getAlign(&GV)); for (auto *DebugInf : DebugInfoVec) NewGV->addDebugInfo(DebugInf); return *NewGV; } void SEVUtil::rewriteGlobalVariable(GlobalVariable &GV) { auto *Ty = GV.getValueType(); auto *NewTy = getTypeFreeFromSEV(Ty); if (NewTy == Ty) return; Constant *Initializer = nullptr; if (GV.hasInitializer()) Initializer = cast<Constant>(createVectorToScalarValue( GV.getInitializer(), static_cast<Instruction *>(nullptr))); auto &&NewGV = createAndTakeFrom(GV, NewTy, Initializer); if (VCINTR::Type::isOpaquePointerTy(GV.getType())) { GV.replaceAllUsesWith(&NewGV); } else { while (GV.use_begin() != GV.use_end()) { auto &&Use = GV.use_begin(); auto *Inst = cast<Instruction>(Use->getUser()); auto *V = createScalarToVectorValue(&NewGV, GV.getType(), Inst); *Use = V; } } manageSEVAttribute(NewGV, Ty, NewTy); GV.eraseFromParent(); } void SEVUtil::restoreGlobalVariable(GlobalVariable &GV) { auto *Ty = GV.getValueType(); if (!GV.hasAttribute(VCModuleMD::VCSingleElementVector)) return; NeedCollapse = true; auto InnerPtrsStr = GV.getAttribute(VCModuleMD::VCSingleElementVector).getValueAsString(); auto InnerPtrs = InnerPtrsStr.empty() ?
0 : std::stoull(InnerPtrsStr.str()); auto *NewTy = getTypeWithSEV(Ty, InnerPtrs); if (NewTy == Ty) return; Constant *Initializer = nullptr; if (GV.hasInitializer()) Initializer = cast<Constant>(createScalarToVectorValue( GV.getInitializer(), NewTy, static_cast<Instruction *>(nullptr))); auto &&NewGV = createAndTakeFrom(GV, NewTy, Initializer); if (VCINTR::Type::isOpaquePointerTy(GV.getType())) { GV.replaceAllUsesWith(&NewGV); } else { while (GV.use_begin() != GV.use_end()) { auto &&Use = GV.use_begin(); auto *Inst = cast<Instruction>(Use->getUser()); auto *V = createVectorToScalarValue(&NewGV, Inst); *Use = V; } } manageSEVAttribute(NewGV, Ty, NewTy); GV.eraseFromParent(); } void SEVUtil::rewriteGlobalVariables(bool IsScalarToVector) { auto &&Globals = getGlobalVariables(); for (auto *GV : Globals) { if (IsScalarToVector) restoreGlobalVariable(*GV); else rewriteGlobalVariable(*GV); } } /// This section contains utils for collapsing pairs of conversion instructions /// After rewriting all instructions in the module there are lots of /// extract-insert and bitcast-bitcast conversion pairs left /// These utilities eliminate such pairs void SEVUtil::collapseBitcastInst(BitCastInst *BitCast, bool CollapseCannotFail) { if (BitCast->user_empty()) { BitCast->eraseFromParent(); return; } auto &&Q = SimplifyQuery(M.getDataLayout()); auto *ReplaceWith = VCINTR::SimplifyCastInst( BitCast->getOpcode(), BitCast->getOperand(0), BitCast->getType(), Q); if (!CollapseCannotFail && !ReplaceWith) return; assert(ReplaceWith && "Oops... Cannot collapse BitCast instruction!"); BitCast->replaceAllUsesWith(ReplaceWith); BitCast->eraseFromParent(); } // After rewriting instructions from SEV-rich/free form to SEV-free/rich one // There are lots of auxiliary pairs of bitcasts left, like these: // %b = bitcast T %a to <1 x T> // %d = bitcast <1 x T> %b to T // %e = some_user_of_T %d // // This util collapses such pairs of bitcasts in two iterations: // First iteration will remove %d // Second iteration will remove %b void SEVUtil::collapseBitcastInstructions(Function &F, bool CollapseCannotFail) { for (size_t i = 0; i < 2; i++) { auto Instructions = getInstructions(F); for (auto *I : Instructions) { if (auto *BitCast = dyn_cast<BitCastInst>(I)) { auto HasSEV = hasSEV(BitCast->getOperand(0)->getType()) || hasSEV(BitCast->getType()); collapseBitcastInst(BitCast, i && CollapseCannotFail && HasSEV); } } } } void SEVUtil::collapseExtractInst(ExtractElementInst *Extract, bool CollapseCannotFail) { if (Extract->user_empty()) { Extract->eraseFromParent(); return; } auto &&Q = SimplifyQuery(M.getDataLayout()); auto *ReplaceWith = VCINTR::SimplifyExtractElementInst( Extract->getOperand(0), Extract->getOperand(1), Q); if (!CollapseCannotFail && !ReplaceWith) return; assert(ReplaceWith && "Oops...
Cannot collapse ExtractElement instruction"); Extract->replaceAllUsesWith(ReplaceWith); Extract->eraseFromParent(); } void SEVUtil::collapseInsertInst(InsertElementInst *Insert, bool CollapseCannotFail) { if (Insert->user_empty()) { Insert->eraseFromParent(); return; } auto &&Q = SimplifyQuery(M.getDataLayout()); auto *ReplaceWith = VCINTR::SimplifyInsertElementInst( Insert->getOperand(0), Insert->getOperand(1), Insert->getOperand(2), Q); // SimplifyInsertElementInst provides too simple analysis // which does not work in some cases handled below: if (!ReplaceWith && hasSEV(Insert->getType())) { auto *Scal = Insert->getOperand(1); auto *VecTy = cast(Insert->getType()); if (auto *Extract = dyn_cast(Scal)) { if (hasSEV(Extract->getOperand(0)->getType())) ReplaceWith = Extract->getOperand(0); } else if (isa(Scal)) ReplaceWith = UndefValue::get(VecTy); else if (auto *Const = dyn_cast(Scal)) ReplaceWith = ConstantInt::get(VecTy, getConstantElement(Const)); } if (!CollapseCannotFail && !ReplaceWith) return; assert(ReplaceWith && "Oops... Cannot collapse InsertElement instruction"); Insert->replaceAllUsesWith(ReplaceWith); Insert->eraseFromParent(); } // After rewriting instructions from SEV-free form to SEV-rich one // There are lots of auxiliary pairs of insert-extract // instructions left, like these: // 1. // %v = insertelement <1 x Ty> %u, %element, %zero_idx // %s = extractlement <1 x Ty> %v, %zero_idx // %e = some_user_of_T %s // 2. // %s = extractlement %ConstantVector, %ConstantIdx // %e = some_user_of_T %s // This utility removes excessive ExtractElement instructions // // After rewriting instructions from SEV-rich form to SEV-free one // collapseInsertInstructions utility leaves lots of ExtractElement instructions // with no users. This utility removes them as well void SEVUtil::collapseExtractInstructions(Function &F, bool CollapseCannotFail) { auto Instructions = getInstructions(F); for (auto *I : Instructions) { if (auto *Extract = dyn_cast(I)) { auto OpTy = I->getOperand(0)->getType(); collapseExtractInst(Extract, CollapseCannotFail && hasSEV(OpTy)); } } } // After rewriting instructions from SEV-rich form to SEV-free one // There are lots of auxiliary pairs of extract-insert // instructions left, like these: // 1. // %s = extractlement <1 x Ty> %v, %idx // %g = insertelement <1 x Ty> %u, %s, %idx // %e = some_user_of_1T <1 x Ty> %g // 2. // %g = insertelement <1 x Ty> %v, %ConstantElement, %idx // %e = some_user_of_T %g // This utility removes excessive InsertElement instructions // // After rewriting instructions from SEV-free form to SEV-rich one // collapseExtractInstructions utility leaves lots of InsertElement instructions // with no users. 
This utility removes them as well void SEVUtil::collapseInsertInstructions(Function &F, bool CollapseCannotFail) { auto Instructions = getInstructions(F); for (auto *I : Instructions) if (auto *Insert = dyn_cast<InsertElementInst>(I)) collapseInsertInst(Insert, CollapseCannotFail && hasSEV(I->getType())); } /// This section contains upper-level functions /// for calling from the GenXSPIRV adaptors /// They either remove or restore Single Element Vectors in the module void SEVUtil::rewriteSEVs() { rewriteGlobalVariables(/*IsScalarToVector=*/false); auto Functions = getFunctions(); for (auto *F : Functions) rewriteSEVSignature(*F, /*IsScalarToVector=*/false); // Functions container should be refreshed after signature rewriting Functions = getFunctions(); visit(M); for (auto *F : Functions) { collapseExtractInstructions(*F); collapseInsertInstructions(*F); } for (auto *F : Functions) collapseBitcastInstructions(*F); } void SEVUtil::restoreSEVs() { rewriteGlobalVariables(/*IsScalarToVector=*/true); auto Functions = getFunctions(); for (auto *F : Functions) rewriteSEVSignature(*F, /*IsScalarToVector=*/true); // Functions container should be refreshed after signature rewriting Functions = getFunctions(); if (!NeedCollapse) return; for (auto *F : Functions) { // When converting in the SEV-free to SEV-rich direction // collapsing of instructions may fail, because only call instructions were // rewritten. All other instructions were left intact. collapseInsertInstructions(*F, /*CollapseCannotFail=*/false); collapseExtractInstructions(*F, /*CollapseCannotFail=*/false); } for (auto *F : Functions) collapseBitcastInstructions(*F, /*CollapseCannotFail=*/false); } } // namespace genx } // namespace llvm vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/GenXSingleElementVectorUtil.h000066400000000000000000000116251475147027500305030ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2024 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ // This file declares the class for rewriting single element vectors // in GenXSPIRV adaptors.
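// The two public entry points declared below are intended to be driven from
// the GenXSPIRV adaptor passes. A minimal usage sketch (hypothetical driver
// code, not part of this repository) could look like:
//   llvm::genx::SEVUtil Util(M);
//   Util.rewriteSEVs();  // drop <1 x T> types before SPIRV translation
// and, on the way back:
//   llvm::genx::SEVUtil Util(M);
//   Util.restoreSEVs(); // re-create <1 x T> types after SPIRV translation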
#include "llvm/IR/InstVisitor.h" #include "llvm/IR/Module.h" #include "llvmVCWrapper/IR/DerivedTypes.h" #include "llvmVCWrapper/IR/Function.h" #include "llvmVCWrapper/IR/Instructions.h" #include "llvmVCWrapper/IR/Type.h" #include #include namespace llvm { namespace genx { class SEVUtil : public InstVisitor { private: using ValueCont = SmallVector; using InstVisitor::visit; Module &M; bool NeedCollapse = false; std::unordered_map SEVFreeStructMap; std::unordered_map SEVRichStructMap; std::unordered_set SEVFreeStructTypes; std::vector getFunctions(); std::vector getGlobalVariables(); std::vector getInstructions(Function &F); ConstantInt *getVectorIndex(size_t idx); int64_t getConstantElement(ConstantInt *Const); size_t getPointerNesting(Type *Ty, Type **ReturnNested = nullptr); size_t getPointerVectorNesting(Type *Ty, Type **ReturnNested = nullptr); size_t getInnerPointerVectorNesting(Type *Ty); Type *getTypeFreeFromSEV(Type *Ty); Type *getTypeWithSEV(Type *Ty, size_t InnerPointers = 0); bool hasSEV(Type *Ty); bool hasSEV(Instruction *I); bool doesSignatureHaveSEV(Function &F); Value *createVectorToScalarValue(Value *Vector, Instruction *InsertBefore, size_t idx = 0); Value *createVectorToScalarValue(Value *Vector, BasicBlock *BB, size_t idx = 0); Value *createScalarToVectorValue(Value *Scalar, Type *RefTy, Instruction *InsertBefore); Value *getValueFreeFromSEV(Value *OldV, Instruction *InsertBefore); Value *getValueFreeFromSEV(Value *OldV, BasicBlock *BB); Value *getValueWithSEV(Value *OldV, Type *RefTy, Instruction *InsertBefore); std::pair getOperandsFreeFromSEV(Instruction &OldInst); Value *getTwoElementVectorFromOneElement(Value *V, Instruction *InsertBefore); void replaceAllUsesWith(Instruction *OldInst, Instruction *NewInst); void replaceAllUsesWith(Function &OldF, Function &NewF); void replaceAllUsesWith(Argument &OldArg, Argument &NewArg, Function &NewF); void rewriteSEVReturns(Function &NewF); void manageSEVAttribute(Function &NewF, Type *OldTy, Type *NewTy, size_t AttrNo); void manageSEVAttributes(Function &OldF, Function &NewF); Type *getOriginalType(Function &F, size_t AttrNo); Function &getSEVSignature(Function &F, bool IsScalarToVector); void rewriteSEVSignature(Function &F, bool IsScalarToVector); void visit(Function &F); Instruction *visitStoreInst(StoreInst &OldInst); Instruction *visitBinaryOperator(BinaryOperator &OldInst); Instruction *visitCmpInst(CmpInst &OldInst); Instruction *visitShuffleVectorInst(ShuffleVectorInst &OldInst); Instruction *visitSelectInst(SelectInst &OldInst); Instruction *visitPHINode(PHINode &OldInst); Instruction *visitAllocaInst(AllocaInst &OldInst); Instruction *visitCastInst(CastInst &OldInst); Instruction *visitLoadInst(LoadInst &OldInst); Instruction *visitUnaryOperator(UnaryOperator &OldInst); Instruction *visitVAArgInst(VAArgInst &OldInst); Instruction *visitExtractValueInst(ExtractValueInst &OldInst); Instruction *visitGetElementPtrInst(GetElementPtrInst &OldInst); Instruction *visitExtractElementInst(ExtractElementInst &OldInst); Instruction *visitInsertElementInst(InsertElementInst &OldInst); Instruction *visitInstruction(Instruction &I); void manageSEVAttribute(GlobalVariable &GV, Type *OldTy, Type *NewTy); GlobalVariable &createAndTakeFrom(GlobalVariable &GV, Type *NewTy, Constant *Initializer); void rewriteGlobalVariable(GlobalVariable &GV); void restoreGlobalVariable(GlobalVariable &GV); void rewriteGlobalVariables(bool IsScalarToVector = false); void collapseBitcastInst(BitCastInst *BitCast, bool CollapseCannotFail); void 
collapseBitcastInstructions(Function &F, bool CollapseCannotFail = true); void collapseExtractInst(ExtractElementInst *Extract, bool CollapseCannotFail); void collapseInsertInst(InsertElementInst *Insert, bool CollapseCannotFail); void collapseExtractInstructions(Function &F, bool CollapseCannotFail = true); void collapseInsertInstructions(Function &F, bool CollapseCannotFail = true); public: SEVUtil(Module &InM) : M{InM} {} void rewriteSEVs(); void restoreSEVs(); friend class InstVisitor<SEVUtil, Instruction *>; }; } // namespace genx } // namespace llvm vc-intrinsics-0.22.1/GenXIntrinsics/lib/GenXIntrinsics/GenXVersion.cpp000066400000000000000000000012431475147027500257020ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2021 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ // This source file defines interface functions to retrieve version info. #include "llvm/GenXIntrinsics/GenXVersion.h" #include "llvm/GenXIntrinsics/GenXVersion.inc" std::string llvm::GenXIntrinsic::getVCIntrinsicsRevision() { #ifdef VCI_REVISION return VCI_REVISION; #else return ""; #endif } std::string llvm::GenXIntrinsic::getVCIntrinsicsRepository() { #ifdef VCI_REPOSITORY return VCI_REPOSITORY; #else return ""; #endif } vc-intrinsics-0.22.1/GenXIntrinsics/test/000077500000000000000000000000001475147027500202715ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/000077500000000000000000000000001475147027500220465ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/addr_conv_attribute_writer.ll000066400000000000000000000015131475147027500300150ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test @llvm.genx.address.convert intrinsic generation with proper attributes ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define void @test(i32 %buf) { ; CHECK-LABEL: @test( ; CHECK: %intel.buffer_rw_t addrspace(1)* ; CHECK: [[BUF:%[^,]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint %intel.buffer_rw_t addrspace(1)* [[BUF]] to i32 ; CHECK-NEXT: ret void ; entry: ret void } !genx.kernels = !{!0} !0 = !{void (i32)* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2} !2 = !{i32 0} !3 = !{!"buffer_t"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/annot_mess_writer.ll000066400000000000000000000031511475147027500261410ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test messy annotations translation in writer. First valid ; annotation should be matched.
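; As an illustration of the matching rule (restating the checks below): the
; desc string "image2d_t buffer_t read_only read_write" carries two argument
; types and two access qualifiers, and the writer is expected to take the
; first valid pair, "image2d_t" + "read_only", producing an
; %opencl.image2d_ro_t addrspace(1)* argument.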
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define void @test(i32 %im2d, i32 %samp, i64 %ptr, i32 %gen) { ; CHECK-LABEL: @test( ; CHECK: %opencl.image2d_ro_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM2D:%[^,]+]], ; CHECK: %opencl.sampler_t addrspace(2)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[SAMP:%[^,]+]], ; CHECK: i8 addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[PTR:%[^,]+]], ; CHECK: i32 ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[GEN:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint %opencl.image2d_ro_t addrspace(1)* [[IM2D]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint %opencl.sampler_t addrspace(2)* [[SAMP]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint i8 addrspace(1)* [[PTR:%.*]] to i64 ; CHECK-NEXT: ret void ; entry: ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{void (i32, i32, i64, i32)* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 1, i32 0, i32 0} !2 = !{i32 0, i32 0, i32 0, i32 0} !3 = !{!"image2d_t buffer_t read_only read_write", !"sampler_t read_only", !"svmptr_t write_only", !"write_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/annotated_args_mixed_reader.ll000066400000000000000000000061761475147027500301120ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that reader can cope with mixed mode when some ; arguments use address convert and some do not. 
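; To spell out the mixed mode checked below: arguments that go through
; @llvm.genx.address.convert (%buf, %im1db, %im3d, %ptr) are folded back to
; their i32/i64 index form in the restored kernel signature, while arguments
; with no conversion call (%im1d, %im2d, %samp) keep their opaque SPIRV types.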
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s %intel.buffer_rw_t = type opaque %opencl.image1d_rw_t = type opaque %opencl.image1d_buffer_rw_t = type opaque %opencl.image2d_rw_t = type opaque %opencl.image3d_rw_t = type opaque %opencl.sampler_t = type opaque define spir_kernel void @test(%intel.buffer_rw_t addrspace(1)* %buf, %opencl.image1d_rw_t addrspace(1)* %im1d, %opencl.image1d_buffer_rw_t addrspace(1)* %im1db, %opencl.image2d_rw_t addrspace(1)* %im2d, %opencl.image3d_rw_t addrspace(1)* %im3d, %opencl.sampler_t addrspace(2)* %samp, i8 addrspace(1)* %ptr, <4 x i32> %gen) #0 { ; CHECK-LABEL: @test( ; CHECK: i32 ; CHECK: [[BUF:%[^,]+]], ; CHECK: %opencl.image1d_rw_t addrspace(1)* ; CHECK: [[IM1D:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM1DB:%[^,]+]], ; CHECK: %opencl.image2d_rw_t addrspace(1)* ; CHECK: [[IM2D:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM3D:%[^,]+]], ; CHECK: %opencl.sampler_t addrspace(2)* ; CHECK: [[SAMP:%[^,]+]], ; CHECK: i64 ; CHECK: [[PTR:%[^,]+]], ; CHECK: <4 x i32> ; CHECK: [[GEN:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: %0 = call i32 @llvm.genx.address.convert.i32.p1intel.buffer_rw_t(%intel.buffer_rw_t addrspace(1)* %buf) %1 = call i32 @llvm.genx.address.convert.i32.p1opencl.image1d_buffer_rw_t(%opencl.image1d_buffer_rw_t addrspace(1)* %im1db) %2 = call i32 @llvm.genx.address.convert.i32.p1opencl.image3d_rw_t(%opencl.image3d_rw_t addrspace(1)* %im3d) %3 = call i64 @llvm.genx.address.convert.i64.p1i8(i8 addrspace(1)* %ptr) ret void } declare i32 @llvm.genx.address.convert.i32.p1intel.buffer_rw_t(%intel.buffer_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image1d_rw_t(%opencl.image1d_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image1d_buffer_rw_t(%opencl.image1d_buffer_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image2d_rw_t(%opencl.image2d_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image3d_rw_t(%opencl.image3d_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p2opencl.sampler_t(%opencl.sampler_t addrspace(2)*) declare i64 @llvm.genx.address.convert.i64.p1i8(i8 addrspace(1)*) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{void (i32, %opencl.image1d_rw_t addrspace(1)*, i32, %opencl.image2d_rw_t addrspace(1)*, i32, %opencl.sampler_t addrspace(2)*, i64, <4 x i32>)* @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0} ; CHECK-DAG: ![[DESCS]] = !{!"buffer_t read_write", !"image1d_t read_write", !"image1d_buffer_t read_write", !"image2d_t read_write", !"image3d_t read_write", !"sampler_t", !"svmptr_t", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/annotated_args_no_conv_reader.ll000066400000000000000000000045121475147027500304350ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that reader correctly restores metadata and does ; not change other things if there is no address conversion ; but correct SPIRV types in signature. 
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s %intel.buffer_rw_t = type opaque %opencl.image1d_rw_t = type opaque %opencl.image1d_buffer_rw_t = type opaque %opencl.image2d_rw_t = type opaque %opencl.image3d_rw_t = type opaque %opencl.sampler_t = type opaque define spir_kernel void @test(%intel.buffer_rw_t addrspace(1)* %buf, %opencl.image1d_rw_t addrspace(1)* %im1d, %opencl.image1d_buffer_rw_t addrspace(1)* %im1db, %opencl.image2d_rw_t addrspace(1)* %im2d, %opencl.image3d_rw_t addrspace(1)* %im3d, %opencl.sampler_t addrspace(2)* %samp, i8 addrspace(2)* %ptr, <4 x i32> %gen) #0 { ; CHECK-LABEL: @test( ; CHECK: %intel.buffer_rw_t addrspace(1)* ; CHECK: [[BUF:%[^,]+]], ; CHECK: %opencl.image1d_rw_t addrspace(1)* ; CHECK: [[IM1D:%[^,]+]], ; CHECK: %opencl.image1d_buffer_rw_t addrspace(1)* ; CHECK: [[IM1DB:%[^,]+]], ; CHECK: %opencl.image2d_rw_t addrspace(1)* ; CHECK: [[IM2D:%[^,]+]], ; CHECK: %opencl.image3d_rw_t addrspace(1)* ; CHECK: [[IM3D:%[^,]+]], ; CHECK: %opencl.sampler_t addrspace(2)* ; CHECK: [[SAMP:%[^,]+]], ; CHECK: i8 addrspace(2)* ; CHECK: [[PTR:%[^,]+]], ; CHECK: <4 x i32> ; CHECK: [[GEN:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: ret void } attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{void (%intel.buffer_rw_t addrspace(1)*, %opencl.image1d_rw_t addrspace(1)*, %opencl.image1d_buffer_rw_t addrspace(1)*, %opencl.image2d_rw_t addrspace(1)*, %opencl.image3d_rw_t addrspace(1)*, %opencl.sampler_t addrspace(2)*, i8 addrspace(2)*, <4 x i32>)* @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0} ; CHECK-DAG: ![[DESCS]] = !{!"buffer_t read_write", !"image1d_t read_write", !"image1d_buffer_t read_write", !"image2d_t read_write", !"image3d_t read_write", !"sampler_t", !"svmptr_t", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/annotated_args_no_conv_writer.ll000066400000000000000000000051721475147027500305120ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that writer does not change the signature if correct ; types are already used. Just drop all annotations.
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s %intel.buffer_rw_t = type opaque %opencl.image1d_rw_t = type opaque %opencl.image1d_buffer_rw_t = type opaque %opencl.image2d_rw_t = type opaque %opencl.image3d_rw_t = type opaque %opencl.sampler_t = type opaque define void @test(%intel.buffer_rw_t addrspace(1)* %buf, %opencl.image1d_rw_t addrspace(1)* %im1d, %opencl.image1d_buffer_rw_t addrspace(1)* %im1db, %opencl.image2d_rw_t addrspace(1)* %im2d, %opencl.image3d_rw_t addrspace(1)* %im3d, %opencl.sampler_t addrspace(2)* %samp, i8 addrspace(1)* %ptr, <4 x i32> %gen) { ; CHECK-LABEL: @test( ; CHECK: %intel.buffer_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[BUF:%[^,]+]], ; CHECK: %opencl.image1d_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM1D:%[^,]+]], ; CHECK: %opencl.image1d_buffer_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM1DB:%[^,]+]], ; CHECK: %opencl.image2d_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM2D:%[^,]+]], ; CHECK: %opencl.image3d_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM3D:%[^,]+]], ; CHECK: %opencl.sampler_t addrspace(2)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[SAMP:%[^,]+]], ; CHECK: i8 addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[PTR:%[^,]+]], ; CHECK: <4 x i32> ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[GEN:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{void (%intel.buffer_rw_t addrspace(1)*, %opencl.image1d_rw_t addrspace(1)*, %opencl.image1d_buffer_rw_t addrspace(1)*, %opencl.image2d_rw_t addrspace(1)*, %opencl.image3d_rw_t addrspace(1)*, %opencl.sampler_t addrspace(2)*, i8 addrspace(1)*, <4 x i32>)* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0} !2 = !{i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0} !3 = !{!"buffer_t", !"image1d_t", !"image1d_buffer_t", !"image2d_t", !"image3d_t", !"sampler_t", !"svmptr_t", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/annotated_args_reader.ll000066400000000000000000000100301475147027500267040ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel argument translation from new style with opaque types ; that SPIRV translator can understand to old style with ; metadata. Here annotations for OCL runtime are used. 
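; The translation pattern verified below is, roughly: a conversion such as
;   %0 = call i32 @llvm.genx.address.convert.i32.p1intel.buffer_rw_t(
;          %intel.buffer_rw_t addrspace(1)* %buf)
; collapses into a plain i32 kernel argument, and the kind/desc strings
; ("buffer_t read_write" and so on) are regenerated in !genx.kernels metadata.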
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s %intel.buffer_rw_t = type opaque %opencl.image1d_rw_t = type opaque %opencl.image1d_buffer_rw_t = type opaque %opencl.image2d_rw_t = type opaque %opencl.image3d_rw_t = type opaque %opencl.sampler_t = type opaque @0 = private unnamed_addr constant [15 x i8] c"some attribute\00", section "llvm.metadata" @llvm.global.annotations = appending global [1 x { i8*, i8*, i8*, i32 }] [{ i8*, i8*, i8*, i32 } { i8* bitcast (void (%intel.buffer_rw_t addrspace(1)*, %opencl.image1d_rw_t addrspace(1)*, %opencl.image1d_buffer_rw_t addrspace(1)*, %opencl.image2d_rw_t addrspace(1)*, %opencl.image3d_rw_t addrspace(1)*, %opencl.sampler_t addrspace(2)*, i8 addrspace(1)*, <4 x i32>)* @test to i8*), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @0, i32 0, i32 0), i8* undef, i32 undef }], section "llvm.metadata" ; CHECK-LABEL: @llvm.global.annotations ; CHECK void (i32, i32, i32, i32, i32, i32, i64, <4 x i32>)* @test define spir_kernel void @test(%intel.buffer_rw_t addrspace(1)* %buf, %opencl.image1d_rw_t addrspace(1)* %im1d, %opencl.image1d_buffer_rw_t addrspace(1)* %im1db, %opencl.image2d_rw_t addrspace(1)* %im2d, %opencl.image3d_rw_t addrspace(1)* %im3d, %opencl.sampler_t addrspace(2)* %samp, i8 addrspace(1)* %ptr, <4 x i32> %gen) #0 { ; CHECK-LABEL: @test( ; CHECK: i32 ; CHECK: [[BUF:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM1D:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM1DB:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM2D:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM3D:%[^,]+]], ; CHECK: i32 ; CHECK: [[SAMP:%[^,]+]], ; CHECK: i64 ; CHECK: [[PTR:%[^,]+]], ; CHECK: <4 x i32> ; CHECK: [[GEN:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: %0 = call i32 @llvm.genx.address.convert.i32.p1intel.buffer_rw_t(%intel.buffer_rw_t addrspace(1)* %buf) %1 = call i32 @llvm.genx.address.convert.i32.p1opencl.image1d_rw_t(%opencl.image1d_rw_t addrspace(1)* %im1d) %2 = call i32 @llvm.genx.address.convert.i32.p1opencl.image1d_buffer_rw_t(%opencl.image1d_buffer_rw_t addrspace(1)* %im1db) %3 = call i32 @llvm.genx.address.convert.i32.p1opencl.image2d_rw_t(%opencl.image2d_rw_t addrspace(1)* %im2d) %4 = call i32 @llvm.genx.address.convert.i32.p1opencl.image3d_rw_t(%opencl.image3d_rw_t addrspace(1)* %im3d) %5 = call i32 @llvm.genx.address.convert.i32.p2opencl.sampler_t(%opencl.sampler_t addrspace(2)* %samp) %6 = call i64 @llvm.genx.address.convert.i64.p1i8(i8 addrspace(1)* %ptr) ret void } declare i32 @llvm.genx.address.convert.i32.p1intel.buffer_rw_t(%intel.buffer_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image1d_rw_t(%opencl.image1d_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image1d_buffer_rw_t(%opencl.image1d_buffer_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image2d_rw_t(%opencl.image2d_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image3d_rw_t(%opencl.image3d_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p2opencl.sampler_t(%opencl.sampler_t addrspace(2)*) declare i64 @llvm.genx.address.convert.i64.p1i8(i8 addrspace(1)*) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{void (i32, i32, i32, i32, i32, i32, i64, <4 x i32>)* @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0} ; CHECK-DAG: ![[DESCS]] = !{!"buffer_t read_write", !"image1d_t read_write", 
!"image1d_buffer_t read_write", !"image2d_t read_write", !"image3d_t read_write", !"sampler_t", !"svmptr_t", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/annotated_args_writer.ll000066400000000000000000000055011475147027500267650ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel arguments translation from old style with metadata to ; new style with opaque types that SPIRV translator can ; understand. Here annotations for OCL runtime are used. ; UNSUPPORTED: opaque-pointers ; XFAIL: llvm13, llvm14 ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; RUN: opt %pass%GenXSPIRVWriterAdaptor %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define void @test(i32 %buf, i32 %im1d, i32 %im1db, i32 %im2d, i32 %im3d, i32 %samp, i64 %ptr, <4 x i32> %gen) { ; CHECK-LABEL: @test( ; CHECK: %intel.buffer_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[BUF:%[^,]+]], ; CHECK: %opencl.image1d_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM1D:%[^,]+]], ; CHECK: %opencl.image1d_buffer_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM1DB:%[^,]+]], ; CHECK: %opencl.image2d_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM2D:%[^,]+]], ; CHECK: %opencl.image3d_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM3D:%[^,]+]], ; CHECK: %opencl.sampler_t addrspace(2)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[SAMP:%[^,]+]], ; CHECK: i8 addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[PTR:%[^,]+]], ; CHECK: <4 x i32> ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[GEN:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint %intel.buffer_rw_t addrspace(1)* [[BUF]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint %opencl.image1d_rw_t addrspace(1)* [[IM1D]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint %opencl.image1d_buffer_rw_t addrspace(1)* [[IM1DB]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint %opencl.image2d_rw_t addrspace(1)* [[IM2D]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint %opencl.image3d_rw_t addrspace(1)* [[IM3D]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint %opencl.sampler_t addrspace(2)* [[SAMP]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 ; CHECK-NEXT: ret void ; entry: ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{void (i32, i32, i32, i32, i32, i32, i64, <4 x i32>)* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0} !2 = !{i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0} !3 = !{!"buffer_t", !"image1d_t", !"image1d_buffer_t", !"image2d_t", !"image3d_t", !"sampler_t", !"svmptr_t", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/args_attributes_transform_reader.ll000066400000000000000000000025051475147027500312200ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that adaptor correctly handles 
parameter attributes with types. ; UNSUPPORTED: llvm8, opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: @test ; CHECK-SAME: (%foo addrspace(1)* byval(%foo) %arg) %foo = type { i32 } declare void @bar(%foo addrspace(1)*) define spir_kernel void @test(i8 addrspace(1)* byval(i8) "VCArgumentIOKind"="0" %arg) #0 { %1 = call %foo addrspace(1)* @llvm.genx.address.convert.p1foo.p1i8(i8 addrspace(1)* %arg) call void @bar(%foo addrspace(1)* %1) ret void } ; CHECK: @testx ; CHECK-SAME: (%foo addrspace(1)* byval(%foo) %arg) define spir_kernel void @testx(%foo addrspace(1)* byval(%foo) "VCArgumentIOKind"="0" %arg) #0 { %1 = call i8 addrspace(1)* @llvm.genx.address.convert.p1i8.p1foo(%foo addrspace(1)* %arg) %2 = bitcast i8 addrspace(1)* %1 to %foo addrspace(1)* call void @bar(%foo addrspace(1)* %2) ret void } declare %foo addrspace(1)* @llvm.genx.address.convert.p1foo.p1i8(i8 addrspace(1)*) declare i8 addrspace(1)* @llvm.genx.address.convert.p1i8.p1foo(%foo addrspace(1)*) attributes #0 = { "VCFunction" } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/args_attributes_transform_writer.ll000066400000000000000000000013621475147027500312720ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that adaptor correctly handles parameter attributes with types. ; UNSUPPORTED: llvm8, opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: @test ; CHECK-SAME: %foo addrspace(1)* byval(%foo) ; CHECK-SAME: %arg %foo = type { i32 } define spir_kernel void @test(%foo addrspace(1)* byval(%foo) %arg) { ret void } !genx.kernels = !{!0} !0 = !{void (%foo addrspace(1)*)* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0} !1 = !{i32 0} !2 = !{i32 0} !3 = !{!"svmptr_t"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/combined_args_reader.ll000066400000000000000000000050041475147027500265140ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test combined reader translation: kernel has both native SPIRV types ; and implicit arguments. Implicit arguments would not show in normal ; flow, though they appear in old cmc.
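; "Implicit" here means arguments like %__arg_llvm.genx.local.id below: they
; carry only a "VCArgumentKind"="24" attribute and no SPIRV opaque type, so
; the reader leaves their type alone and records kind 24 in the kernel
; metadata, while the explicit image arguments are restored through
; @llvm.genx.address.convert calls.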
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s %opencl.image2d_ro_t = type opaque %opencl.image2d_wo_t = type opaque define spir_kernel void @test(%opencl.image2d_ro_t addrspace(1)* %in, %opencl.image2d_wo_t addrspace(1)* %out, <3 x i32> "VCArgumentKind"="24" %__arg_llvm.genx.local.id) #0 { ; CHECK-LABEL: @test( ; CHECK: i32 ; CHECK: [[IN:%[^,]+]], ; CHECK: i32 ; CHECK: [[OUT:%[^,]+]], ; CHECK: <3 x i32> ; CHECK: [[LOCAL_ID:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i32> [[LOCAL_ID]], i32 0 ; CHECK-NEXT: [[CALL1_I_I_I:%.*]] = tail call <8 x i32> @llvm.genx.media.ld.v8i32(i32 0, i32 [[IN]], i32 0, i32 32, i32 [[TMP0]], i32 0) ; CHECK-NEXT: tail call void @llvm.genx.media.st.v8i32(i32 0, i32 [[OUT]], i32 0, i32 32, i32 [[TMP0]], i32 0, <8 x i32> [[CALL1_I_I_I]]) ; CHECK-NEXT: ret void ; entry: %0 = call i32 @llvm.genx.address.convert.i32.p1opencl.image2d_ro_t(%opencl.image2d_ro_t addrspace(1)* %in) %1 = call i32 @llvm.genx.address.convert.i32.p1opencl.image2d_wo_t(%opencl.image2d_wo_t addrspace(1)* %out) %2 = extractelement <3 x i32> %__arg_llvm.genx.local.id, i32 0 %call1.i.i.i = tail call <8 x i32> @llvm.genx.media.ld.v8i32(i32 0, i32 %0, i32 0, i32 32, i32 %2, i32 0) tail call void @llvm.genx.media.st.v8i32(i32 0, i32 %1, i32 0, i32 32, i32 %2, i32 0, <8 x i32> %call1.i.i.i) ret void } declare <8 x i32> @llvm.genx.media.ld.v8i32(i32, i32, i32, i32, i32, i32) declare void @llvm.genx.media.st.v8i32(i32, i32, i32, i32, i32, i32, <8 x i32>) declare i32 @llvm.genx.address.convert.i32.p1opencl.image2d_ro_t(%opencl.image2d_ro_t addrspace(1)*) #0 declare i32 @llvm.genx.address.convert.i32.p1opencl.image2d_wo_t(%opencl.image2d_wo_t addrspace(1)*) #0 attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{void (i32, i32, <3 x i32>)* @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 24} ; CHECK-DAG: ![[DESCS]] = !{!"image2d_t read_only", !"image2d_t write_only", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/combined_args_writer.ll000066400000000000000000000045221475147027500265720ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test combined writer translation: kernel has both annotated explicit ; arguments and implicit arguments. Implicit arguments would not show ; in normal flow, though they appear in old cmc.
; XFAIL: llvm13, llvm14 ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; RUN: opt %pass%GenXSPIRVWriterAdaptor %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define void @test(i32 %in, i32 %out, <3 x i32> %__arg_llvm.genx.local.id) { ; CHECK-LABEL: @test( ; CHECK: %opencl.image2d_ro_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IN:%[^,]+]], ; CHECK: %opencl.image2d_wo_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[OUT:%[^,]+]], ; CHECK: <3 x i32> ; CHECK: "VCArgumentKind"="24" ; CHECK: [[LOCAL_ID:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint %opencl.image2d_ro_t addrspace(1)* [[IN]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint %opencl.image2d_wo_t addrspace(1)* [[OUT]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[LOCAL_ID]], i32 0 ; CHECK-NEXT: [[CALL1_I_I_I:%.*]] = tail call <8 x i32> @llvm.genx.media.ld.v8i32(i32 0, i32 [[TMP0]], i32 0, i32 32, i32 [[TMP2]], i32 0) ; CHECK-NEXT: tail call void @llvm.genx.media.st.v8i32(i32 0, i32 [[TMP1]], i32 0, i32 32, i32 [[TMP2]], i32 0, <8 x i32> [[CALL1_I_I_I]]) ; CHECK-NEXT: ret void ; entry: %0 = extractelement <3 x i32> %__arg_llvm.genx.local.id, i32 0 %call1.i.i.i = tail call <8 x i32> @llvm.genx.media.ld.v8i32(i32 0, i32 %in, i32 0, i32 32, i32 %0, i32 0) tail call void @llvm.genx.media.st.v8i32(i32 0, i32 %out, i32 0, i32 32, i32 %0, i32 0, <8 x i32> %call1.i.i.i) ret void } declare <8 x i32> @llvm.genx.media.ld.v8i32(i32, i32, i32, i32, i32, i32) declare void @llvm.genx.media.st.v8i32(i32, i32, i32, i32, i32, i32, <8 x i32>) ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{void (i32, i32, <3 x i32>)* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 2, i32 24} !2 = !{i32 0, i32 0} !3 = !{!"image2d_t read_only", !"image2d_t write_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/empty_kernel_writer.ll000066400000000000000000000013111475147027500264650ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2022 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test empty kernel metadata translation: old -> new. 
; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: @test() #[[ATTR_GROUP:[0-9]+]] define void @test() #0 { ret void } ; CHECK: attributes #[[ATTR_GROUP]] = { ; CHECK-DAG: "VCFunction" ; CHECK-DAG: "VCSLMSize"="0" ; CHECK: } attributes #0 = { "CMGenxMain" } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{void ()* @test, !"test", !1, i32 0, i32 0, !1, !1, i32 0, i32 0} !1 = !{} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/fun_attributes_transform_reader.ll000066400000000000000000000042101475147027500310470ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that adaptor correctly translates function attributes to VC-specific ; metadata (the processed attributes are expected to be discarded) ; UNSUPPORTED: llvm8 ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: @test_VCFunction() ; CHECK: @test_VCStackCall() ; CHECK-SAME: #[[FATR_STACK_CALL_ATTR_IDX:[0-9]+]] ; CHECK: @test_VCCallable() ; CHECK-SAME: #[[FATR_CALLABLE_ATTR_IDX:[0-9]+]] ; CHECK: @test_VCFCEntry() ; CHECK-SAME: #[[FATR_FC_ENTRY_IDX:[0-9]+]] ; CHECK: @test_VCSIMTCall() ; CHECK-SAME: #[[FATR_SIMT_CALL_IDX:[0-9]+]] ; CHECK: @test_VCFloatControl() ; CHECK-SAME: #[[FATR_FLOAT_CONTROL_IDX:[0-9]+]] ; CHECK: @test_VCSLMSize() ; CHECK-SAME: #[[FATR_SLM_SIZE_IDX:[0-9]+]] define void @test_VCFunction() #0 { ret void } define void @test_VCStackCall() #1 { ret void } define void @test_VCCallable() #2 { ret void } define void @test_VCFCEntry() #3 { ret void } define void @test_VCSIMTCall() #4 { ret void } define void @test_VCFloatControl() #5 { ret void } define spir_kernel void @test_VCSLMSize() #6 { ret void } ; CHECK-DAG: attributes #[[FATR_STACK_CALL_ATTR_IDX]] = { "CMStackCall" } ; CHECK-DAG: attributes #[[FATR_CALLABLE_ATTR_IDX]] = { "CMCallable" } ; CHECK-DAG: attributes #[[FATR_FC_ENTRY_IDX]] = { "CMEntry" } ; CHECK-DAG: attributes #[[FATR_SIMT_CALL_IDX]] = { "CMGenxSIMT" } ; CHECK-DAG: attributes #[[FATR_FLOAT_CONTROL_IDX]] = { "CMFloatControl"="0" } ; CHECK-DAG: attributes #[[FATR_SLM_SIZE_IDX]] = { "CMGenxMain" } ; CHECK-DAG: !{{{.*}} @test_VCSLMSize, !"test_VCSLMSize", !{{[0-9]+}}, i32 100500, i32 0, !{{[0-9]+}}, !{{[0-9]+}}, i32 0} attributes #0 = { "VCFunction" } attributes #1 = { "VCFunction" "VCStackCall" } attributes #2 = { "VCFunction" "VCCallable" } attributes #3 = { "VCFunction" "VCFCEntry" } attributes #4 = { "VCFunction" "VCSIMTCall" } attributes #5 = { "VCFunction" "VCFloatControl"="0" } attributes #6 = { "VCFunction" "VCSLMSize"="100500" } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/image_array_reader.ll000066400000000000000000000030701475147027500262010ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test reader translation of image array arguments. 
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s %opencl.image1d_array_ro_t = type opaque %opencl.image2d_array_wo_t = type opaque define spir_kernel void @test(%opencl.image1d_array_ro_t addrspace(1)* %im1d, %opencl.image2d_array_wo_t addrspace(1)* %im2d) #0 { ; CHECK-LABEL: @test( ; CHECK: i32 ; CHECK: [[IM1D:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM2D:%[^,]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: %0 = call i32 @llvm.genx.address.convert.i32.p1opencl.image1d_array_ro_t(%opencl.image1d_array_ro_t addrspace(1)* %im1d) %1 = call i32 @llvm.genx.address.convert.i32.p1opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1)* %im2d) ret void } declare i32 @llvm.genx.address.convert.i32.p1opencl.image1d_array_ro_t(%opencl.image1d_array_ro_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image2d_array_wo_t(%opencl.image2d_array_wo_t addrspace(1)*) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{void (i32, i32)* @test, !"test", [[KINDS:![0-9]+]], i32 0, i32 0, !{{[0-9]+}}, [[DESCS:![0-9]+]], i32 0} ; CHECK-DAG: [[KINDS]] = !{i32 2, i32 2} ; CHECK-DAG: [[DESCS]] = !{!"image1d_array_t read_only", !"image2d_array_t write_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/image_array_writer.ll000066400000000000000000000021121475147027500262470ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test writer translation of image array arguments. ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define void @test(i32 %im1darr, i32 %im2darr) { ; CHECK-LABEL: @test( ; CHECK: %opencl.image1d_array_ro_t addrspace(1)* ; CHECK: [[IM1D:%[^,]+]], ; CHECK: %opencl.image2d_array_wo_t addrspace(1)* ; CHECK: [[IM2D:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint %opencl.image1d_array_ro_t addrspace(1)* [[IM1D]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint %opencl.image2d_array_wo_t addrspace(1)* [[IM2D]] to i32 ; CHECK-NEXT: ret void ; entry: ret void } !genx.kernels = !{!0} !0 = !{void (i32, i32)* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 2} !2 = !{i32 0, i32 0} !3 = !{!"image1d_array_t read_only", !"image2d_array_t write_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/media_block_reader.ll000066400000000000000000000023141475147027500261520ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test reader translation of media block image arguments. 
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s %intel.image2d_media_block_ro_t = type opaque define spir_kernel void @test(%intel.image2d_media_block_ro_t addrspace(1)* %image) #0 { ; CHECK-LABEL: @test( ; CHECK: i32 ; CHECK [[IMAGE:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: %0 = call i32 @llvm.genx.address.convert.i32.p1intel.image2d_media_block_ro_t(%intel.image2d_media_block_ro_t addrspace(1)* %image) ret void } declare i32 @llvm.genx.address.convert.i32.p1intel.image2d_media_block_ro_t(%intel.image2d_media_block_ro_t addrspace(1)*) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{void (i32)* @test, !"test", [[KINDS:![0-9]+]], i32 0, i32 0, !{{[0-9]+}}, [[DESCS:![0-9]+]], i32 0} ; CHECK-DAG: [[KINDS]] = !{i32 2} ; CHECK-DAG: [[DESCS]] = !{!"image2d_media_block_t read_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/media_block_writer.ll000066400000000000000000000015611475147027500262270ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test writer translation of media block images arguments. ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define void @test(i32 %image) { ; CHECK-LABEL: @test( ; CHECK: %intel.image2d_media_block_ro_t addrspace(1)* ; CHECK: [[IMAGE:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint %intel.image2d_media_block_ro_t addrspace(1)* [[IMAGE]] to i32 ; CHECK-NEXT: ret void ; entry: ret void } !genx.kernels = !{!0} !0 = !{void (i32)* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2} !2 = !{i32 0} !3 = !{!"image2d_media_block_t read_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/no_kernels_module_reader.ll000066400000000000000000000012221475147027500274220ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2022 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test general translation of attributes within module that has no kernels ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: @some_func ; CHECK-SAME: #[[ATTR_GROUP:[0-9]+]] define <16 x float> @some_func(<16 x float> %x) local_unnamed_addr #0 { ret <16 x float> %x } ; CHECK: attributes #[[ATTR_GROUP]] = { ; CHECK: "CMStackCall" ; CHECK: } attributes #0 = { "VCStackCall" "VCFunction"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/no_kernels_module_writer.ll000066400000000000000000000012401475147027500274740ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2022 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test general translation of attributes within module that has no kernels ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: @some_func ; CHECK: #[[ATTR_GROUP:[0-9]+]] define <16 x float> @some_func(<16 x float> %x) local_unnamed_addr #0 { ret <16 x float> %x } ; CHECK: attributes #[[ATTR_GROUP]] = { ; CHECK-DAG: "VCFunction" ; CHECK-DAG: "VCStackCall" ; CHECK: } attributes #0 = { "CMStackCall" 
} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/no_vcfunction_reader.ll000066400000000000000000000010711475147027500265720ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2022 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that reader ignores signature rewriting for kernels ; that are not VCFunction. ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s define spir_kernel void @test(i8 addrspace(1) *%ptr) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void entry: ret void } ; CHECK-NOT: !genx.kernels vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/non_global_ptr_reader.ll000066400000000000000000000015141475147027500267210ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that reader treats only global pointer as svmptr type ; and ignores other address spaces. ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s define spir_kernel void @test(i32* %ptr) #0 { ; CHECK-LABEL: @test ; CHECK-SAME: (i32* [[PTR:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: ret void } attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{void (i32*)* @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, !{{[0-9]+}}, i32 0} ; CHECK: ![[KINDS]] = !{i32 0} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/old_decorated_args_reader.ll000066400000000000000000000040631475147027500275300ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test reader translation of old-style decorated arguments. ; Annotations for these are directly translated from attributes to ; kernel metadata without any checks. Required until full transition ; is done. 
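; For example (restating the signature below): an argument declared as
;   i32 "VCArgumentDesc"="image2d_t read_only" "VCArgumentKind"="2" %in
; keeps its i32 type and is expected to reappear in !genx.kernels with kind
; i32 2 and desc "image2d_t read_only".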
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s define spir_kernel void @test(i32 "VCArgumentDesc"="image2d_t read_only" "VCArgumentKind"="2" %in, i32 "VCArgumentDesc"="image2d_t write_only" "VCArgumentKind"="2" %out, <3 x i32> "VCArgumentKind"="24" %__arg_llvm.genx.local.id) #0 { ; CHECK-LABEL: @test ; CHECK-SAME: (i32 [[IN:%[^,]+]], i32 [[OUT:%[^,]+]], <3 x i32> [[LOCAL_ID:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i32> [[LOCAL_ID]], i32 0 ; CHECK-NEXT: [[CALL1_I_I_I:%.*]] = tail call <8 x i32> @llvm.genx.media.ld.v8i32(i32 0, i32 [[IN]], i32 0, i32 32, i32 [[TMP0]], i32 0) ; CHECK-NEXT: tail call void @llvm.genx.media.st.v8i32(i32 0, i32 [[OUT]], i32 0, i32 32, i32 [[TMP0]], i32 0, <8 x i32> [[CALL1_I_I_I]]) ; CHECK-NEXT: ret void ; entry: %0 = extractelement <3 x i32> %__arg_llvm.genx.local.id, i32 0 %call1.i.i.i = tail call <8 x i32> @llvm.genx.media.ld.v8i32(i32 0, i32 %in, i32 0, i32 32, i32 %0, i32 0) tail call void @llvm.genx.media.st.v8i32(i32 0, i32 %out, i32 0, i32 32, i32 %0, i32 0, <8 x i32> %call1.i.i.i) ret void } declare <8 x i32> @llvm.genx.media.ld.v8i32(i32, i32, i32, i32, i32, i32) #0 declare void @llvm.genx.media.st.v8i32(i32, i32, i32, i32, i32, i32, <8 x i32>) #0 attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{{{.*}} @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 24} ; CHECK-DAG: ![[DESCS]] = !{!"image2d_t read_only", !"image2d_t write_only", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/000077500000000000000000000000001475147027500244105ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/addr_conv_attribute_writer.ll000066400000000000000000000016541475147027500323650ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test @llvm.genx.address.convert intrinsic generation with proper attributes ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: define spir_kernel void @test( ; CHECK-SAME: target("spirv.BufferSurfaceINTEL", 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[BUF:%[^)]+]]) define spir_kernel void @test(i32 %buf) { ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_2(target("spirv.BufferSurfaceINTEL", 2) [[BUF]]) ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{ptr @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2} !2 = !{i32 0} !3 = !{!"buffer_t"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/annot_mess_writer.ll000066400000000000000000000032211475147027500305010ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test messy annotations translation in writer. First valid ; annotation should be matched.
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: define spir_kernel void @test( ; CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM2D:%[^,]+]], ; CHECK-SAME: target("spirv.Sampler") ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[SAMP:%[^,]+]], ; CHECK-SAME: ptr addrspace(1) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[PTR:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[GEN:%[^)]+]]) define spir_kernel void @test(i32 %im2d, i32 %samp, i64 %ptr, i32 %gen) { ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_0(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[IM2D]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Sampler(target("spirv.Sampler") [[SAMP]]) ; CHECK: ptrtoint ptr addrspace(1) [[PTR]] to i64 ; CHECK-NOT: [[GEN]] ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{ptr @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 1, i32 0, i32 0} !2 = !{i32 0, i32 0, i32 0, i32 0} !3 = !{!"image2d_t buffer_t read_only read_write", !"sampler_t read_only", !"svmptr_t write_only", !"write_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/annotated_args_mixed_reader.ll000066400000000000000000000056211475147027500324460ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that reader can cope with mixed mode when some ; arguments use address convert and some do not. 
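; Note (inferred from the CHECK lines below): arguments wrapped in an @llvm.genx.address.convert call are lowered back to their i32/i64 form, while arguments without a convert keep their ; SPIRV target type in the restored signature; the kernel metadata is rebuilt either way.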
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: define dllexport spir_kernel void @test( ; CHECK-SAME: i32 ; CHECK-SAME: [[BUF:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) ; CHECK-SAME: [[IM1D:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM1DB:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) ; CHECK-SAME: [[IM2D:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM3D:%[^,]+]], ; CHECK-SAME: target("spirv.Sampler") ; CHECK-SAME: [[SAMP:%[^,]+]], ; CHECK-SAME: i64 ; CHECK-SAME: [[PTR:%[^,]+]], ; CHECK-SAME: <4 x i32> ; CHECK-SAME: [[GEN:%[^)]+]]) define spir_kernel void @test(target("spirv.BufferSurfaceINTEL", 2) %buf, target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) %im1d, target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2) %im1db, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) %im2d, target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2) %im3d, target("spirv.Sampler") %samp, ptr addrspace(1) %ptr, <4 x i32> %gen) #0 { ; CHECK-NOT: [[BUF]] ; CHECK-NOT: [[IM1D]] ; CHECK-NOT: [[IM1DB]] ; CHECK-NOT: [[IM2D]] ; CHECK-NOT: [[IM3D]] ; CHECK-NOT: [[SAMP]] ; CHECK-NOT: [[PTR]] ; CHECK-NOT: [[GEN]] %buf.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_2(target("spirv.BufferSurfaceINTEL", 2) %buf) %im1db.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_5_0_0_0_0_0_2(target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2) %im1db) %im3d.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_2_0_0_0_0_0_2(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2) %im3d) %ptr.conv = call i64 @llvm.genx.address.convert.i64.p1(ptr addrspace(1) %ptr) ret void } declare i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_2(target("spirv.BufferSurfaceINTEL", 2)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_5_0_0_0_0_0_2(target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_2_0_0_0_0_0_2(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2)) declare i64 @llvm.genx.address.convert.i64.p1(ptr addrspace(1)) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{ptr @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0} ; CHECK-DAG: ![[DESCS]] = !{!"buffer_t read_write", !"image1d_t read_write", !"image1d_buffer_t read_write", !"image2d_t read_write", !"image3d_t read_write", !"sampler_t", !"svmptr_t", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/annotated_args_no_conv_reader.ll000066400000000000000000000042521475147027500330000ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that reader correctly restores metadata and does ; not change other things if there is no address conversion ; but correct SPIRV types in signature. 
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: define dllexport spir_kernel void @test( ; CHECK-SAME: target("spirv.BufferSurfaceINTEL", 2) ; CHECK-SAME: [[BUF:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) ; CHECK-SAME: [[IM1D:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2) ; CHECK-SAME: [[IM1DB:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) ; CHECK-SAME: [[IM2D:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2) ; CHECK-SAME: [[IM3D:%[^,]+]], ; CHECK-SAME: target("spirv.Sampler") ; CHECK-SAME: [[SAMP:%[^,]+]], ; CHECK-SAME: ptr addrspace(1) ; CHECK-SAME: [[PTR:%[^,]+]], ; CHECK-SAME: <4 x i32> ; CHECK-SAME: [[GEN:%[^)]+]]) define spir_kernel void @test(target("spirv.BufferSurfaceINTEL", 2) %buf, target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) %im1d, target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2) %im1db, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) %im2d, target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2) %im3d, target("spirv.Sampler") %samp, ptr addrspace(1) %ptr, <4 x i32> %gen) #0 { ; CHECK-NOT: [[BUF]] ; CHECK-NOT: [[IM1D]] ; CHECK-NOT: [[IM1DB]] ; CHECK-NOT: [[IM2D]] ; CHECK-NOT: [[IM3D]] ; CHECK-NOT: [[SAMP]] ; CHECK-NOT: [[PTR]] ; CHECK-NOT: [[GEN]] ret void } attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{ptr @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0} ; CHECK-DAG: ![[DESCS]] = !{!"buffer_t read_write", !"image1d_t read_write", !"image1d_buffer_t read_write", !"image2d_t read_write", !"image3d_t read_write", !"sampler_t", !"svmptr_t", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/annotated_args_no_conv_writer.ll000066400000000000000000000047331475147027500330560ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that writer does not change signature if correct ; types are already used. Just drop all annotations.
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: define spir_kernel void @test( ; CHECK-SAME: target("spirv.BufferSurfaceINTEL", 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[BUF:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM1D:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM1DB:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM2D:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM3D:%[^,]+]], ; CHECK-SAME: target("spirv.Sampler") ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[SAMP:%[^,]+]], ; CHECK-SAME: ptr addrspace(1) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[PTR:%[^,]+]], ; CHECK-SAME: <4 x i32> ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[GEN:%[^)]+]]) define spir_kernel void @test(target("spirv.BufferSurfaceINTEL", 2) %buf, target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) %im1d, target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2) %im1db, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) %im2d, target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2) %im3d, target("spirv.Sampler") %samp, ptr addrspace(1) %ptr, <4 x i32> %gen) { ; CHECK-NOT: [[BUF]] ; CHECK-NOT: [[IM1D]] ; CHECK-NOT: [[IM1DB]] ; CHECK-NOT: [[IM2D]] ; CHECK-NOT: [[IM3D]] ; CHECK-NOT: [[SAMP]] ; CHECK-NOT: [[PTR]] ; CHECK-NOT: [[GEN]] ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{ptr @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0} !2 = !{i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0} !3 = !{!"buffer_t", !"image1d_t", !"image1d_buffer_t", !"image2d_t", !"image3d_t", !"sampler_t", !"svmptr_t", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/annotated_args_reader.ll000066400000000000000000000074671475147027500312720ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel argument translation from new style with opaque types ; that SPIRV translator can understand to old style with ; metadata. Here annotations for OCL runtime are used. 
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s @0 = private unnamed_addr constant [15 x i8] c"some attribute\00", section "llvm.metadata" ; CHECK: @llvm.global.annotations ; CHECK-SAME: ptr @test @llvm.global.annotations = appending global [1 x { ptr, ptr, ptr, i32 }] [{ ptr, ptr, ptr, i32 } { ptr @test, ptr @0, ptr undef, i32 undef }], section "llvm.metadata" ; CHECK: define dllexport spir_kernel void @test( ; CHECK-SAME: i32 ; CHECK-SAME: [[BUF:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM1D:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM1DB:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM2D:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM3D:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[SAMP:%[^,]+]], ; CHECK-SAME: ptr addrspace(1) ; CHECK-SAME: [[PTR:%[^,]+]], ; CHECK-SAME: <4 x i32> ; CHECK-SAME: [[GEN:%[^)]+]]) define spir_kernel void @test(target("spirv.BufferSurfaceINTEL", 2) %buf, target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) %im1d, target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2) %im1db, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) %im2d, target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2) %im3d, target("spirv.Sampler") %samp, ptr addrspace(1) %ptr, <4 x i32> %gen) #0 { ; CHECK-NOT: [[BUF]] ; CHECK-NOT: [[IM1D]] ; CHECK-NOT: [[IM1DB]] ; CHECK-NOT: [[IM2D]] ; CHECK-NOT: [[IM3D]] ; CHECK-NOT: [[SAMP]] ; CHECK: [[PTR]] ; CHECK-NOT: [[GEN]] %buf.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_2(target("spirv.BufferSurfaceINTEL", 2) %buf) %im1d.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_0_0_0_0_0_0_2(target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) %im1d) %im1db.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_5_0_0_0_0_0_2(target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2) %im1db) %im2d.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_2(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) %im2d) %im3d.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_2_0_0_0_0_0_2(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2) %im3d) %samp.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Sampler(target("spirv.Sampler") %samp) %ptr.conv = ptrtoint ptr addrspace(1) %ptr to i64 ret void } declare i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_2(target("spirv.BufferSurfaceINTEL", 2)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_0_0_0_0_0_0_2(target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_5_0_0_0_0_0_2(target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_2(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_2_0_0_0_0_0_2(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Sampler(target("spirv.Sampler")) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{ptr @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0} ; CHECK-DAG: ![[DESCS]] = !{!"buffer_t read_write", !"image1d_t read_write", !"image1d_buffer_t read_write", !"image2d_t read_write", !"image3d_t read_write", !"sampler_t", !"svmptr_t", !""}
vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/annotated_args_writer.ll000066400000000000000000000060161475147027500313310ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel arguments translation from old style with metadata to ; new style with opaque types that SPIRV translator can ; understand. ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; RUN: opt -passes=GenXSPIRVWriterAdaptor,GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: define spir_kernel void @test( ; CHECK-SAME: target("spirv.BufferSurfaceINTEL", 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[BUF:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM1D:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM1DB:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM2D:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM3D:%[^,]+]], ; CHECK-SAME: target("spirv.Sampler") ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[SAMP:%[^,]+]], ; CHECK-SAME: ptr addrspace(1) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[PTR:%[^,]+]], ; CHECK-SAME: <4 x i32> ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[GEN:%[^)]+]]) define spir_kernel void @test(i32 %buf, i32 %im1d, i32 %im1db, i32 %im2d, i32 %im3d, i32 %samp, i64 %ptr, <4 x i32> %gen) { ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_2(target("spirv.BufferSurfaceINTEL", 2) [[BUF]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_0_0_0_0_0_0_2(target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) [[IM1D]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_5_0_0_0_0_0_2(target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 2) [[IM1DB]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_2(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 2) [[IM2D]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_2_0_0_0_0_0_2(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 2) [[IM3D]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Sampler(target("spirv.Sampler") [[SAMP]]) ; CHECK: ptrtoint ptr addrspace(1) [[PTR]] to i64 ; CHECK-NOT: [[GEN]] ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{ptr @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 2, i32 2, i32 2, i32 2, i32 1, i32 0, i32 0} !2 = !{i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0} !3 = !{!"buffer_t", !"image1d_t", !"image1d_buffer_t", !"image2d_t", !"image3d_t", !"sampler_t", !"svmptr_t", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/args_attributes_transform_reader.ll000066400000000000000000000012571475147027500335650ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; 
SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that adaptor correctly handles parameter attributes with types. ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s %foo = type { i32 } ; CHECK: define dllexport spir_kernel void @test( ; CHECK-SAME: ptr addrspace(1) byval(%foo) ; CHECK-SAME: [[ARG:%[^)]+]]) define spir_kernel void @test(ptr addrspace(1) byval(%foo) %arg) #0 { ; CHECK-NOT: [[ARG]] ret void } attributes #0 = { "VCFunction" } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/args_attributes_transform_writer.ll000066400000000000000000000015321475147027500336330ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that adaptor correctly handles parameter attributes with types. ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s %foo = type { i32 } ; CHECK: define spir_kernel void @test( ; CHECK-SAME: ptr addrspace(1) byval(%foo) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[ARG:%[^)]+]]) define spir_kernel void @test(ptr addrspace(1) byval(%foo) %arg) { ; CHECK-NOT: [[ARG]] ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{ptr @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0} !1 = !{i32 0} !2 = !{i32 0} !3 = !{!"svmptr_t"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/combined_args_reader.ll000066400000000000000000000050631475147027500310630ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test combined reader translation: kernel has both native SPIRV types ; and implicit arguments. Implicit arguments would not show in normal ; flow, though they appear in old cmc.
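; Note: <3 x i32> %__arg_llvm.genx.local.id below is such an implicit argument; its "VCArgumentKind"="24" attribute survives as i32 24 in the kernel metadata, next to kind 2 for the ; explicit image arguments.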
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: define dllexport spir_kernel void @test( ; CHECK-SAME: i32 ; CHECK-SAME: [[IN:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[OUT:%[^,]+]], ; CHECK-SAME: <3 x i32> ; CHECK-SAME: [[LOCAL_ID:%[^)]+]]) define spir_kernel void @test(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %in, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %out, <3 x i32> "VCArgumentKind"="24" %__arg_llvm.genx.local.id) #0 { %in.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_0(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %in) %out.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_1(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %out) ; CHECK: [[LOCAL_ID_0:%.*]] = extractelement <3 x i32> [[LOCAL_ID]], i32 0 ; CHECK-NEXT: [[LD:%.*]] = tail call <8 x i32> @llvm.genx.media.ld.v8i32(i32 0, i32 [[IN]], i32 0, i32 32, i32 [[LOCAL_ID_0]], i32 0) ; CHECK-NEXT: tail call void @llvm.genx.media.st.v8i32(i32 0, i32 [[OUT]], i32 0, i32 32, i32 [[LOCAL_ID_0]], i32 0, <8 x i32> [[LD]]) %local.id.0 = extractelement <3 x i32> %__arg_llvm.genx.local.id, i32 0 %ld = tail call <8 x i32> @llvm.genx.media.ld.v8i32(i32 0, i32 %in.conv, i32 0, i32 32, i32 %local.id.0, i32 0) tail call void @llvm.genx.media.st.v8i32(i32 0, i32 %out.conv, i32 0, i32 32, i32 %local.id.0, i32 0, <8 x i32> %ld) ret void } declare <8 x i32> @llvm.genx.media.ld.v8i32(i32, i32, i32, i32, i32, i32) declare void @llvm.genx.media.st.v8i32(i32, i32, i32, i32, i32, i32, <8 x i32>) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_0(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_1(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{ptr @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 24} ; CHECK-DAG: ![[DESCS]] = !{!"image2d_t read_only", !"image2d_t write_only", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/combined_args_writer.ll000066400000000000000000000047051475147027500311370ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test combined writer translation: kernel has both annotated explicit ; arguments and implicit arguments. Implicit arguments would not show ; in normal flow, though they appear in old cmc.
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; RUN: opt -passes=GenXSPIRVWriterAdaptor,GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: define spir_kernel void @test( ; CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IN:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[OUT:%[^,]+]], ; CHECK-SAME: <3 x i32> ; CHECK-SAME: "VCArgumentKind"="24" ; CHECK-SAME: [[LOCAL_ID:%[^)]+]]) define spir_kernel void @test(i32 %in, i32 %out, <3 x i32> %__arg_llvm.genx.local.id) { ; CHECK: [[IN_CONV:%.*]] = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_0(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[IN]]) ; CHECK: [[OUT_CONV:%.*]] = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_1(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[OUT]]) ; CHECK: [[LOCAL_ID_0:%.*]] = extractelement <3 x i32> [[LOCAL_ID]], i32 0 ; CHECK-NEXT: [[LD:%.*]] = tail call <8 x i32> @llvm.genx.media.ld.v8i32(i32 0, i32 [[IN_CONV]], i32 0, i32 32, i32 [[LOCAL_ID_0]], i32 0) ; CHECK-NEXT: tail call void @llvm.genx.media.st.v8i32(i32 0, i32 [[OUT_CONV]], i32 0, i32 32, i32 [[LOCAL_ID_0]], i32 0, <8 x i32> [[LD]]) %local.id.0 = extractelement <3 x i32> %__arg_llvm.genx.local.id, i32 0 %ld = tail call <8 x i32> @llvm.genx.media.ld.v8i32(i32 0, i32 %in, i32 0, i32 32, i32 %local.id.0, i32 0) tail call void @llvm.genx.media.st.v8i32(i32 0, i32 %out, i32 0, i32 32, i32 %local.id.0, i32 0, <8 x i32> %ld) ret void } declare <8 x i32> @llvm.genx.media.ld.v8i32(i32, i32, i32, i32, i32, i32) declare void @llvm.genx.media.st.v8i32(i32, i32, i32, i32, i32, i32, <8 x i32>) ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{ptr @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 2, i32 24} !2 = !{i32 0, i32 0} !3 = !{!"image2d_t read_only", !"image2d_t write_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/image_array_reader.ll000066400000000000000000000031541475147027500305460ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test reader translation of image array arguments. 
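; Note: the integer operands of target("spirv.Image", ...) follow the SPIR-V OpTypeImage order (Dim, Depth, Arrayed, MS, Sampled, Format, Access), so the third integer being 1 below is ; the Arrayed flag that maps image1d_t/image2d_t onto image1d_array_t/image2d_array_t.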
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: define dllexport spir_kernel void @test( ; CHECK-SAME: i32 ; CHECK-SAME: [[IM1DARR:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM2DARR:%[^,]+]]) define spir_kernel void @test(target("spirv.Image", void, 0, 0, 1, 0, 0, 0, 0) %im1darr, target("spirv.Image", void, 1, 0, 1, 0, 0, 0, 1) %im2darr) #0 { ; CHECK-NOT: [[IM1DARR]] ; CHECK-NOT: [[IM2DARR]] %im1darr.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_0_0_1_0_0_0_0(target("spirv.Image", void, 0, 0, 1, 0, 0, 0, 0) %im1darr) %im2darr.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_1_0_0_0_1(target("spirv.Image", void, 1, 0, 1, 0, 0, 0, 1) %im2darr) ret void } declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_0_0_1_0_0_0_0(target("spirv.Image", void, 0, 0, 1, 0, 0, 0, 0)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_1_0_0_0_1(target("spirv.Image", void, 1, 0, 1, 0, 0, 0, 1)) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{ptr @test, !"test", [[KINDS:![0-9]+]], i32 0, i32 0, !{{[0-9]+}}, [[DESCS:![0-9]+]], i32 0} ; CHECK-DAG: [[KINDS]] = !{i32 2, i32 2} ; CHECK-DAG: [[DESCS]] = !{!"image1d_array_t read_only", !"image2d_array_t write_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/image_array_writer.ll000066400000000000000000000024601475147027500306170ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test writer translation of image array arguments. ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: define spir_kernel void @test( ; CHECK-SAME: target("spirv.Image", void, 0, 0, 1, 0, 0, 0, 0) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM1DARR:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 1, 0, 1, 0, 0, 0, 1) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM2DARR:%[^)]+]]) define spir_kernel void @test(i32 %im1darr, i32 %im2darr) { ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_0_0_1_0_0_0_0(target("spirv.Image", void, 0, 0, 1, 0, 0, 0, 0) [[IM1DARR]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_1_0_0_0_1(target("spirv.Image", void, 1, 0, 1, 0, 0, 0, 1) [[IM2DARR]]) ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{ptr @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 2} !2 = !{i32 0, i32 0} !3 = !{!"image1d_array_t read_only", !"image2d_array_t write_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/media_block_reader.ll000066400000000000000000000022711475147027500305160ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test reader translation of media block image arguments. 
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: define dllexport spir_kernel void @test( ; CHECK-SAME: i32 ; CHECK-SAME: [[IMAGE:%[^)]+]]) define spir_kernel void @test(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) "VCMediaBlockIO" %image) #0 { ; CHECK-NOT: [[IMAGE]] %image.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_0(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) %image) ret void } declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_0(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0)) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{ptr @test, !"test", [[KINDS:![0-9]+]], i32 0, i32 0, !{{[0-9]+}}, [[DESCS:![0-9]+]], i32 0} ; CHECK-DAG: [[KINDS]] = !{i32 2} ; CHECK-DAG: [[DESCS]] = !{!"image2d_media_block_t read_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/media_block_writer.ll000066400000000000000000000017611475147027500305730ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test writer translation of media block images arguments. ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: define spir_kernel void @test( ; CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) ; CHECK-SAME: "VCMediaBlockIO" ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IMAGE:%[^)]+]]) define spir_kernel void @test(i32 %image) { ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_0(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 0) [[IMAGE]]) ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{ptr @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2} !2 = !{i32 0} !3 = !{!"image2d_media_block_t read_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/non_global_ptr_reader.ll000066400000000000000000000014611475147027500312640ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test that reader treats only global pointer as svmptr type ; and ignores other address spaces. 
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: define dllexport spir_kernel void @test( ; CHECK-SAME: ptr [[PTR:%[^)]+]]) define spir_kernel void @test(ptr %ptr) #0 { ; CHECK-NOT: [[PTR]] ret void } attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{ptr @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, !{{[0-9]+}}, i32 0} ; CHECK: ![[KINDS]] = !{i32 0} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/plain_args_reader.ll000066400000000000000000000033371475147027500304100ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel argument translation from new style with opaque types ; that SPIRV translator can understand to old style with ; metadata. Arguments without annotations are used here (CMRT like). ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: define dllexport spir_kernel void @test( ; CHECK-SAME: i32 ; CHECK-SAME: [[SURF:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[SAMP:%[^,]+]], ; CHECK-SAME: i64 ; CHECK-SAME: [[PTR:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[GEN:%[^)]+]]) define spir_kernel void @test(target("spirv.BufferSurfaceINTEL", 2) %surf, target("spirv.Sampler") %samp, i64 %ptr, i32 %gen) #0 { ; CHECK-NOT: [[SURF]] ; CHECK-NOT: [[SAMP]] ; CHECK-NOT: [[PTR]] ; CHECK-NOT: [[GEN]] %surf.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_2(target("spirv.BufferSurfaceINTEL", 2) %surf) %samp.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Sampler(target("spirv.Sampler") %samp) ret void } declare i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_2(target("spirv.BufferSurfaceINTEL", 2)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Sampler(target("spirv.Sampler")) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{ptr @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 1, i32 0, i32 0} ; CHECK-DAG: ![[DESCS]] = !{!"buffer_t read_write", !"sampler_t", !"", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/plain_args_writer.ll000066400000000000000000000032531475147027500304570ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel arguments translation from old style with metadata to ; new style with opaque types that SPIRV translator can ; understand. Arguments without annotations are used here (CMRT like). 
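; Note, an inference from the CHECK lines below rather than a documented rule: with empty descriptor strings the writer seems to go by the kind column alone; kind 2 still becomes a buffer ; surface and kind 1 a sampler, while the kind-0 i64/i32 arguments are left untouched.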
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; RUN: opt -passes=GenXSPIRVWriterAdaptor,GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: define spir_kernel void @test( ; CHECK-SAME: target("spirv.BufferSurfaceINTEL", 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[SURF:%[^,]+]], ; CHECK-SAME: target("spirv.Sampler") ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[SAMP:%[^,]+]], ; CHECK-SAME: i64 ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[PTR:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[GEN:%[^)]+]]) define spir_kernel void @test(i32 %surf, i32 %samp, i64 %ptr, i32 %gen) { ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_2(target("spirv.BufferSurfaceINTEL", 2) [[SURF]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Sampler(target("spirv.Sampler") [[SAMP]]) ; CHECK-NOT: [[PTR]] ; CHECK-NOT: [[GEN]] ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{ptr @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0} !1 = !{i32 2, i32 1, i32 0, i32 0} !2 = !{i32 0, i32 0, i32 0, i32 0} !3 = !{!"", !"", !"", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/sev_ptr_reader.ll000066400000000000000000000017621475147027500277530ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s define internal void @foo(ptr "VCSingleElementVector"="0" %v) #0 { ; CHECK: [[EX:[^ ]+]] = extractelement <1 x ptr> %v, i64 0 ; CHECK-NEXT: [[LD:[^ ]+]] = load i32, ptr [[EX]], align 4 %ld = load i32, ptr %v, align 4 ret void } define internal "VCSingleElementVector"="0" ptr @bar(ptr "VCSingleElementVector"="0" %in, ptr "VCSingleElementVector"="0" %out) #0 { ; CHECK: [[EX:[^ ]+]] = extractelement <1 x ptr> %out, i64 0 ; CHECK-NEXT: [[INS:[^ ]+]] = extractelement <1 x ptr> %in, i64 0 ; CHECK-NEXT: store ptr [[INS]], ptr [[EX]], align 8 ; CHECK-NEXT: ret <1 x ptr> %out store ptr %in, ptr %out, align 8 ret ptr %out } attributes #0 = { "VCFunction" } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/sev_ptr_writer.ll000066400000000000000000000013631475147027500300220ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define internal void @foo(<1 x ptr> %v) { ; CHECK: %ld = load i32, ptr %v, align 4 %ex = extractelement <1 x ptr> %v, i64 0 %ld = load i32, ptr %ex, align 4 ret void } define internal <1 x ptr> @bar(<1 x ptr> %in, <1 x ptr> %out) { ; CHECK: store ptr %in, ptr %out, align 8 ; CHECK-NEXT: ret ptr %out %ex = extractelement <1 x ptr> %out, i64 0 store <1 x ptr> %in, ptr %ex, align 8 ret <1 x ptr> %out } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/sev_signature_reader.ll000066400000000000000000000036111475147027500311420ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; 
Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test simple signatures transform ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: @global1 = internal global <1 x i32> undef, align 4 ; CHECK: @global2 = internal global <1 x ptr> undef, align 4 ; CHECK: @global3 = external global <1 x ptr> @global1 = internal global i32 undef, align 4 #1 @global2 = internal global ptr undef, align 4 #1 @global3 = external global ptr #1 ; CHECK: define <1 x i32> @f1(<1 x i32> %a, <1 x i32> %b) define "VCSingleElementVector"="0" i32 @f1(i32 "VCSingleElementVector" %a, i32 "VCSingleElementVector" %b) #0 { ; CHECK: call void @llvm.genx.intr(ptr @global1) ; CHECK: call void @llvm.genx.intr(ptr @global2) ; CHECK: call void @llvm.genx.intr(ptr @global3) call void @llvm.genx.intr(ptr @global1) call void @llvm.genx.intr(ptr @global2) call void @llvm.genx.intr(ptr @global3) ; CHECK: store i32 0, ptr @global1, align 4 ; CHECK: store ptr @global1, ptr @global2, align 8 ; CHECK: store ptr @global2, ptr @global3, align 8 store i32 0, ptr @global1, align 4 store ptr @global1, ptr @global2, align 8 store ptr @global2, ptr @global3, align 8 ; CHECK: ret <1 x i32> %a ret i32 %a } ; CHECK: define i32 @f2(<1 x i32> %a, <1 x i32> %b) define i32 @f2(i32 "VCSingleElementVector"="0" %a, i32 "VCSingleElementVector"="0" %b) #0 { ; CHECK: [[EX:[^ ]+]] = extractelement <1 x i32> %a, i64 0 ; CHECK: ret i32 [[EX]] ret i32 %a } ; CHECK: define i32 @f3(i32 %a, <1 x i32> %b) define i32 @f3(i32 %a, i32 "VCSingleElementVector"="0" %b) #0 { ret i32 %a } declare void @llvm.genx.intr(ptr) attributes #0 = { "VCFunction" } attributes #1 = { "VCGlobalVariable" "VCSingleElementVector"="0" } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/sev_signature_writer.ll000066400000000000000000000037461475147027500312250ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test simple signatures transform ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: @global1 = internal global i32 undef, align 4 ; CHECK: @global2 = internal global ptr undef, align 4 ; CHECK: @global3 = external global ptr @global1 = internal global <1 x i32> undef, align 4 #0 @global2 = internal global <1 x ptr> undef, align 4 #0 @global3 = external global <1 x ptr> #0 ; CHECK: define "VCSingleElementVector"="0" i32 @f1(i32 "VCSingleElementVector"="0" %a, i32 "VCSingleElementVector"="0" %b) define <1 x i32> @f1(<1 x i32> %a, <1 x i32> %b) { ; CHECK: call void @llvm.genx.intr(ptr @global1) ; CHECK: call void @llvm.genx.intr(ptr @global2) ; CHECK: call void @llvm.genx.intr(ptr @global3) call void @llvm.genx.intr(ptr @global1) call void @llvm.genx.intr(ptr @global2) call void @llvm.genx.intr(ptr @global3) ; CHECK: store i32 0, ptr @global1, align 4 ; CHECK: store ptr @global1, ptr @global2, align 8 ; CHECK: store ptr @global2, ptr @global3, align 8 store <1 x i32> zeroinitializer, ptr @global1, align 4 %v1 = insertelement <1 x ptr> undef, ptr @global1, i64 0 store <1 x ptr> %v1, ptr @global2, align 8 %v2 = insertelement <1 x ptr> undef, ptr @global2, i64 0 store <1 x ptr> %v2, ptr @global3, align 8 ; CHECK: ret i32 %a ret <1 x i32> %a } ; CHECK: define i32
@f2(i32 "VCSingleElementVector"="0" %a, i32 "VCSingleElementVector"="0" %b) define i32 @f2(<1 x i32> %a, <1 x i32> %b) { ; CHECK ret i32 %a %c = extractelement <1 x i32> %a, i64 0 ret i32 %c } ; CHECK: define i32 @f3(i32 %a, i32 "VCSingleElementVector"="0" %b) define i32 @f3(i32 %a, <1 x i32> %b) { ret i32 %a } declare void @llvm.genx.intr(ptr) ; CHECK: "VCSingleElementVector"="0" attributes #0 = { "VCGlobalVariable" } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/sev_struct.ll000066400000000000000000000032021475147027500271370ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s target datalayout = "e-p:64:64-i64:64-n8:16:32" target triple = "spir64" ; CHECK: [[STRUCT1:[^ ]+]] = type { i32, <2 x i32>, [[STRUCT2:[^ ]+]] } %struct.sev_test = type { <1 x i32>, <2 x i32>, %struct.sev_test_nested } ; CHECK: [[STRUCT2]] = type { ptr, ptr, ptr } %struct.sev_test_nested = type { ptr, ptr, ptr } ; CHECK: define void @test(i32 "VCSingleElementVector"="0" %sev, ptr %ptr) define void @test(<1 x i32> %sev, ptr %ptr) { ; CHECK: %1 = alloca [[STRUCT1]], align 8 %1 = alloca %struct.sev_test, align 8 ; CHECK: %2 = getelementptr inbounds [[STRUCT1]], ptr %1, i32 0, i32 0 %2 = getelementptr inbounds %struct.sev_test, ptr %1, i32 0, i32 0 ; CHECK: store i32 %sev, ptr %2, align 4 store <1 x i32> %sev, ptr %2, align 4 ; CHECK: %3 = getelementptr inbounds [[STRUCT1]], ptr %1, i32 0, i32 2 %3 = getelementptr inbounds %struct.sev_test, ptr %1, i32 0, i32 2 ; CHECK: %4 = getelementptr inbounds [[STRUCT2]], ptr %3, i32 0, i32 0 %4 = getelementptr inbounds %struct.sev_test_nested, ptr %3, i32 0, i32 0 ; CHECK: store ptr %ptr, ptr %4, align 8 store ptr %ptr, ptr %4, align 8 ; CHECK: %5 = getelementptr inbounds [[STRUCT2]], ptr %3, i32 0, i32 2 %5 = getelementptr inbounds %struct.sev_test_nested, ptr %3, i32 0, i32 2 ; CHECK: store ptr %1, ptr %5, align 8 store ptr %1, ptr %5, align 8 ret void } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/surface_access_reader.ll000066400000000000000000000055321475147027500312410ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel surface argument translation from new style with opaque ; types that SPIRV translator can understand to old style with ; metadata. This test checks access qualifiers translation. 
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: define dllexport spir_kernel void @test( ; CHECK-SAME: i32 ; CHECK-SAME: [[BUF:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM1D:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM1DB:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM2D:%[^,]+]], ; CHECK-SAME: i32 ; CHECK-SAME: [[IM3D:%[^)]+]]) define dllexport spir_kernel void @test(target("spirv.BufferSurfaceINTEL", 0) %buf, target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) %im1d, target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 1) %im1db, target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %im2d, target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %im3d) #0 { %buf.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_0(target("spirv.BufferSurfaceINTEL", 0) %buf) %im1d.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_0_0_0_0_0_0_2(target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) %im1d) %im1db.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_5_0_0_0_0_0_1(target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 1) %im1db) %im2d.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_1(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) %im2d) %im3d.conv = call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_2_0_0_0_0_0_0(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) %im3d) ret void } declare i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_0(target("spirv.BufferSurfaceINTEL", 0)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_0_0_0_0_0_0_2(target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_5_0_0_0_0_0_1(target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 1)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_1(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1)) declare i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_2_0_0_0_0_0_0(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0)) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{ptr @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 2, i32 2, i32 2} ; CHECK-DAG: ![[DESCS]] = !{!"buffer_t read_only", !"image1d_t read_write", !"image1d_buffer_t write_only", !"image2d_t write_only", !"image3d_t read_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/opaque_ptrs/surface_access_writer.ll000066400000000000000000000046311475147027500313120ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel surface argument translation from old style with ; metadata to new style with opaque types that SPIRV translator can ; understand. This test checks access qualifiers translation. 
; REQUIRES: opaque-pointers ; RUN: opt -passes=GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: define spir_kernel void @test( ; CHECK-SAME: target("spirv.BufferSurfaceINTEL", 0) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[BUF:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM1D:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 1) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM1DB:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK-SAME: [[IM2D:%[^,]+]], ; CHECK-SAME: target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM3D:%[^)]+]]) define spir_kernel void @test(i32 %buf, i32 %im1d, i32 %im1db, i32 %im2d, i32 %im3d) { ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.BufferSurfaceINTEL_0(target("spirv.BufferSurfaceINTEL", 0) [[BUF]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_0_0_0_0_0_0_2(target("spirv.Image", void, 0, 0, 0, 0, 0, 0, 2) [[IM1D]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_5_0_0_0_0_0_1(target("spirv.Image", void, 5, 0, 0, 0, 0, 0, 1) [[IM1DB]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_1_0_0_0_0_0_1(target("spirv.Image", void, 1, 0, 0, 0, 0, 0, 1) [[IM2D]]) ; CHECK: call i32 @llvm.genx.address.convert.i32.t_spirv.Image_isVoid_2_0_0_0_0_0_0(target("spirv.Image", void, 2, 0, 0, 0, 0, 0, 0) [[IM3D]]) ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{ptr @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 2, i32 2, i32 2, i32 2} !2 = !{i32 0, i32 0, i32 0, i32 0, i32 0} !3 = !{!"buffer_t read_only", !"image1d_t read_write", !"image1d_buffer_t write_only", !"image2d_t write_only", !"image3d_t read_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/plain_args_reader.ll000066400000000000000000000033171475147027500260440ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel argument translation from new style with opaque types ; that SPIRV translator can understand to old style with ; metadata. Arguments without annotations are used here (CMRT like). 
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s %intel.buffer_rw_t = type opaque %opencl.sampler_t = type opaque define spir_kernel void @test(%intel.buffer_rw_t addrspace(1)* %surf, %opencl.sampler_t addrspace(2)* %samp, i64 %ptr, i32 %gen) #0 { ; CHECK-LABEL: @test( ; CHECK: i32 ; CHECK: [[SURF:%[^,]+]], ; CHECK: i32 ; CHECK: [[SAMP:%[^,]+]], ; CHECK: i64 ; CHECK: [[PTR:%[^,]+]], ; CHECK: i32 ; CHECK: [[GEN:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: %0 = call i32 @llvm.genx.address.convert.i32.p1intel.buffer_rw_t(%intel.buffer_rw_t addrspace(1)* %surf) %1 = call i32 @llvm.genx.address.convert.i32.p2opencl.sampler_t(%opencl.sampler_t addrspace(2)* %samp) ret void } declare i32 @llvm.genx.address.convert.i32.p1intel.buffer_rw_t(%intel.buffer_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p2opencl.sampler_t(%opencl.sampler_t addrspace(2)*) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{void (i32, i32, i64, i32)* @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 1, i32 0, i32 0} ; CHECK-DAG: ![[DESCS]] = !{!"buffer_t read_write", !"sampler_t", !"", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/plain_args_writer.ll000066400000000000000000000032221475147027500261110ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel arguments translation from old style with metadata to ; new style with opaque types that SPIRV translator can ; understand. Arguments without annotations are used here (CMRT like). 
; UNSUPPORTED: opaque-pointers ; XFAIL: llvm13, llvm14 ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; RUN: opt %pass%GenXSPIRVWriterAdaptor %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define spir_kernel void @test(i32 %surf, i32 %samp, i64 %ptr, i32 %gen) { ; CHECK-LABEL: @test( ; CHECK: %intel.buffer_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[SURF:%[^,]+]], ; CHECK: %opencl.sampler_t addrspace(2)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[SAMP:%[^,]+]], ; CHECK: i64 ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[PTR:%[^,]+]], ; CHECK: i32 ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[GEN:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint %intel.buffer_rw_t addrspace(1)* [[SURF]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint %opencl.sampler_t addrspace(2)* [[SAMP]] to i32 ; CHECK-NEXT: ret void ; entry: ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{void (i32, i32, i64, i32)* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0} !1 = !{i32 2, i32 1, i32 0, i32 0} !2 = !{i32 0, i32 0, i32 0, i32 0} !3 = !{!"", !"", !"", !""} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/sev_calling_conv_reader.ll000066400000000000000000000023671475147027500272440ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2022-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test GenXSingleElementVectorUtil preserves calling convention ; (spir_func here) ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; ModuleID = 'start.ll' source_filename = "start.ll" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir64-unknown-unknown" ; Function Attrs: noinline nounwind ; CHECK: define internal spir_func void @bar(<1 x i32> %a) #0 { define internal spir_func void @bar(i32 "VCSingleElementVector"="0" %a) #0 { ret void } ; Function Attrs: noinline nounwind define spir_kernel void @foo() #1 !intel_reqd_sub_group_size !0 { ; CHECK: call spir_func void @bar(<1 x i32> undef) call spir_func void @bar(i32 undef) ret void } attributes #0 = { noinline nounwind "VCFunction" } attributes #1 = { noinline nounwind "VCFunction" "VCNamedBarrierCount"="0" "VCSLMSize"="0" } !0 = !{i32 1} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/sev_calling_conv_writer.ll000066400000000000000000000023461475147027500273130ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2022-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test GenXSingleElementVectorUtil preserves calling convention ; (spir_func here) ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; ModuleID = 'sev_calling_conv_reader.ll' source_filename = "start.ll" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" target triple = "spir64-unknown-unknown" ; Function Attrs: noinline 
nounwind ; CHECK: define internal spir_func void @bar(i32 "VCSingleElementVector"="0" %a) #0 define internal spir_func void @bar(<1 x i32> %a) #0 { ret void } ; Function Attrs: noinline nounwind define dllexport spir_kernel void @foo() #1 !intel_reqd_sub_group_size !0 { ; CHECK: call spir_func void @bar(i32 undef) call spir_func void @bar(<1 x i32> undef) ret void } attributes #0 = { noinline nounwind } attributes #1 = { noinline nounwind "CMGenxMain" "oclrt"="1" } !0 = !{i32 1} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/sev_ptr_reader.ll000066400000000000000000000021561475147027500254070ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2023-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s define internal void @foo(i32** "VCSingleElementVector"="1" %v) #0 { entry: ; CHECK: [[SEV:[^ ]+]] = bitcast <1 x i32*>* %v to i32** ; CHECK: %ld.v = load i32*, i32** [[SEV]], align 8 ; CHECK: %ld.ex = load i32, i32* %ld.v, align 4 %ld.v = load i32*, i32** %v, align 8 %ld.ex = load i32, i32* %ld.v, align 4 ret void } define internal "VCSingleElementVector"="2" i64*** @bar(i64** "VCSingleElementVector"="2" %in, i64*** "VCSingleElementVector"="2" %out) #0 { entry: ; CHECK: [[SEV:[^ ]+]] = bitcast <1 x i64**>* %out to i64*** ; CHECK: [[SEVIN:[^ ]+]] = extractelement <1 x i64**> %in, i64 0 ; CHECK: store i64** [[SEVIN]], i64*** [[SEV]], align 8 store i64** %in, i64*** %out, align 8 ; CHECK: ret <1 x i64**>* %out ret i64*** %out } attributes #0 = { "VCFunction" } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/sev_ptr_writer.ll000066400000000000000000000015651475147027500254640ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2023-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define internal void @foo(<1 x i32*>* %v) #0 { entry: ; CHECK: %ld.v = load i32*, i32** %v, align 8 ; CHECK: %ld.ex = load i32, i32* %ld.v, align 4 %ld.v = load <1 x i32*>, <1 x i32*>* %v, align 8 %ex = extractelement <1 x i32*> %ld.v, i32 0 %ld.ex = load i32, i32* %ex, align 4 ret void } define internal <1 x i64**>* @bar(<1 x i64**> %in, <1 x i64**>* %out) #0 { entry: ; CHECK: store i64** %in, i64*** %out, align 8 store <1 x i64**> %in, <1 x i64**>* %out, align 8 ret <1 x i64**>* %out } attributes #0 = { "VCFunction" } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/sev_signature_reader.ll000066400000000000000000000051051475147027500266000ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test simple signatures transform ; UNSUPPORTED: opaque-pointers ; LLVM16 error: symbol with local linkage cannot have a DLL storage class ; for test-function (internal dllexport) ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s ; CHECK: @global_var_0 = internal global <1 x i32> undef, align 4 @global_var_0 = internal global i32 undef, align 4 #2 ; CHECK: @global_var_1 = internal global <1 x
i32**> undef, align 4 @global_var_1 = internal global i32** undef, align 4 #3 ; CHECK: @global_var_2 = external global <1 x i32**> @global_var_2 = external global i32** #3 ; CHECK: @global_var_3 = internal global i32** undef, align 4 @global_var_3 = internal global i32** undef, align 4 ; CHECK: <1 x i32> @some.func.1(<1 x i32> %a, <1 x i32> %b) define internal "VCSingleElementVector" i32 @some.func.1(i32 "VCSingleElementVector" %a, i32 "VCSingleElementVector" %b) local_unnamed_addr #0 { entry: ; CHECK: call void @llvm.genx.some.intr.0(<1 x i32>* @global_var_0) call void @llvm.genx.some.intr.0(i32* @global_var_0) ; CHECK: call void @llvm.genx.some.intr.1(<1 x i32**>* @global_var_1) call void @llvm.genx.some.intr.1(i32*** @global_var_1) ; CHECK: call void @llvm.genx.some.intr.1(<1 x i32**>* @global_var_2) call void @llvm.genx.some.intr.1(i32*** @global_var_2) ret i32 %a } ; CHECK: i32 @some.func.2(<1 x i32> %a, <1 x i32> %b) define internal i32 @some.func.2(i32 "VCSingleElementVector"="0" %a, i32 "VCSingleElementVector"="0" %b) local_unnamed_addr #0 { entry: ret i32 %a } ; CHECK: i32 @some.func.3(i32 %a, <1 x i32> %b) define internal i32 @some.func.3(i32 %a, i32 "VCSingleElementVector"="0" %b) local_unnamed_addr #0 { entry: ret i32 %a } ; CHECK: i32 @some.func.4(<1 x i32***> %a, <1 x i32>*** %b, <1 x i32*>** %c) define internal i32 @some.func.4(i32*** "VCSingleElementVector"="3" %a, i32*** "VCSingleElementVector"="0" %b, i32*** "VCSingleElementVector"="1" %c) local_unnamed_addr #0 { entry: ret i32 0 } define dllexport spir_kernel void @test() #1 { entry: ret void } declare void @llvm.genx.some.intr.0(i32* "VCSingleElementVector") declare void @llvm.genx.some.intr.1(i32*** "VCSingleElementVector"="2") attributes #0 = { "VCFunction" } attributes #1 = { "VCFunction" "VCSLMSize"="0" } attributes #2 = { "VCGlobalVariable" "VCSingleElementVector"="0" } attributes #3 = { "VCGlobalVariable" "VCSingleElementVector"="2" } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/sev_signature_writer.ll000066400000000000000000000046211475147027500266540ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test simple signatures transform ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s ; CHECK: @global_var_0 = internal global i32 undef, align 4 @global_var_0 = internal global <1 x i32> undef, align 4 #0 ; CHECK: @global_var_1 = internal global i32** undef, align 4 @global_var_1 = internal global <1 x i32**> undef, align 4 #0 ; CHECK: @global_var_2 = external global i32** @global_var_2 = external global <1 x i32**> #0 ; CHECK: "VCSingleElementVector"="0" i32 @some.func.1(i32 "VCSingleElementVector"="0" %a, i32 "VCSingleElementVector"="0" %b) define dso_local <1 x i32> @some.func.1(<1 x i32> %a, <1 x i32> %b) local_unnamed_addr { entry: ; CHECK: call void @llvm.genx.some.intr.0(i32* @global_var_0) call void @llvm.genx.some.intr.0(<1 x i32>* @global_var_0) ; CHECK: call void @llvm.genx.some.intr.1(i32*** @global_var_1) call void @llvm.genx.some.intr.1(<1 x i32**>* @global_var_1) ; CHECK: call void @llvm.genx.some.intr.1(i32*** @global_var_2) call void @llvm.genx.some.intr.1(<1 x i32**>* @global_var_2) ret <1 x i32> %a } ; CHECK: i32 @some.func.2(i32 "VCSingleElementVector"="0" %a, i32 "VCSingleElementVector"="0" %b) define dso_local i32 @some.func.2(<1 x 
i32> %a, <1 x i32> %b) local_unnamed_addr { entry: ; CHECK-NOT: extractelement %c = extractelement <1 x i32> %a, i32 0 ret i32 %c } ; CHECK: i32 @some.func.3(i32 %a, i32 "VCSingleElementVector"="0" %b) define dso_local i32 @some.func.3(i32 %a, <1 x i32> %b) local_unnamed_addr { entry: ret i32 %a } ; CHECK: i32 @some.func.4(i32*** "VCSingleElementVector"="3" %a, i32*** "VCSingleElementVector"="0" %b, i32*** "VCSingleElementVector"="1" %c) define dso_local i32 @some.func.4(<1 x i32***> %a, <1 x i32>*** %b, <1 x i32*>** %c) local_unnamed_addr { entry: ret i32 0 } define spir_kernel void @test() { entry: ret void } declare void @llvm.genx.some.intr.0(<1 x i32>*) declare void @llvm.genx.some.intr.1(<1 x i32**>*) ; CHECK: "VCSingleElementVector"="0" ; CHECK: "VCSingleElementVector"="2" attributes #0 = { "VCGlobalVariable" } !genx.kernels = !{!0} !0 = !{void ()* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0} !1 = !{} !2 = !{} !3 = !{} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/sev_struct.ll000066400000000000000000000036351475147027500246070ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2023-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s target datalayout = "e-p:64:64-i64:64-n8:16:32" target triple = "spir64" ; CHECK: [[STRUCT1:[^ ]+]] = type { i32, <2 x i32>, [[STRUCT2:[^ ]+]] } %struct.sev_test = type { <1 x i32>, <2 x i32>, %struct.sev_test_nested } ; CHECK: [[STRUCT2]] = type { i32*, <2 x i32>*, [[STRUCT1]]* } %struct.sev_test_nested = type { <1 x i32>*, <2 x i32>*, %struct.sev_test* } ; CHECK: define void @test(i32 "VCSingleElementVector"="0" %sev, i32* "VCSingleElementVector"="0" %sev_ptr) define void @test(<1 x i32> %sev, <1 x i32>* %sev_ptr) { ; CHECK: %1 = alloca [[STRUCT1]], align 8 %1 = alloca %struct.sev_test, align 8 ; CHECK: %2 = getelementptr inbounds [[STRUCT1]], [[STRUCT1]]* %1, i32 0, i32 0 %2 = getelementptr inbounds %struct.sev_test, %struct.sev_test* %1, i32 0, i32 0 ; CHECK: store i32 %sev, i32* %2, align 4 store <1 x i32> %sev, <1 x i32>* %2, align 4 ; CHECK: %3 = getelementptr inbounds [[STRUCT1]], [[STRUCT1]]* %1, i32 0, i32 2 %3 = getelementptr inbounds %struct.sev_test, %struct.sev_test* %1, i32 0, i32 2 ; CHECK: %4 = getelementptr inbounds [[STRUCT2]], [[STRUCT2]]* %3, i32 0, i32 0 %4 = getelementptr inbounds %struct.sev_test_nested, %struct.sev_test_nested* %3, i32 0, i32 0 ; CHECK: store i32* %sev_ptr, i32** %4, align 8 store <1 x i32>* %sev_ptr, <1 x i32>** %4, align 8 ; CHECK: %5 = getelementptr inbounds [[STRUCT2]], [[STRUCT2]]* %3, i32 0, i32 2 %5 = getelementptr inbounds %struct.sev_test_nested, %struct.sev_test_nested* %3, i32 0, i32 2 ; CHECK: store [[STRUCT1]]* %1, [[STRUCT1]]** %5, align 8 store %struct.sev_test* %1, %struct.sev_test** %5, align 8 ret void } vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/spirv_friendly_types_reader.ll000066400000000000000000000045631475147027500302140ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test reader translation of SPIRV friendly IR types ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | 
FileCheck %s %spirv.Sampler = type opaque %spirv.Image._void_0_0_1_0_0_0_0 = type opaque %spirv.Image._void_1_0_1_0_0_0_1 = type opaque %spirv.Image._void_2_0_0_0_0_0_2 = type opaque define spir_kernel void @test(%spirv.Sampler addrspace(2)* %smp, %spirv.Image._void_0_0_1_0_0_0_0 addrspace(1)* %im1d, %spirv.Image._void_1_0_1_0_0_0_1 addrspace(1)* %im2d, %spirv.Image._void_2_0_0_0_0_0_2 addrspace(1)* %im3d) #0 { ; CHECK-LABEL: @test( ; CHECK: i32 ; CHECK: [[SMP:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM1D:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM2D:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM3D:%[^,]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: %0 = call i32 @llvm.genx.address.convert.i32.p2spirv.Sampler(%spirv.Sampler addrspace(2)* %smp) %1 = call i32 @llvm.genx.address.convert.i32.p1spirv.Image._void_0_0_1_0_0_0_0(%spirv.Image._void_0_0_1_0_0_0_0 addrspace(1)* %im1d) %2 = call i32 @llvm.genx.address.convert.i32.p1spirv.Image._void_1_0_1_0_0_0_1(%spirv.Image._void_1_0_1_0_0_0_1 addrspace(1)* %im2d) %3 = call i32 @llvm.genx.address.convert.i32.p1spirv.Image._void_2_0_0_0_0_0_2(%spirv.Image._void_2_0_0_0_0_0_2 addrspace(1)* %im3d) ret void } declare i32 @llvm.genx.address.convert.i32.p2spirv.Sampler(%spirv.Sampler addrspace(2)*) declare i32 @llvm.genx.address.convert.i32.p1spirv.Image._void_0_0_1_0_0_0_0(%spirv.Image._void_0_0_1_0_0_0_0 addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1spirv.Image._void_1_0_1_0_0_0_1(%spirv.Image._void_1_0_1_0_0_0_1 addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1spirv.Image._void_2_0_0_0_0_0_2(%spirv.Image._void_2_0_0_0_0_0_2 addrspace(1)*) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{void (i32, i32, i32, i32)* @test, !"test", [[KINDS:![0-9]+]], i32 0, i32 0, !{{[0-9]+}}, [[DESCS:![0-9]+]], i32 0} ; CHECK-DAG: [[KINDS]] = !{i32 1, i32 2, i32 2, i32 2} ; CHECK-DAG: [[DESCS]] = !{!"sampler_t", !"image1d_array_t read_only", !"image2d_array_t write_only", !"image3d_t read_write"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/surface_access_reader.ll000066400000000000000000000053401475147027500266740ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel surface argument translation from new style with opaque ; types that SPIRV translator can understand to old style with ; metadata. This test checks access qualifiers translation. 
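; For illustration (a sketch of the mapping exercised below, not a checked line): an argument such as %intel.buffer_ro_t addrspace(1)* %buf is rewritten to a plain i32 %buf, while its kind and access qualifier are preserved in !genx.kernels as i32 2 plus the descriptor string !"buffer_t read_only".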
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s %intel.buffer_ro_t = type opaque %opencl.image1d_rw_t = type opaque %opencl.image1d_buffer_wo_t = type opaque %opencl.image2d_wo_t = type opaque %opencl.image3d_ro_t = type opaque define spir_kernel void @test(%intel.buffer_ro_t addrspace(1)* %buf, %opencl.image1d_rw_t addrspace(1)* %im1d, %opencl.image1d_buffer_wo_t addrspace(1)* %im1db, %opencl.image2d_wo_t addrspace(1)* %im2d, %opencl.image3d_ro_t addrspace(1)* %im3d) #0 { ; CHECK-LABEL: @test( ; CHECK: i32 ; CHECK: [[BUF:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM1D:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM1DB:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM2D:%[^,]+]], ; CHECK: i32 ; CHECK: [[IM3D:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: %0 = call i32 @llvm.genx.address.convert.i32.p1intel.buffer_ro_t(%intel.buffer_ro_t addrspace(1)* %buf) %1 = call i32 @llvm.genx.address.convert.i32.p1opencl.image1d_rw_t(%opencl.image1d_rw_t addrspace(1)* %im1d) %2 = call i32 @llvm.genx.address.convert.i32.p1opencl.image1d_buffer_wo_t(%opencl.image1d_buffer_wo_t addrspace(1)* %im1db) %3 = call i32 @llvm.genx.address.convert.i32.p1opencl.image2d_wo_t(%opencl.image2d_wo_t addrspace(1)* %im2d) %4 = call i32 @llvm.genx.address.convert.i32.p1opencl.image3d_ro_t(%opencl.image3d_ro_t addrspace(1)* %im3d) ret void } declare i32 @llvm.genx.address.convert.i32.p1intel.buffer_ro_t(%intel.buffer_ro_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image1d_rw_t(%opencl.image1d_rw_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image1d_buffer_wo_t(%opencl.image1d_buffer_wo_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image2d_wo_t(%opencl.image2d_wo_t addrspace(1)*) declare i32 @llvm.genx.address.convert.i32.p1opencl.image3d_ro_t(%opencl.image3d_ro_t addrspace(1)*) attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{void (i32, i32, i32, i32, i32)* @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, ![[DESCS:[0-9]+]], i32 0} ; CHECK-DAG: ![[KINDS]] = !{i32 2, i32 2, i32 2, i32 2, i32 2} ; CHECK-DAG: ![[DESCS]] = !{!"buffer_t read_only", !"image1d_t read_write", !"image1d_buffer_t write_only", !"image2d_t write_only", !"image3d_t read_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/surface_access_writer.ll000066400000000000000000000042201475147027500267420ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test kernel surface argument translation from old style with ; metadata to new style with opaque types that SPIRV translator can ; understand. This test checks access qualifiers translation. 
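; For illustration (a sketch, mirroring the reader test above): a kernel argument recorded in !genx.kernels with kind i32 2 and descriptor !"image2d_t write_only" becomes an %opencl.image2d_wo_t addrspace(1)* parameter, and the !genx.kernels node itself is dropped.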
; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define void @test(i32 %buf, i32 %im1d, i32 %im1db, i32 %im2d, i32 %im3d) { ; CHECK-LABEL: @test( ; CHECK: %intel.buffer_ro_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[BUF:%[^,]+]], ; CHECK: %opencl.image1d_rw_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM1D:%[^,]+]], ; CHECK: %opencl.image1d_buffer_wo_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM1DB:%[^,]+]], ; CHECK: %opencl.image2d_wo_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM2D:%[^,]+]], ; CHECK: %opencl.image3d_ro_t addrspace(1)* ; CHECK-NOT: "VCArgumentDesc" ; CHECK-NOT: "VCArgumentKind" ; CHECK: [[IM3D:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint %intel.buffer_ro_t addrspace(1)* [[BUF]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint %opencl.image1d_rw_t addrspace(1)* [[IM1D]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint %opencl.image1d_buffer_wo_t addrspace(1)* [[IM1DB]] to i32 ; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint %opencl.image2d_wo_t addrspace(1)* [[IM2D]] to i32 ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint %opencl.image3d_ro_t addrspace(1)* [[IM3D]] to i32 ; CHECK-NEXT: ret void ; entry: ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{void (i32, i32, i32, i32, i32)* @test, !"test", !1, i32 0, i32 0, !2, !3, i32 0, i32 0} !1 = !{i32 2, i32 2, i32 2, i32 2, i32 2} !2 = !{i32 0, i32 0, i32 0, i32 0, i32 0} !3 = !{!"buffer_t read_only", !"image1d_t read_write", !"image1d_buffer_t write_only", !"image2d_t write_only", !"image3d_t read_only"} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/unknown_arg_reader.ll000066400000000000000000000015211475147027500262500ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test reader translation of implicit argument with argument kind ; decoration. ; RUN: opt %pass%GenXSPIRVReaderAdaptor -S < %s | FileCheck %s define spir_kernel void @test(<3 x i32> "VCArgumentKind"="24" %__arg_llvm.genx.local.id) #0 { ; CHECK-LABEL: @test ; CHECK-SAME: (<3 x i32> [[LOCAL_ID:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: ret void } attributes #0 = { "VCFunction" } ; CHECK: !genx.kernels = !{[[KERNEL:![0-9]+]]} ; CHECK: [[KERNEL]] = !{{{.*}} @test, !"test", ![[KINDS:[0-9]+]], i32 0, i32 0, !{{[0-9]+}}, !{{[0-9]+}}, i32 0} ; CHECK: ![[KINDS]] = !{i32 24} vc-intrinsics-0.22.1/GenXIntrinsics/test/Adaptors/unknown_arg_writer.ll000066400000000000000000000014721475147027500263270ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2021 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; Test writer translation of implicit argument. Implicit arguments ; should not appear in current form after transition from cmc. 
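; For illustration (a sketch of the translation checked below): the implicit local-id argument <3 x i32> %__arg_llvm.genx.local.id, marked with kind i32 24 in !genx.kernels, simply receives the "VCArgumentKind"="24" parameter attribute, and the !genx.kernels metadata is removed.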
; RUN: opt %pass%GenXSPIRVWriterAdaptor -S < %s | FileCheck %s define void @test(<3 x i32> %__arg_llvm.genx.local.id) { ; CHECK-LABEL: @test( ; CHECK: <3 x i32> ; CHECK: "VCArgumentKind"="24" ; CHECK: [[LOCAL_ID:%[^)]+]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; entry: ret void } ; CHECK-NOT: !genx.kernels !genx.kernels = !{!0} !0 = !{void (<3 x i32>)* @test, !"test", !1, i32 0, i32 0, !2, !2, i32 0, i32 0} !1 = !{i32 24} !2 = !{} vc-intrinsics-0.22.1/GenXIntrinsics/test/CMakeLists.txt000066400000000000000000000035251475147027500230360ustar00rootroot00000000000000#=========================== begin_copyright_notice ============================ # # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # #============================ end_copyright_notice ============================= if(BUILD_EXTERNAL) if(NOT EXISTS ${LLVM_EXTERNAL_LIT}) message(FATAL_ERROR "External build requires LLVM_EXTERNAL_LIT to point to the lit executable") endif() endif() # Add plugin with all intrinsics libraries for loading with opt. add_subdirectory(Plugin) set(VC_INTRINSICS_TEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # Generate temporary site config with LLVM variables filled. configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in ${CMAKE_CURRENT_BINARY_DIR}/temp.cfg.py MAIN_CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py ) # Need to regenerate again since plugin name is required and the proper # way to get it is to use generator expressions, which are not allowed # in configure_file. file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/vcint.$<CONFIG>.lit.site.cfg.py" INPUT "${CMAKE_CURRENT_BINARY_DIR}/temp.cfg.py" ) set(USED_TOOLS # These are required by lit default substitutions. FileCheck count not # Main tool for plugin testing. opt ) if(NOT BUILD_EXTERNAL) set(TEST_DEPS ${USED_TOOLS} ) else() # Check for tools availability. foreach(tool ${USED_TOOLS}) set(TOOL_PATH "${LLVM_TOOLS_BINARY_DIR}/${tool}") if(NOT EXISTS ${TOOL_PATH}) message(FATAL_ERROR "Tool ${tool} is not found (required by lit tests)") endif() endforeach() endif() # Add testsuite with custom config name that depends on generator. 
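# The testsuite defined below backs the check-vc-intrinsics target; as a sketch (the build directory path is an assumption), it can be driven with: cmake --build <build-dir> --target check-vc-intrinsics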
add_lit_testsuite(check-vc-intrinsics "Running the vc-intrinsics regression tests" ${CMAKE_CURRENT_BINARY_DIR} ARGS "--config-prefix=vcint.$<CONFIG>.lit" "-sv" DEPENDS ${TEST_DEPS} VCIntrinsicsPlugin ) vc-intrinsics-0.22.1/GenXIntrinsics/test/Plugin/000077500000000000000000000000001475147027500215275ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/test/Plugin/CMakeLists.txt000066400000000000000000000006761475147027500242760ustar00rootroot00000000000000#=========================== begin_copyright_notice ============================ # # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # #============================ end_copyright_notice ============================= set(PLUGIN_SOURCES Plugin.cpp ) add_library(VCIntrinsicsPlugin MODULE ${PLUGIN_SOURCES} ) set(LINK_LIBS LLVMGenXIntrinsics ) target_link_libraries(VCIntrinsicsPlugin ${LINK_LIBS} ) vc-intrinsics-0.22.1/GenXIntrinsics/test/Plugin/Plugin.cpp000066400000000000000000000045621475147027500234760ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2020-2022 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #include "llvm/GenXIntrinsics/GenXSimdCFLowering.h" #include "llvm/GenXIntrinsics/GenXSPIRVReaderAdaptor.h" #include "llvm/GenXIntrinsics/GenXSPIRVWriterAdaptor.h" #include "llvm/PassRegistry.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassPlugin.h" using namespace llvm; //----------------------------------------------------------------------------- // New PM support //----------------------------------------------------------------------------- // Add callback to create plugin pass to pass builder. // PassArgs - arguments for pass construction, passed by value to avoid // dangling references in callbacks. template <typename PassT, typename... ArgsT> static void registerModulePass(PassBuilder &PB, ArgsT... PassArgs) { auto Reg = [PassArgs...](StringRef Name, ModulePassManager &MPM, ArrayRef<PassBuilder::PipelineElement>) { if (Name != PassT::getArgString()) return false; MPM.addPass(PassT{PassArgs...}); return true; }; PB.registerPipelineParsingCallback(Reg); } static void registerPasses(PassBuilder &PB) { registerModulePass<CMSimdCFLoweringPass>(PB); registerModulePass<GenXSPIRVWriterAdaptor>( PB, /*RewriteTypes=*/true, /*RewriteSingleElementVectors=*/true); registerModulePass<GenXSPIRVReaderAdaptor>(PB); } static PassPluginLibraryInfo getIntrinsicsPluginInfo() { return {LLVM_PLUGIN_API_VERSION, "VC intrinsics plugin", "v1", registerPasses}; } // Entry point for plugin in new PM infrastructure. extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK llvmGetPassPluginInfo() { return getIntrinsicsPluginInfo(); } //----------------------------------------------------------------------------- // Legacy PM support //----------------------------------------------------------------------------- // Register intrinsics passes on dynamic loading of plugin library. 
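// As a sketch of how this plugin is exercised (the shared library name here is an assumption; lit supplies the real path via config.vc_intrinsics_plugin): opt -load libVCIntrinsicsPlugin.so -cmsimdcflowering input.ll for the legacy pass manager, or opt -load-pass-plugin libVCIntrinsicsPlugin.so -passes=cmsimdcflowering input.ll for the new one.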
static int initializePasses() { PassRegistry &PR = *PassRegistry::getPassRegistry(); initializeCMSimdCFLoweringLegacyPass(PR); initializeGenXSPIRVReaderAdaptorLegacyPass(PR); initializeGenXSPIRVWriterAdaptorLegacyPass(PR); return 0; } static const int Init = initializePasses(); vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/000077500000000000000000000000001475147027500231055ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/bitcast_between_wrrs.ll000066400000000000000000000040101475147027500276500ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%cmsimdcflowering -S < %s | FileCheck %s @Rcp_T2 = internal global <64 x double> undef ; CHECK: @EM = internal global <32 x i1> define dso_local dllexport void @test1(<32 x i16> %mask, <64 x i32> %oldval) { entry: %Rcp_T = alloca <64 x double>, align 512 %0 = icmp ne <32 x i16> %mask, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %0) br i1 %call, label %if.then, label %if.end if.then: ; CHECK: if.then: ; CHECK-NEXT: [[EM_LOAD:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[PRED_WRR:%.*]] = call <64 x i32> @llvm.genx.wrregioni.v64i32.v32i32.i16.v32i1(<64 x i32> %oldval, <32 x i32> zeroinitializer, i32 0, i32 32, i32 2, i16 0, i32 undef, <32 x i1> [[EM_LOAD]]) ; CHECK-NEXT: [[PRED_WRR_CAST:%.*]] = bitcast <64 x i32> [[PRED_WRR]] to <32 x double> %wrregion26 = call <64 x i32> @llvm.genx.wrregioni.v64i32.v32i32.i16.i1(<64 x i32> %oldval, <32 x i32> zeroinitializer, i32 0, i32 32, i32 2, i16 0, i32 undef, i1 true) %cast27 = bitcast <64 x i32> %wrregion26 to <32 x double> %Rcp_T2_load = load <64 x double>, <64 x double>* %Rcp_T %wrregion28 = call <64 x double> @llvm.genx.wrregionf.v64f64.v32f64.i16.i1(<64 x double> %Rcp_T2_load, <32 x double> %cast27, i32 0, i32 32, i32 1, i16 0, i32 32, i1 true) store <64 x double> %wrregion28, <64 x double>* %Rcp_T br label %if.end if.end: %1 = load <64 x double>, <64 x double>* %Rcp_T store <64 x double> %1, <64 x double>* @Rcp_T2 ret void } declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>) declare <64 x i32> @llvm.genx.wrregioni.v64i32.v32i32.i16.i1(<64 x i32>, <32 x i32>, i32, i32, i32, i16, i32, i1) declare <64 x double> @llvm.genx.wrregionf.v64f64.v32f64.i16.i1(<64 x double>, <32 x double>, i32, i32, i32, i16, i32, i1) vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/opaque_ptrs/000077500000000000000000000000001475147027500254475ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/opaque_ptrs/bitcast_between_wrrs.ll000066400000000000000000000036731475147027500322300ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; REQUIRES: opaque-pointers ; RUN: opt -passes=cmsimdcflowering -S < %s | FileCheck %s @Rcp_T2 = internal global <64 x double> undef ; CHECK: @EM = internal global <32 x i1> define dso_local dllexport void @test1(<32 x i16> %mask, <64 x i32> %oldval) { %Rcp_T = alloca <64 x double>, align 512 %cmp = icmp ne <32 x i16> %mask, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %cmp) br i1 %call, 
label %if.then, label %if.end ; CHECK-LABEL: if.then: if.then: ; CHECK: [[EM_LOAD:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[PRED_WRR:%.*]] = call <64 x i32> @llvm.genx.wrregioni.v64i32.v32i32.i16.v32i1(<64 x i32> %oldval, <32 x i32> zeroinitializer, i32 0, i32 32, i32 2, i16 0, i32 undef, <32 x i1> [[EM_LOAD]]) ; CHECK-NEXT: [[PRED_WRR_CAST:%.*]] = bitcast <64 x i32> [[PRED_WRR]] to <32 x double> %wrregion26 = call <64 x i32> @llvm.genx.wrregioni.v64i32.v32i32.i16.i1(<64 x i32> %oldval, <32 x i32> zeroinitializer, i32 0, i32 32, i32 2, i16 0, i32 undef, i1 true) %cast27 = bitcast <64 x i32> %wrregion26 to <32 x double> %Rcp_T2_load = load <64 x double>, ptr %Rcp_T %wrregion28 = call <64 x double> @llvm.genx.wrregionf.v64f64.v32f64.i16.i1(<64 x double> %Rcp_T2_load, <32 x double> %cast27, i32 0, i32 32, i32 1, i16 0, i32 32, i1 true) store <64 x double> %wrregion28, ptr %Rcp_T br label %if.end if.end: %ld = load <64 x double>, ptr %Rcp_T store <64 x double> %ld, ptr @Rcp_T2 ret void } declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>) declare <64 x i32> @llvm.genx.wrregioni.v64i32.v32i32.i16.i1(<64 x i32>, <32 x i32>, i32, i32, i32, i16, i32, i1) declare <64 x double> @llvm.genx.wrregionf.v64f64.v32f64.i16.i1(<64 x double>, <32 x double>, i32, i32, i32, i16, i32, i1) vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/opaque_ptrs/predicate_masked_gather.ll000066400000000000000000000033111475147027500326140ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; REQUIRES: opaque-pointers ; RUN: opt -passes=cmsimdcflowering -S < %s | FileCheck %s ; CHECK: @EM = internal global <32 x i1> @g2 = internal global <32 x i32> undef define dso_local dllexport void @test_gather(<32 x i16> %mask, <32 x i32> %addrs) { %g = alloca <32 x i32>, align 512 %cmp = icmp ne <32 x i16> %mask, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %cmp) br i1 %call, label %if.then, label %if.end ; CHECK-LABEL: if.then: if.then: ; CHECK: [[EM_LOAD1:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[CALL1:%.*]] = call <32 x i32> @llvm.genx.gather.masked.scaled2.v32i32.v32i32.v32i1(i32 2, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> [[EM_LOAD1]]) %call1 = call <32 x i32> @llvm.genx.gather.masked.scaled2.v32i32.v32i32.v32i1(i32 2, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> ) ; CHECK: [[EM_LOAD2:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[CALL1_SIMDCFPREDL:%.*]] = select <32 x i1> [[EM_LOAD2:%.*]], <32 x i32> [[CALL1:%.*]] store <32 x i32> %call1, ptr %g br label %if.end if.end: %ld = load <32 x i32>, ptr %g store <32 x i32> %ld, ptr @g2 ret void } declare <32 x i32> @llvm.genx.gather.masked.scaled2.v32i32.v32i32.v32i1(i32, i16, i32, i32, <32 x i32>, <32 x i1>) declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>) vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/opaque_ptrs/replicate_mask.ll000066400000000000000000000074021475147027500307660ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; REQUIRES: opaque-pointers ; RUN: opt -passes=cmsimdcflowering -S < %s | FileCheck %s @g1 = internal global <64 x i32> undef ; CHECK: @EM = internal global <32 x i1> 
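; Note (explanatory comment, not a checked line): @EM is the execution-mask global introduced by the lowering; its 32-element width matches the <32 x i1> condition fed to llvm.genx.simdcf.any in this test.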
; CHECK-LABEL: @test define dso_local dllexport void @test(<32 x i16> %mask) { %g = alloca <64 x i32>, align 512 %cmp = icmp ne <32 x i16> %mask, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %cmp) br i1 %call, label %if.then, label %if.end ; CHECK-LABEL: if.then: if.then: ; CHECK: [[CALL1:%.*]] = call <64 x i32> @_Z24func_replicate_mask_attrv() %call1 = call <64 x i32> @_Z24func_replicate_mask_attrv() ; CHECK: [[EM_LOAD:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[CHENNELEM:%.*]] = shufflevector <32 x i1> [[EM_LOAD]], <32 x i1> undef, <64 x i32> ; CHECK-NEXT: [[CALL1_SIMDCFPREDL:%.*]] = select <64 x i1> [[CHENNELEM]], <64 x i32> [[CALL1]] store <64 x i32> %call1, ptr %g br label %if.end if.end: %ld = load <64 x i32>, ptr %g store <64 x i32> %ld, ptr @g1 ret void } ; CHECK-LABEL: @test_gather4 define dso_local dllexport void @test_gather4(<32 x i16> %mask, <32 x i32> %addrs) { %g = alloca <64 x i32>, align 512 %cmp = icmp ne <32 x i16> %mask, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %cmp) br i1 %call, label %if.then, label %if.end ; CHECK-LABEL: if.then: if.then: ; CHECK: [[EM_LOAD1:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[CALL1:%.*]] = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> [[EM_LOAD1]]) %call1 = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> ) ; CHECK: [[EM_LOAD2:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[CHENNELEM:%.*]] = shufflevector <32 x i1> [[EM_LOAD2]], <32 x i1> undef, <64 x i32> ; CHECK-NEXT: [[CALL1_SIMDCFPREDL:%.*]] = select <64 x i1> [[CHENNELEM]], <64 x i32> [[CALL1]] store <64 x i32> %call1, ptr %g br label %if.end if.end: %ld = load <64 x i32>, ptr %g store <64 x i32> %ld, ptr @g1 ret void } define internal <64 x i32> @_Z24func_replicate_mask_attrv() #0 { ret <64 x i32> zeroinitializer } declare <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32, i16, i32, i32, <32 x i32>, <32 x i1>) declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>) attributes #0 = { noinline nounwind "CMGenxReplicateMask"="2"} vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/opaque_ptrs/replicate_mask_masked_gather4.ll000066400000000000000000000044461475147027500337350ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; REQUIRES: opaque-pointers ; RUN: opt -passes=cmsimdcflowering -S < %s | FileCheck %s @Rcp_T2 = internal global <64 x i32> undef ; CHECK: @EM = internal global <32 x i1> define dso_local dllexport void @test(<32 x i16> %mask, <32 x i32> %addrs) { %Rcp_T = alloca <64 x i32>, align 512 %cmp = icmp ne <32 x i16> %mask, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %cmp) br i1 %call, label %if.then, label %if.end ; CHECK-LABEL: if.then: if.then: ; CHECK: [[EM_LOAD1:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[CALL1:%.*]] = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> [[EM_LOAD1]]) %call1 = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> ) ; CHECK: [[EM_LOAD2:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[CHENNELEM:%.*]] = 
shufflevector <32 x i1> [[EM_LOAD2]], <32 x i1> undef, <64 x i32> ; CHECK-NEXT: [[CALL1_SIMDCFPREDL:%.*]] = select <64 x i1> [[CHENNELEM]], <64 x i32> [[CALL1]] store <64 x i32> %call1, ptr %Rcp_T br label %if.end if.end: %ld = load <64 x i32>, ptr %Rcp_T store <64 x i32> %ld, ptr @Rcp_T2 ret void } declare <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32, i16, i32, i32, <32 x i32>, <32 x i1>) declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>) vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/opaque_ptrs/update_mask_masked_gather4.ll000066400000000000000000000066701475147027500332500ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; REQUIRES: opaque-pointers ; RUN: opt -passes=cmsimdcflowering -S < %s | FileCheck %s @g1 = internal global <64 x i32> undef ; CHECK: @EM = internal global <32 x i1> define dso_local dllexport void @test(<32 x i16> %cond1, <32 x i16> %cond2, <32 x i32> %addrs, <32 x i1> %pred) { %g = alloca <64 x i32>, align 512 %cmp1 = icmp ne <32 x i16> %cond1, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %cmp1) br i1 %call, label %if.then, label %if.end ; CHECK-LABEL: if.then: if.then: ; CHECK: [[EM_LOAD1:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[EM_UPDATE1:%.*]] = and <32 x i1> %pred, [[EM_LOAD1]] ; CHECK-NEXT: [[CALL1:%.*]] = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> [[EM_UPDATE1]]) %call1 = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> %pred) ; CHECK: [[EM_LOAD2:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[EM_UPDATE2:%.*]] = and <32 x i1> %pred, [[EM_LOAD2]] ; CHECK-NEXT: [[CHENNELEM:%.*]] = shufflevector <32 x i1> [[EM_UPDATE2]], <32 x i1> undef, <64 x i32> ; CHECK-NEXT: [[CALL1_SIMDCFPRED1:%.*]] = select <64 x i1> [[CHENNELEM]], <64 x i32> [[CALL1]] store <64 x i32> %call1, ptr %g %cmp2 = icmp ne <32 x i16> %cond2, zeroinitializer %nest = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %cmp2) br i1 %nest, label %if.then2, label %if.end2 ; CHECK-LABEL: if.then2: if.then2: ; CHECK: [[EM_LOAD3:%.*]] = load <32 x i1>, ptr @EM ; CHECK-NEXT: [[EM_UPDATE2:%.*]] = and <32 x i1> %pred, [[EM_LOAD3]] ; CHECK-NEXT: [[CHENNELEM2:%.*]] = shufflevector <32 x i1> [[EM_UPDATE2]], <32 x i1> undef, <64 x i32> ; CHECK-NEXT: %call1.simdcfpred7 = select <64 x i1> [[CHENNELEM2]], <64 x i32> [[CALL1]] store <64 x i32> %call1, ptr %g br label %if.end2 if.end2: br label %if.end if.end: %ld = load <64 x i32>, ptr %g store <64 x i32> %ld, ptr @g1 ret void } declare <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32, i16, i32, i32, <32 x i32>, <32 x i1>) declare <64 x i32> @llvm.genx.wrregioni.v64i32.v16i32.i16.i1(<64 x i32> %load, <64 x i32> %call, i32, i32, i32, i16, i32, i1) declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>) vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/predicate_masked_gather.ll000066400000000000000000000033671475147027500302650ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; UNSUPPORTED: 
opaque-pointers ; RUN: opt %pass%cmsimdcflowering -S < %s | FileCheck %s ; CHECK: @EM = internal global <32 x i1> @g2 = internal global <32 x i32> undef define dso_local dllexport void @test_gather(<32 x i16> %mask, <32 x i32> %addrs) { entry: %g = alloca <32 x i32>, align 512 %0 = icmp ne <32 x i16> %mask, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %0) br i1 %call, label %if.then, label %if.end if.then: ; CHECK-LABEL: if.then: ; CHECK: [[EM_LOAD1:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[CALL1:%.*]] = call <32 x i32> @llvm.genx.gather.masked.scaled2.v32i32.v32i32.v32i1(i32 2, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> [[EM_LOAD1]]) %call1 = call <32 x i32> @llvm.genx.gather.masked.scaled2.v32i32.v32i32.v32i1(i32 2, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> ) ; CHECK: [[EM_LOAD2:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[CALL1_SIMDCFPREDL:%.*]] = select <32 x i1> [[EM_LOAD2:%.*]], <32 x i32> [[CALL1:%.*]] store <32 x i32> %call1, <32 x i32>* %g br label %if.end if.end: %1 = load <32 x i32>, <32 x i32>* %g store <32 x i32> %1, <32 x i32>* @g2 ret void } declare <32 x i32> @llvm.genx.gather.masked.scaled2.v32i32.v32i32.v32i1(i32, i16, i32, i32, <32 x i32>, <32 x i1>) declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>) vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/replicate_mask.ll000066400000000000000000000075251475147027500264320ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%cmsimdcflowering -S < %s | FileCheck %s @g1 = internal global <64 x i32> undef ; CHECK: @EM = internal global <32 x i1> ; CHECK-LABEL: @test define dso_local dllexport void @test(<32 x i16> %mask) { entry: %g = alloca <64 x i32>, align 512 %0 = icmp ne <32 x i16> %mask, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %0) br i1 %call, label %if.then, label %if.end if.then: ; CHECK: [[CALL1:%.*]] = call <64 x i32> @_Z24func_replicate_mask_attrv() %call1 = call <64 x i32> @_Z24func_replicate_mask_attrv() ; CHECK: [[EM_LOAD:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[CHENNELEM:%.*]] = shufflevector <32 x i1> [[EM_LOAD]], <32 x i1> undef, <64 x i32> ; CHECK-NEXT: [[CALL1_SIMDCFPREDL:%.*]] = select <64 x i1> [[CHENNELEM]], <64 x i32> [[CALL1]] store <64 x i32> %call1, <64 x i32>* %g br label %if.end if.end: %1 = load <64 x i32>, <64 x i32>* %g store <64 x i32> %1, <64 x i32>* @g1 ret void } ; CHECK-LABEL: @test_gather4 define dso_local dllexport void @test_gather4(<32 x i16> %mask, <32 x i32> %addrs) { entry: %g = alloca <64 x i32>, align 512 %0 = icmp ne <32 x i16> %mask, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %0) br i1 %call, label %if.then, label %if.end if.then: ; CHECK-LABEL: if.then: ; CHECK: [[EM_LOAD1:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[CALL1:%.*]] = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> [[EM_LOAD1]]) %call1 = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> ) ; CHECK: [[EM_LOAD2:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[CHENNELEM:%.*]] = shufflevector <32 x i1> [[EM_LOAD2]], <32 x i1> undef, <64 x i32> ; 
CHECK-NEXT: [[CALL1_SIMDCFPREDL:%.*]] = select <64 x i1> [[CHENNELEM]], <64 x i32> [[CALL1]] store <64 x i32> %call1, <64 x i32>* %g br label %if.end if.end: %1 = load <64 x i32>, <64 x i32>* %g store <64 x i32> %1, <64 x i32>* @g1 ret void } define internal <64 x i32> @_Z24func_replicate_mask_attrv() #0 { entry: ret <64 x i32> zeroinitializer } declare <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32, i16, i32, i32, <32 x i32>, <32 x i1>) declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>) attributes #0 = { noinline nounwind "CMGenxReplicateMask"="2"} vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/replicate_mask_masked_gather4.ll000066400000000000000000000045231475147027500313670ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2021-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%cmsimdcflowering -S < %s | FileCheck %s @Rcp_T2 = internal global <64 x i32> undef ; CHECK: @EM = internal global <32 x i1> define dso_local dllexport void @test(<32 x i16> %mask, <32 x i32> %addrs) { entry: %Rcp_T = alloca <64 x i32>, align 512 %0 = icmp ne <32 x i16> %mask, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %0) br i1 %call, label %if.then, label %if.end if.then: ; CHECK-LABEL: if.then: ; CHECK: [[EM_LOAD1:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[CALL1:%.*]] = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> [[EM_LOAD1]]) %call1 = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> ) ; CHECK: [[EM_LOAD2:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[CHENNELEM:%.*]] = shufflevector <32 x i1> [[EM_LOAD2]], <32 x i1> undef, <64 x i32> ; CHECK-NEXT: [[CALL1_SIMDCFPREDL:%.*]] = select <64 x i1> [[CHENNELEM]], <64 x i32> [[CALL1]] store <64 x i32> %call1, <64 x i32>* %Rcp_T br label %if.end if.end: %1 = load <64 x i32>, <64 x i32>* %Rcp_T store <64 x i32> %1, <64 x i32>* @Rcp_T2 ret void } declare <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32, i16, i32, i32, <32 x i32>, <32 x i1>) declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>) vc-intrinsics-0.22.1/GenXIntrinsics/test/SimdCFLowering/update_mask_masked_gather4.ll000066400000000000000000000067561475147027500307130ustar00rootroot00000000000000;=========================== begin_copyright_notice ============================ ; ; Copyright (C) 2020-2024 Intel Corporation ; ; SPDX-License-Identifier: MIT ; ;============================ end_copyright_notice ============================= ; UNSUPPORTED: opaque-pointers ; RUN: opt %pass%cmsimdcflowering -S < %s | FileCheck %s @g1 = internal global <64 x i32> undef ; CHECK: @EM = internal global <32 x i1> define dso_local dllexport void @test(<32 x i16> %cond1, <32 x i16> %cond2, <32 x i32> %addrs, <32 x i1> %pred) { entry: %g = alloca <64 x i32>, align 512 %0 = icmp ne <32 x i16> %cond1, zeroinitializer %call = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %0) br i1 %call, label %if.then, label %if.end if.then: ; CHECK-LABEL: if.then: ; CHECK: [[EM_LOAD1:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[EM_UPDATE1:%.*]] = and <32 x i1> %pred, [[EM_LOAD1]] ; CHECK-NEXT: [[CALL1:%.*]] = call <64 x i32> 
@llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> [[EM_UPDATE1]]) %call1 = call <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32 12, i16 0, i32 254, i32 0, <32 x i32> %addrs, <32 x i1> %pred) ; CHECK: [[EM_LOAD2:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[EM_UPDATE2:%.*]] = and <32 x i1> %pred, [[EM_LOAD2]] ; CHECK-NEXT: [[CHENNELEM:%.*]] = shufflevector <32 x i1> [[EM_UPDATE2]], <32 x i1> undef, <64 x i32> ; CHECK-NEXT: [[CALL1_SIMDCFPRED1:%.*]] = select <64 x i1> [[CHENNELEM]], <64 x i32> [[CALL1]] store <64 x i32> %call1, <64 x i32>* %g %1 = icmp ne <32 x i16> %cond2, zeroinitializer %nest = call i1 @llvm.genx.simdcf.any.v32i1(<32 x i1> %1) br i1 %nest, label %if.then2, label %if.end2 if.then2: ; CHECK-LABEL: if.then2: ; CHECK: [[EM_LOAD3:%.*]] = load <32 x i1>, <32 x i1>* @EM ; CHECK-NEXT: [[EM_UPDATE2:%.*]] = and <32 x i1> %pred, [[EM_LOAD3]] ; CHECK-NEXT: [[CHENNELEM2:%.*]] = shufflevector <32 x i1> [[EM_UPDATE2]], <32 x i1> undef, <64 x i32> ; CHECK-NEXT: %call1.simdcfpred7 = select <64 x i1> [[CHENNELEM2]], <64 x i32> [[CALL1]] store <64 x i32> %call1, <64 x i32>* %g br label %if.end2 if.end2: br label %if.end if.end: %2 = load <64 x i32>, <64 x i32>* %g store <64 x i32> %2, <64 x i32>* @g1 ret void } declare <64 x i32> @llvm.genx.gather4.masked.scaled2.v64i32.v32i32.v32i1(i32, i16, i32, i32, <32 x i32>, <32 x i1>) declare <64 x i32> @llvm.genx.wrregioni.v64i32.v16i32.i16.i1(<64 x i32> %load, <64 x i32> %call, i32, i32, i32, i16, i32, i1) declare i1 @llvm.genx.simdcf.any.v32i1(<32 x i1>) vc-intrinsics-0.22.1/GenXIntrinsics/test/lit.cfg.py000066400000000000000000000047611475147027500222010ustar00rootroot00000000000000# ========================== begin_copyright_notice ============================ # # Copyright (C) 2020-2024 Intel Corporation # # SPDX-License-Identifier: MIT # # =========================== end_copyright_notice ============================= # -*- Python -*- import lit.formats import lit.util from lit.llvm import llvm_config from lit.llvm.subst import ToolSubst from lit.llvm.subst import FindTool # Configuration file for the 'lit' test runner. # name: The name of this test suite. config.name = 'vc-intrinsics' # testFormat: The test format to use to interpret tests. config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) # suffixes: A list of file extensions to treat as test files. config.suffixes = ['.ll'] # excludes: A list of directories and files to exclude from the testsuite. config.excludes = ['CMakeLists.txt', 'Plugin'] used_llvm = "llvm{}".format(config.llvm_version_major) config.available_features = [used_llvm] if int(config.llvm_version_major) >= 16: config.available_features.append('opaque-pointers') # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) # test_exec_root: The root path where tests should be run. config.test_exec_root = os.path.join(config.test_run_dir, 'test_output') llvm_config.use_default_substitutions() config.substitutions.append(('%PATH%', config.environment['PATH'])) tool_dirs = [config.llvm_tools_dir] # Add extra args for opt to remove boilerplate from tests. opt_extra_args = ['-load', config.vc_intrinsics_plugin] # Add option for new pass manager plugins. Extension instead of # replacement is needed to hack option parsing mechanism. Argument of # '-load' is processed during initial option parsing and all passes # from plugin are registered in legacy PM. 
This registration allows adding # passes to the new PM via command line options in the same way as # with the old PM. Otherwise, the -passes= option will be used for the new PM and # -<pass-name> for the old PM. Additionally, LLVM will load the plugin only once # because it permanently loads libraries with caching behavior. if int(config.llvm_version_major) >= 13: opt_extra_args.extend(['-load-pass-plugin', config.vc_intrinsics_plugin]) if int(config.llvm_version_major) < 13: config.substitutions.append(('%pass%', ' -')) else: config.substitutions.append(('%pass%', ' -passes=')) print(f"llvm_version_major:{config.llvm_version_major}") tools = [ToolSubst('opt', extra_args=opt_extra_args)] llvm_config.add_tool_substitutions(tools, tool_dirs) vc-intrinsics-0.22.1/GenXIntrinsics/test/lit.site.cfg.py.in000066400000000000000000000024711475147027500235450ustar00rootroot00000000000000# ========================== begin_copyright_notice ============================ # # Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT # # =========================== end_copyright_notice ============================= @LIT_SITE_CFG_IN_HEADER@ import sys config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_obj_root = "@LLVM_BINARY_DIR@" config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" config.host_arch = "@HOST_ARCH@" config.python_executable = "@PYTHON_EXECUTABLE@" config.test_run_dir = "@CMAKE_CURRENT_BINARY_DIR@" config.vc_intrinsics_plugin = "$<TARGET_FILE:VCIntrinsicsPlugin>" config.llvm_version_major = "@LLVM_VERSION_MAJOR@" # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. try: config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params except KeyError: e = sys.exc_info()[1] key, = e.args lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) import lit.llvm lit.llvm.initialize(lit_config, config) # Let the main config do the real work. 
lit_config.load_config(config, "@VC_INTRINSICS_TEST_SOURCE_DIR@/lit.cfg.py") vc-intrinsics-0.22.1/GenXIntrinsics/unittests/000077500000000000000000000000001475147027500213545ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/unittests/CMakeLists.txt000066400000000000000000000010331475147027500241110ustar00rootroot00000000000000#=========================== begin_copyright_notice ============================ # # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # #============================ end_copyright_notice ============================= add_custom_target(GenXIntrinsicsUnitTests) set_target_properties(GenXIntrinsicsUnitTests PROPERTIES FOLDER "GenXIntrinsicsTests") function(add_genx_intrinsics_unittest test_dirname) add_unittest(GenXIntrinsicsUnitTests ${test_dirname} ${ARGN}) endfunction() add_subdirectory(GenXIntrinsics) vc-intrinsics-0.22.1/GenXIntrinsics/unittests/GenXIntrinsics/000077500000000000000000000000001475147027500242635ustar00rootroot00000000000000vc-intrinsics-0.22.1/GenXIntrinsics/unittests/GenXIntrinsics/CMakeLists.txt000066400000000000000000000007361475147027500270310ustar00rootroot00000000000000#=========================== begin_copyright_notice ============================ # # Copyright (C) 2019-2021 Intel Corporation # # SPDX-License-Identifier: MIT # #============================ end_copyright_notice ============================= set(LLVM_LINK_COMPONENTS Core Support CodeGen ) add_genx_intrinsics_unittest(GenXIntrinsicsTests GenXIntrinsicsTest.cpp ) target_link_libraries(GenXIntrinsicsTests PRIVATE LLVMGenXIntrinsics LLVMTestingSupport ) vc-intrinsics-0.22.1/GenXIntrinsics/unittests/GenXIntrinsics/GenXIntrinsicsTest.cpp000066400000000000000000000031241475147027500305360ustar00rootroot00000000000000/*========================== begin_copyright_notice ============================ Copyright (C) 2019-2023 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #include "llvm/ADT/StringRef.h" #include "llvm/GenXIntrinsics/GenXIntrinsics.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "gtest/gtest.h" using namespace llvm; namespace { TEST(GenXIntrinsics, OverloadedTypes) { EXPECT_EQ(GenXIntrinsic::isOverloadedArg(Intrinsic::fma, 0), false); EXPECT_EQ(GenXIntrinsic::isOverloadedArg(Intrinsic::fma, 1), false); EXPECT_EQ(GenXIntrinsic::isOverloadedArg(GenXIntrinsic::genx_3d_sample, 7), true); EXPECT_EQ(GenXIntrinsic::isOverloadedArg(GenXIntrinsic::genx_raw_send, 1), true); EXPECT_EQ(GenXIntrinsic::isOverloadedArg(GenXIntrinsic::genx_simdcf_any, 0), true); EXPECT_EQ(GenXIntrinsic::isOverloadedArg(GenXIntrinsic::genx_ssdp4a, 0), true); EXPECT_EQ(GenXIntrinsic::isOverloadedArg(GenXIntrinsic::genx_ssdp4a, 1), true); EXPECT_EQ(GenXIntrinsic::isOverloadedArg(GenXIntrinsic::genx_ssdp4a, 2), true); EXPECT_EQ(GenXIntrinsic::isOverloadedArg(GenXIntrinsic::genx_dpasw_nosrc0, 2), false); EXPECT_EQ( GenXIntrinsic::isOverloadedArg(GenXIntrinsic::genx_lsc_store_slm, 10), true); EXPECT_EQ( GenXIntrinsic::isOverloadedArg(GenXIntrinsic::genx_lsc_store_slm, 11), true); EXPECT_EQ( GenXIntrinsic::isOverloadedArg(GenXIntrinsic::genx_lsc_store_slm, 12), false); } } // namespace vc-intrinsics-0.22.1/LICENSE.md000066400000000000000000000020621475147027500160070ustar00rootroot00000000000000MIT License Copyright (c) 2019 Intel Corporation Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 
associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. vc-intrinsics-0.22.1/Readme.md000066400000000000000000000063451475147027500161300ustar00rootroot00000000000000 # VC Intrinsics ## Introduction The VC Intrinsics project contains a set of new intrinsics on top of core LLVM IR instructions that represent the SIMD semantics of a program targeting a GPU. This set is now used by * CMC - https://github.com/intel/cm-compiler * DPC++ - https://github.com/intel/llvm * ISPC - https://github.com/ispc/ispc frontend compilers and * IGC VC backend - https://github.com/intel/intel-graphics-compiler ## License VC Intrinsics are distributed under the MIT license. You may obtain a copy of the License at: https://opensource.org/licenses/MIT ## Dependencies ### Source code * LLVM Project - https://github.com/llvm/llvm-project ### Tools To build libraries: * CMake - https://cmake.org/ - 3.13.4 or later * Python - https://www.python.org/ - 2.7 or later * C++ compiler - anything that can compile LLVM To build documentation: * Sphinx - https://www.sphinx-doc.org - 1.5 or later * GNU Make - https://www.gnu.org/software/make/ - 3.79 or later * Standard Unix utilities (mkdir, rm, sed) ## Building VC Intrinsics can be built in two major modes: in-tree and external. All major LLVM versions starting from LLVM 8 are supported. LLVM ToT can be used too, but there is no guarantee that it will always work (because of sudden breaking changes in the LLVM C++ API). However, the maintainers try to fix such issues as fast as possible. ### In-tree build For an in-tree build, VC Intrinsics can be treated as an external LLVM project. Put the VC Intrinsics source directory inside the `llvm/projects` directory, or add `-DLLVM_EXTERNAL_PROJECTS="vc-intrinsics" -DLLVM_EXTERNAL_VC_INTRINSICS_SOURCE_DIR=<path-to-vc-intrinsics>` to the cmake command arguments when configuring LLVM. ### External build To configure VC Intrinsics with prebuilt LLVM run cmake as follows: ```shell $ cmake -DLLVM_DIR=<path-to-installed-llvm>/lib/cmake/llvm ``` ### Documentation VC Intrinsics documentation is inside the `docs` subdirectory and can be built using Sphinx. To build the HTML version do the following: ```shell $ cd docs $ make -f Makefile.sphinx html ``` This will extract comments from the main intrinsics description and generate readable HTML output in the `_build/html` subdirectory. ## Testing The VC Intrinsics repository contains lit tests that are enabled when `-DVC_INTR_ENABLE_LIT_TESTS=ON` is passed to the cmake command. Lit tests use LLVM plugins and are currently supported only with dynamic LLVM (when LLVM is configured with `-DLLVM_LINK_LLVM_DYLIB=ON`). In an external build, the path to the `lit` utility should be specified as follows: `-DLLVM_EXTERNAL_LIT=<path-to-llvm-lit>`. 
Full example with an external build: ```shell $ cmake -DLLVM_DIR=<path-to-installed-llvm>/lib/cmake/llvm -DVC_INTR_ENABLE_LIT_TESTS=ON -DLLVM_EXTERNAL_LIT=<path-to-llvm-lit> ``` The `check-vc-intrinsics` target will run the lit tests. ## How to provide feedback Please submit an issue using the native github.com interface: https://github.com/intel/vc-intrinsics/issues. ## How to contribute Create a pull request on github.com with your patch. A maintainer will contact you if there are questions or concerns. vc-intrinsics-0.22.1/SECURITY.md000066400000000000000000000006251475147027500161770ustar00rootroot00000000000000# Security Policy Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. ## Reporting a Vulnerability Please report any security vulnerabilities in this project [utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html).