pax_global_header00006660000000000000000000000064150163534230014514gustar00rootroot0000000000000052 comment=334a88afba37aba639f080a77ab535c87720ed8a xrootd-s3-http-0.4.1/000077500000000000000000000000001501635342300143355ustar00rootroot00000000000000xrootd-s3-http-0.4.1/.clang-format000066400000000000000000000000751501635342300167120ustar00rootroot00000000000000BasedOnStyle: LLVM IndentWidth: 4 UseTab: Always TabWidth: 4 xrootd-s3-http-0.4.1/.github/000077500000000000000000000000001501635342300156755ustar00rootroot00000000000000xrootd-s3-http-0.4.1/.github/workflows/000077500000000000000000000000001501635342300177325ustar00rootroot00000000000000xrootd-s3-http-0.4.1/.github/workflows/linter.yml000066400000000000000000000042051501635342300217530ustar00rootroot00000000000000name: Lint # Linter Action documentation at https://github.com/marketplace/actions/lint-action # One thing to note is that this action is currently configured automatically fix and re-push the linted code to the repo on a pull request. # Because the github token used for authenticating this commit comes from the upstream repo (ie pelicanplatform/xrootd-s3-http), those linter changes will not be pushed # to the fork that is providing the pull request. A manual git fetch will have to be run by the fork after the PR is merged to update the fork to the linted code. # The linter does not have authorization to lint any code in the repo's .github/workflows/ directory. # If the linter fails, the PR can still be completed, but none of the linter changes will be made. 
on: push: branches: - main pull_request_target: branches: - main permissions: checks: write contents: read pull-requests: write jobs: run-linters: name: Run linters runs-on: ubuntu-latest steps: - name: Check out repository (push) if: ${{ github.event_name == 'push' }} uses: actions/checkout@v3 - name: Check out repository (pull_request_target) if: ${{ github.event_name == 'pull_request_target' }} uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - name: Install ClangFormat run: sudo apt-get install -y clang-format - name: Run linters uses: wearerequired/lint-action@v2 with: github_token: ${{ secrets.github_token }} # For providing the commit authorization for the auto_fix feature clang_format: true clang_format_auto_fix: true auto_fix: true commit: false continue_on_error: false git_email: github.event.commits[0].author.name # Uses the author's git email instead of the default git email associated with the action ("lint-action@samuelmeuli.com") clang_format_args: -style=file # Any additional arguments for clang_format - name: suggester / lint uses: reviewdog/action-suggester@v1 with: tool_name: lint xrootd-s3-http-0.4.1/.github/workflows/test.yml000066400000000000000000000071171501635342300214420ustar00rootroot00000000000000name: Test on: workflow_dispatch: branches: - main pull_request: branches: - main push: branches: - main env: # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 
BUILD_TYPE: Debug jobs: build: strategy: matrix: external-gtest: [ YES ] os: [ ubuntu-24.04 ] runs-on: ${{ matrix.os }} name: Build with external_gtest=${{ matrix.external-gtest }} on ${{ matrix.os }} steps: - uses: actions/checkout@v3 with: submodules: recursive - uses: actions/setup-go@v5 with: go-version: '1.23.5' - name: install deps run: | sudo curl -L https://xrootd.web.cern.ch/repo/RPM-GPG-KEY.txt -o /etc/apt/trusted.gpg.d/xrootd.asc sudo /bin/sh -c 'echo "deb https://xrootd.web.cern.ch/ubuntu noble stable" >> /etc/apt/sources.list.d/xrootd.list' sudo apt update && sudo apt-get install -y cmake libcurl4-openssl-dev libcurl4 pkg-config libssl-dev xrootd-server libxrootd-dev libxrootd-server-dev libgtest-dev sudo curl -L https://dl.min.io/server/minio/release/linux-amd64/minio -o /usr/local/bin/minio sudo chmod +x /usr/local/bin/minio sudo curl -L https://dl.min.io/client/mc/release/linux-amd64/mc -o /usr/local/bin/mc sudo chmod +x /usr/local/bin/mc - name: Create Build Environment # Some projects don't allow in-source building, so create a separate build directory # We'll use this as our working directory for all subsequent commands run: cmake -E make_directory ${{runner.workspace}}/build - name: Configure CMake # Use a bash shell so we can use the same syntax for environment variable # access regardless of the host operating system shell: bash working-directory: ${{runner.workspace}}/build # Note the current convention is to use the -S and -B options here to specify source # and build directories, but this is only available with CMake 3.13 and higher. # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DBUILD_TESTING=yes -DXROOTD_PLUGINS_EXTERNAL_GTEST=${{ matrix.external-gtest }} - name: Build working-directory: ${{runner.workspace}}/build shell: bash # Execute the build. You can specify a specific target with "--target " run: cmake --build . 
--config $BUILD_TYPE - name: Unit Tests working-directory: ${{runner.workspace}}/build shell: bash # Execute tests defined by the CMake configuration. # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail run: ctest -C $BUILD_TYPE --verbose - name: Start xrootd working-directory: ${{runner.workspace}}/build shell: bash run: xrootd -c ${{runner.workspace}}/xrootd-s3-http/test/s3-xrootd-test.cfg & - name: Get a file working-directory: ${{runner.workspace}}/build shell: bash run: curl -f http://localhost:8080/aws-opendata/2024/wod_apb_2024.nc -o wod_apb_2024.nc - name: Fail a file working-directory: ${{runner.workspace}}/build shell: bash run: | if curl -f http://localhost:8080/aws-opendata/2024/bogus_file_name; then echo "Error: Command unexpectedly succeeded." exit 1 else echo "Command failed as expected." fi - name: Get metadata working-directory: ${{runner.workspace}}/build shell: bash run: curl -f -k -X PROPFIND http://localhost:8080/aws-opendata/2024/wod_apb_2024.nc -d prop_query xrootd-s3-http-0.4.1/.gitignore000066400000000000000000000000071501635342300163220ustar00rootroot00000000000000build/ xrootd-s3-http-0.4.1/.gitmodules000066400000000000000000000000001501635342300165000ustar00rootroot00000000000000xrootd-s3-http-0.4.1/.pre-commit-config.yaml000066400000000000000000000005061501635342300206170ustar00rootroot00000000000000repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v3.2.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files - repo: https://github.com/pre-commit/mirrors-clang-format rev: v18.1.6 hooks: - id: clang-format xrootd-s3-http-0.4.1/CMakeLists.txt000066400000000000000000000210701501635342300170750ustar00rootroot00000000000000cmake_minimum_required( VERSION 3.14 ) project( xrootd-http/s3 ) option( XROOTD_PLUGINS_EXTERNAL_GTEST "Use an external/pre-installed copy of GTest" OFF ) option( VALGRIND "Run select unit tests under valgrind" OFF ) option( ASAN 
"Build the plugin with the address sanitizer" OFF ) set( CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ) if( "${CMAKE_BUILD_TYPE}" STREQUAL "" ) set( CMAKE_BUILD_TYPE Debug ) endif() find_package( XRootD REQUIRED COMPONENTS UTILS SERVER ) find_package( CURL REQUIRED ) find_package( Threads REQUIRED ) find_package( OpenSSL REQUIRED ) if(VALGRIND) find_program(VALGRIND_BIN valgrind REQUIRED) endif() if(ASAN) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address") set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} -fsanitize=address") endif() set( CMAKE_CXX_STANDARD 17 ) set( CMAKE_CXX_STANDARD_REQUIRED ON ) if( CMAKE_BUILD_TYPE STREQUAL Debug ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror" ) endif() if(NOT APPLE) SET( CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") SET( CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,--no-undefined") endif() # Our custom Filesystem module creates the std::filesystem library target for # any special dependencies needed for using std::filesystem. find_package( Filesystem REQUIRED ) # Similar setup for libatomic; required only on 32-bit systems find_package( Atomic REQUIRED ) if( NOT XROOTD_EXTERNAL_TINYXML2 ) include(FetchContent) # Allow a locally-found tinyxml2 tarball to be used; provides the ability for packagers # to build this without any external network connectivity (as in the case of Koji). 
set( TINYXML2_URL "${CMAKE_CURRENT_SOURCE_DIR}/tinyxml2-10.0.0.tar.gz" ) if( NOT EXISTS "${TINYXML2_URL}" ) set( TINYXML2_URL "https://github.com/leethomason/tinyxml2/archive/refs/tags/10.0.0.tar.gz" ) endif() cmake_policy( SET CMP0135 NEW ) FetchContent_Declare( tinyxml2 URL "${TINYXML2_URL}" URL_HASH SHA256=3bdf15128ba16686e69bce256cc468e76c7b94ff2c7f391cc5ec09e40bff3839 ) set(CMAKE_POSITION_INDEPENDENT_CODE ON CACHE INTERNAL "Force tinyxml2 to use PIC") FetchContent_MakeAvailable( tinyxml2 ) else() find_package( tinyxml2 REQUIRED ) endif() ## # Flag is needed explicitly for 32-bit platforms but not currently exported by XRootD # as a reverse dependency. Can be removed once this is merged and in all supported # releases: # # https://github.com/xrootd/xrootd/pull/2369 # add_definitions( -D_FILE_OFFSET_BITS=64 ) ###################### ## libXrdOssS3 ## ###################### # On Linux, we hide all the symbols for the final libraries, exposing only what's needed for the XRootD # runtime loader. So here we create the object library and will create a separate one for testing with # the symbols exposed. 
add_library(XrdS3Obj OBJECT src/CurlUtil.cc src/S3File.cc src/S3Directory.cc src/S3AccessInfo.cc src/S3FileSystem.cc src/AWSv4-impl.cc src/S3Commands.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) set_target_properties(XrdS3Obj PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(XrdS3Obj PRIVATE ${XRootD_INCLUDE_DIRS}) target_link_libraries( XrdS3Obj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl OpenSSL::Crypto tinyxml2::tinyxml2 Threads::Threads std::filesystem std::atomic ) # Compatability library, doesn't match upstream's naming convention add_library(XrdS3 MODULE "$") target_link_libraries(XrdS3 XrdS3Obj) # New library name, matches upstream's naming convention add_library(XrdOssS3 MODULE "$") target_link_libraries(XrdOssS3 XrdS3Obj) ###################### ## libXrdOssHTTP ## ###################### add_library(XrdHTTPServerObj OBJECT src/CurlUtil.cc src/HTTPFile.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc) set_target_properties(XrdHTTPServerObj PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(XrdHTTPServerObj PRIVATE ${XRootD_INCLUDE_DIRS}) target_link_libraries(XrdHTTPServerObj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl OpenSSL::Crypto Threads::Threads std::filesystem) add_library(XrdHTTPServer MODULE "$") target_link_libraries(XrdHTTPServer XrdHTTPServerObj) # New library name, matches upstream's naming convention add_library(XrdOssHttp MODULE "$") target_link_libraries(XrdOssHttp XrdHTTPServerObj) ###################### ## libXrdOssFilter ## ###################### add_library( XrdOssFilterObj OBJECT src/Filter.cc src/logging.cc ) set_target_properties( XrdOssFilterObj PROPERTIES POSITION_INDEPENDENT_CODE ON ) target_include_directories( XrdOssFilterObj PRIVATE ${XRootD_INCLUDE_DIRS} ) target_link_libraries( XrdOssFilterObj ${XRootD_UTILS_LIBRARIES} 
${XRootD_SERVER_LIBRARIES} ) add_library( XrdOssFilter MODULE "$" ) target_link_libraries( XrdOssFilter XrdOssFilterObj ) # Customize module's suffix and, on Linux, hide unnecessary symbols if( APPLE ) set_target_properties( XrdS3 PROPERTIES OUTPUT_NAME "XrdS3-${XRootD_PLUGIN_VERSION}" SUFFIX ".so" ) set_target_properties( XrdHTTPServer PROPERTIES OUTPUT_NAME "XrdHTTPServer-${XRootD_PLUGIN_VERSION}" SUFFIX ".so" ) set_target_properties( XrdOssS3 PROPERTIES OUTPUT_NAME "XrdOssS3-${XRootD_PLUGIN_VERSION}" SUFFIX ".so" ) set_target_properties( XrdOssHttp PROPERTIES OUTPUT_NAME "XrdOssHttp-${XRootD_PLUGIN_VERSION}" SUFFIX ".so" ) set_target_properties( XrdOssFilter PROPERTIES OUTPUT_NAME "XrdOssFilter-${XRootD_PLUGIN_VERSION}" SUFFIX ".so" ) else() set_target_properties( XrdS3 PROPERTIES OUTPUT_NAME "XrdS3-${XRootD_PLUGIN_VERSION}" SUFFIX ".so" LINK_FLAGS "-Wl,--version-script=${CMAKE_SOURCE_DIR}/configs/export-lib-symbols" ) set_target_properties( XrdHTTPServer PROPERTIES OUTPUT_NAME "XrdHTTPServer-${XRootD_PLUGIN_VERSION}" SUFFIX ".so" LINK_FLAGS "-Wl,--version-script=${CMAKE_SOURCE_DIR}/configs/export-lib-symbols" ) set_target_properties( XrdOssS3 PROPERTIES OUTPUT_NAME "XrdOssS3-${XRootD_PLUGIN_VERSION}" SUFFIX ".so" LINK_FLAGS "-Wl,--version-script=${CMAKE_SOURCE_DIR}/configs/export-lib-symbols" ) set_target_properties( XrdOssHttp PROPERTIES OUTPUT_NAME "XrdOssHttp-${XRootD_PLUGIN_VERSION}" SUFFIX ".so" LINK_FLAGS "-Wl,--version-script=${CMAKE_SOURCE_DIR}/configs/export-lib-symbols" ) set_target_properties( XrdOssFilter PROPERTIES OUTPUT_NAME "XrdOssFilter-${XRootD_PLUGIN_VERSION}" SUFFIX ".so" LINK_FLAGS "-Wl,--version-script=${CMAKE_SOURCE_DIR}/configs/export-lib-symbols" ) endif() include(GNUInstallDirs) install( TARGETS XrdS3 XrdHTTPServer XrdOssS3 XrdOssHttp XrdOssFilter LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) if( BUILD_TESTING ) # Create shared libraries for testing from the existing objects add_library(XrdS3Testing SHARED "$") 
target_link_libraries(XrdS3Testing XrdS3Obj) target_include_directories(XrdS3Testing INTERFACE ${XRootD_INCLUDE_DIRS}) add_library(XrdHTTPServerTesting SHARED "$") target_link_libraries(XrdHTTPServerTesting XrdHTTPServerObj) target_include_directories(XrdHTTPServerTesting INTERFACE ${XRootD_INCLUDE_DIRS}) add_library( XrdOssFilterTesting SHARED "$" ) target_link_libraries( XrdOssFilterTesting XrdOssFilterObj ) target_include_directories( XrdOssFilterTesting INTERFACE ${XRootD_INCLUDE_DIRS} ) find_program(GoWrk go-wrk HINTS "$ENV{HOME}/go/bin") if( NOT GoWrk ) # Try installing the go-wrk variable to generate a reasonable stress test execute_process( COMMAND go install github.com/bbockelm/go-wrk@92dbe19 RESULT_VARIABLE go_install_result ) if( go_install_result EQUAL 0 ) find_program(GoWrk go-wrk HINTS "$ENV{HOME}/go/bin") else() message(ERROR "Failed to install the go-wrk binary" ) endif() endif() if( NOT XROOTD_PLUGINS_EXTERNAL_GTEST ) include( FetchContent ) set( GTEST_URL "${CMAKE_CURRENT_SOURCE_DIR}/googletest-1.15.2.tar.gz" ) if( NOT EXISTS "${GTEST_URL}" ) set( GTEST_URL "https://github.com/google/googletest/releases/download/v1.15.2/googletest-1.15.2.tar.gz" ) endif() cmake_policy(SET CMP0135 NEW) FetchContent_Declare(GTest URL "${GTEST_URL}" URL_HASH SHA256=7b42b4d6ed48810c5362c265a17faebe90dc2373c885e5216439d37927f02926 TEST_COMMAND "" ) FetchContent_MakeAvailable( GTest ) else() find_package(GTest REQUIRED) endif() enable_testing() add_subdirectory(test) endif() xrootd-s3-http-0.4.1/LICENSE000066400000000000000000000261351501635342300153510ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. xrootd-s3-http-0.4.1/README.md000066400000000000000000000110721501635342300156150ustar00rootroot00000000000000 # S3/HTTP filesystem plugins for XRootD These filesystem plugins for [XRootD](https://github.com/xrootd/xrootd) allow you to serve objects from S3 and HTTP backends through an XRootD server. ## Building and Installing Assuming XRootD, CMake>=3.13 and gcc>=8 are already installed, run: ``` mkdir build cd build cmake .. make # For system installation, uncomment: # make install ``` If building XRootD from source instead, add `-DXROOTD_DIR` to the CMake command line to point it at the installed directory. ### Building with Tests Unit tests for this repository require `gtest`, which is included as a submodule of this repo. The tests can be compiled with a slight modification to your build command: ``` mkdir build cd build cmake -DXROOTD_PLUGINS_BUILD_UNITTESTS=ON .. 
make ``` This creates the directory `build/test` with two unit test executables that can be run: - `build/test/s3-gtest` - `build/test/http-gtest` Alternatively, `gtest` can be installed externally. For example, on RHEL-based linux distributions: ```bash dnf install gtest ``` Add `-DXROOTD_PLUGINS_EXTERNAL_GTEST=ON` to your `cmake` command if you're using an external installation. ## Configuration ### Configure an HTTP Server Backend To configure the HTTP server plugin, add the following line to the Xrootd configuration file: ``` ofs.osslib ``` Here's a minimal config file ``` # Enable the HTTP protocol on port 1094 (same as the default XRootD port) # NOTE: This is NOT the HTTP plugin -- it is the library XRootD uses to # speak the HTTP protocol, as opposed to the root protocol, for incoming requests xrd.protocol http:1094 libXrdHttp.so # Allow access to path with given prefix. # all.export # Setting up HTTP plugin ofs.osslib libXrdHTTPServer.so # Use this if libXrdHTTPServer.so is in a development directory # ofs.osslib /path/to/libXrdHTTPServer.so # Upon last testing, the plugin did not yet work in async mode xrootd.async off # Configure the upstream HTTP server that XRootD is to treat as a filesystem httpserver.host_name httpserver.host_url ``` ### Configure an S3 Backend To configure the S3 plugin, add the following line to the Xrootd configuration file: ``` ofs.osslib ``` Here's a minimal config file ``` # Enable the HTTP protocol on port 1094 (same as the default XRootD port) # The S3 plugin use xrd.protocol http:1094 libXrdHttp.so # Allow access to path with given prefix. 
# all.export # Setting up S3 plugin ofs.osslib libXrdS3.so # Use this if libXrdS3.so is in a development directory # ofs.osslib /path/to/libXrdS3.so # Upon last testing, the plugin did not yet work in async mode xrootd.async off #example url #https:///my-magic-path/bar/foo # these must be in this order to allow parsing of multiple entries # To export a bucket requiring an access/private key: s3.begin s3.path_name my-magic-path s3.bucket_name hubzero-private-rich s3.service_name s3.amazonaws.com s3.region us-east-1 s3.access_key_file /xrootd-dev/access-key s3.secret_key_file /xrootd-dev/secret-key s3.service_url https://s3.us-east-1.amazonaws.com s3.url_style path s3.end # To export an unauthenticated (public) bucket, remove # the key-related directives s3.begin s3.path_name my-other-magic-path s3.bucket_name hubzero-private-rich-2 s3.service_name s3.amazonaws.com s3.region us-east-1 s3.service_url https://s3.us-east-1.amazonaws.com s3.url_style virtual s3.end # Specify the path style for URL queries at the endpoint. 
Valid # options are `path` and `virtual`, where path corresponds to URLs # like `https://my-service-url.com/bucket/object` and virtual # corresponds to URLs like `https://bucket.my-service-url.com/object` s3.url_style virtual # trace levels are # error # warning # info # debug # dump # debug produces a fair amount of log, # but dump produces the actual wire traffic to the client and # should only be used if you have a reason to do so s3.trace debug ``` ## Startup and Testing ### HTTP Server Backend Assuming you named the config file `xrootd-http.cfg`, as a non-rootly user run: ``` xrootd -d -c xrootd-http.cfg ``` In a separate terminal, run ``` curl -v http://localhost:1094// ``` ### S3 Server Backend Startup and Testing Assuming you named the config file `xrootd-s3.cfg`, as a non-rootly user run: ``` xrootd -d -c xrootd-s3.cfg ``` In a separate terminal, run ``` curl -v http://localhost:1094// ``` xrootd-s3-http-0.4.1/cmake/000077500000000000000000000000001501635342300154155ustar00rootroot00000000000000xrootd-s3-http-0.4.1/cmake/FindAtomic.cmake000066400000000000000000000033311501635342300204340ustar00rootroot00000000000000 # Ideas come from # # https://gitlab.kitware.com/cmake/cmake/-/issues/17834 # # But applied to the use of libatomic instead of libstdc++fs. 
# The need to do this was highlighted as part of: # https://github.com/PelicanPlatform/xrootd-s3-http/pull/81 # The original driving use case was 32-bit builds; if we # decide to drop those, we can rip out the target include(CheckSourceCompiles) function( check_working_cxx_atomics varname ) CHECK_SOURCE_COMPILES( CXX " #include #include #include int main() { std::atomic a1; std::atomic a2; std::atomic a3; std::atomic a4; return a1++ + a2++ + a3++ + a4++; }" ${varname} ) endfunction( check_working_cxx_atomics varname ) check_working_cxx_atomics( CXX_ATOMIC_NO_LINK_NEEDED ) set( _found FALSE ) if( CXX_ATOMIC_NO_LINK_NEEDED ) set( _found TRUE ) else() check_library_exists(atomic __atomic_fetch_add_4 "" HAVE_LIBATOMIC) if (HAVE_LIBATOMIC) set( OLD_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} ) list( APPEND CMAKE_REQUIRED_LIBRARIES "atomic" ) check_working_cxx_atomics( HAVE_CXX_ATOMICS_WITH_LIB ) set( CMAKE_REQUIRED_LIBRARIES ${OLD_CMAKE_REQUIRED_LIBRARIES} ) set( HAVE_CXX_ATOMICS_WITH_LIB TRUE ) set( _found TRUE ) endif() endif() add_library( std::atomic INTERFACE IMPORTED ) if( HAVE_CXX_ATOMICS_WITH_LIB ) set_property( TARGET std::atomic APPEND PROPERTY INTERFACE_LINK_LIBRARIES atomic ) endif() set( Atomic_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::atomic" FORCE ) if( Atomic_FIND_REQUIRED AND NOT Atomic_FOUND ) message( FATAL_ERROR "Cannot run simple program using std::atomic" ) endif() xrootd-s3-http-0.4.1/cmake/FindFilesystem.cmake000066400000000000000000000027721501635342300213540ustar00rootroot00000000000000 # Ideas come from # # https://gitlab.kitware.com/cmake/cmake/-/issues/17834 # # Basically, upstream CMake claims the fact that a separate library is # needed for std::filesystem support is a short-lived fact (of all the # platforms we use, only RHEL 8 uses a compiler where this is needed), # hence they don't want a standardized way to detect std::filesystem include(CheckSourceCompiles) set( CMAKE_REQUIRED_INCLUDES 
"${XRootD_INCLUDE_DIR}" ) set( SAMPLE_FILESYSTEM "#include #include int main() { auto cwd = std::filesystem::current_path(); return cwd.empty(); }") CHECK_SOURCE_COMPILES( CXX "${SAMPLE_FILESYSTEM}" CXX_FILESYSTEM_NO_LINK_NEEDED ) set( _found FALSE ) if( CXX_FILESYSTEM_NO_LINK_NEEDED ) set( _found TRUE ) else() # Add the libstdc++ flag set( CMAKE_REQUIRED_LIBRARIES "-lstdc++fs" ) CHECK_SOURCE_COMPILES( CXX "${SAMPLE_FILESYSTEM}" CXX_FILESYSTEM_STDCPPFS_NEEDED ) set( _found TRUE ) endif() add_library( std::filesystem INTERFACE IMPORTED ) #set_property( TARGET std::filesystem APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_17 ) if( CXX_FILESYSTEM_STDCPPFS_NEEDED ) set_property( TARGET std::filesystem APPEND PROPERTY INTERFACE_LINK_LIBRARIES -lstdc++fs ) endif() set( Filesystem_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::filesystem" FORCE ) if( Filesystem_FIND_REQUIRED AND NOT Filesystem_FOUND ) message( FATAL_ERROR "Cannot run simple program using std::filesystem" ) endif() xrootd-s3-http-0.4.1/configs/000077500000000000000000000000001501635342300157655ustar00rootroot00000000000000xrootd-s3-http-0.4.1/configs/export-lib-symbols000066400000000000000000000001201501635342300214540ustar00rootroot00000000000000{ global: XrdOssGetStorageSystem*; XrdOssAddStorageSystem*; local: *; }; xrootd-s3-http-0.4.1/rpm/000077500000000000000000000000001501635342300151335ustar00rootroot00000000000000xrootd-s3-http-0.4.1/rpm/xrootd-s3-http.spec000066400000000000000000000045641501635342300206370ustar00rootroot00000000000000Name: xrootd-s3-http Version: 0.4.1 Release: 1%{?dist} Summary: S3/HTTP filesystem plugins for xrootd License: Apache-2.0 URL: https://github.com/PelicanPlatform/%{name} Source0: %{url}/archive/refs/tags/v%{version}/%{name}-%{version}.tar.gz %define xrootd_current_major 5 %define xrootd_current_minor 7 %define xrootd_next_major 6 BuildRequires: cmake3 BuildRequires: gcc-c++ BuildRequires: make BuildRequires: xrootd-server-libs >= 
1:%{xrootd_current_major} BuildRequires: xrootd-server-libs < 1:%{xrootd_next_major} BuildRequires: xrootd-server-devel >= 1:%{xrootd_current_major} BuildRequires: xrootd-server-devel < 1:%{xrootd_next_major} BuildRequires: libcurl-devel BuildRequires: openssl-devel BuildRequires: tinyxml2-devel Requires: xrootd-server >= 1:%{xrootd_current_major}.%{xrootd_current_minor} Requires: xrootd-server < 1:%{xrootd_next_major}.0.0-1 %description %{summary} %prep %setup -q %build %cmake . -DCMAKE_BUILD_TYPE=RelWithDebInfo -DXROOTD_EXTERNAL_TINYXML2=ON cmake --build redhat-linux-build --verbose %install %cmake_install %files %{_libdir}/libXrdHTTPServer-5.so %{_libdir}/libXrdS3-5.so %{_libdir}/libXrdOssHttp-5.so %{_libdir}/libXrdOssS3-5.so %{_libdir}/libXrdOssFilter-5.so %doc README.md %license LICENSE %changelog * Fri May 30 2025 Brian Bockelman - 0.4.1-1 - Fix stall timeouts which would never fire. - Fix bug where S3 rate limiting would result in corrupt data being sent back to the client. - Remove redundant HEAD which was invoked twice on S3 file open. - Put libcurl into threadsafe mode, avoiding potential deadlocks or long unresponsive periods. * Thu May 29 2025 Brian Bockelman - 0.4.0-1 - Improve logging messages to include timing of read requests - Implement the vector read method, used by some clients. - Send basic cache performance statistics out via the XRootD OSS g-stream. * Sat Mar 15 2025 Brian Bockelman - 0.3.0-1 - Add new filter plugin to the package - Add renamed plugins to the package * Sat Feb 1 2025 Brian Bockelman - 0.2.1-1 - Bump to upstream version 0.2.1. 
* Tue Nov 28 2023 Justin Hiemstra - 0.0.2-1 - Add HTTPServer plugin * Tue Dec 06 2022 Brian Bockelman - 0.0.1-1 - Initial, "Hello world" version of the S3 filesystem plugin xrootd-s3-http-0.4.1/src/000077500000000000000000000000001501635342300151245ustar00rootroot00000000000000xrootd-s3-http-0.4.1/src/AWSCredential.hh000066400000000000000000000022131501635342300200700ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once class AWSCredential { public: AWSCredential(const std::string &accessKeyID, const std::string &secretAccessKey, const std::string &securityToken) : m_access_key(accessKeyID), m_secret_key(secretAccessKey), m_security_token(securityToken) {} private: const std::string m_access_key; const std::string m_secret_key; const std::string m_security_token; }; xrootd-s3-http-0.4.1/src/AWSv4-impl.cc000066400000000000000000000141131501635342300172760ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2023, HTCondor team, UW-Madison * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. 
You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ /** * Utilities for generating pre-signed URLs. * * These were originally authored by the HTCondor team under the Apache 2.0 * license which can also be found in the LICENSE file at the top-level * directory of this project. No copyright statement was present in the * original file. */ #include #include #include "AWSv4-impl.hh" #include #include namespace AWSv4Impl { // // This function should not be called for anything in query_parameters, // except for by AmazonQuery::SendRequest(). // std::string amazonURLEncode(const std::string &input) { /* * See * http://docs.amazonwebservices.com/AWSEC2/2010-11-15/DeveloperGuide/using-query-api.html * */ std::string output; for (unsigned i = 0; i < input.length(); ++i) { // "Do not URL encode ... A-Z, a-z, 0-9, hyphen ( - ), // underscore ( _ ), period ( . ), and tilde ( ~ ). Percent // encode all other characters with %XY, where X and Y are hex // characters 0-9 and uppercase A-F. Percent encode extended // UTF-8 characters in the form %XY%ZA..." if (('A' <= input[i] && input[i] <= 'Z') || ('a' <= input[i] && input[i] <= 'z') || ('0' <= input[i] && input[i] <= '9') || input[i] == '-' || input[i] == '_' || input[i] == '.' 
|| input[i] == '~') { char uglyHack[] = "X"; uglyHack[0] = input[i]; output.append(uglyHack); } else { char percentEncode[4]; snprintf(percentEncode, 4, "%%%.2hhX", input[i]); output.append(percentEncode); } } return output; } std::string pathEncode(const std::string &original) { std::string segment; std::string encoded; const char *o = original.c_str(); size_t next = 0; size_t offset = 0; size_t length = strlen(o); while (offset < length) { next = strcspn(o + offset, "/"); if (next == 0) { encoded += "/"; offset += 1; continue; } segment = std::string(o + offset, next); encoded += amazonURLEncode(segment); offset += next; } return encoded; } void convertMessageDigestToLowercaseHex(const unsigned char *messageDigest, unsigned int mdLength, std::string &hexEncoded) { char *buffer = (char *)malloc((mdLength * 2) + 1); char *ptr = buffer; for (unsigned int i = 0; i < mdLength; ++i, ptr += 2) { snprintf(ptr, 3, "%02x", messageDigest[i]); } hexEncoded.assign(buffer, mdLength * 2); free(buffer); } bool doSha256(const std::string_view payload, unsigned char *messageDigest, unsigned int *mdLength) { EVP_MD_CTX *mdctx = EVP_MD_CTX_create(); if (mdctx == NULL) { return false; } if (!EVP_DigestInit_ex(mdctx, EVP_sha256(), NULL)) { EVP_MD_CTX_destroy(mdctx); return false; } if (!EVP_DigestUpdate(mdctx, payload.data(), payload.length())) { EVP_MD_CTX_destroy(mdctx); return false; } if (!EVP_DigestFinal_ex(mdctx, messageDigest, mdLength)) { EVP_MD_CTX_destroy(mdctx); return false; } EVP_MD_CTX_destroy(mdctx); return true; } bool createSignature(const std::string &secretAccessKey, const std::string &date, const std::string ®ion, const std::string &service, const std::string &stringToSign, std::string &signature) { unsigned int mdLength = 0; unsigned char messageDigest[EVP_MAX_MD_SIZE]; std::string saKey = "AWS4" + secretAccessKey; const unsigned char *hmac = HMAC(EVP_sha256(), saKey.c_str(), saKey.length(), (const unsigned char *)date.c_str(), date.length(), messageDigest, 
&mdLength); if (hmac == NULL) { return false; } unsigned int md2Length = 0; unsigned char messageDigest2[EVP_MAX_MD_SIZE]; hmac = HMAC(EVP_sha256(), messageDigest, mdLength, (const unsigned char *)region.c_str(), region.length(), messageDigest2, &md2Length); if (hmac == NULL) { return false; } hmac = HMAC(EVP_sha256(), messageDigest2, md2Length, (const unsigned char *)service.c_str(), service.length(), messageDigest, &mdLength); if (hmac == NULL) { return false; } const char c[] = "aws4_request"; hmac = HMAC(EVP_sha256(), messageDigest, mdLength, (const unsigned char *)c, sizeof(c) - 1, messageDigest2, &md2Length); if (hmac == NULL) { return false; } hmac = HMAC(EVP_sha256(), messageDigest2, md2Length, (const unsigned char *)stringToSign.c_str(), stringToSign.length(), messageDigest, &mdLength); if (hmac == NULL) { return false; } convertMessageDigestToLowercaseHex(messageDigest, mdLength, signature); return true; } std::string canonicalizeQueryString( const std::map &query_parameters) { std::string canonicalQueryString; for (auto i = query_parameters.begin(); i != query_parameters.end(); ++i) { // Step 1A: The map sorts the query parameters for us. Strictly // speaking, we should encode into a different AttributeValueMap // and then compose the string out of that, in case amazonURLEncode() // changes the sort order, but we don't specify parameters like that. // Step 1B: Encode the parameter names and values. std::string name = amazonURLEncode(i->first); std::string value = amazonURLEncode(i->second); // Step 1C: Separate parameter names from values with '='. canonicalQueryString += name + '=' + value; // Step 1D: Separate name-value pairs with '&'; canonicalQueryString += '&'; } // We'll always have a superflous trailing ampersand. 
if (!canonicalQueryString.empty()) { canonicalQueryString.erase(canonicalQueryString.end() - 1); } return canonicalQueryString; } } /* end namespace AWSv4Impl */ xrootd-s3-http-0.4.1/src/AWSv4-impl.hh000066400000000000000000000030151501635342300173070ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once #include #include #include namespace AWSv4Impl { std::string pathEncode(const std::string &original); std::string amazonURLEncode(const std::string &input); std::string canonicalizeQueryString(const std::map &qp); void convertMessageDigestToLowercaseHex(const unsigned char *messageDigest, unsigned int mdLength, std::string &hexEncoded); bool doSha256(const std::string_view payload, unsigned char *messageDigest, unsigned int *mdLength); bool createSignature(const std::string &secretAccessKey, const std::string &date, const std::string ®ion, const std::string &service, const std::string &stringToSign, std::string &signature); } // namespace AWSv4Impl xrootd-s3-http-0.4.1/src/CurlUtil.cc000066400000000000000000000211121501635342300171730ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 
2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #include "CurlUtil.hh" #include "CurlWorker.hh" #include "HTTPCommands.hh" #include "logging.hh" #include #include #include #include #include #include #include #include using namespace XrdHTTPServer; thread_local std::stack HandlerQueue::m_handles; HandlerQueue::HandlerQueue() { int filedes[2]; auto result = pipe(filedes); if (result == -1) { throw std::runtime_error(strerror(errno)); } m_read_fd = filedes[0]; m_write_fd = filedes[1]; }; CURL *HandlerQueue::GetHandle() { if (!m_handles.empty()) { CURL *result = m_handles.top(); m_handles.pop(); return result; } auto result = curl_easy_init(); if (result == nullptr) { return result; } curl_easy_setopt(result, CURLOPT_USERAGENT, "xrootd-s3/0.4.1"); curl_easy_setopt(result, CURLOPT_BUFFERSIZE, 32 * 1024); curl_easy_setopt(result, CURLOPT_NOSIGNAL, 1L); return result; } void HandlerQueue::RecycleHandle(CURL *curl) { m_handles.push(curl); } void HandlerQueue::Produce(HTTPRequest *handler) { std::unique_lock lk{m_mutex}; m_cv.wait(lk, [&] { return m_ops.size() < m_max_pending_ops; }); m_ops.push_back(handler); char ready[] = "1"; while (true) { auto result = write(m_write_fd, ready, 1); if (result == -1) { if (errno == EINTR) { continue; } throw std::runtime_error(strerror(errno)); } break; } lk.unlock(); m_cv.notify_one(); } HTTPRequest *HandlerQueue::Consume() { std::unique_lock lk(m_mutex); m_cv.wait(lk, [&] { return m_ops.size() > 0; }); auto result = 
std::move(m_ops.front()); m_ops.pop_front(); char ready[1]; while (true) { auto result = read(m_read_fd, ready, 1); if (result == -1) { if (errno == EINTR) { continue; } throw std::runtime_error(strerror(errno)); } break; } lk.unlock(); m_cv.notify_one(); return result; } HTTPRequest *HandlerQueue::TryConsume() { std::unique_lock lk(m_mutex); if (m_ops.size() == 0) { return nullptr; } auto result = std::move(m_ops.front()); m_ops.pop_front(); char ready[1]; while (true) { auto result = read(m_read_fd, ready, 1); if (result == -1) { if (errno == EINTR) { continue; } throw std::runtime_error(strerror(errno)); } break; } lk.unlock(); m_cv.notify_one(); return result; } void CurlWorker::RunStatic(CurlWorker *myself) { try { myself->Run(); } catch (std::exception &exc) { myself->m_logger.Log(LogMask::Error, "CurlWorker::RunStatic", "Curl worker got an exception:", exc.what()); } } void CurlWorker::Run() { // Create a copy of the shared_ptr here. Otherwise, when the main thread's // destructors run, there won't be any other live references to the // shared_ptr, triggering cleanup of the condition variable. Because we // purposely don't shutdown the worker threads, those threads may be waiting // on the condition variable; destroying a condition variable while a thread // is waiting on it is undefined behavior. auto queue_ref = m_queue; auto &queue = *queue_ref.get(); m_unpause_queue.reset(new HandlerQueue()); m_logger.Log(LogMask::Debug, "Run", "Started a curl worker"); CURLM *multi_handle = curl_multi_init(); if (multi_handle == nullptr) { throw std::runtime_error("Failed to create curl multi-handle"); } int running_handles = 0; time_t last_marker = time(NULL); CURLMcode mres = CURLM_OK; std::vector waitfds; waitfds.resize(2); // The `curl_multi_wait` call in the event loop needs to be interrupted when // additional work comes into one of the two queues (either the global queue // or the per-worker unpause queue). 
To do this, the queue objects will // write to a file descriptor when a new HTTP request is ready; we add these // FDs to the list of FDs for libcurl to poll in order to trigger a wakeup. // The `Consume`/`TryConsume` methods will have a side-effect of reading // from the pipe if a request is available. waitfds[0].fd = queue.PollFD(); waitfds[0].events = CURL_WAIT_POLLIN; waitfds[0].revents = 0; waitfds[1].fd = m_unpause_queue->PollFD(); waitfds[1].events = CURL_WAIT_POLLIN; waitfds[1].revents = 0; while (true) { while (running_handles < static_cast(m_max_ops)) { auto op = m_unpause_queue->TryConsume(); if (!op) { break; } op->ContinueHandle(); } while (running_handles < static_cast(m_max_ops)) { auto op = running_handles == 0 ? queue.Consume() : queue.TryConsume(); if (!op) { break; } op->SetUnpauseQueue(m_unpause_queue); auto curl = queue.GetHandle(); if (curl == nullptr) { m_logger.Log(LogMask::Warning, "Run", "Unable to allocate a curl handle"); op->Fail("E_NOMEM", "Unable to get allocate a curl handle"); continue; } try { if (!op->SetupHandle(curl)) { op->Fail(op->getErrorCode(), op->getErrorMessage()); } } catch (...) 
{ m_logger.Log(LogMask::Debug, "Run", "Unable to set up the curl handle"); op->Fail("E_NOMEM", "Failed to set up the curl handle for the operation"); continue; } m_op_map[curl] = op; auto mres = curl_multi_add_handle(multi_handle, curl); if (mres != CURLM_OK) { if (m_logger.getMsgMask() & LogMask::Debug) { std::stringstream ss; ss << "Unable to add operation to the curl multi-handle: " << curl_multi_strerror(mres); m_logger.Log(LogMask::Debug, "Run", ss.str().c_str()); } m_op_map.erase(curl); op->Fail("E_CURL_LIB", "Unable to add operation to the curl multi-handle"); continue; } running_handles += 1; } // Maintain the periodic reporting of thread activity time_t now = time(NULL); time_t next_marker = last_marker + m_marker_period; if (now >= next_marker) { if (m_logger.getMsgMask() & LogMask::Debug) { std::stringstream ss; ss << "Curl worker thread is running " << running_handles << " operations"; m_logger.Log(LogMask::Debug, "CurlWorker", ss.str().c_str()); } last_marker = now; } mres = curl_multi_wait(multi_handle, &waitfds[0], waitfds.size(), 50, nullptr); if (mres != CURLM_OK) { if (m_logger.getMsgMask() & LogMask::Warning) { std::stringstream ss; ss << "Failed to wait on multi-handle: " << mres; m_logger.Log(LogMask::Warning, "CurlWorker", ss.str().c_str()); } } // Do maintenance on the multi-handle int still_running; auto mres = curl_multi_perform(multi_handle, &still_running); if (mres == CURLM_CALL_MULTI_PERFORM) { continue; } else if (mres != CURLM_OK) { if (m_logger.getMsgMask() & LogMask::Warning) { std::stringstream ss; ss << "Failed to perform multi-handle operation: " << curl_multi_strerror(mres); m_logger.Log(LogMask::Warning, "CurlWorker", ss.str().c_str()); } break; } CURLMsg *msg; do { int msgq = 0; msg = curl_multi_info_read(multi_handle, &msgq); if (msg && (msg->msg == CURLMSG_DONE)) { auto iter = m_op_map.find(msg->easy_handle); if (iter == m_op_map.end()) { m_logger.Log(LogMask::Error, "CurlWorker", "Logic error: got a callback for an entry " 
"that doesn't exist"); mres = CURLM_BAD_EASY_HANDLE; break; } auto &op = iter->second; auto res = msg->data.result; m_logger.Log(LogMask::Dump, "Run", "Processing result from curl"); op->ProcessCurlResult(iter->first, res); op->ReleaseHandle(iter->first); op->Notify(); running_handles -= 1; curl_multi_remove_handle(multi_handle, iter->first); if (res == CURLE_OK) { // If the handle was successful, then we can recycle it. queue.RecycleHandle(iter->first); } else { curl_easy_cleanup(iter->first); } m_op_map.erase(iter); } } while (msg); } for (auto &map_entry : m_op_map) { map_entry.second->Fail("E_CURL_LIB", curl_multi_strerror(mres)); } m_op_map.clear(); } xrootd-s3-http-0.4.1/src/CurlUtil.hh000066400000000000000000000034221501635342300172110ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once #include #include #include #include #include #include // Forward dec'ls typedef void CURL; struct curl_slist; class HTTPRequest; // Returns a newly-created curl handle (no internal caching) CURL *GetHandle(bool verbose); /** * HandlerQueue is a deque of curl operations that need * to be performed. The object is thread safe and can * be waited on via poll(). 
* * The fact that it's poll'able is necessary because the * multi-curl driver thread is based on polling FD's */ class HandlerQueue { public: HandlerQueue(); void Produce(HTTPRequest *handler); HTTPRequest *Consume(); HTTPRequest *TryConsume(); int PollFD() const { return m_read_fd; } CURL *GetHandle(); void RecycleHandle(CURL *); private: std::deque m_ops; thread_local static std::stack m_handles; std::condition_variable m_cv; std::mutex m_mutex; const static unsigned m_max_pending_ops{20}; int m_read_fd{-1}; int m_write_fd{-1}; }; xrootd-s3-http-0.4.1/src/CurlWorker.hh000066400000000000000000000030441501635342300175450ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once #include #include typedef void CURL; class XrdSysError; class HTTPRequest; class HandlerQueue; class CurlWorker { public: CurlWorker(std::shared_ptr queue, XrdSysError &logger) : m_queue(queue), m_logger(logger) {} CurlWorker(const CurlWorker &) = delete; void Run(); static void RunStatic(CurlWorker *myself); static unsigned GetPollThreads() { return m_workers; } private: std::shared_ptr m_queue; std::shared_ptr m_unpause_queue; // Queue for notifications that a handle can be // unpaused. 
std::unordered_map m_op_map; XrdSysError &m_logger; const static unsigned m_workers{5}; const static unsigned m_max_ops{20}; const static unsigned m_marker_period{5}; }; xrootd-s3-http-0.4.1/src/Filter.cc000066400000000000000000000524651501635342300166740ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2025, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #include "Filter.hh" #include "logging.hh" #include #if defined(__GNU_SOURCE) #define FNMATCH_FLAGS (FNM_NOESCAPE | FNM_EXTMATCH) #else #define FNMATCH_FLAGS (FNM_NOESCAPE) #endif #include #include #include #include #include using namespace XrdHTTPServer; FilterFileSystem::FilterFileSystem(XrdOss *oss, XrdSysLogger *log, const char *configName, XrdOucEnv *envP) : XrdOssWrapper(*oss), m_oss(oss), m_log(log, "filter_") { if (!Config(configName)) { m_log.Emsg("Initialize", "Failed to configure the filter filesystem"); throw std::runtime_error("Failed to configure the filter filesystem"); } m_log.Emsg("Initialize", "FilterFileSystem initialized"); } FilterFileSystem::~FilterFileSystem() {} // Parse the provided file to configure the class // // We understand the following options: // - filter.trace [all|error|warning|info|debug|none] // - filter.glob [-a] [glob1] [glob2] ... 
// - filter.prefix [prefix1] [prefix2] // Each of the space-separated globs will be added to the list of permitted // paths for the filter. If `-a` is specified, then path components beginning // with a `.` character will be matched. The globs must be absolute paths. // // If a prefix is specified, everything underneath the prefix is permitted. // filter.prefix /foo // is equivalent to // filter.glob -a /foo/** bool FilterFileSystem::Config(const char *configfn) { m_log.setMsgMask(LogMask::Error | LogMask::Warning); XrdOucGatherConf filterConf("filter.trace filter.glob filter.prefix", &m_log); int result; if ((result = filterConf.Gather(configfn, XrdOucGatherConf::trim_lines)) < 0) { m_log.Emsg("Config", -result, "parsing config file", configfn); return false; } char *val; while (filterConf.GetLine()) { val = filterConf.GetToken(); if (!strcmp(val, "trace")) { m_log.setMsgMask(0); if (!(val = filterConf.GetToken())) { m_log.Emsg("Config", "filter.trace requires an argument. Usage: " "filter.trace [all|error|warning|info|debug|none]"); return false; } do { if (!strcmp(val, "all")) { m_log.setMsgMask(m_log.getMsgMask() | LogMask::All); } else if (!strcmp(val, "error")) { m_log.setMsgMask(m_log.getMsgMask() | LogMask::Error); } else if (!strcmp(val, "warning")) { m_log.setMsgMask(m_log.getMsgMask() | LogMask::Error | LogMask::Warning); } else if (!strcmp(val, "info")) { m_log.setMsgMask(m_log.getMsgMask() | LogMask::Error | LogMask::Warning | LogMask::Info); } else if (!strcmp(val, "debug")) { m_log.setMsgMask(m_log.getMsgMask() | LogMask::Error | LogMask::Warning | LogMask::Info | LogMask::Debug); } else if (!strcmp(val, "none")) { m_log.setMsgMask(0); } } while ((val = filterConf.GetToken())); } else if (!strcmp(val, "glob")) { if (!(val = filterConf.GetToken())) { m_log.Emsg("Config", "filter.glob requires an argument. 
" "Usage: filter.glob [-a] [glob1] [glob2] ..."); return false; } auto all = false; if (!strcmp(val, "-a")) { all = true; if (!(val = filterConf.GetToken())) { m_log.Emsg("Config", "filter.glob requires an argument. " "Usage: filter.glob [-a] [glob1] [glob2] ..."); return false; } } do { std::filesystem::path path(val); if (!path.is_absolute()) { m_log.Emsg("Config", "filter.glob requires an absolute path. Usage: " "filter.glob [-a] [glob1] [glob2] ..."); return false; } m_globs.push_back({all, std::move(path)}); } while ((val = filterConf.GetToken())); } else if (!strcmp(val, "prefix")) { if (!(val = filterConf.GetToken())) { m_log.Emsg("Config", "filter.prefix requires an argument. " "Usage: filter.prefix [prefix1] [prefix2] ..."); return false; } do { std::filesystem::path path(val); if (!path.is_absolute()) { m_log.Emsg( "Config", "filter.prefix requires an absolute path. Usage: " "filter.prefix [prefix1] [prefix2] ..."); return false; } bool success; std::tie(success, path) = SanitizePrefix(path); if (!success) { m_log.Emsg("Config", "filter.prefix requires an absolute prefix " "without globs. Usage: " "filter.prefix [prefix1] [prefix2] ..."); return false; } m_globs.push_back({true, (path / "**").lexically_normal()}); } while ((val = filterConf.GetToken())); } else { m_log.Emsg("Config", "Unknown configuration directive", val); return false; } } if (m_globs.empty()) { m_log.Emsg("Config", "No globs specified; will allow all paths"); return true; } for (const auto &glob : m_globs) { m_log.Log(LogMask::Info, "Config", "Will permit glob", glob.m_glob.string().c_str(), glob.m_match_dotfile ? "all" : ""); } return true; } // Given an administrator-provided prefix, sanitize it according to our rules. // // The function will *fail* if one of the following is true: // - Any path components are equal to '.' or '..' // - Any path components contain glob special characters of '[', '*', or '?'. 
// // If the prefix is acceptable, a returned prefix will be given that is // normalized according to std::filesystem::path's rules. // // Return is a boolean indicating success and the resulting prefix string. std::pair FilterFileSystem::SanitizePrefix(const std::filesystem::path &prefix) { if (!prefix.is_absolute()) { m_log.Emsg("SanitizePrefix", "Provided prefix must be absolute"); return {false, ""}; } for (const auto &component : prefix) { if (component == "." || component == "..") { m_log.Emsg( "SanitizePrefix", "Prefix may not contain a path component of '.' or '..':", prefix.c_str()); return {false, ""}; } if (component.string().find_first_of("[*?") != std::string::npos) { m_log.Emsg("SanitizePrefix", "Prefix may not contain a path component with any of " "the following characters: '*', '?', or '[':", prefix.c_str()); return {false, ""}; } } return {true, prefix.lexically_normal()}; } int FilterFileSystem::Chmod(const char *path, mode_t mode, XrdOucEnv *env) { return VerifyPath(path, true, &XrdOss::Chmod, path, mode, env); } int FilterFileSystem::Create(const char *tid, const char *path, mode_t mode, XrdOucEnv &env, int opts) { return VerifyPath(path, false, &XrdOss::Create, tid, path, mode, env, opts); } int FilterFileSystem::Mkdir(const char *path, mode_t mode, int mkpath, XrdOucEnv *envP) { return VerifyPath(path, true, &XrdOss::Mkdir, path, mode, mkpath, envP); } int FilterFileSystem::Reloc(const char *tident, const char *path, const char *cgName, const char *anchor) { if (!path || !cgName) { return -ENOENT; } bool partial; if (!Glob(path, partial)) { m_log.Log(LogMask::Debug, "Glob", "Failing relocation as source path matches no glob", path); return -ENOENT; } if (!Glob(cgName, partial)) { m_log.Log(LogMask::Debug, "Glob", "Failing relocation as destination path matches no glob", cgName); return -ENOENT; } return wrapPI.Reloc(tident, path, cgName, anchor); } int FilterFileSystem::Remdir(const char *path, int Opts, XrdOucEnv *envP) { return 
VerifyPath(path, true, &XrdOss::Remdir, path, Opts, envP); } int FilterFileSystem::Rename(const char *oPath, const char *nPath, XrdOucEnv *oEnvP, XrdOucEnv *nEnvP) { if (!oPath || !nPath) { return -ENOENT; } bool partial; if (!Glob(oPath, partial)) { m_log.Log(LogMask::Debug, "Glob", "Failing rename as source path matches no glob", oPath); return -ENOENT; } if (!Glob(nPath, partial)) { m_log.Log(LogMask::Debug, "Glob", "Failing rename as destination path matches no glob", nPath); return -ENOENT; } return wrapPI.Rename(oPath, nPath, oEnvP, nEnvP); } int FilterFileSystem::Stat(const char *path, struct stat *buff, int opts, XrdOucEnv *env) { return VerifyPath(path, true, &XrdOss::Stat, path, buff, opts, env); } int FilterFileSystem::StatFS(const char *path, char *buff, int &blen, XrdOucEnv *env) { return VerifyPath(path, true, &XrdOss::StatFS, path, buff, blen, env); } int FilterFileSystem::StatLS(XrdOucEnv &env, const char *path, char *buff, int &blen) { return VerifyPath(path, true, &XrdOss::StatLS, env, path, buff, blen); } int FilterFileSystem::StatPF(const char *path, struct stat *buff, int opts) { return VerifyPath( path, true, static_cast( &XrdOss::StatPF), path, buff, opts); } int FilterFileSystem::StatPF(const char *path, struct stat *buff) { return VerifyPath(path, true, static_cast( &XrdOss::StatPF), path, buff); } int FilterFileSystem::StatVS(XrdOssVSInfo *vsP, const char *sname, int updt) { return VerifyPath(sname, true, &XrdOss::StatVS, vsP, sname, updt); } int FilterFileSystem::StatXA(const char *path, char *buff, int &blen, XrdOucEnv *env) { return VerifyPath(path, true, &XrdOss::StatXA, path, buff, blen, env); } int FilterFileSystem::StatXP(const char *path, unsigned long long &attr, XrdOucEnv *env) { return VerifyPath(path, true, &XrdOss::StatXP, path, attr, env); } int FilterFileSystem::Truncate(const char *path, unsigned long long fsize, XrdOucEnv *env) { return VerifyPath(path, false, &XrdOss::Truncate, path, fsize, env); } int 
FilterFileSystem::Unlink(const char *path, int Opts, XrdOucEnv *env) { return VerifyPath(path, false, &XrdOss::Unlink, path, Opts, env); } int FilterFileSystem::Lfn2Pfn(const char *Path, char *buff, int blen) { return VerifyPath(Path, true, static_cast( &XrdOss::Lfn2Pfn), Path, buff, blen); } const char *FilterFileSystem::Lfn2Pfn(const char *Path, char *buff, int blen, int &rc) { bool partial; if (!Glob(Path, partial)) { rc = -ENOENT; return nullptr; } return wrapPI.Lfn2Pfn(Path, buff, blen, rc); } // Helper template for filesystem methods that need to verify the path passes // the filter. // // If `partial_ok` is set, then a partial match is permissible (typically, this // is done for stat- or directory-related methods to allow interacting with the // directory hierarchy). template int FilterFileSystem::VerifyPath(std::string_view path, bool partial_ok, Fn &&fn, Args &&...args) { bool partial; if (!Glob(path, partial)) { m_log.Log(LogMask::Debug, "Glob", "Path matches no glob", path.data()); return -ENOENT; } else if (!partial_ok && partial) { m_log.Log(LogMask::Debug, "Glob", "Path is a prefix of a glob", path.data()); return -EISDIR; } // Invoke the provided method `fn` on the underlying XrdOss object we are // wrapping (`wrapPI`). This template is agnostic to the actual arguments to // the method; they are just forwarded straight through. // // For example, if this object is wrapping an `S3FileSystem` object, then // ``` // std::invoke(&XrdOss::Open, wrapPI, std::forward("/foo", // O_RDONLY, 0, nullptr)); // ``` // is just a funky way of saying `wrapPI->Open("/foo", O_RDONLY, 0, // nullptr);` return std::invoke(fn, wrapPI, std::forward(args)...); } // Returns true if the path matches any of the globs, false otherwise. // bool FilterFileSystem::Glob(const char *path, bool &partial) { if (!path) { return false; } return Glob(std::filesystem::path(path), partial); } // Returns true if the path matches any of the globs, false otherwise. 
// bool FilterFileSystem::Glob(std::string_view path_view, bool &partial) { return Glob(std::filesystem::path(path_view), partial); } // Returns true if the path matches any of the globs, false otherwise. // // If the path is a prefix of any of the globs, `partial` will be set to true // on return. For example, if the glob is /foo/*/*.txt and the path is // /foo/bar, then partial will be set to true. bool FilterFileSystem::Glob(const std::filesystem::path &path, bool &partial) { if (m_globs.empty()) { partial = false; return true; } if (!path.is_absolute()) { return false; } for (const auto &glob : m_globs) { if (GlobOne(path, glob, partial)) { return true; } } return false; } // Core logic for evaluating a path against a single glob match pattern. // // Returns `true` if the path matches the glob - or if the path is the prefix // of a potential path that matches the glob. In the latter case, `partial` // will be set to `true` on return. bool FilterFileSystem::GlobOne(const std::filesystem::path &path, const glob &glob, bool &partial) { auto path_iter = path.begin(); auto match = true; for (auto glob_iter = glob.m_glob.begin(); glob_iter != glob.m_glob.end(); ++glob_iter, ++path_iter) { // The path has fewer components than the provided glob. if (path_iter == path.end()) { // The globstar can match against zero components, meaning if the // full glob ends in globstar (and that's the next component), then // this is actually a full match. if (*glob_iter == "**" && ++glob_iter == glob.m_glob.end()) { partial = false; } else { partial = true; } return true; } // Logic for the "globstar" operator. The globstar evaluates to // match zero-or-more paths. if (*glob_iter == "**") { auto cur_glob_component = glob_iter; // If the globstar is at the end of the glob, then we match // any subsequent part of the path. 
if (++cur_glob_component == glob.m_glob.end()) { return true; } else { // To evaluate the globstar, we compare the remainder of the // glob against the remainder of the path. Since the globstar // can consume any number of path components, we start with the // shortest possible path and recursively call `GlobOne` with // increasingly longer ones. // // So, if the glob is /foo/**/2*/bar and the path is // /foo/1/22/bar, then the new glob after the globstar will be // `/2/bar`. The for-loop below will start with comparing the // glob `/2*/bar` against the path `/bar`, then grow the path // and compare against `/22/bar` (then matching). auto new_glob = std::filesystem::path("/"); for (auto iter = cur_glob_component; iter != glob.m_glob.end(); iter++) { new_glob /= *iter; } // If there is a "dot file" in the path and we are not matching // dotfiles, then we must have a full match as the globstar // operator doesn't match such path components by default. bool has_dotfile = false; if (!glob.m_match_dotfile) { // Detect the presence of a dotfile for (auto iter = path_iter; iter != path.end(); iter++) { const auto &path_component = iter->string(); if (!path_component.empty() && path_component[0] == '.') { has_dotfile = true; break; } } } std::string cur_glob = *cur_glob_component; auto potential_match = true; for (auto back_iter = --path.end();; back_iter--) { auto subpath = std::filesystem::path("/"); auto path_prefix_has_dotfile = false; if (has_dotfile) { for (auto iter = path_iter; iter != back_iter; iter++) { const auto &path_component = iter->string(); if (!path_component.empty() && path_component[0] == '.') { path_prefix_has_dotfile = true; break; } } } for (auto iter = back_iter; iter != path.end(); iter++) { subpath /= *iter; } bool subpartial; if (GlobOne(subpath, {glob.m_match_dotfile, new_glob}, subpartial)) { if (!subpartial && !path_prefix_has_dotfile) { return true; } else if (path_prefix_has_dotfile) { potential_match = false; } } else if (has_dotfile) { 
potential_match = false; } // By placing the break condition here, instead of in the // for construct, we test the case where back_iter == // path_iter. if (back_iter == path_iter) { break; } } // The globstar can always 'consume' all the path components, // resuming in a partial match beyond it. That is, // Path: /foo/bar/baz, Glob: /foo/**/idx.txt // Is going to be a partial match because the path could be the // prefix for // /foo/bar/baz/idx.txt if (potential_match) { partial = true; return true; } return false; } } // Rely on the libc fnmatch function to implement the glob logic for a // single component. int rc; if (FNM_NOMATCH == (rc = fnmatch(glob_iter->c_str(), path_iter->c_str(), FNMATCH_FLAGS | (glob.m_match_dotfile ? 0 : FNM_PERIOD)))) { match = false; break; } else if (rc) { m_log.Log(LogMask::Warning, "Glob", "Error in fnmatch for glob", glob_iter->c_str(), std::to_string(rc).c_str()); } } // If the path has more components than the glob -- and there were no // globstar operators found -- then we cannot have a match. Otherwise, we // consumed all the glob and path components and we have a full match. 
if (path_iter != path.end()) { match = false; } if (match) { partial = false; return true; } return false; } XrdOssDF *FilterFileSystem::newFile(char const *user) { std::unique_ptr wrapped(m_oss->newFile(user)); return new FilterFile(std::move(wrapped), m_log, *this); } XrdOssDF *FilterFileSystem::newDir(char const *user) { std::unique_ptr wrapped(m_oss->newDir(user)); return new FilterDir(std::move(wrapped), m_log, *this); } FilterDir::~FilterDir() {} int FilterDir::Opendir(const char *path, XrdOucEnv &env) { if (!path) { return -ENOENT; } bool partial; if (!m_oss.Glob(path, partial)) { m_log.Log(LogMask::Debug, "Opendir", "Ignoring directory as it passes no glob", path); return -ENOENT; } m_prefix = path; return wrapDF.Opendir(path, env); } int FilterDir::Readdir(char *buff, int blen) { m_stat_avail = false; while (true) { auto rc = wrapDF.Readdir(buff, blen); if (rc) { return rc; } if (*buff == '\0') { return 0; } else if (!strcmp(buff, ".") || !strcmp(buff, "..")) { // Always permit special current and parent directory links for // `Readdir`. They allow the users of the XrdHttp web interface // to navigate the directory hierarchy through the rendered HTML. // If they're actually used to construct a path, they will get // normalized out by the XrdOfs layer before being passed back to // the XrdOss layer (this class). 
return 0; } auto path = m_prefix / std::string_view(buff, strnlen(buff, blen)); bool partial; if (m_oss.Glob(path, partial)) { if (partial) { struct stat buff; auto rc = StatRet(&buff); if (rc) { return rc; } if (buff.st_mode & S_IFDIR) { return 0; } m_stat_avail = false; if (m_log.getMsgMask() & LogMask::Debug) { m_log.Log(LogMask::Debug, "Readdir", "Ignoring file in directory as it is a prefix " "for a glob", path.string().c_str()); } } else { return 0; } } else if (m_log.getMsgMask() & LogMask::Debug) { m_log.Log(LogMask::Debug, "Readdir", "Ignoring directory component as it passes no glob", path.string().c_str()); } } } // Returns the struct stat corresponding to the current // directory entry name. // // If `Readdir` required a stat of the path to determine // if its visible, the cached copy may be served here. int FilterDir::StatRet(struct stat *buff) { if (m_stat_avail) { memcpy(buff, &m_stat, sizeof(m_stat)); return 0; } auto rc = wrapDF.StatRet(&m_stat); if (!rc) { m_stat_avail = true; memcpy(buff, &m_stat, sizeof(m_stat)); } return rc; } int FilterDir::Close(long long *retsz) { m_prefix.clear(); return wrapDF.Close(retsz); } FilterFile::~FilterFile() {} int FilterFile::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { bool partial; if (!m_oss.Glob(path, partial)) { m_log.Log(LogMask::Debug, "Glob", "Failing file open as path matches no glob", path); return -ENOENT; } else if (partial) { m_log.Log(LogMask::Debug, "Glob", "Failing file open as path is a prefix of a glob", path); return -EISDIR; } return wrapDF.Open(path, Oflag, Mode, env); } extern "C" { XrdVERSIONINFO(XrdOssAddStorageSystem2, Filter); XrdOss *XrdOssAddStorageSystem2(XrdOss *curr_oss, XrdSysLogger *logger, const char *config_fn, const char *parms, XrdOucEnv *envP) { XrdSysError log(logger, "filter_"); try { return new FilterFileSystem(curr_oss, logger, config_fn, envP); } catch (std::runtime_error &re) { log.Emsg("Initialize", "Encountered a runtime failure when initializing 
the " "filter filesystem:", re.what()); return nullptr; } } } xrootd-s3-http-0.4.1/src/Filter.hh000066400000000000000000000116721501635342300167010ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2025, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #include #include #include #include #include #include #include class XrdOucEnv; class XrdSecEntity; class XrdSysError; // // A filesystem wrapper which only permits accessing specific paths. // // For example, if the filter is "/foo/*.txt", then the underlying // path /foo/test.txt will be accessible but the paths /bar.txt and // /foo/test.csv will result in an ENOENT. 
// class FilterFileSystem final : public XrdOssWrapper { public: struct glob { bool m_match_dotfile{false}; std::filesystem::path m_glob; }; FilterFileSystem(XrdOss *oss, XrdSysLogger *log, const char *configName, XrdOucEnv *envP); virtual ~FilterFileSystem(); bool Config(const char *configfn); XrdOssDF *newDir(const char *user = 0) override; XrdOssDF *newFile(const char *user = 0) override; virtual int Chmod(const char *path, mode_t mode, XrdOucEnv *env = 0) override; virtual int Create(const char *tid, const char *path, mode_t mode, XrdOucEnv &env, int opts = 0) override; virtual int Mkdir(const char *path, mode_t mode, int mkpath = 0, XrdOucEnv *envP = 0) override; virtual int Reloc(const char *tident, const char *path, const char *cgName, const char *anchor = 0) override; virtual int Remdir(const char *path, int Opts = 0, XrdOucEnv *envP = 0) override; virtual int Rename(const char *oPath, const char *nPath, XrdOucEnv *oEnvP = 0, XrdOucEnv *nEnvP = 0) override; virtual int Stat(const char *path, struct stat *buff, int opts = 0, XrdOucEnv *env = 0) override; virtual int StatFS(const char *path, char *buff, int &blen, XrdOucEnv *env = 0) override; virtual int StatLS(XrdOucEnv &env, const char *path, char *buff, int &blen) override; virtual int StatPF(const char *path, struct stat *buff, int opts) override; virtual int StatPF(const char *path, struct stat *buff) override; virtual int StatVS(XrdOssVSInfo *vsP, const char *sname = 0, int updt = 0) override; virtual int StatXA(const char *path, char *buff, int &blen, XrdOucEnv *env = 0) override; virtual int StatXP(const char *path, unsigned long long &attr, XrdOucEnv *env = 0) override; virtual int Truncate(const char *path, unsigned long long fsize, XrdOucEnv *env = 0) override; virtual int Unlink(const char *path, int Opts = 0, XrdOucEnv *env = 0) override; virtual int Lfn2Pfn(const char *Path, char *buff, int blen) override; virtual const char *Lfn2Pfn(const char *Path, char *buff, int blen, int &rc) override; 
bool Glob(const char *path, bool &partial); bool Glob(std::string_view path, bool &partial); bool Glob(const std::filesystem::path &path, bool &partial); bool GlobOne(const std::filesystem::path &path, const glob &glob, bool &partial); std::pair SanitizePrefix(const std::filesystem::path &); private: template int VerifyPath(std::string_view path, bool partial_ok, Fn &&fn, Args &&...args); std::vector m_globs; std::unique_ptr m_oss; XrdSysError m_log; }; class FilterFile final : public XrdOssWrapDF { public: FilterFile(std::unique_ptr wrapDF, XrdSysError &log, FilterFileSystem &oss) : XrdOssWrapDF(*wrapDF), m_wrapped(std::move(wrapDF)), m_log(log), m_oss(oss) {} virtual ~FilterFile(); int Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) override; private: std::unique_ptr m_wrapped; XrdSysError &m_log; FilterFileSystem &m_oss; }; class FilterDir final : public XrdOssWrapDF { public: FilterDir(std::unique_ptr wrapDF, XrdSysError &log, FilterFileSystem &oss) : XrdOssWrapDF(*wrapDF), m_wrapped(std::move(wrapDF)), m_log(log), m_oss(oss) {} virtual ~FilterDir(); virtual int Opendir(const char *path, XrdOucEnv &env) override; virtual int Readdir(char *buff, int blen) override; virtual int StatRet(struct stat *buff) override; virtual int Close(long long *retsz = 0) override; private: bool m_stat_avail{false}; struct stat m_stat; std::unique_ptr m_wrapped; XrdSysError &m_log; FilterFileSystem &m_oss; std::filesystem::path m_prefix; }; xrootd-s3-http-0.4.1/src/HTTPCommands.cc000066400000000000000000000600471501635342300177030ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. 
You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include "CurlUtil.hh" #include "CurlWorker.hh" #include "HTTPCommands.hh" #include "logging.hh" #include "shortfile.hh" #include "stl_string_utils.hh" using namespace XrdHTTPServer; std::shared_ptr HTTPRequest::m_queue = std::make_unique(); bool HTTPRequest::m_workers_initialized = false; std::vector HTTPRequest::m_workers; std::chrono::steady_clock::duration HTTPRequest::m_timeout_duration = std::chrono::seconds(10); // // "This function gets called by libcurl as soon as there is data received // that needs to be saved. The size of the data pointed to by ptr is size // multiplied with nmemb, it will not be zero terminated. Return the number // of bytes actually taken care of. If that amount differs from the amount // passed to your function, it'll signal an error to the library. This will // abort the transfer and return CURLE_WRITE_ERROR." // // We also make extensive use of this function in the XML parsing code, // for pretty much exactly the same reason. 
// size_t HTTPRequest::handleResults(const void *ptr, size_t size, size_t nmemb, void *me_ptr) { if (size == 0 || nmemb == 0) { return 0; } auto me = reinterpret_cast(me_ptr); if (!me) { return 0; } std::string_view source(static_cast(ptr), size * nmemb); // std::cout << "Handling results with size " << (size * nmemb) << " and // HTTP verb " << me->httpVerb << "\n"; if (me->httpVerb == "GET") { if (!me->responseCode) { auto rv = curl_easy_getinfo( me->m_curl_handle, CURLINFO_RESPONSE_CODE, &(me->responseCode)); if (rv != CURLE_OK) { me->errorCode = "E_CURL_LIB"; me->errorMessage = "curl_easy_getinfo() failed."; return 0; } } if (me->getResponseCode() == me->expectedResponseCode && me->requestResult() != nullptr) { if (!me->m_result_buffer_initialized) { me->m_result_buffer_initialized = true; me->m_result_buffer = *me->requestResult(); // std::cout << "Handling data for GET with response code " << // me->responseCode << "and expected response size " << // me->m_result.size() << "\n"; } if (me->m_result_buffer.size() < source.size()) { me->errorCode = "E_CURL_LIB"; me->errorMessage = "Curl had response with too-long result."; return 0; } memcpy(const_cast(me->m_result_buffer.data()), source.data(), source.size()); me->m_result_buffer = me->m_result_buffer.substr(source.size()); } else { me->m_result.append(source); } } else { me->m_result.append(source); } return (size * nmemb); } HTTPRequest::~HTTPRequest() {} #define SET_CURL_SECURITY_OPTION(A, B, C) \ { \ CURLcode rv##B = curl_easy_setopt(A, B, C); \ if (rv##B != CURLE_OK) { \ errorCode = "E_CURL_LIB"; \ errorMessage = "curl_easy_setopt( " #B " ) failed."; \ return false; \ } \ } bool HTTPRequest::parseProtocol(const std::string &url, std::string &protocol) { auto i = url.find("://"); if (i == std::string::npos) { return false; } protocol = substring(url, 0, i); return true; } bool HTTPRequest::SendHTTPRequest(const std::string &payload) { if ((m_protocol != "http") && (m_protocol != "https")) { errorCode = 
"E_INVALID_SERVICE_URL"; errorMessage = "Service URL not of a known protocol (http[s])."; m_log.Log(LogMask::Warning, "HTTPRequest::SendHTTPRequest", "Host URL '", hostUrl.c_str(), "' not of a known protocol (http[s])."); return false; } headers["Content-Type"] = "binary/octet-stream"; return sendPreparedRequest(hostUrl, payload, payload.size(), true); } static void dump(XrdSysError *log, const char *text, unsigned char *ptr, size_t size) { size_t i; size_t c; unsigned int width = 0x10; if (!log) return; std::stringstream ss; std::string stream; formatstr(stream, "%s, %10.10ld bytes (0x%8.8lx)\n", text, (long)size, (long)size); ss << stream; for (i = 0; i < size; i += width) { formatstr(stream, "%4.4lx: ", (long)i); ss << stream; /* show hex to the left */ for (c = 0; c < width; c++) { if (i + c < size) { formatstr(stream, "%02x ", ptr[i + c]); ss << stream; } else { ss << " "; } } /* show data on the right */ for (c = 0; (c < width) && (i + c < size); c++) { char x = (ptr[i + c] >= 0x20 && ptr[i + c] < 0x80) ? 
ptr[i + c] : '.'; ss << x; } ss << std::endl; } log->Log(LogMask::Dump, "Curl", ss.str().c_str()); } static void dumpPlain(XrdSysError *log, const char *text, unsigned char *ptr, size_t size) { if (!log) return; std::string info; formatstr(info, "%s, %10.10ld bytes (0x%8.8lx)\n", text, (long)size, (long)size); log->Log(LogMask::Dump, "Curl", info.c_str()); } int debugCallback(CURL *handle, curl_infotype ci, char *data, size_t size, void *clientp) { const char *text; (void)handle; /* prevent compiler warning */ auto log = static_cast(clientp); if (!log) return 0; switch (ci) { case CURLINFO_TEXT: log->Log(LogMask::Dump, "CurlInfo", std::string(data, size).c_str()); default: /* in case a new one is introduced to shock us */ return 0; case CURLINFO_HEADER_OUT: text = "=> Send header"; dumpPlain(log, text, (unsigned char *)data, size); break; } return 0; } int debugAndDumpCallback(CURL *handle, curl_infotype ci, char *data, size_t size, void *clientp) { const char *text; (void)handle; /* prevent compiler warning */ auto log = reinterpret_cast(clientp); if (!log) return 0; std::stringstream ss; switch (ci) { case CURLINFO_TEXT: if (size && data[size - 1] == '\n') { ss << std::string(data, size - 1); } else { ss << std::string(data, size); } log->Log(LogMask::Dump, "CurlInfo", ss.str().c_str()); default: /* in case a new one is introduced to shock us */ return 0; case CURLINFO_HEADER_OUT: text = "=> Send header"; dumpPlain(log, text, (unsigned char *)data, size); break; case CURLINFO_DATA_OUT: text = "=> Send data"; break; case CURLINFO_SSL_DATA_OUT: text = "=> Send SSL data"; break; case CURLINFO_HEADER_IN: text = "<= Recv header"; break; case CURLINFO_DATA_IN: text = "<= Recv data"; break; case CURLINFO_SSL_DATA_IN: text = "<= Recv SSL data"; break; } dump(log, text, (unsigned char *)data, size); return 0; } void HTTPRequest::Payload::NotifyPaused() { m_parent.Notify(); } // A callback function that gets passed to curl_easy_setopt for reading data // from the payload 
size_t HTTPRequest::ReadCallback(char *buffer, size_t size, size_t n, void *v) { // The callback gets the void pointer that we set with CURLOPT_READDATA. In // this case, it's a pointer to an HTTPRequest::Payload struct that contains // the data to be sent, along with the offset of the data that has already // been sent. HTTPRequest::Payload *payload = (HTTPRequest::Payload *)v; if (payload->m_parent.Timeout()) { payload->m_parent.errorCode = "E_TIMEOUT"; payload->m_parent.errorMessage = "Upload operation timed out"; return CURL_READFUNC_ABORT; } if (payload->sentSoFar == static_cast(payload->data.size())) { payload->sentSoFar = 0; if (payload->final) { return 0; } else { payload->NotifyPaused(); return CURL_READFUNC_PAUSE; } } size_t request = size * n; if (request > payload->data.size()) { request = payload->data.size(); } if (payload->sentSoFar + request > payload->data.size()) { request = payload->data.size() - payload->sentSoFar; } memcpy(buffer, payload->data.data() + payload->sentSoFar, request); payload->sentSoFar += request; return request; } // Periodic callback from libcurl reporting overall transfer progress. // This is used to detect transfer stalls, where no data has been sent for // at least `m_transfer_stall` duration (defaults to 10s). // // Note: // - dltotal/ultotal are the total number of bytes to be downloaded/uploaded. // - dlnow/ulnow are the number of bytes downloaded/uploaded so far. 
int HTTPRequest::XferInfoCallback(void *clientp, curl_off_t /*dltotal*/, curl_off_t dlnow, curl_off_t /*ultotal*/, curl_off_t ulnow) { auto me = reinterpret_cast(clientp); if ((me->m_bytes_recv != dlnow) || (me->m_bytes_sent != ulnow)) { me->m_last_movement = std::chrono::steady_clock::now(); } else if (std::chrono::steady_clock::now() - me->m_last_movement > m_transfer_stall) { me->errorCode = "E_TIMEOUT"; me->errorMessage = "I/O stall during transfer"; return 1; } me->m_bytes_recv = dlnow; me->m_bytes_sent = ulnow; return 0; } bool HTTPRequest::sendPreparedRequestNonblocking(const std::string &uri, const std::string_view payload, off_t payload_size, bool final) { m_uri = uri; m_payload = payload; m_payload_size = payload_size; if (!m_is_streaming && !final) { m_is_streaming = true; } if (m_timeout) { errorCode = "E_TIMEOUT"; errorMessage = "Transfer has timed out due to inactivity."; return false; } if (!errorCode.empty()) { return false; } m_last_request = std::chrono::steady_clock::now(); m_final = final; // Detect whether we were given an undersized buffer in non-streaming mode if (!m_is_streaming && payload_size && payload_size != static_cast(payload.size())) { errorCode = "E_LOGIC"; std::stringstream ss; ss << "Logic error: given an undersized payload (have " << payload.size() << ", expected " << payload_size << ") in a non-streaming mode"; errorMessage = ss.str(); return false; } m_result_ready = false; if (m_unpause_queue) { m_unpause_queue->Produce(this); } else { m_last_movement = std::chrono::steady_clock::now(); m_queue->Produce(this); } return true; } bool HTTPRequest::sendPreparedRequest(const std::string &uri, const std::string_view payload, off_t payload_size, bool final) { if (!sendPreparedRequestNonblocking(uri, payload, payload_size, final)) { return false; } std::unique_lock lk(m_mtx); m_cv.wait(lk, [&] { return m_result_ready; }); return errorCode.empty(); } void HTTPRequest::Tick(std::chrono::steady_clock::time_point now) { if 
(!m_is_streaming) { return; } if (now - m_last_request <= m_timeout_duration) { return; } if (m_timeout) { return; } m_timeout = true; if (m_unpause_queue) { std::unique_lock lk(m_mtx); m_result_ready = false; m_unpause_queue->Produce(this); m_cv.wait(lk, [&] { return m_result_ready; }); } } bool HTTPRequest::ReleaseHandle(CURL *curl) { m_curl_handle = nullptr; if (curl == nullptr) return false; // Note: Any option that's conditionally set in `HTTPRequest::SetupHandle` // must be restored to the original state here. // // Only changing back the things we explicitly set is a conscious decision // here versus using `curl_easy_reset`; we are trying to avoid whacking // all the configuration of the handle. curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, nullptr); curl_easy_setopt(curl, CURLOPT_WRITEDATA, nullptr); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, nullptr); curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, nullptr); curl_easy_setopt(curl, CURLOPT_SOCKOPTFUNCTION, nullptr); curl_easy_setopt(curl, CURLOPT_SOCKOPTDATA, nullptr); curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, nullptr); curl_easy_setopt(curl, CURLOPT_DEBUGDATA, nullptr); curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L); curl_easy_setopt(curl, CURLOPT_NOBODY, 0); curl_easy_setopt(curl, CURLOPT_POST, 0); curl_easy_setopt(curl, CURLOPT_UPLOAD, 0); curl_easy_setopt(curl, CURLOPT_HEADER, 0); curl_easy_setopt(curl, CURLOPT_SSLCERT, nullptr); curl_easy_setopt(curl, CURLOPT_SSLKEY, nullptr); return true; } bool HTTPRequest::ContinueHandle() { if (!m_curl_handle) { return false; } m_callback_payload->data = m_payload; m_callback_payload->final = m_final; m_callback_payload->sentSoFar = 0; curl_easy_pause(m_curl_handle, CURLPAUSE_CONT); return true; } bool HTTPRequest::SetupHandle(CURL *curl) { m_log.Log(XrdHTTPServer::Debug, "SetupHandle", "Sending HTTP request", m_uri.c_str()); if (curl == nullptr) { errorCode = "E_CURL_LIB"; errorMessage = "curl_easy_init() failed."; return false; } auto rv = 
curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, m_errorBuffer); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_ERRORBUFFER ) failed."; return false; } rv = curl_easy_setopt(curl, CURLOPT_URL, m_uri.c_str()); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_URL ) failed."; return false; } if (httpVerb == "HEAD") { rv = curl_easy_setopt(curl, CURLOPT_NOBODY, 1); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_HEAD ) failed."; return false; } } if (httpVerb == "POST") { rv = curl_easy_setopt(curl, CURLOPT_POST, 1); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_POST ) failed."; return false; } rv = curl_easy_setopt(curl, CURLOPT_POSTFIELDS, m_payload.data()); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_POSTFIELDS ) failed."; return false; } if (m_is_streaming) { errorCode = "E_NOT_IMPL"; errorMessage = "Streaming posts not implemented in backend; internal error."; } } if (httpVerb == "PUT") { rv = curl_easy_setopt(curl, CURLOPT_UPLOAD, 1); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_UPLOAD ) failed."; return false; } // Our HTTPRequest instance should have a pointer to the payload data // and the offset of the data Here, we tell curl_easy_setopt to use the // read_callback function to read the data from the payload m_callback_payload = std::unique_ptr( new HTTPRequest::Payload{m_payload, 0, m_final, *this}); rv = curl_easy_setopt(curl, CURLOPT_READDATA, m_callback_payload.get()); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_READDATA ) failed."; return false; } rv = curl_easy_setopt(curl, CURLOPT_READFUNCTION, HTTPRequest::ReadCallback); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; 
this->errorMessage = "curl_easy_setopt( CURLOPT_READFUNCTION ) failed."; return false; } if (m_payload_size || !m_is_streaming) { if (curl_easy_setopt(curl, CURLOPT_INFILESIZE_LARGE, m_payload_size) != CURLE_OK) { errorCode = "E_CURL_LIB"; errorMessage = "curl_easy_setopt( CURLOPT_INFILESIZE_LARGE ) failed."; } } } rv = curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_NOPROGRESS ) failed."; return false; } if (includeResponseHeader) { rv = curl_easy_setopt(curl, CURLOPT_HEADER, 1); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_HEADER ) failed."; return false; } } rv = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &handleResults); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_WRITEFUNCTION ) failed."; return false; } rv = curl_easy_setopt(curl, CURLOPT_WRITEDATA, this); if (rv != CURLE_OK) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_easy_setopt( CURLOPT_WRITEDATA ) failed."; return false; } if (curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) != CURLE_OK) { errorCode = "E_CURL_LIB"; errorMessage = "curl_easy_setopt( CURLOPT_FOLLOWLOCATION ) failed."; return false; } if (curl_easy_setopt(curl, CURLOPT_XFERINFOFUNCTION, HTTPRequest::XferInfoCallback) != CURLE_OK) { errorCode = "E_CURL_LIB"; errorMessage = "Failed to set the transfer info callback function."; return false; } if (curl_easy_setopt(curl, CURLOPT_XFERINFODATA, this) != CURLE_OK) { errorCode = "E_CURL_LIB"; errorMessage = "Failed to set the transfer info callback data."; return false; } // // Set security options. 
// SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSL_VERIFYPEER, 1); SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSL_VERIFYHOST, 2); std::string CAFile = ""; std::string CAPath = ""; auto x509_ca_dir = getenv("X509_CERT_DIR"); if (x509_ca_dir != nullptr && x509_ca_dir[0] != '\0') { SET_CURL_SECURITY_OPTION(curl, CURLOPT_CAPATH, x509_ca_dir); } auto x509_ca_file = getenv("X509_CERT_FILE"); if (x509_ca_file != nullptr) { SET_CURL_SECURITY_OPTION(curl, CURLOPT_CAINFO, x509_ca_file); } // // Configure for x.509 operation. // if (m_protocol == "x509" && requiresSignature) { auto accessKeyFilePtr = getAccessKey(); auto secretKeyFilePtr = getSecretKey(); if (accessKeyFilePtr && secretKeyFilePtr) { SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSLKEYTYPE, "PEM"); SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSLKEY, *secretKeyFilePtr->c_str()); SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSLCERTTYPE, "PEM"); SET_CURL_SECURITY_OPTION(curl, CURLOPT_SSLCERT, *accessKeyFilePtr->c_str()); } } if (m_token) { const auto iter = headers.find("Authorization"); if (iter == headers.end()) { std::string token; if (m_token->Get(token)) { if (!token.empty()) { headers["Authorization"] = "Bearer " + token; } } else { errorCode = "E_TOKEN"; errorMessage = "failed to load authorization token from file"; return false; } } } std::string headerPair; m_header_list.reset(); for (auto i = headers.begin(); i != headers.end(); ++i) { formatstr(headerPair, "%s: %s", i->first.c_str(), i->second.c_str()); auto tmp_headers = curl_slist_append(m_header_list.get(), headerPair.c_str()); if (tmp_headers == nullptr) { this->errorCode = "E_CURL_LIB"; this->errorMessage = "curl_slist_append() failed."; return false; } m_header_list.release(); m_header_list.reset(tmp_headers); } rv = curl_easy_setopt(curl, CURLOPT_HTTPHEADER, m_header_list.get()); if (rv != CURLE_OK) { errorCode = "E_CURL_LIB"; errorMessage = "curl_easy_setopt( CURLOPT_HTTPHEADER ) failed."; return false; } if (m_log.getMsgMask() & LogMask::Debug) { rv = 
curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, debugCallback); if (rv != CURLE_OK) { errorCode = "E_CURL_LIB"; errorMessage = "Failed to set the debug function"; return false; } rv = curl_easy_setopt(curl, CURLOPT_DEBUGDATA, &m_log); if (rv != CURLE_OK) { errorCode = "E_CURL_LIB"; errorMessage = "Failed to set the debug function handler data"; return false; } } if (m_log.getMsgMask() & LogMask::Dump) { rv = curl_easy_setopt(curl, CURLOPT_DEBUGFUNCTION, debugAndDumpCallback); if (rv != CURLE_OK) { errorCode = "E_CURL_LIB"; errorMessage = "Failed to set the debug function"; return false; } rv = curl_easy_setopt(curl, CURLOPT_DEBUGDATA, &m_log); if (rv != CURLE_OK) { errorCode = "E_CURL_LIB"; errorMessage = "Failed to set the debug function handler data"; return false; } } if (m_log.getMsgMask() & (LogMask::Dump | LogMask::Debug)) { if (curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L) != CURLE_OK) { errorCode = "E_CURL_LIB"; errorMessage = "Failed to enable verbose mode for libcurl"; return false; } } m_curl_handle = curl; return true; } bool HTTPRequest::Fail(const std::string &ecode, const std::string &emsg) { errorCode = ecode; errorMessage = emsg; Notify(); return true; } void HTTPRequest::Notify() { std::lock_guard lk(m_mtx); m_result_ready = true; modifyResponse(m_result); m_cv.notify_one(); } void HTTPRequest::ProcessCurlResult(CURL *curl, CURLcode rv) { if (rv != 0) { if (errorCode.empty()) { errorCode = "E_CURL_IO"; std::ostringstream error; error << "curl failed (" << rv << "): '" << curl_easy_strerror(rv) << "'."; errorMessage = error.str(); } return; } responseCode = 0; rv = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &responseCode); if (rv != CURLE_OK) { // So we contacted the server but it returned such gibberish that // CURL couldn't identify the response code. Let's assume that's // bad news. Since we're already terminally failing the request, // don't bother to check if this was our last chance at retrying. 
errorCode = "E_CURL_LIB"; errorMessage = "curl_easy_getinfo() failed."; return; } // 503 is "Service Unavailable"; S3 uses this for throttling if (responseCode == 503 && (m_result.find("RequestLimitExceeded") != std::string::npos)) { m_result.clear(); errorCode = "E_HTTP_REQUEST_LIMIT_EXCEEDED"; errorMessage = "Request limit exceeded."; m_log.Log(LogMask::Warning, "HTTPRequest::ProcessCurlResult", "Request limit exceeded for ", m_uri.c_str()); return; } if (responseCode != expectedResponseCode) { formatstr(errorCode, "E_HTTP_RESPONSE_NOT_EXPECTED (response %lu != expected %lu)", responseCode, expectedResponseCode); errorMessage = m_result; if (errorMessage.empty()) { formatstr( errorMessage, "HTTP response was %lu, not %lu, and no body was returned.", responseCode, expectedResponseCode); } return; } return; } // --------------------------------------------------------------------------- HTTPUpload::~HTTPUpload() {} bool HTTPUpload::SendRequest(const std::string &payload, off_t offset, size_t size) { if (offset != 0 || size != 0) { std::string range; formatstr(range, "bytes=%lld-%lld", static_cast(offset), static_cast(offset + size - 1)); headers["Range"] = range.c_str(); } httpVerb = "PUT"; return SendHTTPRequest(payload); } void HTTPRequest::Init(XrdSysError &log) { if (!m_workers_initialized) { for (unsigned idx = 0; idx < CurlWorker::GetPollThreads(); idx++) { m_workers.push_back(new CurlWorker(m_queue, log)); std::thread t(CurlWorker::RunStatic, m_workers.back()); t.detach(); } m_workers_initialized = true; } CURLcode rv = curl_global_init(CURL_GLOBAL_ALL); if (rv != 0) { throw std::runtime_error("libcurl failed to initialize"); } } // --------------------------------------------------------------------------- HTTPDownload::~HTTPDownload() {} bool HTTPDownload::SendRequest(off_t offset, size_t size) { if (offset != 0 || size != 0) { std::string range; formatstr(range, "bytes=%lld-%lld", static_cast(offset), static_cast(offset + size - 1)); headers["Range"] = 
range.c_str(); this->expectedResponseCode = 206; } httpVerb = "GET"; std::string noPayloadAllowed; return SendHTTPRequest(noPayloadAllowed); } // --------------------------------------------------------------------------- HTTPHead::~HTTPHead() {} bool HTTPHead::SendRequest() { httpVerb = "HEAD"; includeResponseHeader = true; std::string noPayloadAllowed; return SendHTTPRequest(noPayloadAllowed); } // --------------------------------------------------------------------------- xrootd-s3-http-0.4.1/src/HTTPCommands.hh000066400000000000000000000263541501635342300177200ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once #include "TokenFile.hh" #include #include #include #include #include #include #include #include class XrdSysError; class HandlerQueue; class CurlWorker; class HTTPRequest { friend class CurlWorker; public: HTTPRequest(const std::string &hostUrl, XrdSysError &log, const TokenFile *token) : hostUrl(hostUrl), m_header_list(nullptr, &curl_slist_free_all), m_log(log), m_token(token) { // Parse the URL and populate // What to do if the function returns false? 
// TODO: Figure out best way to deal with this if (!parseProtocol(hostUrl, m_protocol)) { errorCode = "E_INVALID_HOST_URL"; errorMessage = "Failed to parse protocol from host/service URL."; } } virtual ~HTTPRequest(); virtual const std::string *getAccessKey() const { return nullptr; } virtual const std::string *getSecretKey() const { return nullptr; } virtual bool parseProtocol(const std::string &url, std::string &protocol); virtual bool SendHTTPRequest(const std::string &payload); unsigned long getResponseCode() const { return responseCode; } const std::string &getErrorCode() const { return errorCode; } const std::string &getErrorMessage() const { return errorMessage; } const std::string &getResultString() const { return m_result; } // State of the payload upload for the curl callbacks struct Payload { std::string_view data; off_t sentSoFar{0}; bool final{true}; HTTPRequest &m_parent; void NotifyPaused(); // Notify the parent request the curl handle has // been paused }; // Initialize libraries for HTTP. // // Should be called at least once per application from a non-threaded // context. static void Init(XrdSysError &); // Perform maintenance of the request. void Tick(std::chrono::steady_clock::time_point); // Sets the duration after which an in-progress operation may be considered // stalled and hence timeout. static void SetStallTimeout(std::chrono::steady_clock::duration timeout) { m_timeout_duration = timeout; } // Return the stall timeout duration currently in use. static std::chrono::steady_clock::duration GetStallTimeout() { return m_timeout_duration; } protected: // Send the request to the HTTP server. // Blocks until the request has completed. // If `final` is set to `false`, the HTTPRequest object will start streaming // a request and assume that `sendPreparedRequest` will be repeated until // all data is provided (the sum total of the chunks given is the // payload_size). 
If payload_size is 0 and final is false, this indicates // the complete size of the PUT is unknown and chunked encoding will be // used. // // - url: URL, including query parameters, to use. // - payload: The payload contents when uploading. // - payload_size: Size of the entire payload (not just the current chunk). // - final: True if this is the last or only payload for the request. False // otherwise. bool sendPreparedRequest(const std::string &url, const std::string_view payload, off_t payload_size, bool final); // Send the request to the HTTP server. // Returns immediately, not waiting for the result. // // If `final` is set to `false`, the HTTPRequest object will start streaming // a request and assume that `sendPreparedRequest` will be repeated until // all data is provided (the sum total of the chunks given is the // payload_size). If payload_size is 0 and final is false, this indicates // the complete size of the PUT is unknown and chunked encoding will be // used. // // - url: URL, including query parameters, to use. // - payload: The payload contents when uploading. // - payload_size: Size of the entire payload (not just the current chunk). // - final: True if this is the last or only payload for the request. False // otherwise. bool sendPreparedRequestNonblocking(const std::string &uri, const std::string_view payload, off_t payload_size, bool final); // Called by the curl handler thread that the request has been finished. virtual void Notify(); // Returns the standalone buffer if a sub-classe's externally-managed one // is supposed to be used. // // If the std::string_view is empty, then it's assumed the HTTPRequest // itself owns the result buffer and should create one. Note that, // on errors, the HTTPRequest result buffer is still used. virtual std::string_view *requestResult() { return nullptr; } const std::string &getProtocol() { return m_protocol; } // Returns true if the command is a streaming/partial request. 
// A streaming request is one that requires multiple calls to // `sendPreparedRequest` to complete. bool isStreamingRequest() const { return m_is_streaming; } // Record the unpause queue associated with this request. // // Future continuations of this request will be sent directly to this queue. void SetUnpauseQueue(std::shared_ptr queue) { m_unpause_queue = queue; } // Return whether or not the request has timed out since the last // call to send more data. bool Timeout() const { return m_timeout; } // Function that can be overridden by test cases, allowing modification // of the server response virtual void modifyResponse(std::string &) {} typedef std::map AttributeValueMap; AttributeValueMap query_parameters; AttributeValueMap headers; std::string hostUrl; bool requiresSignature{false}; struct timespec signatureTime; std::string errorMessage; std::string errorCode; // The contents of the result from the HTTP server. // If this is a GET and we got the expectedResponseCode, then // the results are populated in the m_result_buffer instead. std::string m_result; unsigned long responseCode{0}; unsigned long expectedResponseCode = 200; bool includeResponseHeader{false}; std::string httpVerb{"POST"}; std::unique_ptr m_callback_payload; std::unique_ptr m_header_list; // Headers associated with the request XrdSysError &m_log; private: virtual bool SetupHandle( CURL *curl); // Configure the curl handle to be used by a given request. virtual bool ContinueHandle(); // Continue the request processing after a pause. void ProcessCurlResult( CURL *curl, CURLcode rv); // Process a curl command that ran to completion. 
bool Fail(const std::string &ecode, const std::string &emsg); // Record a failure occurring for the request // (curl request did not complete) bool ReleaseHandle( CURL *curl); // Cleanup any resources associated with the curl handle CURL *getHandle() const { return m_curl_handle; } // Callback for libcurl when the library is ready to read more data from our // buffer. static size_t ReadCallback(char *buffer, size_t size, size_t n, void *v); // Handle the callback from libcurl static size_t handleResults(const void *ptr, size_t size, size_t nmemb, void *me_ptr); // Transfer information callback from libcurl static int XferInfoCallback(void *clientp, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow); const TokenFile *m_token{nullptr}; // The following members manage the work queue and workers. static bool m_workers_initialized; // The global state of the worker initialization. static std::shared_ptr m_queue; // Global queue for all HTTP requests to be processed. std::shared_ptr m_unpause_queue{ nullptr}; // Queue to notify the request can be resumed. static std::vector m_workers; // Set of all the curl worker threads. // The following variables manage the state of the request. std::mutex m_mtx; // Mutex guarding the results from the curl worker's callback // Condition variable to notify the curl worker completed the callback. std::condition_variable m_cv; bool m_final{false}; // Flag indicating this is the last sendPreparedRequest // call of the overall HTTPRequest bool m_is_streaming{ false}; // Flag indicating this command is a streaming request. bool m_timeout{false}; // Flag indicating the request has timed out. bool m_result_ready{false}; // Flag indicating the results data is ready. bool m_result_buffer_initialized{ false}; // Flag indicating whether the result buffer view has been // initialized. off_t m_payload_size{0}; // Size of the entire upload payload; 0 if unknown. 
std::string m_protocol; std::string m_uri; // URL to request from libcurl std::string_view m_payload; // Total number of bytes received from the server off_t m_bytes_recv{0}; // Total number of bytes sent to server off_t m_bytes_sent{0}; // Time of last data movement (upload or download). Used to detect transfer // stalls std::chrono::steady_clock::time_point m_last_movement; // Transfer stall timeout static constexpr std::chrono::steady_clock::duration m_transfer_stall{ std::chrono::seconds(9)}; // The contents of a successful GET request. std::string_view m_result_buffer; CURL *m_curl_handle{nullptr}; // The curl handle for the ongoing request char m_errorBuffer[CURL_ERROR_SIZE]; // Static error buffer for libcurl // Time when the last request was sent on this object; used to determine // whether the operation has timed out. std::chrono::steady_clock::time_point m_last_request{ std::chrono::steady_clock::now()}; // Duration after which a partially-completed request will timeout if // no progress has been made. 
static std::chrono::steady_clock::duration m_timeout_duration; }; class HTTPUpload : public HTTPRequest { public: HTTPUpload(const std::string &h, const std::string &o, XrdSysError &log, const TokenFile *token) : HTTPRequest(h, log, token), object(o) { hostUrl = hostUrl + "/" + object; } virtual ~HTTPUpload(); virtual bool SendRequest(const std::string &payload, off_t offset, size_t size); protected: std::string object; std::string path; }; class HTTPDownload : public HTTPRequest { public: HTTPDownload(const std::string &h, const std::string &o, XrdSysError &log, const TokenFile *token) : HTTPRequest(h, log, token), object(o) { hostUrl = hostUrl + "/" + object; } virtual ~HTTPDownload(); virtual bool SendRequest(off_t offset, size_t size); protected: std::string object; }; class HTTPHead : public HTTPRequest { public: HTTPHead(const std::string &h, const std::string &o, XrdSysError &log, const TokenFile *token) : HTTPRequest(h, log, token), object(o) { hostUrl = hostUrl + "/" + object; } virtual ~HTTPHead(); virtual bool SendRequest(); protected: std::string object; }; xrootd-s3-http-0.4.1/src/HTTPDirectory.hh000066400000000000000000000024721501635342300201160ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ***************************************************************/ #pragma once #include "XrdOss/XrdOss.hh" #include "XrdOuc/XrdOucEnv.hh" class XrdSysError; class HTTPDirectory : public XrdOssDF { public: HTTPDirectory(XrdSysError &log) : m_log(log) {} virtual ~HTTPDirectory() {} virtual int Opendir(const char *path, XrdOucEnv &env) override { return -ENOSYS; } virtual int Readdir(char *buff, int blen) override { return -ENOSYS; } virtual int StatRet(struct stat *statStruct) override { return -ENOSYS; } virtual int Close(long long *retsz = 0) override { return -ENOSYS; } protected: XrdSysError &m_log; }; xrootd-s3-http-0.4.1/src/HTTPFile.cc000066400000000000000000000214361501635342300170200ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
// Ensures that path is of the form /storagePrefix/object and returns
// the resulting object value. The storagePrefix does not necessarily begin
// with '/'
//
// Examples:
// /foo/bar, /foo/bar/baz -> baz
// storage.com/foo, /storage.com/foo/bar -> bar
// /baz, /foo/bar -> error
//
// - storagePrefixStr: configured prefix, with or without a leading '/'.
// - pathStr: requested path; its first component (normally the root '/')
//   is always skipped.
// - object: receives the remainder of the path after the prefix.
//
// Returns 0 on success, or -ENOENT if pathStr is null or empty, or if the
// path does not start with the complete prefix.
int parse_path(const std::string &storagePrefixStr, const char *pathStr,
			   std::string &object) {
	if (pathStr == nullptr) {
		return -ENOENT;
	}

	const std::filesystem::path storagePath(pathStr);
	const std::filesystem::path storagePrefix(storagePrefixStr);

	auto prefixComponents = storagePrefix.begin();
	auto pathComponents = storagePath.begin();

	// An empty path has no components at all; advancing the iterator would
	// step past end().
	if (pathComponents == storagePath.end()) {
		return -ENOENT;
	}
	++pathComponents;
	if (!storagePrefixStr.empty() && storagePrefixStr[0] == '/') {
		++prefixComponents;
	}

	// Walk both sequences in lock-step. The path may be shorter than the
	// prefix, so its iterator must be checked before it is dereferenced.
	while (prefixComponents != storagePrefix.end() &&
		   pathComponents != storagePath.end() &&
		   *prefixComponents == *pathComponents) {
		++prefixComponents;
		++pathComponents;
	}

	// Check that nothing diverged before reaching end of service name
	if (prefixComponents != storagePrefix.end()) {
		return -ENOENT;
	}

	// Whatever is left of the path is the object name.
	std::filesystem::path obj_path;
	for (; pathComponents != storagePath.end(); ++pathComponents) {
		obj_path /= *pathComponents;
	}

	object = obj_path.string();
	return 0;
}
(!configured_url_base.empty()) { configured_hostUrl = configured_url_base; configured_hostname = m_oss->getStoragePrefix(); } // // Check the path for validity. // std::string object; int rv = parse_path(configured_hostname, path, object); if (rv != 0) { return rv; } m_object = object; m_hostname = configured_hostname; m_hostUrl = configured_hostUrl; if (!Oflag) { struct stat buf; return Fstat(&buf); } return 0; } ssize_t HTTPFile::Read(void *buffer, off_t offset, size_t size) { HTTPDownload download(m_hostUrl, m_object, m_log, m_oss->getToken()); m_log.Log( LogMask::Debug, "HTTPFile::Read", "About to perform download from HTTPFile::Read(): hostname / object:", m_hostname.c_str(), m_object.c_str()); if (!download.SendRequest(offset, size)) { std::stringstream ss; ss << "Failed to send GetObject command: " << download.getResponseCode() << "'" << download.getResultString() << "'"; m_log.Log(LogMask::Warning, "HTTPFile::Read", ss.str().c_str()); return 0; } const std::string &bytes = download.getResultString(); memcpy(buffer, bytes.data(), bytes.size()); return bytes.size(); } int HTTPFile::Fstat(struct stat *buff) { if (m_stat) { memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0600 | S_IFREG; buff->st_nlink = 1; buff->st_uid = 1; buff->st_gid = 1; buff->st_size = content_length; buff->st_mtime = last_modified; buff->st_atime = 0; buff->st_ctime = 0; buff->st_dev = 0; buff->st_ino = 0; return 0; } m_log.Log(LogMask::Debug, "HTTPFile::Fstat", "About to perform HTTPFile::Fstat():", m_hostUrl.c_str(), m_object.c_str()); HTTPHead head(m_hostUrl, m_object, m_log, m_oss->getToken()); if (!head.SendRequest()) { // SendRequest() returns false for all errors, including ones // where the server properly responded with something other // than code 200. If xrootd wants us to distinguish between // these cases, head.getResponseCode() is initialized to 0, so // we can check. 
auto httpCode = head.getResponseCode(); if (httpCode) { std::stringstream ss; ss << "HEAD command failed: " << head.getResponseCode() << ": " << head.getResultString(); m_log.Log(LogMask::Warning, "HTTPFile::Fstat", ss.str().c_str()); switch (httpCode) { case 404: return -ENOENT; case 500: return -EIO; case 403: return -EPERM; default: return -EIO; } } else { std::stringstream ss; ss << "Failed to send HEAD command: " << head.getErrorCode() << ": " << head.getErrorMessage(); m_log.Log(LogMask::Warning, "HTTPFile::Fstat", ss.str().c_str()); return -EIO; } } std::string headers = head.getResultString(); std::string line; size_t current_newline = 0; size_t next_newline = std::string::npos; size_t last_character = headers.size(); while (current_newline != std::string::npos && current_newline != last_character - 1 && last_character) { next_newline = headers.find("\r\n", current_newline + 2); std::string line = substring(headers, current_newline + 2, next_newline); size_t colon = line.find(":"); if (colon != std::string::npos && colon != line.size()) { std::string attr = substring(line, 0, colon); toLower(attr); // Some servers might not follow conventional // capitalization schemes std::string value = substring(line, colon + 1); trim(value); if (attr == "content-length") { this->content_length = std::stol(value); } else if (attr == "last-modified") { struct tm t; char *eos = strptime(value.c_str(), "%a, %d %b %Y %T %Z", &t); if (eos == &value.c_str()[value.size()]) { time_t epoch = timegm(&t); if (epoch != -1) { this->last_modified = epoch; } } } } current_newline = next_newline; } if (buff) { memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0600 | S_IFREG; buff->st_nlink = 1; buff->st_uid = 1; buff->st_gid = 1; buff->st_size = this->content_length; buff->st_mtime = this->last_modified; buff->st_atime = 0; buff->st_ctime = 0; buff->st_dev = 0; buff->st_ino = 0; } m_stat = true; return 0; } ssize_t HTTPFile::Write(const void *buffer, off_t offset, size_t size) { 
HTTPUpload upload(m_hostUrl, m_object, m_log, m_oss->getToken()); std::string payload((char *)buffer, size); if (!upload.SendRequest(payload, offset, size)) { m_log.Emsg("Open", "upload.SendRequest() failed"); return -ENOENT; } else { m_log.Emsg("Open", "upload.SendRequest() succeeded"); return 0; } } int HTTPFile::Close(long long *retsz) { m_log.Emsg("Close", "Closed our HTTP file"); return 0; } extern "C" { /* This function is called when we are wrapping something. */ XrdOss *XrdOssAddStorageSystem2(XrdOss *curr_oss, XrdSysLogger *Logger, const char *config_fn, const char *parms, XrdOucEnv *envP) { XrdSysError log(Logger, "httpserver_"); log.Emsg("Initialize", "HTTP filesystem cannot be stacked with other filesystems"); return nullptr; } /* This function is called when it is the top level file system and we are not wrapping anything */ XrdOss *XrdOssGetStorageSystem2(XrdOss *native_oss, XrdSysLogger *Logger, const char *config_fn, const char *parms, XrdOucEnv *envP) { auto log = new XrdSysError(Logger, "httpserver_"); envP->Export("XRDXROOTD_NOPOSC", "1"); try { HTTPRequest::Init(*log); g_http_oss = new HTTPFileSystem(Logger, config_fn, envP); return g_http_oss; } catch (std::runtime_error &re) { log->Emsg("Initialize", "Encountered a runtime failure", re.what()); return nullptr; } } XrdOss *XrdOssGetStorageSystem(XrdOss *native_oss, XrdSysLogger *Logger, const char *config_fn, const char *parms) { return XrdOssGetStorageSystem2(native_oss, Logger, config_fn, parms, nullptr); } } // end extern "C" XrdVERSIONINFO(XrdOssGetStorageSystem, HTTPserver); XrdVERSIONINFO(XrdOssGetStorageSystem2, HTTPserver); XrdVERSIONINFO(XrdOssAddStorageSystem2, HTTPserver); xrootd-s3-http-0.4.1/src/HTTPFile.hh000066400000000000000000000060561501635342300170330ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the 
"License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once #include "HTTPFileSystem.hh" #include "XrdOss/XrdOss.hh" #include "XrdOuc/XrdOucEnv.hh" #include "XrdSec/XrdSecEntity.hh" #include "XrdSec/XrdSecEntityAttr.hh" #include "XrdVersion.hh" #include int parse_path(const std::string &hostname, const char *path, std::string &object); class HTTPFile : public XrdOssDF { public: HTTPFile(XrdSysError &log, HTTPFileSystem *oss); virtual ~HTTPFile() {} virtual int Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) override; int Fchmod(mode_t mode) override { return -ENOSYS; } void Flush() override {} virtual int Fstat(struct stat *buf) override; int Fsync() override { return -ENOSYS; } int Fsync(XrdSfsAio *aiop) override { return -ENOSYS; } int Ftruncate(unsigned long long size) override { return -ENOSYS; } off_t getMmap(void **addr) override { return 0; } int isCompressed(char *cxidp = 0) override { return -ENOSYS; } ssize_t pgRead(void *buffer, off_t offset, size_t rdlen, uint32_t *csvec, uint64_t opts) override { return -ENOSYS; } int pgRead(XrdSfsAio *aioparm, uint64_t opts) override { return -ENOSYS; } ssize_t pgWrite(void *buffer, off_t offset, size_t wrlen, uint32_t *csvec, uint64_t opts) override { return -ENOSYS; } int pgWrite(XrdSfsAio *aioparm, uint64_t opts) override { return -ENOSYS; } ssize_t Read(off_t offset, size_t size) override { return -ENOSYS; } virtual ssize_t Read(void *buffer, off_t offset, size_t size) override; int Read(XrdSfsAio 
*aiop) override { return -ENOSYS; } ssize_t ReadRaw(void *buffer, off_t offset, size_t size) override { return -ENOSYS; } ssize_t ReadV(XrdOucIOVec *readV, int rdvcnt) override { return -ENOSYS; } virtual ssize_t Write(const void *buffer, off_t offset, size_t size) override; int Write(XrdSfsAio *aiop) override { return -ENOSYS; } ssize_t WriteV(XrdOucIOVec *writeV, int wrvcnt) override { return -ENOSYS; } virtual int Close(long long *retsz = 0) override; // upstream is abstract definition size_t getContentLength() { return content_length; } time_t getLastModified() { return last_modified; } private: bool m_stat{false}; XrdSysError &m_log; HTTPFileSystem *m_oss; std::string m_hostname; std::string m_hostUrl; std::string m_object; size_t content_length; time_t last_modified; }; xrootd-s3-http-0.4.1/src/HTTPFileSystem.cc000066400000000000000000000113551501635342300202240ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ***************************************************************/ #include "HTTPFileSystem.hh" #include "HTTPDirectory.hh" #include "HTTPFile.hh" #include "logging.hh" #include #include #include #include #include #include #include #include #include #include #include #include #include "stl_string_utils.hh" using namespace XrdHTTPServer; HTTPFileSystem::HTTPFileSystem(XrdSysLogger *lp, const char *configfn, XrdOucEnv * /*envP*/) : m_log(lp, "httpserver_"), m_token("", &m_log) { m_log.Say("------ Initializing the HTTP filesystem plugin."); if (!Config(lp, configfn)) { throw std::runtime_error("Failed to configure HTTP filesystem plugin."); } } HTTPFileSystem::~HTTPFileSystem() {} bool HTTPFileSystem::handle_required_config(const std::string &name_from_config, const char *desired_name, const std::string &source, std::string &target) { if (name_from_config != desired_name) { return true; } if (source.empty()) { std::string error; formatstr(error, "%s must specify a value", desired_name); m_log.Emsg("Config", error.c_str()); return false; } std::stringstream ss; ss << "Setting " << desired_name << "=" << source; m_log.Log(LogMask::Debug, "Config", ss.str().c_str()); target = source; return true; } bool HTTPFileSystem::Config(XrdSysLogger *lp, const char *configfn) { XrdOucEnv myEnv; XrdOucGatherConf httpserver_conf("httpserver.", &m_log); int result; if ((result = httpserver_conf.Gather(configfn, XrdOucGatherConf::full_lines)) < 0) { m_log.Emsg("Config", -result, "parsing config file", configfn); return false; } std::string attribute; std::string token_file; m_log.setMsgMask(0); while (httpserver_conf.GetLine()) { auto attribute = httpserver_conf.GetToken(); if (!strcmp(attribute, "httpserver.trace")) { if (!XrdHTTPServer::ConfigLog(httpserver_conf, m_log)) { m_log.Emsg("Config", "Failed to configure the log level"); } continue; } auto value = httpserver_conf.GetToken(); if (!value) { continue; } if (!handle_required_config(attribute, "httpserver.host_name", value, 
http_host_name) || !handle_required_config(attribute, "httpserver.host_url", value, http_host_url) || !handle_required_config(attribute, "httpserver.url_base", value, m_url_base) || !handle_required_config(attribute, "httpserver.storage_prefix", value, m_storage_prefix) || !handle_required_config(attribute, "httpserver.token_file", value, token_file)) { return false; } } if (m_url_base.empty()) { if (http_host_name.empty()) { m_log.Emsg("Config", "httpserver.host_name not specified; this or " "httpserver.url_base are required"); return false; } if (http_host_url.empty()) { m_log.Emsg("Config", "httpserver.host_url not specified; this or " "httpserver.url_base are required"); return false; } } if (!token_file.empty()) { m_token = TokenFile(token_file, &m_log); } return true; } // Object Allocation Functions // XrdOssDF *HTTPFileSystem::newDir(const char *user) { return new HTTPDirectory(m_log); } XrdOssDF *HTTPFileSystem::newFile(const char *user) { return new HTTPFile(m_log, this); } int HTTPFileSystem::Stat(const char *path, struct stat *buff, int opts, XrdOucEnv *env) { std::string error; m_log.Emsg("Stat", "Stat'ing path", path); HTTPFile httpFile(m_log, this); int rv = httpFile.Open(path, 0, (mode_t)0, *env); if (rv) { m_log.Emsg("Stat", "Failed to open path:", path); } // Assume that HTTPFile::FStat() doesn't write to buff unless it succeeds. return httpFile.Fstat(buff); } int HTTPFileSystem::Create(const char *tid, const char *path, mode_t mode, XrdOucEnv &env, int opts) { // Is path valid? 
std::string object; std::string hostname = this->getHTTPHostName(); int rv = parse_path(hostname, path, object); if (rv != 0) { return rv; } return 0; } xrootd-s3-http-0.4.1/src/HTTPFileSystem.hh000066400000000000000000000077501501635342300202420ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once #include "TokenFile.hh" #include #include #include #include #include #include #include class HTTPFileSystem : public XrdOss { public: HTTPFileSystem(XrdSysLogger *lp, const char *configfn, XrdOucEnv *envP); virtual ~HTTPFileSystem(); virtual bool Config(XrdSysLogger *lp, const char *configfn); XrdOssDF *newDir(const char *user = 0); XrdOssDF *newFile(const char *user = 0); int Chmod(const char *path, mode_t mode, XrdOucEnv *env = 0) { return -ENOSYS; } void Connect(XrdOucEnv &env) {} int Create(const char *tid, const char *path, mode_t mode, XrdOucEnv &env, int opts = 0); void Disc(XrdOucEnv &env) {} void EnvInfo(XrdOucEnv *env) {} uint64_t Features() { return 0; } int FSctl(int cmd, int alen, const char *args, char **resp = 0) { return -ENOSYS; } int Init(XrdSysLogger *lp, const char *cfn) { return 0; } int Init(XrdSysLogger *lp, const char *cfn, XrdOucEnv *en) { return 0; } int Mkdir(const char *path, mode_t mode, int mkpath = 0, XrdOucEnv *env = 
0) { return -ENOSYS; } int Reloc(const char *tident, const char *path, const char *cgName, const char *anchor = 0) { return -ENOSYS; } int Remdir(const char *path, int Opts = 0, XrdOucEnv *env = 0) { return -ENOSYS; } int Rename(const char *oPath, const char *nPath, XrdOucEnv *oEnvP = 0, XrdOucEnv *nEnvP = 0) { return -ENOSYS; } int Stat(const char *path, struct stat *buff, int opts = 0, XrdOucEnv *env = 0); int Stats(char *buff, int blen) { return -ENOSYS; } int StatFS(const char *path, char *buff, int &blen, XrdOucEnv *env = 0) { return -ENOSYS; } int StatLS(XrdOucEnv &env, const char *path, char *buff, int &blen) { return -ENOSYS; } int StatPF(const char *path, struct stat *buff, int opts) { return -ENOSYS; } int StatPF(const char *path, struct stat *buff) { return -ENOSYS; } int StatVS(XrdOssVSInfo *vsP, const char *sname = 0, int updt = 0) { return -ENOSYS; } int StatXA(const char *path, char *buff, int &blen, XrdOucEnv *env = 0) { return -ENOSYS; } int StatXP(const char *path, unsigned long long &attr, XrdOucEnv *env = 0) { return -ENOSYS; } int Truncate(const char *path, unsigned long long fsize, XrdOucEnv *env = 0) { return -ENOSYS; } int Unlink(const char *path, int Opts = 0, XrdOucEnv *env = 0) { return -ENOSYS; } int Lfn2Pfn(const char *Path, char *buff, int blen) { return -ENOSYS; } const char *Lfn2Pfn(const char *Path, char *buff, int blen, int &rc) { return nullptr; } const std::string &getHTTPHostName() const { return http_host_name; } const std::string &getHTTPHostUrl() const { return http_host_url; } const std::string &getHTTPUrlBase() const { return m_url_base; } const std::string &getStoragePrefix() const { return m_storage_prefix; } const TokenFile *getToken() const { return &m_token; } protected: XrdSysError m_log; bool handle_required_config(const std::string &name_from_config, const char *desired_name, const std::string &source, std::string &target); private: std::string http_host_name; std::string http_host_url; std::string m_url_base; 
std::string m_storage_prefix; TokenFile m_token; }; xrootd-s3-http-0.4.1/src/S3AccessInfo.cc000066400000000000000000000026101501635342300176550ustar00rootroot00000000000000// // Created by Rich Wellner on 2/29/24. // #include "S3AccessInfo.hh" const std::string &S3AccessInfo::getS3BucketName() const { return s3_bucket_name; } void S3AccessInfo::setS3BucketName(const std::string &s3BucketName) { s3_bucket_name = s3BucketName; } const std::string &S3AccessInfo::getS3ServiceName() const { return s3_service_name; } void S3AccessInfo::setS3ServiceName(const std::string &s3ServiceName) { s3_service_name = s3ServiceName; } const std::string &S3AccessInfo::getS3Region() const { return s3_region; } void S3AccessInfo::setS3Region(const std::string &s3Region) { s3_region = s3Region; } const std::string &S3AccessInfo::getS3ServiceUrl() const { return s3_service_url; } void S3AccessInfo::setS3ServiceUrl(const std::string &s3ServiceUrl) { s3_service_url = s3ServiceUrl; } const std::string &S3AccessInfo::getS3AccessKeyFile() const { return s3_access_key_file; } void S3AccessInfo::setS3AccessKeyFile(const std::string &s3AccessKeyFile) { s3_access_key_file = s3AccessKeyFile; } const std::string &S3AccessInfo::getS3SecretKeyFile() const { return s3_secret_key_file; } void S3AccessInfo::setS3SecretKeyFile(const std::string &s3SecretKeyFile) { s3_secret_key_file = s3SecretKeyFile; } const std::string &S3AccessInfo::getS3UrlStyle() const { return s3_url_style; } void S3AccessInfo::setS3UrlStyle(const std::string &s3UrlStyle) { s3_url_style = s3UrlStyle; } xrootd-s3-http-0.4.1/src/S3AccessInfo.hh000066400000000000000000000021251501635342300176700ustar00rootroot00000000000000// // Created by Rich Wellner on 2/29/24. 
// #pragma once #include class S3AccessInfo { public: const std::string &getS3BucketName() const; void setS3BucketName(const std::string &s3BucketName); const std::string &getS3ServiceName() const; void setS3ServiceName(const std::string &s3ServiceName); const std::string &getS3Region() const; void setS3Region(const std::string &s3Region); const std::string &getS3ServiceUrl() const; void setS3ServiceUrl(const std::string &s3ServiceUrl); const std::string &getS3AccessKeyFile() const; void setS3AccessKeyFile(const std::string &s3AccessKeyFile); const std::string &getS3SecretKeyFile() const; void setS3SecretKeyFile(const std::string &s3SecretKeyFile); const std::string &getS3UrlStyle() const; void setS3UrlStyle(const std::string &s3UrlStyle); const int getS3SignatureVersion() const { return 4; } private: std::string s3_bucket_name; std::string s3_service_name; std::string s3_region; std::string s3_service_url; std::string s3_access_key_file; std::string s3_secret_key_file; std::string s3_url_style; }; xrootd-s3-http-0.4.1/src/S3Commands.cc000066400000000000000000000607261501635342300174150ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ***************************************************************/ #include "S3Commands.hh" #include "AWSv4-impl.hh" #include "S3File.hh" #include "shortfile.hh" #include "stl_string_utils.hh" #include #include #include #include #include #include #include #include #include #include #include #include AmazonRequest::~AmazonRequest() {} bool AmazonRequest::SendRequest() { query_parameters.insert(std::make_pair("Version", "2012-10-01")); switch (signatureVersion) { case 4: { auto qs = canonicalizeQueryString(); return sendV4Request(qs, qs.size(), true, true, true); } default: this->errorCode = "E_INTERNAL"; this->errorMessage = "Invalid signature version."; return false; } } std::string AmazonRequest::canonicalizeQueryString() { return AWSv4Impl::canonicalizeQueryString(query_parameters); } // Takes in the configured `s3.service_url` and uses the bucket/object requested // to generate the host URL, as well as the canonical URI (which is the path to // the object). bool AmazonRequest::parseURL(const std::string &url, std::string &bucket_path, std::string &path) { auto schemeEndIdx = url.find("://"); if (schemeEndIdx == std::string::npos) { return false; } if (url.size() < schemeEndIdx + 3) { return false; } auto hostStartIdx = schemeEndIdx + 3; auto resourceStartIdx = url.find("/", hostStartIdx); if (resourceStartIdx == std::string::npos) { if (m_style == "path") { // If we're configured for path-style requests, then the host is // everything between // :// and the last / host = substring(url, hostStartIdx); // Likewise, the path is going to be /bucket/object // Sometimes we intentionally configure the plugin with no bucket // because we assume the incoming object request already encodes the // bucket. This is used for exporting many buckets from a single // endpoint. 
if (bucket.empty()) { path = "/" + object; bucket_path = "/" + object.substr(0, object.find('/')); } else { path = "/" + bucket + "/" + object; bucket_path = "/" + bucket; } } else { // In virtual-style requests, the host should be determined as // everything between // :// up until the last /, but with appended to the front. host = bucket + "." + substring(url, hostStartIdx); if (retainObject) { path = "/" + object; } else { path = "/"; } bucket_path = "/"; } return true; } if (m_style == "path") { host = substring(url, hostStartIdx, resourceStartIdx); auto resourcePrefix = substring(url, resourceStartIdx); if (resourcePrefix[resourcePrefix.size() - 1] == '/') { resourcePrefix = substring(resourcePrefix, 0, resourcePrefix.size() - 1); } if (bucket.empty()) { path = resourcePrefix + object; bucket_path = resourcePrefix + object.substr(0, object.find('/')); } else { path = resourcePrefix + "/" + bucket + "/" + object; bucket_path = resourcePrefix + "/" + bucket; } } else { host = bucket + "." + substring(url, hostStartIdx, resourceStartIdx); path = substring(url, resourceStartIdx) + object; bucket_path = "/"; } return true; } void convertMessageDigestToLowercaseHex(const unsigned char *messageDigest, unsigned int mdLength, std::string &hexEncoded) { AWSv4Impl::convertMessageDigestToLowercaseHex(messageDigest, mdLength, hexEncoded); } bool doSha256(const std::string_view payload, unsigned char *messageDigest, unsigned int *mdLength) { return AWSv4Impl::doSha256(payload, messageDigest, mdLength); } std::string pathEncode(const std::string &original) { return AWSv4Impl::pathEncode(original); } bool AmazonRequest::createV4Signature(const std::string_view payload, std::string &authorizationValue, bool sendContentSHA) { // If we're using temporary credentials, we need to add the token // header here as well. We set saKey and keyID here (well before // necessary) since we'll get them for free when we get the token. 
std::string keyID; std::string saKey; std::string token; if (!this->secretKeyFile.empty()) { // Some origins may exist in front of // unauthenticated buckets if (!readShortFile(this->secretKeyFile, saKey)) { this->errorCode = "E_FILE_IO"; this->errorMessage = "Unable to read from secretkey file '" + this->secretKeyFile + "'."; return false; } trim(saKey); } else { canonicalQueryString = canonicalizeQueryString(); requiresSignature = false; // If we don't create a signature, it must not be needed... return true; // If there was no saKey, we need not generate a signature } if (!this->accessKeyFile.empty()) { // Some origins may exist in front of // unauthenticated buckets if (!readShortFile(this->accessKeyFile, keyID)) { this->errorCode = "E_FILE_IO"; this->errorMessage = "Unable to read from accesskey file '" + this->accessKeyFile + "'."; return false; } trim(keyID); } else { this->errorCode = "E_FILE_IO"; this->errorMessage = "The secretkey file was read, but I can't read " "from accesskey file '" + this->secretKeyFile + "'."; return false; } time_t now; time(&now); struct tm brokenDownTime; gmtime_r(&now, &brokenDownTime); // // Create task 1's inputs. // // The canonical URI is the absolute path component of the service URL, // normalized according to RFC 3986 (removing redundant and relative // path components), with each path segment being URI-encoded. // But that sounds like a lot of work, so until something we do actually // requires it, I'll just assume the path is already normalized. canonicalURI = pathEncode(canonicalURI); // The canonical query string is the alphabetically sorted list of // URI-encoded parameter names '=' values, separated by '&'s. canonicalQueryString = canonicalizeQueryString(); // The canonical headers must include the Host header, so add that // now if we don't have it. if (headers.find("Host") == headers.end()) { headers["Host"] = host; } // S3 complains if x-amz-date isn't signed, so do this early. 
char dt[] = "YYYYMMDDThhmmssZ"; strftime(dt, sizeof(dt), "%Y%m%dT%H%M%SZ", &brokenDownTime); headers["X-Amz-Date"] = dt; char d[] = "YYYYMMDD"; strftime(d, sizeof(d), "%Y%m%d", &brokenDownTime); // S3 complains if x-amz-content-sha256 isn't signed, which makes sense, // so do this early. // The canonical payload hash is the lowercase hexadecimal string of the // (SHA256) hash value of the payload. std::string payloadHash; if (sendContentSHA) { unsigned int mdLength = 0; unsigned char messageDigest[EVP_MAX_MD_SIZE]; if (!doSha256(payload, messageDigest, &mdLength)) { this->errorCode = "E_INTERNAL"; this->errorMessage = "Unable to hash payload."; return false; } convertMessageDigestToLowercaseHex(messageDigest, mdLength, payloadHash); } else { payloadHash = "UNSIGNED-PAYLOAD"; } headers["X-Amz-Content-Sha256"] = payloadHash; // The canonical list of headers is a sorted list of lowercase header // names paired via ':' with the trimmed header value, each pair // terminated with a newline. AmazonRequest::AttributeValueMap transformedHeaders; for (auto i = headers.begin(); i != headers.end(); ++i) { std::string header = i->first; std::transform(header.begin(), header.end(), header.begin(), &tolower); std::string value = i->second; // We need to leave empty headers alone so that they can be used // to disable CURL stupidity later. if (value.size() == 0) { continue; } // Eliminate trailing spaces. unsigned j = value.length() - 1; while (value[j] == ' ') { --j; } if (j != value.length() - 1) { value.erase(j + 1); } // Eliminate leading spaces. for (j = 0; value[j] == ' '; ++j) { } value.erase(0, j); // Convert internal runs of spaces into single spaces. 
unsigned left = 1; unsigned right = 1; bool inSpaces = false; while (right < value.length()) { if (!inSpaces) { if (value[right] == ' ') { inSpaces = true; left = right; ++right; } else { ++right; } } else { if (value[right] == ' ') { ++right; } else { inSpaces = false; value.erase(left, right - left - 1); right = left + 1; } } } transformedHeaders[header] = value; } // The canonical list of signed headers is trivial to generate while // generating the list of headers. std::string signedHeaders; std::string canonicalHeaders; for (auto i = transformedHeaders.begin(); i != transformedHeaders.end(); ++i) { canonicalHeaders += i->first + ":" + i->second + "\n"; signedHeaders += i->first + ";"; } signedHeaders.erase(signedHeaders.end() - 1); // Task 1: create the canonical request. std::string canonicalRequest = httpVerb + "\n" + canonicalURI + "\n" + canonicalQueryString + "\n" + canonicalHeaders + "\n" + signedHeaders + "\n" + payloadHash; // // Create task 2's inputs. // // Hash the canonical request the way we did the payload. std::string canonicalRequestHash; unsigned int mdLength = 0; unsigned char messageDigest[EVP_MAX_MD_SIZE]; if (!doSha256(canonicalRequest, messageDigest, &mdLength)) { errorCode = "E_INTERNAL"; errorMessage = "Unable to hash canonical request."; return false; } convertMessageDigestToLowercaseHex(messageDigest, mdLength, canonicalRequestHash); std::string s = service; if (s.empty()) { size_t i = host.find("."); if (i != std::string::npos) { s = host.substr(0, i); } else { s = host; } } std::string r = this->region; if (r.empty()) { size_t i = host.find("."); size_t j = host.find(".", i + 1); if (j != std::string::npos) { r = host.substr(i + 1, j - i - 1); } else { r = host; } } // Task 2: create the string to sign. 
std::string credentialScope; formatstr(credentialScope, "%s/%s/%s/aws4_request", d, r.c_str(), s.c_str()); std::string stringToSign; formatstr(stringToSign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", dt, credentialScope.c_str(), canonicalRequestHash.c_str()); // // Creating task 3's inputs was done when we checked to see if we needed // to get the security token, since they come along for free when we do. // // Task 3: calculate the signature. saKey = "AWS4" + saKey; const unsigned char *hmac = HMAC(EVP_sha256(), saKey.c_str(), saKey.length(), (unsigned char *)d, sizeof(d) - 1, messageDigest, &mdLength); if (hmac == NULL) { return false; } unsigned int md2Length = 0; unsigned char messageDigest2[EVP_MAX_MD_SIZE]; hmac = HMAC(EVP_sha256(), messageDigest, mdLength, (const unsigned char *)r.c_str(), r.length(), messageDigest2, &md2Length); if (hmac == NULL) { return false; } hmac = HMAC(EVP_sha256(), messageDigest2, md2Length, (const unsigned char *)s.c_str(), s.length(), messageDigest, &mdLength); if (hmac == NULL) { return false; } const char c[] = "aws4_request"; hmac = HMAC(EVP_sha256(), messageDigest, mdLength, (const unsigned char *)c, sizeof(c) - 1, messageDigest2, &md2Length); if (hmac == NULL) { return false; } hmac = HMAC(EVP_sha256(), messageDigest2, md2Length, (const unsigned char *)stringToSign.c_str(), stringToSign.length(), messageDigest, &mdLength); if (hmac == NULL) { return false; } std::string signature; convertMessageDigestToLowercaseHex(messageDigest, mdLength, signature); formatstr(authorizationValue, "AWS4-HMAC-SHA256 Credential=%s/%s," " SignedHeaders=%s, Signature=%s", keyID.c_str(), credentialScope.c_str(), signedHeaders.c_str(), signature.c_str()); return true; } bool AmazonRequest::sendV4Request(const std::string_view payload, off_t payload_size, bool sendContentSHA, bool final, bool blocking) { if ((getProtocol() != "http") && (getProtocol() != "https")) { this->errorCode = "E_INVALID_SERVICE_URL"; this->errorMessage = "Service URL not of a known 
protocol (http[s])."; return false; } std::string authorizationValue; if (!createV4Signature(payload, authorizationValue, sendContentSHA)) { if (this->errorCode.empty()) { this->errorCode = "E_INTERNAL"; } if (this->errorMessage.empty()) { this->errorMessage = "Failed to create v4 signature."; } return false; } // When accessing an unauthenticated bucket, providing an auth header will // cause errors if (!authorizationValue.empty()) { headers["Authorization"] = authorizationValue; } // This operation is on the bucket itself; alter the URL auto url = hostUrl; if (!canonicalQueryString.empty()) { url += "?" + canonicalQueryString; } if (blocking) { return sendPreparedRequest(url, payload, payload_size, final); } else { return sendPreparedRequestNonblocking(url, payload, payload_size, final); } } // Send a request to a S3 backend bool AmazonRequest::SendS3Request(const std::string_view payload, off_t payload_size, bool final, bool blocking) { if (!m_streamingRequest && !final) { if (payload_size == 0) { errorCode = "E_INTERNAL"; errorMessage = "S3 does not support streaming requests where the " "payload size is unknown"; return false; } m_streamingRequest = true; } headers["Content-Type"] = "binary/octet-stream"; service = "s3"; if (region.empty()) { region = "us-east-1"; } return sendV4Request(payload, payload_size, !m_streamingRequest, final, blocking); } // --------------------------------------------------------------------------- AmazonS3Upload::~AmazonS3Upload() {} bool AmazonS3Upload::SendRequest(const std::string_view &payload) { httpVerb = "PUT"; return SendS3Request(payload, payload.size(), true, true); } // --------------------------------------------------------------------------- AmazonS3CompleteMultipartUpload::~AmazonS3CompleteMultipartUpload() {} bool AmazonS3CompleteMultipartUpload::SendRequest( const std::vector &eTags, int partNumber, const std::string &uploadId) { query_parameters["uploadId"] = uploadId; httpVerb = "POST"; std::string payload; 
payload += ""; for (int i = 1; i < partNumber; i++) { payload += ""; payload += "" + eTags[i - 1] + ""; payload += "" + std::to_string(i) + ""; payload += ""; } payload += ""; return SendS3Request(payload, payload.size(), true, true); } // --------------------------------------------------------------------------- AmazonS3CreateMultipartUpload::~AmazonS3CreateMultipartUpload() {} AmazonS3SendMultipartPart::~AmazonS3SendMultipartPart() {} bool AmazonS3CreateMultipartUpload::SendRequest() { query_parameters["uploads"] = ""; query_parameters["x-id"] = "CreateMultipartUpload"; httpVerb = "POST"; return SendS3Request("", 0, true, true); } bool AmazonS3SendMultipartPart::SendRequest(const std::string_view payload, const std::string &partNumber, const std::string &uploadId, size_t payloadSize, bool final) { query_parameters["partNumber"] = partNumber; query_parameters["uploadId"] = uploadId; includeResponseHeader = true; httpVerb = "PUT"; return SendS3Request(payload, payloadSize, final, true); } bool AmazonS3SendMultipartPart::GetEtag(std::string &result) { if (!m_etag.empty()) { result = m_etag; return true; } auto resultString = getResultString(); static const std::string etag = "etag: \""; auto iter = std::search( resultString.begin(), resultString.end(), etag.begin(), etag.end(), [](char a, char b) { return std::tolower(a) == std::tolower(b); }); if (iter == resultString.end()) { return false; } std::size_t startPos = std::distance(resultString.begin(), iter); std::size_t endPos = resultString.find("\"", startPos + 7); if (endPos == std::string::npos) { return false; } m_etag = result = resultString.substr(startPos + 7, endPos - startPos - 7); return true; } // --------------------------------------------------------------------------- AmazonS3Download::~AmazonS3Download() {} bool AmazonS3Download::SendRequest(off_t offset, size_t size) { m_request_start = std::chrono::steady_clock::now(); if (offset != 0 || size != 0) { std::string range; formatstr(range, 
"bytes=%lld-%lld", static_cast(offset), static_cast(offset + size - 1)); headers["Range"] = range.c_str(); this->expectedResponseCode = 206; } if (size && m_buffer) { m_buffer_view = std::string_view(m_buffer, size); } httpVerb = "GET"; return SendS3Request("", 0, true, IsBlocking()); } // --------------------------------------------------------------------------- template AmazonS3NonblockingDownload::~AmazonS3NonblockingDownload() {} template class AmazonS3NonblockingDownload; // --------------------------------------------------------------------------- AmazonS3Head::~AmazonS3Head() {} bool AmazonS3Head::SendRequest() { httpVerb = "HEAD"; includeResponseHeader = true; std::string noPayloadAllowed; return SendS3Request(noPayloadAllowed, 0, true, true); } void AmazonS3Head::parseResponse() { if (m_parsedResponse) { return; } m_parsedResponse = true; const std::string &headers = getResultString(); std::string line; size_t current_newline = 0; size_t next_newline = std::string::npos; size_t last_character = headers.size(); while (headers.size() && current_newline != std::string::npos && current_newline != last_character - 1) { next_newline = headers.find("\r\n", current_newline + 2); line = substring(headers, current_newline + 2, next_newline); size_t colon = line.find(":"); if (colon != std::string::npos && colon != line.size()) { auto attr = substring(line, 0, colon); auto value = substring(line, colon + 1); trim(value); toLower(attr); if (attr == "content-length") { m_size = std::stol(value); } else if (attr == "last-modified") { struct tm t; char *eos = strptime(value.c_str(), "%a, %d %b %Y %T %Z", &t); if (eos == &value.c_str()[value.size()]) { auto epoch = timegm(&t); if (epoch != -1) { m_last_modified = epoch; } } } } current_newline = next_newline; } } // --------------------------------------------------------------------------- bool AmazonS3List::SendRequest(const std::string &continuationToken) { query_parameters["list-type"] = "2"; // Version 2 of the 
object-listing query_parameters["delimiter"] = "/"; query_parameters["prefix"] = urlquote(object); query_parameters["encoding-type"] = "url"; if (!continuationToken.empty()) { query_parameters["continuation-token"] = urlquote(continuationToken); } query_parameters["max-keys"] = std::to_string(m_maxKeys); httpVerb = "GET"; // Operation is on the bucket itself; alter the URL to remove the object hostUrl = getProtocol() + "://" + host + bucketPath; canonicalURI = bucketPath; return SendS3Request("", 0, true, true); } bool AmazonS3CreateMultipartUpload::Results(std::string &uploadId, std::string &errMsg) { tinyxml2::XMLDocument doc; auto err = doc.Parse(getResultString().c_str()); if (err != tinyxml2::XML_SUCCESS) { errMsg = doc.ErrorStr(); return false; } auto elem = doc.RootElement(); if (strcmp(elem->Name(), "InitiateMultipartUploadResult")) { errMsg = "S3 Uploads response is not rooted with " "InitiateMultipartUploadResult " "element"; return false; } for (auto child = elem->FirstChildElement(); child != nullptr; child = child->NextSiblingElement()) { if (!strcmp(child->Name(), "UploadId")) { uploadId = child->GetText(); } } return true; } // Parse the results of the AWS directory listing // // S3 returns an XML structure for directory listings so we must pick it apart // and convert it to `objInfo` and `commonPrefixes`. The `objInfo` is a list of // objects that match the current prefix but don't have a subsequent `/` in the // object name. The `commonPrefixes` are the unique prefixes of other objects // that have the same prefix as the original query but also have an `/`. // // Example. Suppose we have the following objects in the bucket: // - /foo/bar.txt // - /foo/bar/example.txt // - /foo/baz/example.txt // Then, a query to list with prefix `/foo/` would return object info for // `/foo/bar.txt` while the common prefixes would be `/foo/bar/` and `/foo/baz`. // Note this is quite close to returning a list of files in a directory and a // list of sub-directories. 
bool AmazonS3List::Results(std::vector &objInfo, std::vector &commonPrefixes, std::string &ct, std::string &errMsg) { tinyxml2::XMLDocument doc; auto err = doc.Parse(m_result.c_str()); if (err != tinyxml2::XML_SUCCESS) { errMsg = doc.ErrorStr(); return false; } auto elem = doc.RootElement(); if (strcmp(elem->Name(), "ListBucketResult")) { errMsg = "S3 ListBucket response is not rooted with ListBucketResult " "element"; return false; } // Example response from S3: // // // genome-browser // cells/muscle-ibm/endothelial-stromal-cells // 40 // 40 // 1PnsptbFFpBSb6UBNN4F/RrxtBvIHjNpdXNYlX8E7IyqXRK26w2y36KViUAbyPPsjzikVY0Zj4jMvQHRhsGWZbcKKrEVvaR0HaZDtfUXUwnc= // false // // cells/muscle-ibm/endothelial-stromal-cells/UMAP.coords.tsv.gz // 2023-08-21T11:02:53.000Z // "b9b0065f10cbd91c9d341acc235c63b0" // 360012 // STANDARD // // // cells/muscle-ibm/endothelial-stromal-cells/barcodes.tsv.gz // 2023-07-17T11:02:19.000Z // "048feef5d340e2dd4d2d2d495c24ad7e" // 118061 // STANDARD // // ... (truncated some entries for readability) ... 
// // cells/muscle-ibm/endothelial-stromal-cells/coords/ // // // cells/muscle-ibm/endothelial-stromal-cells/markers/ // // // cells/muscle-ibm/endothelial-stromal-cells/metaFields/ // // bool isTruncated = false; for (auto child = elem->FirstChildElement(); child != nullptr; child = child->NextSiblingElement()) { if (!strcmp(child->Name(), "IsTruncated")) { bool isTrunc; if (child->QueryBoolText(&isTrunc) == tinyxml2::XML_SUCCESS) { isTruncated = isTrunc; } } else if (!strcmp(child->Name(), "CommonPrefixes")) { auto prefix = child->FirstChildElement("Prefix"); if (prefix != nullptr) { auto prefixChar = prefix->GetText(); if (prefixChar != nullptr) { auto prefixStr = std::string(prefixChar); trim(prefixStr); if (!prefixStr.empty()) { commonPrefixes.emplace_back(prefixStr); } } } } else if (!strcmp(child->Name(), "Contents")) { std::string keyStr; int64_t size; bool goodSize = false; auto key = child->FirstChildElement("Key"); if (key != nullptr) { auto keyChar = key->GetText(); if (keyChar != nullptr) { keyStr = std::string(keyChar); trim(keyStr); } } auto sizeElem = child->FirstChildElement("Size"); if (sizeElem != nullptr) { goodSize = (sizeElem->QueryInt64Text(&size) == tinyxml2::XML_SUCCESS); } if (goodSize && !keyStr.empty()) { S3ObjectInfo obj; obj.m_key = keyStr; obj.m_size = size; objInfo.emplace_back(obj); } } else if (!strcmp(child->Name(), "NextContinuationToken")) { auto ctChar = child->GetText(); if (ctChar) { ct = ctChar; trim(ct); } } } if (!isTruncated) { ct = ""; } return true; } xrootd-s3-http-0.4.1/src/S3Commands.hh000066400000000000000000000267121501635342300174240ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. 
You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once #include "HTTPCommands.hh" #include "S3AccessInfo.hh" #include #include #include // The base class for all requests to the S3 endpoint. // Handles common activities like signing requests and forwarding to the // underlying HTTPRequest object. class AmazonRequest : public HTTPRequest { public: AmazonRequest(const S3AccessInfo &ai, const std::string objectName, XrdSysError &log, bool ro = true) : AmazonRequest(ai.getS3ServiceUrl(), ai.getS3AccessKeyFile(), ai.getS3SecretKeyFile(), ai.getS3BucketName(), objectName, ai.getS3UrlStyle(), ai.getS3SignatureVersion(), log, ro) {} AmazonRequest(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, int sv, XrdSysError &log, bool ro = true) : HTTPRequest(s, log, nullptr), accessKeyFile(akf), secretKeyFile(skf), signatureVersion(sv), bucket(b), object(o), m_style(style) { requiresSignature = true; retainObject = ro; // Start off by parsing the hostUrl, which we use in conjunction with // the bucket to fill in the host (for setting host header). For // example, if the incoming hostUrl (which we get from config) is // "https://my-url.com:443", the bucket is "my-bucket", and the object // is "my-object", then the host will be "my-bucket.my-url.com:443" and // the canonicalURI will be "/my-object". 
if (!parseURL(hostUrl, bucketPath, canonicalURI)) { errorCode = "E_INVALID_SERVICE_URL"; errorMessage = "Failed to parse host and canonicalURI from service URL."; } if (canonicalURI.empty()) { canonicalURI = "/"; } // Now that we have the host and canonicalURI, we can build the actual // url we perform the curl against. Using the previous example, we'd get // a new hostUrl of // --> "https://my-bucket.my-url.com:443/my-object" for virtual style // requests, and // --> "https://my-url.com:443/my-bucket/my-object" for path style // requests. hostUrl = getProtocol() + "://" + host + canonicalURI; // If we can, set the region based on the host. size_t secondDot = host.find(".", 2 + 1); if (host.find("s3.") == 0) { region = host.substr(3, secondDot - 2 - 1); } } virtual ~AmazonRequest(); virtual const std::string *getAccessKey() const { return &accessKeyFile; } virtual const std::string *getSecretKey() const { return &secretKeyFile; } bool parseURL(const std::string &url, std::string &bucket_path, std::string &path); virtual bool SendRequest(); // Send a request to the S3 service. // // - payload: contents of the request itself // - payload_size: final size of the payload for uploads; 0 if unknown. // - final: True if this is the last (or only) payload of the request; false // otherwise // - blocking: True if the method should block on a response; false // otherwise virtual bool SendS3Request(const std::string_view payload, off_t payload_size, bool final, bool blocking); static void Init(XrdSysError &log) { HTTPRequest::Init(log); } protected: // Send a request to the S3 service using the V4 signing method. // // - payload: contents of the request (for uploads or for XML-based // commands) // - payload_size: final size of the payload for uploads; 0 if unknown. // - sendContentSHA: Whether to add the header indicating the checksum of // the final payload. Servers may verify this is what they received. 
// - final: True if this is the last (or only) payload of the request; false // otherwise. // - blocking: True if this method should block until a response; false // otherwise bool sendV4Request(const std::string_view payload, off_t payload_size, bool sendContentSHA, bool final, bool blocking); bool retainObject; bool m_streamingRequest{ false}; // Is this a streaming request? Streaming requests will not // include a SHA-256 signature in the header std::string accessKeyFile; std::string secretKeyFile; int signatureVersion; std::string host; std::string canonicalURI; std::string bucketPath; // Path to use for bucket-level operations (such as // listings). May be empty for DNS-style buckets std::string canonicalQueryString; std::string bucket; std::string object; std::string region; std::string service; std::string m_style; private: bool createV4Signature(const std::string_view payload, std::string &authorizationHeader, bool sendContentSHA = false); std::string canonicalizeQueryString(); }; class AmazonS3Upload final : public AmazonRequest { using AmazonRequest::SendRequest; public: AmazonS3Upload(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) : AmazonRequest(ai, objectName, log) {} AmazonS3Upload(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, XrdSysError &log) : AmazonRequest(s, akf, skf, b, o, style, 4, log) {} virtual ~AmazonS3Upload(); bool SendRequest(const std::string_view &payload); protected: std::string path; }; class AmazonS3CreateMultipartUpload final : public AmazonRequest { using AmazonRequest::SendRequest; public: AmazonS3CreateMultipartUpload(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) : AmazonRequest(ai, objectName, log) {} AmazonS3CreateMultipartUpload(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, XrdSysError 
&log) : AmazonRequest(s, akf, skf, b, o, style, 4, log) {} bool Results(std::string &uploadId, std::string &errMsg); virtual ~AmazonS3CreateMultipartUpload(); virtual bool SendRequest(); protected: // std::string path; }; class AmazonS3CompleteMultipartUpload : public AmazonRequest { using AmazonRequest::SendRequest; public: AmazonS3CompleteMultipartUpload(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) : AmazonRequest(ai, objectName, log) {} AmazonS3CompleteMultipartUpload(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, XrdSysError &log) : AmazonRequest(s, akf, skf, b, o, style, 4, log) {} virtual ~AmazonS3CompleteMultipartUpload(); virtual bool SendRequest(const std::vector &eTags, int partNumber, const std::string &uploadId); protected: }; class AmazonS3SendMultipartPart : public AmazonRequest { using AmazonRequest::SendRequest; public: AmazonS3SendMultipartPart(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) : AmazonRequest(ai, objectName, log) {} AmazonS3SendMultipartPart(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, XrdSysError &log) : AmazonRequest(s, akf, skf, b, o, style, 4, log) {} bool Results(std::string &uploadId, std::string &errMsg); virtual ~AmazonS3SendMultipartPart(); // Send (potentially a partial) payload up to S3. // Blocks until all the data in payload has been sent to AWS. // // - payload: The data corresponding to this partial upload. // - partNumber: The portion of the multipart upload. 
// - uploadId: The upload ID assigned by the creation of the multipart // upload // - final: Set to true if this is the last of the part; false otherwise bool SendRequest(const std::string_view payload, const std::string &partNumber, const std::string &uploadId, size_t payloadSize, bool final); // Retrieve the ETag header from the returned headers; bool GetEtag(std::string &result); private: std::string m_etag; }; class AmazonS3Download : public AmazonRequest { using AmazonRequest::SendRequest; public: AmazonS3Download(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log, char *buffer) : AmazonRequest(ai, objectName, log), m_buffer(buffer) {} AmazonS3Download(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, XrdSysError &log, char *buffer) : AmazonRequest(s, akf, skf, b, o, style, 4, log), m_buffer(buffer) {} virtual ~AmazonS3Download(); virtual bool SendRequest(off_t offset, size_t size); // Return the elapsed time since the request was started with SendRequest(). 
std::chrono::steady_clock::duration getElapsedTime() const { return std::chrono::steady_clock::now() - m_request_start; } protected: virtual bool IsBlocking() { return true; } virtual std::string_view *requestResult() override { return &m_buffer_view; } private: char *m_buffer{nullptr}; std::chrono::steady_clock::time_point m_request_start; std::string_view m_buffer_view; }; template class AmazonS3NonblockingDownload final : public AmazonS3Download { public: AmazonS3NonblockingDownload(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log, char *buffer, T ¬ifier) : AmazonS3Download(ai, objectName, log, buffer), m_notifier(notifier) {} virtual ~AmazonS3NonblockingDownload(); protected: virtual bool IsBlocking() override { return false; } virtual void Notify() override { m_notifier.Notify(); } private: T &m_notifier; }; class AmazonS3Head final : public AmazonRequest { using AmazonRequest::SendRequest; public: AmazonS3Head(const S3AccessInfo &ai, const std::string &objectName, XrdSysError &log) : AmazonRequest(ai, objectName, log) {} AmazonS3Head(const std::string &s, const std::string &akf, const std::string &skf, const std::string &b, const std::string &o, const std::string &style, XrdSysError &log) : AmazonRequest(s, akf, skf, b, o, style, 4, log) {} virtual ~AmazonS3Head(); virtual bool SendRequest(); off_t getSize() { parseResponse(); return m_size; } time_t getLastModified() { parseResponse(); return m_last_modified; } private: void parseResponse(); bool m_parsedResponse{false}; off_t m_size{0}; time_t m_last_modified{0}; }; struct S3ObjectInfo { size_t m_size; std::string m_key; }; class AmazonS3List final : public AmazonRequest { using AmazonRequest::SendRequest; public: AmazonS3List(const S3AccessInfo &ai, const std::string &objectName, size_t maxKeys, XrdSysError &log) : AmazonRequest(ai, objectName, log, false), m_maxKeys(maxKeys) {} virtual ~AmazonS3List() {} bool SendRequest(const std::string &continuationToken); bool 
Results(std::vector &objInfo, std::vector &commonPrefixes, std::string &ct, std::string &errMsg); private: size_t m_maxKeys{1000}; }; xrootd-s3-http-0.4.1/src/S3Directory.cc000066400000000000000000000146711501635342300176160ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #include "S3Directory.hh" #include "S3Commands.hh" #include "logging.hh" #include "stl_string_utils.hh" #include #include #include void S3Directory::Reset() { m_opened = false; m_ct = ""; m_idx = 0; m_objInfo.clear(); m_commonPrefixes.clear(); m_stat_buf = nullptr; m_ai = S3AccessInfo(); m_object = ""; } int S3Directory::ListS3Dir(const std::string &ct) { AmazonS3List listCommand(m_ai, m_object, 1000, m_log); auto res = listCommand.SendRequest(ct); if (!res) { switch (listCommand.getResponseCode()) { case 404: return -ENOENT; case 500: return -EIO; case 403: return -EPERM; default: return -EIO; } } std::string errMsg; m_idx = 0; res = listCommand.Results(m_objInfo, m_commonPrefixes, m_ct, errMsg); if (!res) { m_log.Log(XrdHTTPServer::Warning, "Opendir", "Failed to parse S3 results:", errMsg.c_str()); return -EIO; } if (m_log.getMsgMask() & XrdHTTPServer::Debug) { std::stringstream ss; ss << "Directory listing returned " << m_objInfo.size() << " objects and " << 
m_commonPrefixes.size() << " prefixes"; m_log.Log(XrdHTTPServer::Debug, "Stat", ss.str().c_str()); } m_opened = true; return 0; } int S3Directory::Opendir(const char *path, XrdOucEnv &env) { if (m_opened) { return -EBADF; } Reset(); std::string realPath = path; if (realPath.back() != '/') { realPath = realPath + "/"; } std::string exposedPath, object; int rv = m_fs.parsePath(realPath.c_str(), exposedPath, object); if (rv != 0) { return rv; } auto ai = m_fs.getS3AccessInfo(exposedPath, object); if (!ai) { return -ENOENT; } if (ai->getS3BucketName().empty()) { return -EINVAL; } m_ai = *ai; // If the prefix is "foo" and there's an object "foo/bar", then // the lookup only returns "foo/" (as it's the longest common prefix prior // to a delimiter). Instead, we want to query for "foo/", which returns // "foo/bar". if (!object.empty() && (object[object.size() - 1] != '/')) { object += "/"; } m_object = object; return ListS3Dir(""); } int S3Directory::Readdir(char *buff, int blen) { if (!m_opened) { return -EBADF; } if (m_stat_buf) { memset(m_stat_buf, '\0', sizeof(struct stat)); } // m_idx encodes the location inside the current directory. // - m_idx in [0, m_objInfo.size) means return a "file" from the object // list. // - m_idx == m_objectInfo.size means return the first entry in the // directory/common prefix list. // - m_idx in (m_commonPrefixes.size, -1] means return an entry from the // common prefix list. // - m_idx == -m_commonPrefixes.size means that all the path elements have // been consumed. // // If all the paths entry have been consumed, then if the continuation token // is set, list more objects in the bucket. If it's unset, then we've // iterated through all the bucket contents. 
auto idx = m_idx; if (m_objInfo.empty() && m_commonPrefixes.empty()) { *buff = '\0'; return XrdOssOK; } else if (idx >= 0 && idx < static_cast(m_objInfo.size())) { m_idx++; std::string full_name = m_objInfo[idx].m_key; auto lastSlashIdx = full_name.rfind("/"); if (lastSlashIdx != std::string::npos) { full_name.erase(0, lastSlashIdx); } trimslashes(full_name); strncpy(buff, full_name.c_str(), blen); if (buff[blen - 1] != '\0') { buff[blen - 1] = '\0'; return -ENOMEM; } if (m_stat_buf) { m_stat_buf->st_mode = 0x0600 | S_IFREG; m_stat_buf->st_nlink = 1; m_stat_buf->st_size = m_objInfo[idx].m_size; } } else if (idx < 0 && -idx == static_cast(m_commonPrefixes.size())) { if (!m_ct.empty()) { // Get the next set of results from S3. m_idx = 0; m_objInfo.clear(); m_commonPrefixes.clear(); if (m_stat_buf) { memset(m_stat_buf, '\0', sizeof(struct stat)); } auto rv = ListS3Dir(m_ct); if (rv != 0) { m_opened = false; return rv; } // Recurse to parse the fresh results. return Readdir(buff, blen); } *buff = '\0'; return XrdOssOK; } else if (idx == static_cast(m_objInfo.size()) || -idx < static_cast(m_commonPrefixes.size())) { if (m_commonPrefixes.empty()) { if (!m_ct.empty()) { // Get the next set of results from S3. m_idx = 0; m_objInfo.clear(); m_commonPrefixes.clear(); if (m_stat_buf) { memset(m_stat_buf, '\0', sizeof(struct stat)); } auto rv = ListS3Dir(m_ct); if (rv != 0) { m_opened = false; return rv; } // Recurse to parse the fresh results. 
return Readdir(buff, blen); } *buff = '\0'; return XrdOssOK; } if (idx == static_cast(m_objInfo.size())) { m_idx = -1; idx = 0; } else { idx = -m_idx; m_idx--; } std::string full_name = m_commonPrefixes[idx]; trimslashes(full_name); auto lastSlashIdx = full_name.rfind("/"); if (lastSlashIdx != std::string::npos) { full_name.erase(0, lastSlashIdx); } trimslashes(full_name); strncpy(buff, full_name.c_str(), blen); if (buff[blen - 1] != '\0') { buff[blen - 1] = '\0'; return -ENOMEM; } if (m_stat_buf) { m_stat_buf->st_mode = 0x0700 | S_IFDIR; m_stat_buf->st_nlink = 0; m_stat_buf->st_size = 4096; } } else { return -EBADF; } if (m_stat_buf) { m_stat_buf->st_uid = 1; m_stat_buf->st_gid = 1; m_stat_buf->st_mtime = m_stat_buf->st_ctime = m_stat_buf->st_atime = 0; m_stat_buf->st_dev = 0; m_stat_buf->st_ino = 1; // If both st_dev and st_ino are 0, then XRootD // interprets that as an unavailable file. } return XrdOssOK; } int S3Directory::StatRet(struct stat *buf) { if (!m_opened) { return -EBADF; } m_stat_buf = buf; return XrdOssOK; } int S3Directory::Close(long long *retsz) { if (!m_opened) { return -EBADF; } Reset(); return XrdOssOK; } xrootd-s3-http-0.4.1/src/S3Directory.hh000066400000000000000000000031471501635342300176240ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ***************************************************************/ #pragma once #include "HTTPDirectory.hh" #include "S3Commands.hh" #include "S3FileSystem.hh" #include #include class XrdSysError; class S3Directory : public HTTPDirectory { public: S3Directory(XrdSysError &log, const S3FileSystem &fs) : HTTPDirectory(log), m_fs(fs) {} virtual ~S3Directory() {} virtual int Opendir(const char *path, XrdOucEnv &env) override; int Readdir(char *buff, int blen) override; int StatRet(struct stat *statStruct) override; int Close(long long *retsz = 0) override; private: void Reset(); int ListS3Dir(const std::string &ct); bool m_opened{false}; ssize_t m_idx{0}; std::vector m_objInfo; std::vector m_commonPrefixes; std::string m_prefix; std::string m_ct; std::string m_object; const S3FileSystem &m_fs; S3AccessInfo m_ai; struct stat *m_stat_buf{nullptr}; }; xrootd-s3-http-0.4.1/src/S3File.cc000066400000000000000000001206371501635342300165310ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ***************************************************************/ #include "S3File.hh" #include "CurlWorker.hh" #include "S3Commands.hh" #include "S3FileSystem.hh" #include "logging.hh" #include "stl_string_utils.hh" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace XrdHTTPServer; S3FileSystem *g_s3_oss = nullptr; size_t S3File::m_cache_entry_size = (2 * 1024 * 1024); // Default size of the cache's buffer XrdXrootdGStream *S3File::m_gstream = nullptr; std::atomic S3File::S3Cache::m_hit_bytes{0}; std::atomic S3File::S3Cache::m_miss_bytes{0}; std::atomic S3File::S3Cache::m_full_hit_count{0}; std::atomic S3File::S3Cache::m_partial_hit_count{0}; std::atomic S3File::S3Cache::m_miss_count{0}; std::atomic S3File::S3Cache::m_bypass_bytes{0}; std::atomic S3File::S3Cache::m_bypass_count{0}; std::atomic S3File::S3Cache::m_fetch_bytes{0}; std::atomic S3File::S3Cache::m_fetch_count{0}; std::atomic S3File::S3Cache::m_unused_bytes{0}; std::atomic S3File::S3Cache::m_prefetch_bytes{0}; std::atomic S3File::S3Cache::m_prefetch_count{0}; std::atomic S3File::S3Cache::m_errors{0}; std::atomic S3File::S3Cache::m_bypass_duration{0}; std::atomic S3File::S3Cache::m_fetch_duration{0}; XrdVERSIONINFO(XrdOssGetFileSystem, S3); std::vector, std::weak_ptr>> S3File::m_pending_ops; std::mutex S3File::m_pending_lk; std::once_flag S3File::m_monitor_launch; S3File::S3File(XrdSysError &log, S3FileSystem *oss) : m_log(log), m_oss(oss) {} int S3File::Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { if (m_is_open) { m_log.Log(LogMask::Warning, "Open", "File already open:", path); return -EBADF; } if (Oflag & O_CREAT) { m_log.Log(LogMask::Info, "Open", "File opened for creation:", path); m_create = true; } if (Oflag & O_APPEND) { m_log.Log(LogMask::Info, "Open", "File opened for append:", path); } if (Oflag & (O_RDWR | 
O_WRONLY)) { m_write_lk.reset(new std::mutex); } char *asize_char; if ((asize_char = env.Get("oss.asize"))) { off_t result{0}; auto [ptr, ec] = std::from_chars( asize_char, asize_char + strlen(asize_char), result); if (ec == std::errc()) { m_object_size = result; } else { m_log.Log(LogMask::Warning, "Opened file has oss.asize set to an unparseable value: ", asize_char); } } if (m_log.getMsgMask() & XrdHTTPServer::Debug) { m_log.Log(LogMask::Warning, "S3File::Open", "Opening file", path); } std::string exposedPath, object; auto rv = m_oss->parsePath(path, exposedPath, object); if (rv != 0) { return rv; } auto ai = m_oss->getS3AccessInfo(exposedPath, object); if (!ai) { return -ENOENT; } if (ai->getS3BucketName().empty()) { return -EINVAL; } if (object == "") { return -ENOENT; } m_ai = *ai; m_object = object; // This flag is not set when it's going to be a read operation // so we check if the file exists in order to be able to return a 404 if (!Oflag || (Oflag & O_APPEND)) { auto res = Fstat(nullptr); if (res < 0) { return res; } } m_is_open = true; return 0; } ssize_t S3File::ReadV(XrdOucIOVec *readV, int rdvcnt) { if (!m_is_open) { m_log.Log(LogMask::Warning, "Write", "File not open"); return -EBADF; } if (rdvcnt <= 0 || !readV) { return -EINVAL; } size_t totalRead = 0; for (int i = 0; i < rdvcnt; ++i) { auto &iov = readV[i]; if (iov.size == 0) { continue; } auto bytesRead = Read(static_cast(iov.data), iov.offset, iov.size); if (bytesRead < 0) { return bytesRead; } else if (bytesRead != iov.size) { // Error number copied from implementation in XrdOss/XrdOssApi.cc return -ESPIPE; } totalRead += bytesRead; } return totalRead; } ssize_t S3File::Read(void *buffer, off_t offset, size_t size) { if (!m_is_open) { m_log.Log(LogMask::Warning, "Write", "File not open"); return -EBADF; } return m_cache.Read(static_cast(buffer), offset, size); } int S3File::Fstat(struct stat *buff) { if (content_length == -1) { AmazonS3Head head(m_ai, m_object, m_log); if (!head.SendRequest()) 
{ auto httpCode = head.getResponseCode(); if (httpCode) { std::stringstream ss; ss << "HEAD command failed: " << head.getResponseCode() << ": " << head.getResultString(); m_log.Log(LogMask::Warning, "S3ile::Fstat", ss.str().c_str()); switch (httpCode) { case 404: return -ENOENT; case 500: return -EIO; case 403: return -EPERM; default: return -EIO; } } else { std::stringstream ss; ss << "Failed to send HEAD command: " << head.getErrorCode() << ": " << head.getErrorMessage(); m_log.Log(LogMask::Warning, "S3File::Fstat", ss.str().c_str()); return -EIO; } } content_length = head.getSize(); last_modified = head.getLastModified(); if (content_length < 0) { m_log.Log(LogMask::Warning, "S3File::Fstat", "Returned content length is negative"); return -EINVAL; } } if (buff) { memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0600 | S_IFREG; buff->st_nlink = 1; buff->st_uid = 1; buff->st_gid = 1; buff->st_size = content_length; buff->st_mtime = last_modified; buff->st_atime = 0; buff->st_ctime = 0; buff->st_dev = 0; buff->st_ino = 0; } return 0; } ssize_t S3File::Write(const void *buffer, off_t offset, size_t size) { if (!m_is_open) { m_log.Log(LogMask::Warning, "Write", "File not open"); return -EBADF; } auto write_mutex = m_write_lk; if (!write_mutex) { return -EBADF; } std::lock_guard lk(*write_mutex); // Small object optimization -- if this is the full object, upload // it immediately. 
if (!m_write_offset && m_object_size == static_cast(size)) { AmazonS3Upload upload(m_ai, m_object, m_log); m_write_lk.reset(); if (!upload.SendRequest( std::string_view(static_cast(buffer), size))) { m_log.Log(LogMask::Warning, "Write", "Failed to create small object"); return -EIO; } else { m_write_offset += size; m_log.Log(LogMask::Debug, "Write", "Creation of small object succeeded", std::to_string(size).c_str()); return size; } } if (offset != m_write_offset) { m_log.Emsg( "Write", "Out-of-order write detected; S3 requires writes to be in order"); m_write_offset = -1; return -EIO; } if (m_write_offset == -1) { // Previous I/O error has occurred. File is in bad state, immediately // fail. return -EIO; } if (uploadId == "") { AmazonS3CreateMultipartUpload startUpload(m_ai, m_object, m_log); if (!startUpload.SendRequest()) { m_log.Emsg("Write", "S3 multipart request failed"); m_write_offset = -1; return -ENOENT; } std::string errMsg; startUpload.Results(uploadId, errMsg); } // If we don't know the final object size, we must use the streaming // variant. if (m_object_size == -1) { return WriteStreaming(buffer, offset, size); } size_t written = 0; while (written != size) { if (m_write_op) { auto write_size = ContinueSendPart(buffer, size); if (write_size < 0) { return write_size; } offset += write_size; m_write_offset += write_size; buffer = static_cast(buffer) + write_size; size -= write_size; written += write_size; if (!size) { return written; } } m_write_op.reset(new AmazonS3SendMultipartPart(m_ai, m_object, m_log)); { std::lock_guard lk(m_pending_lk); m_pending_ops.emplace_back(m_write_lk, m_write_op); } // Calculate the size of the current chunk, if it's known. 
m_part_size = m_s3_part_size; if (!m_object_size) { m_part_size = 0; } else if (m_write_offset + static_cast(m_part_size) > m_object_size) { m_part_size = m_object_size - m_write_offset; } } return written; } ssize_t S3File::WriteStreaming(const void *buffer, off_t offset, size_t size) { m_streaming_buffer.append( std::string_view(static_cast(buffer), size)); m_write_offset += size; ssize_t rv = size; if (m_streaming_buffer.size() > 100'000'000) { rv = SendPartStreaming(); } return rv; } ssize_t S3File::SendPartStreaming() { int length = m_streaming_buffer.length(); AmazonS3SendMultipartPart upload_part_request = AmazonS3SendMultipartPart(m_ai, m_object, m_log); if (!upload_part_request.SendRequest(m_streaming_buffer, std::to_string(partNumber), uploadId, m_streaming_buffer.size(), true)) { m_log.Log(LogMask::Debug, "SendPart", "upload.SendRequest() failed"); return -EIO; } else { m_log.Log(LogMask::Debug, "SendPart", "upload.SendRequest() succeeded"); std::string etag; if (!upload_part_request.GetEtag(etag)) { m_log.Log( LogMask::Debug, "SendPart", "upload.SendRequest() response missing an eTag in response"); return -EIO; } eTags.push_back(etag); partNumber++; m_streaming_buffer.clear(); } return length; } ssize_t S3File::ContinueSendPart(const void *buffer, size_t size) { m_part_written += size; auto write_size = size; if (m_part_written > m_s3_part_size) { write_size = size - (m_part_written - m_s3_part_size); m_part_written = m_s3_part_size; } auto is_final = (m_part_size > 0 && m_part_written == m_part_size) || m_part_written == m_s3_part_size; if (m_log.getMsgMask() & LogMask::Debug) { std::stringstream ss; ss << "Sending request with buffer of size=" << write_size << ", offset=" << m_write_offset << " and is_final=" << is_final; m_log.Log(LogMask::Debug, "ContinueSendPart", ss.str().c_str()); } if (!m_write_op->SendRequest( std::string_view(static_cast(buffer), write_size), std::to_string(partNumber), uploadId, m_object_size, is_final)) { m_write_offset = 
-1; if (m_write_op->getErrorCode() == "E_TIMEOUT") { m_log.Emsg("Write", "Timeout when uploading to S3"); m_write_op.reset(); return -ETIMEDOUT; } m_log.Emsg("Write", "Upload to S3 failed: ", m_write_op->getErrorMessage().c_str()); m_write_op.reset(); return -EIO; } if (is_final) { m_part_written = 0; m_part_size = 0; std::string etag; if (!m_write_op->GetEtag(etag)) { m_log.Emsg("Write", "Result from S3 does not include ETag"); m_write_op.reset(); m_write_offset = -1; return -EIO; } eTags.push_back(etag); m_write_op.reset(); partNumber++; } return write_size; } void S3File::LaunchMonitorThread(XrdSysError &log, XrdOucEnv *envP) { std::call_once(m_monitor_launch, [&] { if (envP) { m_gstream = reinterpret_cast( envP->GetPtr("oss.gStream*")); if (m_gstream) { log.Say("Config", "S3 OSS monitoring has been configured via " "xrootd.mongstream directive"); } else { log.Say( "Config", "S3 OSS plugin is not configured to send statistics; " "use `xrootd.mongstream oss ...` directive to enable it"); } } else { log.Say("Config", "XrdOssStats plugin invoked without a configured " "environment; likely an internal error"); } std::thread t(S3File::Maintenance, std::ref(log)); t.detach(); }); } void S3File::Maintenance(XrdSysError &log) { auto sleep_duration = HTTPRequest::GetStallTimeout() / 3; if (sleep_duration > std::chrono::seconds(1)) { sleep_duration = std::chrono::seconds(1); } while (true) { std::this_thread::sleep_for(sleep_duration); try { CleanupTransfersOnce(); } catch (std::exception &exc) { std::cerr << "Warning: caught unexpected exception when trying to " "clean transfers: " << exc.what() << std::endl; } try { SendStatistics(log); } catch (std::exception &exc) { std::cerr << "Warning: caught unexpected exception when trying to " "send statistics: " << exc.what() << std::endl; } } } void S3File::SendStatistics(XrdSysError &log) { char buf[1500]; auto bypass_duration_count = S3Cache::m_bypass_duration.load(std::memory_order_relaxed); auto fetch_duration_count = 
S3Cache::m_fetch_duration.load(std::memory_order_relaxed); std::chrono::steady_clock::duration bypass_duration{ std::chrono::steady_clock::duration::rep(bypass_duration_count)}; std::chrono::steady_clock::duration fetch_duration{ std::chrono::steady_clock::duration::rep(fetch_duration_count)}; auto bypass_s = std::chrono::duration_cast>( bypass_duration) .count(); auto fetch_s = std::chrono::duration_cast>(fetch_duration) .count(); auto len = snprintf( buf, 500, "{" "\"event\":\"s3file_stats\"," "\"hit_b\":%" PRIu64 ",\"miss_b\":%" PRIu64 ",\"full_hit\":%" PRIu64 "," "\"part_hit\":%" PRIu64 ",\"miss\":%" PRIu64 ",\"bypass_b\":%" PRIu64 "," "\"bypass\":%" PRIu64 ",\"fetch_b\":%" PRIu64 ",\"fetch\":%" PRIu64 "," "\"unused_b\":%" PRIu64 ",\"prefetch_b\":%" PRIu64 ",\"prefetch\":%" PRIu64 "," "\"errors\":%" PRIu64 ",\"bypass_s\":%.3f,\"fetch_s\":%.3f" "}", static_cast( S3Cache::m_hit_bytes.load(std::memory_order_relaxed)), static_cast( S3Cache::m_miss_bytes.load(std::memory_order_relaxed)), static_cast( S3Cache::m_full_hit_count.load(std::memory_order_relaxed)), static_cast( S3Cache::m_partial_hit_count.load(std::memory_order_relaxed)), static_cast( S3Cache::m_miss_count.load(std::memory_order_relaxed)), static_cast( S3Cache::m_bypass_bytes.load(std::memory_order_relaxed)), static_cast( S3Cache::m_bypass_count.load(std::memory_order_relaxed)), static_cast( S3Cache::m_fetch_bytes.load(std::memory_order_relaxed)), static_cast( S3Cache::m_fetch_count.load(std::memory_order_relaxed)), static_cast( S3Cache::m_unused_bytes.load(std::memory_order_relaxed)), static_cast( S3Cache::m_prefetch_bytes.load(std::memory_order_relaxed)), static_cast( S3Cache::m_prefetch_count.load(std::memory_order_relaxed)), static_cast( S3Cache::m_errors.load(std::memory_order_relaxed)), bypass_s, fetch_s); if (len >= 500) { log.Log(LogMask::Error, "Statistics", "Failed to generate g-stream statistics packet"); return; } log.Log(LogMask::Debug, "Statistics", buf); if (m_gstream && 
!m_gstream->Insert(buf, len + 1)) { log.Log(LogMask::Error, "Statistics", "Failed to send g-stream statistics packet"); return; } } void S3File::CleanupTransfersOnce() { // Make a list of live transfers; erase any dead ones still on the list. std::vector, std::shared_ptr>> existing_ops; { std::lock_guard lk(m_pending_lk); existing_ops.reserve(m_pending_ops.size()); m_pending_ops.erase( std::remove_if(m_pending_ops.begin(), m_pending_ops.end(), [&](const auto &op) -> bool { auto op_lk = op.first.lock(); if (!op_lk) { // In this case, the S3File is no longer open // for write. No need to potentially clean // up the transfer. return true; } auto op_part = op.second.lock(); if (!op_part) { // In this case, the S3File object is still // open for writes but the upload has // completed. Remove from the list. return true; } // The S3File is open and upload is in-progress; // we'll tick the transfer. existing_ops.emplace_back(op_lk, op_part); return false; }), m_pending_ops.end()); } // For each live transfer, call `Tick` to advance the clock and possibly // time things out. auto now = std::chrono::steady_clock::now(); for (auto &info : existing_ops) { std::lock_guard lk(*info.first); info.second->Tick(now); } } int S3File::Close(long long *retsz) { if (!m_is_open) { m_log.Log(LogMask::Warning, "Close", "File not open"); return -EBADF; } m_is_open = false; // If we opened the object in create mode but did not actually write // anything, make a quick zero-length file. 
if (m_create && !m_write_offset) { AmazonS3Upload upload(m_ai, m_object, m_log); if (!upload.SendRequest("")) { m_log.Log(LogMask::Warning, "Close", "Failed to create zero-length object"); return -ENOENT; } else { m_log.Log(LogMask::Debug, "Close", "Creation of zero-length object succeeded"); return 0; } } if (m_write_lk) { std::lock_guard lk(*m_write_lk); if (m_object_size == -1 && !m_streaming_buffer.empty()) { m_log.Emsg("Close", "Sending final part of length", std::to_string(m_streaming_buffer.size()).c_str()); auto rv = SendPartStreaming(); if (rv < 0) { return rv; } } else if (m_write_op) { m_part_size = m_part_written; auto written = ContinueSendPart(nullptr, 0); if (written < 0) { m_log.Log(LogMask::Warning, "Close", "Failed to complete the last S3 upload"); return -EIO; } } } // this is only true if some parts have been written and need to be // finalized if (partNumber > 1) { AmazonS3CompleteMultipartUpload complete_upload_request = AmazonS3CompleteMultipartUpload(m_ai, m_object, m_log); if (!complete_upload_request.SendRequest(eTags, partNumber, uploadId)) { m_log.Emsg("SendPart", "close.SendRequest() failed"); return -ENOENT; } else { m_log.Emsg("SendPart", "close.SendRequest() succeeded"); } } return 0; } // Copy any overlapping data from the cache buffer into the request buffer, // returning the remaining data necessary to fill the request. // // - `req_off`: File offset of the beginning of the request buffer. // - `req_size`: Size of the request buffer // - `req_buf`: Request buffer to copy data into // - `cache_off`: File offset of the beginning of the cache buffer. // - `cache_size`: Size of the cache buffer // - `cache_buf`: Cache buffer to copy data from. // - `used` (output): Incremented by the number of bytes copied from the cache // buffer // - Returns the (offset, size) of the remaining reads needed to satisfy the // request. If there is only one (or no!) remaining reads, then the // corresponding tuple returned is (-1, 0). 
std::tuple OverlapCopy(off_t req_off, size_t req_size, char *req_buf, off_t cache_off, size_t cache_size, char *cache_buf, size_t &used) { if (req_off < 0) { return std::make_tuple(req_off, req_size, -1, 0); } if (cache_off < 0) { return std::make_tuple(req_off, req_size, -1, 0); } if (cache_off <= req_off) { auto cache_end = cache_off + static_cast(cache_size); if (cache_end > req_off) { auto cache_buf_off = static_cast(req_off - cache_off); auto cache_copy_bytes = std::min(static_cast(cache_end - req_off), req_size); memcpy(req_buf, cache_buf + cache_buf_off, cache_copy_bytes); used += cache_copy_bytes; return std::make_tuple(req_off + cache_copy_bytes, req_size - cache_copy_bytes, -1, 0); } } if (req_off < cache_off) { auto req_end = static_cast(req_off + req_size); if (req_end > cache_off) { auto req_buf_off = static_cast(cache_off - req_off); auto cache_end = static_cast(cache_off + cache_size); auto trailing_bytes = static_cast(req_end - cache_end); if (trailing_bytes > 0) { memcpy(req_buf + req_buf_off, cache_buf, cache_size); used += cache_size; return std::make_tuple(req_off, req_buf_off, cache_end, trailing_bytes); } memcpy(req_buf + req_buf_off, cache_buf, req_end - cache_off); used += req_end - cache_off; return std::make_tuple(req_off, req_buf_off, -1, 0); } } return std::make_tuple(req_off, req_size, -1, 0); } std::tuple S3File::S3Cache::Entry::OverlapCopy(off_t req_off, size_t req_size, char *req_buf, bool is_hit) { size_t bytes_copied = 0; auto results = ::OverlapCopy(req_off, req_size, req_buf, m_off, m_cache_entry_size, m_data.data(), bytes_copied); if (is_hit) { m_parent.m_hit_bytes.fetch_add(bytes_copied, std::memory_order_relaxed); } m_used += bytes_copied; return results; } std::tuple S3File::DownloadBypass(off_t offset, size_t size, char *buffer) { if (m_cache_entry_size && (size <= m_cache_entry_size)) { return std::make_tuple(offset, size, false); } AmazonS3Download download(m_ai, m_object, m_log, buffer); auto start = 
std::chrono::steady_clock::now(); auto result = download.SendRequest(offset, size); auto duration = std::chrono::steady_clock::now() - start; m_cache.m_bypass_duration.fetch_add(duration.count(), std::memory_order_relaxed); if (!result) { std::stringstream ss; ss << "Failed to send GetObject command: " << download.getResponseCode() << "'" << download.getResultString() << "'"; m_log.Log(LogMask::Warning, "S3File::Read", ss.str().c_str()); return std::make_tuple(0, -1, false); } return std::make_tuple(-1, 0, true); } S3File::S3Cache::~S3Cache() { std::unique_lock lk(m_mutex); m_cv.wait(lk, [&] { return !m_a.m_inprogress && !m_b.m_inprogress; }); } bool S3File::S3Cache::CouldUseAligned(off_t req, off_t cache) { if (req < 0 || cache < 0) { return false; } return (req >= cache) && (req < cache + static_cast(S3File::m_cache_entry_size)); } bool S3File::S3Cache::CouldUse(off_t req_off, size_t req_size, off_t cache_off) { if (req_off < 0 || cache_off < 0) { return false; } auto cache_end = cache_off + static_cast(m_cache_entry_size); if (req_off >= cache_off) { return req_off < cache_end; } else { return req_off + static_cast(req_size) > cache_off; } } void S3File::S3Cache::DownloadCaches(bool download_a, bool download_b, bool locked) { if (!download_a && !download_b) { return; } std::unique_lock lk(m_mutex, std::defer_lock); if (!locked) { lk.lock(); } if (download_a) { m_a.Download(m_parent); } if (download_b) { m_b.Download(m_parent); } } ssize_t S3File::S3Cache::Read(char *buffer, off_t offset, size_t size) { if (offset >= m_parent.content_length) { return 0; } if (offset + static_cast(size) > m_parent.content_length) { size = m_parent.content_length - offset; } if (m_parent.m_log.getMsgMask() & LogMask::Debug) { std::stringstream ss; ss << "Read request for object=" << m_parent.m_object << ", offset=" << offset << ", size=" << size; m_parent.m_log.Log(LogMask::Debug, "cache", ss.str().c_str()); } off_t req3_off, req4_off, req5_off, req6_off; size_t req3_size, 
req4_size, req5_size, req6_size; // Copy as much data out of the cache as possible; wait for the caches to // finish their downloads if a cache fill is in progress and we could // utilize the cache fill. if (m_cache_entry_size) { std::unique_lock lk{m_mutex}; if (m_a.m_inprogress) { m_cv.wait(lk, [&] { return !m_a.m_inprogress || !CouldUse(offset, size, m_a.m_off); }); } off_t req1_off, req2_off; size_t req1_size, req2_size; std::tie(req1_off, req1_size, req2_off, req2_size) = m_a.OverlapCopy(offset, size, buffer, true); if (m_b.m_inprogress) { m_cv.wait(lk, [&] { return !m_b.m_inprogress || !(CouldUse(req1_off, req1_size, m_b.m_off) || CouldUse(req2_off, req2_size, m_b.m_off)); }); } std::tie(req3_off, req3_size, req4_off, req4_size) = m_b.OverlapCopy( req1_off, req1_size, buffer + req1_off - offset, true); std::tie(req5_off, req5_size, req6_off, req6_size) = m_b.OverlapCopy( req2_off, req2_size, buffer + req2_off - offset, true); } else { auto [off_next, size_next, downloaded] = m_parent.DownloadBypass(offset, size, buffer); if (!downloaded) { m_parent.m_log.Log(LogMask::Warning, "S3File::Read", "Failed to download data bypassing the cache"); m_errors.fetch_add(1, std::memory_order_relaxed); return -1; } else { m_bypass_bytes.fetch_add(size, std::memory_order_relaxed); m_bypass_count.fetch_add(1, std::memory_order_relaxed); return size; } } // If any of the remaining missing bytes are bigger than a single chunk, // download those bypassing the cache. 
bool downloaded; size_t bypass_size = req3_size; std::tie(req3_off, req3_size, downloaded) = m_parent.DownloadBypass( req3_off, req3_size, buffer + req3_off - offset); if (req3_size < 0) { m_errors.fetch_add(1, std::memory_order_relaxed); return -1; } if (downloaded) { m_bypass_bytes.fetch_add(bypass_size, std::memory_order_relaxed); m_bypass_count.fetch_add(1, std::memory_order_relaxed); } bypass_size = req4_size; std::tie(req4_off, req4_size, downloaded) = m_parent.DownloadBypass( req4_off, req4_size, buffer + req4_off - offset); if (req4_size < 0) { m_errors.fetch_add(1, std::memory_order_relaxed); return -1; } if (downloaded) { m_bypass_bytes.fetch_add(bypass_size, std::memory_order_relaxed); m_bypass_count.fetch_add(1, std::memory_order_relaxed); } bypass_size = req5_size; std::tie(req5_off, req5_size, downloaded) = m_parent.DownloadBypass( req5_off, req5_size, buffer + req5_off - offset); if (req5_size < 0) { m_errors.fetch_add(1, std::memory_order_relaxed); return -1; } if (downloaded) { m_bypass_bytes.fetch_add(bypass_size, std::memory_order_relaxed); m_bypass_count.fetch_add(1, std::memory_order_relaxed); } bypass_size = req6_size; std::tie(req6_off, req6_size, downloaded) = m_parent.DownloadBypass( req6_off, req6_size, buffer + req6_off - offset); if (req6_size < 0) { m_errors.fetch_add(1, std::memory_order_relaxed); return -1; } if (downloaded) { m_bypass_bytes.fetch_add(bypass_size, std::memory_order_relaxed); m_bypass_count.fetch_add(1, std::memory_order_relaxed); } if (req3_size == 0 && req4_size == 0 && req5_size == 0 && req6_size == 0) { m_full_hit_count.fetch_add(1, std::memory_order_relaxed); // We've used more bytes in the cache, potentially all of the bytes. // In that case, we could drop one of the cache entries and prefetch // more of the object. 
bool download_a = false, download_b = false; { std::unique_lock lk{m_mutex}; auto next_offset = std::max(m_a.m_off, m_b.m_off) + static_cast(m_cache_entry_size); if (next_offset < m_parent.content_length) { if (!m_a.m_inprogress && m_a.m_used >= m_cache_entry_size) { m_a.m_inprogress = true; m_a.m_off = next_offset; download_a = true; next_offset += m_cache_entry_size; } if (!m_b.m_inprogress && m_b.m_used >= m_cache_entry_size) { m_b.m_inprogress = true; m_b.m_off = next_offset; download_b = true; } } } if (download_a) { size_t request_size = m_cache_entry_size; if (m_a.m_off + static_cast(request_size) > m_parent.content_length) { request_size = m_parent.content_length - m_a.m_off; } m_prefetch_count.fetch_add(1, std::memory_order_relaxed); m_prefetch_bytes.fetch_add(request_size, std::memory_order_relaxed); } if (download_b) { size_t request_size = m_cache_entry_size; if (m_b.m_off + static_cast(request_size) > m_parent.content_length) { request_size = m_parent.content_length - m_b.m_off; } m_prefetch_count.fetch_add(1, std::memory_order_relaxed); m_prefetch_bytes.fetch_add(request_size, std::memory_order_relaxed); } DownloadCaches(download_a, download_b, false); return size; } // At this point, the only remaining data requests must be less than the // size of the cache chunk, implying it's a partial request at the beginning // or end of the range -- hence only two can exist. 
off_t req1_off = -1, req2_off = -1; off_t *req_off = &req1_off; size_t req1_size = 0, req2_size = 0; size_t *req_size = &req1_size; if (req3_off != -1) { *req_off = req3_off; *req_size = req3_size; req_off = &req2_off; req_size = &req2_size; } if (req4_off != -1) { *req_off = req4_off; *req_size = req4_size; req_off = &req2_off; req_size = &req2_size; } if (req5_off != -1) { *req_off = req5_off; *req_size = req5_size; req_off = &req2_off; req_size = &req2_size; } if (req6_off != -1) { *req_off = req6_off; *req_size = req6_size; } if (req1_off != -1 && req2_off == -1) { auto chunk_off = static_cast(req1_off / m_cache_entry_size * m_cache_entry_size + m_cache_entry_size); auto req_end = static_cast(req1_off + req1_size); if (req_end > chunk_off) { req2_off = chunk_off; req2_size = req_end - chunk_off; req1_size = chunk_off - req1_off; } } size_t miss_bytes = req1_size + req2_size; if (miss_bytes == size) { m_miss_count.fetch_add(1, std::memory_order_relaxed); } else { m_partial_hit_count.fetch_add(1, std::memory_order_relaxed); } m_miss_bytes.fetch_add(miss_bytes, std::memory_order_relaxed); while (req1_off != -1) { std::unique_lock lk(m_mutex); m_cv.wait(lk, [&] { bool req1waitOnA = m_a.m_inprogress && CouldUseAligned(req1_off, m_a.m_off); bool req2waitOnA = m_a.m_inprogress && CouldUseAligned(req2_off, m_a.m_off); bool req1waitOnB = m_b.m_inprogress && CouldUseAligned(req1_off, m_b.m_off); bool req2waitOnB = m_b.m_inprogress && CouldUseAligned(req2_off, m_b.m_off); // If there's an idle cache entry, use it -- unless the other cache // entry is working on this request. if (!m_a.m_inprogress && !req1waitOnB && !req2waitOnB) { return true; } if (!m_b.m_inprogress && !req1waitOnA && !req2waitOnA) { return true; } // If an idle cache entry can immediately satisfy the request, we // use it. 
if (!m_a.m_inprogress && (CouldUseAligned(req1_off, m_a.m_off) || CouldUseAligned(req2_off, m_a.m_off))) { return true; } if (!m_b.m_inprogress && (CouldUseAligned(req1_off, m_b.m_off) || CouldUseAligned(req2_off, m_b.m_off))) { return true; } // If either request is in progress, we continue to wait. if (req1waitOnA || req1waitOnB || req2waitOnA || req2waitOnB) { return false; } // If either cache is idle, we will use it. return !m_a.m_inprogress || !m_b.m_inprogress; }); // std::cout << "A entry in progress: " << m_a.m_inprogress // << ", with offset " << m_a.m_off << "\n"; // std::cout << "B entry in progress: " << m_b.m_inprogress // << ", with offset " << m_b.m_off << "\n"; // Test to see if any of the buffers could immediately fulfill the // requests. auto consumed_req = false; if (!m_a.m_inprogress) { if (CouldUseAligned(req2_off, m_a.m_off)) { if (m_a.m_failed) { m_a.m_failed = false; m_a.m_off = -1; m_errors.fetch_add(1, std::memory_order_relaxed); return -1; } m_a.OverlapCopy(req2_off, req2_size, buffer + req2_off - offset, false); req2_off = -1; req2_size = 0; consumed_req = true; } if (CouldUseAligned(req1_off, m_a.m_off)) { if (m_a.m_failed) { m_a.m_failed = false; m_a.m_off = -1; m_errors.fetch_add(1, std::memory_order_relaxed); return -1; } m_a.OverlapCopy(req1_off, req1_size, buffer + req1_off - offset, false); req1_off = req2_off; req1_size = req2_size; req2_off = -1; req2_size = 0; consumed_req = true; } } if (!m_b.m_inprogress) { if (CouldUseAligned(req2_off, m_b.m_off)) { if (m_b.m_failed) { m_b.m_failed = false; m_b.m_off = -1; m_errors.fetch_add(1, std::memory_order_relaxed); return -1; } m_b.OverlapCopy(req2_off, req2_size, buffer + req2_off - offset, false); req2_off = -1; req2_size = 0; consumed_req = true; } if (CouldUseAligned(req1_off, m_b.m_off)) { if (m_b.m_failed) { m_b.m_failed = false; m_b.m_off = -1; m_errors.fetch_add(1, std::memory_order_relaxed); return -1; } m_b.OverlapCopy(req1_off, req1_size, buffer + req1_off - offset, 
false); req1_off = req2_off; req1_size = req2_size; req2_off = -1; req2_size = 0; consumed_req = true; } } if (consumed_req) { continue; } // No caches serve our requests - we must kick off a new download // std::cout << "Will download data via cache; req1 offset=" << req1_off // << ", req2 offset=" << req2_off << "\n"; bool download_a = false, download_b = false, prefetch_b = false; if (!m_a.m_inprogress && m_b.m_inprogress) { m_a.m_off = req1_off / m_cache_entry_size * m_cache_entry_size; m_a.m_inprogress = true; download_a = true; } else if (m_a.m_inprogress && !m_b.m_inprogress) { m_b.m_off = req1_off / m_cache_entry_size * m_cache_entry_size; m_b.m_inprogress = true; download_b = true; } else if (!m_a.m_inprogress && !m_b.m_inprogress) { if (req2_off != -1) { m_a.m_off = req1_off / m_cache_entry_size * m_cache_entry_size; m_a.m_inprogress = true; download_a = true; m_b.m_off = req2_off / m_cache_entry_size * m_cache_entry_size; m_b.m_inprogress = true; download_b = true; } else { if (m_a.m_used >= m_cache_entry_size) { // Cache A is fully read -- let's empty it m_a.m_off = m_b.m_off; m_b.m_off = -1; m_a.m_used = m_b.m_used; m_b.m_used = 0; std::swap(m_a.m_data, m_b.m_data); } if (m_a.m_used >= m_cache_entry_size) { // Both caches were fully read -- empty the second one. m_a.m_off = -1; m_a.m_used = 0; } if (m_a.m_off == -1 && m_b.m_off == -1) { // Prefetch both caches at once m_a.m_off = req1_off / static_cast(m_cache_entry_size) * static_cast(m_cache_entry_size); auto prefetch_offset = m_a.m_off + static_cast(m_cache_entry_size); ; download_a = true; m_a.m_inprogress = true; if (prefetch_offset < m_parent.content_length) { m_b.m_off = prefetch_offset; prefetch_b = true; m_b.m_inprogress = true; } } else { // Select one cache entry to fetch data. 
auto needed_off = req1_off / static_cast(m_cache_entry_size) * static_cast(m_cache_entry_size); if (needed_off > m_a.m_off) { m_b.m_off = needed_off; download_b = true; m_b.m_inprogress = true; auto bytes_unused = static_cast(m_cache_entry_size) - static_cast(m_b.m_used); m_unused_bytes.fetch_add( bytes_unused < 0 ? 0 : bytes_unused, std::memory_order_relaxed); } else { m_a.m_off = needed_off; download_a = true; m_a.m_inprogress = true; auto bytes_unused = static_cast(m_cache_entry_size) - static_cast(m_a.m_used); m_unused_bytes.fetch_add( bytes_unused < 0 ? 0 : bytes_unused, std::memory_order_relaxed); } } } } // else both caches are in-progress and neither satisfied our needs if (download_a) { size_t request_size = m_cache_entry_size; if (m_a.m_off + static_cast(request_size) > m_parent.content_length) { request_size = m_parent.content_length - m_a.m_off; } m_fetch_count.fetch_add(1, std::memory_order_relaxed); m_fetch_bytes.fetch_add(request_size, std::memory_order_relaxed); } if (download_b) { size_t request_size = m_cache_entry_size; if (m_b.m_off + static_cast(request_size) > m_parent.content_length) { request_size = m_parent.content_length - m_b.m_off; } m_fetch_count.fetch_add(1, std::memory_order_relaxed); m_fetch_bytes.fetch_add(request_size, std::memory_order_relaxed); } if (prefetch_b) { size_t request_size = m_cache_entry_size; if (m_b.m_off + static_cast(request_size) > m_parent.content_length) { request_size = m_parent.content_length - m_b.m_off; } m_prefetch_count.fetch_add(1, std::memory_order_relaxed); m_prefetch_bytes.fetch_add(request_size, std::memory_order_relaxed); } DownloadCaches(download_a, download_b || prefetch_b, true); } return size; } void S3File::S3Cache::Entry::Notify() { std::unique_lock lk(m_parent.m_mutex); m_inprogress = false; m_failed = !m_request->getErrorCode().empty(); auto duration = m_request->getElapsedTime(); m_parent.m_fetch_duration.fetch_add(duration.count(), std::memory_order_relaxed); if 
((m_parent.m_parent.m_log.getMsgMask() & LogMask::Warning) && m_failed) { std::stringstream ss; auto duration_ms = std::chrono::duration_cast(duration) .count(); ss << "Finished GET for object=" << m_parent.m_parent.m_object << ", offset=" << m_off << ", size=" << m_data.size() << ", duration_ms=" << duration_ms << "; failed with error '" << m_request->getErrorCode() << "'"; m_parent.m_parent.m_log.Log(LogMask::Warning, "cache", ss.str().c_str()); } else if (m_parent.m_parent.m_log.getMsgMask() & LogMask::Debug) { std::stringstream ss; auto duration_ms = std::chrono::duration_cast(duration) .count(); ss << "Finished GET for object=" << m_parent.m_parent.m_object << ", offset=" << m_off << ", size=" << m_data.size() << ", duration_ms=" << duration_ms << "; succeeded"; m_parent.m_parent.m_log.Log(LogMask::Debug, "cache", ss.str().c_str()); } m_request = nullptr; m_parent.m_cv.notify_all(); } void S3File::S3Cache::Entry::Download(S3File &file) { m_used = false; size_t request_size = m_cache_entry_size; if (m_off + static_cast(request_size) > file.content_length) { request_size = file.content_length - m_off; } m_data.resize(request_size); m_request.reset(new AmazonS3NonblockingDownload( file.m_ai, file.m_object, file.m_log, m_data.data(), *this)); // This function is always called with m_mutex held; however, // SendRequest can block if the threads are all busy; the threads // will need to grab the lock to notify of completion. So, we // must release the lock here before calling a blocking function -- // otherwise deadlock may occur. 
auto off = m_off; m_parent.m_mutex.unlock(); if (file.m_log.getMsgMask() & LogMask::Debug) { std::stringstream ss; ss << "Issuing GET for object=" << file.m_object << ", offset=" << m_off << ", size=" << request_size; file.m_log.Log(LogMask::Debug, "cache", ss.str().c_str()); } if (!m_request->SendRequest(off, request_size)) { m_parent.m_mutex.lock(); std::stringstream ss; ss << "Failed to send GetObject command: " << m_request->getResponseCode() << "'" << m_request->getResultString() << "'"; file.m_log.Log(LogMask::Warning, "S3File::Read", ss.str().c_str()); m_failed = true; m_request.reset(); } else { m_parent.m_mutex.lock(); } } extern "C" { /* This function is called when we are wrapping something. */ XrdOss *XrdOssAddStorageSystem2(XrdOss *curr_oss, XrdSysLogger *Logger, const char *config_fn, const char *parms, XrdOucEnv *envP) { XrdSysError log(Logger, "s3_"); log.Emsg("Initialize", "S3 filesystem cannot be stacked with other filesystems"); return nullptr; } /* This function is called when it is the top level file system and we are not wrapping anything */ XrdOss *XrdOssGetStorageSystem2(XrdOss *native_oss, XrdSysLogger *Logger, const char *config_fn, const char *parms, XrdOucEnv *envP) { auto log = new XrdSysError(Logger, "s3_"); envP->Export("XRDXROOTD_NOPOSC", "1"); S3File::LaunchMonitorThread(*log, envP); try { AmazonRequest::Init(*log); g_s3_oss = new S3FileSystem(Logger, config_fn, envP); return g_s3_oss; } catch (std::runtime_error &re) { log->Emsg("Initialize", "Encountered a runtime failure", re.what()); return nullptr; } } XrdOss *XrdOssGetStorageSystem(XrdOss *native_oss, XrdSysLogger *Logger, const char *config_fn, const char *parms) { return XrdOssGetStorageSystem2(native_oss, Logger, config_fn, parms, nullptr); } } // end extern "C" XrdVERSIONINFO(XrdOssGetStorageSystem, s3); XrdVERSIONINFO(XrdOssGetStorageSystem2, s3); XrdVERSIONINFO(XrdOssAddStorageSystem2, s3); 
xrootd-s3-http-0.4.1/src/S3File.hh000066400000000000000000000301251501635342300165330ustar00rootroot00000000000000/***************************************************************
 *
 * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License.  You may
 * obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ***************************************************************/

#pragma once

#include "S3FileSystem.hh"

// NOTE(review): the header names of the following includes, and the template
// argument lists further down in this file, are missing from this copy --
// restore them from the upstream source before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

int parse_path(const S3FileSystem &fs, const char *path,
			   std::string &exposedPath, std::string &object);

class AmazonS3SendMultipartPart;
template class AmazonS3NonblockingDownload;

class XrdXrootdGStream;

// File handle of the S3 storage plugin.  Operations that are not supported
// are implemented inline and return -ENOSYS (or do nothing); the supported
// ones are Open, Fstat, Read, ReadV, Write and Close.
class S3File : public XrdOssDF {
  public:
	S3File(XrdSysError &log, S3FileSystem *oss);

	virtual ~S3File() {}

	int Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) override;

	int Fchmod(mode_t mode) override { return -ENOSYS; }

	void Flush() override {}

	int Fstat(struct stat *buf) override;

	int Fsync() override { return -ENOSYS; }

	int Fsync(XrdSfsAio *aiop) override { return -ENOSYS; }

	int Ftruncate(unsigned long long size) override { return -ENOSYS; }

	off_t getMmap(void **addr) override { return 0; }

	int isCompressed(char *cxidp = 0) override { return -ENOSYS; }

	ssize_t pgRead(void *buffer, off_t offset, size_t rdlen, uint32_t *csvec,
				   uint64_t opts) override {
		return -ENOSYS;
	}

	int pgRead(XrdSfsAio *aioparm, uint64_t opts) override { return -ENOSYS; }

	ssize_t pgWrite(void *buffer, off_t offset, size_t wrlen, uint32_t *csvec,
					uint64_t opts) override {
		return -ENOSYS;
	}

	int pgWrite(XrdSfsAio *aioparm, uint64_t opts) override { return -ENOSYS; }

	ssize_t Read(off_t offset, size_t size) override { return -ENOSYS; }

	ssize_t Read(void *buffer, off_t offset, size_t size) override;

	int Read(XrdSfsAio *aiop) override { return -ENOSYS; }

	ssize_t ReadRaw(void *buffer, off_t offset, size_t size) override {
		return -ENOSYS;
	}

	ssize_t ReadV(XrdOucIOVec *readV, int rdvcnt) override;

	ssize_t Write(const void *buffer, off_t offset, size_t size) override;

	int Write(XrdSfsAio *aiop) override { return -ENOSYS; }

	ssize_t WriteV(XrdOucIOVec *writeV, int wrvcnt) override { return -ENOSYS; }

	int Close(long long *retsz = 0) override;

	// NOTE(review): `content_length` is an off_t initialised to -1; it is
	// converted to size_t on return.
	size_t getContentLength() { return content_length; }
	time_t getLastModified() { return last_modified; }

	// Launch the global monitor thread associated with S3File objects.
	// Currently, the monitor thread is used to clean up in-progress file
	// transfers that have been abandoned.
	static void LaunchMonitorThread(XrdSysError &log, XrdOucEnv *envP);

	// Sets the size of the cache entry; defaults to 2MB.
	static void SetCacheEntrySize(size_t size) { m_cache_entry_size = size; }

  private:
	// Periodic cleanup of in-progress transfers.
	//
	// Iterates through the global list of pending multipart uploads
	// that may be paused.  For each, call `Tick` on the upload and
	// see if the transfer has aborted.
	static void Maintenance(XrdSysError &log);

	// Single cleanup run for in-progress transfers.
	static void CleanupTransfersOnce();

	// Send out the statistics to the log or monitoring system.
	static void SendStatistics(XrdSysError &log);

	// Write data while in "streaming mode" where we don't know the
	// ultimate size of the file (and hence can't start streaming
	// partitions immediately).
	ssize_t WriteStreaming(const void *buffer, off_t offset, size_t size);

	// Send a fully-buffered part of the file; only used while in
	// "streaming" mode.
	ssize_t SendPartStreaming();

	ssize_t ContinueSendPart(const void *buffer, size_t size);

	// Download data synchronously, bypassing the cache.
	// The download is only performed if the request size is larger than a cache
	// entry.
	//
	// - `offset`: File offset of the request.
	// - `size`: Size of the request.
	// - `buffer`: Buffer to place resulting data into.
	// - Returns the (offset, size) of any remaining read and `true` if a
	//   download occurred.
	std::tuple DownloadBypass(off_t offset, size_t size, char *buffer);

	XrdSysError &m_log;
	S3FileSystem *m_oss;

	std::string m_object;
	S3AccessInfo m_ai;

	off_t content_length{-1};
	time_t last_modified{-1};

	static const size_t m_s3_part_size =
		100'000'000; // The size of each S3 chunk.

	// Size of the buffer associated with the cache
	static size_t m_cache_entry_size;

	bool m_is_open{false}; // File open state
	bool m_create{false};
	int partNumber{1};
	size_t m_part_written{
		0}; // Number of bytes written for the current upload chunk.
	size_t m_part_size{0}; // Size of the current upload chunk (0 if unknown).
	off_t m_write_offset{0}; // Offset of the file pointer for writes (helps
							 // detect out-of-order writes).
	off_t m_object_size{
		-1}; // Expected size of the completed object; -1 if unknown.
	// For creates, the upload ID.  NOTE(review): the original comment was
	// truncated ("as assigned by t"); presumably "assigned by the S3
	// endpoint" -- confirm.
	std::string uploadId;
	std::vector eTags;
	// When using the "streaming mode", the upload part has to be completely
	// buffered within the S3File object; this is the current buffer.
	std::string m_streaming_buffer;

	// The mutex protecting write activities.  Writes must currently be
	// serialized as we aggregate them into large operations and upload them to
	// the S3 endpoint.  The mutex prevents corruption of internal state.
	//
	// The periodic cleanup thread may decide to abort the in-progress transfer;
	// to do so, it'll need a reference to this lock that is independent of the
	// lifetime of the open file; hence, it's a shared pointer.
	std::shared_ptr m_write_lk;

	// The in-progress operation for a multi-part upload; its lifetime may be
	// spread across multiple write calls.
	std::shared_ptr m_write_op; // The in-progress operation for a multi-part upload.

	// The multipart uploads represent an in-progress request and the global
	// cleanup thread may decide to trigger a failure if the request does not
	// advance after some time period.
	//
	// To do so, we must be able to lock the associated write mutex and then
	// call `Tick` on the upload.  To avoid prolonging the lifetime of the
	// objects beyond the S3File, we hold onto a reference via a weak pointer.
	// Mutable operations on this vector are protected by the `m_pending_lk`.
	static std::vector, std::weak_ptr>> m_pending_ops;

	// Mutex protecting the m_pending_ops variable.
	static std::mutex m_pending_lk;

	// Flag determining whether the monitoring thread has been launched.
	static std::once_flag m_monitor_launch;

	// The pointer to the "g-stream" monitoring object, if available.
	static XrdXrootdGStream *m_gstream;

	// The double-buffering component for the file handle.  Reads are rounded up
	// to a particular size and kept in the file handle; before requesting new
	// data, the cache is searched to see if the read can be serviced from
	// memory.  When possible, a forward prefetch is done.
	struct S3Cache {
		struct Entry {
			bool m_failed{false}; // Indication as to whether last download
								  // attempt failed for cache entry.
			bool m_inprogress{
				false}; // Indication as to whether a download is in-progress.
			off_t m_off{-1}; // File offset of the beginning of the cache entry.
							 // -1 signifies unused entry
			size_t m_used{
				0}; // The number of bytes read out of the current cache entry.
			std::vector m_data; // Contents of cache entry
			S3Cache &m_parent; // Reference to owning object
			std::unique_ptr> m_request; // In-progress request to fill entry.

			Entry(S3Cache &cache) : m_parent(cache) {}
			void Download(
				S3File &); // Trigger download request for this cache entry.
			void Notify(); // Notify containing cache that the entry's
						   // in-progress operation has completed.

			// Copy any overlapping data from the cache buffer into the request
			// buffer, returning the remaining data necessary to fill the
			// request.
			//
			// - `req_off`: File offset of the beginning of the request buffer.
			// - `req_size`: Size of the request buffer
			// - `req_buf`: Request buffer to copy data into
			// - `is_hit`: If the request is a cache hit, then OverlapCopy will
			//   increment the hit bytes counter.
			// - Returns the (offset, size) pairs of the remaining reads needed
			//   to satisfy the request.  If only one read (or none!) remains,
			//   the unused pair is returned as (-1, 0).
			std::tuple OverlapCopy(off_t req_off, size_t req_size, char *req_buf, bool is_hit);
		};
		friend class AmazonS3NonblockingDownload;

		// Cache statistics.  These are static, i.e. shared by every S3Cache
		// instance.
		static std::atomic m_hit_bytes; // Bytes served from the cache.
		static std::atomic m_miss_bytes; // Bytes that resulted in a cache miss.
		static std::atomic m_full_hit_count; // Requests completely served from the cache.
		static std::atomic m_partial_hit_count; // Requests partially served from the cache.
		static std::atomic m_miss_count; // Requests that had no data served from the cache.
		static std::atomic m_bypass_bytes; // Bytes for requests that were large enough they
										   // bypassed the cache and fetched directly from S3.
		static std::atomic m_bypass_count; // Requests that were large enough they (at least
										   // partially) bypassed the cache and fetched
										   // directly from S3.
		static std::atomic m_fetch_bytes; // Bytes that were fetched from
										  // S3 to serve a cache miss.
		static std::atomic m_fetch_count; // Requests sent to S3 to serve a cache miss.
		static std::atomic m_unused_bytes; // Bytes that were unused at cache eviction.
		static std::atomic m_prefetch_bytes; // Bytes prefetched
		static std::atomic m_prefetch_count; // Number of prefetch requests
		static std::atomic m_errors; // Count of errors encountered by cache.
		static std::atomic m_bypass_duration; // Duration of bypass requests.
		static std::atomic m_fetch_duration; // Duration of fetch requests.

		Entry m_a{*this}; // Cache entry A.  Protected by m_mutex.
		Entry m_b{*this}; // Cache entry B.  Protected by m_mutex.
		std::mutex m_mutex; // Mutex protecting the data in the S3Cache object
		std::condition_variable m_cv; // Condition variable for notifying that
									  // new downloaded data is available.

		S3File &m_parent; // Reference to the S3File object that owns this cache.

		// Returns `true` if the request offset would be inside the cache entry.
		// The request offset is assumed to be aligned to be inside a single
		// cache entry (that is, smaller than a cache entry and not spanning two
		// entries).
		bool CouldUseAligned(off_t req, off_t cache);

		// Returns true if the specified request, [req_off, req_off + req_size),
		// has any bytes inside the cache entry starting at `cache_off`.
		bool CouldUse(off_t req_off, size_t req_size, off_t cache_off);

		// Trigger the non-blocking download into the cache entries.
		// The condition variable will be notified when one of the caches
		// finishes.
		void DownloadCaches(bool download_a, bool download_b, bool locked);

		// Trigger a blocking read from a given file
		ssize_t Read(char *buffer, off_t offset, size_t size);

		S3Cache(S3File &file) : m_parent(file) {}

		// Shutdown the cache; ensure all reads are completed before
		// deleting the objects.
		~S3Cache();
	};
	S3Cache m_cache{*this};
};
xrootd-s3-http-0.4.1/src/S3FileSystem.cc000066400000000000000000000346141501635342300177350ustar00rootroot00000000000000/***************************************************************
 *
 * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License.
You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #include "S3FileSystem.hh" #include "S3AccessInfo.hh" #include "S3Directory.hh" #include "S3File.hh" #include "logging.hh" #include "shortfile.hh" #include "stl_string_utils.hh" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include bool S3FileSystem::m_dir_marker = true; std::string S3FileSystem::m_dir_marker_name = ".pelican_dir_marker"; S3FileSystem::S3FileSystem(XrdSysLogger *lp, const char *configfn, XrdOucEnv * /*envP*/) : m_log(lp, "s3_") { m_log.Say("------ Initializing the S3 filesystem plugin."); if (!Config(lp, configfn)) { throw std::runtime_error("Failed to configure S3 filesystem plugin."); } } S3FileSystem::~S3FileSystem() {} bool S3FileSystem::handle_required_config(const char *desired_name, const std::string &source) { if (source.empty()) { std::string error; formatstr(error, "%s must specify a value", desired_name); m_log.Emsg("Config", error.c_str()); return false; } return true; } bool S3FileSystem::Config(XrdSysLogger *lp, const char *configfn) { XrdOucEnv myEnv; XrdOucGatherConf s3server_conf("s3.", &m_log); int result; if ((result = s3server_conf.Gather(configfn, XrdOucGatherConf::full_lines)) < 0) { m_log.Emsg("Config", -result, "parsing config file", configfn); return false; } char *temporary; std::string value; std::string attribute; std::shared_ptr newAccessInfo(new S3AccessInfo()); std::string exposedPath; m_log.setMsgMask(0); while ((temporary = s3server_conf.GetLine())) { attribute = 
s3server_conf.GetToken(); if (attribute == "s3.trace") { if (!XrdHTTPServer::ConfigLog(s3server_conf, m_log)) { m_log.Emsg("Config", "Failed to configure the log level"); } continue; } else if (attribute == "s3.cache_entry_size") { size_t size; auto value = s3server_conf.GetToken(); if (!value) { m_log.Emsg("Config", "s3.cache_entry_size must be specified"); return false; } std::string_view value_sv(value); auto result = std::from_chars( value_sv.data(), value_sv.data() + value_sv.size(), size); if (result.ec != std::errc()) { m_log.Emsg("Config", "s3.cache_entry_size must be a number"); return false; } else if (result.ptr != value_sv.data() + value_sv.size()) { m_log.Emsg("Config", "s3.cache_entry_size contains trailing characters"); return false; } S3File::SetCacheEntrySize(size); continue; } temporary = s3server_conf.GetToken(); if (attribute == "s3.end") { m_s3_access_map[exposedPath] = newAccessInfo; if (newAccessInfo->getS3ServiceName().empty()) { m_log.Emsg("Config", "s3.service_name not specified"); return false; } if (newAccessInfo->getS3Region().empty()) { m_log.Emsg("Config", "s3.region not specified"); return false; } std::string contents; if (newAccessInfo->getS3AccessKeyFile() != "") { if (!readShortFile(newAccessInfo->getS3AccessKeyFile(), contents)) { m_log.Emsg("Config", "s3.access_key_file not readable"); return false; } } if (newAccessInfo->getS3SecretKeyFile() != "") { if (!readShortFile(newAccessInfo->getS3SecretKeyFile(), contents)) { m_log.Emsg("Config", "s3.secret_key_file not readable"); return false; } } newAccessInfo.reset(new S3AccessInfo()); exposedPath = ""; continue; } if (!temporary) { continue; } value = temporary; if (!handle_required_config("s3.path_name", value)) { return false; } if (!handle_required_config("s3.bucket_name", value)) { return false; } if (!handle_required_config("s3.service_name", value)) { return false; } if (!handle_required_config("s3.region", value)) { return false; } if 
(!handle_required_config("s3.service_url", value)) { return false; } if (!handle_required_config("s3.access_key_file", value)) { return false; } if (!handle_required_config("s3.secret_key_file", value)) { return false; } if (!handle_required_config("s3.url_style", value)) { return false; } if (attribute == "s3.path_name") { // Normalize paths so that they all start with / if (value[0] != '/') { exposedPath = "/" + value; } else { exposedPath = value; } } else if (attribute == "s3.bucket_name") newAccessInfo->setS3BucketName(value); else if (attribute == "s3.service_name") newAccessInfo->setS3ServiceName(value); else if (attribute == "s3.region") newAccessInfo->setS3Region(value); else if (attribute == "s3.access_key_file") newAccessInfo->setS3AccessKeyFile(value); else if (attribute == "s3.secret_key_file") newAccessInfo->setS3SecretKeyFile(value); else if (attribute == "s3.service_url") newAccessInfo->setS3ServiceUrl(value); else if (attribute == "s3.url_style") { s3_url_style = value; newAccessInfo->setS3UrlStyle(s3_url_style); } } if (s3_url_style.empty()) { m_log.Emsg("Config", "s3.url_style not specified"); return false; } else { // We want this to be case-insensitive. toLower(s3_url_style); } if (s3_url_style != "virtual" && s3_url_style != "path") { m_log.Emsg( "Config", "invalid s3.url_style specified. Must be 'virtual' or 'path'"); return false; } return true; } // Object Allocation Functions // XrdOssDF *S3FileSystem::newDir(const char *user) { return new S3Directory(m_log, *this); } XrdOssDF *S3FileSystem::newFile(const char *user) { return new S3File(m_log, this); } // // Stat a path within the S3 bucket as if it were a hierarchical // path. // // Note that S3 is _not_ a hierarchy and may contain objects that // can't be represented inside XRootD. In that case, we just return // an -ENOENT. 
// // For example, consider a setup with two objects: // // - /foo/bar.txt // - /foo // // In this case, `Stat` of `/foo` will return a file so walking the // bucket will miss `/foo/bar.txt` // // We will also return an ENOENT for objects with a trailing `/`. So, // if there's a single object in the bucket: // // - /foo/bar.txt/ // // then a `Stat` of `/foo/bar.txt` and `/foo/bar.txt/` will both return // `-ENOENT`. int S3FileSystem::Stat(const char *path, struct stat *buff, int opts, XrdOucEnv *env) { m_log.Log(XrdHTTPServer::Debug, "Stat", "Stat'ing path", path); std::string exposedPath, object; auto rv = parsePath(path, exposedPath, object); if (rv != 0) { m_log.Log(XrdHTTPServer::Debug, "Stat", "Failed to parse path:", path); return rv; } auto ai = getS3AccessInfo(exposedPath, object); if (!ai) { m_log.Log(XrdHTTPServer::Info, "Stat", "Prefix not configured for Stat"); return -ENOENT; } if (ai->getS3BucketName().empty()) { return -EINVAL; } trimslashes(object); if (object == "") { if (m_dir_marker) { // We even do the `Stat` for `/` despite the fact we always // return the same directory object. This way, we test for // permission denied or other errors with the S3 instance. object = m_dir_marker_name; } else { if (buff) { memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0700 | S_IFDIR; buff->st_nlink = 0; buff->st_uid = 1; buff->st_gid = 1; buff->st_size = 4096; buff->st_mtime = buff->st_atime = buff->st_ctime = 0; buff->st_dev = 0; buff->st_ino = 1; } return 0; } } // First, check to see if the file name is an object. If it's // a 404 response, then we will assume it may be a directory. 
AmazonS3Head headCommand = AmazonS3Head(*ai, object, m_log); auto res = headCommand.SendRequest(); if (res) { if (buff) { memset(buff, '\0', sizeof(struct stat)); if (object == m_dir_marker_name) { buff->st_mode = 0700 | S_IFDIR; buff->st_size = 4096; buff->st_nlink = 0; } else { buff->st_mode = 0600 | S_IFREG; buff->st_size = headCommand.getSize(); buff->st_nlink = 1; } buff->st_uid = buff->st_gid = 1; buff->st_mtime = buff->st_atime = buff->st_ctime = 0; buff->st_dev = 0; buff->st_ino = 1; } return 0; } else { auto httpCode = headCommand.getResponseCode(); if (httpCode == 0) { if (m_log.getMsgMask() & XrdHTTPServer::Info) { std::stringstream ss; ss << "Failed to stat path " << path << "; error: " << headCommand.getErrorMessage() << " (code=" << headCommand.getErrorCode() << ")"; m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); } return -EIO; } if (httpCode == 404) { if (object == m_dir_marker_name) { if (buff) { memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0700 | S_IFDIR; buff->st_nlink = 0; buff->st_uid = 1; buff->st_gid = 1; buff->st_size = 4096; buff->st_mtime = buff->st_atime = buff->st_ctime = 0; buff->st_dev = 0; buff->st_ino = 1; } return 0; } object = object + "/"; } else { if (m_log.getMsgMask() & XrdHTTPServer::Info) { std::stringstream ss; ss << "Failed to stat path " << path << "; response code " << httpCode; m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); } return httpCode == 403 ? -EACCES : -EIO; } } // List the object name as a pseudo-directory. Limit the results // back to a single item (we're just looking to see if there's a // common prefix here). 
AmazonS3List listCommand(*ai, object, 1, m_log); res = listCommand.SendRequest(""); if (!res) { auto httpCode = listCommand.getResponseCode(); if (httpCode == 0) { if (m_log.getMsgMask() & XrdHTTPServer::Info) { std::stringstream ss; ss << "Failed to stat path " << path << "; error: " << listCommand.getErrorMessage() << " (code=" << listCommand.getErrorCode() << ")"; m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); } return -EIO; } else { if (m_log.getMsgMask() & XrdHTTPServer::Info) { std::stringstream ss; ss << "Failed to stat path " << path << "; response code " << httpCode; m_log.Log(XrdHTTPServer::Info, "Stat", ss.str().c_str()); } switch (httpCode) { case 404: return -ENOENT; case 403: return -EPERM; default: return -EIO; } } } std::string errMsg; std::vector objInfo; std::vector commonPrefixes; std::string ct; res = listCommand.Results(objInfo, commonPrefixes, ct, errMsg); if (!res) { m_log.Log(XrdHTTPServer::Warning, "Stat", "Failed to parse S3 results:", errMsg.c_str()); return -EIO; } if (m_log.getMsgMask() & XrdHTTPServer::Debug) { std::stringstream ss; ss << "Stat on object returned " << objInfo.size() << " objects and " << commonPrefixes.size() << " prefixes"; m_log.Log(XrdHTTPServer::Debug, "Stat", ss.str().c_str()); } // Recall we queried for 'object name' + '/'; as in, 'foo/' // instead of 'foo'. // If there's an object name with a trailing '/', then we // aren't able to open it or otherwise represent it within // XRootD. Hence, we just pretend it doesn't exist. 
bool foundObj = false; for (const auto &obj : objInfo) { if (obj.m_key == object) { foundObj = true; break; } } if (foundObj) { return -ENOENT; } if (!objInfo.size() && !commonPrefixes.size()) { return -ENOENT; } if (buff) { memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0700 | S_IFDIR; buff->st_nlink = 0; buff->st_uid = 1; buff->st_gid = 1; buff->st_size = 4096; buff->st_mtime = buff->st_atime = buff->st_ctime = 0; buff->st_dev = 0; buff->st_ino = 1; } return 0; } int S3FileSystem::Create(const char *tid, const char *path, mode_t mode, XrdOucEnv &env, int opts) { // Is path valid? std::string exposedPath, object; int rv = parsePath(path, exposedPath, object); if (rv != 0) { return rv; } // // We could instead invoke the upload mchinery directly to create a // 0-byte file, but it seems smarter to remove a round-trip (in // S3File::Open(), checking if the file exists) than to add one // (here, creating the file if it doesn't exist). // return 0; } int S3FileSystem::parsePath(const char *fullPath, std::string &exposedPath, std::string &object) const { // // Check the path for validity. // std::filesystem::path p(fullPath); auto pathComponents = p.begin(); // Iterate through components of the fullPath until we either find a match // or we've reached the end of the path. std::filesystem::path currentPath = *pathComponents; while (pathComponents != p.end()) { if (exposedPathExists(currentPath.string())) { exposedPath = currentPath.string(); break; } ++pathComponents; if (pathComponents != p.end()) { currentPath /= *pathComponents; } else { return -ENOENT; } } // Objects names may contain path separators. 
++pathComponents; if (pathComponents == p.end()) { object = ""; return 0; } std::filesystem::path objectPath = *pathComponents++; for (; pathComponents != p.end(); ++pathComponents) { objectPath /= (*pathComponents); } object = objectPath.string(); return 0; } const std::shared_ptr S3FileSystem::getS3AccessInfo(const std::string &exposedPath, std::string &object) const { auto ai = m_s3_access_map.at(exposedPath); if (!ai) { return ai; } if (ai->getS3BucketName().empty()) { // Bucket name is embedded in the "object" name. Split it into the // bucket and "real" object. std::shared_ptr aiCopy(new S3AccessInfo(*ai)); auto firstSlashIdx = object.find('/'); if (firstSlashIdx == std::string::npos) { aiCopy->setS3BucketName(object); object = ""; } else { aiCopy->setS3BucketName(object.substr(0, firstSlashIdx)); object = object.substr(firstSlashIdx + 1); } return aiCopy; } return ai; } xrootd-s3-http-0.4.1/src/S3FileSystem.hh000066400000000000000000000127241501635342300177450ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ***************************************************************/

#pragma once

#include "S3AccessInfo.hh"

#include #include #include #include #include #include #include

// XrdOss implementation that exposes one or more S3 endpoints.
//
// Only configuration, object allocation, `Stat` and `Create` are declared
// here and defined out of line; every other XrdOss operation is an inline
// stub (most return -ENOSYS, the `Init` overloads and `Features` return 0).
class S3FileSystem : public XrdOss {
  public:
	// Runs Config(); the definition throws std::runtime_error on failure.
	S3FileSystem(XrdSysLogger *lp, const char *configfn, XrdOucEnv *envP);
	virtual ~S3FileSystem();

	// Parse the `s3.*` directives of `configfn`; returns false on error.
	bool Config(XrdSysLogger *lp, const char *configfn);

	XrdOssDF *newDir(const char *user = 0);
	XrdOssDF *newFile(const char *user = 0);

	int Chmod(const char *path, mode_t mode, XrdOucEnv *env = 0) {
		return -ENOSYS;
	}
	void Connect(XrdOucEnv &env) {}
	int Create(const char *tid, const char *path, mode_t mode, XrdOucEnv &env,
			   int opts = 0);
	void Disc(XrdOucEnv &env) {}
	void EnvInfo(XrdOucEnv *env) {}
	uint64_t Features() { return 0; }
	int FSctl(int cmd, int alen, const char *args, char **resp = 0) {
		return -ENOSYS;
	}
	int Init(XrdSysLogger *lp, const char *cfn) { return 0; }
	int Init(XrdSysLogger *lp, const char *cfn, XrdOucEnv *en) { return 0; }
	int Mkdir(const char *path, mode_t mode, int mkpath = 0,
			  XrdOucEnv *env = 0) {
		return -ENOSYS;
	}
	int Reloc(const char *tident, const char *path, const char *cgName,
			  const char *anchor = 0) {
		return -ENOSYS;
	}
	int Remdir(const char *path, int Opts = 0, XrdOucEnv *env = 0) {
		return -ENOSYS;
	}
	int Rename(const char *oPath, const char *nPath, XrdOucEnv *oEnvP = 0,
			   XrdOucEnv *nEnvP = 0) {
		return -ENOSYS;
	}
	int Stat(const char *path, struct stat *buff, int opts = 0,
			 XrdOucEnv *env = 0);
	int Stats(char *buff, int blen) { return -ENOSYS; }
	int StatFS(const char *path, char *buff, int &blen, XrdOucEnv *env = 0) {
		return -ENOSYS;
	}
	int StatLS(XrdOucEnv &env, const char *path, char *buff, int &blen) {
		return -ENOSYS;
	}
	int StatPF(const char *path, struct stat *buff, int opts) {
		return -ENOSYS;
	}
	int StatPF(const char *path, struct stat *buff) { return -ENOSYS; }
	int StatVS(XrdOssVSInfo *vsP, const char *sname = 0, int updt = 0) {
		return -ENOSYS;
	}
	int StatXA(const char *path, char *buff, int &blen, XrdOucEnv *env = 0) {
		return -ENOSYS;
	}
	int StatXP(const char *path, unsigned long long &attr,
			   XrdOucEnv *env = 0) {
		return -ENOSYS;
	}
	int Truncate(const char *path, unsigned long long fsize,
				 XrdOucEnv *env = 0) {
		return -ENOSYS;
	}
	int Unlink(const char *path, int Opts = 0, XrdOucEnv *env = 0) {
		return -ENOSYS;
	}
	int Lfn2Pfn(const char *Path, char *buff, int blen) { return -ENOSYS; }
	const char *Lfn2Pfn(const char *Path, char *buff, int blen, int &rc) {
		return nullptr;
	}

	// Given a path as seen by XRootD, split it into the configured prefix and
	// the object within the prefix.
	//
	// The returned `exposedPath` can be later used with the `get*` functions to
	// fetch the required S3 configuration.
	int parsePath(const char *fullPath, std::string &exposedPath,
				  std::string &object) const;

	// True when `exposedPath` is a key of the access map.
	bool exposedPathExists(const std::string &exposedPath) const {
		return m_s3_access_map.count(exposedPath) > 0;
	}

	// The accessors below use `std::map::at`, which throws
	// std::out_of_range when `exposedPath` is not a key of the access map.
	const std::string &getS3ServiceName(const std::string &exposedPath) const {
		return m_s3_access_map.at(exposedPath)->getS3ServiceName();
	}
	const std::string &getS3Region(const std::string &exposedPath) const {
		return m_s3_access_map.at(exposedPath)->getS3Region();
	}
	const std::string &getS3ServiceURL(const std::string &exposedPath) const {
		return m_s3_access_map.at(exposedPath)->getS3ServiceUrl();
	}
	const std::string &getS3BucketName(const std::string &exposedPath) const {
		return m_s3_access_map.at(exposedPath)->getS3BucketName();
	}
	const std::string &
	getS3AccessKeyFile(const std::string &exposedPath) const {
		return m_s3_access_map.at(exposedPath)->getS3AccessKeyFile();
	}
	const std::string &
	getS3SecretKeyFile(const std::string &exposedPath) const {
		return m_s3_access_map.at(exposedPath)->getS3SecretKeyFile();
	}
	const std::string &getS3URLStyle() const { return s3_url_style; }

	const std::shared_ptr
	getS3AccessInfo(const std::string &exposedPath, std::string &object) const;

  private:
	XrdSysError m_log;

	// The filesystem logic can test for an empty object to see if there's
	// authorized access to the bucket.  This relies on said object not
	// existing -- a reasonable assumption but not foolproof.  Hence, we have
	// a boolean (currently not configurable) to disable the behavior.
	// Note: in the future, if we want to create an "empty" directory, we could
	// just create an empty object.
	static bool m_dir_marker;

	// The name of the empty object for directory existence.
	static std::string m_dir_marker_name;

	// Logs and returns false when `source` is empty.
	bool handle_required_config(const char *desired_name,
								const std::string &source);

	// Access settings keyed by exposed path.
	// NOTE(review): the template arguments of this declaration are missing
	// from this copy of the file.
	std::map> m_s3_access_map;

	// Value of the most recently parsed `s3.url_style` directive.
	std::string s3_url_style;
};
xrootd-s3-http-0.4.1/src/TokenFile.cc000066400000000000000000000047401501635342300173200ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/

#include "TokenFile.hh"
#include "logging.hh"
#include "shortfile.hh"
#include "stl_string_utils.hh"

#include

using namespace std::chrono_literals;

// How long a token read from disk is served from memory before the file is
// read again.
const std::chrono::steady_clock::duration TokenFile::m_token_expiry = 5s;

// Retrieve the bearer token to use with HTTP requests
//
// Returns true on success and sets `token` to the value of
// the bearer token to use.  If there were no errors - but no
// token is to be used - token is set to the empty string.
// Otherwise, returns false.
bool TokenFile::Get(std::string &token) const { if (m_token_file.empty()) { token.clear(); return true; } XrdSysRWLockHelper lock(m_token_mutex.get(), true); if (m_token_load_success) { auto now = std::chrono::steady_clock::now(); if (now - m_last_token_load <= m_token_expiry) { token = m_token_contents; return true; } } lock.UnLock(); // Upgrade to write lock - we will mutate the data structures. lock.Lock(m_token_mutex.get(), false); std::string contents; if (!readShortFile(m_token_file, contents)) { if (m_log) { m_log->Log( XrdHTTPServer::LogMask::Warning, "getAuthToken", "Failed to read token authorization file:", strerror(errno)); } m_token_load_success = false; return false; } std::istringstream istream; istream.str(contents); m_last_token_load = std::chrono::steady_clock::now(); m_token_load_success = true; for (std::string line; std::getline(istream, line);) { trim(line); if (line.empty()) { continue; } if (line[0] == '#') { continue; } m_token_contents = line; token = m_token_contents; return true; } // If there are no error reading the file but the file has no tokens, we // assume this indicates no token should be used. token = ""; return true; } xrootd-s3-http-0.4.1/src/TokenFile.hh000066400000000000000000000035261501635342300173330ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ***************************************************************/

#pragma once

#include #include #include #include #include

// A class representing a bearer token found from a file on disk
//
// The object is movable but not copyable.  `Get` is const; the cached state
// it updates is therefore declared `mutable`.
class TokenFile {
  public:
	// `filename` is the token file to read; `log` may be null (it is
	// checked before use when reporting read failures).
	TokenFile(std::string filename, XrdSysError *log)
		: m_log(log), m_token_file(filename),
		  m_token_mutex(new XrdSysRWLock()) {}

	TokenFile(const TokenFile &) = delete;
	TokenFile(TokenFile &&other) noexcept = default;
	TokenFile &operator=(TokenFile &&other) noexcept = default;

	// Fetch the current token into the argument; returns false when the
	// token file could not be read.
	bool Get(std::string &) const;

  private:
	// Whether the most recent attempt to load the token file succeeded.
	mutable bool m_token_load_success{false};
	XrdSysError *m_log;
	std::string m_token_file; // Location of a file containing a bearer token
							  // for auth'z.
	mutable std::string m_token_contents; // Cached copy of the token itself.
	mutable std::chrono::steady_clock::time_point
		m_last_token_load; // Last time the token was loaded from disk.
	// Maximum age of the cached token before the file is read again.
	static const std::chrono::steady_clock::duration m_token_expiry;
	// NOTE(review): the template argument of this declaration is missing
	// from this copy of the file; the constructor stores an XrdSysRWLock.
	mutable std::unique_ptr
		m_token_mutex; // Note: when we move to C++17, convert to
					   // std::shared_mutex
};
xrootd-s3-http-0.4.1/src/logging.cc000066400000000000000000000055761501635342300170760ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License.
* ***************************************************************/ #include "logging.hh" #include #include #include using namespace XrdHTTPServer; std::string XrdHTTPServer::LogMaskToString(int mask) { if (mask == LogMask::All) { return "all"; } bool has_entry = false; std::stringstream ss; if (mask & LogMask::Dump) { ss << "dump"; has_entry = true; } if (mask & LogMask::Debug) { ss << (has_entry ? ", " : "") << "debug"; has_entry = true; } if (mask & LogMask::Info) { ss << (has_entry ? ", " : "") << "info"; has_entry = true; } if (mask & LogMask::Warning) { ss << (has_entry ? ", " : "") << "warning"; has_entry = true; } if (mask & LogMask::Error) { ss << (has_entry ? ", " : "") << "error"; has_entry = true; } return ss.str(); } bool XrdHTTPServer::ConfigLog(XrdOucGatherConf &conf, XrdSysError &log) { std::string map_filename; char *val = nullptr; if (!(val = conf.GetToken())) { log.Emsg("Config", "httpserver.trace requires an argument. Usage: " "httpserver.trace [all|error|warning|info|debug|none]"); return false; } do { if (!strcmp(val, "all")) { log.setMsgMask(log.getMsgMask() | LogMask::All); } else if (!strcmp(val, "error")) { log.setMsgMask(log.getMsgMask() | LogMask::Error); } else if (!strcmp(val, "warning")) { log.setMsgMask(log.getMsgMask() | LogMask::Warning | LogMask::Error); } else if (!strcmp(val, "info")) { log.setMsgMask(log.getMsgMask() | LogMask::Info | LogMask::Warning | LogMask::Error); } else if (!strcmp(val, "dump")) { log.setMsgMask(log.getMsgMask() | LogMask::Dump | LogMask::Debug | LogMask::Info | LogMask::Warning | LogMask::Error); } else if (!strcmp(val, "debug")) { log.setMsgMask(log.getMsgMask() | LogMask::Debug | LogMask::Info | LogMask::Warning | LogMask::Error); } else if (!strcmp(val, "none")) { log.setMsgMask(0); } else { log.Emsg("Config", "trace encountered an unknown directive:", val); return false; } } while ((val = conf.GetToken())); log.Emsg("Config", "Logging levels enabled -", 
XrdHTTPServer::LogMaskToString(log.getMsgMask()).c_str()); return true; } xrootd-s3-http-0.4.1/src/logging.hh000066400000000000000000000025111501635342300170720ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once #include class XrdOucGatherConf; class XrdSysError; namespace XrdHTTPServer { enum LogMask { Debug = 0x01, Info = 0x02, Warning = 0x04, Error = 0x08, All = 0x0f, Dump = 0x10 }; // Given a bitset based on LogMask, return a human-readable string of the set // logging levels. std::string LogMaskToString(int mask); // Given an xrootd configuration object that matched on httpserver.trace, parse // the remainder of the line and configure the logger appropriately. bool ConfigLog(XrdOucGatherConf &conf, XrdSysError &log); } // namespace XrdHTTPServer xrootd-s3-http-0.4.1/src/shortfile.cc000066400000000000000000000053121501635342300174330ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2023, HTCondor Team, UW-Madison * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. 
You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #include "shortfile.hh" #include #include #include #include #include #include ssize_t full_read(int fd, void *ptr, size_t nbytes) { ssize_t nleft, nread; nleft = nbytes; while (nleft > 0) { REISSUE_READ: nread = read(fd, ptr, nleft); if (nread < 0) { /* error happened, ignore if EINTR, otherwise inform the caller */ if (errno == EINTR) { goto REISSUE_READ; } /* The caller has no idea how much was actually read in this scenario and the file offset is undefined */ return -1; } else if (nread == 0) { /* We've reached the end of file marker, so stop looping. 
*/ break; } nleft -= nread; ptr = ((char *)ptr) + nread; } /* return how much was actually read, which could include 0 in an EOF situation */ return (nbytes - nleft); } bool readShortFile(const std::string &fileName, std::string &contents) { int fd = open(fileName.c_str(), O_RDONLY, 0600); if (fd < 0) { return false; } struct stat statbuf; int rv = fstat(fd, &statbuf); if (rv < 0) { return false; } unsigned long fileSize = statbuf.st_size; char *rawBuffer = (char *)malloc(fileSize + 1); assert(rawBuffer != NULL); unsigned long totalRead = full_read(fd, rawBuffer, fileSize); close(fd); if (totalRead != fileSize) { free(rawBuffer); return false; } contents.assign(rawBuffer, fileSize); free(rawBuffer); return true; } bool writeShortFile(const std::string &fileName, std::string &contents, int flags) { int fd = open(fileName.c_str(), O_WRONLY | flags, 0600); if (fd < 0) { return false; } auto ptr = &contents[0]; ssize_t nwrite; auto nleft = contents.size(); while (nleft > 0) { REISSUE_WRITE: nwrite = write(fd, ptr, nleft); if (nwrite < 0) { /* error happened, ignore if EINTR, otherwise inform the caller */ if (errno == EINTR) { goto REISSUE_WRITE; } close(fd); return false; } nleft -= nwrite; ptr += nwrite; } close(fd); return true; } xrootd-s3-http-0.4.1/src/shortfile.hh000066400000000000000000000016541501635342300174520ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2023, HTCondor Team, UW-Madison * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once #include bool readShortFile(const std::string &fileName, std::string &contents); bool writeShortFile(const std::string &fileName, std::string &contents, int flags); xrootd-s3-http-0.4.1/src/stl_string_utils.cc000066400000000000000000000106131501635342300210440ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, HTCondor Team, UW-Madison * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
/* ***************************************************************/

// The project header carries the public declarations (default argument of
// substring(), printf-format attributes). The guard only allows this
// translation unit to be compiled on its own when the header is not on the
// include path; inside the project the header is always picked up.
#if defined(__has_include)
#if __has_include("stl_string_utils.hh")
#include "stl_string_utils.hh"
#endif
#else
#include "stl_string_utils.hh"
#endif

#include <algorithm>
#include <cctype>
#include <cstdarg>
#include <cstdio>
#include <string>
#include <vector>

// Return the characters of `str` in the half-open range [left, right).
// Passing std::string::npos as `right` returns everything from `left` on.
// std::string::substr throws std::out_of_range when `left` is past the end.
std::string substring(const std::string &str, size_t left, size_t right) {
	if (right == std::string::npos) {
		return str.substr(left);
	}
	return str.substr(left, right - left);
}

// Strip leading and trailing whitespace (as classified by isspace) from `str`
// in place. A string made only of whitespace becomes empty.
void trim(std::string &str) {
	// isspace() is only defined for values representable as unsigned char
	// (or EOF), so every byte is converted before classification.
	size_t begin = 0;
	while (begin < str.size() &&
		   std::isspace(static_cast<unsigned char>(str[begin]))) {
		++begin;
	}

	// `end` is one past the last character that is kept.
	size_t end = str.size();
	while (end > begin &&
		   std::isspace(static_cast<unsigned char>(str[end - 1]))) {
		--end;
	}

	if (begin != 0 || end != str.size()) {
		str = str.substr(begin, end - begin);
	}
}

// Lower-case `str` in place, byte by byte, using tolower().
void toLower(std::string &str) {
	// Same unsigned-char requirement as isspace() above.
	std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) {
		return static_cast<char>(std::tolower(c));
	});
}

// printf-style formatting into a std::string.
//
//   s      - destination string
//   concat - true: append the formatted text to `s`; false: replace `s`
//   format - printf format string
//   pargs  - arguments for `format`; the list is copied, never consumed
//
// Returns the number of characters produced, or a negative value when
// vsnprintf() reports a formatting error (in which case `s` is untouched).
int vformatstr_impl(std::string &s, bool concat, const char *format,
					va_list pargs) {
	char fixbuf[512];
	const int fixlen = sizeof(fixbuf);

	// Work on a copy so that `pargs` stays valid for a second pass.
	va_list args;
	va_copy(args, pargs);
	const int n = vsnprintf(fixbuf, fixlen, format, args);
	va_end(args);

	// A negative length must not reach append()/assign(), where it would be
	// converted to an enormous size_t.
	if (n < 0) {
		return n;
	}

	// The fixed buffer was sufficient, so we are done.
	if (n < fixlen) {
		if (concat) {
			s.append(fixbuf, n);
		} else {
			s.assign(fixbuf, n);
		}
		return n;
	}

	// Otherwise vsnprintf() told us how much room is needed (plus one byte
	// for the terminating NUL). The vector releases the memory on every
	// exit path; an allocation failure surfaces as std::bad_alloc.
	std::vector<char> varbuf(static_cast<size_t>(n) + 1);

	// Re-print, using a buffer of sufficient size.
	va_copy(args, pargs);
	const int nn = vsnprintf(varbuf.data(), varbuf.size(), format, args);
	va_end(args);

	// Sanity check. This ought not to happen: the second pass must fit in
	// the buffer sized from the first one.
	if (nn < 0 || static_cast<size_t>(nn) >= varbuf.size()) {
		return -1;
	}

	if (concat) {
		s.append(varbuf.data(), nn);
	} else {
		s.assign(varbuf.data(), nn);
	}
	return nn;
}

// Replace `s` with the formatted text; see vformatstr_impl().
int vformatstr(std::string &s, const char *format, va_list pargs) {
	return vformatstr_impl(s, false, format, pargs);
}

// Append the formatted text to `s`; see vformatstr_impl().
int vformatstr_cat(std::string &s, const char *format, va_list pargs) {
	return vformatstr_impl(s, true, format, pargs);
}

// Variadic front end of vformatstr(): replace `s` with the formatted text.
int formatstr(std::string &s, const char *format, ...) {
	va_list args;
	va_start(args, format);
	const int r = vformatstr_impl(s, false, format, args);
	va_end(args);
	return r;
}

// Variadic front end of vformatstr_cat(): append the formatted text to `s`.
int formatstr_cat(std::string &s, const char *format, ...) {
	va_list args;
	va_start(args, format);
	const int r = vformatstr_impl(s, true, format, args);
	va_end(args);
	return r;
}

// Percent-encode `input`. Letters, digits and the characters '_.-~/' are
// copied through; every other byte becomes '%' followed by its two-digit
// upper-case hexadecimal value (e.g. ' ' becomes "%20").
std::string urlquote(const std::string input) {
	static const char hexdigits[] = "0123456789ABCDEF";

	std::string output;
	output.reserve(3 * input.size());
	for (const char val : input) {
		// Work on the unsigned byte so values >= 0x80 encode correctly.
		const unsigned char byte = static_cast<unsigned char>(val);
		const bool passthrough =
			(byte >= '0' && byte <= '9') || (byte >= 'A' && byte <= 'Z') ||
			(byte >= 'a' && byte <= 'z') || byte == '_' || byte == '.' ||
			byte == '-' || byte == '~' || byte == '/';
		if (passthrough) {
			output += val;
		} else {
			output += '%';
			output += hexdigits[byte >> 4];
			output += hexdigits[byte & 0x0F];
		}
	}
	return output;
}

// Remove every leading and trailing '/' from `path` in place. A path that
// consists only of slashes becomes empty.
void trimslashes(std::string &path) {
	const size_t first = path.find_first_not_of('/');
	if (first == std::string::npos) {
		// Empty, or nothing but slashes.
		path.clear();
		return;
	}

	// A non-slash character exists, so `last` is valid and >= first.
	const size_t last = path.find_last_not_of('/');
	if (first != 0 || last != path.size() - 1) {
		path = path.substr(first, last - first + 1);
	}
}

// Archive member boundary that shares this source line, kept verbatim:
// xrootd-s3-http-0.4.1/src/stl_string_utils.hh000066400000000000000000000033771501635342300210670ustar00rootroot00000000000000
// /***************************************************************
//  *
//  * Copyright (C) 2024, HTCondor Team, UW-Madison
//  *
//  * Licensed under the Apache License, Version 2.0 (the "License"); you
//  * may not use this file
except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #pragma once #include #ifndef CHECK_PRINTF_FORMAT #ifdef __GNUC__ #define CHECK_PRINTF_FORMAT(a, b) __attribute__((__format__(__printf__, a, b))) #else #define CHECK_PRINTF_FORMAT(a, b) #endif #endif void trim(std::string &str); std::string substring(const std::string &str, size_t left, size_t right = std::string::npos); void toLower(std::string &str); int formatstr(std::string &s, const char *format, ...) CHECK_PRINTF_FORMAT(2, 3); int formatstr_cat(std::string &s, const char *format, ...) CHECK_PRINTF_FORMAT(2, 3); // Given an input string, quote it to a form that is safe // for embedding in a URL query parameter. // // Letters, digits, and the characters '_.-~/' are never // quoted; otherwise, the byte is represented with its percent-encoded // ASCII representation (e.g., ' ' becomes %20) std::string urlquote(const std::string input); // Trim the slash(es) from a given object name // // foo/bar/ -> foo/bar // bar/baz -> bar/baz // foo/bar/// -> foo/bar // /a/b -> a/b void trimslashes(std::string &path); xrootd-s3-http-0.4.1/test/000077500000000000000000000000001501635342300153145ustar00rootroot00000000000000xrootd-s3-http-0.4.1/test/CMakeLists.txt000066400000000000000000000127651501635342300200670ustar00rootroot00000000000000set(EXECUTABLES "minio" "mc") foreach(EXE IN LISTS EXECUTABLES) find_program(EXE_BIN ${EXE} HINTS "${CMAKE_CURRENT_BINARY_DIR}") if(NOT EXE_BIN) message("Didnt find ${EXE}. 
Downloading it ...") if (APPLE) set(SYS_NAME "darwin") else() set(SYS_NAME "linux") endif() if(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") set(SYS_PROC "arm64") else() set(SYS_PROC "amd64") endif() if (EXE STREQUAL "minio") set(DIR "server") else() set(DIR "client") endif() set(URL "https://dl.min.io/${DIR}/${EXE}/release/${SYS_NAME}-${SYS_PROC}/${EXE}") set(FILE "${CMAKE_CURRENT_BINARY_DIR}/${EXE}") message("Downloading ${URL} to ${FILE}") file(DOWNLOAD "${URL}" "${FILE}" STATUS download_status) list(GET download_status 0 RESULT_CODE) if (NOT RESULT_CODE EQUAL 0) message(FATAL_ERROR "Failed to download ${EXE}: ${RESULT_CODE}") endif() file(CHMOD "${FILE}" PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) endif() endforeach() find_program(MINIO_BIN minio HINTS "${CMAKE_CURRENT_BINARY_DIR}") find_program(MC_BIN mc HINTS "${CMAKE_CURRENT_BINARY_DIR}") include(GoogleTest) add_executable( s3-gtest s3_tests.cc ) add_executable( s3-unit-test s3_unit_tests.cc ) add_executable( http-gtest http_tests.cc ) add_executable( filter-gtest filter_tests.cc ../src/shortfile.cc ) target_link_libraries(s3-gtest XrdS3Testing GTest::gtest_main Threads::Threads) target_link_libraries(s3-unit-test XrdS3Testing GTest::gtest_main Threads::Threads) target_link_libraries(http-gtest XrdHTTPServerTesting GTest::gtest_main Threads::Threads) target_link_libraries(filter-gtest XrdOssFilterTesting GTest::gtest_main Threads::Threads) gtest_add_tests(TARGET filter-gtest) gtest_add_tests(TARGET s3-unit-test TEST_LIST s3UnitTests) set_tests_properties(${s3UnitTests} PROPERTIES FIXTURES_REQUIRED S3::s3_basic ENVIRONMENT "ENV_FILE=${CMAKE_BINARY_DIR}/tests/s3_basic/setup.sh" ) add_test( NAME s3-unit COMMAND ${CMAKE_CURRENT_BINARY_DIR}/s3-gtest ) add_test( NAME http-unit COMMAND ${CMAKE_CURRENT_BINARY_DIR}/http-gtest "${CMAKE_BINARY_DIR}/tests/basic/setup.sh" ) if (VALGRIND) add_test( NAME valgrind-s3 COMMAND 
${VALGRIND_BIN} ${CMAKE_CURRENT_BINARY_DIR}/s3-unit-test -R FileSystemS3Fixture.UploadLargePartAligned ) set_tests_properties(valgrind-s3 PROPERTIES FIXTURES_REQUIRED S3::s3_basic ) endif() set_tests_properties(http-unit PROPERTIES FIXTURES_REQUIRED HTTP::basic ) ###################################### # Integration tests. ###################################### add_test(NAME HTTP::basic::setup COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/xrdhttp-setup.sh" basic) set_tests_properties(HTTP::basic::setup PROPERTIES FIXTURES_SETUP HTTP::basic ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR};SOURCE_DIR=${CMAKE_SOURCE_DIR};XROOTD_BINDIR=${XRootD_DATA_DIR}/../bin" ) add_test(NAME HTTP::basic::teardown COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/xrdhttp-teardown.sh" basic) set_tests_properties(HTTP::basic::teardown PROPERTIES FIXTURES_CLEANUP HTTP::basic ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" ) add_test(NAME HTTP::basic::test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/xrdhttp-test.sh" basic) list(APPEND BASIC_TEST_LOGS ${CMAKE_CURRENT_BINARY_DIR}/tests/basic/server.log) list(APPEND BASIC_TEST_LOGS ${CMAKE_CURRENT_BINARY_DIR}/tests/basic/client.log) set_tests_properties(HTTP::basic::test PROPERTIES FIXTURES_REQUIRED HTTP::basic ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" ATTACHED_FILES_ON_FAIL "${BASIC_TEST_LOGS}" ) #### # Start of S3 tests #### add_test(NAME S3::s3_basic::setup COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/s3-setup.sh" s3_basic) set_tests_properties(S3::s3_basic::setup PROPERTIES FIXTURES_SETUP S3::s3_basic ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR};SOURCE_DIR=${CMAKE_SOURCE_DIR};XROOTD_BINDIR=${XRootD_DATA_DIR}/../bin;MINIO_BIN=${MINIO_BIN};MC_BIN=${MC_BIN}" ) add_test(NAME S3::s3_basic::teardown COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/s3-teardown.sh" s3_basic) set_tests_properties(S3::s3_basic::teardown PROPERTIES FIXTURES_CLEANUP S3::s3_basic ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" ) add_test(NAME S3::s3_basic::test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/s3-test.sh" s3_basic) 
add_test(NAME S3::s3_basic::stress_test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/s3-stress-test.sh" s3_basic) list(APPEND S3_BASIC_TEST_LOGS ${CMAKE_CURRENT_BINARY_DIR}/tests/s3_basic/server.log) list(APPEND S3_BASIC_TEST_LOGS ${CMAKE_CURRENT_BINARY_DIR}/tests/s3_basic/client.log) set_tests_properties(S3::s3_basic::test PROPERTIES FIXTURES_REQUIRED S3::s3_basic ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" ATTACHED_FILES_ON_FAIL "${S3_BASIC_TEST_LOGS}" ) set_tests_properties(S3::s3_basic::stress_test PROPERTIES FIXTURES_REQUIRED S3::s3_basic ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR}" ATTACHED_FILES_ON_FAIL "${S3_BASIC_TEST_LOGS}" ) ####################################### # Stress-test using the go-wrk binary # ####################################### add_test( NAME S3::s3_basic::gowrk_test COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/s3-gowrk-test.sh" s3_basic ) set_tests_properties( S3::s3_basic::gowrk_test PROPERTIES FIXTURES_REQUIRED S3::s3_basic ENVIRONMENT "BINARY_DIR=${CMAKE_BINARY_DIR};WRK_BIN=${GoWrk}" ATTACHED_FILES_ON_FAIL "${S3_BASIC_TEST_LOGS}" ) xrootd-s3-http-0.4.1/test/README.md000066400000000000000000000003701501635342300165730ustar00rootroot00000000000000# Tests These are tests of the s3 and http code. We recommend running tests via `ctest`, which will launch the server and prepare the environment for gtest. An example of how to run `ctest` can be found in [test.yml](.github/workflows/test.yml). xrootd-s3-http-0.4.1/test/filter_tests.cc000066400000000000000000000400061501635342300203320ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2025, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. 
You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #include "../src/Filter.hh" #include "s3_tests_common.hh" #include #include #include class SimpleDir final : public XrdOssDF { public: virtual int Opendir(const char *path, XrdOucEnv &env) { if (!strncmp(path, "/prefix", 7)) { m_subdir = !strcmp(path, "/prefix3"); return 0; } return -ENOENT; } virtual int Readdir(char *buff, int blen) { if (m_idx >= 3) { if (m_subdir && m_idx == 3 && blen >= 8) { memcpy(buff, "idx.txt", 8); m_idx++; return 0; } buff[0] = '\0'; return 0; } auto result = (m_subdir ? "subdir" : "idx") + std::to_string(m_idx++) + (m_subdir ? "" : ".txt"); if (result.size() + 1 < static_cast(blen)) { memcpy(buff, result.c_str(), result.size()); buff[result.size()] = '\0'; } else { return -ENOMEM; } return 0; } virtual int StatRet(struct stat *buff) { if (!buff) return 0; memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0750 | ((m_subdir && m_idx <= 3) ? 
S_IFDIR : S_IFREG); buff->st_size = m_idx; return 0; } virtual int Close(long long *retsz = 0) { m_idx = 0; return 0; } private: bool m_subdir{false}; unsigned m_idx{0}; }; class SimpleFile final : public XrdOssDF { public: virtual int Fchmod(mode_t mode) { return 0; } virtual void Flush() {} virtual int Fstat(struct stat *buf) { if (!buf) return 0; memset(buf, '\0', sizeof(struct stat)); buf->st_mode = 0640 | S_IFREG; return 0; } virtual int Fsync() { return 0; } virtual int Fsync(XrdSfsAio *aiop) { return 0; } virtual int Ftruncate(unsigned long long flen) { return 0; } virtual int Open(const char *path, int Oflag, mode_t Mode, XrdOucEnv &env) { return 0; } virtual ssize_t pgRead(void *buffer, off_t offset, size_t rdlen, uint32_t *csvec, uint64_t opts) { return 0; } virtual int pgRead(XrdSfsAio *aioparm, uint64_t opts) { return 0; } virtual ssize_t pgWrite(void *buffer, off_t offset, size_t wrlen, uint32_t *csvec, uint64_t opts) { return 0; } virtual int pgWrite(XrdSfsAio *aioparm, uint64_t opts) { return 0; } virtual ssize_t Read(off_t offset, size_t size) { return 0; } virtual ssize_t Read(void *buffer, off_t offset, size_t size) { return 0; } virtual int Read(XrdSfsAio *aiop) { (void)aiop; return 0; } virtual ssize_t ReadRaw(void *buffer, off_t offset, size_t size) { return 0; } virtual ssize_t ReadV(XrdOucIOVec *readV, int rdvcnt) { return 0; } virtual ssize_t Write(const void *buffer, off_t offset, size_t size) { return 0; } virtual int Write(XrdSfsAio *aiop) { (void)aiop; return 0; } virtual ssize_t WriteV(XrdOucIOVec *writeV, int wrvcnt) { return 0; } virtual int Close(long long *retsz = 0) { return 0; } virtual int Fctl(int cmd, int alen, const char *args, char **resp = 0) { return 0; } }; class SimpleFilesystem final : public XrdOss { public: virtual XrdOssDF *newDir(char const *user) { return new SimpleDir; } virtual XrdOssDF *newFile(char const *user) { return new SimpleFile; } virtual int Chmod(const char *path, mode_t mode, XrdOucEnv *envP = 0) { 
return 0; } virtual int Create(const char *tid, const char *path, mode_t mode, XrdOucEnv &env, int opts = 0) { return 0; } virtual int Init(XrdSysLogger *lp, const char *cfn) { return 0; } virtual int Mkdir(const char *path, mode_t mode, int mkpath = 0, XrdOucEnv *envP = 0) { return 0; } virtual int Remdir(const char *path, int Opts = 0, XrdOucEnv *envP = 0) { return 0; } virtual int Rename(const char *oPath, const char *nPath, XrdOucEnv *oEnvP = 0, XrdOucEnv *nEnvP = 0) { return 0; } virtual int Stat(const char *path, struct stat *buff, int opts = 0, XrdOucEnv *envP = 0); virtual int Truncate(const char *path, unsigned long long fsize, XrdOucEnv *envP = 0) { return 0; } virtual int Unlink(const char *path, int Opts = 0, XrdOucEnv *envP = 0) { return 0; } virtual ~SimpleFilesystem() {} }; int SimpleFilesystem::Stat(const char *path, struct stat *buff, int opts, XrdOucEnv *envP) { if (!strcmp(path, "/prefix1") || !strcmp(path, "/prefix2") || !strcmp(path, "/prefix3") || !strcmp(path, "/prefix4") || !strcmp(path, "/prefix3/subdir1") || !strcmp(path, "/prefix3/subdir2") || !strcmp(path, "/prefix3/subdir3") || !strcmp(path, "/prefix3/subdir4")) { if (!buff) return 0; memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0750 | S_IFDIR; return 0; } if (!strcmp(path, "/prefix1/idx0.txt") || !strcmp(path, "/prefix2/idx1.txt") || !strcmp(path, "/prefix2/idx2.txt") || !strcmp(path, "/prefix2/idx3.txt") || !strcmp(path, "/prefix2/idx4.txt") || !strcmp(path, "/prefix3/subdir1/1.txt") || !strcmp(path, "/prefix3/subdir1/2.txt") || !strcmp(path, "/prefix3/subdir1/3.txt") || !strcmp(path, "/prefix3/subdir1/4.txt") || !strcmp(path, "/prefix3/subdir2/1.txt") || !strcmp(path, "/prefix3/subdir2/2.txt") || !strcmp(path, "/prefix3/subdir2/3.txt") || !strcmp(path, "/prefix3/subdir1/4.txt") || !strcmp(path, "/prefix3/subdir3/1.txt") || !strcmp(path, "/prefix3/subdir3/2.txt") || !strcmp(path, "/prefix3/subdir3/3.txt") || !strcmp(path, "/prefix3/subdir3/4.txt") || !strcmp(path, 
"/prefix3/subdir4/1.txt") || !strcmp(path, "/prefix4/subdir2/idx0.txt") || !strcmp(path, "/prefix5/idx.txt")) { if (!buff) return 0; memset(buff, '\0', sizeof(struct stat)); buff->st_mode = 0750 | S_IFREG; return 0; } return -ENOENT; } class FileSystemGlob : public FileSystemFixtureBase { protected: virtual std::string GetConfig() override { return R"( filter.glob /prefix1 /prefix2/*.txt filter.glob /prefix3/*/*.txt filter.prefix /prefix5 filter.trace all )"; } }; TEST_F(FileSystemGlob, GlobFilter) { SimpleFilesystem sfs; XrdSysLogger log; FilterFileSystem fs(new SimpleFilesystem, &log, m_configfn.c_str(), nullptr); XrdOucEnv env; struct stat buf; ASSERT_EQ(sfs.Stat("/prefix1", &buf), 0); ASSERT_EQ(fs.Stat("/prefix1", &buf), 0); ASSERT_EQ(sfs.Stat("/prefix1/idx0.txt", &buf), 0); ASSERT_EQ(fs.Stat("/prefix1/idx0.txt", &buf), -ENOENT); ASSERT_EQ(fs.Stat("/prefix5/idx0.txt", &buf), -ENOENT); ASSERT_EQ(fs.Stat("/prefix5/idx.txt", &buf), 0); std::unique_ptr sfsdir(sfs.newDir("")); ASSERT_NE(nullptr, sfsdir); ASSERT_EQ(sfsdir->Opendir("/prefix1", env), 0); char buff[256]; ASSERT_EQ(sfsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "idx0.txt"); ASSERT_EQ(sfsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "idx1.txt"); ASSERT_EQ(sfsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "idx2.txt"); ASSERT_EQ(sfsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, ""); ASSERT_EQ(sfsdir->Close(), 0); std::unique_ptr fsdir(fs.newDir()); ASSERT_NE(nullptr, fsdir); ASSERT_EQ(fsdir->Opendir("/prefix1", env), 0); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, ""); ASSERT_EQ(fsdir->Close(), 0); ASSERT_EQ(fsdir->Opendir("/prefix2", env), 0); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "idx0.txt"); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "idx1.txt"); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "idx2.txt"); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, ""); ASSERT_EQ(fsdir->Close(), 0); sfsdir.reset(sfs.newDir("")); 
ASSERT_NE(sfsdir.get(), nullptr); ASSERT_EQ(sfsdir->Opendir("/prefix3", env), 0); ASSERT_EQ(sfsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "subdir0"); ASSERT_EQ(sfsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "subdir1"); ASSERT_EQ(sfsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "subdir2"); ASSERT_EQ(sfsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "idx.txt"); ASSERT_EQ(sfsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, ""); ASSERT_EQ(sfsdir->Close(), 0); ASSERT_EQ(fsdir->Opendir("/prefix3", env), 0); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "subdir0"); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "subdir1"); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "subdir2"); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, ""); ASSERT_EQ(fsdir->Close(), 0); ASSERT_EQ(fsdir->Opendir("/prefix3/subdir0", env), 0); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "idx0.txt"); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "idx1.txt"); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, "idx2.txt"); ASSERT_EQ(fsdir->Readdir(buff, 256), 0); ASSERT_STREQ(buff, ""); ASSERT_EQ(fsdir->Close(), 0); std::unique_ptr fsfile(fs.newFile()); ASSERT_NE(nullptr, fsfile); ASSERT_EQ(fsfile->Open("/prefix1/idx0.txt", 0, 0, env), -ENOENT); std::unique_ptr sfsfile(sfs.newFile("")); ASSERT_NE(nullptr, sfsfile); ASSERT_EQ(sfsfile->Open("/prefix1/idx0.txt", 0, 0, env), 0); fsfile.reset(fs.newFile()); ASSERT_NE(nullptr, fsfile); ASSERT_EQ(fsfile->Open("/prefix2/idx0.txt", 0, 0, env), 0); fsfile.reset(fs.newFile()); ASSERT_NE(nullptr, fsfile); ASSERT_EQ(fsfile->Open("/prefix3/subdir2/idx0.txt", 0, 0, env), 0); fsfile.reset(fs.newFile()); ASSERT_NE(nullptr, fsfile); ASSERT_EQ(fsfile->Open("/prefix4/subdir2/idx0.txt", 0, 0, env), -ENOENT); sfsfile.reset(sfs.newFile("")); ASSERT_NE(nullptr, sfsfile); ASSERT_EQ(sfsfile->Open("/prefix4/subdir2/idx0.txt", 0, 0, env), 0); } TEST_F(FileSystemGlob, GlobNormal) 
{ XrdSysLogger log; FilterFileSystem fs(new SimpleFilesystem, &log, m_configfn.c_str(), nullptr); XrdOucEnv env; bool partial; XrdSysError dst(&log, "FileSystemGlob"); dst.Emsg("Glob", "Testing /"); ASSERT_TRUE(fs.GlobOne("/", {false, "/*"}, partial)); ASSERT_TRUE(partial); ASSERT_TRUE(fs.GlobOne("/", {false, "/"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Glob", "Testing /foo"); ASSERT_TRUE(fs.GlobOne("/foo", {false, "/*"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /bar"); ASSERT_FALSE(fs.GlobOne("/foo", {false, "/bar"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Glob", "Testing /foo/bar/idx.txt"); ASSERT_FALSE(fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/*"}, partial)); ASSERT_FALSE(partial); ASSERT_TRUE( fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/bar/idx.txt"}, partial)); ASSERT_FALSE(partial); ASSERT_TRUE(fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/bar/idx.txt/baz"}, partial)); ASSERT_TRUE(partial); ASSERT_TRUE( fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/*/idx.txt"}, partial)); ASSERT_FALSE(partial); ASSERT_TRUE( fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/*/*.txt"}, partial)); ASSERT_FALSE(partial); ASSERT_TRUE( fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/bar/*.txt"}, partial)); ASSERT_FALSE(partial); ASSERT_TRUE( fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/bar/idx.*"}, partial)); ASSERT_FALSE(partial); ASSERT_FALSE( fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/bar/t.*"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Glob", "Testing /foo/.bar/idx.txt"); ASSERT_TRUE( fs.GlobOne("/foo/.bar/idx.txt", {true, "/foo/*/idx.txt"}, partial)); ASSERT_FALSE(partial); ASSERT_FALSE( fs.GlobOne("/foo/.bar/idx.txt", {false, "/foo/*/idx.txt"}, partial)); dst.Emsg("Glob", "Testing /.bar"); ASSERT_TRUE(fs.GlobOne("/.bar", {true, "/*"}, partial)); ASSERT_FALSE(partial); ASSERT_FALSE(fs.GlobOne("/.bar", {false, "/*"}, partial)); } TEST_F(FileSystemGlob, Globstar) { XrdSysLogger log; FilterFileSystem fs(new SimpleFilesystem, &log, m_configfn.c_str(), 
nullptr); XrdOucEnv env; bool partial; XrdSysError dst(&log, "FileSystemGlob"); dst.Emsg("Globstar", "Testing /some/path"); ASSERT_TRUE(fs.GlobOne("/some/path", {false, "/some/**"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /"); ASSERT_TRUE(fs.GlobOne("/", {false, "/**"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /some"); ASSERT_TRUE(fs.GlobOne("/some", {false, "/**"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /some"); ASSERT_TRUE(fs.GlobOne("/some", {false, "/some/**"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /some/path/subdir/foo.txt"); ASSERT_TRUE( fs.GlobOne("/some/path/subdir/foo.txt", {false, "/some/**"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /foo/bar/idx.txt"); ASSERT_TRUE( fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/**/idx.txt"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /foo/bar/baz/idx.txt"); ASSERT_TRUE(fs.GlobOne("/foo/bar/baz/idx.txt", {false, "/foo/**/idx.txt"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /foo/idx.txt"); ASSERT_TRUE( fs.GlobOne("/foo/idx.txt", {false, "/foo/**/idx.txt"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /foo/bar/idx.txt"); ASSERT_TRUE(fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/**/bar/idx.txt"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /foo/bar/bar/idx.txt"); ASSERT_TRUE(fs.GlobOne("/foo/bar/bar/idx.txt", {false, "/foo/**/bar/idx.txt"}, partial)); ASSERT_FALSE(partial); dst.Emsg("Globstar", "Testing /foo/bar/bar"); ASSERT_TRUE( fs.GlobOne("/foo/bar/bar", {false, "/foo/**/bar/idx.txt"}, partial)); ASSERT_TRUE(partial); dst.Emsg("Globstar", "Testing /foo/bar/idx.txt"); ASSERT_TRUE( fs.GlobOne("/foo/bar/idx.txt", {false, "/foo/**/false"}, partial)); ASSERT_TRUE(partial); // Test that "dot files" are not matched by the globstar operator, // matching the bash implementation. 
dst.Emsg("Globstar", "Testing /foo/.bar/idx.txt"); partial = false; ASSERT_FALSE( fs.GlobOne("/foo/.bar/idx.txt", {false, "/foo/**/idx.txt"}, partial)); ASSERT_FALSE(partial); ASSERT_TRUE( fs.GlobOne("/foo/.bar/idx.txt", {true, "/foo/**/idx.txt"}, partial)); ASSERT_FALSE(partial); ASSERT_TRUE( fs.GlobOne("/foo/.bar/idx.txt", {true, "/foo/**/bar.txt"}, partial)); ASSERT_TRUE(partial); partial = false; dst.Emsg("Globstar", "Testing negative match with dotfile"); ASSERT_FALSE( fs.GlobOne("/foo/.bar/idx.txt", {false, "/foo/**/bar.txt"}, partial)); ASSERT_FALSE(partial); ASSERT_TRUE( fs.GlobOne("/foo/.bar/idx.txt", {true, "/foo/**/bar.txt"}, partial)); ASSERT_TRUE(partial); dst.Emsg("Globstra", "Testing /foo/1/.bar/idx.txt"); ASSERT_FALSE( fs.GlobOne("/foo/1/.bar/idx.txt", {false, "/foo/**/idx.txt"}, partial)); ASSERT_TRUE(fs.GlobOne("/foo/1/.bar/idx.txt", {false, "/foo/**/.bar/idx.txt"}, partial)); ASSERT_TRUE(fs.GlobOne("/foo/1/.bar/idx.txt", {false, "/foo/**/1/.bar/idx.txt"}, partial)); dst.Emsg("Globstra", "Testing /foo/.1/.bar/idx.txt"); ASSERT_FALSE(fs.GlobOne("/foo/.1/.bar/idx.txt", {false, "/foo/**/.bar/idx.txt"}, partial)); } TEST_F(FileSystemGlob, SanitizePrefix) { XrdSysLogger log; FilterFileSystem fs(new SimpleFilesystem, &log, m_configfn.c_str(), nullptr); auto [success, path] = fs.SanitizePrefix("/path/prefix"); ASSERT_TRUE(success); ASSERT_EQ(path, "/path/prefix"); std::tie(success, path) = fs.SanitizePrefix("/path//prefix"); ASSERT_TRUE(success); ASSERT_EQ(path, "/path/prefix"); std::tie(success, path) = fs.SanitizePrefix("foo"); ASSERT_FALSE(success); std::tie(success, path) = fs.SanitizePrefix("/path/./prefix"); ASSERT_FALSE(success); std::tie(success, path) = fs.SanitizePrefix("/../foo"); ASSERT_FALSE(success); std::tie(success, path) = fs.SanitizePrefix("/f*"); ASSERT_FALSE(success); std::tie(success, path) = fs.SanitizePrefix("/f/?(foo|bar)"); ASSERT_FALSE(success); std::tie(success, path) = fs.SanitizePrefix("/[:alpha:]"); ASSERT_FALSE(success); } 
xrootd-s3-http-0.4.1/test/http_tests.cc000066400000000000000000000062371501635342300200340ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #include "../src/HTTPCommands.hh" #include "../src/HTTPFileSystem.hh" #include #include #include #include #include #include #include #include std::string g_ca_file; std::string g_config_file; std::string g_url; void parseEnvFile(const std::string &fname) { std::ifstream fh(fname); if (!fh.is_open()) { std::cerr << "Failed to open env file: " << strerror(errno); exit(1); } std::string line; while (std::getline(fh, line)) { auto idx = line.find("="); if (idx == std::string::npos) { continue; } auto key = line.substr(0, idx); auto val = line.substr(idx + 1); if (key == "X509_CA_FILE") { g_ca_file = val; setenv("X509_CERT_FILE", g_ca_file.c_str(), 1); } else if (key == "XROOTD_URL") { g_url = val; } else if (key == "XROOTD_CFG") { g_config_file = val; } } } TEST(TestHTTPFile, TestXfer) { XrdSysLogger log; HTTPFileSystem fs(&log, g_config_file.c_str(), nullptr); struct stat si; auto rc = fs.Stat("/hello_world.txt", &si); ASSERT_EQ(rc, 0); ASSERT_EQ(si.st_size, 13); auto fh = fs.newFile(); XrdOucEnv env; rc = fh->Open("/hello_world.txt", O_RDONLY, 0700, env); ASSERT_EQ(rc, 0); char buf[12]; auto res = fh->Read(buf, 0, 
12); ASSERT_EQ(res, 12); ASSERT_EQ(memcmp(buf, "Hello, World", 12), 0); ASSERT_EQ(fh->Close(), 0); } class TestHTTPRequest : public HTTPRequest { public: XrdSysLogger log{}; XrdSysError err{&log, "TestHTTPR3equest"}; TestHTTPRequest(const std::string &url) : HTTPRequest(url, err, nullptr) {} }; TEST(TestHTTPParseProtocol, Test1) { const std::string httpURL = "https://my-test-url.com:443"; TestHTTPRequest req{httpURL}; // Test parsing of https std::string protocol; req.parseProtocol("https://my-test-url.com:443", protocol); ASSERT_EQ(protocol, "https"); // Test parsing for http req.parseProtocol("http://my-test-url.com:443", protocol); ASSERT_EQ(protocol, "http"); } int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); if (argc != 2) { printf("Usage: %s test_env_file", argv[0]); return 1; } setenv("XRDINSTANCE", "xrootd", 1); std::cout << "Running HTTP test with environment file " << argv[1] << std::endl; parseEnvFile(argv[1]); auto logger = new XrdSysLogger(2, 0); auto log = new XrdSysError(logger, "curl_"); HTTPRequest::Init(*log); return RUN_ALL_TESTS(); } xrootd-s3-http-0.4.1/test/s3-gowrk-test.sh000077500000000000000000000011531501635342300203040ustar00rootroot00000000000000#!/bin/sh TEST_NAME=$1 if [ -z "$BINARY_DIR" ]; then echo "\$BINARY_DIR environment variable is not set; cannot run test" exit 1 fi if [ ! -d "$BINARY_DIR" ]; then echo "$BINARY_DIR is not a directory; cannot run test" exit 1 fi echo "Running $TEST_NAME - go-wrk based stress test" echo > "$BINARY_DIR/tests/$TEST_NAME/client.log" if [ ! -f "$BINARY_DIR/tests/$TEST_NAME/setup.sh" ]; then echo "Test environment file $BINARY_DIR/tests/$TEST_NAME/setup.sh does not exist - cannot run test" exit 1 fi . 
"$BINARY_DIR/tests/$TEST_NAME/setup.sh" "$WRK_BIN" -c 200 -d 10 -no-vr -T 10000 -f "$PLAYBACK_FILE" xrootd-s3-http-0.4.1/test/s3-setup.sh000077500000000000000000000233011501635342300173350ustar00rootroot00000000000000#!/bin/sh TEST_NAME=$1 VALGRIND=0 if [ "$2" = "valgrind" ]; then VALGRIND=1 fi if [ -z "$BINARY_DIR" ]; then echo "\$BINARY_DIR environment variable is not set; cannot run test" exit 1 fi if [ ! -d "$BINARY_DIR" ]; then echo "$BINARY_DIR is not a directory; cannot run test" exit 1 fi if [ -z "$SOURCE_DIR" ]; then echo "\$SOURCE_DIR environment variable is not set; cannot run test" exit 1 fi if [ ! -d "$SOURCE_DIR" ]; then echo "\$SOURCE_DIR environment variable is not set; cannot run test" exit 1 fi echo "Setting up S3 server for $TEST_NAME test" if [ -z "$MINIO_BIN" ]; then echo "minio binary not found; cannot run unit test" exit 1 fi if [ -z "$MC_BIN" ]; then echo "mc binary not found; cannot run unit test" exit 1 fi XROOTD_BIN="$XROOTD_BINDIR/xrootd" if [ -z "XROOTD_BIN" ]; then echo "xrootd binary not found; cannot run unit test" exit 1 fi mkdir -p "$BINARY_DIR/tests/$TEST_NAME" RUNDIR=$(mktemp -d -p "$BINARY_DIR/tests/$TEST_NAME" test_run.XXXXXXXX) if [ ! -d "$RUNDIR" ]; then echo "Failed to create test run directory; cannot run minio" exit 1 fi echo "Using $RUNDIR as the test run's home directory." 
cd "$RUNDIR" MINIO_DATADIR="$RUNDIR/minio-data" MINIO_CLIENTDIR="$RUNDIR/minio-client" MINIO_CERTSDIR="$RUNDIR/minio-certs" XROOTD_CONFIGDIR="$RUNDIR/xrootd-config" mkdir -p "$XROOTD_CONFIGDIR" XROOTD_RUNDIR=$(mktemp -d -p /tmp xrootd_test.XXXXXXXX) mkdir -p "$MINIO_DATADIR" mkdir -p "$MINIO_CERTSDIR/ca" mkdir -p "$MINIO_CERTSDIR/CAs" mkdir -p "$MINIO_CLIENTDIR" echo > "$BINARY_DIR/tests/$TEST_NAME/server.log" # Create the TLS credentials for the test openssl genrsa -out "$MINIO_CERTSDIR/tlscakey.pem" 4096 >> "$BINARY_DIR/tests/$TEST_NAME/server.log" touch "$MINIO_CERTSDIR/ca/index.txt" echo '01' > "$MINIO_CERTSDIR/ca/serial.txt" cat > "$MINIO_CERTSDIR/tlsca.ini" <> "$BINARY_DIR/tests/$TEST_NAME/server.log" if [ "$?" -ne 0 ]; then echo "Failed to generate CA request" exit 1 fi # Create the host certificate request openssl genrsa -out "$MINIO_CERTSDIR/private.key" 4096 >> "$BINARY_DIR/tests/$TEST_NAME/server.log" openssl req -new -key "$MINIO_CERTSDIR/private.key" -config "$MINIO_CERTSDIR/tlsca.ini" -out "$MINIO_CERTSDIR/public.csr" -outform PEM -subj "/CN=$(hostname)" 0<&- >> "$BINARY_DIR/tests/$TEST_NAME/server.log" if [ "$?" -ne 0 ]; then echo "Failed to generate host certificate request" exit 1 fi openssl ca -config "$MINIO_CERTSDIR/tlsca.ini" -batch -policy signing_policy -extensions cert_extensions -out "$MINIO_CERTSDIR/public.crt" -infiles "$MINIO_CERTSDIR/public.csr" 0<&- 2>> "$BINARY_DIR/tests/$TEST_NAME/server.log" if [ "$?" -ne 0 ]; then echo "Failed to sign host certificate request" exit 1 fi # Set the minio root credentials: export MINIO_ROOT_USER=minioadmin export MINIO_ROOT_PASSWORD=QXDEiQxQw8qY MINIO_USER=miniouser MINIO_PASSWORD=2Z303QCzRI7s printf "%s" "$MINIO_USER" > "$RUNDIR/access_key" printf "%s" "$MINIO_PASSWORD" > "$RUNDIR/secret_key" # Launch minio "$MINIO_BIN" --certs-dir "$MINIO_CERTSDIR" server --address "$(hostname):0" "$MINIO_DATADIR" 0<&- >"$BINARY_DIR/tests/$TEST_NAME/server.log" 2>&1 & MINIO_PID=$! 
echo "minio daemon PID: $MINIO_PID" sleep 1 MINIO_URL=$(grep "API: " "$BINARY_DIR/tests/$TEST_NAME/server.log" | tr ':' ' ' | awk '{print $NF}' | tail -n 1) IDX=0 while [ -z "$MINIO_URL" ]; do sleep 1 MINIO_URL=$(grep "API: " "$BINARY_DIR/tests/$TEST_NAME/server.log" | tr ':' ' ' | awk '{print $NF}' | tail -n 1) IDX=$(($IDX+1)) if [ $IDX -gt 1 ]; then echo "Waiting for minio to start ($IDX seconds so far) ..." fi if [ $IDX -eq 60 ]; then echo "minio failed to start - failing" exit 1 fi done MINIO_URL=https://$(hostname):$MINIO_URL echo "Minio API server started on $MINIO_URL" cat > "$BINARY_DIR/tests/$TEST_NAME/setup.sh" < "$RUNDIR/hello_world.txt" "$MC_BIN" --insecure --config-dir "$MINIO_CLIENTDIR" cp "$RUNDIR/hello_world.txt" userminio/test-bucket/hello_world.txt "$MC_BIN" --insecure --config-dir "$MINIO_CLIENTDIR" cp "$RUNDIR/hello_world.txt" userminio/test-bucket/hello_world2.txt IDX=0 COUNT=25 while [ $IDX -ne $COUNT ]; do if ! dd if=/dev/urandom "of=$RUNDIR/test_file" bs=1024 count=3096 2> /dev/null; then echo "Failed to create random file to upload" exit 1 fi if ! 
"$MC_BIN" --insecure --config-dir "$MINIO_CLIENTDIR" cp "$RUNDIR/test_file" "userminio/test-bucket/test_file_$IDX.random" > /dev/null; then echo "Failed to upload random file to S3 instance" exit 1 fi IDX=$((IDX+1)) done #### # Starting XRootD config with S3 backend #### export XROOTD_CONFIG="$XROOTD_CONFIGDIR/xrootd.cfg" BUCKET_NAME=test-bucket cat > "$XROOTD_CONFIG" < $XROOTD_CONFIGDIR/authdb < $XROOTD_CONFIGDIR/access_key echo "$MINIO_PASSWORD" > $XROOTD_CONFIGDIR/secret_key export X509_CERT_FILE=$MINIO_CERTSDIR/CAs/tlsca.pem if [ "$VALGRIND" -eq 1 ]; then valgrind --leak-check=full --track-origins=yes "$XROOTD_BIN" -c "$XROOTD_CONFIG" -l "$BINARY_DIR/tests/$TEST_NAME/server.log" 0<&- 2>>"$BINARY_DIR/tests/$TEST_NAME/server.log" >>"$BINARY_DIR/tests/$TEST_NAME/server.log" & else "$XROOTD_BIN" -c "$XROOTD_CONFIG" -l "$BINARY_DIR/tests/$TEST_NAME/server.log" 0<&- 2>>"$BINARY_DIR/tests/$TEST_NAME/server.log" >>"$BINARY_DIR/tests/$TEST_NAME/server.log" & fi XROOTD_PID=$! echo "xrootd daemon PID: $XROOTD_PID" XROOTD_URL=$(grep "Xrd_ProtLoad: enabling port" "$BINARY_DIR/tests/$TEST_NAME/server.log" | grep 'for protocol XrdHttp' | awk '{print $7}') IDX=0 while [ -z "$XROOTD_URL" ]; do sleep 1 XROOTD_URL=$(grep "Xrd_ProtLoad: enabling port" "$BINARY_DIR/tests/$TEST_NAME/server.log" | grep 'for protocol XrdHttp' | awk '{print $7}') IDX=$(($IDX+1)) if ! kill -0 "$XROOTD_PID" 2>/dev/null; then echo "xrootd process (PID $XROOTD_PID) failed to start" >&2 exit 1 fi if [ $IDX -gt 1 ]; then echo "Waiting for xrootd to start ($IDX seconds so far) ..." 
fi if [ $IDX -eq 20 ]; then echo "xrootd failed to start - failing" exit 1 fi done XROOTD_URL="https://$(hostname):$XROOTD_URL/" echo "xrootd started at $XROOTD_URL" IDX=0 touch "$RUNDIR/playback.txt" while [ $IDX -ne $COUNT ]; do echo "$XROOTD_URL/test/test_file_$IDX.random" >> "$RUNDIR/playback.txt" IDX=$((IDX+1)) done cat >> "$BINARY_DIR/tests/$TEST_NAME/setup.sh" < "$BINARY_DIR/tests/$TEST_NAME/client-$IDX.log" > "$BINARY_DIR/tests/$TEST_NAME/client-$IDX.out" & export CURL_${IDX}_PID=$! done IDX=1 while [ $IDX -le 100 ]; do IDX=$(($IDX+1)) CURL_NAME="CURL_${IDX}_PID" eval CURL_NAME='\$CURL_${IDX}_PID' eval CURL_PID=$CURL_NAME wait $CURL_PID CURL_EXIT=$? if [ $CURL_EXIT -ne 0 ]; then echo "Download of hello-world text failed for worker $IDX" exit 1 fi CONTENTS=$(cat "$BINARY_DIR/tests/$TEST_NAME/client-$IDX.out") if [ "$CONTENTS" != "Hello, World" ]; then echo "Downloaded hello-world text for worker $IDX is incorrect: $CONTENTS" exit 1 fi done xrootd-s3-http-0.4.1/test/s3-teardown.sh000077500000000000000000000012541501635342300200230ustar00rootroot00000000000000#!/bin/sh TEST_NAME=$1 if [ -z "$BINARY_DIR" ]; then echo "\$BINARY_DIR environment variable is not set; cannot run test" exit 1 fi if [ ! -d "$BINARY_DIR" ]; then echo "$BINARY_DIR is not a directory; cannot run test" exit 1 fi echo "Tearing down $TEST_NAME" if [ ! -f "$BINARY_DIR/tests/$TEST_NAME/setup.sh" ]; then echo "Test environment file $BINARY_DIR/tests/$TEST_NAME/setup.sh does not exist - cannot run test" exit 1 fi . "$BINARY_DIR/tests/$TEST_NAME/setup.sh" if [ -z "$MINIO_PID" ]; then echo "\$MINIO_PID environment variable is not set; cannot tear down process" exit 1 fi kill "$MINIO_PID" if [ ! -z "$XROOTD_PID" ]; then kill "$XROOTD_PID" fi xrootd-s3-http-0.4.1/test/s3-test.sh000077500000000000000000000034751501635342300171660ustar00rootroot00000000000000#!/bin/sh TEST_NAME=$1 if [ -z "$BINARY_DIR" ]; then echo "\$BINARY_DIR environment variable is not set; cannot run test" exit 1 fi if [ ! 
-d "$BINARY_DIR" ]; then echo "$BINARY_DIR is not a directory; cannot run test" exit 1 fi echo "Running $TEST_NAME - simple download" if [ ! -f "$BINARY_DIR/tests/$TEST_NAME/setup.sh" ]; then echo "Test environment file $BINARY_DIR/tests/$TEST_NAME/setup.sh does not exist - cannot run test" exit 1 fi . "$BINARY_DIR/tests/$TEST_NAME/setup.sh" if [ -z "$XROOTD_URL" ]; then echo "XRootD URL is not set; cannot test" exit 1 fi CONTENTS=$(curl --cacert $X509_CA_FILE -v --fail "$XROOTD_URL/test/hello_world.txt" 2> "$BINARY_DIR/tests/$TEST_NAME/client.log") CURL_EXIT=$? if [ $CURL_EXIT -ne 0 ]; then echo "Download of hello-world text failed" exit 1 fi if [ "$CONTENTS" != "Hello, World" ]; then echo "Downloaded hello-world text is incorrect: $CONTENTS" exit 1 fi echo "Running $TEST_NAME - missing object" HTTP_CODE=$(curl --cacert $X509_CA_FILE --output /dev/null -v --write-out '%{http_code}' "$XROOTD_URL/test/missing.txt" 2>> "$BINARY_DIR/tests/$TEST_NAME/client.log") if [ "$HTTP_CODE" -ne 404 ]; then echo "Expected HTTP code is 404; actual was $HTTP_CODE" exit 1 fi echo "Running $TEST_NAME - filtered prefix" HTTP_CODE=$(curl --cacert $X509_CA_FILE --output /dev/null -v --write-out '%{http_code}' "$XROOTD_URL/test2/hello_world.txt" 2>> "$BINARY_DIR/tests/$TEST_NAME/filter.log") if [ "$HTTP_CODE" -ne 200 ]; then echo "Expected HTTP code is 200; actual was $HTTP_CODE" exit 1 fi HTTP_CODE=$(curl --cacert $X509_CA_FILE --output /dev/null -v --write-out '%{http_code}' "$XROOTD_URL/test2/hello_world2.txt" 2>> "$BINARY_DIR/tests/$TEST_NAME/filter.log") if [ "$HTTP_CODE" -ne 404 ]; then echo "Expected HTTP code is 404; actual was $HTTP_CODE" exit 1 fi xrootd-s3-http-0.4.1/test/s3-xrootd-test.cfg000066400000000000000000000005621501635342300206170ustar00rootroot00000000000000all.export / xrd.protocol http:8080 libXrdHttp.so ofs.osslib /home/runner/work/xrootd-s3-http/build/libXrdS3.so xrootd.async off s3.begin s3.path_name /aws-opendata s3.bucket_name noaa-wod-pds s3.service_name s3 
s3.region us-east-1 s3.service_url https://s3.us-east-1.amazonaws.com s3.end s3.url_style virtual ofs.trace all xrd.trace all -sched http.trace all xrootd-s3-http-0.4.1/test/s3_tests.cc000066400000000000000000000207011501635342300173720ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ***************************************************************/ #include "../src/S3Commands.hh" #include "../src/S3FileSystem.hh" #include "s3_tests_common.hh" #include #include #include class TestAmazonRequest : public AmazonRequest { public: XrdSysLogger log{}; XrdSysError err{&log, "TestS3CommandsLog"}; TestAmazonRequest(const std::string &url, const std::string &akf, const std::string &skf, const std::string &bucket, const std::string &object, const std::string &path, int sigVersion) : AmazonRequest(url, akf, skf, bucket, object, path, sigVersion, err) {} // For getting access to otherwise-protected members std::string getHostUrl() const { return hostUrl; } }; TEST(TestS3URLGeneration, Test1) { const std::string serviceUrl = "https://s3-service.com:443"; const std::string b = "test-bucket"; const std::string o = "test-object"; // Test path-style URL generation TestAmazonRequest pathReq{serviceUrl, "akf", "skf", b, o, "path", 4}; std::string generatedHostUrl = pathReq.getHostUrl(); ASSERT_EQ(generatedHostUrl, 
"https://s3-service.com:443/test-bucket/test-object") << "generatedURL: " << generatedHostUrl; // Test virtual-style URL generation TestAmazonRequest virtReq{serviceUrl, "akf", "skf", b, o, "virtual", 4}; generatedHostUrl = virtReq.getHostUrl(); ASSERT_EQ(generatedHostUrl, "https://test-bucket.s3-service.com:443/test-object"); // Test path-style with empty bucket (which we use for exporting an entire // endpoint) TestAmazonRequest pathReqNoBucket{serviceUrl, "akf", "skf", "", o, "path", 4}; generatedHostUrl = pathReqNoBucket.getHostUrl(); ASSERT_EQ(generatedHostUrl, "https://s3-service.com:443/test-object"); } class FileSystemS3VirtualBucket : public FileSystemFixtureBase { protected: virtual std::string GetConfig() override { return R"( s3.begin s3.path_name /test s3.bucket_name genome-browser s3.service_name s3.amazonaws.com s3.region us-east-1 s3.service_url https://s3.us-east-1.amazonaws.com s3.url_style virtual s3.end )"; } }; class FileSystemS3VirtualNoBucket : public FileSystemFixtureBase { protected: virtual std::string GetConfig() override { return R"( s3.begin s3.path_name /test s3.service_name s3.amazonaws.com s3.region us-east-1 s3.service_url https://s3.us-east-1.amazonaws.com s3.url_style virtual s3.end )"; } }; class FileSystemS3PathBucket : public FileSystemFixtureBase { protected: virtual std::string GetConfig() override { return R"( s3.begin s3.path_name /test s3.service_name s3.amazonaws.com s3.region us-east-1 s3.bucket_name genome-browser s3.service_url https://s3.us-east-1.amazonaws.com s3.url_style path s3.end )"; } }; class FileSystemS3PathNoBucket : public FileSystemFixtureBase { protected: virtual std::string GetConfig() override { return R"( s3.begin s3.path_name /test s3.service_name s3.amazonaws.com s3.region us-east-1 s3.service_url https://s3.us-east-1.amazonaws.com s3.url_style path s3.end )"; } }; // Regression test for when the service_url ends in a `/` class FileSystemS3PathBucketSlash : public FileSystemFixtureBase { protected: 
virtual std::string GetConfig() override { return R"( s3.begin s3.path_name /test s3.service_name s3.amazonaws.com s3.region us-east-1 s3.bucket_name genome-browser s3.service_url https://s3.us-east-1.amazonaws.com/ s3.url_style path s3.end )"; } }; void TestDirectoryContents(S3FileSystem &fs, const std::string &dirname) { std::unique_ptr dir(fs.newDir()); ASSERT_TRUE(dir); XrdOucEnv env; auto rv = dir->Opendir(dirname.c_str(), env); ASSERT_EQ(rv, 0); struct stat buf; ASSERT_EQ(dir->StatRet(&buf), 0); std::vector name; name.resize(255); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "cellbrowser.json.bak"); ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 672); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "dataset.json"); ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 1847); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "desc.json"); ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 1091); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "all"); ASSERT_EQ(buf.st_mode & S_IFDIR, static_cast(S_IFDIR)); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "by-organ"); ASSERT_EQ(buf.st_mode & S_IFDIR, static_cast(S_IFDIR)); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "func-compart"); ASSERT_EQ(buf.st_mode & S_IFDIR, static_cast(S_IFDIR)); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), ""); ASSERT_EQ(dir->Close(), 0); } TEST_F(FileSystemS3VirtualBucket, Create) { EXPECT_NO_THROW( { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); }); } TEST_F(FileSystemS3VirtualBucket, Stat) { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); struct stat buff; auto rv = 
fs.Stat("/test/cells/tabula-sapiens/cellbrowser.json.bak", &buff); ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; } TEST_F(FileSystemS3VirtualBucket, List) { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); TestDirectoryContents(fs, "/test/cells/tabula-sapiens"); } TEST_F(FileSystemS3VirtualNoBucket, Stat) { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); struct stat buff; auto rv = fs.Stat( "/test/genome-browser/cells/tabula-sapiens/cellbrowser.json.bak", &buff); ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; } TEST_F(FileSystemS3VirtualNoBucket, List) { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); TestDirectoryContents(fs, "/test/genome-browser/cells/tabula-sapiens"); } TEST_F(FileSystemS3PathBucket, Stat) { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); struct stat buff; auto rv = fs.Stat("/test/cells/tabula-sapiens/cellbrowser.json.bak", &buff); ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; } TEST_F(FileSystemS3PathBucket, List) { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); TestDirectoryContents(fs, "/test/cells/tabula-sapiens"); } TEST_F(FileSystemS3PathNoBucket, Stat) { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); struct stat buff; auto rv = fs.Stat( "/test/genome-browser/cells/tabula-sapiens/cellbrowser.json.bak", &buff); ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; } TEST_F(FileSystemS3PathNoBucket, List) { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); TestDirectoryContents(fs, "/test/genome-browser/cells/tabula-sapiens/"); } TEST_F(FileSystemS3PathBucketSlash, Stat) { S3FileSystem fs(m_log.get(), m_configfn.c_str(), nullptr); struct stat buff; auto rv = fs.Stat("/test/cells/tabula-sapiens/cellbrowser.json.bak", &buff); ASSERT_EQ(rv, 0) << "Failed to stat AWS bucket (" << strerror(-rv) << ")"; } TEST_F(FileSystemS3PathBucketSlash, List) { S3FileSystem 
fs(m_log.get(), m_configfn.c_str(), nullptr); TestDirectoryContents(fs, "/test/cells/tabula-sapiens"); } int main(int argc, char **argv) { auto logger = new XrdSysLogger(2, 0); auto log = new XrdSysError(logger, "curl_"); AmazonRequest::Init(*log); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } xrootd-s3-http-0.4.1/test/s3_tests_common.hh000066400000000000000000000036771501635342300207710ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ***************************************************************/ #include "../src/shortfile.hh" #include #include #include #include #include #include #include class FileSystemFixtureBase : public testing::Test { protected: FileSystemFixtureBase() : m_log(new XrdSysLogger(2, 0)) // Log to stderr, no log rotation {} void SetUp() override { setenv("XRDINSTANCE", "xrootd", 1); char tmp_configfn[] = "/tmp/xrootd-s3-gtest.cfg.XXXXXX"; auto result = mkstemp(tmp_configfn); ASSERT_NE(result, -1) << "Failed to create temp file (" << strerror(errno) << ", errno=" << errno << ")"; m_configfn = std::string(tmp_configfn); auto contents = GetConfig(); ASSERT_FALSE(contents.empty()); ASSERT_TRUE(writeShortFile(m_configfn, contents, 0)) << "Failed to write to temp file (" << strerror(errno) << ", errno=" << errno << ")"; } void TearDown() override { if (!m_configfn.empty()) { auto rv = unlink(m_configfn.c_str()); ASSERT_EQ(rv, 0) << "Failed to delete temp file (" << strerror(errno) << ", errno=" << errno << ")"; } } virtual std::string GetConfig() = 0; std::string m_configfn; std::unique_ptr m_log; }; xrootd-s3-http-0.4.1/test/s3_unit_tests.cc000066400000000000000000000470401501635342300204360ustar00rootroot00000000000000/*************************************************************** * * Copyright (C) 2024, Pelican Project, Morgridge Institute for Research * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ***************************************************************/ // // The tests in this file are meant to work with the minio-based fixture, // meaning no internet connectivity is needed to run them. // #include "../src/S3Commands.hh" #include "../src/S3File.hh" #include "../src/S3FileSystem.hh" #include "s3_tests_common.hh" #include #include #include #include #include #include #include #include std::once_flag g_init_once; std::string g_ca_file; std::string g_minio_url; std::string g_bucket_name; std::string g_access_key_file; std::string g_secret_key_file; void parseEnvFile(const std::string &fname) { std::ifstream fh(fname); if (!fh.is_open()) { std::cerr << "Failed to open env file: " << strerror(errno); exit(1); } std::string line; while (std::getline(fh, line)) { auto idx = line.find("="); if (idx == std::string::npos) { continue; } auto key = line.substr(0, idx); auto val = line.substr(idx + 1); if (key == "X509_CA_FILE") { g_ca_file = val; setenv("X509_CERT_FILE", g_ca_file.c_str(), 1); } else if (key == "MINIO_URL") { g_minio_url = val; } else if (key == "BUCKET_NAME") { g_bucket_name = val; } else if (key == "ACCESS_KEY_FILE") { g_access_key_file = val; } else if (key == "SECRET_KEY_FILE") { g_secret_key_file = val; } } } // Tests where we query S3 test fixture class FileSystemS3Fixture : public FileSystemFixtureBase { void SetUp() override { std::call_once(g_init_once, [&] { char *env_file = getenv("ENV_FILE"); ASSERT_NE(env_file, nullptr) << "$ENV_FILE environment variable " "not set; required to run test"; parseEnvFile(env_file); auto logger = new XrdSysLogger(2, 0); auto log = new XrdSysError(logger, "curl_"); AmazonRequest::Init(*log); }); FileSystemFixtureBase::SetUp(); } virtual std::string GetConfig() override { return R"( xrd.tlsca certfile )" + g_ca_file + R"( #s3.trace all dump s3.trace all s3.begin s3.path_name /test s3.access_key_file )" + g_access_key_file + R"( s3.secret_key_file )" + g_secret_key_file + R"( s3.service_name s3.example.com 
s3.region us-east-1 s3.bucket_name )" + g_bucket_name + R"( s3.service_url )" + g_minio_url + R"( s3.url_style path s3.end )"; } public: std::unique_ptr GetFS() { return std::unique_ptr( new S3FileSystem(&m_log, m_configfn.c_str(), nullptr)); } void WritePattern(const std::string &name, const off_t writeSize, const unsigned char chunkByte, const size_t chunkSize, bool known_size) { XrdSysLogger log; S3FileSystem fs(&log, m_configfn.c_str(), nullptr); std::unique_ptr fh(fs.newFile()); ASSERT_TRUE(fh); XrdOucEnv env; // Only set oss.asize for test cases where we want the server to know // the final size. if (known_size) { env.Put("oss.asize", std::to_string(writeSize).c_str()); } auto rv = fh->Open(name.c_str(), O_CREAT | O_WRONLY, 0755, env); ASSERT_EQ(rv, 0); size_t sizeToWrite = (static_cast(chunkSize) >= writeSize) ? static_cast(writeSize) : chunkSize; off_t curWriteSize = writeSize; auto curChunkByte = chunkByte; off_t offset = 0; while (sizeToWrite) { std::string writeBuffer(sizeToWrite, curChunkByte); rv = fh->Write(writeBuffer.data(), offset, sizeToWrite); ASSERT_EQ(rv, static_cast(sizeToWrite)); curWriteSize -= rv; offset += rv; sizeToWrite = (static_cast(chunkSize) >= curWriteSize) ? 
static_cast(curWriteSize) : chunkSize; curChunkByte += 1; } rv = fh->Close(); ASSERT_EQ(rv, 0); VerifyContents(fs, name, writeSize, chunkByte, chunkSize); } void RandomRead(const std::string &name, unsigned char chunkByte, size_t chunkSize, std::chrono::steady_clock::duration testLength) { XrdSysLogger log; S3FileSystem fs(&log, m_configfn.c_str(), nullptr); std::unique_ptr fh(fs.newFile()); ASSERT_TRUE(fh); XrdOucEnv env; auto rv = fh->Open(name.c_str(), O_CREAT | O_WRONLY, 0755, env); ASSERT_EQ(rv, 0); struct stat buf; rv = fh->Fstat(&buf); ASSERT_EQ(rv, 0); auto objSize = buf.st_size; auto startTime = std::chrono::steady_clock::now(); size_t maxReadSize = 5'000'000; std::string readBuf; readBuf.resize(maxReadSize); std::string correctContents; correctContents.resize(maxReadSize); while (std::chrono::steady_clock::now() - startTime < testLength) { size_t readSize = std::rand() % maxReadSize; off_t off = std::rand() % objSize; ssize_t expectedReadSize = (static_cast(readSize) + off - objSize > 0) ? (objSize - off) : readSize; readBuf.resize(expectedReadSize); rv = fh->Read(readBuf.data(), off, readSize); ASSERT_EQ(rv, expectedReadSize); GenCorrectContents(correctContents, off, expectedReadSize, chunkByte, chunkSize, objSize); ASSERT_EQ(readBuf, correctContents); } } private: void GenCorrectContents(std::string &correctContents, off_t off, size_t size, unsigned char chunkByte, size_t chunkSize, size_t objSize) { auto chunkNum = static_cast(off / chunkSize); auto curChunkByte = static_cast(chunkByte + chunkNum); off_t chunkBoundary = (chunkNum + 1) * chunkSize; correctContents.resize(size); if (chunkBoundary < off + static_cast(size)) { size_t firstLen = chunkBoundary - off; std::string firstChunk(firstLen, curChunkByte); correctContents.replace(0, firstLen, firstChunk); auto iter = correctContents.begin() + firstLen; off_t remaining = size - firstLen; while (remaining) { curChunkByte++; auto chunkLen = (remaining > static_cast(chunkSize)) ? 
chunkSize : remaining; std::string chunk(chunkLen, curChunkByte); std::copy(chunk.begin(), chunk.end(), iter); iter += chunkLen; remaining -= chunkLen; } } else { correctContents = std::string(size, curChunkByte); } } void VerifyContents(S3FileSystem &fs, const std::string &obj, off_t expectedSize, unsigned char chunkByte, size_t chunkSize) { std::unique_ptr fh(fs.newFile()); ASSERT_TRUE(fh); XrdOucEnv env; auto rv = fh->Open(obj.c_str(), O_RDONLY, 0, env); ASSERT_EQ(rv, 0); size_t sizeToRead = (static_cast(chunkSize) >= expectedSize) ? expectedSize : chunkSize; unsigned char curChunkByte = chunkByte; off_t offset = 0; while (sizeToRead) { std::string readBuffer(sizeToRead, curChunkByte - 1); rv = fh->Read(readBuffer.data(), offset, sizeToRead); ASSERT_EQ(rv, static_cast(sizeToRead)); readBuffer.resize(rv); std::string correctBuffer(sizeToRead, curChunkByte); ASSERT_EQ(readBuffer, correctBuffer); expectedSize -= rv; offset += rv; sizeToRead = (static_cast(chunkSize) >= expectedSize) ? expectedSize : chunkSize; curChunkByte += 1; } rv = fh->Close(); ASSERT_EQ(rv, 0); } XrdSysLogger m_log; }; // Upload a single byte into S3 TEST_F(FileSystemS3Fixture, UploadOneByte) { WritePattern("/test/write_one.txt", 1, 'X', 32 * 1024, true); WritePattern("/test/write_one_stream.txt", 1, 'X', 32 * 1024, false); } // Upload across multiple calls, single part TEST_F(FileSystemS3Fixture, UploadMultipleCalls) { WritePattern("/test/write_alphabet.txt", 26, 'a', 1, true); WritePattern("/test/write_alphabet_stream.txt", 26, 'a', 1, false); } // Upload a zero-byte object TEST_F(FileSystemS3Fixture, UploadZero) { WritePattern("/test/write_zero.txt", 0, 'X', 32 * 1024, true); WritePattern("/test/write_zero_stream.txt", 0, 'X', 32 * 1024, false); } // Upload larger - two chunks. 
TEST_F(FileSystemS3Fixture, UploadTwoChunks) { WritePattern("/test/write_two_chunks.txt", 1'024 + 42, 'a', 1'024, true); WritePattern("/test/write_two_chunks_stream.txt", 1'024 + 42, 'a', 1'024, false); } // Upload larger - a few chunks. TEST_F(FileSystemS3Fixture, UploadMultipleChunks) { WritePattern("/test/write_multi_chunks.txt", (10'000 / 1'024) * 1'024 + 42, 'a', 1'024, true); WritePattern("/test/write_multi_chunks_stream.txt", (10'000 / 1'024) * 1'024 + 42, 'a', 1'024, false); } // Upload across multiple parts, not aligned to partition. TEST_F(FileSystemS3Fixture, UploadLarge) { WritePattern("/test/write_large_1.txt", (100'000'000 / 1'310'720) * 1'310'720 + 42, 'a', 1'310'720, true); WritePattern("/test/write_large_1_stream.txt", (100'000'000 / 1'310'720) * 1'310'720 + 42, 'a', 1'310'720, false); } // Upload a file into S3 that's the same size as the partition size TEST_F(FileSystemS3Fixture, UploadLargePart) { WritePattern("/test/write_large_2.txt", 100'000'000, 'a', 131'072, true); WritePattern("/test/write_large_2_stream.txt", 100'000'000, 'a', 131'072, false); } // Upload a small file where the partition size is aligned with the chunk size TEST_F(FileSystemS3Fixture, UploadSmallAligned) { WritePattern("/test/write_large_3.txt", 1'000, 'a', 1'000, true); } // Upload a file into S3 that's the same size as the partition size, using // chunks that align with the partition size TEST_F(FileSystemS3Fixture, UploadLargePartAligned) { WritePattern("/test/write_large_4.txt", 100'000'000, 'a', 1'000'000, true); } // Upload a file into S3 resulting in multiple partitions TEST_F(FileSystemS3Fixture, UploadMultiPartAligned) { WritePattern("/test/write_large_5.txt", 100'000'000, 'a', 1'000'000, true); } // Upload a file into S3 resulting in multiple partitioned using not-aligned // chunks TEST_F(FileSystemS3Fixture, UploadMultiPartUnaligned) { WritePattern("/test/write_large_1.txt", 100'000'000, 'a', 32'768, true); WritePattern("/test/write_large_1_stream.txt", 
100'000'000, 'a', 32'768, false); } // Ensure that uploads timeout if no action occurs. TEST_F(FileSystemS3Fixture, UploadStall) { HTTPRequest::SetStallTimeout(std::chrono::milliseconds(200)); XrdSysLogger log; auto eLog = new XrdSysError(&log, "s3_"); S3File::LaunchMonitorThread(*eLog, nullptr); S3FileSystem fs(&log, m_configfn.c_str(), nullptr); std::unique_ptr fh(fs.newFile()); ASSERT_TRUE(fh); XrdOucEnv env; env.Put("oss.asize", std::to_string(16'384).c_str()); auto rv = fh->Open("/test/write_stall.txt", O_CREAT | O_WRONLY, 0755, env); ASSERT_EQ(rv, 0); ssize_t sizeToWrite = 4'096; std::string writeBuffer(sizeToWrite, 'a'); rv = fh->Write(writeBuffer.data(), 0, sizeToWrite); ASSERT_EQ(rv, sizeToWrite); std::this_thread::sleep_for(HTTPRequest::GetStallTimeout() * 4 / 3 + std::chrono::milliseconds(10)); writeBuffer = std::string(sizeToWrite, 'b'); rv = fh->Write(writeBuffer.data(), sizeToWrite, sizeToWrite); ASSERT_EQ(rv, -ETIMEDOUT); } // Upload a few files into a "directory" then list the directory TEST_F(FileSystemS3Fixture, ListDir) { WritePattern("/test/listdir/write_1.txt", 100'000, 'a', 32'768, true); WritePattern("/test/listdir/write_2.txt", 50'000, 'a', 32'768, true); XrdSysLogger log; S3FileSystem fs(&log, m_configfn.c_str(), nullptr); std::unique_ptr dir(fs.newDir()); XrdOucEnv env; auto rv = dir->Opendir("/test/listdir", env); ASSERT_EQ(rv, 0); struct stat buf; ASSERT_EQ(dir->StatRet(&buf), 0); std::vector name; name.resize(255); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "write_1.txt"); ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 100'000); rv = dir->Readdir(&name[0], 255); ASSERT_EQ(rv, 0); ASSERT_EQ(std::string(&name[0]), "write_2.txt"); ASSERT_EQ(buf.st_mode & S_IFREG, static_cast(S_IFREG)); ASSERT_EQ(buf.st_size, 50'000); ASSERT_EQ(dir->Close(), 0); } // Test stat against the root of the bucket. 
// Stat() on the bucket-level "directory" must succeed and report S_IFDIR
// regardless of how the path is spelled (trailing and/or doubled slashes);
// the object written underneath it must report S_IFREG.
TEST_F(FileSystemS3Fixture, StatRoot) {
	// WritePattern is a fixture helper defined outside this view; it is used
	// here to make sure at least one object exists under /test.
	WritePattern("/test/statroot.txt", 100'000, 'a', 32'768, true);

	XrdSysLogger log;
	S3FileSystem fs(&log, m_configfn.c_str(), nullptr);

	struct stat buf;
	ASSERT_EQ(fs.Stat("/test", &buf, 0, nullptr), 0);
	ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR);

	ASSERT_EQ(fs.Stat("/test/", &buf, 0, nullptr), 0);
	ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR);

	ASSERT_EQ(fs.Stat("//test/", &buf, 0, nullptr), 0);
	ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR);

	ASSERT_EQ(fs.Stat("//test", &buf, 0, nullptr), 0);
	ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR);

	ASSERT_EQ(fs.Stat("/test//", &buf, 0, nullptr), 0);
	ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR);

	ASSERT_EQ(fs.Stat("/test/statroot.txt", &buf, 0, nullptr), 0);
	ASSERT_EQ(buf.st_mode & S_IFREG, S_IFREG);
}

// Key prefixes that only exist because deeper objects were written
// ("/test/one", "/test/one/two") must stat as directories, even though a
// sibling object "/test/one.txt" shares the same leading characters.
TEST_F(FileSystemS3Fixture, NestedDir) {
	WritePattern("/test/one.txt", 100'000, 'a', 32'768, true);
	WritePattern("/test/one/two/statroot.txt", 100'000, 'a', 32'768, true);

	XrdSysLogger log;
	S3FileSystem fs(&log, m_configfn.c_str(), nullptr);

	struct stat buf;
	ASSERT_EQ(fs.Stat("/test/one", &buf, 0, nullptr), 0);
	ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR);

	ASSERT_EQ(fs.Stat("/test/one/two", &buf, 0, nullptr), 0);
	ASSERT_EQ(buf.st_mode & S_IFDIR, S_IFDIR);
}

TEST_F(FileSystemS3Fixture, InvalidObject) {
	// Test various configurations of S3 buckets that lead
	// to undefined situations in our filesystem-like translation,
	// just to ensure we have our specified behavior.
	XrdSysLogger log;
	S3FileSystem fs(&log, m_configfn.c_str(), nullptr);

	// Object nested "inside" a directory.
	// "/test/nested/foo" is both an object and a prefix of another object;
	// the assertions below pin that both names stat as regular 1 KiB files.
	WritePattern("/test/nested/foo", 1'024, 'a', 1'024, true);
	WritePattern("/test/nested/foo/foo.txt", 1'024, 'a', 1'024, true);
	struct stat buf;
	ASSERT_EQ(fs.Stat("/test/nested/foo", &buf, 0, nullptr), 0);
	ASSERT_EQ(buf.st_mode & S_IFREG, S_IFREG);
	ASSERT_EQ(buf.st_size, 1'024);

	ASSERT_EQ(fs.Stat("/test/nested/foo/foo.txt", &buf, 0, nullptr), 0);
	ASSERT_EQ(buf.st_mode & S_IFREG, S_IFREG);
	ASSERT_EQ(buf.st_size, 1'024);

	// Object with a trailing slash in name
	// Expected to be invisible to Stat(): the call must return -ENOENT.
	WritePattern("/test/trailing/", 1'024, 'a', 1'024, true);
	ASSERT_EQ(fs.Stat("/test/trailing/", &buf, 0, nullptr), -ENOENT);
}

// Check out the logic of the overlap copy routine.
//
// Forward declaration of a routine defined outside this file. Judging by the
// assertions in the test below, the four returned values are
// (req1_off, req1_size, req2_off, req2_size): the part(s) of the request NOT
// satisfied from the cache, with (-1, 0) meaning "no such remainder", while
// `used` receives the number of bytes copied from cache_buf into req_buf.
// NOTE(review): the template argument list of std::tuple is missing in this
// view of the file; the declaration is left untouched -- confirm against the
// definition of OverlapCopy.
std::tuple OverlapCopy(off_t req_off, size_t req_size, char *req_buf,
					   off_t cache_off, size_t cache_size, char *cache_buf,
					   size_t &used);

// Walks OverlapCopy through the possible relative positions of a 4096-byte
// request (filled with 'a') and a cache buffer (filled with 'b'). After each
// call the request buffer must contain 'b' exactly where the two ranges
// overlap and 'a' everywhere else.
TEST(OverlapCopy, Simple) {
	std::string repeatA(4096, 'a');
	std::string repeatB(4096, 'b');
	size_t used{0};

	// Cache [4096, 8192) starts where request [0, 4096) ends: no overlap,
	// the whole request remains outstanding.
	auto [req1_off, req1_size, req2_off, req2_size] = OverlapCopy(
		0, 4096, repeatA.data(), 4096, 4096, repeatB.data(), used);
	ASSERT_EQ(req1_off, 0);
	ASSERT_EQ(req1_size, 4096U);
	ASSERT_EQ(req2_off, -1);
	ASSERT_EQ(req2_size, 0U);
	ASSERT_EQ(used, 0U);

	// Cache [2048, 6144) covers the second half of the request: 2048 bytes
	// are copied and only the first half remains outstanding.
	std::tie(req1_off, req1_size, req2_off, req2_size) = OverlapCopy(
		0, 4096, repeatA.data(), 2048, 4096, repeatB.data(), used);
	ASSERT_EQ(req1_off, 0);
	ASSERT_EQ(req1_size, 2048U);
	ASSERT_EQ(req2_off, -1);
	ASSERT_EQ(req2_size, 0U);
	ASSERT_EQ(used, 2048U);
	auto correctOverlap = std::string(2048, 'a') + std::string(2048, 'b');
	ASSERT_EQ(correctOverlap, repeatA);

	// Cache [1024, 2048) sits strictly inside the request: the request is
	// split into two remainders, one on each side of the cached range.
	used = 0;
	repeatA = std::string(4096, 'a');
	std::tie(req1_off, req1_size, req2_off, req2_size) = OverlapCopy(
		0, 4096, repeatA.data(), 1024, 1024, repeatB.data(), used);
	ASSERT_EQ(req1_off, 0);
	ASSERT_EQ(req1_size, 1024U);
	ASSERT_EQ(req2_off, 2048);
	ASSERT_EQ(req2_size, 2048U);
	ASSERT_EQ(used, 1024U);
	correctOverlap = std::string(1024, 'a') + std::string(1024, 'b') +
					 std::string(2048, 'a');
	ASSERT_EQ(correctOverlap, repeatA);

	// Request [1024, 5120) starts inside cache [0, 4096): the first 3072
	// bytes come from the cache, the last 1024 remain outstanding.
	used = 0;
	repeatA = std::string(4096, 'a');
	std::tie(req1_off, req1_size, req2_off, req2_size) = OverlapCopy(
		1024, 4096, repeatA.data(), 0, 4096, repeatB.data(), used);
	ASSERT_EQ(req1_off, 4096);
	ASSERT_EQ(req1_size, 1024U);
	ASSERT_EQ(req2_off, -1);
	ASSERT_EQ(req2_size, 0U);
	ASSERT_EQ(used, 3072U);
	correctOverlap = std::string(3072, 'b') + std::string(1024, 'a');
	ASSERT_EQ(correctOverlap, repeatA);

	// Request [4096, 8192) starts where cache [0, 4096) ends: no overlap.
	used = 0;
	repeatA = std::string(4096, 'a');
	std::tie(req1_off, req1_size, req2_off, req2_size) = OverlapCopy(
		4096, 4096, repeatA.data(), 0, 4096, repeatB.data(), used);
	ASSERT_EQ(req1_off, 4096);
	ASSERT_EQ(req1_size, 4096U);
	ASSERT_EQ(req2_off, -1);
	ASSERT_EQ(req2_size, 0U);
	ASSERT_EQ(used, 0U);
	correctOverlap = std::string(4096, 'a');
	ASSERT_EQ(correctOverlap, repeatA);

	// Empty request (offset -1, size 0): nothing is copied and nothing
	// remains outstanding.
	used = 0;
	repeatA = std::string(4096, 'a');
	std::tie(req1_off, req1_size, req2_off, req2_size) =
		OverlapCopy(-1, 0, repeatA.data(), 0, 4096, repeatB.data(), used);
	ASSERT_EQ(req1_off, -1);
	ASSERT_EQ(req1_size, 0U);
	ASSERT_EQ(req2_off, -1);
	ASSERT_EQ(req2_size, 0U);
	ASSERT_EQ(used, 0U);
	correctOverlap = std::string(4096, 'a');
	ASSERT_EQ(correctOverlap, repeatA);

	// Empty cache (offset -1, size 0): the request is returned unchanged.
	used = 0;
	repeatA = std::string(4096, 'a');
	std::tie(req1_off, req1_size, req2_off, req2_size) =
		OverlapCopy(0, 4096, repeatA.data(), -1, 0, repeatB.data(), used);
	ASSERT_EQ(req1_off, 0);
	ASSERT_EQ(req1_size, 4096U);
	ASSERT_EQ(req2_off, -1);
	ASSERT_EQ(req2_size, 0U);
	ASSERT_EQ(used, 0U);
	correctOverlap = std::string(4096, 'a');
	ASSERT_EQ(correctOverlap, repeatA);
}

// Uploads one large object, then has several threads call the RandomRead
// fixture helper on it concurrently.
TEST_F(FileSystemS3Fixture, StressGet) {
	// Upload a file
	auto name = "/test/write_stress.txt";
	WritePattern(name, 100'000'000, 'a', 1'000'000, true);

	static const int workerThreads = 10;
	// NOTE(review): the element type of this vector is missing in this view
	// of the file; the reset(new std::thread(...)) / ->join() calls below
	// suggest a smart pointer to std::thread -- confirm against the original.
	std::vector> threads;
	threads.resize(workerThreads);
	for (auto &tptr : threads) {
		// Capturing by reference is safe here: every thread is joined below,
		// before `name` goes out of scope.
		// RandomRead is defined outside this view; the seconds(5) argument is
		// presumably how long each worker keeps reading -- confirm.
		tptr.reset(new std::thread([&] {
			RandomRead(name, 'a', 1'000'000, std::chrono::seconds(5));
		}));
	}
	std::cout << "Launched all " << workerThreads << " threads" << std::endl;

	for (const auto &tptr : threads) {
		tptr->join();
	}
}

class AmazonS3SendMultipartPartLowercase : public
AmazonS3SendMultipartPart { protected: virtual void modifyResponse(std::string &resp) override { std::transform(resp.begin(), resp.end(), resp.begin(), [](unsigned char c) { return std::tolower(c); }); } }; TEST_F(FileSystemS3Fixture, Etag) { // Determine the S3 info. auto oss = GetFS(); std::string exposedPath, object; std::string path = "/test/etag_casesensitive_test"; ASSERT_EQ(oss->parsePath(path.c_str(), exposedPath, object), 0); auto ai = oss->getS3AccessInfo(exposedPath, object); ASSERT_NE(ai.get(), nullptr); ASSERT_NE(ai->getS3BucketName(), ""); ASSERT_NE(object, ""); // Start an upload. XrdSysLogger log; XrdSysError err(&log, "test"); AmazonS3CreateMultipartUpload startUpload(*ai, object, err); ASSERT_TRUE(startUpload.SendRequest()); std::string uploadId, errMsg; startUpload.Results(uploadId, errMsg); // Upload an etag. AmazonS3SendMultipartPart upload_part_request(*ai, object, err); std::string streaming_buffer = "aaaa"; ASSERT_TRUE(upload_part_request.SendRequest(streaming_buffer, std::to_string(1), uploadId, streaming_buffer.size(), true)); std::string etag; ASSERT_TRUE(upload_part_request.GetEtag(etag)); std::vector eTags; eTags.push_back(etag); // Finalize the object AmazonS3CompleteMultipartUpload complete_upload_request(*ai, object, err); ASSERT_TRUE(complete_upload_request.SendRequest(eTags, 2, uploadId)); } int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } xrootd-s3-http-0.4.1/test/xrdhttp-setup.sh000077500000000000000000000136641501635342300205200ustar00rootroot00000000000000#!/bin/sh TEST_NAME=$1 if [ -z "$BINARY_DIR" ]; then echo "\$BINARY_DIR environment variable is not set; cannot run test" exit 1 fi if [ ! -d "$BINARY_DIR" ]; then echo "$BINARY_DIR is not a directory; cannot run test" exit 1 fi if [ -z "$SOURCE_DIR" ]; then echo "\$SOURCE_DIR environment variable is not set; cannot run test" exit 1 fi if [ ! 
-d "$SOURCE_DIR" ]; then echo "\$SOURCE_DIR environment variable is not set; cannot run test" exit 1 fi echo "Setting up HTTP server for $TEST_NAME test" XROOTD_BIN="$XROOTD_BINDIR/xrootd" if [ -z "XROOTD_BIN" ]; then echo "xrootd binary not found; cannot run unit test" exit 1 fi mkdir -p "$BINARY_DIR/tests/$TEST_NAME" RUNDIR=$(mktemp -d -p "$BINARY_DIR/tests/$TEST_NAME" test_run.XXXXXXXX) if [ ! -d "$RUNDIR" ]; then echo "Failed to create test run directory; cannot run xrootd" exit 1 fi echo "Using $RUNDIR as the test run's home directory." cd "$RUNDIR" export XROOTD_CONFIGDIR="$RUNDIR/xrootd-config" mkdir -p "$XROOTD_CONFIGDIR/ca" echo > "$BINARY_DIR/tests/$TEST_NAME/server.log" # Create the TLS credentials for the test openssl genrsa -out "$XROOTD_CONFIGDIR/tlscakey.pem" 4096 >> "$BINARY_DIR/tests/$TEST_NAME/server.log" touch "$XROOTD_CONFIGDIR/ca/index.txt" echo '01' > "$XROOTD_CONFIGDIR/ca/serial.txt" cat > "$XROOTD_CONFIGDIR/tlsca.ini" <> "$BINARY_DIR/tests/$TEST_NAME/server.log" if [ "$?" -ne 0 ]; then echo "Failed to generate CA request" exit 1 fi # Create the host certificate request openssl genrsa -out "$XROOTD_CONFIGDIR/tls.key" 4096 >> "$BINARY_DIR/tests/$TEST_NAME/server.log" openssl req -new -key "$XROOTD_CONFIGDIR/tls.key" -config "$XROOTD_CONFIGDIR/tlsca.ini" -out "$XROOTD_CONFIGDIR/tls.csr" -outform PEM -subj "/CN=localhost" 0<&- >> "$BINARY_DIR/tests/$TEST_NAME/server.log" if [ "$?" -ne 0 ]; then echo "Failed to generate host certificate request" exit 1 fi openssl ca -config "$XROOTD_CONFIGDIR/tlsca.ini" -batch -policy signing_policy -extensions cert_extensions -out "$XROOTD_CONFIGDIR/tls.crt" -infiles "$XROOTD_CONFIGDIR/tls.csr" 0<&- 2>> "$BINARY_DIR/tests/$TEST_NAME/server.log" if [ "$?" 
-ne 0 ]; then echo "Failed to sign host certificate request" exit 1 fi # Create xrootd configuration and runtime directory structure XROOTD_EXPORTDIR="$RUNDIR/xrootd-export" mkdir -p "$XROOTD_EXPORTDIR" # XRootD has strict length limits on the admin path location. # Therefore, we also create a directory in /tmp. XROOTD_RUNDIR=$(mktemp -d -p /tmp xrootd_test.XXXXXXXX) export XROOTD_CONFIG="$XROOTD_CONFIGDIR/xrootd.cfg" cat > "$XROOTD_CONFIG" < $XROOTD_CONFIGDIR/authdb < "$XROOTD_EXPORTDIR/hello_world.txt" # Launch XRootD daemon. "$XROOTD_BIN" -c "$XROOTD_CONFIG" -l "$BINARY_DIR/tests/$TEST_NAME/server.log" 0<&- >>"$BINARY_DIR/tests/$TEST_NAME/server.log" 2>>"$BINARY_DIR/tests/$TEST_NAME/server.log" & XROOTD_PID=$! echo "xrootd daemon PID: $XROOTD_PID" echo "XRootD logs are available at $BINARY_DIR/tests/$TEST_NAME/server.log" # Build environment file for remainder of tests XROOTD_URL=$(grep "Xrd_ProtLoad: enabling port" "$BINARY_DIR/tests/$TEST_NAME/server.log" | grep 'for protocol XrdHttp' | awk '{print $7}') IDX=0 while [ -z "$XROOTD_URL" ]; do sleep 1 XROOTD_URL=$(grep "Xrd_ProtLoad: enabling port" "$BINARY_DIR/tests/$TEST_NAME/server.log" | grep 'for protocol XrdHttp' | awk '{print $7}') IDX=$(($IDX+1)) if ! kill -0 "$XROOTD_PID" 2>/dev/null; then echo "xrootd process (PID $XROOTD_PID) failed to start" >&2 exit 1 fi if [ $IDX -gt 1 ]; then echo "Waiting for xrootd to start ($IDX seconds so far) ..." fi if [ $IDX -eq 60 ]; then echo "xrootd failed to start - failing" exit 1 fi done XROOTD_URL="https://localhost:$XROOTD_URL/" echo "xrootd started at $XROOTD_URL" XROOTD_HTTPSERVER_CONFIG="$XROOTD_CONFIGDIR/xrootd-httpserver.cfg" cat > "$XROOTD_HTTPSERVER_CONFIG" < "$BINARY_DIR/tests/$TEST_NAME/setup.sh" < "$BINARY_DIR/tests/$TEST_NAME/client.log") CURL_EXIT=$? 
if [ $CURL_EXIT -ne 0 ]; then echo "Download of hello-world text failed" exit 1 fi if [ "$CONTENTS" != "Hello, World" ]; then echo "Downloaded hello-world text is incorrect: $CONTENTS" exit 1 fi echo "Running $TEST_NAME - missing object" HTTP_CODE=$(curl --cacert $X509_CA_FILE --output /dev/null -v --write-out '%{http_code}' "$XROOTD_URL/missing.txt" 2>> "$BINARY_DIR/tests/$TEST_NAME/client.log") if [ "$HTTP_CODE" -ne 404 ]; then echo "Expected HTTP code is 404; actual was $HTTP_CODE" exit 1 fi