pax_global_header00006660000000000000000000000064147112071550014515gustar00rootroot0000000000000052 comment=4a9a05728641674620818c6a78b01fce38e6f222 utf-8-validate-6.0.5/000077500000000000000000000000001471120715500142575ustar00rootroot00000000000000utf-8-validate-6.0.5/.github/000077500000000000000000000000001471120715500156175ustar00rootroot00000000000000utf-8-validate-6.0.5/.github/workflows/000077500000000000000000000000001471120715500176545ustar00rootroot00000000000000utf-8-validate-6.0.5/.github/workflows/ci.yml000066400000000000000000000055021471120715500207740ustar00rootroot00000000000000name: CI on: - push - pull_request permissions: {} jobs: test: runs-on: ${{ matrix.os }} strategy: matrix: arch: - x64 - x86 node: - 18 - 20 - 22 os: - macos-latest - ubuntu-latest - windows-latest exclude: - arch: x86 os: macos-latest - arch: x86 os: ubuntu-latest - arch: x86 node: 18 os: windows-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 with: node-version: ${{ matrix.node }} architecture: ${{ matrix.arch }} - run: npm install - run: npm test build: if: startsWith(github.ref, 'refs/tags/') needs: test runs-on: ${{ matrix.os }} strategy: matrix: arch: - x64 - x86 os: - macos-latest - ubuntu-20.04 - windows-latest exclude: - arch: x86 os: macos-latest - arch: x86 os: ubuntu-20.04 steps: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 with: node-version: 20 architecture: ${{ matrix.arch }} - run: npm install - run: npm run prebuild-darwin-x64+arm64 if: matrix.os == 'macos-latest' - run: | npm run prebuild npm run prebuild-linux-musl-x64 if: matrix.os == 'ubuntu-20.04' - run: npm run prebuild if: matrix.os == 'windows-latest' - uses: actions/upload-artifact@v4 with: name: ${{ matrix.os }}-${{ matrix.arch }} path: prebuilds retention-days: 1 release: needs: build permissions: contents: write # Needed for softprops/action-gh-release. runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 with: path: prebuilds - run: echo "version=$(git describe --tags)" >> $GITHUB_OUTPUT id: get_version - run: tar -cvf "${{ steps.get_version.outputs.version }}-darwin-x64+arm64.tar" -C "prebuilds/macos-latest-x64" darwin-x64+arm64 - run: tar -cvf "${{ steps.get_version.outputs.version }}-linux-x64.tar" -C "prebuilds/ubuntu-20.04-x64" linux-x64 - run: tar -cvf "${{ steps.get_version.outputs.version }}-win32-ia32.tar" -C "prebuilds/windows-latest-x86" win32-ia32 - run: tar -cvf "${{ steps.get_version.outputs.version }}-win32-x64.tar" -C "prebuilds/windows-latest-x64" win32-x64 - uses: softprops/action-gh-release@v2 with: files: ${{ steps.get_version.outputs.version }}-*.tar token: ${{ secrets.GITHUB_TOKEN }} utf-8-validate-6.0.5/.gitignore000066400000000000000000000000661471120715500162510ustar00rootroot00000000000000deps/is_utf8/.github/ node_modules/ prebuilds/ build/ utf-8-validate-6.0.5/.npmrc000066400000000000000000000000231471120715500153720ustar00rootroot00000000000000package-lock=false utf-8-validate-6.0.5/LICENSE000066400000000000000000000047321471120715500152720ustar00rootroot00000000000000utf-8-validate is licensed for use as follows: """ Copyright (c) 2011 Einar Otto Stangvik Copyright (c) 2013 Arnout Kazemier and contributors Copyright (c) 2016 Luigi Pinca and contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ This license applies to all parts of utf-8-validate that are not externally maintained libraries. The externally maintained is_utf8 library used by utf-8-validate, located at deps/is_utf8, is licensed as follows: """ Copyright 2022 The is_utf8 authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ utf-8-validate-6.0.5/README.md000066400000000000000000000024261471120715500155420ustar00rootroot00000000000000# utf-8-validate [![Version npm](https://img.shields.io/npm/v/utf-8-validate.svg?logo=npm)](https://www.npmjs.com/package/utf-8-validate) [![Linux/macOS/Windows Build](https://img.shields.io/github/actions/workflow/status/websockets/utf-8-validate/ci.yml?branch=master&label=build&logo=github)](https://github.com/websockets/utf-8-validate/actions?query=workflow%3ACI+branch%3Amaster) Check if a buffer contains valid UTF-8 encoded text. ## Installation ``` npm install utf-8-validate --save-optional ``` The `--save-optional` flag tells npm to save the package in your package.json under the [`optionalDependencies`](https://docs.npmjs.com/files/package.json#optionaldependencies) key. ## API The module exports a single function that takes one argument. To maximize performance, the argument is not validated. It is the caller's responsibility to ensure that it is correct. ### `isValidUTF8(buffer)` Checks whether a buffer contains valid UTF-8. #### Arguments - `buffer` - The buffer to check. #### Return value `true` if the buffer contains only correct UTF-8, else `false`. #### Example ```js 'use strict'; const isValidUTF8 = require('utf-8-validate'); const buf = Buffer.from([0xf0, 0x90, 0x80, 0x80]); console.log(isValidUTF8(buf)); // => true ``` ## License [MIT](LICENSE) utf-8-validate-6.0.5/binding.gyp000066400000000000000000000016201471120715500164110ustar00rootroot00000000000000{ 'variables': { 'openssl_fips': '' }, 'targets': [ { 'target_name': 'validation', 'sources': [ 'src/validation.cc', 'deps/is_utf8/src/is_utf8.cpp' ], 'cflags_cc': ['-std=gnu++11'], 'conditions': [ ["OS=='mac'", { 'variables': { 'clang_version': '&1 | perl -ne \'print $1 if /clang version ([0-9]+(\\.[0-9]+){2,})/\')' }, 'xcode_settings': { 'MACOSX_DEPLOYMENT_TARGET': '10.7' }, 'conditions': [ # Use Perl v-strings to compare versions. ['clang_version and #include #include #include #include #include #include #include uint64_t nano() { return std::chrono::duration_cast<::std::chrono::nanoseconds>( std::chrono::steady_clock::now().time_since_epoch()) .count(); } #ifdef _MSC_VER #define never_inline __declspec(noinline) #else #define never_inline __attribute__((noinline)) #endif // generate a string having at least length N // can exceed by up to 3 chars, returns the actual length size_t populate_utf8(char *data, size_t N) { size_t i = 0; for (; i < N;) { int w = rand() & 0xFF; if (w < 0x80) { data[i++] = 0x20; // w; } else if (w < 0xE0) { data[i++] = 0xC2 + rand() % (0xDF - 0xC2 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); } else if (w == 0xE0) { data[i++] = w; data[i++] = 0xA0 + rand() % (0xBF - 0xA0 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); } else if (w <= 0xEC) { data[i++] = w; data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); } else if (w == 0xED) { data[i++] = w; data[i++] = 0x80 + rand() % (0x9F - 0x80 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); } else if (w <= 0xEF) { data[i++] = w; data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); } else if (w < 0xF0) { data[i++] = 0xF1 + rand() % (0xF3 - 0xF1 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); } else if (w == 0xF0) { data[i++] = w; data[i++] = 0x90 + rand() % (0xBF - 0x90 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); } else if (w <= 0xF3) { data[i++] = 0xF1 + rand() % (0xF3 - 0xF1 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); } else if (w == 0xF4) { data[i++] = w; data[i++] = 0x80 + rand() % (0x8F - 0x80 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); data[i++] = 0x80 + rand() % (0xBF - 0x80 + 1); } } data[i] = '\0'; return i; } // copied in part from Guava static never_inline bool basic_validate_utf8(const char *b, size_t length) { const unsigned char *bytes = (const unsigned char *)b; for (size_t index = 0;;) { unsigned char byte1; do { // fast ASCII Path if (index >= length) { return true; } byte1 = bytes[index++]; } while (byte1 < 0x80); if (byte1 < 0xE0) { // Two-byte form. if (index == length) { return false; } if (byte1 < 0xC2 || bytes[index++] > 0xBF) { return false; } } else if (byte1 < 0xF0) { // Three-byte form. if (index + 1 >= length) { return false; } unsigned char byte2 = bytes[index++]; if (byte2 > 0xBF // Overlong? 5 most significant bits must not all be zero. || (byte1 == 0xE0 && byte2 < 0xA0) // Check for illegal surrogate codepoints. || (byte1 == 0xED && 0xA0 <= byte2) // Third byte trailing-byte test. || bytes[index++] > 0xBF) { return false; } } else { // Four-byte form. if (index + 2 >= length) { return false; } int byte2 = bytes[index++]; if (byte2 > 0xBF // Check that 1 <= plane <= 16. Tricky optimized form of: // if (byte1 > (byte) 0xF4 // || byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 // || byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) || (((byte1 << 28) + (byte2 - 0x90)) >> 30) != 0 // Third byte trailing-byte test || bytes[index++] > 0xBF // Fourth byte trailing-byte test || bytes[index++] > 0xBF) { return false; } } } } bool zerobuffer_bench(size_t N) { printf("zero buffer \n"); printf("string size = %zu \n", N); char *input = new char[N]{}; volatile bool isgood{true}; { uint64_t start = nano(); uint64_t finish = start; size_t count{0}; uint64_t threshold = 500000000; for (; finish - start < threshold;) { count++; isgood &= basic_validate_utf8(input, N); finish = nano(); } double t = (N * count) / double(finish - start); printf("basic_validate_utf8 %f GB/s\n", t); } { uint64_t start = nano(); uint64_t finish = start; size_t count{0}; uint64_t threshold = 500000000; for (; finish - start < threshold;) { count++; isgood &= simdutf::validate_utf8(input, N); finish = nano(); } double t = (N * count) / double(finish - start); printf("simdutf %f GB/s\n", t); } { uint64_t start = nano(); uint64_t finish = start; size_t count{0}; uint64_t threshold = 500000000; for (; finish - start < threshold;) { count++; isgood &= is_utf8(input, N); finish = nano(); } double t = (N * count) / double(finish - start); printf("is_utf8 %f GB/s\n", t); } delete[] input; printf("\n"); return isgood; } bool bench(size_t N) { printf("random UTF-8\n"); printf("string size = %zu \n", N); char *input = new char[N]; populate_utf8(input, N); volatile bool isgood{true}; { uint64_t start = nano(); uint64_t finish = start; size_t count{0}; uint64_t threshold = 500000000; for (; finish - start < threshold;) { count++; isgood &= basic_validate_utf8(input, N); finish = nano(); } double t = (N * count) / double(finish - start); printf("basic_validate_utf8 %f GB/s\n", t); } { uint64_t start = nano(); uint64_t finish = start; size_t count{0}; uint64_t threshold = 500000000; for (; finish - start < threshold;) { count++; isgood &= simdutf::validate_utf8(input, N); finish = nano(); } double t = (N * count) / double(finish - start); printf("simdutf %f GB/s\n", t); } { uint64_t start = nano(); uint64_t finish = start; size_t count{0}; uint64_t threshold = 500000000; for (; finish - start < threshold;) { count++; isgood &= is_utf8(input, N); finish = nano(); } double t = (N * count) / double(finish - start); printf("is_utf8 %f GB/s\n", t); } delete[] input; printf("\n"); return isgood; } int main() { return (bench(40096) & bench(100000) & bench(50000)) & (zerobuffer_bench(40096) & zerobuffer_bench(100000) & zerobuffer_bench(50000)) ? EXIT_SUCCESS : EXIT_FAILURE; }utf-8-validate-6.0.5/deps/is_utf8/cmake/000077500000000000000000000000001471120715500176535ustar00rootroot00000000000000utf-8-validate-6.0.5/deps/is_utf8/cmake/add_cpp_test.cmake000066400000000000000000000041451471120715500233120ustar00rootroot00000000000000# Helper so we don't have to repeat ourselves so much # Usage: add_cpp_test(testname [COMPILE_ONLY] [SOURCES a.cpp b.cpp ...] [LABELS acceptance per_implementation ...]) # SOURCES defaults to testname.cpp if not specified. function(add_cpp_test TEST_NAME) # Parse arguments cmake_parse_arguments(PARSE_ARGV 1 ARGS "COMPILE_ONLY;LIBRARY;WILL_FAIL" "" "SOURCES;LABELS;DEPENDENCY_OF") if (NOT ARGS_SOURCES) list(APPEND ARGS_SOURCES ${TEST_NAME}.cpp) endif() if (ARGS_COMPILE_ONLY) list(APPEND ${ARGS_LABELS} compile_only) endif() if (IS_UTF8_SANITIZE) add_compile_options(-fsanitize=address -fno-omit-frame-pointer -fno-sanitize-recover=all) add_compile_definitions(ASAN_OPTIONS=detect_leaks=1) endif() # Add the compile target if (ARGS_LIBRARY) add_library(${TEST_NAME} STATIC ${ARGS_SOURCES}) else(ARGS_LIBRARY) add_executable(${TEST_NAME} ${ARGS_SOURCES}) endif(ARGS_LIBRARY) # Add test if (ARGS_COMPILE_ONLY OR ARGS_LIBRARY) add_test( NAME ${TEST_NAME} COMMAND ${CMAKE_COMMAND} --build . --target ${TEST_NAME} --config $ WORKING_DIRECTORY ${PROJECT_BINARY_DIR} ) set_target_properties(${TEST_NAME} PROPERTIES EXCLUDE_FROM_ALL TRUE EXCLUDE_FROM_DEFAULT_BUILD TRUE) else() add_test(${TEST_NAME} ${TEST_NAME}) # Add to