diff --git a/.cmake-format b/.cmake-format index 57ad821ef8..ed478467a3 100644 --- a/.cmake-format +++ b/.cmake-format @@ -9,31 +9,33 @@ with section("parse"): "pargs": 0, "flags": [], 'kwargs': { - 'NAME': '*', - 'SRCS': '*', - 'LIBS': '*' , - 'LIBDIRS': '*'}}, + 'NAME': '*', + 'SRCS': '*', + 'LIBS': '*' , + 'LIBDIRS': '*', + 'TESTARGS': '*'}}, 'add_umf_executable': { "pargs": 0, "flags": [], 'kwargs': { - 'NAME': '*', - 'SRCS': '*', + 'NAME': '*', + 'SRCS': '*', 'LIBS': '*'}}, 'add_umf_test': { "pargs": 0, "flags": [], 'kwargs': { - 'NAME': '*', - 'SRCS': '*', - 'LIBS': '*'}}, + 'NAME': '*', + 'SRCS': '*', + 'LIBS': '*', + 'ENVS': '*'}}, 'add_umf_library': { "pargs": 0, "flags": [], 'kwargs': { - 'NAME': '*', - 'TYPE': '*', - 'SRCS': '*', + 'NAME': '*', + 'TYPE': '*', + 'SRCS': '*', 'LIBS': '*', 'LINUX_MAP_FILE': '*', 'WINDOWS_DEF_FILE': '*'}}, @@ -43,7 +45,20 @@ with section("parse"): 'kwargs': { 'LABELS': '*', 'PASS_REGULAR_EXPRESSION': '*'}}, - } + 'build_umf_test': { + "pargs": 0, + "flags": [], + 'kwargs': { + 'NAME': '*', + 'SRCS': '*', + 'LIBS': '*' }}, + 'add_umf_ipc_test': { + "pargs": 0, + "flags": [], + 'kwargs': { + 'TEST': '*', + 'SRC_DIR': '*'}}, + } # Override configurations per-command where available override_spec = {} diff --git a/.github/docker/ubuntu-20.04.Dockerfile b/.github/docker/ubuntu-20.04.Dockerfile index 069deeac93..a6a45a8c1b 100644 --- a/.github/docker/ubuntu-20.04.Dockerfile +++ b/.github/docker/ubuntu-20.04.Dockerfile @@ -24,7 +24,6 @@ ARG BASE_DEPS="\ # UMF's dependencies ARG UMF_DEPS="\ - libjemalloc-dev \ libhwloc-dev \ libtbb-dev" @@ -34,6 +33,7 @@ ARG TEST_DEPS="\ # Miscellaneous for our builds/CI (optional) ARG MISC_DEPS="\ + automake \ clang \ g++-7 \ python3-pip \ diff --git a/.github/docker/ubuntu-22.04.Dockerfile b/.github/docker/ubuntu-22.04.Dockerfile index 08d546083d..75c71c526c 100644 --- a/.github/docker/ubuntu-22.04.Dockerfile +++ b/.github/docker/ubuntu-22.04.Dockerfile @@ -24,7 +24,6 @@ ARG BASE_DEPS="\ # UMF's dependencies ARG UMF_DEPS="\ - libjemalloc-dev \ libhwloc-dev \ libtbb-dev" @@ -34,6 +33,7 @@ ARG TEST_DEPS="\ # Miscellaneous for our builds/CI (optional) ARG MISC_DEPS="\ + automake \ clang \ python3-pip \ sudo \ diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 89e8148565..35a7f05b65 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -36,3 +36,4 @@ Before checking all the boxes please mark the PR as draft. - [ ] All newly added source files have a license - [ ] All newly added source files are referenced in CMake files - [ ] Logger (with debug/info/... messages) is used +- [ ] All API changes are reflected in docs and def/map files, and are tested diff --git a/.github/scripts/get_system_info.sh b/.github/scripts/get_system_info.sh index 595d5e31ae..81c54ce980 100755 --- a/.github/scripts/get_system_info.sh +++ b/.github/scripts/get_system_info.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,7 +15,7 @@ function check_L0_version { fi if command -v zypper &> /dev/null; then - zypper se level-zero && return + zypper -n se level-zero || true fi echo "level-zero not installed" @@ -26,9 +26,9 @@ function system_info { cat /etc/os-release | grep -oP "PRETTY_NAME=\K.*" cat /proc/version - # echo "**********SYCL-LS**********" - # source /opt/intel/oneapi/setvars.sh - # sycl-ls + echo "**********SYCL-LS**********" + source /opt/intel/oneapi/setvars.sh + sycl-ls echo "**********numactl topology**********" numactl -H @@ -36,22 +36,22 @@ function system_info { echo "**********VGA info**********" lspci | grep -i VGA - # echo "**********CUDA Version**********" - # if command -v nvidia-smi &> /dev/null; then - # nvidia-smi - # else - # echo "CUDA not installed" - # fi + echo "**********CUDA Version**********" + if command -v nvidia-smi &> /dev/null; then + nvidia-smi + else + echo "CUDA not installed" + fi echo "**********L0 Version**********" check_L0_version - # echo "**********ROCm Version**********" - # if command -v rocminfo &> /dev/null; then - # rocminfo - # else - # echo "ROCm not installed" - # fi + echo "**********ROCm Version**********" + if command -v rocminfo &> /dev/null; then + rocminfo + else + echo "ROCm not installed" + fi echo "******OpenCL*******" # The driver version of OpenCL Graphics is the compute-runtime version @@ -67,11 +67,15 @@ function system_info { cat /proc/meminfo echo "**********env variables**********" - echo "PATH=${PATH}" - echo "CPATH=${CPATH}" - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" - echo "LIBRARY_PATH=${LIBRARY_PATH}" - echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}" + echo "PATH=$PATH" + echo + echo "CPATH=$CPATH" + echo + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" + echo + echo "LIBRARY_PATH=$LIBRARY_PATH" + echo + echo "PKG_CONFIG_PATH=$PKG_CONFIG_PATH" echo echo "******build tools versions*******" diff --git a/.github/scripts/run-codespell.py b/.github/scripts/run-codespell.py new file mode 100644 index 0000000000..b87bf37bd5 --- /dev/null +++ b/.github/scripts/run-codespell.py @@ -0,0 +1,40 @@ +""" + Copyright (C) 2024 Intel Corporation + + Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +""" + +import subprocess # nosec B404 +import logging +import sys + +logging.basicConfig( + level=logging.INFO, format="[%(levelname)s]: [%(asctime)s] %(message)s" +) + + +def codespell_scan(): + try: + codespell_result = subprocess.run( # nosec + [ + "codespell", + "-H", + "--quiet-level=3", + "--skip=./.git,./.venv,./.github/workflows/.spellcheck-conf.toml", + ], + text=True, + stdout=subprocess.PIPE, + ) + if codespell_result.returncode != 0: + for line in codespell_result.stdout.splitlines(): + logging.error(line.strip()) + sys.exit(1) + else: + logging.info("No spelling errors found") + except subprocess.CalledProcessError as ex: + logging.error(ex) + sys.exit(1) + + +codespell_scan() diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 0000000000..7eb3c7b06a --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,31 @@ +name: Compute Benchmarks + +on: + workflow_dispatch: + inputs: + pr_no: + description: PR number (if 0, it'll run on the main) + type: number + bench_script_params: + description: Parameters passed to script executing benchmark + type: string + required: false + default: '' + upload_report: + description: 'Upload HTML report' + type: boolean + required: false + default: false + +permissions: + contents: read + pull-requests: write + +jobs: + manual: + name: Compute Benchmarks + uses: ./.github/workflows/reusable_benchmarks.yml + with: + pr_no: ${{ inputs.pr_no }} + bench_script_params: ${{ inputs.bench_script_params }} + upload_report: ${{ inputs.upload_report }} diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index dfa03fc4f4..ebae6086a7 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -31,7 +31,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y cmake hwloc libhwloc-dev libjemalloc-dev libnuma-dev libtbb-dev + sudo apt-get install -y cmake hwloc libhwloc-dev libnuma-dev libtbb-dev - name: Download Coverity run: | @@ -49,7 +49,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - name: Build diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 3d9bfc29b4..0918a36997 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -4,6 +4,7 @@ name: GitHubPages on: push: branches: ["main"] + workflow_dispatch: # Cancel previous in-progress workflow, only the latest run is relevant concurrency: @@ -14,44 +15,14 @@ permissions: contents: read jobs: - build: - name: Build docs - runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} - - steps: - - name: Checkout repository - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Install doxygen - run: | - sudo apt-get update - sudo apt-get install -y doxygen - - # Latest distros do not allow global pip installation - - name: Install Python requirements in venv - run: | - python3 -m venv .venv - . 
.venv/bin/activate - echo "$PATH" >> $GITHUB_PATH - python3 -m pip install -r third_party/requirements.txt - - - name: Setup PATH for python - run: echo "$HOME/.local/bin" >> $GITHUB_PATH - - - name: Build the documentation - working-directory: scripts - run: python3 generate_docs.py - - - name: Upload artifact - uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0.0 - with: - path: docs/html + DocsBuild: + uses: ./.github/workflows/reusable_docs_build.yml + with: + upload: true - deploy: + DocsDeploy: name: Deploy docs to GitHub Pages - needs: build + needs: DocsBuild permissions: pages: write diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 281ae00615..f209cf2c5f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -1,15 +1,19 @@ # Various non-standard tests, requiring e.g. longer run name: Nightly -# This job is run at 00:00 UTC every day or on demand. +# This job is run at 04:00 UTC every day or on demand. on: workflow_dispatch: schedule: - - cron: '0 0 * * *' + - cron: '0 4 * * *' permissions: contents: read +env: + BUILD_DIR : "${{github.workspace}}/build" + INSTALL_DIR: "${{github.workspace}}/build/install" + jobs: fuzz-test: name: Fuzz test @@ -67,7 +71,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y cmake hwloc libhwloc-dev libjemalloc-dev libnuma-dev libtbb-dev valgrind + sudo apt-get install -y cmake hwloc libhwloc-dev libnuma-dev libtbb-dev valgrind - name: Configure CMake run: > @@ -76,7 +80,6 @@ jobs: -DCMAKE_BUILD_TYPE=Debug -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_CUDA_PROVIDER=OFF @@ -89,93 +92,231 @@ jobs: - name: Run tests under valgrind run: ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{github.workspace}}/build ${{matrix.tool}} - # TODO fix #843 - #icx: - # name: ICX - # env: - # VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" - # BUILD_DIR : "${{github.workspace}}/build" - # strategy: - # matrix: - # os: ['windows-2019', 'windows-2022'] - # build_type: [Debug] - # compiler: [{c: icx, cxx: icx}] - # shared_library: ['ON', 'OFF'] - # include: - # - os: windows-2022 - # build_type: Release - # compiler: {c: icx, cxx: icx} - # shared_library: 'ON' - # - # runs-on: ${{matrix.os}} - # - # steps: - # - name: Checkout - # uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - # with: - # fetch-depth: 0 - # - # - name: Initialize vcpkg - # uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 - # with: - # vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 - # vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg - # vcpkgJsonGlob: '**/vcpkg.json' - # - # - name: Install dependencies - # run: vcpkg install - # - # - name: Install Ninja - # uses: seanmiddleditch/gha-setup-ninja@96bed6edff20d1dd61ecff9b75cc519d516e6401 # v5 - # - # - name: Download icx compiler - # env: - # # Link source: https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler-download.html - # CMPLR_LINK: "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/15a35578-2f9a-4f39-804b-3906e0a5f8fc/w_dpcpp-cpp-compiler_p_2024.2.1.83_offline.exe" - # run: | - # Invoke-WebRequest -Uri "${{ env.CMPLR_LINK }}" -OutFile 
compiler_install.exe - # - # - name: Install icx compiler - # shell: cmd - # run: | - # start /b /wait .\compiler_install.exe -s -x -f extracted --log extract.log - # extracted\bootstrapper.exe -s --action install --eula=accept -p=NEED_VS2017_INTEGRATION=0 ^ - # -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=. - # - # - name: Configure build - # shell: cmd - # run: | - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" - # cmake ^ - # -B ${{env.BUILD_DIR}} ^ - # -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" ^ - # -DCMAKE_C_COMPILER=${{matrix.compiler.c}} ^ - # -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} ^ - # -G Ninja ^ - # -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} ^ - # -DUMF_FORMAT_CODE_STYLE=OFF ^ - # -DUMF_DEVELOPER_MODE=ON ^ - # -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON ^ - # -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON ^ - # -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON ^ - # -DUMF_BUILD_CUDA_PROVIDER=ON ^ - # -DUMF_TESTS_FAIL_ON_SKIP=ON - # - # - name: Build UMF - # shell: cmd - # run: | - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" - # cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j %NUMBER_OF_PROCESSORS% - # - # - name: Run tests - # shell: cmd - # working-directory: ${{env.BUILD_DIR}} - # run: | - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" - # ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + Windows-generators: + name: Windows ${{matrix.generator}} generator + strategy: + matrix: + build_type: [Debug, Release] + compiler: [{c: cl, cxx: cl}] + shared_library: ['ON', 'OFF'] + static_hwloc: ['ON', 'OFF'] + generator: ['Ninja', 'NMake Makefiles'] + umfd_lib: ['ON', 'OFF'] + + runs-on: windows-latest + + steps: + - name: Checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Set VCPKG_PATH with hwloc + if: matrix.static_hwloc == 'OFF' + run: echo "VCPKG_PATH=${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" >> $env:GITHUB_ENV + + - name: Set VCPKG_PATH without hwloc + if: matrix.static_hwloc == 'ON' + run: echo "VCPKG_PATH=${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" >> $env:GITHUB_ENV + + - name: Initialize vcpkg + uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + env: + VCPKG_PATH: ${{env.VCPKG_PATH}} + with: + vcpkgGitCommitId: ea2a964f9303270322cf3f2d51c265ba146c422d # 1.04.2025 + vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg + vcpkgJsonGlob: '**/vcpkg.json' + + - name: Install dependencies + run: vcpkg install --triplet x64-windows + + - name: Install Ninja + if: matrix.generator == 'Ninja' + uses: seanmiddleditch/gha-setup-ninja@96bed6edff20d1dd61ecff9b75cc519d516e6401 # v5 + + - name: Configure MSVC environment + uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + + - name: Configure build + run: > + cmake + -B ${{env.BUILD_DIR}} + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" + -DCMAKE_C_COMPILER=${{matrix.compiler.c}} + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} + -G "${{matrix.generator}}" + 
-DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} + -DUMF_LINK_HWLOC_STATICALLY=${{matrix.static_hwloc}} + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + ${{ matrix.umfd_lib == 'ON' && '-DUMF_USE_DEBUG_POSTFIX=ON' || '' }} + + - name: Build UMF + shell: cmd + run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j %NUMBER_OF_PROCESSORS% + + - name: Run tests + shell: cmd + working-directory: ${{env.BUILD_DIR}} + run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + + - name: Get UMF version + run: | + $version = (git describe --tags --abbrev=0 | Select-String -Pattern '\d+\.\d+\.\d+').Matches.Value + echo "UMF_VERSION=$version" >> $env:GITHUB_ENV + shell: pwsh + + - name: Test UMF installation and uninstallation + # The '--shared-library' parameter is added to the installation test when the UMF is built as a shared library + # The '--umfd-lib' parameter is added when the UMF is built with the umfd library + run: > + python3 ${{github.workspace}}/test/test_installation.py + --build-dir ${{env.BUILD_DIR}} + --install-dir ${{env.INSTALL_DIR}} + --build-type ${{matrix.build_type}} + --umf-version ${{env.UMF_VERSION}} + ${{ matrix.shared_library == 'ON' && '--proxy --shared-library' || '' }} + ${{ matrix.umfd_lib == 'ON' && '--umfd-lib' || ''}} + ${{ matrix.static_hwloc == 'ON' && '--hwloc' || '' }} + + icx: + name: ICX + env: + VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" + strategy: + matrix: + os: ['windows-2019', 'windows-2022'] + build_type: [Debug] + compiler: [{c: icx, cxx: icx}] + shared_library: ['ON', 'OFF'] + include: + - os: windows-2022 + build_type: Release + compiler: {c: icx, cxx: icx} + shared_library: 'ON' + + runs-on: ${{matrix.os}} + + steps: + - name: Checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Initialize vcpkg + uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + with: + vcpkgGitCommitId: ea2a964f9303270322cf3f2d51c265ba146c422d # 1.04.2025 + vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg + vcpkgJsonGlob: '**/vcpkg.json' + + - name: Install dependencies + run: vcpkg install --triplet x64-windows + + - name: Install Ninja + uses: seanmiddleditch/gha-setup-ninja@96bed6edff20d1dd61ecff9b75cc519d516e6401 # v5 + + - name: Download icx compiler + env: + # Link source: https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler-download.html + CMPLR_LINK: "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/15a35578-2f9a-4f39-804b-3906e0a5f8fc/w_dpcpp-cpp-compiler_p_2024.2.1.83_offline.exe" + run: | + Invoke-WebRequest -Uri "${{ env.CMPLR_LINK }}" -OutFile compiler_install.exe + + - name: Install icx compiler + shell: cmd + run: | + start /b /wait .\compiler_install.exe -s -x -f extracted --log extract.log + extracted\bootstrapper.exe -s --action install --eula=accept -p=NEED_VS2017_INTEGRATION=0 ^ + -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=. 
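
Aside: every later `shell: cmd` step in this restored ICX job re-runs setvars.bat, because each step starts a fresh shell. A hedged alternative, not part of this patch (the install path and the env-dump trick are assumptions to verify), would persist the oneAPI environment once so later steps could call icx and cmake directly:

      # sketch only: `set` prints NAME=value lines, the format $GITHUB_ENV expects;
      # variables with embedded newlines or special characters would need escaping
      - name: Persist oneAPI environment
        shell: cmd
        run: |
          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" > NUL
          set >> %GITHUB_ENV%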
+ + - name: Configure build + shell: cmd + run: | + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" + cmake ^ + -B ${{env.BUILD_DIR}} ^ + -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" ^ + -DCMAKE_C_COMPILER=${{matrix.compiler.c}} ^ + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} ^ + -G Ninja ^ + -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} ^ + -DUMF_FORMAT_CODE_STYLE=OFF ^ + -DUMF_DEVELOPER_MODE=ON ^ + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON ^ + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON ^ + -DUMF_BUILD_CUDA_PROVIDER=ON ^ + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build UMF + shell: cmd + run: | + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" + cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j %NUMBER_OF_PROCESSORS% + + - name: Run tests + shell: cmd + working-directory: ${{env.BUILD_DIR}} + run: | + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" + ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + + hwloc-fallback: + # Scenarios where UMF_LINK_HWLOC_STATICALLY is set to OFF and hwloc is not installed in the system + # The hwloc library is fetched implicitly + name: "Fallback to static hwloc build" + strategy: + matrix: + include: + - os: 'ubuntu-latest' + build_type: Release + number_of_processors: '$(nproc)' + - os: 'windows-latest' + build_type: Release + number_of_processors: '$Env:NUMBER_OF_PROCESSORS' + + runs-on: ${{matrix.os}} + + steps: + - name: Install dependencies + if: matrix.os == 'ubuntu-latest' + run: sudo apt-get install -y libnuma-dev + + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + + - name: Configure build + run: > + cmake + -B ${{env.BUILD_DIR}} + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_EXAMPLES=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_LINK_HWLOC_STATICALLY=OFF + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build UMF + run: > + cmake + --build ${{env.BUILD_DIR}} + --config ${{matrix.build_type}} + -j ${{matrix.number_of_processors}} + + - name: Run tests + working-directory: ${{env.BUILD_DIR}} + run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test L0: uses: ./.github/workflows/reusable_gpu.yml @@ -194,3 +335,13 @@ jobs: # Beside the 2 LTS Ubuntu, we also test this on the latest Ubuntu - to be updated # every 6 months, so we verify the latest version of packages (compilers, etc.). 
os: "['ubuntu-22.04', 'ubuntu-24.04', 'ubuntu-24.10']" + + Benchmarks: + uses: ./.github/workflows/reusable_benchmarks.yml + permissions: + contents: read + pull-requests: write + with: + pr_no: '0' + bench_script_params: '--save baseline' + upload_report: true diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml deleted file mode 100644 index 6057df5f0a..0000000000 --- a/.github/workflows/performance.yml +++ /dev/null @@ -1,115 +0,0 @@ -name: Performance - -on: - # Can be triggered via manual "dispatch" (from workflow view in GitHub Actions tab) - workflow_dispatch: - inputs: - pr_no: - description: PR number (if 0, it'll run on the main) - type: number - required: true - -permissions: - contents: read - pull-requests: write - -env: - BUILD_DIR : "${{github.workspace}}/build" - -jobs: - perf-l0: - name: Build UMF and run performance tests - runs-on: "L0_PERF" - - steps: - # Workspace on self-hosted runners is not cleaned automatically. - # We have to delete the files created outside of using actions. - - name: Cleanup self-hosted workspace - if: always() - run: | - ls -la ./ - rm -rf ./* || true - - - name: Add comment to PR - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - if: ${{ always() && inputs.pr_no != 0 }} - with: - script: | - const pr_no = '${{ inputs.pr_no }}'; - const provider = 'LEVEL_ZERO'; - const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; - const body = `Performance workflow for ${provider}_PROVIDER run:\n${url}`; - - github.rest.issues.createComment({ - issue_number: pr_no, - owner: context.repo.owner, - repo: context.repo.repo, - body: body - }) - - - name: Checkout UMF - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Get information about platform - run: .github/scripts/get_system_info.sh - - # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. 
- - name: Fetch PR's merge commit - if: ${{ inputs.pr_no != 0 }} - working-directory: ${{github.workspace}} - env: - PR_NO: ${{ inputs.pr_no }} - run: | - git fetch -- https://github.com/${{github.repository}} +refs/pull/${PR_NO}/*:refs/remotes/origin/pr/${PR_NO}/* - git checkout origin/pr/${PR_NO}/merge - git rev-parse origin/pr/${PR_NO}/merge - - - name: Configure build - run: > - cmake - -B ${{env.BUILD_DIR}} - -DCMAKE_BUILD_TYPE=Release - -DUMF_BUILD_SHARED_LIBRARY=ON - -DUMF_BUILD_BENCHMARKS=ON - -DUMF_BUILD_BENCHMARKS_MT=ON - -DUMF_BUILD_TESTS=OFF - -DUMF_FORMAT_CODE_STYLE=OFF - -DUMF_DEVELOPER_MODE=OFF - -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON - -DUMF_BUILD_CUDA_PROVIDER=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON - -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - - - name: Build - run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) - - - name: Run benchmarks - working-directory: ${{env.BUILD_DIR}} - id: benchmarks - run: numactl -N 1 ctest -V --test-dir benchmark -C Release - - - name: Add comment to PR - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - if: ${{ always() && inputs.pr_no != 0 }} - with: - script: | - let markdown = "" - try { - const fs = require('fs'); - markdown = fs.readFileSync('umf_perf_results.md', 'utf8'); - } catch(err) { - } - - const pr_no = '${{ inputs.pr_no }}'; - const provider = 'LEVEL_ZERO'; - const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; - const test_status = '${{ steps.benchmarks.outcome }}'; - const job_status = '${{ job.status }}'; - const body = `Performance workflow for ${provider}_PROVIDER run:\n${url}\nJob status: ${job_status}. Test status: ${test_status}.\n ${markdown}`; - - github.rest.issues.createComment({ - issue_number: pr_no, - owner: context.repo.owner, - repo: context.repo.repo, - body: body - }) diff --git a/.github/workflows/pr_push.yml b/.github/workflows/pr_push.yml index 9623b69f1b..511808887e 100644 --- a/.github/workflows/pr_push.yml +++ b/.github/workflows/pr_push.yml @@ -54,9 +54,6 @@ jobs: uses: ./.github/workflows/reusable_qemu.yml with: short_run: true - Benchmarks: - needs: [Build] - uses: ./.github/workflows/reusable_benchmarks.yml ProxyLib: needs: [Build] uses: ./.github/workflows/reusable_proxy_lib.yml @@ -88,3 +85,11 @@ jobs: contents: read security-events: write uses: ./.github/workflows/reusable_trivy.yml + Compatibility: + needs: [Build] + uses: ./.github/workflows/reusable_compatibility.yml + strategy: + matrix: + tag: ["v0.11.0-dev1"] + with: + tag: ${{matrix.tag}} diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index ced48e0c72..b30bfed4c8 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -7,8 +7,6 @@ permissions: contents: read env: - # for installation testing - it should match with version set in CMake - UMF_VERSION: 0.10.1 BUILD_DIR : "${{github.workspace}}/build" INSTL_DIR : "${{github.workspace}}/../install-dir" COVERAGE_DIR : "${{github.workspace}}/coverage" @@ -19,7 +17,7 @@ jobs: name: Ubuntu strategy: matrix: - os: ['ubuntu-20.04', 'ubuntu-22.04'] + os: ['ubuntu-22.04', 'ubuntu-24.04'] build_type: [Debug, Release] compiler: [{c: gcc, cxx: g++}] shared_library: ['OFF'] @@ -29,15 +27,6 @@ jobs: disable_hwloc: ['OFF'] link_hwloc_statically: ['OFF'] include: - - os: 'ubuntu-20.04' - build_type: Release - compiler: {c: gcc-7, cxx: g++-7} - shared_library: 'OFF' - level_zero_provider: 'ON' - cuda_provider: 'ON' - install_tbb: 'ON' - disable_hwloc: 'OFF' - 
link_hwloc_statically: 'OFF' - os: 'ubuntu-22.04' build_type: Release compiler: {c: clang, cxx: clang++} @@ -76,15 +65,26 @@ jobs: disable_hwloc: 'OFF' link_hwloc_statically: 'OFF' # test icx compiler - # - os: 'ubuntu-22.04' - # build_type: Release - # compiler: {c: icx, cxx: icpx} - # shared_library: 'ON' - # level_zero_provider: 'ON' - # cuda_provider: 'ON' - # install_tbb: 'ON' - # disable_hwloc: 'OFF' - # link_hwloc_statically: 'OFF' + - os: 'ubuntu-22.04' + build_type: Release + compiler: {c: icx, cxx: icpx} + shared_library: 'ON' + level_zero_provider: 'ON' + cuda_provider: 'ON' + install_tbb: 'ON' + disable_hwloc: 'OFF' + link_hwloc_statically: 'OFF' + # test lld linker + - os: 'ubuntu-24.04' + build_type: Release + compiler: {c: icx, cxx: icpx} + shared_library: 'ON' + level_zero_provider: 'ON' + cuda_provider: 'ON' + install_tbb: 'ON' + disable_hwloc: 'OFF' + link_hwloc_statically: 'OFF' + llvm_linker: '-DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld" -DCMAKE_MODULE_LINKER_FLAGS="-fuse-ld=lld" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=lld"' # test without installing TBB - os: 'ubuntu-22.04' build_type: Release @@ -124,7 +124,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y clang cmake libnuma-dev libjemalloc-dev lcov + sudo apt-get install -y clang cmake libnuma-dev lcov - name: Install TBB apt package if: matrix.install_tbb == 'ON' @@ -147,8 +147,10 @@ jobs: - name: Install libhwloc run: .github/scripts/install_hwloc.sh - - name: Set ptrace value for IPC test - run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" + - name: Get UMF version + run: | + VERSION=$(git describe --tags --abbrev=0 | grep -oP '\d+\.\d+\.\d+') + echo "UMF_VERSION=$VERSION" >> $GITHUB_ENV - name: Configure build run: > @@ -165,11 +167,11 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_DISABLE_HWLOC=${{matrix.disable_hwloc}} -DUMF_LINK_HWLOC_STATICALLY=${{matrix.link_hwloc_statically}} ${{ matrix.build_type == 'Debug' && matrix.compiler.c == 'gcc' && '-DUMF_USE_COVERAGE=ON' || '' }} + ${{ matrix.llvm_linker || '' }} - name: Build UMF run: | @@ -180,7 +182,7 @@ jobs: working-directory: ${{env.BUILD_DIR}} run: | ${{ matrix.compiler.cxx == 'icpx' && '. 
/opt/intel/oneapi/setvars.sh' || true }} - ctest --output-on-failure # run all tests for better coverage + LD_LIBRARY_PATH="${{env.BUILD_DIR}}/lib/:${LD_LIBRARY_PATH}" ctest --output-on-failure - name: Check coverage if: ${{ matrix.build_type == 'Debug' && matrix.compiler.c == 'gcc' }} @@ -208,8 +210,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} - --disjoint-pool - --jemalloc-pool ${{ matrix.install_tbb == 'ON' && matrix.disable_hwloc != 'ON' && matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || '' }} @@ -227,20 +227,21 @@ jobs: level_zero_provider: ['ON'] cuda_provider: ['ON'] include: - # temporarily disable failing CI job - #- os: 'windows-2022' - # build_type: Release - # compiler: {c: clang-cl, cxx: clang-cl} - # shared_library: 'ON' - # level_zero_provider: 'ON' - # cuda_provider: 'ON' - # toolset: "-T ClangCL" + - os: 'windows-2019' + # clang build fails on Windows 2022 + build_type: Release + compiler: {c: clang-cl, cxx: clang-cl} + shared_library: 'ON' + level_zero_provider: 'ON' + cuda_provider: 'ON' + toolset: "-T ClangCL" - os: 'windows-2022' build_type: Release compiler: {c: cl, cxx: cl} shared_library: 'ON' level_zero_provider: 'ON' cuda_provider: 'ON' + umfd_lib: 'ON' - os: 'windows-2022' build_type: Release compiler: {c: cl, cxx: cl} @@ -259,14 +260,20 @@ jobs: - name: Initialize vcpkg uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 with: - vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + vcpkgGitCommitId: ea2a964f9303270322cf3f2d51c265ba146c422d # 1.04.2025 vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg vcpkgJsonGlob: '**/vcpkg.json' - name: Install dependencies - run: vcpkg install + run: vcpkg install --triplet x64-windows shell: pwsh # Specifies PowerShell as the shell for running the script. 
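
Aside: the "Get UMF version" steps added in this file derive the version from git tags, replacing the hardcoded `UMF_VERSION: 0.10.1` env removed above; this requires the tag history to be available in the checkout. A hedged variant of the bash flavor used by the Ubuntu job earlier in this file, failing fast when no release tag is reachable (the error message is illustrative, not from this patch):

      # sketch only: abort with a clear annotation instead of exporting an empty version
      - name: Get UMF version
        run: |
          VERSION=$(git describe --tags --abbrev=0 | grep -oP '\d+\.\d+\.\d+') || {
            echo "::error::no release tag reachable; was the checkout made with full history?"
            exit 1
          }
          echo "UMF_VERSION=$VERSION" >> $GITHUB_ENV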
+ - name: Get UMF version + run: | + $version = (git describe --tags --abbrev=0 | Select-String -Pattern '\d+\.\d+\.\d+').Matches.Value + echo "UMF_VERSION=$version" >> $env:GITHUB_ENV + shell: pwsh + - name: Configure build run: > cmake @@ -279,11 +286,11 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}} -DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}} -DUMF_TESTS_FAIL_ON_SKIP=ON + -DUMF_USE_DEBUG_POSTFIX=${{matrix.umfd_lib}} - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j $Env:NUMBER_OF_PROCESSORS @@ -299,11 +306,10 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} - --disjoint-pool - --jemalloc-pool ${{matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || ''}} + ${{ matrix.umfd_lib == 'ON' && '--umfd-lib' || ''}} - name: check /DEPENDENTLOADFLAG in umf.dll if: ${{matrix.shared_library == 'ON' && matrix.compiler.cxx == 'cl'}} @@ -335,10 +341,9 @@ jobs: -B ${{env.BUILD_DIR}} -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" -DUMF_BUILD_SHARED_LIBRARY=ON - -DUMF_BUILD_EXAMPLES=OFF + -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -378,10 +383,9 @@ jobs: -B ${{env.BUILD_DIR}} -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" -DUMF_BUILD_SHARED_LIBRARY=OFF - -DUMF_BUILD_EXAMPLES=OFF + -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -450,6 +454,9 @@ jobs: strategy: matrix: os: ['macos-13', 'macos-14'] + include: + - os: macos-14 + static_hwloc: '-DUMF_LINK_HWLOC_STATICALLY=ON' env: BUILD_TYPE : "Release" runs-on: ${{matrix.os}} @@ -468,8 +475,17 @@ jobs: echo "$PATH" >> $GITHUB_PATH python3 -m pip install -r third_party/requirements.txt + - name: Install dependencies + run: brew install jemalloc tbb automake libtool + - name: Install hwloc - run: brew install hwloc jemalloc tbb + if: ${{ !matrix.static_hwloc }} + run: brew install hwloc + + - name: Get UMF version + run: | + VERSION=$(git describe --tags --abbrev=0 | grep -Eo '\d+\.\d+\.\d+') + echo "UMF_VERSION=$VERSION" >> $GITHUB_ENV - name: Configure build run: > @@ -480,10 +496,10 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_SHARED_LIBRARY=ON -DUMF_TESTS_FAIL_ON_SKIP=ON + ${{matrix.static_hwloc}} - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} -j $(sysctl -n hw.logicalcpu) @@ -494,8 +510,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{env.BUILD_TYPE}} - --disjoint-pool - --jemalloc-pool --proxy --umf-version ${{env.UMF_VERSION}} --shared-library diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index 41710029c8..3953e98de5 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -1,80 +1,191 @@ -# Executes benchmarks implemented in this 
repository +# Executes benchmarks implemented in this repository using scripts +# for results visualization from intel/llvm (unified-runtime dir). name: Benchmarks -on: workflow_call +on: + workflow_call: + inputs: + pr_no: + # even though this is a number, this is a workaround for issues with + # reusable workflow calls that result in "Unexpected value '0'" error. + type: string + default: '0' + bench_script_params: + required: false + type: string + default: '' + upload_report: + required: false + type: boolean + default: false permissions: contents: read + pull-requests: write env: - BUILD_DIR : "${{github.workspace}}/build" - INSTL_DIR : "${{github.workspace}}/../install-dir" + UMF_DIR: "${{github.workspace}}/umf-repo" + BUILD_DIR : "${{github.workspace}}/umf-repo/build" jobs: benchmarks: name: Benchmarks - env: - VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" - strategy: - matrix: - os: ['ubuntu-latest', 'windows-latest'] - include: - # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command to determine the build type - - os: ubuntu-latest - extra_build_option: '-DCMAKE_BUILD_TYPE=Release' - runs-on: ${{matrix.os}} + # run only on upstream; forks will not have the HW + if: github.repository == 'oneapi-src/unified-memory-framework' + runs-on: L0_PERF steps: - - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Install apt packages - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt-get update - sudo apt-get install -y cmake libhwloc-dev libnuma-dev libjemalloc-dev libtbb-dev - - - name: Initialize vcpkg - if: matrix.os == 'windows-latest' - uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 - with: - vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 - vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg - vcpkgJsonGlob: '**/vcpkg.json' - - - name: Install vcpkg packages - if: matrix.os == 'windows-latest' - run: vcpkg install - shell: pwsh # Specifies PowerShell as the shell for running the script. - - - name: Configure build - run: > - cmake - -B ${{env.BUILD_DIR}} - ${{matrix.extra_build_option}} - -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" - -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" - -DUMF_BUILD_SHARED_LIBRARY=ON - -DUMF_BUILD_BENCHMARKS=ON - -DUMF_BUILD_BENCHMARKS_MT=ON - -DUMF_BUILD_TESTS=OFF - -DUMF_FORMAT_CODE_STYLE=OFF - -DUMF_DEVELOPER_MODE=OFF - -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON - -DUMF_BUILD_CUDA_PROVIDER=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON - -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - - - name: Build UMF on Linux - if: matrix.os == 'ubuntu-latest' - run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) - - - name: Build UMF on Windows - if: matrix.os == 'windows-latest' - run: cmake --build ${{env.BUILD_DIR}} --config Release -j $Env:NUMBER_OF_PROCESSORS - - - name: Run benchmarks - working-directory: ${{env.BUILD_DIR}} - run: ctest -V --test-dir benchmark -C Release + # Workspace on self-hosted runners is not cleaned automatically. + # We have to delete the files created outside of using actions. 
+ - name: Cleanup self-hosted workspace + if: always() + run: | + ls -la ./ + rm -rf ./* || true + + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + const pr_no = '${{ inputs.pr_no }}'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const params = '${{ inputs.bench_script_params }}'; + const body = `Compute Benchmarks run (with params: ${params}):\n${url}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Checkout UMF + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + path: ${{env.UMF_DIR}} + fetch-depth: 0 + + # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. + - name: Fetch PR's merge commit + if: ${{ inputs.pr_no != 0 }} + working-directory: ${{env.UMF_DIR}} + env: + PR_NO: ${{ inputs.pr_no }} + run: | + git fetch -- https://github.com/${{github.repository}} +refs/pull/${PR_NO}/*:refs/remotes/origin/pr/${PR_NO}/* + git checkout origin/pr/${PR_NO}/merge + git rev-parse origin/pr/${PR_NO}/merge + + - name: Configure UMF + run: > + cmake + -S ${{env.UMF_DIR}} + -B ${{env.BUILD_DIR}} + -DCMAKE_BUILD_TYPE=Release + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_BENCHMARKS=ON + -DUMF_BUILD_BENCHMARKS_MT=ON + -DUMF_BUILD_TESTS=OFF + -DUMF_BUILD_EXAMPLES=OFF + -DUMF_DEVELOPER_MODE=OFF + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + + - name: Build UMF + run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) + + # Get scripts for benchmark data visualization. + # Use specific tag, as the scripts or files' location may change. + - name: Checkout SYCL + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: intel/llvm + # [BENCHMARK] fix default timeout parameter + # https://github.com/intel/llvm/pull/17412 + ref: 357e9e0b253b7eba105d044e38452b3c09169f8a + path: sycl-repo + fetch-depth: 1 + + - name: Install benchmarking scripts deps + run: | + python -m venv .venv + source .venv/bin/activate + pip install -r ${{github.workspace}}/sycl-repo/unified-runtime/third_party/benchmark_requirements.txt + + - name: Set core range and GPU mask + run: | + # Compute the core range for the second NUMA node; first node is for SYCL/UR jobs. + # Skip the first 4 cores - the kernel is likely to schedule more work on these. + CORES=$(lscpu | awk ' + /NUMA node1 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }') + echo "Selected core: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV + + ZE_AFFINITY_MASK=1 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + + - name: Run UMF benchmarks + id: benchmarks + working-directory: ${{env.BUILD_DIR}} + run: > + source ${{github.workspace}}/.venv/bin/activate && + taskset -c ${{ env.CORES }} ${{ github.workspace }}/sycl-repo/unified-runtime/scripts/benchmarks/main.py + ~/bench_workdir_umf + --umf ${{env.BUILD_DIR}} + --compare baseline + --timeout 3000 + ${{ inputs.upload_report && '--output-html' || '' }} + ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} + ${{ inputs.bench_script_params }} + + # In case it failed to add a comment, we can still print the results. 
+ - name: Print benchmark results + if: ${{ always() && inputs.pr_no != 0 }} + run: cat ${{env.BUILD_DIR}}/benchmark_results.md + + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + let markdown = "" + try { + const fs = require('fs'); + markdown = fs.readFileSync('${{env.BUILD_DIR}}/benchmark_results.md', 'utf8'); + } catch(err) { + } + + const pr_no = '${{ inputs.pr_no }}'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const test_status = '${{ steps.benchmarks.outcome }}'; + const job_status = '${{ job.status }}'; + const params = '${{ inputs.bench_script_params }}'; + const body = `Compute Benchmarks run (${params}):\n${url}\nJob status: ${job_status}. Test status: ${test_status}.\n ${markdown}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Upload HTML report + if: ${{ always() && inputs.upload_report }} + uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: umf-repo/build/benchmark_results.html + key: benchmark-results-${{ github.run_id }} + + - name: Get information about platform + if: ${{ always() }} + working-directory: ${{env.UMF_DIR}} + run: .github/scripts/get_system_info.sh diff --git a/.github/workflows/reusable_checks.yml b/.github/workflows/reusable_checks.yml index e3e264b0db..a7602d2696 100644 --- a/.github/workflows/reusable_checks.yml +++ b/.github/workflows/reusable_checks.yml @@ -21,7 +21,7 @@ jobs: - name: Install dependencies run: | sudo apt-get update - sudo apt-get install -y black cmake clang-format-15 cmake-format libhwloc-dev + sudo apt-get install -y black cmake clang-format-15 cmake-format libhwloc-dev doxygen # Latest distros do not allow global pip installation - name: Install Python requirements in venv @@ -29,7 +29,8 @@ jobs: python3 -m venv .venv . .venv/bin/activate echo "$PATH" >> $GITHUB_PATH - python3 -m pip install bandit + python3 -m pip install -r third_party/requirements.txt + python3 -m pip install bandit codespell - name: Configure CMake run: > @@ -52,11 +53,24 @@ jobs: - name: Check Python formatting run: cmake --build build --target black-format-check + - name: Run check-license + run: | + ./scripts/check_license/check_headers.sh . "Apache-2.0 WITH LLVM-exception" -v + - name: Run a spell check uses: crate-ci/typos@b63f421581dce830bda2f597a678cb7776b41877 # v1.18.2 with: config: ./.github/workflows/.spellcheck-conf.toml + - name: Run codespell + run: python3 ./.github/scripts/run-codespell.py + + - name: Check spelling in docs + run: | + cmake -B build + cmake --build build --target docs + sphinx-build -b spelling ./build/docs_build/config ./build/docs_build/spelling_log -W + # Run Bandit recursively, but omit _deps directory (with 3rd party code) and python's venv - name: Run Bandit run: python3 -m bandit -r . 
-x '/_deps/,/.venv/' diff --git a/.github/workflows/reusable_codeql.yml b/.github/workflows/reusable_codeql.yml index e764563103..252e70eee7 100644 --- a/.github/workflows/reusable_codeql.yml +++ b/.github/workflows/reusable_codeql.yml @@ -48,21 +48,21 @@ jobs: if: matrix.os == 'windows-latest' uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 with: - vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + vcpkgGitCommitId: ea2a964f9303270322cf3f2d51c265ba146c422d # 1.04.2025 vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg vcpkgJsonGlob: '**/vcpkg.json' - name: "[Win] Install dependencies" if: matrix.os == 'windows-latest' run: | - vcpkg install + vcpkg install --triplet x64-windows python3 -m pip install -r third_party/requirements.txt - name: "[Lin] Install apt packages" if: matrix.os == 'ubuntu-latest' run: | sudo apt-get update - sudo apt-get install -y cmake clang libhwloc-dev libnuma-dev libjemalloc-dev libtbb-dev + sudo apt-get install -y cmake clang libhwloc-dev libnuma-dev libtbb-dev # Latest distros do not allow global pip installation - name: "[Lin] Install Python requirements in venv" diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml new file mode 100644 index 0000000000..523b09e129 --- /dev/null +++ b/.github/workflows/reusable_compatibility.yml @@ -0,0 +1,275 @@ +# Workflow for checking the backward compatibility of UMF. +# Test the latest UMF shared library with binaries compiled using the older UMF +# shared library. +name: Compatibility + +on: + workflow_call: + inputs: + tag: + description: Check backward compatibility with this tag + type: string + default: "v0.11.0-dev1" + +permissions: + contents: read + +jobs: + ubuntu: + name: Ubuntu + runs-on: 'ubuntu-22.04' + + steps: + - name: Install apt packages + run: | + sudo apt-get update + sudo apt-get install -y clang cmake libnuma-dev libtbb-dev + + - name: Checkout "tag" UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + ref: refs/tags/${{inputs.tag}} + path: ${{github.workspace}}/tag_version + + - name: Install libhwloc + working-directory: ${{github.workspace}}/tag_version + run: .github/scripts/install_hwloc.sh + + - name: Configure "tag" UMF build + working-directory: ${{github.workspace}}/tag_version + run: > + cmake + -B ${{github.workspace}}/tag_version/build + -DCMAKE_BUILD_TYPE=Debug + -DUMF_BUILD_SHARED_LIBRARY=ON + -DCMAKE_C_COMPILER=gcc + -DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_TESTS=ON + -DUMF_BUILD_EXAMPLES=ON + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build "tag" UMF + working-directory: ${{github.workspace}}/tag_version + run: | + cmake --build ${{github.workspace}}/tag_version/build -j $(nproc) + + - name: Run "tag" UMF tests + working-directory: ${{github.workspace}}/tag_version/build + run: | + LD_LIBRARY_PATH=${{github.workspace}}/tag_version/build/lib/ ctest --output-on-failure + + - name: Checkout latest UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + path: ${{github.workspace}}/latest_version + + - name: Configure latest UMF build + working-directory: ${{github.workspace}}/latest_version + run: > + cmake + -B ${{github.workspace}}/latest_version/build + -DCMAKE_BUILD_TYPE=Debug + 
-DUMF_BUILD_SHARED_LIBRARY=ON + -DCMAKE_C_COMPILER=gcc + -DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_TESTS=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + + - name: Build latest UMF + working-directory: ${{github.workspace}}/latest_version + run: | + cmake --build ${{github.workspace}}/latest_version/build -j $(nproc) + + - name: Run "tag" UMF tests with latest UMF libs (warnings enabled) + working-directory: ${{github.workspace}}/tag_version/build + # GTEST_FILTER is used below to skip test that is not compatible + run: > + UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" + LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ + GTEST_FILTER="-*umfIpcTest.GetPoolByOpenedHandle*" + ctest --verbose + + windows: + name: Windows + env: + VCPKG_PATH: "${{github.workspace}}/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/vcpkg/packages/jemalloc_x64-windows" + runs-on: "windows-2022" + + steps: + - name: Checkout "tag" UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + ref: refs/tags/${{inputs.tag}} + path: ${{github.workspace}}/tag_version + + - name: Initialize vcpkg + uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + with: + vcpkgGitCommitId: ea2a964f9303270322cf3f2d51c265ba146c422d # 1.04.2025 + vcpkgDirectory: ${{github.workspace}}/vcpkg + vcpkgJsonGlob: '**/vcpkg.json' + + # NOTE we use vcpkg setup from "tag" version + - name: Install dependencies + working-directory: ${{github.workspace}}/tag_version + run: vcpkg install --triplet x64-windows + shell: pwsh # Specifies PowerShell as the shell for running the script. 
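
Aside on the mechanism of this new workflow: the tagged build compiles the test binaries, the latest build only produces the library (`-DUMF_BUILD_TESTS=OFF`), and the old tests are then re-run against the new library. On Linux that is an `LD_LIBRARY_PATH` override; Windows has no equivalent, so the final step of this job overwrites the tagged build's umf.dll, which the loader resolves from the test executable's own directory first. The same swap works when reproducing a compatibility break by hand; a hedged PowerShell sketch using this workflow's paths (illustrative, relative to the workspace root):

    # sketch only: manual equivalent of the swap done in the last step of this job
    Copy-Item latest_version/build/bin/Debug/umf.dll tag_version/build/bin/Debug/umf.dll -Force
    ctest -C Debug --test-dir tag_version/build --verbose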
+ + - name: Configure "tag" UMF build + working-directory: ${{github.workspace}}/tag_version + run: > + cmake + -B "${{github.workspace}}/tag_version/build" + -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" + -DCMAKE_C_COMPILER=cl + -DCMAKE_CXX_COMPILER=cl + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_TESTS=ON + -DUMF_BUILD_EXAMPLES=ON + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build "tag" UMF + run: cmake --build "${{github.workspace}}/tag_version/build" --config Debug -j $Env:NUMBER_OF_PROCESSORS + + - name: Run "tag" UMF tests + working-directory: "${{github.workspace}}/tag_version/build" + run: ctest -C Debug --output-on-failure --test-dir test + + - name: Checkout latest UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + path: ${{github.workspace}}/latest_version + + - name: Configure latest UMF build + working-directory: ${{github.workspace}}/latest_version + run: > + cmake + -B "${{github.workspace}}/latest_version/build" + -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" + -DCMAKE_C_COMPILER=cl + -DCMAKE_CXX_COMPILER=cl + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_TESTS=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + + - name: Build latest UMF + run: cmake --build "${{github.workspace}}/latest_version/build" --config Debug -j $Env:NUMBER_OF_PROCESSORS + + - name: Run "tag" UMF tests with latest UMF libs (warnings enabled) + working-directory: ${{github.workspace}}/tag_version/build + run: | + $env:UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" + $env:GTEST_FILTER="-*umfIpcTest.GetPoolByOpenedHandle*" + cp ${{github.workspace}}/latest_version/build/bin/Debug/umf.dll ${{github.workspace}}/tag_version/build/bin/Debug/umf.dll + ctest -C Debug --verbose + + gpu: + name: GPU Ubuntu + # run only on upstream; forks will not have the HW + if: github.repository == 'oneapi-src/unified-memory-framework' + strategy: + matrix: + provider: ['LEVEL_ZERO', 'CUDA'] + runs-on: ["DSS-${{matrix.provider}}", "DSS-UBUNTU"] + + steps: + - name: Checkout "tag" UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + ref: refs/tags/${{inputs.tag}} + path: ${{github.workspace}}/tag_version + + - name: Configure "tag" UMF build + working-directory: ${{github.workspace}}/tag_version + run: > + cmake + -B ${{github.workspace}}/tag_version/build + -DCMAKE_BUILD_TYPE=Debug + -DUMF_BUILD_SHARED_LIBRARY=ON + -DCMAKE_C_COMPILER=gcc + -DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_TESTS=ON + -DUMF_BUILD_GPU_TESTS=ON + -DUMF_BUILD_EXAMPLES=ON + -DUMF_BUILD_GPU_EXAMPLES=ON + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF + -DUMF_BUILD_CUDA_PROVIDER=OFF + -DUMF_BUILD_${{matrix.provider}}_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build "tag" UMF + working-directory: ${{github.workspace}}/tag_version + run: | + cmake --build ${{github.workspace}}/tag_version/build -j $(nproc) + + - name: Run "tag" UMF tests + working-directory: ${{github.workspace}}/tag_version/build + run: > + LD_LIBRARY_PATH=${{github.workspace}}/tag_version/build/lib/ + 
GTEST_FILTER="-*umfIpcTest.GetPoolByOpenedHandle*" + ctest --output-on-failure + + - name: Checkout latest UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + path: ${{github.workspace}}/latest_version + + - name: Configure latest UMF build + working-directory: ${{github.workspace}}/latest_version + run: > + cmake + -B ${{github.workspace}}/latest_version/build + -DCMAKE_BUILD_TYPE=Debug + -DUMF_BUILD_SHARED_LIBRARY=ON + -DCMAKE_C_COMPILER=gcc + -DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_TESTS=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + + - name: Build latest UMF + working-directory: ${{github.workspace}}/latest_version + run: | + cmake --build ${{github.workspace}}/latest_version/build -j $(nproc) + + # NOTE: Once not implemented features may now be implemented - exclude these tests + - name: Run "tag" UMF tests with latest UMF libs (warnings enabled) + working-directory: ${{github.workspace}}/tag_version/build + run: > + UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" + LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ + GTEST_FILTER="-*umfIpcTest.GetPoolByOpenedHandle*" + ctest --verbose -E "not_impl" diff --git a/.github/workflows/reusable_dax.yml b/.github/workflows/reusable_dax.yml index f7c5d0d21a..4ea5ddac75 100644 --- a/.github/workflows/reusable_dax.yml +++ b/.github/workflows/reusable_dax.yml @@ -31,6 +31,7 @@ env: INSTL_DIR : "${{github.workspace}}/../install-dir" COVERAGE_DIR : "${{github.workspace}}/coverage" COVERAGE_NAME : "exports-coverage-dax" + DAX_TESTS: "./test/test_provider_file_memory ./test/test_provider_devdax_memory" jobs: dax: @@ -83,7 +84,6 @@ jobs: -DUMF_BUILD_GPU_EXAMPLES=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON @@ -106,8 +106,6 @@ jobs: UMF_TESTS_FSDAX_PATH_2=${{env.UMF_TESTS_FSDAX_PATH_2}} ctest -C ${{matrix.build_type}} -V -R "file|fsdax" - # TODO: enable the provider_devdax_memory_ipc test when the IPC tests with the proxy library are fixed - # see the issue: https://github.com/oneapi-src/unified-memory-framework/issues/864 - name: Run the DEVDAX tests with the proxy library # proxy library is built only if libumf is a shared library if: ${{ matrix.shared_library == 'ON' }} @@ -116,10 +114,8 @@ jobs: LD_PRELOAD=./lib/libumf_proxy.so UMF_TESTS_DEVDAX_PATH="/dev/dax${{env.DEVDAX_NAMESPACE}}" UMF_TESTS_DEVDAX_SIZE="$(ndctl list --namespace=namespace${{env.DEVDAX_NAMESPACE}} | grep size | cut -d':' -f2 | cut -d',' -f1)" - ctest -C ${{matrix.build_type}} -V -R devdax -E provider_devdax_memory_ipc + ctest -C ${{matrix.build_type}} -V -R devdax - # TODO: enable the provider_file_memory_ipc test when the IPC tests with the proxy library are fixed - # see the issue: https://github.com/oneapi-src/unified-memory-framework/issues/864 - name: Run the FSDAX tests with the proxy library # proxy library is built only if libumf is a shared library if: ${{ matrix.shared_library == 'ON' }} @@ -128,7 +124,13 @@ jobs: LD_PRELOAD=./lib/libumf_proxy.so UMF_TESTS_FSDAX_PATH=${{env.UMF_TESTS_FSDAX_PATH}} UMF_TESTS_FSDAX_PATH_2=${{env.UMF_TESTS_FSDAX_PATH_2}} - ctest -C ${{matrix.build_type}} -V -R "file|fsdax" -E provider_file_memory_ipc + ctest -C ${{matrix.build_type}} -V -R "file|fsdax" + + - name: Run DAX tests under valgrind 
+ run: | + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} memcheck "${{env.DAX_TESTS}}" + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} drd "${{env.DAX_TESTS}}" + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} helgrind "${{env.DAX_TESTS}}" - name: Check coverage if: ${{ matrix.build_type == 'Debug' }} diff --git a/.github/workflows/reusable_docs_build.yml b/.github/workflows/reusable_docs_build.yml index 269560c674..9317478bb4 100644 --- a/.github/workflows/reusable_docs_build.yml +++ b/.github/workflows/reusable_docs_build.yml @@ -1,6 +1,12 @@ name: Docs build -on: workflow_call +on: + workflow_call: + inputs: + upload: + description: Should HTML documentation be uploaded as artifact? + type: boolean + default: false permissions: contents: read @@ -30,5 +36,31 @@ jobs: python3 -m pip install -r third_party/requirements.txt - name: Build the documentation - working-directory: scripts - run: python3 generate_docs.py + run: | + cmake -B build \ + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF \ + -DUMF_BUILD_CUDA_PROVIDER=OFF \ + -DUMF_BUILD_TESTS=OFF \ + -DUMF_BUILD_EXAMPLES=OFF \ + -DUMF_DISABLE_HWLOC=ON + cmake --build build --target docs + + # If we upload HTML docs, we want to include benchmark results as well + - name: Download benchmark HTML before uploading docs + if: ${{ inputs.upload == true }} + id: download-bench-html + uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: umf-repo/build/benchmark_results.html + key: benchmark-results- + + - name: Move benchmark HTML + if: ${{ inputs.upload == true && steps.download-bench-html.outputs.cache-hit != '' }} + run: | + mv umf-repo/build/benchmark_results.html ${{github.workspace}}/build/docs_build/generated/html + + - name: Upload artifact + if: ${{ inputs.upload == true }} + uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0.0 + with: + path: build/docs_build/generated/html diff --git a/.github/workflows/reusable_fast.yml b/.github/workflows/reusable_fast.yml index e25de68a1b..7b1087ed0a 100644 --- a/.github/workflows/reusable_fast.yml +++ b/.github/workflows/reusable_fast.yml @@ -19,24 +19,20 @@ jobs: matrix: include: - os: windows-latest - disjoint: 'OFF' build_tests: 'ON' simple_cmake: 'OFF' # pure C build (Windows) - os: windows-latest - disjoint: 'OFF' # Tests' building is off for a pure C build build_tests: 'OFF' simple_cmake: 'OFF' - os: ubuntu-latest - disjoint: 'ON' build_tests: 'ON' # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command extra_build_options: '-DCMAKE_BUILD_TYPE=Release -DUMF_BUILD_BENCHMARKS=ON -DUMF_BUILD_BENCHMARKS_MT=ON' simple_cmake: 'OFF' # pure C build (Linux) - os: ubuntu-latest - disjoint: 'OFF' # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command # Tests' building is off for a pure C build build_tests: 'OFF' @@ -44,13 +40,11 @@ jobs: simple_cmake: 'OFF' # simplest CMake on ubuntu-latest - os: ubuntu-latest - disjoint: 'OFF' build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' - # simplest CMake ubuntu-20.04 - - os: ubuntu-20.04 - disjoint: 'OFF' + # simplest CMake ubuntu-22.04 + - os: ubuntu-22.04 build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' @@ -66,31 +60,20 @@ jobs: if: matrix.os == 'windows-latest' uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 with: - 
vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + vcpkgGitCommitId: ea2a964f9303270322cf3f2d51c265ba146c422d # 1.04.2025 vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg vcpkgJsonGlob: '**/vcpkg.json' - name: Install dependencies (windows-latest) if: matrix.os == 'windows-latest' - run: vcpkg install + run: vcpkg install --triplet x64-windows shell: pwsh # Specifies PowerShell as the shell for running the script. - - name: Install dependencies (ubuntu-latest) - if: matrix.os == 'ubuntu-latest' + - name: Install dependencies + if: matrix.os != 'windows-latest' run: | sudo apt-get update - sudo apt-get install -y cmake libjemalloc-dev libhwloc-dev libnuma-dev libtbb-dev - - - name: Install dependencies (ubuntu-20.04) - if: matrix.os == 'ubuntu-20.04' - run: | - sudo apt-get update - sudo apt-get install -y cmake libjemalloc-dev libnuma-dev libtbb-dev - .github/scripts/install_hwloc.sh # install hwloc-2.3.0 instead of hwloc-2.1.0 present in the OS package - - - name: Set ptrace value for IPC test (on Linux only) - if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'ubuntu-20.04' }} - run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" + sudo apt-get install -y cmake libhwloc-dev libnuma-dev libtbb-dev - name: Configure CMake if: matrix.simple_cmake == 'OFF' @@ -101,7 +84,6 @@ jobs: -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=${{matrix.disjoint}} -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_TESTS=${{matrix.build_tests}} -DUMF_BUILD_EXAMPLES=ON diff --git a/.github/workflows/reusable_gpu.yml b/.github/workflows/reusable_gpu.yml index 913a0f0f18..6fcd408208 100644 --- a/.github/workflows/reusable_gpu.yml +++ b/.github/workflows/reusable_gpu.yml @@ -68,15 +68,16 @@ jobs: if: matrix.os == 'Windows' uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 with: - vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + vcpkgGitCommitId: ea2a964f9303270322cf3f2d51c265ba146c422d # 1.04.2025 vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg vcpkgJsonGlob: '**/vcpkg.json' - name: "[Win] Install dependencies" if: matrix.os == 'Windows' - run: vcpkg install + run: vcpkg install --triplet x64-windows # note: disable all providers except the one being tested + # '-DCMAKE_SUPPRESS_REGENERATION=ON' is the WA for the error: "CUSTOMBUILD : CMake error : Cannot restore timestamp" - name: Configure build run: > cmake @@ -88,17 +89,18 @@ jobs: -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} -DUMF_BUILD_BENCHMARKS=ON + -DUMF_BUILD_BENCHMARKS_MT=ON -DUMF_BUILD_TESTS=ON -DUMF_BUILD_GPU_TESTS=ON -DUMF_BUILD_GPU_EXAMPLES=ON -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_CUDA_PROVIDER=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_${{inputs.name}}_PROVIDER=ON -DUMF_TESTS_FAIL_ON_SKIP=ON ${{ matrix.os == 'Ubuntu' && matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} + ${{ matrix.os == 'Windows' && '-DCMAKE_SUPPRESS_REGENERATION=ON' || '' }} - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j ${{matrix.number_of_processors}} @@ -114,7 +116,7 @@ jobs: - name: Run benchmarks if: matrix.build_type == 'Release' working-directory: ${{env.BUILD_DIR}} - run: ctest --output-on-failure --test-dir benchmark -C ${{matrix.build_type}} --exclude-regex umf-bench-multithreaded + run: ctest --output-on-failure --test-dir benchmark -C 
${{matrix.build_type}} --exclude-regex umf-multithreaded - name: Check coverage if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }} diff --git a/.github/workflows/reusable_multi_numa.yml b/.github/workflows/reusable_multi_numa.yml index 8b30ed53ed..47a48adb20 100644 --- a/.github/workflows/reusable_multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -1,4 +1,4 @@ -# Runs tests on multi-numa machine +# Runs tests on multi-numa machines name: MultiNuma on: [workflow_call] @@ -10,6 +10,7 @@ env: BUILD_DIR : "${{github.workspace}}/build" COVERAGE_DIR : "${{github.workspace}}/coverage" COVERAGE_NAME : "exports-coverage-multinuma" + NUMA_TESTS: "./test/test_memspace_numa ./test/test_provider_os_memory_multiple_numa_nodes" jobs: multi_numa: @@ -19,7 +20,7 @@ jobs: strategy: matrix: - os: [ubuntu-22.04, rhel-9.1] + os: [ubuntu-22.04, rhel-9.1, sles-15] build_type: [Debug, Release] shared_library: ['ON', 'OFF'] runs-on: ["DSS-MULTI-NUMA", "DSS-${{matrix.os}}"] @@ -30,9 +31,6 @@ jobs: with: fetch-depth: 0 - - name: Get information about platform - run: .github/scripts/get_system_info.sh - - name: Configure build run: > cmake @@ -44,8 +42,7 @@ jobs: -DUMF_BUILD_BENCHMARKS=OFF -DUMF_BUILD_TESTS=ON -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON - -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=${{ matrix.os == 'rhel-9.1' && 'OFF' || 'ON' }} -DUMF_TESTS_FAIL_ON_SKIP=ON ${{ matrix.build_type == 'Debug' && matrix.os == 'ubuntu-22.04' && '-DUMF_USE_COVERAGE=ON' || '' }} @@ -53,21 +50,29 @@ jobs: run: cmake --build ${{github.workspace}}/build -j $(nproc) - name: Run tests - if: matrix.os != 'rhel-9.1' + if: (matrix.os != 'rhel-9.1') && (matrix.os != 'sles-15') working-directory: ${{github.workspace}}/build run: ctest --output-on-failure --test-dir test - # On RHEL, hwloc version is just a little too low. + # On RHEL/SLES, hwloc version is just a little too low. # Skip some tests until we upgrade hwloc and update CMake to properly handle local hwloc installation. 
# TODO: fix issue #560 - - name: Run tests (on RHEL) - if: matrix.os == 'rhel-9.1' + # TODO: add issue for -E test_init_teardown - it is not clear why it fails + - name: Run tests (on RHEL/SLES) + if: (matrix.os == 'rhel-9.1') || (matrix.os == 'sles-15') working-directory: ${{github.workspace}}/build run: | - ctest --output-on-failure --test-dir test -E "umf-provider_os_memory_multiple_numa_nodes" - ./test/umf_test-provider_os_memory_multiple_numa_nodes \ + ctest --output-on-failure --test-dir test -E "test_provider_os_memory_multiple_numa_nodes|test_init_teardown" + ./test/test_provider_os_memory_multiple_numa_nodes \ --gtest_filter="-*checkModeLocal/*:*checkModePreferredEmptyNodeset/*:testNuma.checkModeInterleave" + - name: Run NUMA tests under valgrind + if: (matrix.os != 'rhel-9.1') && (matrix.os != 'sles-15') + run: | + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} memcheck "${{env.NUMA_TESTS}}" + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} drd "${{env.NUMA_TESTS}}" + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} helgrind "${{env.NUMA_TESTS}}" + - name: Check coverage if: ${{ matrix.build_type == 'Debug' && matrix.os == 'ubuntu-22.04' }} working-directory: ${{env.BUILD_DIR}} @@ -83,3 +88,7 @@ jobs: with: name: ${{env.COVERAGE_NAME}}-${{matrix.os}}-shared-${{matrix.shared_library}} path: ${{env.COVERAGE_DIR}} + + - name: Get information about platform + if: always() + run: .github/scripts/get_system_info.sh diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index 2a27161b3e..c519be95b8 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -32,10 +32,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y cmake libhwloc-dev libjemalloc-dev libtbb-dev lcov - - - name: Set ptrace value for IPC test - run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" + sudo apt-get install -y cmake libhwloc-dev libnuma-dev libtbb-dev lcov - name: Configure build run: > @@ -49,9 +46,8 @@ jobs: -DUMF_BUILD_BENCHMARKS=OFF -DUMF_BUILD_TESTS=ON -DUMF_FORMAT_CODE_STYLE=OFF - -DUMF_DEVELOPER_MODE=OFF + -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_PROXY_LIB_BASED_ON_POOL=${{matrix.proxy_lib_pool}} ${{ matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} @@ -59,15 +55,13 @@ jobs: - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) - # TODO enable the provider_file_memory_ipc test when the IPC tests with the proxy library are fixed - # see the issue: https://github.com/oneapi-src/unified-memory-framework/issues/864 - name: Run "ctest --output-on-failure" with proxy library working-directory: ${{env.BUILD_DIR}} - run: LD_PRELOAD=./lib/libumf_proxy.so ctest --output-on-failure -E provider_file_memory_ipc + run: LD_PRELOAD=./lib/libumf_proxy.so ctest --output-on-failure - - name: Run "./test/umf_test-memoryPool" with proxy library + - name: Run "./test/test_memoryPool" with proxy library working-directory: ${{env.BUILD_DIR}} - run: LD_PRELOAD=./lib/libumf_proxy.so ./test/umf_test-memoryPool + run: LD_PRELOAD=./lib/libumf_proxy.so ./test/test_memoryPool - name: Run "/usr/bin/ls" with proxy library working-directory: ${{env.BUILD_DIR}} @@ -77,14 +71,12 @@ jobs: working-directory: ${{env.BUILD_DIR}} run: UMF_PROXY="page.disposition=shared-shm" 
LD_PRELOAD=./lib/libumf_proxy.so /usr/bin/date - # TODO enable the provider_file_memory_ipc test when the IPC tests with the proxy library are fixed - # see the issue: https://github.com/oneapi-src/unified-memory-framework/issues/864 - name: Run "ctest --output-on-failure" with proxy library and size.threshold=128 working-directory: ${{env.BUILD_DIR}} run: > UMF_PROXY="page.disposition=shared-shm;size.threshold=128" LD_PRELOAD=./lib/libumf_proxy.so - ctest --output-on-failure -E provider_file_memory_ipc + ctest --output-on-failure - name: Check coverage if: ${{ matrix.build_type == 'Debug' }} diff --git a/.github/workflows/reusable_sanitizers.yml b/.github/workflows/reusable_sanitizers.yml index 3acda6833e..c74448e1d2 100644 --- a/.github/workflows/reusable_sanitizers.yml +++ b/.github/workflows/reusable_sanitizers.yml @@ -29,7 +29,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y clang cmake libhwloc-dev libnuma-dev libjemalloc-dev libtbb-dev + sudo apt-get install -y clang cmake libhwloc-dev libnuma-dev libtbb-dev - name: Install oneAPI basekit if: matrix.compiler.cxx == 'icpx' @@ -40,10 +40,6 @@ jobs: sudo apt-get update sudo apt-get install -y intel-oneapi-ippcp-devel intel-oneapi-ipp-devel intel-oneapi-common-oneapi-vars intel-oneapi-compiler-dpcpp-cpp - - - name: Set ptrace value for IPC test - run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" - - name: Configure build run: > ${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh &&' || ''}} @@ -59,7 +55,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_USE_UBSAN=${{matrix.sanitizers.ubsan}} -DUMF_USE_TSAN=${{matrix.sanitizers.tsan}} @@ -77,7 +72,6 @@ jobs: ASAN_OPTIONS: allocator_may_return_null=1 TSAN_OPTIONS: allocator_may_return_null=1 run: | - ${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh' || true }} ctest --output-on-failure windows-build: @@ -112,12 +106,12 @@ jobs: - name: Initialize vcpkg uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 with: - vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + vcpkgGitCommitId: ea2a964f9303270322cf3f2d51c265ba146c422d # 1.04.2025 vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg vcpkgJsonGlob: '**/vcpkg.json' - name: Install dependencies - run: vcpkg install + run: vcpkg install --triplet x64-windows shell: pwsh # Specifies PowerShell as the shell for running the script. 
# TODO enable level zero provider @@ -132,7 +126,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_BUILD_EXAMPLES=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF diff --git a/.github/workflows/reusable_valgrind.yml b/.github/workflows/reusable_valgrind.yml index 86ceb68c68..5999297d6a 100644 --- a/.github/workflows/reusable_valgrind.yml +++ b/.github/workflows/reusable_valgrind.yml @@ -1,4 +1,4 @@ -# Run tests with valgrind intstrumentation tools: memcheck, drd, helgrind +# Run tests with valgrind instrumentation tools: memcheck, drd, helgrind name: Valgrind on: workflow_call @@ -20,7 +20,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y cmake hwloc libhwloc-dev libjemalloc-dev libnuma-dev libtbb-dev valgrind + sudo apt-get install -y cmake hwloc libhwloc-dev libnuma-dev libtbb-dev valgrind - name: Configure CMake run: > @@ -29,7 +29,6 @@ jobs: -DCMAKE_BUILD_TYPE=Debug -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_CUDA_PROVIDER=OFF diff --git a/.gitignore b/.gitignore index a1a488bc14..7d0aa10fda 100644 --- a/.gitignore +++ b/.gitignore @@ -58,7 +58,7 @@ __pycache__/ *.py[cod] # Generated docs -docs/ +docs_build/ # Build files /build*/ @@ -83,3 +83,7 @@ out/ # IDE Files /.vscode /.devcontainer + +# clangd files +/.cache/clangd +compile_commands.json diff --git a/CMakeLists.txt b/CMakeLists.txt index f71ce18201..f7e640abcc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2024 Intel Corporation +# Copyright (C) 2022-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -10,8 +10,12 @@ list(APPEND CMAKE_MODULE_PATH "${UMF_CMAKE_SOURCE_DIR}/cmake") # Use full path of the helpers module (to omit potential conflicts with others) include(${UMF_CMAKE_SOURCE_DIR}/cmake/helpers.cmake) +# --------------------------------------------------------------------------- # +# Set UMF version variables, define project, and add basic modules +# --------------------------------------------------------------------------- # + # We use semver aligned version, set via git tags. We parse git output to -# establih the version of UMF to be used in CMake, Win dll's, and within the +# establish the version of UMF to be used in CMake, Win dll's, and within the # code (e.g. in logger). We have 3-component releases (e.g. 1.5.1) plus release # candidates and git info. Function below sets all variables related to version. set_version_variables() @@ -22,6 +26,12 @@ project( umf VERSION ${UMF_CMAKE_VERSION} LANGUAGES C) +if(UMF_CMAKE_VERSION VERSION_EQUAL "0.0.0") + message( + WARNING + "UMF version is set to 0.0.0, which most likely is not expected! 
" + "Please checkout the git tags to get a proper version.") +endif() if(PROJECT_VERSION_PATCH GREATER 0) # set extra variable for Windows dll metadata @@ -31,8 +41,13 @@ endif() include(CTest) include(CMakePackageConfigHelpers) include(GNUInstallDirs) +include(FetchContent) find_package(PkgConfig) +# --------------------------------------------------------------------------- # +# Set UMF build options (and CACHE variables) +# --------------------------------------------------------------------------- # + # Define a list to store the names of all options set(UMF_OPTIONS_LIST "") list(APPEND UMF_OPTIONS_LIST CMAKE_BUILD_TYPE) @@ -43,12 +58,11 @@ macro(umf_option) option(${ARGV}) endmacro() -# Build Options +# All CMake options have to be explicitly set in the build_umfd target's +# configuration command umf_option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) umf_option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) umf_option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON) -umf_option(UMF_BUILD_LIBUMF_POOL_DISJOINT - "Build the libumf_pool_disjoint static library" OFF) umf_option(UMF_BUILD_LIBUMF_POOL_JEMALLOC "Build the libumf_pool_jemalloc static library" OFF) umf_option(UMF_BUILD_TESTS "Build UMF tests" ON) @@ -56,20 +70,16 @@ umf_option(UMF_BUILD_GPU_TESTS "Build UMF GPU tests" OFF) umf_option(UMF_BUILD_BENCHMARKS "Build UMF benchmarks" OFF) umf_option(UMF_BUILD_BENCHMARKS_MT "Build UMF multithreaded benchmarks" OFF) umf_option(UMF_BUILD_EXAMPLES "Build UMF examples" ON) -umf_option(UMF_BUILD_FUZZTESTS "Build UMF fuzz tests" OFF) umf_option(UMF_BUILD_GPU_EXAMPLES "Build UMF GPU examples" OFF) -umf_option(UMF_DEVELOPER_MODE "Enable additional developer checks" OFF) +umf_option(UMF_BUILD_FUZZTESTS "Build UMF fuzz tests" OFF) umf_option( UMF_DISABLE_HWLOC "Disable hwloc and UMF features requiring it (OS provider, memtargets, topology discovery)" OFF) umf_option( UMF_LINK_HWLOC_STATICALLY - "Link UMF with HWLOC library statically (supported for Linux, MacOS and Release build on Windows)" + "Link UMF with HWLOC library statically (proxy library will be disabled on Windows+Debug build)" OFF) -umf_option( - UMF_FORMAT_CODE_STYLE - "Add clang, cmake, and black -format-check and -format-apply targets" OFF) set(UMF_HWLOC_NAME "hwloc" CACHE STRING "Custom name for hwloc library w/o extension") @@ -81,6 +91,12 @@ set(UMF_INSTALL_RPATH "Set the runtime search path to the directory with dependencies (e.g. hwloc)" ) +umf_option(UMF_USE_DEBUG_POSTFIX "Add a 'd' postfix to Windows debug libraries" + OFF) +umf_option(UMF_DEVELOPER_MODE "Enable additional developer checks" OFF) +umf_option( + UMF_FORMAT_CODE_STYLE + "Add clang, cmake, and black -format-check and -format-apply targets" OFF) # Only a part of skips is treated as a failure now. TODO: extend to all tests umf_option(UMF_TESTS_FAIL_ON_SKIP "Treat skips in tests as fail" OFF) umf_option(UMF_USE_ASAN "Enable AddressSanitizer checks" OFF) @@ -100,6 +116,12 @@ set_property(CACHE UMF_PROXY_LIB_BASED_ON_POOL PROPERTY STRINGS ${KNOWN_PROXY_LIB_POOLS}) list(APPEND UMF_OPTIONS_LIST UMF_PROXY_LIB_BASED_ON_POOL) +# --------------------------------------------------------------------------- # +# Setup required variables, definitions; fetch dependencies; include +# sub_directories based on build options; set flags; etc. 
+# --------------------------------------------------------------------------- # +message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}") + if(UMF_BUILD_TESTS AND DEFINED ENV{CI} AND NOT UMF_TESTS_FAIL_ON_SKIP) @@ -123,106 +145,234 @@ else() message(FATAL_ERROR "Unknown OS type") endif() -if(UMF_DISABLE_HWLOC) - message(STATUS "hwloc is disabled, hence OS provider, memtargets, " - "topology discovery, examples won't be available!") +if(UMF_DEVELOPER_MODE) + set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} + UMF_DEVELOPER_MODE=1) +endif() + +message(STATUS "CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}") + +if(NOT UMF_BUILD_LIBUMF_POOL_JEMALLOC) + set(UMF_POOL_JEMALLOC_ENABLED FALSE) + set(JEMALLOC_FOUND FALSE) + set(JEMALLOC_LIBRARIES FALSE) +elseif(WINDOWS) + pkg_check_modules(JEMALLOC jemalloc) + if(NOT JEMALLOC_FOUND) + find_package(JEMALLOC REQUIRED jemalloc) + endif() else() - if(NOT DEFINED UMF_HWLOC_REPO) - set(UMF_HWLOC_REPO "https://github.com/open-mpi/hwloc.git") + if(NOT DEFINED UMF_JEMALLOC_REPO) + set(UMF_JEMALLOC_REPO "https://github.com/jemalloc/jemalloc.git") endif() - if(NOT DEFINED UMF_HWLOC_TAG) - set(UMF_HWLOC_TAG hwloc-2.10.0) + if(NOT DEFINED UMF_JEMALLOC_TAG) + set(UMF_JEMALLOC_TAG 5.3.0) endif() - if(NOT UMF_LINK_HWLOC_STATICALLY) - pkg_check_modules(LIBHWLOC hwloc>=2.3.0) - if(NOT LIBHWLOC_FOUND) - find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) + message( + STATUS + "Will fetch jemalloc from ${UMF_JEMALLOC_REPO} (tag: ${UMF_JEMALLOC_TAG})" + ) + + FetchContent_Declare( + jemalloc_targ + GIT_REPOSITORY ${UMF_JEMALLOC_REPO} + GIT_TAG ${UMF_JEMALLOC_TAG}) + FetchContent_MakeAvailable(jemalloc_targ) + + add_custom_command( + COMMAND ./autogen.sh + WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} + OUTPUT ${jemalloc_targ_SOURCE_DIR}/configure) + add_custom_command( + # Custom jemalloc build. Non-default options used: + # --with-jemalloc-prefix=je_ - add je_ prefix to all public APIs + # --disable-cxx - Disable C++ integration. This will cause new and + # delete operators implementations to be omitted. + # --disable-initial-exec-tls - Disable the initial-exec TLS model for + # jemalloc's internal thread-local storage (on those platforms that + # support explicit settings). This can allow jemalloc to be dynamically + # loaded after program startup (e.g. using dlopen). --disable-doc - + # Disable building and installing the documentation. 
+ COMMAND + ./configure --prefix=${jemalloc_targ_BINARY_DIR} + --with-jemalloc-prefix=je_ --disable-cxx --disable-initial-exec-tls + --disable-doc CFLAGS=-fPIC + WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} + OUTPUT ${jemalloc_targ_SOURCE_DIR}/Makefile + DEPENDS ${jemalloc_targ_SOURCE_DIR}/configure) + + if(NOT UMF_QEMU_BUILD) + set(MAKE_ARGUMENTS "-j$(nproc)") + endif() + + add_custom_command( + COMMAND make ${MAKE_ARGUMENTS} + WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} + OUTPUT ${jemalloc_targ_SOURCE_DIR}/lib/libjemalloc.a + DEPENDS ${jemalloc_targ_SOURCE_DIR}/Makefile) + add_custom_command( + COMMAND make install + WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} + OUTPUT ${jemalloc_targ_BINARY_DIR}/lib/libjemalloc.a + DEPENDS ${jemalloc_targ_SOURCE_DIR}/lib/libjemalloc.a) + + add_custom_target(jemalloc_prod + DEPENDS ${jemalloc_targ_BINARY_DIR}/lib/libjemalloc.a) + add_library(jemalloc INTERFACE) + target_link_libraries( + jemalloc INTERFACE ${jemalloc_targ_BINARY_DIR}/lib/libjemalloc.a) + add_dependencies(jemalloc jemalloc_prod) + + set(JEMALLOC_LIBRARY_DIRS ${jemalloc_targ_BINARY_DIR}/lib) + set(JEMALLOC_INCLUDE_DIRS ${jemalloc_targ_BINARY_DIR}/include) + set(JEMALLOC_LIBRARIES ${jemalloc_targ_BINARY_DIR}/lib/libjemalloc.a) +endif() + +if(JEMALLOC_FOUND OR JEMALLOC_LIBRARIES) + set(UMF_POOL_JEMALLOC_ENABLED TRUE) + # add PATH to DLL on Windows + set(DLL_PATH_LIST + "${DLL_PATH_LIST};PATH=path_list_append:${JEMALLOC_DLL_DIRS}") + message(STATUS " JEMALLOC_LIBRARIES = ${JEMALLOC_LIBRARIES}") + message(STATUS " JEMALLOC_INCLUDE_DIRS = ${JEMALLOC_INCLUDE_DIRS}") + message(STATUS " JEMALLOC_LIBRARY_DIRS = ${JEMALLOC_LIBRARY_DIRS}") + if(WINDOWS) + message(STATUS " JEMALLOC_DLL_DIRS = ${JEMALLOC_DLL_DIRS}") + endif() +else() + set(UMF_POOL_JEMALLOC_ENABLED FALSE) + message( + STATUS + "Disabling the Jemalloc Pool and tests and benchmarks that use it because jemalloc was not built/found." + ) +endif() + +if(NOT UMF_DISABLE_HWLOC AND (NOT UMF_LINK_HWLOC_STATICALLY)) + pkg_check_modules(LIBHWLOC hwloc>=2.3.0) + if(NOT LIBHWLOC_FOUND) + find_package(LIBHWLOC 2.3.0 COMPONENTS hwloc) + if(LIBHWLOC_LIBRARIES) + set(LIBHWLOC_AVAILABLE TRUE) endif() + endif() + if(LIBHWLOC_AVAILABLE OR LIBHWLOC_FOUND) # add PATH to DLL on Windows set(DLL_PATH_LIST "${DLL_PATH_LIST};PATH=path_list_append:${LIBHWLOC_DLL_DIRS}") - elseif(WINDOWS) - include(FetchContent) - set(HWLOC_ENABLE_TESTING OFF) - set(HWLOC_SKIP_LSTOPO ON) - set(HWLOC_SKIP_TOOLS ON) + else() + set(UMF_LINK_HWLOC_STATICALLY ON) + endif() +endif() - message( - STATUS - "Will fetch hwloc from ${UMF_HWLOC_REPO} (tag: ${UMF_HWLOC_TAG})" - ) +if(UMF_LINK_HWLOC_STATICALLY AND LINUX) + find_program(AUTORECONF_EXECUTABLE autoreconf) + if(NOT AUTORECONF_EXECUTABLE) + message(WARNING "autoreconf is not installed. 
Disabling hwloc.") + set(UMF_DISABLE_HWLOC ON) + set(UMF_LINK_HWLOC_STATICALLY OFF) + endif() +endif() - FetchContent_Declare( - hwloc_targ - GIT_REPOSITORY ${UMF_HWLOC_REPO} - GIT_TAG ${UMF_HWLOC_TAG} - SOURCE_SUBDIR contrib/windows-cmake/ FIND_PACKAGE_ARGS) - FetchContent_MakeAvailable(hwloc_targ) - - set(LIBHWLOC_INCLUDE_DIRS - ${hwloc_targ_SOURCE_DIR}/include;${hwloc_targ_BINARY_DIR}/include) - set(LIBHWLOC_LIBRARY_DIRS - ${hwloc_targ_BINARY_DIR}/Release;${hwloc_targ_BINARY_DIR}/Debug) - else() - include(FetchContent) +if(UMF_DISABLE_HWLOC) + message(STATUS "hwloc is disabled, hence OS provider, memtargets, " + "topology discovery, examples won't be available!") +else() + if(UMF_LINK_HWLOC_STATICALLY) + if(NOT DEFINED UMF_HWLOC_REPO) + set(UMF_HWLOC_REPO "https://github.com/open-mpi/hwloc.git") + endif() + + if(NOT DEFINED UMF_HWLOC_TAG) + set(UMF_HWLOC_TAG hwloc-2.10.0) + endif() message( STATUS "Will fetch hwloc from ${UMF_HWLOC_REPO} (tag: ${UMF_HWLOC_TAG})" ) - FetchContent_Declare( - hwloc_targ - GIT_REPOSITORY ${UMF_HWLOC_REPO} - GIT_TAG ${UMF_HWLOC_TAG}) - FetchContent_MakeAvailable(hwloc_targ) - - add_custom_command( - COMMAND ./autogen.sh - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_SOURCE_DIR}/configure) - add_custom_command( - COMMAND - ./configure --prefix=${hwloc_targ_BINARY_DIR} - --enable-static=yes --enable-shared=no --disable-libxml2 - --disable-pci --disable-levelzero --disable-opencl - --disable-cuda --disable-nvml --disable-libudev --disable-rsmi - CFLAGS=-fPIC CXXFLAGS=-fPIC - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_SOURCE_DIR}/Makefile - DEPENDS ${hwloc_targ_SOURCE_DIR}/configure) - add_custom_command( - COMMAND make - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la - DEPENDS ${hwloc_targ_SOURCE_DIR}/Makefile) - add_custom_command( - COMMAND make install - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a - DEPENDS ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la) - - add_custom_target(hwloc_prod - DEPENDS ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) - add_library(hwloc INTERFACE) - target_link_libraries(hwloc - INTERFACE ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) - add_dependencies(hwloc hwloc_prod) - - set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/lib) - set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_BINARY_DIR}/include) - set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) - endif() + if(WINDOWS) + set(HWLOC_ENABLE_TESTING OFF) + set(HWLOC_SKIP_LSTOPO ON) + set(HWLOC_SKIP_TOOLS ON) + set(HWLOC_SKIP_INCLUDES ON) + + FetchContent_Declare( + hwloc_targ + GIT_REPOSITORY ${UMF_HWLOC_REPO} + GIT_TAG ${UMF_HWLOC_TAG} + SOURCE_SUBDIR contrib/windows-cmake/) + FetchContent_MakeAvailable(hwloc_targ) + + set(HWLOC_LIB_PATH "") + if(CMAKE_GENERATOR STREQUAL "NMake Makefiles") + set(HWLOC_LIB_PATH "${hwloc_targ_BINARY_DIR}/hwloc.lib") + else() + set(HWLOC_LIB_PATH "${hwloc_targ_BINARY_DIR}/lib/hwloc.lib") + endif() + + get_filename_component(LIBHWLOC_LIBRARY_DIRS ${HWLOC_LIB_PATH} + DIRECTORY) + set(LIBHWLOC_LIBRARIES ${HWLOC_LIB_PATH}) + set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_BINARY_DIR}/include) + else() # not Windows + FetchContent_Declare( + hwloc_targ + GIT_REPOSITORY ${UMF_HWLOC_REPO} + GIT_TAG ${UMF_HWLOC_TAG}) + FetchContent_MakeAvailable(hwloc_targ) + + add_custom_command( + COMMAND ./autogen.sh + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_SOURCE_DIR}/configure) + add_custom_command( + COMMAND + ./configure 
--prefix=${hwloc_targ_BINARY_DIR} + --enable-static=yes --enable-shared=no --disable-libxml2 + --disable-pci --disable-levelzero --disable-opencl + --disable-cuda --disable-nvml --disable-libudev + --disable-rsmi CFLAGS=-fPIC CXXFLAGS=-fPIC + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_SOURCE_DIR}/Makefile + DEPENDS ${hwloc_targ_SOURCE_DIR}/configure) + add_custom_command( + COMMAND make + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la + DEPENDS ${hwloc_targ_SOURCE_DIR}/Makefile) + add_custom_command( + COMMAND make install + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a + DEPENDS ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la) + + add_custom_target(hwloc_prod + DEPENDS ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) + add_library(hwloc INTERFACE) + target_link_libraries( + hwloc INTERFACE ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) + add_dependencies(hwloc hwloc_prod) + + set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/lib) + set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_BINARY_DIR}/include) + set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) + endif() + endif() # UMF_LINK_HWLOC_STATICALLY message(STATUS " LIBHWLOC_LIBRARIES = ${LIBHWLOC_LIBRARIES}") message(STATUS " LIBHWLOC_INCLUDE_DIRS = ${LIBHWLOC_INCLUDE_DIRS}") message(STATUS " LIBHWLOC_LIBRARY_DIRS = ${LIBHWLOC_LIBRARY_DIRS}") + message(STATUS " LIBHWLOC_API_VERSION = ${LIBHWLOC_API_VERSION}") + if(WINDOWS) + message(STATUS " LIBHWLOC_DLL_DIRS = ${LIBHWLOC_DLL_DIRS}") + endif() endif() if(hwloc_targ_SOURCE_DIR) - # apply security patch for HWLOC + # Apply security patch for HWLOC execute_process( COMMAND git apply ${PROJECT_SOURCE_DIR}/cmake/fix_coverity_issues.patch WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} @@ -240,10 +390,8 @@ endif() # Fetch L0 loader only if needed i.e.: if building L0 provider is ON and L0 # headers are not provided by the user (via setting UMF_LEVEL_ZERO_INCLUDE_DIR). if(UMF_BUILD_LEVEL_ZERO_PROVIDER AND (NOT UMF_LEVEL_ZERO_INCLUDE_DIR)) - include(FetchContent) - set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") - set(LEVEL_ZERO_LOADER_TAG v1.19.2) + set(LEVEL_ZERO_LOADER_TAG v1.20.2) message( STATUS @@ -270,8 +418,6 @@ endif() # Fetch CUDA only if needed i.e.: if building CUDA provider is ON and CUDA # headers are not provided by the user (via setting UMF_CUDA_INCLUDE_DIR). 
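For illustration, a configure invocation that supplies user-provided CUDA headers and thereby skips the fetch below (a sketch; the include path is an assumed, typical location):

```sh
cmake -B build -DUMF_BUILD_CUDA_PROVIDER=ON \
      -DUMF_CUDA_INCLUDE_DIR=/usr/local/cuda/include
```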
if(UMF_BUILD_CUDA_PROVIDER AND (NOT UMF_CUDA_INCLUDE_DIR)) - include(FetchContent) - set(CUDA_REPO "https://gitlab.com/nvidia/headers/cuda-individual/cudart.git") set(CUDA_TAG cuda-12.5.1) @@ -288,11 +434,79 @@ if(UMF_BUILD_CUDA_PROVIDER AND (NOT UMF_CUDA_INCLUDE_DIR)) set(CUDA_INCLUDE_DIRS ${cuda-headers_SOURCE_DIR} CACHE PATH "Path to CUDA headers") - message(STATUS "CUDA include directory: ${CUDA_INCLUDE_DIRS}") + message(STATUS "CUDA_INCLUDE_DIRS = ${CUDA_INCLUDE_DIRS}") elseif(UMF_BUILD_CUDA_PROVIDER) # Only header is needed to build UMF set(CUDA_INCLUDE_DIRS ${UMF_CUDA_INCLUDE_DIR}) - message(STATUS "CUDA include directory: ${CUDA_INCLUDE_DIRS}") + message(STATUS "CUDA_INCLUDE_DIRS = ${CUDA_INCLUDE_DIRS}") +endif() + +# Build the umfd target in a separate directory with Debug configuration +if(WINDOWS AND UMF_USE_DEBUG_POSTFIX) + # The build_umfd target's configuration command requires to have + # CMAKE_PREFIX_PATH with semicolons escaped + string(JOIN "\;" UMFD_CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH}) + add_custom_target( + build_umfd ALL + COMMAND + ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" -S ${UMF_CMAKE_SOURCE_DIR} + -B ${CMAKE_BINARY_DIR}/umfd_build -DCMAKE_BUILD_TYPE=Debug + -DCMAKE_DEBUG_POSTFIX=d + -DCMAKE_PREFIX_PATH="${UMFD_CMAKE_PREFIX_PATH}" + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DUMF_USE_DEBUG_POSTFIX=OFF + -DUMF_BUILD_SHARED_LIBRARY=${UMF_BUILD_SHARED_LIBRARY} + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=${UMF_BUILD_LEVEL_ZERO_PROVIDER} + -DUMF_BUILD_CUDA_PROVIDER=${UMF_BUILD_CUDA_PROVIDER} + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=${UMF_BUILD_LIBUMF_POOL_JEMALLOC} + -DUMF_BUILD_TESTS=OFF -DUMF_BUILD_GPU_TESTS=OFF + -DUMF_BUILD_BENCHMARKS=OFF -DUMF_BUILD_BENCHMARKS_MT=OFF + -DUMF_BUILD_EXAMPLES=OFF -DUMF_BUILD_GPU_EXAMPLES=OFF + -DUMF_BUILD_FUZZTESTS=OFF -DUMF_DISABLE_HWLOC=${UMF_DISABLE_HWLOC} + -DUMF_LINK_HWLOC_STATICALLY=${UMF_LINK_HWLOC_STATICALLY} + -DUMF_HWLOC_NAME=${UMF_HWLOC_NAME} + -DUMF_INSTALL_RPATH=${UMF_INSTALL_RPATH} -DUMF_DEVELOPER_MODE=OFF + -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_TESTS_FAIL_ON_SKIP=OFF + -DUMF_USE_ASAN=OFF -DUMF_USE_UBSAN=OFF -DUMF_USE_TSAN=OFF + -DUMF_USE_MSAN=OFF -DUMF_USE_VALGRIND=OFF -DUMF_USE_COVERAGE=OFF + -DUMF_PROXY_LIB_BASED_ON_POOL=${UMF_PROXY_LIB_BASED_ON_POOL} + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/umfd_build --target + umf --config Debug + COMMENT + "Configuring and building umfd.dll in a separate directory with Debug configuration" + ) + + # Copy built UMF libraries to the main binary directory and remove + # umfd_build + if(CMAKE_CONFIGURATION_TYPES) + # Multi-config generator (e.g., Visual Studio) + set(UMFD_DLL_SRC "${CMAKE_BINARY_DIR}/umfd_build/bin/Debug/umfd.dll") + set(UMFD_LIB_SRC "${CMAKE_BINARY_DIR}/umfd_build/lib/Debug/umfd.lib") + set(UMFD_DLL "${CMAKE_BINARY_DIR}/bin/$/umfd.dll") + set(UMFD_LIB "${CMAKE_BINARY_DIR}/lib/$/umfd.lib") + else() + # Single-config generator (e.g., Ninja) + set(UMFD_DLL_SRC "${CMAKE_BINARY_DIR}/umfd_build/bin/umfd.dll") + set(UMFD_LIB_SRC "${CMAKE_BINARY_DIR}/umfd_build/lib/umfd.lib") + set(UMFD_DLL "${CMAKE_BINARY_DIR}/bin/umfd.dll") + set(UMFD_LIB "${CMAKE_BINARY_DIR}/lib/umfd.lib") + endif() + + if(UMF_BUILD_SHARED_LIBRARY) + add_custom_command( + TARGET build_umfd + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${UMFD_DLL_SRC} + ${UMFD_DLL} + COMMENT "Copying umfd.dll to the main binary directory") + endif() + add_custom_command( + TARGET build_umfd + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${UMFD_LIB_SRC} + ${UMFD_LIB} + COMMAND 
${CMAKE_COMMAND} -E remove_directory + ${CMAKE_BINARY_DIR}/umfd_build DEPENDS ${UMFD_DLL} + COMMENT "Copying umfd.lib to the main library directory") endif() # This build type check is not possible on Windows when CMAKE_BUILD_TYPE is not @@ -331,8 +545,8 @@ endif() # For using the options listed in the OPTIONS_REQUIRING_CXX variable a C++17 # compiler is required. Moreover, if these options are not set, CMake will set # up a strict C build, without C++ support. -set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_LIBUMF_POOL_DISJOINT" - "UMF_BUILD_BENCHMARKS_MT" "UMF_BUILD_BENCHMARKS") +set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_BENCHMARKS_MT" + "UMF_BUILD_BENCHMARKS") foreach(option_name ${OPTIONS_REQUIRING_CXX}) if(${option_name}) enable_language(CXX) @@ -414,19 +628,6 @@ else() set(UMF_POOL_SCALABLE_ENABLED FALSE) endif() -if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) - pkg_check_modules(JEMALLOC jemalloc) - if(NOT JEMALLOC_FOUND) - find_package(JEMALLOC REQUIRED jemalloc) - endif() - if(JEMALLOC_FOUND OR JEMALLOC_LIBRARIES) - set(UMF_POOL_JEMALLOC_ENABLED TRUE) - # add PATH to DLL on Windows - set(DLL_PATH_LIST - "${DLL_PATH_LIST};PATH=path_list_append:${JEMALLOC_DLL_DIRS}") - endif() -endif() - if(WINDOWS) # TODO: enable the proxy library in the Debug build on Windows # @@ -467,14 +668,14 @@ elseif(UMF_PROXY_LIB_BASED_ON_POOL STREQUAL SCALABLE) ) endif() elseif(UMF_PROXY_LIB_BASED_ON_POOL STREQUAL JEMALLOC) - if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) + if(UMF_POOL_JEMALLOC_ENABLED) set(UMF_PROXY_LIB_ENABLED ON) set(PROXY_LIB_USES_JEMALLOC_POOL ON) - set(PROXY_LIBS jemalloc_pool umf) + set(PROXY_LIBS umf) else() message( STATUS - "Disabling the proxy library, because UMF_PROXY_LIB_BASED_ON_POOL==JEMALLOC but UMF_BUILD_LIBUMF_POOL_JEMALLOC is OFF" + "Disabling the proxy library, because UMF_PROXY_LIB_BASED_ON_POOL==JEMALLOC but the jemalloc pool is disabled" ) endif() else() @@ -625,7 +826,7 @@ if(UMF_FORMAT_CODE_STYLE) add_custom_target( cmake-format-apply COMMAND ${CMAKE_FORMAT} --in-place ${format_cmake_list} - COMMENT "Format Cmake files using cmake-format") + COMMENT "Format CMake files using cmake-format") endif() if(BLACK) @@ -686,9 +887,34 @@ if(UMF_FORMAT_CODE_STYLE) endif() endif() +find_package(Python3 3.8) +if(Python3_FOUND) + message(STATUS "Adding 'docs' target for creating a documentation.") + add_custom_target( + docs + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMAND UMF_VERSION=${UMF_CMAKE_VERSION} ${Python3_EXECUTABLE} + ${UMF_CMAKE_SOURCE_DIR}/docs/generate_docs.py + COMMENT "Generate HTML documentation using Doxygen") +endif() + # --------------------------------------------------------------------------- # # Configure make install/uninstall and packages # --------------------------------------------------------------------------- # +# Install the umfd library files as part of the umfd component +if(WINDOWS AND UMF_USE_DEBUG_POSTFIX) + if(UMF_BUILD_SHARED_LIBRARY) + install( + FILES ${UMFD_DLL} + DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT umfd) + endif() + install( + FILES ${UMFD_LIB} + DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT umfd) +endif() + install(FILES ${PROJECT_SOURCE_DIR}/LICENSE.TXT DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/doc/${PROJECT_NAME}/") install( diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 58dba18db6..2e7fbf7d6c 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -5,7 +5,7 @@ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of 
age, body size, visible or invisible disability, ethnicity, sex characteristics, gender
-identity and expression, level of experience, education, socio-economic status,
+identity and expression, level of experience, education, socioeconomic status,
 nationality, personal appearance, race, caste, color, religion, or sexual
 identity and orientation.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7b9749c491..6a050c0ae7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -13,8 +13,9 @@
   - [Adding new dependency](#adding-new-dependency)
   - [Code coverage](#code-coverage)
   - [Debugging](#debugging)
-    - [Checking the UMF version and CMake variables (Linux only)](#checking-the-umf-version-and-cmake-variables-linux-only)
-      - [Requirements](#requirements)
+    - [Checking UMF version and build options](#checking-umf-version-and-build-options)
+      - [Linux](#linux)
+      - [Windows](#windows)

 Below you'll find instructions on how to contribute to UMF, either with code changes or issues. All contributions are most welcome!

@@ -229,9 +230,17 @@ $ genhtml -o html_report coverage.info

 ## Debugging

-### Checking the UMF version and CMake variables (Linux only)
+### Checking UMF version and build options

-Strings with the UMF version and useful CMake variables can be grepped in the following way:
+From an already built UMF shared library you can obtain the precise UMF version and
+the CMake variables/options it was built with. This is useful not only to verify what
+should be included within the library, but also for debugging. If you're filing an issue
+to the UMF project, please include this information in your ticket.
+
+#### Linux
+
+Make sure the `binutils` package is installed on your system. Then, you can use
+the following grep command:

 ```bash
 $ strings libumf.so | grep "@(#)"
@@ -239,6 +248,11 @@ $ strings libumf.so | grep "@(#)"
 @(#) Intel(R) UMF CMake variables: "CMAKE_BUILD_TYPE:Debug,...
 ```

-#### Requirements
+Please note that the version encoded in the library file name (e.g. `libumf.so.0.11.0`)
+may not be accurate - the version embedded inside the library is far more precise.
+
+#### Windows

-- binutils package (Linux)
+On Windows, a DLL's metadata can be accessed e.g. by looking into the *Properties*
+of the DLL file in Explorer. Check the *Details* tab for "Product version" and
+"File description" (the latter contains UMF's build options).
diff --git a/ChangeLog b/ChangeLog
index 0736379f85..a7b6137745 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+Thu Apr 17 2025 Łukasz Stolarczuk
+
+	* Version 0.11.0
+
+	This release contains:
+	- make disjoint pool a C structure #898
+	- add fixed provider #976
+	- remove the Coarse provider #934 and replace with internal coarse library #931, #932
+	- implement umfPool[Set/Get]Tag #962
+	- L0 provider: implement support for defer and blocking free #963
+	- add set/restore context in CUDA provider free() #1049
+	- L0 provider: implement min/recommended page size query #1059
+	- add support for CUDA allocation flags #1079
+	- increase refcount to ze_loader/CUDA libraries #1086
+	- implement size limit for the cache of opened IPC handles #998
+	- allow creating fixed provider based on allocations from another pool #1143
+	- multiple benchmark improvements
+	- new tests and CI workflows, incl.
	  backward compatibility checks (#1087, #1163)
+
 Fri Jan 10 2025 Łukasz Stolarczuk

 	* Version 0.10.1
@@ -31,6 +50,12 @@ Mon Dec 09 2024 Łukasz Stolarczuk
 	- extended logging
 	- yet more fixes in the building system

+Tue Nov 12 2024 Łukasz Stolarczuk
+
+	* Version 0.9.1
+
+	This patch release contains only 3 small fixes in the build system of UMF.
+
 Thu Sep 12 2024 Łukasz Stolarczuk

 	* Version 0.9.0
diff --git a/README.md b/README.md
index 6f1233c639..00d6136df9 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ For a quick introduction to UMF usage, please see
 [examples](https://oneapi-src.github.io/unified-memory-framework/examples.html)
 documentation, which includes the code of the
 [basic example](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/basic.c).
-The are also more advanced that allocates USM memory from the
+There are also more advanced examples that allocate USM memory from the
 [Level Zero device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/level_zero_shared_memory/level_zero_shared_memory.c)
 using the Level Zero API and UMF Level Zero memory provider and
 [CUDA device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/cuda_shared_memory/cuda_shared_memory.c)
 using the CUDA API and UMF CUDA memory provider.

@@ -28,19 +28,23 @@ using the CUDA API and UMF CUDA memory provider.
 ### Requirements

 Required packages:
+
 - libhwloc-dev >= 2.3.0 (Linux) / hwloc >= 2.3.0 (Windows)
 - C compiler
 - [CMake](https://cmake.org/) >= 3.14.0

 For development and contributions:
+
 - clang-format-15.0 (can be installed with `python -m pip install clang-format==15.0.7`)
 - cmake-format-0.6 (can be installed with `python -m pip install cmake-format==0.6.13`)
 - black (can be installed with `python -m pip install black==24.3.0`)

-For building tests, multithreaded benchmarks and Disjoint Pool:
+For building tests and multithreaded benchmarks:
+
 - C++ compiler with C++17 support

 For Level Zero memory provider tests:
+
 - Level Zero headers and libraries
 - compatible GPU with installed driver

@@ -50,8 +54,8 @@ Executable and binaries will be in **build/bin**.
 The `{build_config}` can be either `Debug` or `Release`.

 ```bash
-$ cmake -B build -DCMAKE_BUILD_TYPE={build_config}
-$ cmake --build build -j $(nproc)
+cmake -B build -DCMAKE_BUILD_TYPE={build_config}
+cmake --build build -j $(nproc)
 ```

 ### Windows

@@ -60,8 +64,8 @@ Generating Visual Studio Project. EXE and binaries will be in **build/bin/{build
 The `{build_config}` can be either `Debug` or `Release`.

 ```bash
-$ cmake -B build -G "Visual Studio 15 2017 Win64"
-$ cmake --build build --config {build_config} -j $Env:NUMBER_OF_PROCESSORS
+cmake -B build -G "Visual Studio 15 2017 Win64"
+cmake --build build --config {build_config} -j $Env:NUMBER_OF_PROCESSORS
 ```

 ### Benchmark

@@ -73,20 +77,22 @@ UMF also provides multithreaded benchmarks that can be enabled by setting both
 `UMF_BUILD_BENCHMARKS` and `UMF_BUILD_BENCHMARKS_MT` CMake
 configuration flags to `ON`.
 Multithreaded benchmarks require a C++ support.

-The Scalable Pool requirements can be found in the relevant 'Memory Pool
+The Scalable Pool requirements can be found in the relevant 'Memory Pool
 managers' section below.

 ### Sanitizers

 List of sanitizers available on Linux:
+
 - AddressSanitizer
 - UndefinedBehaviorSanitizer
 - ThreadSanitizer
-  - Is mutually exclusive with other sanitizers.
+  - Is mutually exclusive with other sanitizers.
- MemorySanitizer - - Requires linking against MSan-instrumented libraries to prevent false positive reports. More information [here](https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo). + - Requires linking against MSan-instrumented libraries to prevent false positive reports. More information [here](https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo). List of sanitizers available on Windows: + - AddressSanitizer Listed sanitizers can be enabled with appropriate [CMake options](#cmake-standard-options). @@ -100,7 +106,6 @@ List of options provided by CMake: | UMF_BUILD_SHARED_LIBRARY | Build UMF as shared library | ON/OFF | OFF | | UMF_BUILD_LEVEL_ZERO_PROVIDER | Build Level Zero memory provider | ON/OFF | ON | | UMF_BUILD_CUDA_PROVIDER | Build CUDA memory provider | ON/OFF | ON | -| UMF_BUILD_LIBUMF_POOL_DISJOINT | Build the libumf_pool_disjoint static library | ON/OFF | OFF | | UMF_BUILD_LIBUMF_POOL_JEMALLOC | Build the libumf_pool_jemalloc static library | ON/OFF | OFF | | UMF_BUILD_TESTS | Build UMF tests | ON/OFF | ON | | UMF_BUILD_GPU_TESTS | Build UMF GPU tests | ON/OFF | OFF | @@ -117,81 +122,107 @@ List of options provided by CMake: | UMF_USE_MSAN | Enable MemorySanitizer checks | ON/OFF | OFF | | UMF_USE_VALGRIND | Enable Valgrind instrumentation | ON/OFF | OFF | | UMF_USE_COVERAGE | Build with coverage enabled (Linux only) | ON/OFF | OFF | -| UMF_LINK_HWLOC_STATICALLY | Link UMF with HWLOC library statically (Windows+Release only) | ON/OFF | OFF | +| UMF_LINK_HWLOC_STATICALLY | Link UMF with HWLOC library statically (proxy library will be disabled on Windows+Debug build) | ON/OFF | OFF | | UMF_DISABLE_HWLOC | Disable features that requires hwloc (OS provider, memory targets, topology discovery) | ON/OFF | OFF | ## Architecture: memory pools and providers -A UMF memory pool is a combination of a pool allocator and a memory provider. A memory provider is responsible for coarse-grained memory allocations and management of memory pages, while the pool allocator controls memory pooling and handles fine-grained memory allocations. +A UMF memory pool is a combination of a pool allocator and a memory provider. A memory provider is responsible for +coarse-grained memory allocations and management of memory pages, while the pool allocator controls memory pooling +and handles fine-grained memory allocations. Pool allocator can leverage existing allocators (e.g. jemalloc or tbbmalloc) or be written from scratch. -UMF comes with predefined pool allocators (see include/pool) and providers (see include/provider). UMF can also work with user-defined pools and providers that implement a specific interface (see include/umf/memory_pool_ops.h and include/umf/memory_provider_ops.h). +UMF comes with predefined pool allocators (see [`include/umf/pools`](include/umf/pools)) and providers +(see [`include/umf/providers`](include/umf/providers)). UMF can also work with user-defined pools and +providers that implement a specific interface (see [`include/umf/memory_pool_ops.h`](include/umf/memory_pool_ops.h) +and [`include/umf/memory_provider_ops.h`](include/umf/memory_provider_ops.h)). 
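For illustration, a minimal sketch of how a predefined provider and pool compose through these interfaces; the header paths and the params API below are assumed from recent UMF releases, so consult the headers named above for the exact signatures:

```c
#include <umf/memory_pool.h>
#include <umf/memory_provider.h>
#include <umf/pools/pool_scalable.h>
#include <umf/providers/provider_os_memory.h>

// Create an OS memory provider (coarse-grained mappings), put the Scalable
// pool (fine-grained allocations) on top of it, allocate, and tear
// everything down. Error handling is shortened for brevity.
int example(void) {
    umf_os_memory_provider_params_handle_t params = NULL;
    umf_memory_provider_handle_t provider = NULL;
    umf_memory_pool_handle_t pool = NULL;

    if (umfOsMemoryProviderParamsCreate(&params) != UMF_RESULT_SUCCESS ||
        umfMemoryProviderCreate(umfOsMemoryProviderOps(), params, &provider) !=
            UMF_RESULT_SUCCESS ||
        umfPoolCreate(umfScalablePoolOps(), provider, NULL, 0, &pool) !=
            UMF_RESULT_SUCCESS) {
        return -1;
    }

    void *ptr = umfPoolMalloc(pool, 1024); // fine-grained allocation
    umfPoolFree(pool, ptr);

    umfPoolDestroy(pool);
    umfMemoryProviderDestroy(provider);
    umfOsMemoryProviderParamsDestroy(params);
    return 0;
}
```

Note that the Scalable pool needs the TBB malloc library available at runtime (see the 'Scalable Pool' section below).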
-More detailed documentation is available here: https://oneapi-src.github.io/unified-memory-framework/
+More detailed documentation is available here: <https://oneapi-src.github.io/unified-memory-framework/>

 ### Memory providers

-#### Coarse Provider
+#### Fixed memory provider

-A memory provider that can provide memory from:
-1) a given pre-allocated buffer (the fixed-size memory provider option) or
-2) from an additional upstream provider (e.g. provider that does not support the free() operation
-   like the File memory provider or the DevDax memory provider - see below).
+A memory provider that can provide memory from a given pre-allocated buffer.

 #### OS memory provider

 A memory provider that provides memory from an operating system.

 OS memory provider supports two types of memory mappings (set by the `visibility` parameter):
+
 1) private memory mapping (`UMF_MEM_MAP_PRIVATE`)
 2) shared memory mapping (`UMF_MEM_MAP_SHARED` - supported on Linux only yet)

 IPC API requires the `UMF_MEM_MAP_SHARED` memory `visibility` mode
 (`UMF_RESULT_ERROR_INVALID_ARGUMENT` is returned otherwise).

+IPC API uses file descriptor duplication, which requires the `pidfd_getfd(2)` system call to obtain
+a duplicate of another process's file descriptor. This system call is supported since Linux 5.6.
+The required permission ("restricted ptrace") is governed by the `PTRACE_MODE_ATTACH_REALCREDS` check
+(see `ptrace(2)`). To allow a binary that opens the IPC handle to duplicate the file descriptor,
+you can call `prctl(PR_SET_PTRACER, ...)` in the producer binary that gets the IPC handle.
+Alternatively you can change the `ptrace_scope` globally in the system, e.g.:
+
+```sh
+sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope"
+```
+
 There are available two mechanisms for the shared memory mapping:
+
 1) a named shared memory object (used if the `shm_name` parameter is not NULL) or
 2) an anonymous file descriptor (used if the `shm_name` parameter is NULL)

 The `shm_name` parameter should be a null-terminated string of up to NAME_MAX (i.e., 255) characters none of which are slashes.

 An anonymous file descriptor for the shared memory mapping will be created using:
+
 1) `memfd_secret()` syscall - (if it is implemented and) if the `UMF_MEM_FD_FUNC` environment variable does not contain the "memfd_create" string or
 2) `memfd_create()` syscall - otherwise (and if it is implemented).

 ##### Requirements

-Required packages for tests (Linux-only yet):
-  - libnuma-dev
+IPC API on Linux requires the `PTRACE_MODE_ATTACH_REALCREDS` permission (see `ptrace(2)`)
+to duplicate another process's file descriptor (see above).
+
+Packages required for tests (Linux-only yet):
+
+- libnuma-dev

 #### Level Zero memory provider

 A memory provider that provides memory from L0 device.

+IPC API uses file descriptor duplication, which requires the `pidfd_getfd(2)` system call to obtain
+a duplicate of another process's file descriptor. This system call is supported since Linux 5.6.
+The required permission ("restricted ptrace") is governed by the `PTRACE_MODE_ATTACH_REALCREDS` check
+(see `ptrace(2)`). To allow a binary that opens the IPC handle to duplicate the file descriptor,
+you can call `prctl(PR_SET_PTRACER, ...)` in the producer binary that gets the IPC handle.
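A minimal sketch of that producer-side call; the consumer's PID is assumed to be exchanged over an existing channel (e.g. a pipe or socket), which is not part of UMF:

```c
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/types.h>

// Allow `consumer_pid` - and only that process - to duplicate this
// process's file descriptors via pidfd_getfd(2) under Yama's
// "restricted ptrace" mode.
static int allow_fd_duplication(pid_t consumer_pid) {
    if (prctl(PR_SET_PTRACER, (unsigned long)consumer_pid, 0, 0, 0) != 0) {
        perror("prctl(PR_SET_PTRACER)");
        return -1;
    }
    return 0;
}
```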
+Alternatively you can change the `ptrace_scope` globally in the system, e.g.: + +```sh +sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" +``` ##### Requirements 1) Linux or Windows OS 2) The `UMF_BUILD_LEVEL_ZERO_PROVIDER` option turned `ON` (by default) +3) IPC API on Linux requires the `PTRACE_MODE_ATTACH_REALCREDS` permission (see `ptrace(2)`) + to duplicate another process's file descriptor (see above). Additionally, required for tests: -3) The `UMF_BUILD_GPU_TESTS` option turned `ON` -4) System with Level Zero compatible GPU -5) Required packages: +4) The `UMF_BUILD_GPU_TESTS` option turned `ON` +5) System with Level Zero compatible GPU +6) Required packages: - liblevel-zero-dev (Linux) or level-zero-sdk (Windows) #### DevDax memory provider (Linux only) -A memory provider that provides memory from a device DAX (a character device file /dev/daxX.Y). +A memory provider that provides memory from a device DAX (a character device file like `/dev/daxX.Y`). It can be used when large memory mappings are needed. -The DevDax memory provider does not support the free operation -(`umfMemoryProviderFree()` always returns `UMF_RESULT_ERROR_NOT_SUPPORTED`), -so it should be used with a pool manager that will take over -the managing of the provided memory - for example the jemalloc pool -with the `disable_provider_free` parameter set to true. - ##### Requirements 1) Linux OS @@ -201,12 +232,6 @@ with the `disable_provider_free` parameter set to true. A memory provider that provides memory by mapping a regular, extendable file. -The file memory provider does not support the free operation -(`umfMemoryProviderFree()` always returns `UMF_RESULT_ERROR_NOT_SUPPORTED`), -so it should be used with a pool manager that will take over -the managing of the provided memory - for example the jemalloc pool -with the `disable_provider_free` parameter set to true. - IPC API requires the `UMF_MEM_MAP_SHARED` memory `visibility` mode (`UMF_RESULT_ERROR_INVALID_ARGUMENT` is returned otherwise). @@ -241,28 +266,41 @@ This memory pool is distributed as part of libumf. It forwards all requests to t memory provider. Currently umfPoolRealloc, umfPoolCalloc and umfPoolMallocUsableSize functions are not supported by the proxy pool. -To enable this feature, the `UMF_BUILD_SHARED_LIBRARY` option needs to be turned `ON`. - -#### Disjoint pool - -TODO: Add a description +#### Disjoint pool (part of libumf) -##### Requirements - -To enable this feature, the `UMF_BUILD_LIBUMF_POOL_DISJOINT` option needs to be turned `ON`. +The Disjoint pool is designed to keep internal metadata separate from user data. +This separation is particularly useful when user data needs to be placed in memory with relatively high latency, +such as GPU memory or disk storage. #### Jemalloc pool -Jemalloc pool is a [jemalloc](https://github.com/jemalloc/jemalloc)-based memory +Jemalloc pool is a [jemalloc](https://github.com/jemalloc/jemalloc)-based memory pool manager built as a separate static library: libjemalloc_pool.a on Linux and jemalloc_pool.lib on Windows. The `UMF_BUILD_LIBUMF_POOL_JEMALLOC` option has to be turned `ON` to build this library. +[jemalloc](https://github.com/jemalloc/jemalloc) is required to build the jemalloc pool. 
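As a sketch, a local build with this pool enabled boils down to:

```sh
cmake -B build -DCMAKE_BUILD_TYPE=Release -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON
cmake --build build -j $(nproc)
```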
+
+On Linux, jemalloc is built from the (fetched) sources with the following
+non-default options enabled:
+
+- `--with-jemalloc-prefix=je_` - adds the `je_` prefix to all public APIs,
+- `--disable-cxx` - disables C++ integration; this causes the `new` and `delete`
+  operator implementations to be omitted.
+- `--disable-initial-exec-tls` - disables the initial-exec TLS model for jemalloc's
+  internal thread-local storage (on those platforms that support
+  explicit settings); this allows jemalloc to be dynamically
+  loaded after program startup (e.g. using `dlopen()`).
+
+The default jemalloc package is required on Windows.
+
 ##### Requirements

 1) The `UMF_BUILD_LIBUMF_POOL_JEMALLOC` option turned `ON`
-2) Required packages:
-   - libjemalloc-dev (Linux) or jemalloc (Windows)
+2) jemalloc is required:
+
+- on Linux and macOS: jemalloc is fetched and built from sources (a custom build),
+- on Windows: the default jemalloc package is required

 #### Scalable Pool (part of libumf)

@@ -272,7 +310,8 @@ It is distributed as part of libumf. To use this pool, TBB must be installed in
 ##### Requirements

 Packages required for using this pool and executing tests/benchmarks (not required for build):
-  - libtbb-dev (libtbbmalloc.so.2) on Linux or tbb (tbbmalloc.dll) on Windows
+
+- libtbb-dev (libtbbmalloc.so.2) on Linux or tbb (tbbmalloc.dll) on Windows

 ### Memspaces (Linux-only)

@@ -303,19 +342,22 @@ Querying the latency value requires HMAT support on the platform. Calling `umfMe
 UMF provides the UMF proxy library (`umf_proxy`) that makes it possible
 to override the default allocator in other programs in both Linux and Windows.

+To enable this feature, the `UMF_BUILD_SHARED_LIBRARY` option needs to be turned `ON`.
+
 #### Linux

 In case of Linux it can be done without any code changes using the `LD_PRELOAD` environment variable:

 ```sh
-$ LD_PRELOAD=/usr/lib/libumf_proxy.so myprogram
+LD_PRELOAD=/usr/lib/libumf_proxy.so myprogram
 ```

 The memory used by the proxy memory allocator is mmap'ed:
+
 1) with the `MAP_PRIVATE` flag by default or
 2) with the `MAP_SHARED` flag if the `UMF_PROXY` environment variable contains one of two following strings: `page.disposition=shared-shm` or `page.disposition=shared-fd`. These two options differ in a mechanism used during IPC:
    - `page.disposition=shared-shm` - IPC uses the named shared memory. An SHM name is generated using the `umf_proxy_lib_shm_pid_$PID` pattern, where `$PID` is the PID of the process. It creates the `/dev/shm/umf_proxy_lib_shm_pid_$PID` file.
-   - `page.disposition=shared-fd` - IPC uses the file descriptor duplication. It requires using `pidfd_getfd(2)` to obtain a duplicate of another process's file descriptor. Permission to duplicate another process's file descriptor is governed by a ptrace access mode `PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`) that can be changed using the `/proc/sys/kernel/yama/ptrace_scope` interface. `pidfd_getfd(2)` is supported since Linux 5.6.
+   - `page.disposition=shared-fd` - IPC API uses file descriptor duplication, which requires the `pidfd_getfd(2)` system call to obtain a duplicate of another process's file descriptor. This system call is supported since Linux 5.6. The required permission ("restricted ptrace") is governed by the `PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`). To allow a binary that opens the IPC handle to duplicate the file descriptor, you can call `prctl(PR_SET_PTRACER, ...)` in the producer binary that gets the IPC handle.
+Alternatively, you can change the `ptrace_scope` setting globally in the system, e.g.: `sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope"`.

 **Size threshold**

@@ -327,6 +369,7 @@ It can be enabled by adding the `size.threshold=` string to the `UMF_PROX
 #### Windows

 In case of Windows it requires:
+
 1) explicitly linking your program dynamically with the `umf_proxy.dll` library
 2) (C++ code only) including `proxy_lib_new_delete.h` in a single(!) source file
    in your project to override also the `new`/`delete` operations.
@@ -340,3 +383,7 @@ an issue or a Pull Request, please read [Contribution Guide](./CONTRIBUTING.md).

 To enable logging in UMF source files please follow the guide in the
 [web documentation](https://oneapi-src.github.io/unified-memory-framework/introduction.html#logging).
+
+## Notices
+
+The contents of this repository may have been developed with support from one or more Intel-operated generative artificial intelligence solutions.
diff --git a/RELEASE_STEPS.md b/RELEASE_STEPS.md
index ec6e5b6906..9189d48048 100644
--- a/RELEASE_STEPS.md
+++ b/RELEASE_STEPS.md
@@ -39,10 +39,8 @@ Do changes for a release:
 - For major/minor release start from the `main` branch
 - Add an entry to ChangeLog, remember to change the day of the week in the release date
   - For major and minor (prior 1.0.0) releases mention API and ABI compatibility with the previous release
-- Update project's version in a few places:
-  - For major and minor releases: `UMF_VERSION_CURRENT` in `include/umf/base.h` (the API version)
-  - `release` variable in `scripts/docs_config/conf.py` (for docs)
-  - `UMF_VERSION` variable in `.github/workflows/basic.yml` (for installation test)
+- For major and minor releases, update `UMF_VERSION_CURRENT` in `include/umf/base.h` (the API version)
+  - For changes in ops structures, update the corresponding `UMF_*_OPS_VERSION_CURRENT`
 - For major and minor (prior 1.0.0) releases update ABI version in `.map` and `.def` files
   - These files are defined for all public libraries (`libumf` and `proxy_lib`, at the moment)
 - Commit these changes and tag the release:
@@ -52,6 +50,11 @@ Do changes for a release:
 - If stable branch for this release is required, create it:
   - `git checkout -b v$VER.x`
   - For some early versions (like `0.1.0`) we may omit creation of the branch
+- For a major/minor release, when the release is done, add an extra "dev" tag on the `main` branch:
+  - `git tag -a -s -m "Development version $VERSION+1" v$VERSION+1-dev`
+  - for example, when `v0.1.0` is released, the dev tag would be `v0.2.0-dev`
+  - if needed, later on, an extra dev tag can be introduced, e.g. `v0.2.0-dev1`
+  - This way, the `main` branch will identify itself as the next version

 ## Publish changes
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index 5605519ee2..d52fc0857e 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2025 Intel Corporation
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

@@ -40,8 +40,9 @@ function(add_umf_benchmark)
     # * SRCS - source files
     # * LIBS - libraries to be linked with
     # * LIBDIRS - directories of libraries to be linked with
+    # * TESTARGS - additional arguments to be passed to add_test
     set(oneValueArgs NAME)
-    set(multiValueArgs SRCS LIBS LIBDIRS)
+    set(multiValueArgs SRCS LIBS LIBDIRS TESTARGS)
     cmake_parse_arguments(
         ARG
         ""
@@ -51,7 +52,7 @@ function(add_umf_benchmark)

     set(BENCH_NAME umf-${ARG_NAME})

-    set(BENCH_LIBS ${ARG_LIBS} umf)
+    set(BENCH_LIBS ${ARG_LIBS} umf umf_utils)

     add_umf_executable(
         NAME ${BENCH_NAME}
@@ -66,7 +67,7 @@ function(add_umf_benchmark)

     add_test(
         NAME ${BENCH_NAME}
-        COMMAND ${BENCH_NAME}
+        COMMAND ${BENCH_NAME} ${ARG_TESTARGS}
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

     if("${BENCH_NAME}" STREQUAL "umf-ubench")
@@ -86,11 +87,17 @@ function(add_umf_benchmark)
         set_property(TEST ${BENCH_NAME} PROPERTY ENVIRONMENT_MODIFICATION
                                                  "${DLL_PATH_LIST}")
     endif()
-
-    if(UMF_BUILD_LIBUMF_POOL_DISJOINT)
-        target_compile_definitions(${BENCH_NAME}
-                                   PRIVATE UMF_POOL_DISJOINT_ENABLED=1)
+    if(LINUX)
+        # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib; it is required
+        # because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so and tests
+        # should use it instead of the system one
+        set_property(
+            TEST ${BENCH_NAME}
+            PROPERTY ENVIRONMENT_MODIFICATION
+                     "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib"
+        )
     endif()
+
     if(UMF_POOL_JEMALLOC_ENABLED)
         target_compile_definitions(${BENCH_NAME}
                                    PRIVATE UMF_POOL_JEMALLOC_ENABLED=1)
@@ -109,6 +116,9 @@ function(add_umf_benchmark)
     if(UMF_BUILD_CUDA_PROVIDER)
         target_compile_definitions(${BENCH_NAME}
                                    PRIVATE UMF_BUILD_CUDA_PROVIDER=1)
+        target_include_directories(
+            ${BENCH_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/test/common
+                                  ${CUDA_INCLUDE_DIRS})
     endif()
     if(UMF_BUILD_GPU_TESTS)
         target_compile_definitions(${BENCH_NAME} PRIVATE UMF_BUILD_GPU_TESTS=1)
@@ -118,13 +128,6 @@ endfunction()

 set(LIB_DIRS ${LIBHWLOC_LIBRARY_DIRS})

 # optional libraries
-if(UMF_BUILD_LIBUMF_POOL_DISJOINT)
-    set(LIBS_OPTIONAL ${LIBS_OPTIONAL} disjoint_pool)
-endif()
-if(UMF_BUILD_LIBUMF_POOL_JEMALLOC)
-    set(LIBS_OPTIONAL ${LIBS_OPTIONAL} jemalloc_pool ${JEMALLOC_LIBRARIES})
-    set(LIB_DIRS ${LIB_DIRS} ${JEMALLOC_LIBRARY_DIRS})
-endif()
 if(LINUX)
     set(LIBS_OPTIONAL ${LIBS_OPTIONAL} m)
 endif()
@@ -146,7 +149,9 @@ add_umf_benchmark(
     NAME benchmark
     SRCS benchmark.cpp
     LIBS ${LIBS_OPTIONAL} benchmark::benchmark
-    LIBDIRS ${LIB_DIRS})
+    # limit running benchmarks in CI tests to single-threaded
+    LIBDIRS ${LIB_DIRS}
+    TESTARGS --benchmark_filter=threads:1$)

 if(UMF_BUILD_BENCHMARKS_MT)
     add_umf_benchmark(
diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp
index c10bbda877..f57c0d5ae4 100644
--- a/benchmark/benchmark.cpp
+++ b/benchmark/benchmark.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2024 Intel Corporation
+ * Copyright (C) 2024-2025 Intel Corporation
 *
 * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,335 +7,113 @@ */ #include -#include -#ifdef UMF_POOL_SCALABLE_ENABLED -#include -#endif -#include - -#ifdef UMF_POOL_DISJOINT_ENABLED -#include -#endif - -#ifdef UMF_POOL_JEMALLOC_ENABLED -#include -#endif #include "benchmark.hpp" -struct glibc_malloc : public allocator_interface { - unsigned SetUp([[maybe_unused]] ::benchmark::State &state, - unsigned argPos) override { - return argPos; - } - void TearDown([[maybe_unused]] ::benchmark::State &state) override{}; - void *benchAlloc(size_t size) override { return malloc(size); } - void benchFree(void *ptr, [[maybe_unused]] size_t size) override { - free(ptr); - } - static std::string name() { return "glibc"; } -}; - -struct os_provider : public provider_interface { - umf_os_memory_provider_params_handle_t params = NULL; - os_provider() { - umfOsMemoryProviderParamsCreate(¶ms); - return; +#define UMF_BENCHMARK_TEMPLATE_DEFINE(BaseClass, Method, ...) \ + BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, __VA_ARGS__) \ + (benchmark::State & state) { \ + for (auto _ : state) { \ + bench(state); \ + } \ } - ~os_provider() { - if (params != NULL) { - umfOsMemoryProviderParamsDestroy(params); - } - } +#define UMF_BENCHMARK_REGISTER_F(BaseClass, Method) \ + BENCHMARK_REGISTER_F(BaseClass, Method) \ + ->Apply( \ + &BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::defaultArgs) - void *getParams() override { return params; } - umf_memory_provider_ops_t *getOps() override { - return umfOsMemoryProviderOps(); - } - static std::string name() { return "os_provider"; } -}; - -template -struct proxy_pool : public pool_interface { - umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) override { - return umfProxyPoolOps(); - } - void *getParams([[maybe_unused]] ::benchmark::State &state) override { - return nullptr; - } - static std::string name() { return "proxy_pool<" + Provider::name() + ">"; } -}; - -#ifdef UMF_POOL_DISJOINT_ENABLED -template -struct disjoint_pool : public pool_interface { - umf_disjoint_pool_params_handle_t disjoint_memory_pool_params; - - disjoint_pool() { - disjoint_memory_pool_params = NULL; - auto ret = umfDisjointPoolParamsCreate(&disjoint_memory_pool_params); - if (ret != UMF_RESULT_SUCCESS) { - return; - } - - // those function should never fail, so error handling is minimal. 
- ret = umfDisjointPoolParamsSetSlabMinSize(disjoint_memory_pool_params, - 4096); - if (ret != UMF_RESULT_SUCCESS) { - goto err; - } - - ret = umfDisjointPoolParamsSetCapacity(disjoint_memory_pool_params, 4); - if (ret != UMF_RESULT_SUCCESS) { - goto err; - } - - ret = umfDisjointPoolParamsSetMinBucketSize(disjoint_memory_pool_params, - 4096); - if (ret != UMF_RESULT_SUCCESS) { - goto err; - } - - ret = umfDisjointPoolParamsSetMaxPoolableSize( - disjoint_memory_pool_params, 4096 * 16); - - if (ret != UMF_RESULT_SUCCESS) { - goto err; - } - return; - err: - - umfDisjointPoolParamsDestroy(disjoint_memory_pool_params); - disjoint_memory_pool_params = NULL; - } - - ~disjoint_pool() { - if (disjoint_memory_pool_params != NULL) { - umfDisjointPoolParamsDestroy(disjoint_memory_pool_params); - } - } - - umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) override { - return umfDisjointPoolOps(); - } - void *getParams([[maybe_unused]] ::benchmark::State &state) override { - - if (disjoint_memory_pool_params == NULL) { - state.SkipWithError("Failed to create disjoint pool params"); - } - - return disjoint_memory_pool_params; - } - static std::string name() { - return "disjoint_pool<" + Provider::name() + ">"; - } -}; -#endif - -#ifdef UMF_POOL_JEMALLOC_ENABLED -template -struct jemalloc_pool : public pool_interface { - umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) override { - return umfJemallocPoolOps(); - } - void *getParams([[maybe_unused]] ::benchmark::State &state) override { - return NULL; - } - static std::string name() { - return "jemalloc_pool<" + Provider::name() + ">"; - } -}; -#endif - -#ifdef UMF_POOL_SCALABLE_ENABLED -template -struct scalable_pool : public pool_interface { - virtual umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) override { - return umfScalablePoolOps(); - } - virtual void * - getParams([[maybe_unused]] ::benchmark::State &state) override { - return NULL; - } - static std::string name() { - return "scalable_pool<" + Provider::name() + ">"; - } -}; -#endif // Benchmarks scenarios: -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_fix, fixed_alloc_size, - glibc_malloc); - // The benchmark arguments specified in Args() are, in order: // benchmark arguments, allocator arguments, size generator arguments. // The exact meaning of each argument depends on the benchmark, allocator, and size components used. // Refer to the 'argsName()' function in each component to find detailed descriptions of these arguments. 
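+// As an illustration (based on the registrations later in this file): for
+// multiple_malloc_free_benchmark combined with fixed_alloc_size, the argument
+// names are {max_allocs, thread_local_allocations, size}, so Args({10000, 1, 4096})
+// keeps up to 10000 allocations alive per thread and keeps reallocating
+// 4096-byte blocks among them.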
-UMF_BENCHMARK_REGISTER_F(alloc_benchmark, glibc_fix) - ->Args({10000, 0, 4096}) - ->Args({10000, 100000, 4096}) - ->Threads(4) - ->Threads(1); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_uniform, - uniform_alloc_size, glibc_malloc); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, glibc_uniform) - ->Args({10000, 0, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, os_provider, fixed_alloc_size, - provider_allocator); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, os_provider) - ->Args({10000, 0, 4096}) - ->Args({10000, 100000, 4096}) - ->Threads(4) - ->Threads(1); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, proxy_pool, fixed_alloc_size, - pool_allocator>); - -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, proxy_pool) - ->Args({1000, 0, 4096}) - ->Args({1000, 100000, 4096}) - ->Threads(4) - ->Threads(1); - -#ifdef UMF_POOL_DISJOINT_ENABLED -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_fix, - fixed_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_fix) - ->Args({10000, 0, 4096}) - ->Args({10000, 100000, 4096}) - ->Threads(4) - ->Threads(1); - -// TODO: debug why this crashes -/*UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_uniform, - uniform_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_uniform) - ->Args({10000, 0, 8, 64 * 1024, 8}) - // ->Threads(4) - ->Threads(1); -*/ -#endif -#ifdef UMF_POOL_JEMALLOC_ENABLED -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_fix, - fixed_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, jemalloc_pool_fix) - ->Args({10000, 0, 4096}) - ->Args({10000, 100000, 4096}) - ->Threads(4) - ->Threads(1); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_uniform, - uniform_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, jemalloc_pool_uniform) - ->Args({10000, 0, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); - -#endif -#ifdef UMF_POOL_SCALABLE_ENABLED -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, scalable_pool_fix, - fixed_alloc_size, - pool_allocator>); - -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, scalable_pool_fix) - ->Args({10000, 0, 4096}) - ->Args({10000, 100000, 4096}) - ->Threads(4) - ->Threads(1); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, scalable_pool_uniform, - uniform_alloc_size, - pool_allocator>); - -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, scalable_pool_uniform) - ->Args({10000, 0, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); -#endif -// Multiple allocs/free +static void multithreaded(benchmark::internal::Benchmark *benchmark) { + benchmark->Threads(12); + benchmark->Threads(8); + benchmark->Threads(4); + benchmark->Threads(1); +} + +static void singlethreaded(benchmark::internal::Benchmark *benchmark) { + benchmark->Threads(1); +} + +static void +default_multiple_alloc_fix_size(benchmark::internal::Benchmark *benchmark) { + benchmark->Args({10000, 1, 4096}); + benchmark->Iterations(500000); +} + +static void +default_multiple_alloc_uniform_size(benchmark::internal::Benchmark *benchmark) { + benchmark->Args({10000, 1, 8, 4096, 8}); + benchmark->Args({10000, 1, 8, 128, 8}); + benchmark->Iterations(500000); +} UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, glibc_fix, fixed_alloc_size, glibc_malloc); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, glibc_fix) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size) + 
->Apply(&multithreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, glibc_uniform, uniform_alloc_size, glibc_malloc); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, glibc_uniform) - ->Args({10000, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, proxy_pool, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, proxy_pool) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size) + // reduce iterations, as this benchmark is slower than others + ->Iterations(50000) + ->Apply(&singlethreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider, fixed_alloc_size, provider_allocator); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, os_provider) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size) + // reduce iterations, as this benchmark is slower than others + ->Iterations(50000) + ->Apply(&singlethreaded); -#ifdef UMF_POOL_DISJOINT_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); -// TODO: debug why this crashes -/*UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) - ->Args({10000, 0, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); -*/ -#endif + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_fix) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_uniform) - ->Args({1000, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); #endif @@ -345,18 +123,100 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, scalable_pool_fix, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_fix) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, scalable_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_uniform) - ->Args({10000, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); #endif -BENCHMARK_MAIN(); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + proxy_pool_fixedprovider, fixed_alloc_size, + pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + 
proxy_pool_fixedprovider) + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&singlethreaded); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, fixed_provider, + fixed_alloc_size, + provider_allocator); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, fixed_provider) + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&singlethreaded); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + disjoint_pool_fix_fixedprovider, fixed_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + disjoint_pool_fix_fixedprovider) + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + disjoint_pool_uniform_fixedprovider, + uniform_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + disjoint_pool_uniform_fixedprovider) + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); + +#ifdef UMF_POOL_JEMALLOC_ENABLED +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + jemalloc_pool_fixedprovider, fixed_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + jemalloc_pool_fixedprovider) + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + jemalloc_pool_uniform_fixedprovider, + uniform_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + jemalloc_pool_uniform_fixedprovider) + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); + +#endif + +#ifdef UMF_POOL_SCALABLE_ENABLED +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + scalable_pool_fix_fixedprovider, fixed_alloc_size, + pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + scalable_pool_fix_fixedprovider) + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + scalable_pool_uniform_fixedprovider, + uniform_alloc_size, + pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + scalable_pool_uniform_fixedprovider) + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); + +#endif + +//BENCHMARK_MAIN(); +int main(int argc, char **argv) { + if (initAffinityMask()) { + return -1; + } + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); +} diff --git a/benchmark/benchmark.hpp b/benchmark/benchmark.hpp index ead6b39e75..a960d89bca 100644 --- a/benchmark/benchmark.hpp +++ b/benchmark/benchmark.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -70,179 +70,198 @@ * - Additional benchmarking scenarios can be created by extending `benchmark_interface`. */ -#include +#include #include + +#include #include #include -#include "benchmark_interfaces.hpp" +#include "benchmark_size.hpp" +#include "benchmark_umf.hpp" struct alloc_data { void *ptr; size_t size; }; -#define UMF_BENCHMARK_TEMPLATE_DEFINE(BaseClass, Method, ...) 
\ - BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, __VA_ARGS__) \ - (benchmark::State & state) { \ - for (auto _ : state) { \ - bench(state); \ - } \ +struct next_alloc_data { + size_t offset; + size_t size; +}; + +#ifndef WIN32 +std::vector affinityMask; + +int initAffinityMask() { + cpu_set_t mask; + CPU_ZERO(&mask); + + if (sched_getaffinity(0, sizeof(mask), &mask) == -1) { + perror("sched_getaffinity"); + return 1; + } + + for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { + if (CPU_ISSET(cpu, &mask)) { + cpu_set_t mask; + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + affinityMask.push_back(mask); + } + } + // we reverse affinityMask to avoid using cpu 0 if possible. + // CPU 0 is usually the most used one by other applications on the system. + std::reverse(affinityMask.begin(), affinityMask.end()); + return 0; +} + +void setAffinity(benchmark::State &state) { + size_t tid = state.thread_index(); + if (tid >= affinityMask.size()) { + state.SkipWithError("Not enough CPUs available to set affinity"); } -#define UMF_BENCHMARK_REGISTER_F(BaseClass, Method) \ - BENCHMARK_REGISTER_F(BaseClass, Method) \ - ->ArgNames( \ - BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::argsName()) \ - ->Name(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::name()) \ - ->MinWarmUpTime(1) + auto &mask = affinityMask[tid]; + + if (sched_setaffinity(0, sizeof(mask), &mask) != 0) { + state.SkipWithError("Failed to set affinity"); + } +} + +#else +int initAffinityMask() { + printf( + "Affinity set not supported on Windows, benchmark can be unstable\n"); + return 0; +} + +void setAffinity([[maybe_unused]] benchmark::State &state) { + // Not implemented for Windows +} + +#endif + +// function that ensures that all threads have reached the same point +inline void waitForAllThreads(const benchmark::State &state) { + static std::atomic count{0}; + static std::atomic generation{0}; + + const int totalThreads = state.threads(); + int gen = generation.load(std::memory_order_relaxed); + + int c = count.fetch_add(1, std::memory_order_acq_rel) + 1; + + if (c == totalThreads) { + // Last thread - reset count and bump generation + count.store(0, std::memory_order_relaxed); + generation.fetch_add(1, std::memory_order_acq_rel); + } else { + // Not the last thread: spin until the generation changes + while (generation.load(std::memory_order_acquire) == gen) { + std::this_thread::yield(); + } + } +} -class fixed_alloc_size : public alloc_size_interface { +template ::value>> +class provider_allocator : public allocator_interface { public: unsigned SetUp(::benchmark::State &state, unsigned argPos) override { - size = state.range(argPos); - return argPos + 1; + provider.SetUp(state); + return argPos; + } + + void TearDown(::benchmark::State &state) override { + provider.TearDown(state); + } + + void *benchAlloc(size_t size) override { + void *ptr; + if (umfMemoryProviderAlloc(provider.provider, size, 0, &ptr) != + UMF_RESULT_SUCCESS) { + return NULL; + } + return ptr; + } + + void benchFree(void *ptr, size_t size) override { + umfMemoryProviderFree(provider.provider, ptr, size); } - void TearDown([[maybe_unused]] ::benchmark::State &state) override {} - size_t nextSize() override { return size; }; - static std::vector argsName() { return {"size"}; } + + static std::string name() { return Provider::name(); } private: - size_t size; + Provider provider; }; -class uniform_alloc_size : public alloc_size_interface { - using distribution = std::uniform_int_distribution; - +// TODO: assert Pool to be a pool_interface. 
+template class pool_allocator : public allocator_interface { public: unsigned SetUp(::benchmark::State &state, unsigned argPos) override { - auto min = state.range(argPos++); - auto max = state.range(argPos++); - auto gran = state.range(argPos++); - if (min % gran != 0 && max % gran != 0) { - state.SkipWithError("min and max must be divisible by granularity"); - return argPos; - } - - dist.param(distribution::param_type(min / gran, max / gran)); - multiplier = gran; + pool.SetUp(state); return argPos; } - void TearDown([[maybe_unused]] ::benchmark::State &state) override {} - size_t nextSize() override { return dist(generator) * multiplier; } - static std::vector argsName() { - return {"min size", "max size", "granularity"}; + + void TearDown(::benchmark::State &state) override { pool.TearDown(state); } + + virtual void *benchAlloc(size_t size) override { + return umfPoolMalloc(pool.pool, size); + } + + virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) override { + umfPoolFree(pool.pool, ptr); } + static std::string name() { return Pool::name(); } + private: - std::default_random_engine generator; - distribution dist; - size_t multiplier; + Pool pool; }; -// This class benchmarks speed of alloc() operations. -template < - typename Size, typename Alloc, - typename = - std::enable_if_t::value>, - typename = - std::enable_if_t::value>> -class alloc_benchmark : public benchmark_interface { - public: - size_t max_allocs = 1000; - size_t pre_allocs = 0; - void SetUp(::benchmark::State &state) override { - if (state.thread_index() != 0) { - return; - } - - // unpack arguments - int argPos = 0; - max_allocs = state.range(argPos++); - pre_allocs = state.range(argPos++); - // pass rest of the arguments to "alloc_size" and "allocator" - argPos = base::alloc_size.SetUp(state, argPos); - base::allocator.SetUp(state, argPos); - - // initialize allocations tracking vectors (one per thread) - // and iterators for these vectors. - allocations.resize(state.threads()); - iters.resize(state.threads()); - - for (auto &i : iters) { - i = pre_allocs; +template +struct benchmark_interface : public benchmark::Fixture { + int parseArgs(::benchmark::State &state, int argPos) { + Size generator; + argPos = generator.SetUp(state, argPos); + argPos = allocator.SetUp(state, argPos); + alloc_sizes.resize(state.threads()); + for (auto &i : alloc_sizes) { + i = generator; } + return argPos; + } + void SetUp(::benchmark::State &state) { parseArgs(state, 0); } - // do "pre_alloc" allocations before actual benchmark. 
- for (auto &i : allocations) { - i.resize(max_allocs + pre_allocs); - - for (size_t j = 0; j < pre_allocs; j++) { - i[j].ptr = - base::allocator.benchAlloc(base::alloc_size.nextSize()); - if (i[j].ptr == NULL) { - state.SkipWithError("preallocation failed"); - return; - } - i[j].size = base::alloc_size.nextSize(); - } + void TearDown(::benchmark::State &state) { + for (auto &i : alloc_sizes) { + i.TearDown(state); } + allocator.TearDown(state); } - void TearDown(::benchmark::State &state) override { - if (state.thread_index() != 0) { - return; - } - for (auto &i : allocations) { - for (auto &j : i) { - if (j.ptr != NULL) { - base::allocator.benchFree(j.ptr, j.size); - j.ptr = NULL; - j.size = 0; - } - } - } + virtual void bench(::benchmark::State &state) = 0; - base::TearDown(state); + virtual std::vector argsName() { + auto s = Size::argsName(); + auto a = Allocator::argsName(); + std::vector res = {}; + res.insert(res.end(), s.begin(), s.end()); + res.insert(res.end(), a.begin(), a.end()); + return res; } - void bench(benchmark::State &state) override { - auto tid = state.thread_index(); - auto s = base::alloc_size.nextSize(); - auto &i = iters[tid]; - allocations[tid][i].ptr = base::allocator.benchAlloc(s); - if (allocations[tid][i].ptr == NULL) { - state.SkipWithError("allocation failed"); - return; - } - allocations[tid][i].size = s; - i++; - if (i >= max_allocs + pre_allocs) { - // This benchmark tests only allocations - - // if allocation tracker is full we pause benchmark to dealloc all allocations - - // excluding pre-allocated ones. - state.PauseTiming(); - while (i > pre_allocs) { - auto &allocation = allocations[tid][--i]; - base::allocator.benchFree(allocation.ptr, allocation.size); - allocation.ptr = NULL; - allocation.size = 0; - } - state.ResumeTiming(); - } - } - static std::vector argsName() { - auto n = benchmark_interface::argsName(); - std::vector res = {"max_allocs", "pre_allocs"}; - res.insert(res.end(), n.begin(), n.end()); - return res; + virtual std::string name() { return Allocator::name(); } + + static void defaultArgs(Benchmark *benchmark) { + auto *bench = + static_cast *>(benchmark); + benchmark->ArgNames(bench->argsName())->Name(bench->name()); } - static std::string name() { return base::name() + "/alloc"; } - protected: - using base = benchmark_interface; - std::vector> allocations; - std::vector iters; + std::vector alloc_sizes; + Allocator allocator; }; // This class benchmarks performance of random deallocations and (re)allocations @@ -252,131 +271,149 @@ template < std::enable_if_t::value>, typename = std::enable_if_t::value>> -class multiple_malloc_free_benchmark : public alloc_benchmark { +class multiple_malloc_free_benchmark : public benchmark_interface { using distribution = std::uniform_int_distribution; - using base = alloc_benchmark; + template using vector2d = std::vector>; + using base = benchmark_interface; + + int allocsPerIterations = 10; + bool thread_local_allocations = true; + size_t max_allocs = 0; + + vector2d allocations; + std::vector iters; + + vector2d next; + std::vector::const_iterator> next_iter; + int64_t iterations; public: - int reallocs = 100; void SetUp(::benchmark::State &state) override { - if (state.thread_index() != 0) { - return; + auto tid = state.thread_index(); + + if (tid == 0) { + // unpack arguments + iterations = state.max_iterations; + int argPos = 0; + max_allocs = state.range(argPos++); + thread_local_allocations = state.range(argPos++); + base::parseArgs(state, argPos); + + 
allocations.resize(state.threads());
+            next.resize(state.threads());
+            next_iter.resize(state.threads());
+
+#ifndef WIN32
+            // Ensure that system malloc does not have memory pooled on the heap
+            malloc_trim(0);
+#endif
         }
-        // unpack arguments
-        int argPos = 0;
-        base::max_allocs = state.range(argPos++);
-
-        // pass rest of the arguments to "alloc_size" and "allocator"
-        argPos = base::alloc_size.SetUp(state, argPos);
-        base::allocator.SetUp(state, argPos);
-
-        // perform initial allocations which will be later freed and reallocated
-        base::allocations.resize(state.threads());
-        for (auto &i : base::allocations) {
-            i.resize(base::max_allocs);
-
-            for (size_t j = 0; j < base::max_allocs; j++) {
-                i[j].ptr =
-                    base::allocator.benchAlloc(base::alloc_size.nextSize());
-                if (i[j].ptr == NULL) {
-                    state.SkipWithError("preallocation failed");
-                    return;
-                }
-                i[j].size = base::alloc_size.nextSize();
-            }
+        setAffinity(state);
+        // sync threads to ensure that thread 0 parsed args and did all initialization
+        waitForAllThreads(state);
+        // Prepare workload for warm up
+        prealloc(state);
+        prepareWorkload(state);
+        // Start warm up with all threads at once
+        waitForAllThreads(state);
+        // warm up
+        for (int j = 0; j < iterations; j++) {
+            bench(state);
         }
-        dist.param(distribution::param_type(0, base::max_allocs - 1));
+        waitForAllThreads(state);
+        // prepare workload for the actual benchmark
+        freeAllocs(state);
+        prealloc(state);
+        prepareWorkload(state);
     }

-    void bench(benchmark::State &state) override {
+    void TearDown(::benchmark::State &state) override {
         auto tid = state.thread_index();
-        auto &allocation = base::allocations[tid];
-        std::vector<size_t> to_alloc;
-        for (int j = 0; j < reallocs; j++) {
-            auto idx = dist(generator);
-            if (allocation[idx].ptr == NULL) {
-                continue;
-            }
-            to_alloc.push_back(idx);
-            base::allocator.benchFree(allocation[idx].ptr,
-                                      allocation[idx].size);
-            allocation[idx].ptr = NULL;
-            allocation[idx].size = 0;
+        freeAllocs(state);
+        waitForAllThreads(state);
+        if (tid == 0) {
+            // release memory used by the benchmark
+            next.clear();
+            next_iter.clear();
+            allocations.clear();
+            iters.clear();
         }
+        base::TearDown(state);
+    }
+
+    void bench(benchmark::State &state) override {
+        auto tid = state.thread_index();
+        auto &allocation = allocations[tid];
+        for (int i = 0; i < allocsPerIterations; i++) {
+            auto &n = *next_iter[tid]++;
+            auto &alloc = allocation[n.offset];
+            base::allocator.benchFree(alloc.ptr, alloc.size);
+
+            alloc.size = n.size;
+            alloc.ptr = base::allocator.benchAlloc(alloc.size);

-        for (auto idx : to_alloc) {
-            auto s = base::alloc_size.nextSize();
-            allocation[idx].ptr = base::allocator.benchAlloc(s);
-            if (allocation[idx].ptr == NULL) {
+            if (alloc.ptr == NULL) {
                 state.SkipWithError("allocation failed");
             }
-            allocation[idx].size = s;
         }
     }

-    static std::string name() {
-        return base::base::name() + "/multiple_malloc_free";
+    virtual std::string name() {
+        return base::name() + "/multiple_malloc_free";
     }
-    static std::vector<std::string> argsName() {
+
+    virtual std::vector<std::string> argsName() {
         auto n = benchmark_interface<Size, Alloc>::argsName();
-        std::vector<std::string> res = {"max_allocs"};
+        std::vector<std::string> res = {"max_allocs",
+                                        "thread_local_allocations"};
         res.insert(res.end(), n.begin(), n.end());
         return res;
     }

-    std::default_random_engine generator;
-    distribution dist;
-};
-
-template <typename Provider,
-          typename = std::enable_if_t<
-              std::is_base_of<provider_interface, Provider>::value>>
-class provider_allocator : public allocator_interface {
-  public:
-    unsigned SetUp(::benchmark::State &state, unsigned r) override {
-        provider.SetUp(state);
-        return r;
-    }
-
-    void TearDown(::benchmark::State &state)
override { - provider.TearDown(state); - } - void *benchAlloc(size_t size) override { - void *ptr; - if (umfMemoryProviderAlloc(provider.provider, size, 0, &ptr) != - UMF_RESULT_SUCCESS) { - return NULL; + private: + void prealloc(benchmark::State &state) { + auto tid = state.thread_index(); + auto &i = allocations[tid]; + i.resize(max_allocs); + auto sizeGenerator = base::alloc_sizes[tid]; + for (size_t j = 0; j < max_allocs; j++) { + auto size = sizeGenerator.nextSize(); + i[j].ptr = base::allocator.benchAlloc(size); + if (i[j].ptr == NULL) { + state.SkipWithError("preallocation failed"); + return; + } + i[j].size = size; } - return ptr; } - void benchFree(void *ptr, size_t size) override { - umfMemoryProviderFree(provider.provider, ptr, size); - } - static std::string name() { return Provider::name(); } - - private: - Provider provider; -}; -// TODO: assert Pool to be a pool_interface. -template class pool_allocator : public allocator_interface { - public: - unsigned SetUp(::benchmark::State &state, unsigned r) override { - pool.SetUp(state); - return r; + void freeAllocs(benchmark::State &state) { + auto tid = state.thread_index(); + auto &i = allocations[tid]; + for (auto &j : i) { + if (j.ptr != NULL) { + base::allocator.benchFree(j.ptr, j.size); + j.ptr = NULL; + j.size = 0; + } + } } - void TearDown(::benchmark::State &state) override { pool.TearDown(state); } - - virtual void *benchAlloc(size_t size) override { - return umfPoolMalloc(pool.pool, size); - } - virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) override { - umfPoolFree(pool.pool, ptr); + void prepareWorkload(benchmark::State &state) { + auto tid = state.thread_index(); + auto &n = next[tid]; + std::default_random_engine generator; + distribution dist; + generator.seed(0); + dist.param(distribution::param_type(0, max_allocs - 1)); + auto sizeGenerator = base::alloc_sizes[tid]; + + n.clear(); + for (int64_t j = 0; j < state.max_iterations * allocsPerIterations; + j++) { + n.push_back({dist(generator), sizeGenerator.nextSize()}); + } + next_iter[tid] = n.cbegin(); } - - static std::string name() { return Pool::name(); } - - private: - Pool pool; }; diff --git a/benchmark/benchmark_interfaces.hpp b/benchmark/benchmark_interfaces.hpp deleted file mode 100644 index 8681160626..0000000000 --- a/benchmark/benchmark_interfaces.hpp +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (C) 2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - */ - -#include -#include -#include - -#include -#include -#include - -class alloc_size_interface { - public: - virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, - [[maybe_unused]] unsigned argPos) = 0; - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; - virtual size_t nextSize() = 0; - static std::vector argsName() { return {""}; }; -}; - -class allocator_interface { - public: - virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, - [[maybe_unused]] unsigned argPos) = 0; - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; - virtual void *benchAlloc(size_t size) = 0; - virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) = 0; - static std::vector argsName() { return {}; } -}; - -template -struct benchmark_interface : public benchmark::Fixture { - void SetUp(::benchmark::State &state) { - int argPos = alloc_size.SetUp(state, 0); - allocator.SetUp(state, argPos); - } - void TearDown(::benchmark::State &state) { - alloc_size.TearDown(state); - allocator.TearDown(state); - } - - virtual void bench(::benchmark::State &state) = 0; - - static std::vector argsName() { - auto s = Size::argsName(); - auto a = Allocator::argsName(); - std::vector res = {}; - res.insert(res.end(), s.begin(), s.end()); - res.insert(res.end(), a.begin(), a.end()); - return res; - } - static std::string name() { return Allocator::name(); } - - Size alloc_size; - Allocator allocator; -}; - -struct provider_interface { - umf_memory_provider_handle_t provider = NULL; - virtual void SetUp(::benchmark::State &state) { - if (state.thread_index() != 0) { - return; - } - auto umf_result = - umfMemoryProviderCreate(getOps(), getParams(), &provider); - if (umf_result != UMF_RESULT_SUCCESS) { - state.SkipWithError("umfMemoryProviderCreate() failed"); - } - } - - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { - if (state.thread_index() != 0) { - return; - } - - if (provider) { - umfMemoryProviderDestroy(provider); - } - } - - virtual umf_memory_provider_ops_t *getOps() { return nullptr; } - virtual void *getParams() { return nullptr; } -}; - -template ::value>> -struct pool_interface { - virtual void SetUp(::benchmark::State &state) { - provider.SetUp(state); - if (state.thread_index() != 0) { - return; - } - auto umf_result = umfPoolCreate(getOps(state), provider.provider, - getParams(state), 0, &pool); - if (umf_result != UMF_RESULT_SUCCESS) { - state.SkipWithError("umfPoolCreate() failed"); - } - } - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { - if (state.thread_index() != 0) { - return; - } - // TODO: The scalable pool destruction process can race with other threads - // performing TLS (Thread-Local Storage) destruction. - // As a temporary workaround, we introduce a delay (sleep) - // to ensure the pool is destroyed only after all threads have completed. 
- // Issue: #933 - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - if (pool) { - umfPoolDestroy(pool); - } - }; - - virtual umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) { - return nullptr; - } - virtual void *getParams([[maybe_unused]] ::benchmark::State &state) { - return nullptr; - } - T provider; - umf_memory_pool_handle_t pool; -}; diff --git a/benchmark/benchmark_size.hpp b/benchmark/benchmark_size.hpp new file mode 100644 index 0000000000..44e4bf1da8 --- /dev/null +++ b/benchmark/benchmark_size.hpp @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2024-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include +#include + +class alloc_size_interface { + public: + virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, + [[maybe_unused]] unsigned argPos) = 0; + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; + virtual size_t nextSize() = 0; + static std::vector argsName() { return {""}; }; +}; + +class fixed_alloc_size : public alloc_size_interface { + public: + unsigned SetUp(::benchmark::State &state, unsigned argPos) override { + size = state.range(argPos); + return argPos + 1; + } + void TearDown([[maybe_unused]] ::benchmark::State &state) override {} + size_t nextSize() override { return size; }; + static std::vector argsName() { return {"size"}; } + + private: + size_t size; +}; + +class uniform_alloc_size : public alloc_size_interface { + using distribution = std::uniform_int_distribution; + + public: + unsigned SetUp(::benchmark::State &state, unsigned argPos) override { + auto min = state.range(argPos++); + auto max = state.range(argPos++); + auto gran = state.range(argPos++); + if (min % gran != 0 && max % gran != 0) { + state.SkipWithError("min and max must be divisible by granularity"); + return argPos; + } + generator.seed(0); + dist.param(distribution::param_type(min / gran, max / gran)); + multiplier = gran; + return argPos; + } + void TearDown([[maybe_unused]] ::benchmark::State &state) override {} + size_t nextSize() override { return dist(generator) * multiplier; } + static std::vector argsName() { + return {"min_size", "max_size", "granularity"}; + } + + private: + std::default_random_engine generator; + distribution dist; + size_t multiplier = 1; +}; diff --git a/benchmark/benchmark_umf.hpp b/benchmark/benchmark_umf.hpp new file mode 100644 index 0000000000..cfc9982d2c --- /dev/null +++ b/benchmark/benchmark_umf.hpp @@ -0,0 +1,307 @@ +/* + * Copyright (C) 2024-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ +#include +#include + +#include +#include +#include + +#include +#include +#include + +#ifdef UMF_POOL_SCALABLE_ENABLED +#include +#endif +#include +#include + +#ifdef UMF_POOL_JEMALLOC_ENABLED +#include +#endif + +struct provider_interface { + using params_ptr = std::unique_ptr; + + umf_memory_provider_handle_t provider = NULL; + virtual void SetUp(::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + auto params = getParams(state); + auto umf_result = + umfMemoryProviderCreate(getOps(state), params.get(), &provider); + if (umf_result != UMF_RESULT_SUCCESS) { + state.SkipWithError("umfMemoryProviderCreate() failed"); + } + } + + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + + if (provider) { + umfMemoryProviderDestroy(provider); + } + } + + virtual umf_memory_provider_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) { + return nullptr; + } + + virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { + return {nullptr, [](void *) {}}; + } +}; + +template ::value>> +struct pool_interface { + using params_ptr = std::unique_ptr; + + virtual void SetUp(::benchmark::State &state) { + provider.SetUp(state); + if (state.thread_index() != 0) { + return; + } + auto params = getParams(state); + auto umf_result = umfPoolCreate(getOps(state), provider.provider, + params.get(), 0, &pool); + if (umf_result != UMF_RESULT_SUCCESS) { + state.SkipWithError("umfPoolCreate() failed"); + } + } + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + // TODO: The scalable pool destruction process can race with other threads + // performing TLS (Thread-Local Storage) destruction. + // As a temporary workaround, we introduce a delay (sleep) + // to ensure the pool is destroyed only after all threads have completed. 
+ // Issue: #933 + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + if (pool) { + umfPoolDestroy(pool); + } + }; + + virtual umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) { + return nullptr; + } + virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { + return {nullptr, [](void *) {}}; + } + T provider; + umf_memory_pool_handle_t pool; +}; + +class allocator_interface { + public: + virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, + [[maybe_unused]] unsigned argPos) = 0; + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; + virtual void *benchAlloc(size_t size) = 0; + virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) = 0; + static std::vector argsName() { return {}; } +}; + +struct glibc_malloc : public allocator_interface { + unsigned SetUp([[maybe_unused]] ::benchmark::State &state, + unsigned argPos) override { + return argPos; + } + void TearDown([[maybe_unused]] ::benchmark::State &state) override{}; + void *benchAlloc(size_t size) override { return malloc(size); } + void benchFree(void *ptr, [[maybe_unused]] size_t size) override { + free(ptr); + } + static std::string name() { return "glibc"; } +}; + +struct os_provider : public provider_interface { + provider_interface::params_ptr + getParams(::benchmark::State &state) override { + umf_os_memory_provider_params_handle_t raw_params = nullptr; + umfOsMemoryProviderParamsCreate(&raw_params); + if (!raw_params) { + state.SkipWithError("Failed to create os provider params"); + return {nullptr, [](void *) {}}; + } + + // Use a lambda as the custom deleter + auto deleter = [](void *p) { + auto handle = + static_cast(p); + umfOsMemoryProviderParamsDestroy(handle); + }; + + return {static_cast( + raw_params), + deleter}; + } + + umf_memory_provider_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfOsMemoryProviderOps(); + } + static std::string name() { return "os_provider"; } +}; + +struct fixed_provider : public provider_interface { + private: + char *mem = NULL; + const size_t size = 1024 * 1024 * 1024; // 1GB + public: + virtual void SetUp(::benchmark::State &state) override { + if (state.thread_index() != 0) { + return; + } + + if (!mem) { + mem = new char[size]; + } + + provider_interface::SetUp(state); + } + + virtual void TearDown(::benchmark::State &state) override { + if (state.thread_index() != 0) { + return; + } + + delete[] mem; + mem = nullptr; + + provider_interface::TearDown(state); + } + + provider_interface::params_ptr + getParams(::benchmark::State &state) override { + umf_fixed_memory_provider_params_handle_t raw_params = nullptr; + umfFixedMemoryProviderParamsCreate(&raw_params, mem, size); + if (!raw_params) { + state.SkipWithError("Failed to create fixed provider params"); + return {nullptr, [](void *) {}}; + } + + // Use a lambda as the custom deleter + auto deleter = [](void *p) { + auto handle = + static_cast(p); + umfFixedMemoryProviderParamsDestroy(handle); + }; + + return {static_cast( + raw_params), + deleter}; + } + + umf_memory_provider_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfFixedMemoryProviderOps(); + } + static std::string name() { return "fixed_provider"; } +}; + +template +struct proxy_pool : public pool_interface { + umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfProxyPoolOps(); + } + + static std::string name() { return "proxy_pool<" + Provider::name() + ">"; 
} +}; + +template +struct disjoint_pool : public pool_interface { + umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfDisjointPoolOps(); + } + + typename pool_interface::params_ptr + getParams(::benchmark::State &state) override { + umf_disjoint_pool_params_handle_t raw_params = nullptr; + auto ret = umfDisjointPoolParamsCreate(&raw_params); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to create disjoint pool params"); + return {nullptr, [](void *) {}}; + } + + typename pool_interface::params_ptr params( + raw_params, [](void *p) { + umfDisjointPoolParamsDestroy( + static_cast(p)); + }); + + ret = umfDisjointPoolParamsSetSlabMinSize(raw_params, 4096); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to set slab min size"); + return {nullptr, [](void *) {}}; + } + + ret = umfDisjointPoolParamsSetCapacity(raw_params, 4); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to set capacity"); + return {nullptr, [](void *) {}}; + } + + ret = umfDisjointPoolParamsSetMinBucketSize(raw_params, 8); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to set min bucket size"); + return {nullptr, [](void *) {}}; + } + + ret = umfDisjointPoolParamsSetMaxPoolableSize(raw_params, 4096 * 16); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to set max poolable size"); + return {nullptr, [](void *) {}}; + } + + return params; + } + + static std::string name() { + return "disjoint_pool<" + Provider::name() + ">"; + } +}; + +#ifdef UMF_POOL_JEMALLOC_ENABLED +template +struct jemalloc_pool : public pool_interface { + umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfJemallocPoolOps(); + } + + static std::string name() { + return "jemalloc_pool<" + Provider::name() + ">"; + } +}; +#endif + +#ifdef UMF_POOL_SCALABLE_ENABLED +template +struct scalable_pool : public pool_interface { + virtual umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfScalablePoolOps(); + } + + static std::string name() { + return "scalable_pool<" + Provider::name() + ">"; + } +}; +#endif diff --git a/benchmark/multithread.cpp b/benchmark/multithread.cpp index 4558942ecb..d00ffba907 100644 --- a/benchmark/multithread.cpp +++ b/benchmark/multithread.cpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -121,7 +121,7 @@ int main() { std::cout << "skipping jemalloc_pool mt_alloc_free" << std::endl; #endif -#if defined(UMF_POOL_DISJOINT_ENABLED) + // NOTE: disjoint pool is always enabled umf_disjoint_pool_params_handle_t hDisjointParams = nullptr; umf_result_t ret = umfDisjointPoolParamsCreate(&hDisjointParams); if (ret != UMF_RESULT_SUCCESS) { @@ -132,9 +132,6 @@ int main() { std::cout << "disjoint_pool mt_alloc_free: "; mt_alloc_free(poolCreateExtParams{umfDisjointPoolOps(), hDisjointParams, umfOsMemoryProviderOps(), osParams}); -#else - std::cout << "skipping disjoint_pool mt_alloc_free" << std::endl; -#endif // ctest looks for "PASSED" in the output std::cout << "PASSED" << std::endl; diff --git a/benchmark/ubench.c b/benchmark/ubench.c index 5f1bfe9e48..5beaa62be7 100644 --- a/benchmark/ubench.c +++ b/benchmark/ubench.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,23 +15,19 @@ #include #include +#include #include #include #include #include -#ifdef UMF_POOL_DISJOINT_ENABLED -#include -#endif - #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif #include "utils_common.h" -#if (defined UMF_BUILD_LIBUMF_POOL_DISJOINT && \ - defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) #include "utils_level_zero.h" #endif @@ -244,7 +240,6 @@ UBENCH_EX(simple, proxy_pool_with_os_memory_provider) { free(array); } -#if (defined UMF_POOL_DISJOINT_ENABLED) ////////////////// DISJOINT POOL WITH OS MEMORY PROVIDER UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { @@ -327,7 +322,6 @@ UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { umfMemoryProviderDestroy(os_memory_provider); free(array); } -#endif /* (defined UMF_POOL_DISJOINT_ENABLED) */ #if (defined UMF_POOL_JEMALLOC_ENABLED) ////////////////// JEMALLOC POOL WITH OS MEMORY PROVIDER @@ -421,8 +415,7 @@ UBENCH_EX(simple, scalable_pool_with_os_memory_provider) { } #endif /* (defined UMF_POOL_SCALABLE_ENABLED) */ -#if (defined UMF_POOL_DISJOINT_ENABLED && \ - defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs, size_t repeats, umf_ipc_handle_t *ipc_handles) { @@ -445,14 +438,14 @@ static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs, } } -int create_level_zero_params(ze_context_handle_t *context, - ze_device_handle_t *device) { +static int create_level_zero_params(ze_context_handle_t *context, + ze_device_handle_t *device) { uint32_t driver_idx = 0; ze_driver_handle_t driver = NULL; int ret = utils_ze_init_level_zero(); if (ret != 0) { - fprintf(stderr, "Failed to init Level 0!\n"); + fprintf(stderr, "Failed to init Level Zero!\n"); return ret; } @@ -630,7 +623,7 @@ UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) { err_destroy_context: utils_ze_destroy_context(context); } -#endif /* (defined UMF_POLL_DISJOINT_ENABLED && defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ +#endif /* (defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ // TODO add IPC benchmark for CUDA diff --git a/cmake/FindJEMALLOC.cmake b/cmake/FindJEMALLOC.cmake index 
89d488ecc0..2dab1f3833 100644 --- a/cmake/FindJEMALLOC.cmake +++ b/cmake/FindJEMALLOC.cmake @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -35,12 +35,6 @@ endif() if(JEMALLOC_LIBRARY) message(STATUS " Found jemalloc using find_library()") - message(STATUS " JEMALLOC_LIBRARIES = ${JEMALLOC_LIBRARIES}") - message(STATUS " JEMALLOC_INCLUDE_DIRS = ${JEMALLOC_INCLUDE_DIRS}") - message(STATUS " JEMALLOC_LIBRARY_DIRS = ${JEMALLOC_LIBRARY_DIRS}") - if(WINDOWS) - message(STATUS " JEMALLOC_DLL_DIRS = ${JEMALLOC_DLL_DIRS}") - endif() else() set(MSG_NOT_FOUND "jemalloc NOT found (set CMAKE_PREFIX_PATH to point the location)") diff --git a/cmake/FindLIBHWLOC.cmake b/cmake/FindLIBHWLOC.cmake index 8d7998f8d5..2efd072d4d 100644 --- a/cmake/FindLIBHWLOC.cmake +++ b/cmake/FindLIBHWLOC.cmake @@ -1,7 +1,17 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +function(print_hwloc_dirs) + message(STATUS " LIBHWLOC_LIBRARIES = ${LIBHWLOC_LIBRARIES}") + message(STATUS " LIBHWLOC_INCLUDE_DIRS = ${LIBHWLOC_INCLUDE_DIRS}") + message(STATUS " LIBHWLOC_LIBRARY_DIRS = ${LIBHWLOC_LIBRARY_DIRS}") + message(STATUS " LIBHWLOC_API_VERSION = ${LIBHWLOC_API_VERSION}") + if(WINDOWS) + message(STATUS " LIBHWLOC_DLL_DIRS = ${LIBHWLOC_DLL_DIRS}") + endif() +endfunction() + message(STATUS "Checking for module 'libhwloc' using find_library()") find_library(LIBHWLOC_LIBRARY NAMES ${UMF_HWLOC_NAME}) @@ -45,20 +55,15 @@ if(WINDOWS) endif() if(LIBHWLOC_LIBRARY) - message(STATUS " Found libhwloc using find_library()") - message(STATUS " LIBHWLOC_LIBRARIES = ${LIBHWLOC_LIBRARIES}") - message(STATUS " LIBHWLOC_INCLUDE_DIRS = ${LIBHWLOC_INCLUDE_DIRS}") - message(STATUS " LIBHWLOC_LIBRARY_DIRS = ${LIBHWLOC_LIBRARY_DIRS}") - message(STATUS " LIBHWLOC_API_VERSION = ${LIBHWLOC_API_VERSION}") - if(WINDOWS) - message(STATUS " LIBHWLOC_DLL_DIRS = ${LIBHWLOC_DLL_DIRS}") - endif() + message(STATUS " Found libhwloc: ${LIBHWLOC_LIBRARY}") if(LIBHWLOC_FIND_VERSION) if(NOT LIBHWLOC_API_VERSION) + print_hwloc_dirs() message(FATAL_ERROR "Failed to retrieve libhwloc version") elseif(NOT LIBHWLOC_API_VERSION VERSION_GREATER_EQUAL LIBHWLOC_FIND_VERSION) + print_hwloc_dirs() message( FATAL_ERROR " Required version: ${LIBHWLOC_FIND_VERSION}, found ${LIBHWLOC_API_VERSION}" @@ -67,8 +72,7 @@ if(LIBHWLOC_LIBRARY) endif() else() set(MSG_NOT_FOUND - "libhwloc NOT found (set CMAKE_PREFIX_PATH to point the location or disable with -DUMF_DISABLE_HWLOC=ON)" - ) + "libhwloc NOT found in the system (will fetch it from GitHub)") if(LIBHWLOC_FIND_REQUIRED) message(FATAL_ERROR ${MSG_NOT_FOUND}) else() diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 2544a15186..02aaf5c71e 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -1,4 +1,4 @@ -# Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -121,12 +121,12 @@ function(set_version_variables) return() endif() - # v1.5.0-dev - we're on a development tag -> UMF ver: "1.5.0-dev" - string(REGEX MATCHALL "\^v([0-9]+\.[0-9]+\.[0-9]+)-dev\$" MATCHES + # v1.5.0-dev1 - we're on a development tag -> UMF ver: "1.5.0-dev1" + string(REGEX MATCHALL "\^v([0-9]+\.[0-9]+\.[0-9]+)-(dev[0-9]?)\$" MATCHES ${GIT_VERSION}) if(MATCHES) set(UMF_VERSION - "${CMAKE_MATCH_1}-dev" + "${CMAKE_MATCH_1}-${CMAKE_MATCH_2}" PARENT_SCOPE) set(UMF_CMAKE_VERSION "${CMAKE_MATCH_1}" @@ -157,12 +157,12 @@ function(set_version_variables) return() endif() - # v1.5.0-dev-19-gb8f7a32 -> UMF ver: "1.5.0-dev.git19.gb8f7a32" - string(REGEX MATCHALL "v([0-9.]*)-dev-([0-9]*)-([0-9a-g]*)" MATCHES + # v1.5.0-dev2-19-gb8f7a32 -> UMF ver: "1.5.0-dev2.git19.gb8f7a32" + string(REGEX MATCHALL "v([0-9.]*)-(dev[0-9]?)-([0-9]*)-([0-9a-g]*)" MATCHES ${GIT_VERSION}) if(MATCHES) set(UMF_VERSION - "${CMAKE_MATCH_1}-dev.git${CMAKE_MATCH_2}.${CMAKE_MATCH_3}" + "${CMAKE_MATCH_1}-${CMAKE_MATCH_2}.git${CMAKE_MATCH_3}.${CMAKE_MATCH_4}" PARENT_SCOPE) set(UMF_CMAKE_VERSION "${CMAKE_MATCH_1}" @@ -232,9 +232,9 @@ function(add_umf_target_compile_options name) PRIVATE -fPIC -Wall -Wextra - -Wpedantic -Wformat-security - -Wcast-qual + -Wno-cast-qual # TODO: remove this when the const qualifier drop + # is solved in CTL $<$:-fdiagnostics-color=auto>) if(CMAKE_BUILD_TYPE STREQUAL "Release") target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2) @@ -387,7 +387,8 @@ function(add_umf_library) ${ARG_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/include ${UMF_CMAKE_SOURCE_DIR}/src/utils - ${UMF_CMAKE_SOURCE_DIR}/src/base_alloc) + ${UMF_CMAKE_SOURCE_DIR}/src/base_alloc + ${UMF_CMAKE_SOURCE_DIR}/src/coarse) add_umf_target_compile_options(${ARG_NAME}) add_umf_target_link_options(${ARG_NAME}) endfunction() diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000000..737bb12595 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,32 @@ +# Documentation + +To generate HTML documentation, run the `generate_docs.py` script from any sub-directory of the repository (most likely `build`). +To display the proper version of UMF in the documentation title, set the `UMF_VERSION` variable before running the script. + +```bash +cd build +UMF_VERSION=<version> python ../docs/generate_docs.py +``` + +Documentation can also be built using the build target 'docs' (see details below). + +This script will create a `./docs_build` sub-directory, where the intermediate and final files +will be created. HTML docs will be in the `./docs_build/generated/html` directory. + +## make docs + +To run documentation generation via the build target, use the CMake commands below. +To enable this target, a Python executable (in the required version) has to be found in the system.
+ +```bash +cmake -B build +cmake --build build --target docs +``` + +## Requirements + +Script to generate HTML docs requires: + +* [Doxygen](http://www.doxygen.nl/) at least v1.9.1 +* [Python](https://www.python.org/downloads/) at least v3.8 +* and python pip requirements, as defined in `third_party/requirements.txt` diff --git a/scripts/assets/images/intro_architecture.png b/docs/assets/images/intro_architecture.png similarity index 100% rename from scripts/assets/images/intro_architecture.png rename to docs/assets/images/intro_architecture.png diff --git a/scripts/docs_config/Doxyfile b/docs/config/Doxyfile similarity index 99% rename from scripts/docs_config/Doxyfile rename to docs/config/Doxyfile index 43ff2a6037..6309463745 100644 --- a/scripts/docs_config/Doxyfile +++ b/docs/config/Doxyfile @@ -445,7 +445,7 @@ INLINE_SIMPLE_STRUCTS = NO # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. -TYPEDEF_HIDES_STRUCT = NO +TYPEDEF_HIDES_STRUCT = YES # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be @@ -2058,7 +2058,7 @@ GENERATE_XML = YES # The default directory is: xml. # This tag requires that the tag GENERATE_XML is set to YES. -XML_OUTPUT = ../docs/xml +XML_OUTPUT = docs_build/doxyxml # If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program # listings (including syntax highlighting and cross-referencing information) to diff --git a/scripts/docs_config/api.rst b/docs/config/api.rst similarity index 72% rename from scripts/docs_config/api.rst rename to docs/config/api.rst index 7f734cad24..97e664d97f 100644 --- a/scripts/docs_config/api.rst +++ b/docs/config/api.rst @@ -58,6 +58,9 @@ supported by the Proxy Pool. Scalable Pool ------------------------------------------ + +A oneTBB-based memory pool manager. + .. doxygenfile:: pool_scalable.h :sections: define enum typedef func var @@ -80,17 +83,12 @@ and operate on the provider. .. doxygenfile:: memory_provider.h :sections: define enum typedef func var -Coarse Provider +Fixed Memory Provider ------------------------------------------ -A memory provider that can provide memory from: - -1) A given pre-allocated buffer (the fixed-size memory provider option) or -2) From an additional upstream provider (e.g. provider that does not support - the free() operation like the File memory provider or the DevDax memory - provider - see below). +A memory provider that can provide memory from a given preallocated buffer. -.. doxygenfile:: provider_coarse.h +.. doxygenfile:: provider_fixed_memory.h :sections: define enum typedef func var OS Memory Provider @@ -109,10 +107,18 @@ A memory provider that provides memory from L0 device. .. doxygenfile:: provider_level_zero.h :sections: define enum typedef func var +CUDA Provider +------------------------------------------ + +A memory provider that provides memory from CUDA device. + +.. doxygenfile:: provider_cuda.h + :sections: define enum typedef func var + DevDax Memory Provider ------------------------------------------ -A memory provider that provides memory from a device DAX (a character device file /dev/daxX.Y). +A memory provider that provides memory from a device DAX (a character device file like /dev/daxX.Y). .. doxygenfile:: provider_devdax_memory.h :sections: define enum typedef func var @@ -162,6 +168,26 @@ IPC API allows retrieving IPC handles for the memory buffers allocated from UMF memory pools. 
The memory provider used by the pool should support IPC operations for this API to work. Otherwise IPC APIs return an error. +IPC caching +------------------------------------------ + +UMF employs IPC caching to avoid multiple IPC handles being created for the same +coarse-grain memory region allocated by the memory provider. UMF guarantees that +for each coarse-grain memory region allocated by the memory provider, only one +IPC handle is created when the :any:`umfGetIPCHandle` function is called. All +subsequent calls to the :any:`umfGetIPCHandle` function for the pointer to the +same memory region will return the entry from the cache. + +The same is true for the :any:`umfOpenIPCHandle` function. The actual mapping +of the IPC handle to the virtual address space is created only once, and all +subsequent calls to open the same IPC handle will return the entry from the cache. +The size of the cache for opened IPC handles is controlled by the ``UMF_MAX_OPENED_IPC_HANDLES`` +environment variable. By default, the cache size is unlimited. However, if the environment +variable is set and the cache size exceeds the limit, old items will be evicted. UMF tracks +the ref count for each entry in the cache and can evict only items with the ref count equal to 0. +The ref count is increased when the :any:`umfOpenIPCHandle` function is called and decreased +when the :any:`umfCloseIPCHandle` function is called for the corresponding IPC handle. + .. _ipc-api: IPC API diff --git a/scripts/docs_config/conf.py b/docs/config/conf.py similarity index 79% rename from scripts/docs_config/conf.py rename to docs/config/conf.py index 77d9856274..ae698ba98c 100644 --- a/scripts/docs_config/conf.py +++ b/docs/config/conf.py @@ -1,3 +1,5 @@ +import os + # Configuration file for the Sphinx documentation builder. # # This file only contains a selection of the most common options. For a full @@ -22,15 +24,22 @@ author = "Intel" # The full version, including alpha/beta/rc tags -release = "0.10.1" - +release = os.getenv("UMF_VERSION", "") +print( + f"UMF_VERSION used in docs: {release}" + if release != "" + else "please set UMF_VERSION environment variable before running this script" +) # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["breathe"] +extensions = ["breathe", "sphinxcontrib.spelling"] + +spelling_show_suggestions = True +spelling_word_list_filename = "spelling_exceptions.txt" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -49,7 +58,9 @@ # -- Extension configuration ------------------------------------------------- # -- Options for breathe extension ------------------------------------------- -breathe_projects = {project: "../../docs/xml"} +# 'doxyxml' dir is generated with Doxygen; it's supposed to be in a directory +# one above the config directory. 
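For illustration of the caching guarantee documented in the new "IPC caching" section above, here is a minimal sketch (not part of this patch). It assumes `pool` was created over an IPC-capable provider, elides error checking, and `ipc_cache_demo` is a hypothetical helper name:

```c
#include <umf/ipc.h>
#include <umf/memory_pool.h>

// Sketch: `pool` must be backed by a provider that supports IPC
// (e.g. the OS memory provider over shared memory); errors elided.
void ipc_cache_demo(umf_memory_pool_handle_t pool) {
    void *ptr = umfPoolMalloc(pool, 4096);

    umf_ipc_handle_t h1 = NULL, h2 = NULL;
    size_t s1 = 0, s2 = 0;
    umfGetIPCHandle(ptr, &h1, &s1);
    umfGetIPCHandle(ptr, &h2, &s2); // same region: served from the cache

    // Per the guarantee documented above, h1 and h2 refer to the same
    // cached handle; only one handle exists per coarse-grained region.

    umfPutIPCHandle(h2); // decreases the ref count
    umfPutIPCHandle(h1); // ref count can now drop to 0
    umfPoolFree(pool, ptr);
}
```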
+breathe_projects = {project: "../doxyxml"} breathe_default_project = project breathe_show_include = False breathe_default_members = ("members", "undoc-members") diff --git a/scripts/docs_config/examples.rst b/docs/config/examples.rst similarity index 99% rename from scripts/docs_config/examples.rst rename to docs/config/examples.rst index c58e7fc223..4eeea6aa95 100644 --- a/scripts/docs_config/examples.rst +++ b/docs/config/examples.rst @@ -178,7 +178,7 @@ by a different library and the caller of the :any:`umfGetIPCHandle` function may The :any:`umfGetIPCHandle` function returns the IPC handle and its size. The IPC handle is a byte-copyable opaque data structure. The :any:`umf_ipc_handle_t` type is defined as a pointer to a byte array. The size of the handle might be different for different memory provider types. The code snippet below demonstrates how the IPC handle can -be serialized for marshalling purposes. +be serialized for marshaling purposes. .. code-block:: c diff --git a/scripts/docs_config/glossary.rst b/docs/config/glossary.rst similarity index 100% rename from scripts/docs_config/glossary.rst rename to docs/config/glossary.rst diff --git a/scripts/docs_config/index.rst b/docs/config/index.rst similarity index 100% rename from scripts/docs_config/index.rst rename to docs/config/index.rst diff --git a/scripts/docs_config/introduction.rst b/docs/config/introduction.rst similarity index 100% rename from scripts/docs_config/introduction.rst rename to docs/config/introduction.rst diff --git a/docs/config/spelling_exceptions.txt b/docs/config/spelling_exceptions.txt new file mode 100644 index 0000000000..d4e40a3ec8 --- /dev/null +++ b/docs/config/spelling_exceptions.txt @@ -0,0 +1,74 @@ +addr +allocatable +allocator +allocators +calloc +CXL +copyable +customizable +daxX +deallocation +deallocating +deallocations +Devdax +dev +Globals +hMemtarget +hPool +hProvider +highPtr +io +interprocess +ipc +jemalloc +lowPtr +malloc +maxnode +mem +mempolicies +mempolicy +Mempolicy +memspace +Memspace +memspaces +Memtarget +memtarget +memtargets +middleware +multithreading +Nodemask +nodemask +numa +oneAPI +oneTBB +os +params +partList +pid +poolable +preallocated +providerIpcData +providential +ptr +realloc +Scalable +scalable +stdout +Tiering +tiering +topologies +umf +umfGetIPCHandle +umfMemoryProviderAlloc +umfMemoryProviderGetLastNativeError +umfMemoryProviderOpenIPCHandle +umfOsMemoryProviderParamsDestroy +umfPool +umfPoolCalloc +umfPoolDestroy +umfPoolGetTag +umfPoolMallocUsableSize +umfPoolRealloc +umfMemspaceUserFilter +umfMemspaceMemtargetAdd +unfreed \ No newline at end of file diff --git a/scripts/generate_docs.py b/docs/generate_docs.py similarity index 71% rename from scripts/generate_docs.py rename to docs/generate_docs.py index d5b2a01282..1697eacfe6 100644 --- a/scripts/generate_docs.py +++ b/docs/generate_docs.py @@ -6,17 +6,20 @@ """ from pathlib import Path -from shutil import rmtree +from shutil import rmtree, copytree import subprocess # nosec B404 import time def _check_cwd() -> None: - script_path = Path(__file__).resolve().parent cwd = Path.cwd() - if script_path != cwd: + include_dir = Path(cwd, "../include") + # Verify if include dir is one level up (as defined in Doxyfile) + if not include_dir.exists(): print( - f"{__file__} script has to be run from the 'scripts' directory. Terminating..." + f"Include directory {include_dir.resolve()} not found! 
" + "Please run this script from /build!", + flush=True, ) exit(1) @@ -66,8 +69,17 @@ def _generate_html(config_path: Path, docs_path: Path) -> None: def main() -> None: _check_cwd() - config_path = Path("docs_config").resolve() - docs_path = Path("..", "docs").resolve() + + script_dir = Path(__file__).resolve().parent + docs_build_path = Path("docs_build").resolve() + + # Sphinx and breathe require access to a Doxygen generated dir ('doxyxml') + # so we copy the whole content of the 'docs' dir to the build dir. + copytree(Path(script_dir), docs_build_path, dirs_exist_ok=True) + + config_path = Path(docs_build_path, "config").resolve() + docs_path = Path(docs_build_path, "generated").resolve() + start = time.time() _prepare_docs_dir(docs_path) _generate_xml(config_path, docs_path) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 942579a303..8bb3527876 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -18,7 +18,7 @@ set(EXAMPLE_NAME umf_example_basic) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS basic/basic.c - LIBS umf ${LIBHWLOC_LIBRARIES}) + LIBS umf ${UMF_HWLOC_NAME}) target_include_directories( ${EXAMPLE_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/src/utils @@ -41,16 +41,14 @@ if(UMF_POOL_SCALABLE_ENABLED) endif() endif() -if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) +if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLE_NAME umf_example_level_zero_shared_memory) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS level_zero_shared_memory/level_zero_shared_memory.c common/examples_level_zero_helpers.c - LIBS disjoint_pool ze_loader umf) + LIBS ze_loader umf) target_include_directories( ${EXAMPLE_NAME} @@ -72,14 +70,23 @@ if(UMF_BUILD_GPU_EXAMPLES set_property(TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION "${DLL_PATH_LIST}") endif() + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is required + # because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so and tests + # should use it instead of system one. 
+ set_property( + TEST ${EXAMPLE_NAME} + PROPERTY ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib" + ) + endif() else() message(STATUS "GPU Level Zero shared memory example requires " - "UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and " - "UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping") + "UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER " + "to be turned ON - skipping") endif() if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_CUDA_PROVIDER AND UMF_CUDA_ENABLED) set(EXAMPLE_NAME umf_example_cuda_shared_memory) @@ -87,7 +94,7 @@ if(UMF_BUILD_GPU_EXAMPLES add_umf_executable( NAME ${EXAMPLE_NAME} SRCS cuda_shared_memory/cuda_shared_memory.c - LIBS disjoint_pool cuda umf) + LIBS cuda umf) target_include_directories( ${EXAMPLE_NAME} @@ -113,14 +120,13 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON and installed CUDA libraries - skipping" + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_CUDA_PROVIDER to be turned ON and installed CUDA libraries - skipping" ) endif() # TODO: it looks like there is some problem with IPC implementation in Level # Zero on windows if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_LEVEL_ZERO_PROVIDER AND LINUX) set(EXAMPLE_NAME umf_example_ipc_level_zero) @@ -129,7 +135,7 @@ if(UMF_BUILD_GPU_EXAMPLES NAME ${EXAMPLE_NAME} SRCS ipc_level_zero/ipc_level_zero.c common/examples_level_zero_helpers.c - LIBS disjoint_pool ze_loader umf) + LIBS ze_loader umf) target_include_directories( ${EXAMPLE_NAME} @@ -151,10 +157,20 @@ if(UMF_BUILD_GPU_EXAMPLES set_property(TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION "${DLL_PATH_LIST}") endif() + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is required + # because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so and tests + # should use it instead of system one. 
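Since the disjoint pool is now compiled into libumf unconditionally, the GPU examples above link plain `umf` instead of a separate `disjoint_pool` library. Below is a condensed sketch of roughly what the Level Zero shared-memory example does after this change; `create_level_zero_disjoint_pool` is a hypothetical helper, `hContext`/`hDevice` are assumed to be initialized Level Zero handles, and the params setters are assumed to carry the signatures used elsewhere in this release:

```c
#include <umf/memory_pool.h>
#include <umf/memory_provider.h>
#include <umf/pools/pool_disjoint.h>
#include <umf/providers/provider_level_zero.h>

// Sketch: error handling reduced to early returns.
umf_memory_pool_handle_t
create_level_zero_disjoint_pool(ze_context_handle_t hContext,
                                ze_device_handle_t hDevice) {
    umf_level_zero_memory_provider_params_handle_t prov_params = NULL;
    if (umfLevelZeroMemoryProviderParamsCreate(&prov_params) !=
        UMF_RESULT_SUCCESS) {
        return NULL;
    }
    umfLevelZeroMemoryProviderParamsSetContext(prov_params, hContext);
    umfLevelZeroMemoryProviderParamsSetDevice(prov_params, hDevice);
    umfLevelZeroMemoryProviderParamsSetMemoryType(prov_params,
                                                  UMF_MEMORY_TYPE_SHARED);

    umf_memory_provider_handle_t provider = NULL;
    umf_result_t ret = umfMemoryProviderCreate(umfLevelZeroMemoryProviderOps(),
                                               prov_params, &provider);
    umfLevelZeroMemoryProviderParamsDestroy(prov_params);
    if (ret != UMF_RESULT_SUCCESS) {
        return NULL;
    }

    // The disjoint pool ops now come from libumf itself - no extra library.
    umf_disjoint_pool_params_handle_t pool_params = NULL;
    umfDisjointPoolParamsCreate(&pool_params);

    umf_memory_pool_handle_t pool = NULL;
    ret = umfPoolCreate(umfDisjointPoolOps(), provider, pool_params,
                        UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool);
    umfDisjointPoolParamsDestroy(pool_params);
    if (ret != UMF_RESULT_SUCCESS) {
        umfMemoryProviderDestroy(provider);
        return NULL;
    }
    return pool;
}
```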
+ set_property( + TEST ${EXAMPLE_NAME} + PROPERTY ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib" + ) + endif() else() message( STATUS - "IPC Level 0 example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" + "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping" ) endif() @@ -190,6 +206,7 @@ function(add_umf_ipc_example script) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) set_tests_properties(${EXAMPLE_NAME} PROPERTIES LABELS "example") + set_tests_properties(${EXAMPLE_NAME} PROPERTIES TIMEOUT 60) if(NOT UMF_TESTS_FAIL_ON_SKIP) set_tests_properties(${EXAMPLE_NAME} PROPERTIES SKIP_RETURN_CODE 125) endif() @@ -214,7 +231,7 @@ if(LINUX) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS memspace_numa/memspace_numa.c - LIBS umf ${LIBHWLOC_LIBRARIES} numa) + LIBS umf ${UMF_HWLOC_NAME} numa) target_include_directories( ${EXAMPLE_NAME} @@ -237,7 +254,7 @@ if(LINUX) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS memspace_hmat/memspace_hmat.c - LIBS umf ${LIBHWLOC_LIBRARIES} numa) + LIBS umf ${UMF_HWLOC_NAME} numa) target_include_directories( ${EXAMPLE_NAME} @@ -261,7 +278,7 @@ if(LINUX) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS custom_file_provider/custom_file_provider.c - LIBS umf ${LIBHWLOC_LIBRARIES}) + LIBS umf ${UMF_HWLOC_NAME}) target_include_directories( ${EXAMPLE_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/src/utils @@ -282,10 +299,13 @@ if(LINUX) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS dram_and_fsdax/dram_and_fsdax.c - LIBS umf jemalloc_pool) + LIBS umf) - target_link_directories(${EXAMPLE_NAME} PRIVATE - ${LIBHWLOC_LIBRARY_DIRS}) + target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--no-as-needed,-ldl") + + target_link_directories( + ${EXAMPLE_NAME} PRIVATE ${LIBHWLOC_LIBRARY_DIRS} + ${JEMALLOC_LIBRARY_DIRS}) add_test( NAME ${EXAMPLE_NAME} diff --git a/examples/README.md b/examples/README.md index e7823347ef..70d114a63a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -24,7 +24,7 @@ cleans up and exits with an error status. ### Requirements * Level Zero headers and libraries * compatible GPU with installed driver -* set UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LIBUMF_POOL_DISJOINT and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON +* set UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON ## IPC example with Level Zero memory provider This example demonstrates how to use UMF IPC API. The example creates two @@ -35,7 +35,7 @@ and build this example Level Zero development package should be installed. ### Requirements * Level Zero headers and libraries * compatible GPU with installed driver -* set UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LIBUMF_POOL_DISJOINT and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON +* set UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON ## IPC example with shared memory This example also demonstrates how to use UMF IPC API. The example creates two diff --git a/examples/cmake/FindJEMALLOC.cmake b/examples/cmake/FindJEMALLOC.cmake index 89d488ecc0..e6db190d4a 100644 --- a/examples/cmake/FindJEMALLOC.cmake +++ b/examples/cmake/FindJEMALLOC.cmake @@ -2,9 +2,11 @@ # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -message(STATUS "Checking for module 'jemalloc' using find_library()") +message( + STATUS "Looking for the static 'libjemalloc.a' library using find_library()" +) -find_library(JEMALLOC_LIBRARY NAMES libjemalloc jemalloc) +find_library(JEMALLOC_LIBRARY NAMES libjemalloc.a jemalloc.a) set(JEMALLOC_LIBRARIES ${JEMALLOC_LIBRARY}) get_filename_component(JEMALLOC_LIB_DIR ${JEMALLOC_LIBRARIES} DIRECTORY) diff --git a/examples/cuda_shared_memory/CMakeLists.txt b/examples/cuda_shared_memory/CMakeLists.txt index dd8567c141..0e57ec6077 100644 --- a/examples/cuda_shared_memory/CMakeLists.txt +++ b/examples/cuda_shared_memory/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -55,9 +55,8 @@ target_link_directories( ${LIBHWLOC_LIBRARY_DIRS} ${CUDA_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries( - ${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a ${CUDA_LIBRARIES} - ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ${CUDA_LIBRARIES} + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/examples/custom_file_provider/custom_file_provider.c b/examples/custom_file_provider/custom_file_provider.c index ffa61d63f5..a442fca6ad 100644 --- a/examples/custom_file_provider/custom_file_provider.c +++ b/examples/custom_file_provider/custom_file_provider.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -62,7 +62,8 @@ static umf_result_t file_init(void *params, void **provider) { // Open the file file_provider->fd = open(file_params->filename, O_RDWR | O_CREAT, 0666); if (file_provider->fd < 0) { - perror("Failed to open file"); + perror("open()"); + fprintf(stderr, "Failed to open the file: %s\n", file_params->filename); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; goto cleanup_malloc; } @@ -233,15 +234,15 @@ static umf_result_t file_get_min_page_size(void *provider, void *ptr, // File provider operations static umf_memory_provider_ops_t file_ops = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = file_init, .finalize = file_deinit, .alloc = file_alloc, + .free = file_free, .get_name = file_get_name, .get_last_native_error = file_get_last_native_error, .get_recommended_page_size = file_get_recommended_page_size, .get_min_page_size = file_get_min_page_size, - .ext.free = file_free, }; // Main function diff --git a/examples/dram_and_fsdax/CMakeLists.txt b/examples/dram_and_fsdax/CMakeLists.txt index 0d0bf25935..dcb538085e 100644 --- a/examples/dram_and_fsdax/CMakeLists.txt +++ b/examples/dram_and_fsdax/CMakeLists.txt @@ -21,26 +21,15 @@ if(NOT LIBHWLOC_FOUND) find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) endif() -pkg_check_modules(JEMALLOC jemalloc) -if(NOT JEMALLOC_FOUND) - find_package(JEMALLOC REQUIRED jemalloc) -endif() - # build the example set(EXAMPLE_NAME umf_example_dram_and_fsdax) add_executable(${EXAMPLE_NAME} dram_and_fsdax.c) target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS}) -target_link_directories( - ${EXAMPLE_NAME} - PRIVATE - ${LIBUMF_LIBRARY_DIRS} - ${LIBHWLOC_LIBRARY_DIRS} - ${JEMALLOC_LIBRARY_DIRS}) +target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} + ${LIBHWLOC_LIBRARY_DIRS}) -target_link_libraries( - ${EXAMPLE_NAME} PRIVATE hwloc jemalloc_pool ${JEMALLOC_LIBRARIES} - ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE hwloc ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( @@ -56,6 +45,6 @@ if(LINUX) TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION - "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${JEMALLOC_LIBRARY_DIRS}" + "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" ) endif() diff --git a/examples/dram_and_fsdax/dram_and_fsdax.c b/examples/dram_and_fsdax/dram_and_fsdax.c index 26f4517281..970242e109 100644 --- a/examples/dram_and_fsdax/dram_and_fsdax.c +++ b/examples/dram_and_fsdax/dram_and_fsdax.c @@ -78,41 +78,14 @@ static umf_memory_pool_handle_t create_fsdax_pool(const char *path) { } // Create an FSDAX memory pool - // - // The file memory provider does not support the free operation - // (`umfMemoryProviderFree()` always returns `UMF_RESULT_ERROR_NOT_SUPPORTED`), - // so it should be used with a pool manager that will take over - // the managing of the provided memory - for example the jemalloc pool - // with the `disable_provider_free` parameter set to true. 
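The comment removed above explained why the jemalloc pool had to be created with `KeepAllMemory`; that whole parameter struct is deleted from the API in this PR (see the pool_jemalloc.h hunk further down), so the example now passes `NULL` pool params, as the simplified `umfPoolCreate()` call later in this hunk shows. For context, a sketch of the complete new flow; it assumes the file provider's params API (`umfFileMemoryProviderParamsCreate()` taking the backing-file path) and collapses error paths:

```c
#include <umf/memory_pool.h>
#include <umf/memory_provider.h>
#include <umf/pools/pool_jemalloc.h>
#include <umf/providers/provider_file_memory.h>

// Sketch only: assumed file-provider API; `path` points at an FSDAX file.
umf_memory_pool_handle_t create_fsdax_pool_sketch(const char *path) {
    umf_file_memory_provider_params_handle_t params = NULL;
    if (umfFileMemoryProviderParamsCreate(&params, path) !=
        UMF_RESULT_SUCCESS) {
        return NULL;
    }

    umf_memory_provider_handle_t provider = NULL;
    umf_result_t ret =
        umfMemoryProviderCreate(umfFileMemoryProviderOps(), params, &provider);
    umfFileMemoryProviderParamsDestroy(params);
    if (ret != UMF_RESULT_SUCCESS) {
        return NULL;
    }

    // No jemalloc pool params anymore - NULL is passed instead.
    umf_memory_pool_handle_t pool = NULL;
    ret = umfPoolCreate(umfJemallocPoolOps(), provider, NULL,
                        UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool);
    if (ret != UMF_RESULT_SUCCESS) {
        umfMemoryProviderDestroy(provider);
        return NULL;
    }
    return pool;
}
```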
- umf_jemalloc_pool_params_handle_t pool_params; - umf_result = umfJemallocPoolParamsCreate(&pool_params); - if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, "Failed to create jemalloc params!\n"); - umfMemoryProviderDestroy(provider_fsdax); - return NULL; - } - umf_result = umfJemallocPoolParamsSetKeepAllMemory(pool_params, true); - if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, "Failed to set KeepAllMemory!\n"); - umfMemoryProviderDestroy(provider_fsdax); - return NULL; - } - - // Create an FSDAX memory pool - umf_result = - umfPoolCreate(umfJemallocPoolOps(), provider_fsdax, pool_params, - UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool_fsdax); + umf_result = umfPoolCreate(umfJemallocPoolOps(), provider_fsdax, NULL, + UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool_fsdax); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "Failed to create an FSDAX memory pool!\n"); umfMemoryProviderDestroy(provider_fsdax); return NULL; } - umf_result = umfJemallocPoolParamsDestroy(pool_params); - if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, "Failed to destroy jemalloc params!\n"); - } - return pool_fsdax; } diff --git a/examples/ipc_ipcapi/ipc_ipcapi_anon_fd.sh b/examples/ipc_ipcapi/ipc_ipcapi_anon_fd.sh index 615271eebb..2eb9409daf 100755 --- a/examples/ipc_ipcapi/ipc_ipcapi_anon_fd.sh +++ b/examples/ipc_ipcapi/ipc_ipcapi_anon_fd.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -16,16 +16,8 @@ PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) # to obtain a duplicate of another process's file descriptor. # Permission to duplicate another process's file descriptor # is governed by a ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check (see ptrace(2)) -# that can be changed using the /proc/sys/kernel/yama/ptrace_scope interface. -PTRACE_SCOPE_FILE="/proc/sys/kernel/yama/ptrace_scope" -VAL=0 -if [ -f $PTRACE_SCOPE_FILE ]; then - PTRACE_SCOPE_VAL=$(cat $PTRACE_SCOPE_FILE) - if [ $PTRACE_SCOPE_VAL -ne $VAL ]; then - echo "SKIP: ptrace_scope is not set to 0 (classic ptrace permissions) - skipping the test" - exit 125 # skip code defined in CMakeLists.txt - fi -fi +# In the producer binary used in this example, prctl(PR_SET_PTRACER, getppid()) is used +# to allow the consumer to duplicate the producer's file descriptor. UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" diff --git a/examples/ipc_ipcapi/ipc_ipcapi_producer.c b/examples/ipc_ipcapi/ipc_ipcapi_producer.c index 4157e8284f..9082302ac9 100644 --- a/examples/ipc_ipcapi/ipc_ipcapi_producer.c +++ b/examples/ipc_ipcapi/ipc_ipcapi_producer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -69,6 +70,21 @@ int main(int argc, char *argv[]) { int port = atoi(argv[1]); + // The prctl() function with PR_SET_PTRACER is used here to allow the parent process and its children + // to ptrace the current process. This is necessary because UMF's memory providers on Linux (except CUDA) + // use the pidfd_getfd(2) system call to duplicate another process's file descriptor, which is + // governed by ptrace permissions.
By default on Ubuntu /proc/sys/kernel/yama/ptrace_scope is + // set to 1 ("restricted ptrace"), which prevents pidfd_getfd from working unless ptrace_scope + // is set to 0. + // To overcome this limitation without requiring users to change the ptrace_scope + // setting (which requires root privileges), we use prctl() to allow the consumer process + // to copy the producer's file descriptor, even when ptrace_scope is set to 1. + ret = prctl(PR_SET_PTRACER, getppid()); + if (ret == -1) { + perror("PR_SET_PTRACER may not be supported. prctl() call failed"); + goto err_end; + } + umf_memory_provider_handle_t OS_memory_provider = NULL; umf_os_memory_provider_params_handle_t os_params = NULL; enum umf_result_t umf_result; @@ -259,6 +275,7 @@ int main(int argc, char *argv[]) { err_destroy_OS_params: umfOsMemoryProviderParamsDestroy(os_params); +err_end: if (ret == 0) { fprintf(stderr, "[producer] Shutting down (status OK) ...\n"); } else if (ret == 1) { diff --git a/examples/ipc_level_zero/CMakeLists.txt b/examples/ipc_level_zero/CMakeLists.txt index 5c17d4c9cb..2aa391d655 100644 --- a/examples/ipc_level_zero/CMakeLists.txt +++ b/examples/ipc_level_zero/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -24,7 +24,7 @@ endif() include(FetchContent) set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") -set(LEVEL_ZERO_LOADER_TAG v1.19.2) +set(LEVEL_ZERO_LOADER_TAG v1.20.2) message( STATUS @@ -53,8 +53,8 @@ target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a - ze_loader ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ze_loader + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( @@ -69,6 +69,6 @@ if(LINUX) TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION - "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" + "LD_LIBRARY_PATH=path_list_prepend:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" ) endif() diff --git a/examples/ipc_level_zero/ipc_level_zero.c b/examples/ipc_level_zero/ipc_level_zero.c index 9579244abf..87dbbd0226 100644 --- a/examples/ipc_level_zero/ipc_level_zero.c +++ b/examples/ipc_level_zero/ipc_level_zero.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -104,7 +104,7 @@ int main(void) { const size_t BUFFER_PATTERN = 0x42; int ret = init_level_zero(); if (ret != 0) { - fprintf(stderr, "ERROR: Failed to init Level 0!\n"); + fprintf(stderr, "ERROR: Failed to init Level Zero!\n"); return ret; } diff --git a/examples/level_zero_shared_memory/CMakeLists.txt b/examples/level_zero_shared_memory/CMakeLists.txt index 3711b40941..b7c990145b 100644 --- a/examples/level_zero_shared_memory/CMakeLists.txt +++ b/examples/level_zero_shared_memory/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -24,7 +24,7 @@ endif() include(FetchContent) set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") -set(LEVEL_ZERO_LOADER_TAG v1.19.2) +set(LEVEL_ZERO_LOADER_TAG v1.20.2) message( STATUS @@ -53,8 +53,8 @@ target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a - ze_loader ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ze_loader + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( @@ -70,6 +70,6 @@ if(LINUX) TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION - "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" + "LD_LIBRARY_PATH=path_list_prepend:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" ) endif() diff --git a/examples/level_zero_shared_memory/level_zero_shared_memory.c b/examples/level_zero_shared_memory/level_zero_shared_memory.c index b0f646861d..7cfe89366c 100644 --- a/examples/level_zero_shared_memory/level_zero_shared_memory.c +++ b/examples/level_zero_shared_memory/level_zero_shared_memory.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -27,7 +27,7 @@ int main(void) { // Initialize Level Zero int ret = init_level_zero(); if (ret != 0) { - fprintf(stderr, "Failed to init Level 0!\n"); + fprintf(stderr, "Failed to init Level Zero!\n"); return ret; } @@ -189,6 +189,6 @@ int main(void) { umfLevelZeroMemoryProviderParamsDestroy(ze_memory_provider_params); level_zero_destroy: - ret = destroy_context(hContext); + destroy_context(hContext); return ret; } diff --git a/include/umf.h b/include/umf.h index 3e2d827991..57bebef8a9 100644 --- a/include/umf.h +++ b/include/umf.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/include/umf/base.h b/include/umf/base.h index 53378195d2..12e99aa2bb 100644 --- a/include/umf/base.h +++ b/include/umf/base.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -28,7 +28,7 @@ extern "C" { #define UMF_MINOR_VERSION(_ver) (_ver & 0x0000ffff) /// @brief Current version of the UMF headers -#define UMF_VERSION_CURRENT UMF_MAKE_VERSION(0, 10) +#define UMF_VERSION_CURRENT UMF_MAKE_VERSION(0, 11) /// @brief Operation results typedef enum umf_result_t { @@ -45,7 +45,10 @@ typedef enum umf_result_t { UMF_RESULT_ERROR_NOT_SUPPORTED = 5, ///< Operation not supported UMF_RESULT_ERROR_USER_SPECIFIC = 6, ///< Failure in user provider code (i.e in user provided callback) - UMF_RESULT_ERROR_UNKNOWN = 0x7ffffffe ///< Unknown or internal error + UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE = + 7, ///< External required dependency is unavailable or missing + UMF_RESULT_ERROR_OUT_OF_RESOURCES = 8, ///< Out of internal resources + UMF_RESULT_ERROR_UNKNOWN = 0x7ffffffe ///< Unknown error } umf_result_t; #ifdef __cplusplus diff --git a/include/umf/memory_pool.h b/include/umf/memory_pool.h index a93d400f92..ed3d1eb0dc 100644 --- a/include/umf/memory_pool.h +++ b/include/umf/memory_pool.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -11,6 +11,7 @@ #define UMF_MEMORY_POOL_H 1 #include +#include #include #ifdef __cplusplus @@ -22,12 +23,6 @@ extern "C" { /// functions typedef struct umf_memory_pool_t *umf_memory_pool_handle_t; -/// @brief This structure comprises function pointers used by corresponding umfPool* -/// calls. Each memory pool implementation should initialize all function -/// pointers. -/// -typedef struct umf_memory_pool_ops_t umf_memory_pool_ops_t; - /// @brief Supported pool creation flags typedef enum umf_pool_create_flag_t { UMF_POOL_CREATE_FLAG_NONE = @@ -140,7 +135,7 @@ umf_result_t umfFree(void *ptr); /// * Implementations *must* store the error code in thread-local /// storage prior to returning NULL from the allocation functions. /// -/// * If the last allocation/de-allocation operation succeeded, the value returned by +/// * If the last allocation/deallocation operation succeeded, the value returned by /// this function is unspecified. /// /// * The application *may* call this function from simultaneous threads. @@ -170,6 +165,22 @@ umf_memory_pool_handle_t umfPoolByPtr(const void *ptr); umf_result_t umfPoolGetMemoryProvider(umf_memory_pool_handle_t hPool, umf_memory_provider_handle_t *hProvider); +/// +/// @brief Set a custom tag on the memory pool that can be later retrieved using umfPoolGetTag. +/// @param hPool specified memory pool +/// @param tag tag to be set +/// @param oldTag [out][optional] previous tag set on the memory pool +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfPoolSetTag(umf_memory_pool_handle_t hPool, void *tag, + void **oldTag); + +/// +/// @brief Retrieve the tag associated with the memory pool or NULL if no tag is set. +/// @param hPool specified memory pool +/// @param tag [out] tag associated with the memory pool +/// @return UMF_RESULT_SUCCESS on success. 
+umf_result_t umfPoolGetTag(umf_memory_pool_handle_t hPool, void **tag); + #ifdef __cplusplus } #endif diff --git a/include/umf/memory_pool_ops.h b/include/umf/memory_pool_ops.h index 67afdd1669..657f40aeaa 100644 --- a/include/umf/memory_pool_ops.h +++ b/include/umf/memory_pool_ops.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -17,6 +17,11 @@ extern "C" { #endif +/// @brief Version of the Memory Pool ops structure. +/// NOTE: This is equal to the latest UMF version, in which the ops structure +/// has been modified. +#define UMF_POOL_OPS_VERSION_CURRENT UMF_MAKE_VERSION(0, 11) + /// /// @brief This structure comprises function pointers used by corresponding umfPool* /// calls. Each memory pool implementation should initialize all function @@ -24,7 +29,7 @@ extern "C" { /// typedef struct umf_memory_pool_ops_t { /// Version of the ops structure. - /// Should be initialized using UMF_VERSION_CURRENT. + /// Should be initialized using UMF_POOL_OPS_VERSION_CURRENT. uint32_t version; /// diff --git a/include/umf/memory_provider_ops.h b/include/umf/memory_provider_ops.h index 0b9c7cfce3..aaddd503b7 100644 --- a/include/umf/memory_provider_ops.h +++ b/include/umf/memory_provider_ops.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -16,21 +16,17 @@ extern "C" { #endif +/// @brief Version of the Memory Provider ops structure. +/// NOTE: This is equal to the latest UMF version, in which the ops structure +/// has been modified. +#define UMF_PROVIDER_OPS_VERSION_CURRENT UMF_MAKE_VERSION(0, 11) + /// /// @brief This structure comprises optional function pointers used /// by corresponding umfMemoryProvider* calls. A memory provider implementation /// can keep them NULL. /// typedef struct umf_memory_provider_ext_ops_t { - /// - /// @brief Frees the memory space pointed by \p ptr from the memory \p provider - /// @param provider pointer to the memory provider - /// @param ptr pointer to the allocated memory to free - /// @param size size of the allocation - /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure - /// - umf_result_t (*free)(void *provider, void *ptr, size_t size); - /// /// @brief Discard physical pages within the virtual memory mapping associated at the given addr /// and \p size. This call is asynchronous and may delay purging the pages indefinitely. @@ -152,7 +148,7 @@ typedef struct umf_memory_provider_ipc_ops_t { /// typedef struct umf_memory_provider_ops_t { /// Version of the ops structure. - /// Should be initialized using UMF_VERSION_CURRENT. + /// Should be initialized using UMF_PROVIDER_OPS_VERSION_CURRENT. 
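The umfPoolSetTag()/umfPoolGetTag() pair declared above gives callers a slot for arbitrary per-pool user data; UMF stores only the pointer, so the caller owns the tag's lifetime. A minimal usage sketch (assumes an already created `pool`; `pool_label_t` and `tag_demo` are hypothetical names):

```c
#include <stdio.h>
#include <umf/memory_pool.h>

// Hypothetical per-pool user data; UMF stores the pointer, nothing more.
typedef struct pool_label_t {
    const char *name;
} pool_label_t;

// Sketch: `pool` was created earlier; error checking elided.
void tag_demo(umf_memory_pool_handle_t pool) {
    static pool_label_t label = {.name = "gpu-shared-pool"};

    void *old_tag = NULL;
    umfPoolSetTag(pool, &label, &old_tag); // old_tag receives the previous tag

    void *tag = NULL;
    umfPoolGetTag(pool, &tag);
    printf("pool tag: %s\n", ((pool_label_t *)tag)->name);
}
```

Because the tag is an opaque `void *`, the pointed-to data must outlive its use with the pool (hence the `static` above).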
uint32_t version; /// @@ -181,6 +177,15 @@ typedef struct umf_memory_provider_ops_t { umf_result_t (*alloc)(void *provider, size_t size, size_t alignment, void **ptr); + /// + /// @brief Frees the memory space pointed by \p ptr from the memory \p provider + /// @param provider pointer to the memory provider + /// @param ptr pointer to the allocated memory to free + /// @param size size of the allocation + /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure + /// + umf_result_t (*free)(void *provider, void *ptr, size_t size); + /// /// @brief Retrieve string representation of the underlying provider specific /// result reported by the last API that returned diff --git a/include/umf/memtarget.h b/include/umf/memtarget.h index d74947f14e..55ca30919a 100644 --- a/include/umf/memtarget.h +++ b/include/umf/memtarget.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/include/umf/pools/pool_disjoint.h b/include/umf/pools/pool_disjoint.h index fdf682ae5b..a1558b85bf 100644 --- a/include/umf/pools/pool_disjoint.h +++ b/include/umf/pools/pool_disjoint.h @@ -1,6 +1,11 @@ -// Copyright (C) 2023-2024 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +/* + * + * Copyright (C) 2023-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ #pragma once #ifdef __cplusplus @@ -87,7 +92,7 @@ umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, /// @brief Set shared limits for disjoint pool. /// @param hParams handle to the parameters of the disjoint pool. -/// @param hSharedLimits handle tp the shared limits. +/// @param hSharedLimits handle to the shared limits. /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. umf_result_t umfDisjointPoolParamsSetSharedLimits( umf_disjoint_pool_params_handle_t hParams, @@ -95,7 +100,7 @@ umf_result_t umfDisjointPoolParamsSetSharedLimits( /// @brief Set custom name of the disjoint pool to be used in the traces. /// @param hParams handle to the parameters of the disjoint pool. -/// @param name custom name of the pool. +/// @param name custom name of the pool. Name longer than 64 characters will be truncated. /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. umf_result_t umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, diff --git a/include/umf/pools/pool_jemalloc.h b/include/umf/pools/pool_jemalloc.h index 0cbecd38f7..5974e6440a 100644 --- a/include/umf/pools/pool_jemalloc.h +++ b/include/umf/pools/pool_jemalloc.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -14,35 +14,8 @@ extern "C" { #endif -#include #include -struct umf_jemalloc_pool_params_t; - -/// @brief handle to the parameters of the jemalloc pool. -typedef struct umf_jemalloc_pool_params_t *umf_jemalloc_pool_params_handle_t; - -/// @brief Create a struct to store parameters of jemalloc pool. -/// @param hParams [out] handle to the newly created parameters struct. 
-/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. -umf_result_t -umfJemallocPoolParamsCreate(umf_jemalloc_pool_params_handle_t *hParams); - -/// @brief Destroy parameters struct. -/// @param hParams handle to the parameters of the jemalloc pool. -/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. -umf_result_t -umfJemallocPoolParamsDestroy(umf_jemalloc_pool_params_handle_t hParams); - -/// @brief Set if \p umfMemoryProviderFree() should never be called. -/// @param hParams handle to the parameters of the jemalloc pool. -/// @param keepAllMemory \p true if the jemalloc pool should not call -/// \p umfMemoryProviderFree, \p false otherwise. -/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. -umf_result_t -umfJemallocPoolParamsSetKeepAllMemory(umf_jemalloc_pool_params_handle_t hParams, - bool keepAllMemory); - umf_memory_pool_ops_t *umfJemallocPoolOps(void); #ifdef __cplusplus diff --git a/include/umf/pools/pool_scalable.h b/include/umf/pools/pool_scalable.h index 072169b68c..1915ad0b7a 100644 --- a/include/umf/pools/pool_scalable.h +++ b/include/umf/pools/pool_scalable.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/include/umf/providers/provider_coarse.h b/include/umf/providers/provider_coarse.h deleted file mode 100644 index 6ed6e0fbc9..0000000000 --- a/include/umf/providers/provider_coarse.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (C) 2023-2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ - -#ifndef UMF_COARSE_PROVIDER_H -#define UMF_COARSE_PROVIDER_H - -#include -#include - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/// @brief Coarse Memory Provider allocation strategy -typedef enum coarse_memory_provider_strategy_t { - /// Always allocate a free block of the (size + alignment) size - /// and cut out the properly aligned part leaving two remaining parts. - /// It is the fastest strategy but causes memory fragmentation - /// when alignment is greater than 0. - /// It is the best strategy when alignment always equals 0. - UMF_COARSE_MEMORY_STRATEGY_FASTEST = 0, - - /// Check if the first free block of the 'size' size has the correct alignment. - /// If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. - UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, - - /// Look through all free blocks of the 'size' size - /// and choose the first one with the correct alignment. - /// If none of them had the correct alignment, - /// use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. - UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE, - - /// The maximum value (it has to be the last one). - UMF_COARSE_MEMORY_STRATEGY_MAX -} coarse_memory_provider_strategy_t; - -/// @brief Coarse Memory Provider settings struct. -typedef struct coarse_memory_provider_params_t { - /// Handle to the upstream memory provider. - /// It has to be NULL if init_buffer is set - /// (exactly one of them has to be non-NULL). - umf_memory_provider_handle_t upstream_memory_provider; - - /// Memory allocation strategy. - /// See coarse_memory_provider_strategy_t for details. 
- coarse_memory_provider_strategy_t allocation_strategy; - - /// A pre-allocated buffer that will be the only memory that - /// the coarse provider can provide (the fixed-size memory provider option). - /// If it is non-NULL, `init_buffer_size ` has to contain its size. - /// It has to be NULL if upstream_memory_provider is set - /// (exactly one of them has to be non-NULL). - void *init_buffer; - - /// Size of the initial buffer: - /// 1) `init_buffer` if it is non-NULL xor - /// 2) that will be allocated from the upstream_memory_provider - /// (if it is non-NULL) in the `.initialize` operation. - size_t init_buffer_size; - - /// When it is true and the upstream_memory_provider is given, - /// the init buffer (of `init_buffer_size` bytes) would be pre-allocated - /// during creation time using the `upstream_memory_provider`. - /// If upstream_memory_provider is not given, - /// the init_buffer is always used instead - /// (regardless of the value of this parameter). - bool immediate_init_from_upstream; - - /// Destroy upstream_memory_provider in finalize(). - bool destroy_upstream_memory_provider; -} coarse_memory_provider_params_t; - -/// @brief Coarse Memory Provider stats (TODO move to CTL) -typedef struct coarse_memory_provider_stats_t { - /// Total allocation size. - size_t alloc_size; - - /// Size of used memory. - size_t used_size; - - /// Number of memory blocks allocated from the upstream provider. - size_t num_upstream_blocks; - - /// Total number of allocated memory blocks. - size_t num_all_blocks; - - /// Number of free memory blocks. - size_t num_free_blocks; -} coarse_memory_provider_stats_t; - -umf_memory_provider_ops_t *umfCoarseMemoryProviderOps(void); - -// TODO use CTL -coarse_memory_provider_stats_t -umfCoarseMemoryProviderGetStats(umf_memory_provider_handle_t provider); - -/// @brief Create default params for the coarse memory provider -static inline coarse_memory_provider_params_t -umfCoarseMemoryProviderParamsDefault(void) { - coarse_memory_provider_params_t coarse_memory_provider_params; - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - return coarse_memory_provider_params; -} - -#ifdef __cplusplus -} -#endif - -#endif // UMF_COARSE_PROVIDER_H diff --git a/include/umf/providers/provider_cuda.h b/include/umf/providers/provider_cuda.h index 5f1d5a6e2a..95f2634fbc 100644 --- a/include/umf/providers/provider_cuda.h +++ b/include/umf/providers/provider_cuda.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -20,7 +20,8 @@ typedef struct umf_cuda_memory_provider_params_t *umf_cuda_memory_provider_params_handle_t; /// @brief Create a struct to store parameters of the CUDA Memory Provider. -/// @param hParams [out] handle to the newly created parameters struct. +/// @param hParams [out] handle to the newly created parameters structure, +/// initialized with the default (current) context and device ID. /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. umf_result_t umfCUDAMemoryProviderParamsCreate( umf_cuda_memory_provider_params_handle_t *hParams); @@ -53,6 +54,13 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType( umf_cuda_memory_provider_params_handle_t hParams, umf_usm_memory_type_t memoryType); +/// @brief Set the allocation flags in the parameters struct. 
+/// @param hParams handle to the parameters of the CUDA Memory Provider. +/// @param flags valid combination of CUDA allocation flags. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags( + umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags); + umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void); #ifdef __cplusplus diff --git a/include/umf/providers/provider_fixed_memory.h b/include/umf/providers/provider_fixed_memory.h new file mode 100644 index 0000000000..2351faf312 --- /dev/null +++ b/include/umf/providers/provider_fixed_memory.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_FIXED_MEMORY_PROVIDER_H +#define UMF_FIXED_MEMORY_PROVIDER_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/// @cond +#define UMF_FIXED_RESULTS_START_FROM 4000 +/// @endcond + +struct umf_fixed_memory_provider_params_t; + +typedef struct umf_fixed_memory_provider_params_t + *umf_fixed_memory_provider_params_handle_t; + +/// @brief Create a struct to store parameters of the Fixed Memory Provider. +/// @param hParams [out] handle to the newly created parameters struct. +/// @param ptr [in] pointer to the memory region. +/// @param size [in] size of the memory region in bytes. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfFixedMemoryProviderParamsCreate( + umf_fixed_memory_provider_params_handle_t *hParams, void *ptr, size_t size); + +/// @brief Set the memory region in params struct. Overwrites the previous value. +/// It provides an ability to use the same instance of params to create multiple +/// instances of the provider for different memory regions. +/// @param hParams [in] handle to the parameters of the Fixed Memory Provider. +/// @param ptr [in] pointer to the memory region. +/// @param size [in] size of the memory region in bytes. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfFixedMemoryProviderParamsSetMemory( + umf_fixed_memory_provider_params_handle_t hParams, void *ptr, size_t size); + +/// @brief Destroy parameters struct. +/// @param hParams [in] handle to the parameters of the Fixed Memory Provider. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfFixedMemoryProviderParamsDestroy( + umf_fixed_memory_provider_params_handle_t hParams); + +/// @brief Retrieve the operations structure for the Fixed Memory Provider. +/// @return Pointer to the umf_memory_provider_ops_t structure. 
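A usage sketch for the Fixed Memory Provider declared in this new header: a `malloc`-ed buffer stands in for whatever caller-owned region is being wrapped, error handling is trimmed, and `fixed_provider_demo` is a hypothetical name:

```c
#include <stdlib.h>
#include <umf/memory_provider.h>
#include <umf/providers/provider_fixed_memory.h>

// Sketch: serve allocations from a caller-owned, preallocated buffer.
void fixed_provider_demo(void) {
    size_t size = 1 << 20;
    void *buffer = malloc(size); // any valid region works, e.g. a mapped file

    umf_fixed_memory_provider_params_handle_t params = NULL;
    if (umfFixedMemoryProviderParamsCreate(&params, buffer, size) !=
        UMF_RESULT_SUCCESS) {
        free(buffer);
        return;
    }

    umf_memory_provider_handle_t provider = NULL;
    umf_result_t ret =
        umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, &provider);
    umfFixedMemoryProviderParamsDestroy(params);
    if (ret == UMF_RESULT_SUCCESS) {
        void *ptr = NULL;
        umfMemoryProviderAlloc(provider, 4096, 0, &ptr); // carved from buffer
        umfMemoryProviderFree(provider, ptr, 4096); // assuming free is supported
        umfMemoryProviderDestroy(provider);
    }
    free(buffer); // the provider never owned the region
}
```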
+umf_memory_provider_ops_t *umfFixedMemoryProviderOps(void); + +/// @brief Fixed Memory Provider operation results +typedef enum umf_fixed_memory_provider_native_error { + UMF_FIXED_RESULT_SUCCESS = UMF_FIXED_RESULTS_START_FROM, ///< Success + UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED, ///< Force purging failed +} umf_fixed_memory_provider_native_error_t; + +#ifdef __cplusplus +} +#endif + +#endif /* UMF_FIXED_MEMORY_PROVIDER_H */ diff --git a/include/umf/providers/provider_level_zero.h b/include/umf/providers/provider_level_zero.h index f760c57244..b20fb40d59 100644 --- a/include/umf/providers/provider_level_zero.h +++ b/include/umf/providers/provider_level_zero.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -68,6 +68,29 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( umf_level_zero_memory_provider_params_handle_t hParams, ze_device_handle_t *hDevices, uint32_t deviceCount); +typedef enum umf_level_zero_memory_provider_free_policy_t { + UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT = + 0, ///< Free memory immediately. Default. + UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_BLOCKING_FREE, ///< Blocks until all commands using the memory are complete before freeing. + UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFER_FREE, ///< Schedules the memory to be freed but does not free immediately. +} umf_level_zero_memory_provider_free_policy_t; + +/// @brief Set the memory free policy. +/// @param hParams handle to the parameters of the Level Zero Memory Provider. +/// @param policy memory free policy. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfLevelZeroMemoryProviderParamsSetFreePolicy( + umf_level_zero_memory_provider_params_handle_t hParams, + umf_level_zero_memory_provider_free_policy_t policy); + +/// @brief Set the device ordinal in the parameters struct. +/// @param hParams handle to the parameters of the Level Zero Memory Provider. +/// @param deviceOrdinal device ordinal. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfLevelZeroMemoryProviderParamsSetDeviceOrdinal( + umf_level_zero_memory_provider_params_handle_t hParams, + uint32_t deviceOrdinal); + umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void); #ifdef __cplusplus diff --git a/include/umf/providers/provider_os_memory.h b/include/umf/providers/provider_os_memory.h index a6bf43a7d9..90455cad19 100644 --- a/include/umf/providers/provider_os_memory.h +++ b/include/umf/providers/provider_os_memory.h @@ -1,9 +1,11 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * + * Copyright (C) 2022-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ + * + */ #ifndef UMF_OS_MEMORY_PROVIDER_H #define UMF_OS_MEMORY_PROVIDER_H @@ -23,7 +25,7 @@ extern "C" { /// Not every mode is supported on every system. typedef enum umf_numa_mode_t { /// Default binding mode. Actual binding policy is system-specific. On - /// linux this corresponds to MPOL_DEFAULT. If this mode is specified, + /// Linux this corresponds to MPOL_DEFAULT. If this mode is specified, /// nodemask must be NULL and maxnode must be 0. 
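Returning to the two new Level Zero setters above, a short sketch; it assumes `params` came from umfLevelZeroMemoryProviderParamsCreate() and that both setters simply record the values for the subsequent provider creation:

```c
#include <umf/providers/provider_level_zero.h>

// Sketch: configure the free policy and device ordinal on existing params.
umf_result_t configure_l0_params(
    umf_level_zero_memory_provider_params_handle_t params) {
    // Block in free() until the GPU is done with the memory...
    umf_result_t ret = umfLevelZeroMemoryProviderParamsSetFreePolicy(
        params, UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_BLOCKING_FREE);
    if (ret != UMF_RESULT_SUCCESS) {
        return ret;
    }
    // ...and allocate from ordinal 0 of the selected device.
    return umfLevelZeroMemoryProviderParamsSetDeviceOrdinal(params, 0);
}
```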
UMF_NUMA_MODE_DEFAULT,
diff --git a/scripts/README.md b/scripts/README.md
deleted file mode 100644
index e3a9ed533f..0000000000
--- a/scripts/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-The documentation HTML files are generated using the following dependencies:
- * [Python](https://www.python.org/downloads/) at least v3.8
- * [Doxygen](http://www.doxygen.nl/) at least v1.9.1
-
- To generate files run the `generate_docs.py` script from the `scripts` directory. Files will be generated to the `docs/html` directory relative to the main directory of this repository.
diff --git a/scripts/check_license/check_headers.sh b/scripts/check_license/check_headers.sh
new file mode 100755
index 0000000000..aeb90e7a28
--- /dev/null
+++ b/scripts/check_license/check_headers.sh
@@ -0,0 +1,197 @@
+#!/usr/bin/env bash
+# Copyright (C) 2016-2025 Intel Corporation
+# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# check_headers.sh - check copyright and license in source files
+
+SELF=$0
+
+function usage() {
+    echo "Usage: $SELF [-h|-v|-a|-d]"
+    echo "   -h, --help            this help message"
+    echo "   -v, --verbose         verbose mode"
+    echo "   -a, --all             check all files (only modified files are checked by default)"
+    echo "   -d, --update_dates    change Copyright dates in all analyzed files (not recommended together with -a)"
+}
+
+if [ "$#" -lt 2 ]; then
+    usage >&2
+    exit 2
+fi
+
+SOURCE_ROOT=$1
+shift
+LICENSE=$1
+shift
+
+PATTERN=`mktemp`
+TMP=`mktemp`
+TMP2=`mktemp`
+TEMPFILE=`mktemp`
+rm -f $PATTERN $TMP $TMP2
+
+if [ "$1" == "-h" -o "$1" == "--help" ]; then
+    usage
+    exit 0
+fi
+
+export GIT="git -C ${SOURCE_ROOT}"
+$GIT rev-parse || exit 1
+
+if [ -f $SOURCE_ROOT/.git/shallow ]; then
+    SHALLOW_CLONE=1
+    echo
+    echo "Warning: This is a shallow clone. Checking dates in copyright headers"
+    echo "         will be skipped in case of files that have no history."
+    echo
+else
+    SHALLOW_CLONE=0
+fi
+
+VERBOSE=0
+CHECK_ALL=0
+UPDATE_DATES=0
+while [ "$1" != "" ]; do
+    case $1 in
+    -v|--verbose)
+        VERBOSE=1
+        ;;
+    -a|--all)
+        CHECK_ALL=1
+        ;;
+    -d|--update_dates)
+        UPDATE_DATES=1
+        ;;
+    esac
+    shift
+done
+
+if [ $CHECK_ALL -eq 0 ]; then
+    CURRENT_COMMIT=$($GIT --no-pager log --pretty=%H -1)
+    MERGE_BASE=$($GIT merge-base HEAD origin/main 2>/dev/null)
+    [ -z $MERGE_BASE ] && \
+        MERGE_BASE=$($GIT --no-pager log --pretty="%cN:%H" | grep GitHub 2>/dev/null | head -n1 | cut -d: -f2)
+    [ -z $MERGE_BASE -o "$CURRENT_COMMIT" = "$MERGE_BASE" ] && \
+        CHECK_ALL=1
+fi
+
+if [ $CHECK_ALL -eq 1 ]; then
+    echo "INFO: Checking copyright headers of all files..."
+    GIT_COMMAND="ls-tree -r --name-only HEAD"
+else
+    echo "INFO: Checking copyright headers of modified files only..."
+    GIT_COMMAND="diff --name-only $MERGE_BASE $CURRENT_COMMIT"
+fi
+
+FILES=$($GIT $GIT_COMMAND | ${SOURCE_ROOT}/scripts/check_license/file-exceptions.sh)
+
+RV=0
+for file in $FILES ; do
+    if [ $VERBOSE -eq 1 ]; then
+        echo "Checking file: $file"
+    fi
+    # The src_path is a path which should be used in every command except git.
+    # git is called with -C flag so filepaths should be relative to SOURCE_ROOT
+    src_path="${SOURCE_ROOT}/$file"
+    [ ! -f $src_path ] && continue
+    # ensure that file is UTF-8 encoded
+    ENCODING=`file -b --mime-encoding $src_path`
+    iconv -f $ENCODING -t "UTF-8" $src_path > $TEMPFILE
+
+    if !
grep -q "SPDX-License-Identifier: $LICENSE" $src_path; then + echo >&2 "error: no $LICENSE SPDX tag in file: $src_path" + RV=1 + fi + + if [ $SHALLOW_CLONE -eq 0 ]; then + $GIT log --no-merges --format="%ai %aE" -- $file | sort > $TMP + else + # mark the grafted commits (commits with no parents) + $GIT log --no-merges --format="%ai %aE grafted-%p-commit" -- $file | sort > $TMP + fi + + # skip checking dates for non-Intel commits + [[ ! $(tail -n1 $TMP) =~ "@intel.com" ]] && continue + + # skip checking dates for new files + [ $(cat $TMP | wc -l) -le 1 ] && continue + + # grep out the grafted commits (commits with no parents) + # and skip checking dates for non-Intel commits + grep -v -e "grafted--commit" $TMP | grep -e "@intel.com" > $TMP2 + + [ $(cat $TMP2 | wc -l) -eq 0 ] && continue + + FIRST=`head -n1 $TMP2` + LAST=` tail -n1 $TMP2` + + YEARS=$(sed ' +/.*Copyright (C) [0-9-]\+ Intel Corporation/!d +s/.*Copyright (C) \([0-9]\+\)-\([0-9]\+\).*/\1-\2/ +s/.*Copyright (C) \([0-9]\+\).*/\1/' "$src_path") + if [ -z "$YEARS" ]; then + echo >&2 "No copyright years in $src_path" + RV=1 + continue + fi + + HEADER_FIRST=`echo $YEARS | cut -d"-" -f1` + HEADER_LAST=` echo $YEARS | cut -d"-" -f2` + + COMMIT_FIRST=`echo $FIRST | cut -d"-" -f1` + COMMIT_LAST=` echo $LAST | cut -d"-" -f1` + + if [ "$COMMIT_FIRST" != "" -a "$COMMIT_LAST" != "" ]; then + if [ "$COMMIT_FIRST" -lt "$HEADER_FIRST" ]; then + RV=1 + fi + + if [[ -n "$COMMIT_FIRST" && -n "$COMMIT_LAST" ]]; then + if [[ $HEADER_FIRST -le $COMMIT_FIRST ]]; then + if [[ $HEADER_LAST -eq $COMMIT_LAST ]]; then + continue + else + NEW="$HEADER_FIRST-$COMMIT_LAST" + if [[ ${UPDATE_DATES} -eq 1 ]]; then + echo "Updating copyright date in $src_path: $YEARS -> $NEW" + sed -i "s/Copyright (C) ${YEARS}/Copyright (C) ${NEW}/g" "${src_path}" + else + echo "$file:1: error: wrong copyright date: (is: $YEARS, should be: $NEW)" >&2 + RV=1 + fi + fi + else + if [[ $COMMIT_FIRST -eq $COMMIT_LAST ]]; then + NEW=$COMMIT_LAST + else + NEW=$COMMIT_FIRST-$COMMIT_LAST + fi + + if [[ "$YEARS" == "$NEW" ]]; then + continue + else + if [[ ${UPDATE_DATES} -eq 1 ]]; then + echo "Updating copyright date in $src_path: $YEARS -> $NEW" + sed -i "s/Copyright (C) ${YEARS}/Copyright (C) ${NEW}/g" "${src_path}" + else + echo "$file:1: error: wrong copyright date: (is: $YEARS, should be: $NEW)" >&2 + RV=1 + fi + fi + fi + fi + else + echo "error: unknown commit dates in file: $file" >&2 + RV=1 + fi +done +rm -f $TMP $TMP2 $TEMPFILE + +# check if error found +if [ $RV -eq 0 ]; then + echo "Copyright headers are OK." +else + echo "Error(s) in copyright headers found!" >&2 +fi +exit $RV diff --git a/scripts/check_license/file-exceptions.sh b/scripts/check_license/file-exceptions.sh new file mode 100755 index 0000000000..10c5560614 --- /dev/null +++ b/scripts/check_license/file-exceptions.sh @@ -0,0 +1,38 @@ +#!/bin/sh -e +# Copyright (C) 2025 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# You can add an exception file +# list for license and copyright check +grep -v -E -e 'benchmark/ubench.h' \ + -e 'ChangeLog' \ + -e 'CODEOWNERS$' \ + -e 'docs/assets/.*' \ + -e 'docs/config/spelling_exceptions.txt' \ + -e 'docs/config/conf.py' \ + -e 'docs/config/Doxyfile' \ + -e 'include/umf/proxy_lib_new_delete.h' \ + -e 'LICENSE.TXT' \ + -e 'licensing/third-party-programs.txt' \ + -e 'scripts/assets/images/.*' \ + -e 'scripts/qemu/requirements.txt' \ + -e 'src/uthash/.*' \ + -e 'src/uthash/utlist.h' \ + -e 'src/uthash/uthash.h' \ + -e 'test/ctl/config.txt' \ + -e 'test/supp/.*' \ + -e 'third_party/requirements.txt' \ + -e '.clang-format$' \ + -e '.cmake-format$' \ + -e '.cmake.in$' \ + -e '.gitignore' \ + -e '.json$' \ + -e '.mailmap' \ + -e '.md$' \ + -e '.patch$' \ + -e '.rst$' \ + -e '.spellcheck-conf.toml' \ + -e '.trivyignore' \ + -e '.xml$' \ + -e '.yml$' diff --git a/scripts/qemu/configs/default.xml b/scripts/qemu/configs/default.xml index 5654687949..5d3198f60a 100644 --- a/scripts/qemu/configs/default.xml +++ b/scripts/qemu/configs/default.xml @@ -50,16 +50,12 @@ Cell 2 | 0 | 1200MiB | 17, 28, 10 | - - - - diff --git a/scripts/qemu/run-build.sh b/scripts/qemu/run-build.sh index 06d6043f6b..724e6d7ff2 100755 --- a/scripts/qemu/run-build.sh +++ b/scripts/qemu/run-build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -14,18 +14,18 @@ pwd echo password | sudo -Sk apt-get update echo password | sudo -Sk apt-get install -y git cmake gcc g++ pkg-config \ - numactl libnuma-dev hwloc libhwloc-dev libjemalloc-dev libtbb-dev valgrind lcov + numactl libnuma-dev hwloc libhwloc-dev libtbb-dev valgrind lcov mkdir build cd build cmake .. \ -DCMAKE_BUILD_TYPE=Debug \ + -DUMF_QEMU_BUILD=1 \ -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON \ -DUMF_BUILD_CUDA_PROVIDER=ON \ -DUMF_FORMAT_CODE_STYLE=OFF \ -DUMF_DEVELOPER_MODE=ON \ - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON \ -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON \ -DUMF_BUILD_EXAMPLES=ON \ -DUMF_USE_COVERAGE=${COVERAGE} \ diff --git a/scripts/qemu/run-tests.sh b/scripts/qemu/run-tests.sh index 9d855590ba..341e2f9ab8 100755 --- a/scripts/qemu/run-tests.sh +++ b/scripts/qemu/run-tests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -23,8 +23,6 @@ UMF_DIR=$(pwd) # Drop caches, restores free memory on NUMA nodes echo password | sudo sync; echo password | sudo sh -c "/usr/bin/echo 3 > /proc/sys/vm/drop_caches" -# Set ptrace value for IPC test -echo password | sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" numactl -H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 57050e8276..24beb1e0ae 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -27,18 +27,21 @@ endforeach() # # TODO: Cleanup the compile definitions across all the CMake files set(UMF_COMMON_COMPILE_DEFINITIONS - UMF_VERSION=${UMF_VERSION} + ${UMF_COMMON_COMPILE_DEFINITIONS} UMF_VERSION=${UMF_VERSION} UMF_ALL_CMAKE_VARIABLES="${UMF_ALL_CMAKE_VARIABLES}") -add_subdirectory(utils) - -set(UMF_LIBS $) - set(BA_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/base_alloc/base_alloc.c ${CMAKE_CURRENT_SOURCE_DIR}/base_alloc/base_alloc_linear.c ${CMAKE_CURRENT_SOURCE_DIR}/base_alloc/base_alloc_global.c) +add_subdirectory(utils) +add_subdirectory(coarse) + +set(UMF_LIBS $ $) + +set(CTL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ctl/ctl.c) + if(LINUX) set(BA_SOURCES ${BA_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/base_alloc/base_alloc_linux.c) @@ -58,6 +61,7 @@ set(HWLOC_DEPENDENT_SOURCES topology.c) set(UMF_SOURCES ${BA_SOURCES} + ${CTL_SOURCES} libumf.c ipc.c ipc_cache.c @@ -72,22 +76,34 @@ set(UMF_SOURCES memspaces/memspace_highest_bandwidth.c memspaces/memspace_lowest_latency.c memspaces/memspace_numa.c - provider/provider_coarse.c provider/provider_cuda.c provider/provider_devdax_memory.c provider/provider_file_memory.c + provider/provider_fixed_memory.c provider/provider_level_zero.c provider/provider_os_memory.c provider/provider_tracking.c critnib/critnib.c ravl/ravl.c + pool/pool_disjoint.c + pool/pool_jemalloc.c pool/pool_proxy.c pool/pool_scalable.c) +if(UMF_POOL_JEMALLOC_ENABLED) + set(UMF_LIBS ${UMF_LIBS} ${JEMALLOC_LIBRARIES}) + set(UMF_PRIVATE_LIBRARY_DIRS ${UMF_PRIVATE_LIBRARY_DIRS} + ${JEMALLOC_LIBRARY_DIRS}) + set(UMF_PRIVATE_INCLUDE_DIRS ${UMF_PRIVATE_INCLUDE_DIRS} + ${JEMALLOC_INCLUDE_DIRS}) + set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} + "UMF_POOL_JEMALLOC_ENABLED=1") +endif() + if(NOT UMF_DISABLE_HWLOC) set(UMF_SOURCES ${UMF_SOURCES} ${HWLOC_DEPENDENT_SOURCES} memtargets/memtarget_numa.c) - set(UMF_LIBS ${UMF_LIBS} ${LIBHWLOC_LIBRARIES}) + set(UMF_LIBS ${UMF_LIBS} $) set(UMF_PRIVATE_LIBRARY_DIRS ${UMF_PRIVATE_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) else() @@ -134,14 +150,11 @@ if(UMF_BUILD_SHARED_LIBRARY) set(CMAKE_INSTALL_RPATH "${UMF_INSTALL_RPATH}") endif() - if(NOT UMF_DISABLE_HWLOC) - set(HWLOC_LIB ${UMF_HWLOC_NAME}) - endif() add_umf_library( NAME umf TYPE SHARED SRCS ${UMF_SOURCES} - LIBS ${UMF_LIBS} ${HWLOC_LIB} + LIBS ${UMF_LIBS} LINUX_MAP_FILE ${CMAKE_CURRENT_SOURCE_DIR}/libumf.map WINDOWS_DEF_FILE ${CMAKE_CURRENT_SOURCE_DIR}/libumf.def) set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} @@ -159,13 +172,31 @@ else() LIBS ${UMF_LIBS}) endif() +target_include_directories(umf PRIVATE ${UMF_PRIVATE_INCLUDE_DIRS}) +target_link_directories(umf PRIVATE ${UMF_PRIVATE_LIBRARY_DIRS}) +target_compile_definitions(umf PRIVATE ${UMF_COMMON_COMPILE_DEFINITIONS}) + +add_dependencies(umf coarse) + if(UMF_LINK_HWLOC_STATICALLY) add_dependencies(umf ${UMF_HWLOC_NAME}) + # On Darwin, link with the IOKit and Foundation frameworks, if they are + # available in the system. This is to comply with hwloc which links these, + # if available. There is no option to disable these frameworks on Darwin + # hwloc builds. 
+ if(MACOSX) + find_library(IOKIT_LIBRARY IOKit) + find_library(FOUNDATION_LIBRARY Foundation) + if(IOKIT_LIBRARY OR FOUNDATION_LIBRARY) + target_link_libraries(umf PRIVATE ${IOKIT_LIBRARY} + ${FOUNDATION_LIBRARY}) + endif() + endif() endif() -target_link_directories(umf PRIVATE ${UMF_PRIVATE_LIBRARY_DIRS}) - -target_compile_definitions(umf PRIVATE ${UMF_COMMON_COMPILE_DEFINITIONS}) +if(NOT WINDOWS AND UMF_POOL_JEMALLOC_ENABLED) + add_dependencies(umf jemalloc) +endif() if(UMF_BUILD_LEVEL_ZERO_PROVIDER) if(LINUX) diff --git a/src/base_alloc/base_alloc.c b/src/base_alloc/base_alloc.c index 209ace7fe3..00e58078e6 100644 --- a/src/base_alloc/base_alloc.c +++ b/src/base_alloc/base_alloc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -230,6 +230,7 @@ void *umf_ba_alloc(umf_ba_pool_t *pool) { // check if the free list is not empty if (pool->metadata.free_list == NULL) { LOG_ERR("base_alloc: Free list should not be empty before new alloc"); + utils_mutex_unlock(&pool->metadata.free_lock); return NULL; } @@ -303,7 +304,13 @@ void umf_ba_destroy(umf_ba_pool_t *pool) { #ifndef NDEBUG ba_debug_checks(pool); if (pool->metadata.n_allocs) { - LOG_ERR("pool->metadata.n_allocs = %zu", pool->metadata.n_allocs); + LOG_ERR("number of base allocator memory leaks: %zu", + pool->metadata.n_allocs); + +#ifdef UMF_DEVELOPER_MODE + assert(pool->metadata.n_allocs == 0 && + "memory leaks in base allocator occurred"); +#endif } #endif /* NDEBUG */ diff --git a/src/base_alloc/base_alloc_global.c b/src/base_alloc/base_alloc_global.c index 2aca5d29cf..ecec3367c9 100644 --- a/src/base_alloc/base_alloc_global.c +++ b/src/base_alloc/base_alloc_global.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -23,6 +23,7 @@ // global base allocator used by all providers and pools static UTIL_ONCE_FLAG ba_is_initialized = UTIL_ONCE_FLAG_INIT; +static bool ba_is_destroyed = false; #define ALLOC_METADATA_SIZE (sizeof(size_t)) @@ -40,6 +41,8 @@ struct base_alloc_t { static struct base_alloc_t BASE_ALLOC = {.ac_sizes = ALLOCATION_CLASSES}; void umf_ba_destroy_global(void) { + ba_is_destroyed = true; + for (int i = 0; i < NUM_ALLOCATION_CLASSES; i++) { if (BASE_ALLOC.ac[i]) { umf_ba_destroy(BASE_ALLOC.ac[i]); @@ -48,10 +51,12 @@ void umf_ba_destroy_global(void) { } // portable version of "ba_is_initialized = UTIL_ONCE_FLAG_INIT;" - static UTIL_ONCE_FLAG is_initialized = UTIL_ONCE_FLAG_INIT; - memcpy(&ba_is_initialized, &is_initialized, sizeof(ba_is_initialized)); + static UTIL_ONCE_FLAG set_once = UTIL_ONCE_FLAG_INIT; + memcpy(&ba_is_initialized, &set_once, sizeof(ba_is_initialized)); } +bool umf_ba_global_is_destroyed(void) { return ba_is_destroyed; } + static void umf_ba_create_global(void) { for (int i = 0; i < NUM_ALLOCATION_CLASSES; i++) { // allocation classes need to be powers of 2 @@ -66,7 +71,7 @@ static void umf_ba_create_global(void) { } size_t smallestSize = BASE_ALLOC.ac_sizes[0]; - BASE_ALLOC.smallest_ac_size_log2 = log2Utils(smallestSize); + BASE_ALLOC.smallest_ac_size_log2 = utils_msb64(smallestSize); LOG_DEBUG("UMF base allocator created"); } @@ -78,8 +83,8 @@ static int size_to_idx(size_t size) { } int isPowerOf2 = (0 == (size & (size - 1))); - int index = - (int)(log2Utils(size) + !isPowerOf2 - BASE_ALLOC.smallest_ac_size_log2); + int index = (int)(utils_msb64(size) + !isPowerOf2 - + BASE_ALLOC.smallest_ac_size_log2); assert(index >= 0); return index; @@ -202,6 +207,12 @@ void umf_ba_global_free(void *ptr) { return; } + if (ba_is_destroyed) { + LOG_WARN( + "base_alloc: calling free() after the base allocator is destroyed"); + return; + } + size_t total_size; ptr = get_original_alloc(ptr, &total_size, NULL); diff --git a/src/base_alloc/base_alloc_global.h b/src/base_alloc/base_alloc_global.h index ad7f12ce54..bd55d352ff 100644 --- a/src/base_alloc/base_alloc_global.h +++ b/src/base_alloc/base_alloc_global.h @@ -8,6 +8,8 @@ #ifndef UMF_BASE_ALLOC_GLOBAL_H #define UMF_BASE_ALLOC_GLOBAL_H 1 +#include + #include "base_alloc.h" #ifdef __cplusplus @@ -17,6 +19,7 @@ extern "C" { void *umf_ba_global_alloc(size_t size); void umf_ba_global_free(void *ptr); void umf_ba_destroy_global(void); +bool umf_ba_global_is_destroyed(void); size_t umf_ba_global_malloc_usable_size(void *ptr); void *umf_ba_global_aligned_alloc(size_t size, size_t alignment); diff --git a/src/base_alloc/base_alloc_linux.c b/src/base_alloc/base_alloc_linux.c index 260eec5aac..9b1dc63fe0 100644 --- a/src/base_alloc/base_alloc_linux.c +++ b/src/base_alloc/base_alloc_linux.c @@ -1,26 +1,22 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ #include -#include #include -#include #include -#include "base_alloc.h" -#include "base_alloc_global.h" #include "utils_concurrency.h" static UTIL_ONCE_FLAG Page_size_is_initialized = UTIL_ONCE_FLAG_INIT; static size_t Page_size; void *ba_os_alloc(size_t size) { - void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + void *ptr = utils_mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); // this should be unnecessary but pairs of mmap/munmap do not reset // asan's user-poisoning flags, leading to invalid error reports // Bug 81619: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81619 @@ -29,7 +25,7 @@ void *ba_os_alloc(size_t size) { } void ba_os_free(void *ptr, size_t size) { - int ret = munmap(ptr, size); + int ret = utils_munmap(ptr, size); assert(ret == 0); (void)ret; // unused } diff --git a/src/coarse/CMakeLists.txt b/src/coarse/CMakeLists.txt new file mode 100644 index 0000000000..c211f9a7bf --- /dev/null +++ b/src/coarse/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright (C) 2024-2025 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +include(${UMF_CMAKE_SOURCE_DIR}/cmake/helpers.cmake) + +set(COARSE_SOURCES coarse.c ../ravl/ravl.c) + +if(UMF_BUILD_SHARED_LIBRARY AND (NOT WINDOWS)) + set(COARSE_EXTRA_SRCS ${BA_SOURCES}) + set(COARSE_EXTRA_LIBS $) +endif() + +add_umf_library( + NAME coarse + TYPE STATIC + SRCS ${COARSE_SOURCES} ${COARSE_EXTRA_SRCS} + LIBS ${COARSE_EXTRA_LIBS}) + +target_include_directories( + coarse + PRIVATE $ + $ + $) + +add_library(${PROJECT_NAME}::coarse ALIAS coarse) diff --git a/src/coarse/coarse.c b/src/coarse/coarse.c new file mode 100644 index 0000000000..19798466e1 --- /dev/null +++ b/src/coarse/coarse.c @@ -0,0 +1,1370 @@ +/* + * Copyright (C) 2024-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include +#include +#include + +#include + +#include "base_alloc_global.h" +#include "coarse.h" +#include "libumf.h" +#include "ravl.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" + +#ifdef _WIN32 +UTIL_ONCE_FLAG Log_initialized = UTIL_ONCE_FLAG_INIT; +#else +void __attribute__((constructor)) coarse_init(void) { utils_log_init(); } +void __attribute__((destructor)) coarse_destroy(void) {} +#endif /* _WIN32 */ + +typedef struct coarse_t { + // handle of the memory provider + void *provider; + + // coarse callbacks + coarse_callbacks_t cb; + + // memory allocation strategy + coarse_strategy_t allocation_strategy; + + // page size of the memory provider + size_t page_size; + + // all_blocks - tree of all blocks - sorted by an address of data + struct ravl *all_blocks; + + // free_blocks - tree of free blocks - sorted by a size of data, + // each node contains a pointer (ravl_free_blocks_head_t) + // to the head of the list of free blocks of the same size + struct ravl *free_blocks; + + struct utils_mutex_t lock; + + // statistics + size_t used_size; + size_t alloc_size; +} coarse_t; + +typedef struct ravl_node ravl_node_t; + +typedef enum check_free_blocks_t { + CHECK_ONLY_THE_FIRST_BLOCK = 0, + CHECK_ALL_BLOCKS_OF_SIZE, +} check_free_blocks_t; + +typedef struct block_t { + size_t size; + unsigned char *data; + bool used; + + // Node in the list of free blocks of the same size pointing to this block. + // The list is located in the (coarse->free_blocks) RAVL tree. + struct ravl_free_blocks_elem_t *free_list_ptr; +} block_t; + +// A general node in a RAVL tree. +// 1) coarse->all_blocks RAVL tree (tree of all blocks - sorted by an address of data): +// key - pointer (block_t->data) to the beginning of the block data +// value - pointer (block_t) to the block of the allocation +// 2) coarse->free_blocks RAVL tree (tree of free blocks - sorted by a size of data): +// key - size of the allocation (block_t->size) +// value - pointer (ravl_free_blocks_head_t) to the head of the list of free blocks of the same size +typedef struct ravl_data_t { + uintptr_t key; + void *value; +} ravl_data_t; + +// The head of the list of free blocks of the same size. 
+typedef struct ravl_free_blocks_head_t {
+    struct ravl_free_blocks_elem_t *head;
+} ravl_free_blocks_head_t;
+
+// The node of the list of free blocks of the same size
+typedef struct ravl_free_blocks_elem_t {
+    struct block_t *block;
+    struct ravl_free_blocks_elem_t *next;
+    struct ravl_free_blocks_elem_t *prev;
+} ravl_free_blocks_elem_t;
+
+// The compare function of a RAVL tree
+static int coarse_ravl_comp(const void *lhs, const void *rhs) {
+    const ravl_data_t *lhs_ravl = (const ravl_data_t *)lhs;
+    const ravl_data_t *rhs_ravl = (const ravl_data_t *)rhs;
+
+    if (lhs_ravl->key < rhs_ravl->key) {
+        return -1;
+    }
+
+    if (lhs_ravl->key > rhs_ravl->key) {
+        return 1;
+    }
+
+    // lhs_ravl->key == rhs_ravl->key
+    return 0;
+}
+
+static inline block_t *get_node_block(ravl_node_t *node) {
+    ravl_data_t *node_data = ravl_data(node);
+    assert(node_data);
+    assert(node_data->value);
+    return node_data->value;
+}
+
+static inline ravl_node_t *get_node_prev(ravl_node_t *node) {
+    return ravl_node_predecessor(node);
+}
+
+static inline ravl_node_t *get_node_next(ravl_node_t *node) {
+    return ravl_node_successor(node);
+}
+
+#ifndef NDEBUG
+static block_t *get_block_prev(ravl_node_t *node) {
+    ravl_node_t *ravl_prev = ravl_node_predecessor(node);
+    if (!ravl_prev) {
+        return NULL;
+    }
+
+    return get_node_block(ravl_prev);
+}
+
+static block_t *get_block_next(ravl_node_t *node) {
+    ravl_node_t *ravl_next = ravl_node_successor(node);
+    if (!ravl_next) {
+        return NULL;
+    }
+
+    return get_node_block(ravl_next);
+}
+#endif /* NDEBUG */
+
+// The functions "coarse_ravl_*" handle the coarse->all_blocks list of blocks
+// sorted by a pointer (block_t->data) to the beginning of the block data.
+//
+// coarse_ravl_add_new - allocate and add a new block to the tree
+// and link this block to the next and the previous one.
+static block_t *coarse_ravl_add_new(struct ravl *rtree, unsigned char *data,
+                                    size_t size, ravl_node_t **node) {
+    assert(rtree);
+    assert(data);
+    assert(size);
+
+    // TODO add valgrind annotations
+    block_t *block = umf_ba_global_alloc(sizeof(*block));
+    if (block == NULL) {
+        return NULL;
+    }
+
+    block->data = data;
+    block->size = size;
+    block->free_list_ptr = NULL;
+
+    ravl_data_t rdata = {(uintptr_t)block->data, block};
+    assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL));
+    int ret = ravl_emplace_copy(rtree, &rdata);
+    if (ret) {
+        umf_ba_global_free(block);
+        return NULL;
+    }
+
+    ravl_node_t *new_node = ravl_find(rtree, &rdata, RAVL_PREDICATE_EQUAL);
+    assert(NULL != new_node);
+
+    if (node) {
+        *node = new_node;
+    }
+
+    return block;
+}
+
+// coarse_ravl_find_node - find the node in the tree
+static ravl_node_t *coarse_ravl_find_node(struct ravl *rtree, void *ptr) {
+    ravl_data_t data = {(uintptr_t)ptr, NULL};
+    return ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL);
+}
+
+// coarse_ravl_rm - remove the block from the tree
+static block_t *coarse_ravl_rm(struct ravl *rtree, void *ptr) {
+    ravl_data_t data = {(uintptr_t)ptr, NULL};
+    ravl_node_t *node;
+    node = ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL);
+    if (node) {
+        ravl_data_t *node_data = ravl_data(node);
+        assert(node_data);
+        block_t *block = node_data->value;
+        assert(block);
+        ravl_remove(rtree, node);
+        assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL));
+        return block;
+    }
+    return NULL;
+}
+
+// The functions "node_list_*" handle lists of free blocks of the same size.
+// The heads (ravl_free_blocks_head_t) of those lists are stored in nodes of
+// the coarse->free_blocks RAVL tree.
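
A concrete picture of the structure this describes (sizes and addresses are made up for illustration):

    coarse->free_blocks (RAVL tree keyed by size)   per-size lists (ravl_free_blocks_elem_t)

               {4096}   ->  head -> block@0x1000
              /
      {8192} ---------->    head -> block@0x2000 <-> block@0x6000
              \
               {16384}  ->  head -> block@0x8000
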
+// +// node_list_add - add a free block to the list of free blocks of the same size +static ravl_free_blocks_elem_t * +node_list_add(ravl_free_blocks_head_t *head_node, struct block_t *block) { + assert(head_node); + assert(block); + + ravl_free_blocks_elem_t *node = umf_ba_global_alloc(sizeof(*node)); + if (node == NULL) { + return NULL; + } + + if (head_node->head) { + head_node->head->prev = node; + } + + node->block = block; + node->next = head_node->head; + node->prev = NULL; + head_node->head = node; + + return node; +} + +// node_list_rm - remove the given free block from the list of free blocks of the same size +static block_t *node_list_rm(ravl_free_blocks_head_t *head_node, + ravl_free_blocks_elem_t *node) { + assert(head_node); + assert(node); + assert(head_node->head); + + if (node == head_node->head) { + assert(node->prev == NULL); + head_node->head = node->next; + } + + ravl_free_blocks_elem_t *node_next = node->next; + ravl_free_blocks_elem_t *node_prev = node->prev; + if (node_next) { + node_next->prev = node_prev; + } + + if (node_prev) { + node_prev->next = node_next; + } + + struct block_t *block = node->block; + block->free_list_ptr = NULL; + umf_ba_global_free(node); + + return block; +} + +// node_list_rm_first - remove the first free block from the list of free blocks of the same size only if it can be properly aligned +static block_t *node_list_rm_first(ravl_free_blocks_head_t *head_node, + size_t alignment) { + assert(head_node); + assert(head_node->head); + + ravl_free_blocks_elem_t *node = head_node->head; + assert(node->prev == NULL); + struct block_t *block = node->block; + + if (IS_NOT_ALIGNED(((uintptr_t)block->data), alignment)) { + return NULL; + } + + if (node->next) { + node->next->prev = NULL; + } + + head_node->head = node->next; + block->free_list_ptr = NULL; + umf_ba_global_free(node); + + return block; +} + +// node_list_rm_with_alignment - remove the first free block with the correct alignment from the list of free blocks of the same size +static block_t *node_list_rm_with_alignment(ravl_free_blocks_head_t *head_node, + size_t alignment) { + assert(head_node); + assert(head_node->head); + + assert(((ravl_free_blocks_elem_t *)head_node->head)->prev == NULL); + + ravl_free_blocks_elem_t *node; + for (node = head_node->head; node != NULL; node = node->next) { + if (IS_ALIGNED(((uintptr_t)node->block->data), alignment)) { + return node_list_rm(head_node, node); + } + } + + return NULL; +} + +// The functions "free_blocks_*" handle the coarse->free_blocks RAVL tree +// sorted by a size of the allocation (block_t->size). +// This is a tree of heads (ravl_free_blocks_head_t) of lists of free blocks of the same size. 
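
Before the implementations, a short fragment showing how both trees are queried. It reuses ravl_data_t and the ravl API from this file, so it is a fragment rather than a standalone program; 'coarse', 'ptr' and 'size' are assumed to be in scope:

    // coarse->all_blocks is keyed by start address - exact match:
    ravl_data_t by_addr = {(uintptr_t)ptr, NULL};
    ravl_node_t *n = ravl_find(coarse->all_blocks, &by_addr, RAVL_PREDICATE_EQUAL);

    // coarse->free_blocks is keyed by size - GREATER_EQUAL picks the first
    // size class able to satisfy a request of at least 'size' bytes:
    ravl_data_t by_size = {(uintptr_t)size, NULL};
    ravl_node_t *m = ravl_find(coarse->free_blocks, &by_size, RAVL_PREDICATE_GREATER_EQUAL);
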
+// +// free_blocks_add - add a free block to the list of free blocks of the same size +static int free_blocks_add(struct ravl *free_blocks, block_t *block) { + ravl_free_blocks_head_t *head_node = NULL; + int rv; + + ravl_data_t head_node_data = {(uintptr_t)block->size, NULL}; + ravl_node_t *node; + node = ravl_find(free_blocks, &head_node_data, RAVL_PREDICATE_EQUAL); + if (node) { + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + head_node = node_data->value; + assert(head_node); + } else { // no head_node + head_node = umf_ba_global_alloc(sizeof(*head_node)); + if (!head_node) { + return -1; + } + + head_node->head = NULL; + + ravl_data_t data = {(uintptr_t)block->size, head_node}; + rv = ravl_emplace_copy(free_blocks, &data); + if (rv) { + umf_ba_global_free(head_node); + return -1; + } + } + + block->free_list_ptr = node_list_add(head_node, block); + if (!block->free_list_ptr) { + return -1; // out of memory + } + + assert(block->free_list_ptr->block->size == block->size); + + return 0; +} + +// free_blocks_rm_ge - remove the first free block of a size greater or equal to the given size only if it can be properly aligned +// If it was the last block, the head node is freed and removed from the tree. +// It is used during memory allocation (looking for a free block). +static block_t *free_blocks_rm_ge(struct ravl *free_blocks, size_t size, + size_t alignment, + check_free_blocks_t check_blocks) { + ravl_data_t data = {(uintptr_t)size, NULL}; + ravl_node_t *node; + node = ravl_find(free_blocks, &data, RAVL_PREDICATE_GREATER_EQUAL); + if (!node) { + return NULL; + } + + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + assert(node_data->key >= size); + + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + + block_t *block = NULL; + switch (check_blocks) { + case CHECK_ONLY_THE_FIRST_BLOCK: + block = node_list_rm_first(head_node, alignment); + break; + case CHECK_ALL_BLOCKS_OF_SIZE: + block = node_list_rm_with_alignment(head_node, alignment); + break; + } + + if (head_node->head == NULL) { + umf_ba_global_free(head_node); + ravl_remove(free_blocks, node); + } + + return block; +} + +// free_blocks_rm_node - remove the free block pointed by the given node. +// If it was the last block, the head node is freed and removed from the tree. +// It is used during merging free blocks and destroying the coarse->free_blocks tree. 
+static block_t *free_blocks_rm_node(struct ravl *free_blocks,
+                                    ravl_free_blocks_elem_t *node) {
+    assert(free_blocks);
+    assert(node);
+    size_t size = node->block->size;
+    ravl_data_t data = {(uintptr_t)size, NULL};
+    ravl_node_t *ravl_node;
+    ravl_node = ravl_find(free_blocks, &data, RAVL_PREDICATE_EQUAL);
+    assert(ravl_node);
+
+    ravl_data_t *node_data = ravl_data(ravl_node);
+    assert(node_data);
+    assert(node_data->key == size);
+
+    ravl_free_blocks_head_t *head_node = node_data->value;
+    assert(head_node);
+
+    block_t *block = node_list_rm(head_node, node);
+
+    if (head_node->head == NULL) {
+        umf_ba_global_free(head_node);
+        ravl_remove(free_blocks, ravl_node);
+    }
+
+    return block;
+}
+
+// user_block_merge - merge two blocks from one of two lists of user blocks: all_blocks or free_blocks
+static umf_result_t user_block_merge(coarse_t *coarse, ravl_node_t *node1,
+                                     ravl_node_t *node2, bool used,
+                                     ravl_node_t **merged_node) {
+    assert(node1);
+    assert(node2);
+    assert(node1 == get_node_prev(node2));
+    assert(node2 == get_node_next(node1));
+    assert(merged_node);
+
+    *merged_node = NULL;
+
+    struct ravl *all_blocks = coarse->all_blocks;
+    struct ravl *free_blocks = coarse->free_blocks;
+
+    block_t *block1 = get_node_block(node1);
+    block_t *block2 = get_node_block(node2);
+    assert(block1->data < block2->data);
+
+    bool same_used = ((block1->used == used) && (block2->used == used));
+    bool contiguous_data = (block1->data + block1->size == block2->data);
+
+    // check if blocks can be merged
+    if (!same_used || !contiguous_data) {
+        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+
+    // merge the blocks in the upstream memory provider
+    umf_result_t umf_result =
+        coarse->cb.merge(coarse->provider, block1->data, block2->data,
+                         block1->size + block2->size);
+    if (umf_result != UMF_RESULT_SUCCESS) {
+        LOG_ERR("coarse_merge_cb(lowPtr=%p, highPtr=%p, totalSize=%zu) failed",
+                (void *)block1->data, (void *)block2->data,
+                block1->size + block2->size);
+        return umf_result;
+    }
+
+    if (block1->free_list_ptr) {
+        free_blocks_rm_node(free_blocks, block1->free_list_ptr);
+        block1->free_list_ptr = NULL;
+    }
+
+    if (block2->free_list_ptr) {
+        free_blocks_rm_node(free_blocks, block2->free_list_ptr);
+        block2->free_list_ptr = NULL;
+    }
+
+    // update the size
+    block1->size += block2->size;
+
+    block_t *block_rm = coarse_ravl_rm(all_blocks, block2->data);
+    assert(block_rm == block2);
+    (void)block_rm; // WA for unused variable error
+    umf_ba_global_free(block2);
+
+    *merged_node = node1;
+
+    return UMF_RESULT_SUCCESS;
+}
+
+// free_block_merge_with_prev - merge the given free block
+// with the previous one if both are unused and have contiguous data.
+// Remove the merged block from the tree of free blocks.
+static ravl_node_t *free_block_merge_with_prev(coarse_t *coarse,
+                                               ravl_node_t *node) {
+    ravl_node_t *node_prev = get_node_prev(node);
+    if (!node_prev) {
+        return node;
+    }
+
+    ravl_node_t *merged_node = NULL;
+    umf_result_t umf_result =
+        user_block_merge(coarse, node_prev, node, false, &merged_node);
+    if (umf_result != UMF_RESULT_SUCCESS) {
+        return node;
+    }
+
+    assert(merged_node != NULL);
+
+    return merged_node;
+}
+
+// free_block_merge_with_next - merge the given free block
+// with the next one if both are unused and have contiguous data.
+// Remove the merged block from the tree of free blocks.
+static ravl_node_t *free_block_merge_with_next(coarse_t *coarse,
+                                               ravl_node_t *node) {
+    ravl_node_t *node_next = get_node_next(node);
+    if (!node_next) {
+        return node;
+    }
+
+    ravl_node_t *merged_node = NULL;
+    umf_result_t umf_result =
+        user_block_merge(coarse, node, node_next, false, &merged_node);
+    if (umf_result != UMF_RESULT_SUCCESS) {
+        return node;
+    }
+
+    assert(merged_node != NULL);
+
+    return merged_node;
+}
+
+#ifndef NDEBUG // begin of DEBUG code
+
+typedef struct debug_cb_args_t {
+    coarse_t *provider;
+    size_t sum_used;
+    size_t sum_blocks_size;
+    size_t num_all_blocks;
+    size_t num_free_blocks;
+} debug_cb_args_t;
+
+static void debug_verify_all_blocks_cb(void *data, void *arg) {
+    assert(data);
+    assert(arg);
+
+    ravl_data_t *node_data = data;
+    block_t *block = node_data->value;
+    assert(block);
+
+    debug_cb_args_t *cb_args = (debug_cb_args_t *)arg;
+    coarse_t *provider = cb_args->provider;
+
+    ravl_node_t *node =
+        ravl_find(provider->all_blocks, data, RAVL_PREDICATE_EQUAL);
+    assert(node);
+
+    block_t *block_next = get_block_next(node);
+    block_t *block_prev = get_block_prev(node);
+
+    cb_args->num_all_blocks++;
+    if (!block->used) {
+        cb_args->num_free_blocks++;
+    }
+
+    assert(block->data);
+    assert(block->size > 0);
+
+    // data addresses in the list are in ascending order
+    if (block_prev) {
+        assert(block_prev->data < block->data);
+    }
+
+    if (block_next) {
+        assert(block->data < block_next->data);
+    }
+
+    // two blocks' data should not overlap
+    if (block_next) {
+        assert((block->data + block->size) <= block_next->data);
+    }
+
+    cb_args->sum_blocks_size += block->size;
+    if (block->used) {
+        cb_args->sum_used += block->size;
+    }
+}
+
+static umf_result_t coarse_get_stats_no_lock(coarse_t *coarse,
+                                             coarse_stats_t *stats);
+
+static bool debug_check(coarse_t *provider) {
+    assert(provider);
+
+    coarse_stats_t stats = {0};
+    coarse_get_stats_no_lock(provider, &stats);
+
+    debug_cb_args_t cb_args = {0};
+    cb_args.provider = provider;
+
+    // verify the all_blocks list
+    ravl_foreach(provider->all_blocks, debug_verify_all_blocks_cb, &cb_args);
+
+    assert(cb_args.num_all_blocks == stats.num_all_blocks);
+    assert(cb_args.num_free_blocks == stats.num_free_blocks);
+    assert(cb_args.sum_used == provider->used_size);
+    assert(cb_args.sum_blocks_size == provider->alloc_size);
+    assert(provider->alloc_size >= provider->used_size);
+
+    return true;
+}
+#endif /* NDEBUG */ // end of DEBUG code
+
+static umf_result_t coarse_add_used_block(coarse_t *coarse, void *addr,
+                                          size_t size) {
+    block_t *new_block =
+        coarse_ravl_add_new(coarse->all_blocks, addr, size, NULL);
+    if (new_block == NULL) {
+        return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+    }
+
+    new_block->used = true;
+    coarse->alloc_size += size;
+    coarse->used_size += size;
+
+    return UMF_RESULT_SUCCESS;
+}
+
+static void coarse_ravl_cb_rm_all_blocks_node(void *data, void *arg) {
+    assert(data);
+    assert(arg);
+
+    coarse_t *coarse = (struct coarse_t *)arg;
+    ravl_data_t *node_data = data;
+    block_t *block = node_data->value;
+    assert(block);
+
+    if (block->used) {
+#ifndef NDEBUG
+        LOG_WARN("not freed block (addr: %p, size: %zu)", (void *)block->data,
+                 block->size);
+#endif
+        assert(coarse->used_size >= block->size);
+        coarse->used_size -= block->size;
+    }
+
+    if (block->free_list_ptr) {
+        free_blocks_rm_node(coarse->free_blocks, block->free_list_ptr);
+    }
+
+    if (coarse->cb.free) {
+        coarse->cb.free(coarse->provider, block->data, block->size);
+    }
+
+    assert(coarse->alloc_size >= block->size);
+    coarse->alloc_size -= block->size;
+
+    umf_ba_global_free(block);
+}
+
+static umf_result_t can_provider_split(coarse_t *coarse, void *ptr,
+                                       size_t totalSize, size_t firstSize) {
+    // check if the block can be split
+    umf_result_t umf_result =
+        coarse->cb.split(coarse->provider, ptr, totalSize, firstSize);
+    if (umf_result != UMF_RESULT_SUCCESS) {
+        LOG_ERR(
+            "coarse_split_cb(ptr=%p, totalSize = %zu = (%zu + %zu)) failed",
+            ptr, totalSize, firstSize, totalSize - firstSize);
+    }
+
+    return umf_result;
+}
+
+static umf_result_t create_aligned_block(coarse_t *coarse, size_t orig_size,
+                                         size_t alignment, block_t **current) {
+    (void)orig_size; // unused in the Release version
+    int rv;
+
+    block_t *curr = *current;
+
+    // In case of non-zero alignment, create an aligned block that will then be used for the allocation.
+    uintptr_t orig_data = (uintptr_t)curr->data;
+    uintptr_t aligned_data = ALIGN_UP(orig_data, alignment);
+    size_t padding = aligned_data - orig_data;
+    if (alignment > 0 && padding > 0) {
+        // check if block can be split by the upstream provider
+        umf_result_t umf_result =
+            can_provider_split(coarse, curr->data, curr->size, padding);
+        if (umf_result != UMF_RESULT_SUCCESS) {
+            return umf_result;
+        }
+
+        block_t *aligned_block =
+            coarse_ravl_add_new(coarse->all_blocks, curr->data + padding,
+                                curr->size - padding, NULL);
+        if (aligned_block == NULL) {
+            return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+        }
+
+        curr->used = false;
+        curr->size = padding;
+
+        rv = free_blocks_add(coarse->free_blocks, curr);
+        if (rv) {
+            return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+        }
+
+        // use aligned block
+        *current = aligned_block;
+        assert((*current)->size >= orig_size);
+    }
+
+    return UMF_RESULT_SUCCESS;
+}
+
+// Split the current block and put the new block after the one that we use.
+static umf_result_t split_current_block(coarse_t *coarse, block_t *curr,
+                                        size_t size) {
+
+    // check if block can be split by the upstream provider
+    umf_result_t umf_result =
+        can_provider_split(coarse, curr->data, curr->size, size);
+    if (umf_result != UMF_RESULT_SUCCESS) {
+        return umf_result;
+    }
+
+    ravl_node_t *new_node = NULL;
+
+    block_t *new_block = coarse_ravl_add_new(
+        coarse->all_blocks, curr->data + size, curr->size - size, &new_node);
+    if (new_block == NULL) {
+        return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+    }
+
+    new_block->used = false;
+
+    int rv = free_blocks_add(coarse->free_blocks, get_node_block(new_node));
+    if (rv) {
+        return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+    }
+
+    return UMF_RESULT_SUCCESS;
+}
+
+static block_t *find_free_block(struct ravl *free_blocks, size_t size,
+                                size_t alignment,
+                                coarse_strategy_t allocation_strategy) {
+    block_t *block;
+    size_t new_size = size + alignment;
+
+    switch (allocation_strategy) {
+    case UMF_COARSE_MEMORY_STRATEGY_FASTEST:
+        // Always allocate a free block of the (size + alignment) size
+        // and later cut out the properly aligned part leaving two remaining parts.
+        if (new_size < size) {
+            LOG_ERR("arithmetic overflow (size + alignment)");
+            return NULL;
+        }
+
+        return free_blocks_rm_ge(free_blocks, new_size, 0,
+                                 CHECK_ONLY_THE_FIRST_BLOCK);
+
+    case UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE:
+        // First check if the first free block of the 'size' size has the correct alignment.
+        block = free_blocks_rm_ge(free_blocks, size, alignment,
+                                  CHECK_ONLY_THE_FIRST_BLOCK);
+        if (block) {
+            return block;
+        }
+
+        if (new_size < size) {
+            LOG_ERR("arithmetic overflow (size + alignment)");
+            return NULL;
+        }
+
+        // If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy.
+ return free_blocks_rm_ge(free_blocks, new_size, 0, + CHECK_ONLY_THE_FIRST_BLOCK); + + case UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE: + // First look through all free blocks of the 'size' size + // and choose the first one with the correct alignment. + block = free_blocks_rm_ge(free_blocks, size, alignment, + CHECK_ALL_BLOCKS_OF_SIZE); + if (block) { + return block; + } + + if (new_size < size) { + LOG_ERR("arithmetic overflow (size + alignment)"); + return NULL; + } + + // If none of them had the correct alignment, + // use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. + return free_blocks_rm_ge(free_blocks, new_size, 0, + CHECK_ONLY_THE_FIRST_BLOCK); + } + + return NULL; +} + +static int free_blocks_re_add(coarse_t *coarse, block_t *block) { + assert(coarse); + + ravl_node_t *node = coarse_ravl_find_node(coarse->all_blocks, block->data); + assert(node); + + // merge with prev and/or next block if they are unused and have continuous data + node = free_block_merge_with_prev(coarse, node); + node = free_block_merge_with_next(coarse, node); + + return free_blocks_add(coarse->free_blocks, get_node_block(node)); +} + +static void ravl_cb_count(void *data, void *arg) { + assert(arg); + (void)data; // unused + + size_t *num_all_blocks = arg; + (*num_all_blocks)++; +} + +static void ravl_cb_count_free(void *data, void *arg) { + assert(data); + assert(arg); + + ravl_data_t *node_data = data; + assert(node_data); + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + struct ravl_free_blocks_elem_t *free_block = head_node->head; + assert(free_block); + + size_t *num_all_blocks = arg; + while (free_block) { + (*num_all_blocks)++; + free_block = free_block->next; + } +} + +static umf_result_t coarse_get_stats_no_lock(coarse_t *coarse, + coarse_stats_t *stats) { + assert(coarse); + + size_t num_all_blocks = 0; + ravl_foreach(coarse->all_blocks, ravl_cb_count, &num_all_blocks); + + size_t num_free_blocks = 0; + ravl_foreach(coarse->free_blocks, ravl_cb_count_free, &num_free_blocks); + + stats->alloc_size = coarse->alloc_size; + stats->used_size = coarse->used_size; + stats->num_all_blocks = num_all_blocks; + stats->num_free_blocks = num_free_blocks; + + return UMF_RESULT_SUCCESS; +} + +// PUBLIC API + +umf_result_t coarse_new(coarse_params_t *coarse_params, coarse_t **pcoarse) { +#ifdef _WIN32 + utils_init_once(&Log_initialized, utils_log_init); +#endif /* _WIN32 */ + + if (coarse_params == NULL || pcoarse == NULL) { + LOG_ERR("coarse parameters or handle is missing"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!coarse_params->provider) { + LOG_ERR("memory provider is not set"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!coarse_params->page_size) { + LOG_ERR("page size of the memory provider is not set"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!coarse_params->cb.split) { + LOG_ERR("coarse split callback is not set"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!coarse_params->cb.merge) { + LOG_ERR("coarse merge callback is not set"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // alloc() and free() callbacks are optional + + coarse_t *coarse = umf_ba_global_alloc(sizeof(*coarse)); + if (!coarse) { + LOG_ERR("out of the host memory"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + memset(coarse, 0, sizeof(*coarse)); + + coarse->provider = coarse_params->provider; + coarse->page_size = coarse_params->page_size; + coarse->cb = coarse_params->cb; + coarse->allocation_strategy = 
coarse_params->allocation_strategy; + + umf_result_t umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + + coarse->free_blocks = ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); + if (coarse->free_blocks == NULL) { + LOG_ERR("out of the host memory"); + goto err_free_coarse; + } + + coarse->all_blocks = ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); + if (coarse->all_blocks == NULL) { + LOG_ERR("out of the host memory"); + goto err_delete_ravl_free_blocks; + } + + coarse->alloc_size = 0; + coarse->used_size = 0; + + umf_result = UMF_RESULT_ERROR_UNKNOWN; + + if (utils_mutex_init(&coarse->lock) == NULL) { + LOG_ERR("lock initialization failed"); + goto err_delete_ravl_all_blocks; + } + + assert(coarse->used_size == 0); + assert(coarse->alloc_size == 0); + assert(debug_check(coarse)); + + *pcoarse = coarse; + + return UMF_RESULT_SUCCESS; + +err_delete_ravl_all_blocks: + ravl_delete(coarse->all_blocks); +err_delete_ravl_free_blocks: + ravl_delete(coarse->free_blocks); +err_free_coarse: + umf_ba_global_free(coarse); + return umf_result; +} + +void coarse_delete(coarse_t *coarse) { + if (coarse == NULL) { + LOG_ERR("coarse handle is missing"); + return; + } + + utils_mutex_destroy_not_free(&coarse->lock); + + ravl_foreach(coarse->all_blocks, coarse_ravl_cb_rm_all_blocks_node, coarse); + assert(coarse->used_size == 0); + assert(coarse->alloc_size == 0); + + ravl_delete(coarse->all_blocks); + ravl_delete(coarse->free_blocks); + + umf_ba_global_free(coarse); +} + +umf_result_t coarse_add_memory_from_provider(coarse_t *coarse, size_t size) { + umf_result_t umf_result; + void *ptr = NULL; + + if (coarse == NULL || size == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!coarse->cb.alloc) { + LOG_ERR("error: alloc callback is not set"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + umf_result = coarse_alloc(coarse, size, coarse->page_size, &ptr); + if (umf_result != UMF_RESULT_SUCCESS) { + return umf_result; + } + + assert(ptr); + + return coarse_free(coarse, ptr, size); +} + +umf_result_t coarse_add_memory_fixed(coarse_t *coarse, void *addr, + size_t size) { + umf_result_t umf_result; + + if (coarse == NULL || addr == NULL || size == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (coarse->cb.alloc || coarse->cb.free) { + LOG_ERR("error: alloc or free callback is set"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + if (utils_mutex_lock(&coarse->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse)); + + umf_result = coarse_add_used_block(coarse, addr, size); + + assert(debug_check(coarse)); + utils_mutex_unlock(&coarse->lock); + + if (umf_result != UMF_RESULT_SUCCESS) { + return umf_result; + } + + umf_result = coarse_free(coarse, addr, size); + if (umf_result != UMF_RESULT_SUCCESS) { + return umf_result; + } + + LOG_DEBUG("coarse_ALLOC (add_memory_block) %zu used %zu alloc %zu", size, + coarse->used_size, coarse->alloc_size); + + return UMF_RESULT_SUCCESS; +} + +umf_result_t coarse_alloc(coarse_t *coarse, size_t size, size_t alignment, + void **resultPtr) { + umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN; + + if (coarse == NULL || resultPtr == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // alignment must be a power of two and a multiple or a divider of the page size + if (alignment == 0) { + alignment = coarse->page_size; + } else if ((alignment & (alignment - 1)) || + ((alignment % coarse->page_size) && + (coarse->page_size % alignment))) { + LOG_ERR("wrong 
alignment: %zu (not a power of 2 or a multiple or a "
+                "divider of the page size (%zu))",
+                alignment, coarse->page_size);
+        return UMF_RESULT_ERROR_INVALID_ALIGNMENT;
+    } else if (IS_NOT_ALIGNED(alignment, coarse->page_size)) {
+        alignment = ALIGN_UP_SAFE(alignment, coarse->page_size);
+    }
+
+    if (utils_mutex_lock(&coarse->lock) != 0) {
+        LOG_ERR("locking the lock failed");
+        return UMF_RESULT_ERROR_UNKNOWN;
+    }
+
+    assert(debug_check(coarse));
+
+    // Find a block with greater or equal size using the given memory allocation strategy
+    block_t *curr = find_free_block(coarse->free_blocks, size, alignment,
+                                    coarse->allocation_strategy);
+
+    // If the block that we want to reuse has a greater size, split it.
+    // Try to merge the split part with the successor if it is not used.
+    enum { ACTION_NONE = 0, ACTION_USE, ACTION_SPLIT } action = ACTION_NONE;
+
+    if (curr && curr->size > size) {
+        action = ACTION_SPLIT;
+    } else if (curr && curr->size == size) {
+        action = ACTION_USE;
+    }
+
+    if (action) { // ACTION_SPLIT or ACTION_USE
+        assert(curr->used == false);
+
+        // In case of non-zero alignment, create an aligned block that will then be used for the allocation.
+        if (alignment > 0) {
+            umf_result = create_aligned_block(coarse, size, alignment, &curr);
+            if (umf_result != UMF_RESULT_SUCCESS) {
+                (void)free_blocks_re_add(coarse, curr);
+                goto err_unlock;
+            }
+        }
+
+        if (action == ACTION_SPLIT) {
+            // Split the current block and put the new block after the one that we use.
+            umf_result = split_current_block(coarse, curr, size);
+            if (umf_result != UMF_RESULT_SUCCESS) {
+                (void)free_blocks_re_add(coarse, curr);
+                goto err_unlock;
+            }
+
+            curr->size = size;
+
+            LOG_DEBUG("coarse_ALLOC (split_block) %zu used %zu alloc %zu", size,
+                      coarse->used_size, coarse->alloc_size);
+
+        } else { // action == ACTION_USE
+            LOG_DEBUG("coarse_ALLOC (same_block) %zu used %zu alloc %zu", size,
+                      coarse->used_size, coarse->alloc_size);
+        }
+
+        curr->used = true;
+        *resultPtr = curr->data;
+        coarse->used_size += size;
+
+        assert(debug_check(coarse));
+        utils_mutex_unlock(&coarse->lock);
+
+        return UMF_RESULT_SUCCESS;
+    }
+
+    // no suitable block found - try to get more memory from the upstream provider
+    umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+
+    *resultPtr = NULL;
+
+    if (!coarse->cb.alloc) {
+        LOG_ERR("out of memory");
+        goto err_unlock;
+    }
+
+    umf_result = coarse->cb.alloc(coarse->provider, size, alignment, resultPtr);
+    if (umf_result != UMF_RESULT_SUCCESS) {
+        LOG_ERR("coarse_alloc_cb() failed: out of memory");
+        goto err_unlock;
+    }
+
+    ASSERT_IS_ALIGNED(((uintptr_t)(*resultPtr)), alignment);
+
+    umf_result = coarse_add_used_block(coarse, *resultPtr, size);
+    if (umf_result != UMF_RESULT_SUCCESS) {
+        if (coarse->cb.free) {
+            coarse->cb.free(coarse->provider, *resultPtr, size);
+        }
+        goto err_unlock;
+    }
+
+    LOG_DEBUG("coarse_ALLOC (memory_provider) %zu used %zu alloc %zu", size,
+              coarse->used_size, coarse->alloc_size);
+
+    umf_result = UMF_RESULT_SUCCESS;
+
+err_unlock:
+    assert(debug_check(coarse));
+    utils_mutex_unlock(&coarse->lock);
+
+    return umf_result;
+}
+
+umf_result_t coarse_free(coarse_t *coarse, void *ptr, size_t bytes) {
+    if (coarse == NULL) {
+        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+
+    if (ptr == NULL) {
+        return UMF_RESULT_SUCCESS;
+    }
+
+    if (utils_mutex_lock(&coarse->lock) != 0) {
+        LOG_ERR("locking the lock failed");
+        return UMF_RESULT_ERROR_UNKNOWN;
+    }
+
+    assert(debug_check(coarse));
+
+    ravl_node_t *node = coarse_ravl_find_node(coarse->all_blocks, ptr);
+    if (node == NULL) {
+        //
the block was not found + LOG_ERR("memory block not found (ptr = %p, size = %zu)", ptr, bytes); + utils_mutex_unlock(&coarse->lock); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + block_t *block = get_node_block(node); + if (!block->used) { + LOG_ERR("double free"); + utils_mutex_unlock(&coarse->lock); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (bytes > 0 && bytes != block->size) { + LOG_ERR("wrong size of allocation"); + utils_mutex_unlock(&coarse->lock); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + LOG_DEBUG("coarse_FREE (return_block_to_pool) %zu used %zu alloc %zu", + block->size, coarse->used_size - block->size, coarse->alloc_size); + + assert(coarse->used_size >= block->size); + coarse->used_size -= block->size; + + block->used = false; + + // Merge with prev and/or next block if they are unused and have continuous data. + node = free_block_merge_with_prev(coarse, node); + node = free_block_merge_with_next(coarse, node); + + int rv = free_blocks_add(coarse->free_blocks, get_node_block(node)); + if (rv) { + utils_mutex_unlock(&coarse->lock); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + assert(debug_check(coarse)); + utils_mutex_unlock(&coarse->lock); + + return UMF_RESULT_SUCCESS; +} + +umf_result_t coarse_merge(coarse_t *coarse, void *lowPtr, void *highPtr, + size_t totalSize) { + umf_result_t umf_result; + + if (coarse == NULL || lowPtr == NULL || highPtr == NULL || totalSize == 0 || + ((uintptr_t)highPtr <= (uintptr_t)lowPtr) || + ((uintptr_t)highPtr - (uintptr_t)lowPtr >= totalSize)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (utils_mutex_lock(&coarse->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse)); + + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + + ravl_node_t *low_node = coarse_ravl_find_node(coarse->all_blocks, lowPtr); + if (low_node == NULL) { + LOG_ERR("the lowPtr memory block not found"); + goto err_mutex_unlock; + } + + block_t *low_block = get_node_block(low_node); + if (!low_block->used) { + LOG_ERR("the lowPtr block is not allocated"); + goto err_mutex_unlock; + } + + ravl_node_t *high_node = coarse_ravl_find_node(coarse->all_blocks, highPtr); + if (high_node == NULL) { + LOG_ERR("the highPtr memory block not found"); + goto err_mutex_unlock; + } + + block_t *high_block = get_node_block(high_node); + if (!high_block->used) { + LOG_ERR("the highPtr block is not allocated"); + goto err_mutex_unlock; + } + + if (get_node_next(low_node) != high_node || + ((uintptr_t)highPtr != ((uintptr_t)lowPtr + low_block->size))) { + LOG_ERR("given allocations are not adjacent"); + goto err_mutex_unlock; + } + + assert(get_node_prev(high_node) == low_node); + + if (low_block->size + high_block->size != totalSize) { + LOG_ERR("wrong totalSize: %zu != %zu", totalSize, + low_block->size + high_block->size); + goto err_mutex_unlock; + } + + ravl_node_t *merged_node = NULL; + + umf_result = + user_block_merge(coarse, low_node, high_node, true, &merged_node); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("merging a block failed"); + goto err_mutex_unlock; + } + + assert(merged_node == low_node); + assert(low_block->size == totalSize); + + umf_result = UMF_RESULT_SUCCESS; + +err_mutex_unlock: + assert(debug_check(coarse)); + utils_mutex_unlock(&coarse->lock); + + return umf_result; +} + +umf_result_t coarse_split(coarse_t *coarse, void *ptr, size_t totalSize, + size_t firstSize) { + umf_result_t umf_result; + + if (coarse == NULL || ptr == NULL || (firstSize >= 
totalSize) || + firstSize == 0 || totalSize == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (utils_mutex_lock(&coarse->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse)); + + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + + ravl_node_t *node = coarse_ravl_find_node(coarse->all_blocks, ptr); + if (node == NULL) { + LOG_ERR("memory block not found"); + goto err_mutex_unlock; + } + + block_t *block = get_node_block(node); + + if (block->size != totalSize) { + LOG_ERR("wrong totalSize: %zu != %zu", totalSize, block->size); + goto err_mutex_unlock; + } + + if (!block->used) { + LOG_ERR("block is not allocated"); + goto err_mutex_unlock; + } + + // check if block can be split by the memory provider + umf_result = can_provider_split(coarse, ptr, totalSize, firstSize); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("memory provider cannot split a memory block"); + goto err_mutex_unlock; + } + + umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + + block_t *new_block = + coarse_ravl_add_new(coarse->all_blocks, block->data + firstSize, + block->size - firstSize, NULL); + if (new_block == NULL) { + goto err_mutex_unlock; + } + + block->size = firstSize; + new_block->used = true; + + assert(new_block->size == (totalSize - firstSize)); + + umf_result = UMF_RESULT_SUCCESS; + +err_mutex_unlock: + assert(debug_check(coarse)); + utils_mutex_unlock(&coarse->lock); + + return umf_result; +} + +coarse_stats_t coarse_get_stats(coarse_t *coarse) { + coarse_stats_t stats = {0}; + + if (coarse == NULL) { + return stats; + } + + if (utils_mutex_lock(&coarse->lock) != 0) { + LOG_ERR("locking the lock failed"); + return stats; + } + + coarse_get_stats_no_lock(coarse, &stats); + + utils_mutex_unlock(&coarse->lock); + + return stats; +} diff --git a/src/coarse/coarse.h b/src/coarse/coarse.h new file mode 100644 index 0000000000..93ec990027 --- /dev/null +++ b/src/coarse/coarse.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_COARSE_H +#define UMF_COARSE_H + +#include +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct coarse_t coarse_t; + +// coarse callbacks implement provider-specific actions +typedef struct coarse_callbacks_t { + // alloc() is optional (can be NULL for the fixed-size memory provider) + umf_result_t (*alloc)(void *provider, size_t size, size_t alignment, + void **ptr); + // free() is optional (can be NULL if the provider does not support the free() op) + umf_result_t (*free)(void *provider, void *ptr, size_t size); + umf_result_t (*split)(void *provider, void *ptr, size_t totalSize, + size_t firstSize); + umf_result_t (*merge)(void *provider, void *lowPtr, void *highPtr, + size_t totalSize); +} coarse_callbacks_t; + +// coarse library allocation strategy +typedef enum coarse_strategy_t { + // Check if the first free block of the 'size' size has the correct alignment. + // If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. + UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE = 0, + + // Always allocate a free block of the (size + alignment) size + // and cut out the properly aligned part leaving two remaining parts. + // It is the fastest strategy but causes memory fragmentation + // when alignment is greater than 0. + // It is the best strategy when alignment always equals 0. 
+ UMF_COARSE_MEMORY_STRATEGY_FASTEST, + + // Look through all free blocks of the 'size' size + // and choose the first one with the correct alignment. + // If none of them had the correct alignment, + // use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. + UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE, +} coarse_strategy_t; + +// coarse library settings structure +typedef struct coarse_params_t { + // handle of the memory provider + void *provider; + + // coarse callbacks + coarse_callbacks_t cb; + + // memory allocation strategy, + // see coarse_strategy_t for details + coarse_strategy_t allocation_strategy; + + // page size of the memory provider + size_t page_size; +} coarse_params_t; + +// coarse library statistics +typedef struct coarse_stats_t { + // total allocation size + size_t alloc_size; + + // size of used memory + size_t used_size; + + // total number of allocated memory blocks + size_t num_all_blocks; + + // number of free memory blocks + size_t num_free_blocks; +} coarse_stats_t; + +umf_result_t coarse_new(coarse_params_t *coarse_params, coarse_t **pcoarse); +void coarse_delete(coarse_t *coarse); + +umf_result_t coarse_alloc(coarse_t *coarse, size_t size, size_t alignment, + void **resultPtr); +umf_result_t coarse_free(coarse_t *coarse, void *ptr, size_t bytes); + +umf_result_t coarse_merge(coarse_t *coarse, void *lowPtr, void *highPtr, + size_t totalSize); +umf_result_t coarse_split(coarse_t *coarse, void *ptr, size_t totalSize, + size_t firstSize); + +// supported only if the alloc callback is set, +// returns UMF_RESULT_ERROR_NOT_SUPPORTED otherwise +umf_result_t coarse_add_memory_from_provider(coarse_t *coarse, size_t size); + +// supported only if the alloc and the free callbacks are NOT set +// returns UMF_RESULT_ERROR_NOT_SUPPORTED otherwise +umf_result_t coarse_add_memory_fixed(coarse_t *coarse, void *addr, size_t size); + +coarse_stats_t coarse_get_stats(coarse_t *coarse); + +#ifdef __cplusplus +} +#endif + +#endif // UMF_COARSE_H diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index 62d14af73d..7db9dd3dca 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -64,6 +64,7 @@ #include "utils_assert.h" #include "utils_common.h" #include "utils_concurrency.h" +#include "utils_math.h" /* * A node that has been deleted is left untouched for this many delete @@ -133,24 +134,6 @@ struct critnib { struct utils_mutex_t mutex; /* writes/removes */ }; -/* - * atomic load - */ -static void load(void *src, void *dst) { - utils_atomic_load_acquire((word *)src, (word *)dst); -} - -static void load64(uint64_t *src, uint64_t *dst) { - utils_atomic_load_acquire(src, dst); -} - -/* - * atomic store - */ -static void store(void *dst, void *src) { - utils_atomic_store_release((word *)dst, (word)src); -} - /* * internal: is_leaf -- check tagged pointer for leafness */ @@ -192,8 +175,8 @@ struct critnib *critnib_new(void) { goto err_free_critnib; } - VALGRIND_HG_DRD_DISABLE_CHECKING(&c->root, sizeof(c->root)); - VALGRIND_HG_DRD_DISABLE_CHECKING(&c->remove_count, sizeof(c->remove_count)); + utils_annotate_memory_no_check(&c->root, sizeof(c->root)); + utils_annotate_memory_no_check(&c->remove_count, sizeof(c->remove_count)); return c; err_free_critnib: @@ -263,8 +246,8 @@ static void free_node(struct critnib *__restrict c, } ASSERT(!is_leaf(n)); - n->child[0] = c->deleted_node; - c->deleted_node = n; + utils_atomic_store_release_ptr((void **)&n->child[0], c->deleted_node); + utils_atomic_store_release_ptr((void **)&c->deleted_node, n); } /* @@ -278,7 +261,7 @@ static struct critnib_node *alloc_node(struct critnib *__restrict c) { struct critnib_node *n = c->deleted_node; c->deleted_node = n->child[0]; - VALGRIND_ANNOTATE_NEW_MEMORY(n, sizeof(*n)); + utils_annotate_memory_new(n, sizeof(*n)); return n; } @@ -294,8 +277,8 @@ static void free_leaf(struct critnib *__restrict c, return; } - k->value = c->deleted_leaf; - c->deleted_leaf = k; + utils_atomic_store_release_ptr((void **)&k->value, c->deleted_leaf); + utils_atomic_store_release_ptr((void **)&c->deleted_leaf, k); } /* @@ -303,13 +286,13 @@ static void free_leaf(struct critnib *__restrict c, */ static struct critnib_leaf *alloc_leaf(struct critnib *__restrict c) { if (!c->deleted_leaf) { - return umf_ba_global_alloc(sizeof(struct critnib_leaf)); + return umf_ba_global_aligned_alloc(sizeof(struct critnib_leaf), 8); } struct critnib_leaf *k = c->deleted_leaf; c->deleted_leaf = k->value; - VALGRIND_ANNOTATE_NEW_MEMORY(k, sizeof(*k)); + utils_annotate_memory_new(k, sizeof(*k)); return k; } @@ -334,19 +317,17 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { return ENOMEM; } - VALGRIND_HG_DRD_DISABLE_CHECKING(k, sizeof(struct critnib_leaf)); + utils_annotate_memory_no_check(k, sizeof(struct critnib_leaf)); - k->key = key; - k->value = value; + utils_atomic_store_release_ptr((void **)&k->key, (void *)key); + utils_atomic_store_release_ptr((void **)&k->value, value); struct critnib_node *kn = (void *)((word)k | 1); struct critnib_node *n = c->root; if (!n) { - store(&c->root, kn); - + utils_atomic_store_release_ptr((void **)&c->root, kn); utils_mutex_unlock(&c->mutex); - return 0; } @@ -361,7 +342,8 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { if (!n) { n = prev; - store(&n->child[slice_index(key, n->shift)], kn); + utils_atomic_store_release_ptr( + (void **)&n->child[slice_index(key, n->shift)], kn); utils_mutex_unlock(&c->mutex); @@ -376,7 +358,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { free_leaf(c, to_leaf(kn)); if (update) { - to_leaf(n)->value = value; + 
utils_atomic_store_release_ptr(&to_leaf(n)->value, value); utils_mutex_unlock(&c->mutex); return 0; } else { @@ -386,7 +368,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { } /* and convert that to an index. */ - sh_t sh = utils_mssb_index(at) & (sh_t) ~(SLICE - 1); + sh_t sh = utils_msb64(at) & (sh_t) ~(SLICE - 1); struct critnib_node *m = alloc_node(c); if (!m) { @@ -396,17 +378,18 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { return ENOMEM; } - VALGRIND_HG_DRD_DISABLE_CHECKING(m, sizeof(struct critnib_node)); + utils_annotate_memory_no_check(m, sizeof(struct critnib_node)); for (int i = 0; i < SLNODES; i++) { - m->child[i] = NULL; + utils_atomic_store_release_ptr((void *)&m->child[i], NULL); } - m->child[slice_index(key, sh)] = kn; - m->child[slice_index(path, sh)] = n; + utils_atomic_store_release_ptr((void *)&m->child[slice_index(key, sh)], kn); + utils_atomic_store_release_ptr((void *)&m->child[slice_index(path, sh)], n); m->shift = sh; - m->path = key & path_mask(sh); - store(parent, m); + utils_atomic_store_release_u64((void *)&m->path, key & path_mask(sh)); + + utils_atomic_store_release_ptr((void **)parent, m); utils_mutex_unlock(&c->mutex); @@ -427,7 +410,8 @@ void *critnib_remove(struct critnib *c, word key) { goto not_found; } - word del = (utils_atomic_increment(&c->remove_count) - 1) % DELETED_LIFE; + word del = + (utils_atomic_increment_u64(&c->remove_count) - 1) % DELETED_LIFE; free_node(c, c->pending_del_nodes[del]); free_leaf(c, c->pending_del_leaves[del]); c->pending_del_nodes[del] = NULL; @@ -436,7 +420,7 @@ void *critnib_remove(struct critnib *c, word key) { if (is_leaf(n)) { k = to_leaf(n); if (k->key == key) { - store(&c->root, NULL); + utils_atomic_store_release_ptr((void **)&c->root, NULL); goto del_leaf; } @@ -466,7 +450,8 @@ void *critnib_remove(struct critnib *c, word key) { goto not_found; } - store(&n->child[slice_index(key, n->shift)], NULL); + utils_atomic_store_release_ptr( + (void **)&n->child[slice_index(key, n->shift)], NULL); /* Remove the node if there's only one remaining child. */ int ochild = -1; @@ -482,7 +467,7 @@ void *critnib_remove(struct critnib *c, word key) { ASSERTne(ochild, -1); - store(n_parent, n->child[ochild]); + utils_atomic_store_release_ptr((void **)n_parent, n->child[ochild]); c->pending_del_nodes[del] = n; del_leaf: @@ -511,8 +496,8 @@ void *critnib_get(struct critnib *c, word key) { do { struct critnib_node *n; - load64(&c->remove_count, &wrs1); - load(&c->root, &n); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs1); + utils_atomic_load_acquire_ptr((void **)&c->root, (void **)&n); /* * critbit algorithm: dive into the tree, looking at nothing but @@ -520,13 +505,14 @@ void *critnib_get(struct critnib *c, word key) { * going wrong way if our path is missing, but that's ok... */ while (n && !is_leaf(n)) { - load(&n->child[slice_index(key, n->shift)], &n); + utils_atomic_load_acquire_ptr( + (void **)&n->child[slice_index(key, n->shift)], (void **)&n); } /* ... as we check it at the end. */ struct critnib_leaf *k = to_leaf(n); res = (n && k->key == key) ? 
k->value : NULL; - load64(&c->remove_count, &wrs2); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs2); } while (wrs1 + DELETED_LIFE <= wrs2); return res; @@ -540,7 +526,9 @@ find_predecessor(struct critnib_node *__restrict n) { while (1) { int nib; for (nib = NIB; nib >= 0; nib--) { - if (n->child[nib]) { + struct critnib_node *m; + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&m); + if (m) { break; } } @@ -549,7 +537,12 @@ find_predecessor(struct critnib_node *__restrict n) { return NULL; } - n = n->child[nib]; + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&n); + + if (!n) { + return NULL; + } + if (is_leaf(n)) { return to_leaf(n); } @@ -577,12 +570,15 @@ static struct critnib_leaf *find_le(struct critnib_node *__restrict n, * that shift points at the nib's lower rather than upper edge, so it * needs to be masked away as well. */ - if ((key ^ n->path) >> (n->shift) & ~NIB) { + word path; + sh_t shift = n->shift; + utils_atomic_load_acquire_u64((uint64_t *)&n->path, (uint64_t *)&path); + if ((key ^ path) >> (shift) & ~NIB) { /* * subtree is too far to the left? * -> its rightmost value is good */ - if (n->path < key) { + if (path < key) { return find_predecessor(n); } @@ -597,7 +593,7 @@ static struct critnib_leaf *find_le(struct critnib_node *__restrict n, /* recursive call: follow the path */ { struct critnib_node *m; - load(&n->child[nib], &m); + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&m); struct critnib_leaf *k = find_le(m, key); if (k) { return k; @@ -611,7 +607,7 @@ static struct critnib_leaf *find_le(struct critnib_node *__restrict n, */ for (; nib > 0; nib--) { struct critnib_node *m; - load(&n->child[nib - 1], &m); + utils_atomic_load_acquire_ptr((void **)&n->child[nib - 1], (void **)&m); if (m) { n = m; if (is_leaf(n)) { @@ -635,12 +631,12 @@ void *critnib_find_le(struct critnib *c, word key) { void *res; do { - load64(&c->remove_count, &wrs1); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs1); struct critnib_node *n; /* avoid a subtle TOCTOU */ - load(&c->root, &n); + utils_atomic_load_acquire_ptr((void **)&c->root, (void **)&n); struct critnib_leaf *k = n ? find_le(n, key) : NULL; res = k ? 
k->value : NULL;
-        load64(&c->remove_count, &wrs2);
+        utils_atomic_load_acquire_u64(&c->remove_count, &wrs2);
     } while (wrs1 + DELETED_LIFE <= wrs2);
 
     return res;
@@ -653,7 +649,9 @@ static struct critnib_leaf *find_successor(struct critnib_node *__restrict n) {
     while (1) {
         unsigned nib;
         for (nib = 0; nib <= NIB; nib++) {
-            if (n->child[nib]) {
+            struct critnib_node *m;
+            utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&m);
+            if (m) {
                 break;
             }
         }
@@ -662,7 +660,12 @@ static struct critnib_leaf *find_successor(struct critnib_node *__restrict n) {
             return NULL;
         }
 
-        n = n->child[nib];
+        utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&n);
+
+        if (!n) {
+            return NULL;
+        }
+
         if (is_leaf(n)) {
             return to_leaf(n);
         }
@@ -694,7 +697,7 @@ static struct critnib_leaf *find_ge(struct critnib_node *__restrict n,
     unsigned nib = slice_index(key, n->shift);
     {
         struct critnib_node *m;
-        load(&n->child[nib], &m);
+        utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&m);
         struct critnib_leaf *k = find_ge(m, key);
         if (k) {
             return k;
@@ -703,7 +706,7 @@ static struct critnib_leaf *find_ge(struct critnib_node *__restrict n,
     for (; nib < NIB; nib++) {
         struct critnib_node *m;
-        load(&n->child[nib + 1], &m);
+        utils_atomic_load_acquire_ptr((void **)&n->child[nib + 1], (void **)&m);
         if (m) {
             n = m;
             if (is_leaf(n)) {
@@ -741,9 +744,9 @@ int critnib_find(struct critnib *c, uintptr_t key, enum find_dir_t dir,
     }
 
     do {
-        load64(&c->remove_count, &wrs1);
+        utils_atomic_load_acquire_u64(&c->remove_count, &wrs1);
         struct critnib_node *n;
-        load(&c->root, &n);
+        utils_atomic_load_acquire_ptr((void **)&c->root, (void **)&n);
 
         if (dir < 0) {
             k = find_le(n, key);
@@ -751,17 +754,20 @@ int critnib_find(struct critnib *c, uintptr_t key, enum find_dir_t dir,
             k = find_ge(n, key);
         } else {
             while (n && !is_leaf(n)) {
-                load(&n->child[slice_index(key, n->shift)], &n);
+                utils_atomic_load_acquire_ptr(
+                    (void **)&n->child[slice_index(key, n->shift)],
+                    (void **)&n);
             }
             struct critnib_leaf *kk = to_leaf(n);
             k = (n && kk->key == key) ? kk : NULL;
         }
         if (k) {
-            _rkey = k->key;
-            _rvalue = k->value;
+            utils_atomic_load_acquire_u64((uint64_t *)&k->key,
+                                          (uint64_t *)&_rkey);
+            utils_atomic_load_acquire_ptr(&k->value, (void **)&_rvalue);
        }
-        load64(&c->remove_count, &wrs2);
+        utils_atomic_load_acquire_u64(&c->remove_count, &wrs2);
    } while (wrs1 + DELETED_LIFE <= wrs2);

    if (k) {
diff --git a/src/ctl/ctl.c b/src/ctl/ctl.c
new file mode 100644
index 0000000000..4db11ac21f
--- /dev/null
+++ b/src/ctl/ctl.c
@@ -0,0 +1,596 @@
+/*
+ *
+ * Copyright (C) 2016-2025 Intel Corporation
+ *
+ * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ */
+
+// This file was originally under the following license:
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2024, Intel Corporation */
+
+/*
+ * ctl.c -- implementation of the interface for examination and modification of
+ * the library's internal state
+ */
+
+#include "ctl.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "base_alloc/base_alloc_global.h"
+#include "utils/utils_common.h"
+#include "utlist.h"
+
+#ifdef _WIN32
+#define strtok_r strtok_s
+#else
+#include <string.h>
+#endif
+
+#define CTL_MAX_ENTRIES 100
+
+#define MAX_CONFIG_FILE_LEN (1 << 20) /* 1 megabyte */
+
+#define CTL_STRING_QUERY_SEPARATOR ";"
+#define CTL_NAME_VALUE_SEPARATOR "="
+#define CTL_QUERY_NODE_SEPARATOR "."
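+
+// Illustrative example (hypothetical path; "foo.0.bar" is not a real UMF CTL
+// node): together with CTL_VALUE_ARG_SEPARATOR defined just below, these
+// separators yield config strings of the form
+//   ctl_load_config_from_string(ctl, ctx, "foo.0.bar=1;baz=y,7");
+// where ';' splits queries, '.' splits path nodes, '=' splits a name from
+// its value, and ',' splits the fields of a multi-field value.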
+#define CTL_VALUE_ARG_SEPARATOR "," + +static int ctl_global_first_free = 0; +static struct ctl_node CTL_NODE(global)[CTL_MAX_ENTRIES]; + +/* + * This is the top level node of the ctl tree structure. Each node can contain + * children and leaf nodes. + * + * Internal nodes simply create a new path in the tree whereas child nodes are + * the ones providing the read/write functionality by the means of callbacks. + * + * Each tree node must be NULL-terminated, CTL_NODE_END macro is provided for + * convenience. + */ +struct ctl { + struct ctl_node root[CTL_MAX_ENTRIES]; + int first_free; +}; + +void *Zalloc(size_t sz) { + void *ptr = umf_ba_global_alloc(sz); + if (ptr) { + memset(ptr, 0, sz); + } + return ptr; +} + +char *Strdup(const char *s) { + size_t len = strlen(s) + 1; + char *p = umf_ba_global_alloc(len); + if (p) { + memcpy(p, s, len); + } + return p; +} + +/* + * ctl_find_node -- (internal) searches for a matching entry point in the + * provided nodes + * + * The caller is responsible for freeing all of the allocated indexes, + * regardless of the return value. + */ +static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, + const char *name, + struct ctl_index_utlist *indexes) { + const struct ctl_node *n = NULL; + char *sptr = NULL; + char *parse_str = Strdup(name); + if (parse_str == NULL) { + return NULL; + } + + char *node_name = strtok_r(parse_str, CTL_QUERY_NODE_SEPARATOR, &sptr); + + /* + * Go through the string and separate tokens that correspond to nodes + * in the main ctl tree. + */ + while (node_name != NULL) { + char *endptr; + /* + * Ignore errno from strtol: FreeBSD returns EINVAL if no + * conversion is performed. Linux does not, but endptr + * check is valid in both cases. + */ + int tmp_errno = errno; + long index_value = strtol(node_name, &endptr, 0); + errno = tmp_errno; + struct ctl_index_utlist *index_entry = NULL; + if (endptr != node_name) { /* a valid index */ + index_entry = umf_ba_global_alloc(sizeof(*index_entry)); + if (index_entry == NULL) { + goto error; + } + index_entry->value = index_value; + LL_PREPEND(indexes, index_entry); + } + + for (n = &nodes[0]; n->name != NULL; ++n) { + if (index_entry && n->type == CTL_NODE_INDEXED) { + break; + } else if (strcmp(n->name, node_name) == 0) { + break; + } + } + if (n->name == NULL) { + goto error; + } + + if (index_entry) { + index_entry->name = n->name; + } + + nodes = n->children; + node_name = strtok_r(NULL, CTL_QUERY_NODE_SEPARATOR, &sptr); + } + + umf_ba_global_free(parse_str); + return n; + +error: + umf_ba_global_free(parse_str); + return NULL; +} + +/* + * ctl_delete_indexes -- + * (internal) removes and frees all entries on the index list + */ +static void ctl_delete_indexes(struct ctl_index_utlist *indexes) { + if (!indexes) { + return; + } + struct ctl_index_utlist *elem, *tmp; + LL_FOREACH_SAFE(indexes, elem, tmp) { + LL_DELETE(indexes, elem); + if (elem) { + umf_ba_global_free(elem); + } + } +} + +/* + * ctl_parse_args -- (internal) parses a string argument based on the node + * structure + */ +static void *ctl_parse_args(const struct ctl_argument *arg_proto, char *arg) { + char *dest_arg = umf_ba_global_alloc(arg_proto->dest_size); + if (dest_arg == NULL) { + return NULL; + } + + char *sptr = NULL; + char *arg_sep = strtok_r(arg, CTL_VALUE_ARG_SEPARATOR, &sptr); + for (const struct ctl_argument_parser *p = arg_proto->parsers; + p->parser != NULL; ++p) { + if (arg_sep == NULL) { + goto error_parsing; + } + + if (p->parser(arg_sep, dest_arg + p->dest_offset, p->dest_size) != 0) 
{ + goto error_parsing; + } + + arg_sep = strtok_r(NULL, CTL_VALUE_ARG_SEPARATOR, &sptr); + } + + return dest_arg; + +error_parsing: + umf_ba_global_free(dest_arg); + return NULL; +} + +/* + * ctl_query_get_real_args -- (internal) returns a pointer with actual argument + * structure as required by the node callback + */ +static void *ctl_query_get_real_args(const struct ctl_node *n, void *write_arg, + enum ctl_query_source source) { + void *real_arg = NULL; + switch (source) { + case CTL_QUERY_CONFIG_INPUT: + real_arg = ctl_parse_args(n->arg, write_arg); + break; + case CTL_QUERY_PROGRAMMATIC: + real_arg = write_arg; + break; + default: + break; + } + + return real_arg; +} + +/* + * ctl_query_cleanup_real_args -- (internal) cleanups relevant argument + * structures allocated as a result of the get_real_args call + */ +static void ctl_query_cleanup_real_args(const struct ctl_node *n, + void *real_arg, + enum ctl_query_source source) { + /* suppress unused-parameter errors */ + (void)n; + + switch (source) { + case CTL_QUERY_CONFIG_INPUT: + umf_ba_global_free(real_arg); + break; + case CTL_QUERY_PROGRAMMATIC: + break; + default: + break; + } +} + +/* + * ctl_exec_query_read -- (internal) calls the read callback of a node + */ +static int ctl_exec_query_read(void *ctx, const struct ctl_node *n, + enum ctl_query_source source, void *arg, + struct ctl_index_utlist *indexes) { + if (arg == NULL) { + errno = EINVAL; + return -1; + } + + return n->cb[CTL_QUERY_READ](ctx, source, arg, indexes); +} + +/* + * ctl_exec_query_write -- (internal) calls the write callback of a node + */ +static int ctl_exec_query_write(void *ctx, const struct ctl_node *n, + enum ctl_query_source source, void *arg, + struct ctl_index_utlist *indexes) { + if (arg == NULL) { + errno = EINVAL; + return -1; + } + + void *real_arg = ctl_query_get_real_args(n, arg, source); + if (real_arg == NULL) { + return -1; + } + + int ret = n->cb[CTL_QUERY_WRITE](ctx, source, real_arg, indexes); + ctl_query_cleanup_real_args(n, real_arg, source); + + return ret; +} + +/* + * ctl_exec_query_runnable -- (internal) calls the run callback of a node + */ +static int ctl_exec_query_runnable(void *ctx, const struct ctl_node *n, + enum ctl_query_source source, void *arg, + struct ctl_index_utlist *indexes) { + return n->cb[CTL_QUERY_RUNNABLE](ctx, source, arg, indexes); +} + +static int (*ctl_exec_query[MAX_CTL_QUERY_TYPE])( + void *ctx, const struct ctl_node *n, enum ctl_query_source source, + void *arg, struct ctl_index_utlist *indexes) = { + ctl_exec_query_read, + ctl_exec_query_write, + ctl_exec_query_runnable, +}; + +/* + * ctl_query -- (internal) parses the name and calls the appropriate methods + * from the ctl tree + */ +int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, + const char *name, enum ctl_query_type type, void *arg) { + if (name == NULL) { + errno = EINVAL; + return -1; + } + + /* + * All of the indexes are put on this list so that the handlers can + * easily retrieve the index values. The list is cleared once the ctl + * query has been handled. 
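+ * For example, in a hypothetical query "foo.0.bar", the numeric token "0"
+ * is parsed by ctl_find_node() into an index entry on this list, which the
+ * handler of the "bar" leaf can then look up by name.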
+ */ + struct ctl_index_utlist *indexes = NULL; + indexes = Zalloc(sizeof(*indexes)); + if (!indexes) { + return -1; + } + + int ret = -1; + + const struct ctl_node *n = ctl_find_node(CTL_NODE(global), name, indexes); + + if (n == NULL && ctl) { + ctl_delete_indexes(indexes); + indexes = NULL; + n = ctl_find_node(ctl->root, name, indexes); + } + + if (n == NULL || n->type != CTL_NODE_LEAF || n->cb[type] == NULL) { + errno = EINVAL; + goto out; + } + + ret = ctl_exec_query[type](ctx, n, source, arg, indexes); + +out: + ctl_delete_indexes(indexes); + + return ret; +} + +/* + * ctl_register_module_node -- adds a new node to the CTL tree root. + */ +void ctl_register_module_node(struct ctl *c, const char *name, + struct ctl_node *n) { + struct ctl_node *nnode = c == NULL + ? &CTL_NODE(global)[ctl_global_first_free++] + : &c->root[c->first_free++]; + + nnode->children = n; + nnode->type = CTL_NODE_NAMED; + nnode->name = name; +} + +/* + * ctl_parse_query -- (internal) splits an entire query string + * into name and value + */ +static int ctl_parse_query(char *qbuf, char **name, char **value) { + if (qbuf == NULL) { + return -1; + } + + char *sptr = NULL; + *name = strtok_r(qbuf, CTL_NAME_VALUE_SEPARATOR, &sptr); + if (*name == NULL) { + return -1; + } + + *value = strtok_r(NULL, CTL_NAME_VALUE_SEPARATOR, &sptr); + if (*value == NULL) { + return -1; + } + + /* the value itself mustn't include CTL_NAME_VALUE_SEPARATOR */ + char *extra = strtok_r(NULL, CTL_NAME_VALUE_SEPARATOR, &sptr); + if (extra != NULL) { + return -1; + } + + return 0; +} + +/* + * ctl_load_config -- executes the entire query collection from a provider + */ +static int ctl_load_config(struct ctl *ctl, void *ctx, char *buf) { + int r = 0; + char *sptr = NULL; /* for internal use of strtok */ + char *name; + char *value; + char *qbuf = strtok_r(buf, CTL_STRING_QUERY_SEPARATOR, &sptr); + + while (qbuf != NULL) { + r = ctl_parse_query(qbuf, &name, &value); + if (r != 0) { + return -1; + } + + r = ctl_query(ctl, ctx, CTL_QUERY_CONFIG_INPUT, name, CTL_QUERY_WRITE, + value); + + if (r < 0 && ctx != NULL) { + return -1; + } + + qbuf = strtok_r(NULL, CTL_STRING_QUERY_SEPARATOR, &sptr); + } + + return 0; +} + +/* + * ctl_load_config_from_string -- loads obj configuration from string + */ +int ctl_load_config_from_string(struct ctl *ctl, void *ctx, + const char *cfg_string) { + char *buf = Strdup(cfg_string); + if (buf == NULL) { + return -1; + } + + int ret = ctl_load_config(ctl, ctx, buf); + + umf_ba_global_free(buf); + return ret; +} + +/* + * ctl_load_config_from_file -- loads obj configuration from file + * + * This function opens up the config file, allocates a buffer of size equal to + * the size of the file, reads its content and sanitizes it for ctl_load_config. 
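+ * Sanitizing (see the read loop below) means dropping everything from a '#'
+ * to the end of its line and removing all whitespace, so that only the bare
+ * "name=value;" queries are passed to ctl_load_config().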
+ */ +#ifndef _WIN32 // TODO: implement for Windows +int ctl_load_config_from_file(struct ctl *ctl, void *ctx, + const char *cfg_file) { + int ret = -1; + long fsize = 0; + char *buf = NULL; + + FILE *fp = fopen(cfg_file, "r"); + if (fp == NULL) { + return ret; + } + + int err; + if ((err = fseek(fp, 0, SEEK_END)) != 0) { + goto error_file_parse; + } + + fsize = ftell(fp); + if (fsize == -1) { + goto error_file_parse; + } + + if (fsize > MAX_CONFIG_FILE_LEN) { + goto error_file_parse; + } + + if ((err = fseek(fp, 0, SEEK_SET)) != 0) { + goto error_file_parse; + } + + buf = Zalloc((size_t)fsize + 1); /* +1 for NULL-termination */ + if (buf == NULL) { + goto error_file_parse; + } + + { + size_t bufpos = 0; + int c; + int is_comment_section = 0; + while ((c = fgetc(fp)) != EOF) { + if (c == '#') { + is_comment_section = 1; + } else if (c == '\n') { + is_comment_section = 0; + } else if (!is_comment_section && !isspace(c)) { + buf[bufpos++] = (char)c; + } + } + } + + ret = ctl_load_config(ctl, ctx, buf); + + umf_ba_global_free(buf); + +error_file_parse: + (void)fclose(fp); + return ret; +} +#endif + +/* + * ctl_new -- allocates and initializes ctl data structures + */ +struct ctl *ctl_new(void) { + struct ctl *c = Zalloc(sizeof(struct ctl)); + if (c == NULL) { + return NULL; + } + + c->first_free = 0; + return c; +} + +/* + * ctl_delete -- deletes ctl + */ +void ctl_delete(struct ctl *c) { umf_ba_global_free(c); } + +/* + * ctl_parse_ll -- (internal) parses and returns a long long signed integer + */ +static long long ctl_parse_ll(const char *str) { + char *endptr; + int olderrno = errno; + errno = 0; + long long val = strtoll(str, &endptr, 0); + if (endptr == str || errno != 0) { + return LLONG_MIN; + } + errno = olderrno; + + return val; +} + +/* + * ctl_arg_boolean -- checks whether the provided argument contains + * either a 1 or y or Y. + */ +int ctl_arg_boolean(const void *arg, void *dest, size_t dest_size) { + /* suppress unused-parameter errors */ + (void)dest_size; + + int *intp = dest; + char in = ((const char *)arg)[0]; + + if (tolower(in) == 'y' || in == '1') { + *intp = 1; + return 0; + } else if (tolower(in) == 'n' || in == '0') { + *intp = 0; + return 0; + } + + return -1; +} + +/* + * ctl_arg_integer -- parses signed integer argument + */ +int ctl_arg_integer(const void *arg, void *dest, size_t dest_size) { + long long val = ctl_parse_ll(arg); + if (val == LLONG_MIN) { + return -1; + } + + switch (dest_size) { + case sizeof(int): + if (val > INT_MAX || val < INT_MIN) { + return -1; + } + *(int *)dest = (int)val; + break; + case sizeof(long long): + *(long long *)dest = val; + break; + case sizeof(uint8_t): + if (val > UINT8_MAX || val < 0) { + return -1; + } + *(uint8_t *)dest = (uint8_t)val; + break; + default: + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * ctl_arg_string -- verifies length and copies a string argument into a zeroed + * buffer + */ +int ctl_arg_string(const void *arg, void *dest, size_t dest_size) { + /* check if the incoming string is longer or equal to dest_size */ + if (strnlen(arg, dest_size) == dest_size) { + return -1; + } + + strncpy(dest, arg, dest_size); + + return 0; +} diff --git a/src/ctl/ctl.h b/src/ctl/ctl.h new file mode 100644 index 0000000000..9327b01afe --- /dev/null +++ b/src/ctl/ctl.h @@ -0,0 +1,216 @@ +/* + * + * Copyright (C) 2016-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +// This file was originally under following license: +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * ctl.h -- internal declaration of statistics and control related structures + */ + +#ifndef UMF_CTL_H +#define UMF_CTL_H 1 + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct ctl; + +struct ctl_index_utlist { + const char *name; + long value; + struct ctl_index_utlist *next; +}; + +enum ctl_query_source { + CTL_UNKNOWN_QUERY_SOURCE, + /* query executed directly from the program */ + CTL_QUERY_PROGRAMMATIC, + /* query executed from the config file */ + CTL_QUERY_CONFIG_INPUT, + + MAX_CTL_QUERY_SOURCE +}; + +enum ctl_query_type { + CTL_QUERY_READ, + CTL_QUERY_WRITE, + CTL_QUERY_RUNNABLE, + + MAX_CTL_QUERY_TYPE +}; + +typedef int (*node_callback)(void *ctx, enum ctl_query_source type, void *arg, + struct ctl_index_utlist *indexes); + +enum ctl_node_type { + CTL_NODE_UNKNOWN, + CTL_NODE_NAMED, + CTL_NODE_LEAF, + CTL_NODE_INDEXED, + + MAX_CTL_NODE +}; + +typedef int (*ctl_arg_parser)(const void *arg, void *dest, size_t dest_size); + +struct ctl_argument_parser { + size_t dest_offset; /* offset of the field inside of the argument */ + size_t dest_size; /* size of the field inside of the argument */ + ctl_arg_parser parser; +}; + +struct ctl_argument { + size_t dest_size; /* size of the entire argument */ + struct ctl_argument_parser parsers[]; /* array of 'fields' in arg */ +}; + +#define sizeof_member(t, m) sizeof(((t *)0)->m) + +#define CTL_ARG_PARSER(t, p) \ + { 0, sizeof(t), p } + +#define CTL_ARG_PARSER_STRUCT(t, m, p) \ + { offsetof(t, m), sizeof_member(t, m), p } + +#define CTL_ARG_PARSER_END \ + { 0, 0, NULL } + +/* + * CTL Tree node structure, do not use directly. All the necessary functionality + * is provided by the included macros. + */ +struct ctl_node { + const char *name; + enum ctl_node_type type; + + node_callback cb[MAX_CTL_QUERY_TYPE]; + const struct ctl_argument *arg; + + const struct ctl_node *children; +}; + +struct ctl *ctl_new(void); +void ctl_delete(struct ctl *stats); + +int ctl_load_config_from_string(struct ctl *ctl, void *ctx, + const char *cfg_string); +int ctl_load_config_from_file(struct ctl *ctl, void *ctx, const char *cfg_file); + +/* Use through CTL_REGISTER_MODULE, never directly */ +void ctl_register_module_node(struct ctl *c, const char *name, + struct ctl_node *n); + +int ctl_arg_boolean(const void *arg, void *dest, size_t dest_size); +#define CTL_ARG_BOOLEAN \ + {sizeof(int), {{0, sizeof(int), ctl_arg_boolean}, CTL_ARG_PARSER_END}}; + +int ctl_arg_integer(const void *arg, void *dest, size_t dest_size); +#define CTL_ARG_INT \ + {sizeof(int), {{0, sizeof(int), ctl_arg_integer}, CTL_ARG_PARSER_END}}; + +#define CTL_ARG_LONG_LONG \ + { \ + sizeof(long long), { \ + {0, sizeof(long long), ctl_arg_integer}, CTL_ARG_PARSER_END \ + } \ + } + +int ctl_arg_string(const void *arg, void *dest, size_t dest_size); +#define CTL_ARG_STRING(len) \ + {len, {{0, len, ctl_arg_string}, CTL_ARG_PARSER_END}}; + +#define CTL_STR(name) #name + +#define CTL_NODE_END \ + { NULL, CTL_NODE_UNKNOWN, {NULL, NULL, NULL}, NULL, NULL } + +#define CTL_NODE(name, ...) ctl_node_##__VA_ARGS__##_##name + +int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, + const char *name, enum ctl_query_type type, void *arg); + +/* Declaration of a new child node */ +#define CTL_CHILD(name, ...) 
\ + { \ + CTL_STR(name), CTL_NODE_NAMED, {NULL, NULL, NULL}, NULL, \ + (struct ctl_node *)CTL_NODE(name, __VA_ARGS__) \ + } + +/* Declaration of a new indexed node */ +#define CTL_INDEXED(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_INDEXED, {NULL, NULL, NULL}, NULL, \ + (struct ctl_node *)CTL_NODE(name, __VA_ARGS__) \ + } + +#define CTL_READ_HANDLER(name, ...) ctl_##__VA_ARGS__##_##name##_read + +#define CTL_WRITE_HANDLER(name, ...) ctl_##__VA_ARGS__##_##name##_write + +#define CTL_RUNNABLE_HANDLER(name, ...) ctl_##__VA_ARGS__##_##name##_runnable + +#define CTL_ARG(name) ctl_arg_##name + +/* + * Declaration of a new read-only leaf. If used the corresponding read function + * must be declared by CTL_READ_HANDLER macro. + */ +#define CTL_LEAF_RO(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_LEAF, \ + {CTL_READ_HANDLER(name, __VA_ARGS__), NULL, NULL}, NULL, NULL \ + } + +/* + * Declaration of a new write-only leaf. If used the corresponding write + * function must be declared by CTL_WRITE_HANDLER macro. + */ +#define CTL_LEAF_WO(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_LEAF, \ + {NULL, CTL_WRITE_HANDLER(name, __VA_ARGS__), NULL}, \ + &CTL_ARG(name), NULL \ + } + +/* + * Declaration of a new runnable leaf. If used the corresponding run + * function must be declared by CTL_RUNNABLE_HANDLER macro. + */ +#define CTL_LEAF_RUNNABLE(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_LEAF, \ + {NULL, NULL, CTL_RUNNABLE_HANDLER(name, __VA_ARGS__)}, NULL, NULL \ + } + +/* + * Declaration of a new read-write leaf. If used both read and write function + * must be declared by CTL_READ_HANDLER and CTL_WRITE_HANDLER macros. + */ +#define CTL_LEAF_RW(name) \ + { \ + CTL_STR(name), CTL_NODE_LEAF, \ + {CTL_READ_HANDLER(name), CTL_WRITE_HANDLER(name), NULL}, \ + &CTL_ARG(name), NULL \ + } + +#define CTL_REGISTER_MODULE(_ctl, name) \ + ctl_register_module_node((_ctl), CTL_STR(name), \ + (struct ctl_node *)CTL_NODE(name)) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/ipc.c b/src/ipc.c index 1b479fd7c5..d4e5cc8066 100644 --- a/src/ipc.c +++ b/src/ipc.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,6 +15,7 @@ #include "base_alloc_global.h" #include "ipc_internal.h" #include "memory_pool_internal.h" +#include "memory_provider_internal.h" #include "provider/provider_tracking.h" #include "utils_common.h" #include "utils_log.h" @@ -123,14 +124,14 @@ umf_result_t umfOpenIPCHandle(umf_ipc_handler_handle_t hIPCHandler, umf_ipc_handle_t umfIPCHandle, void **ptr) { // IPC handler is an instance of tracking memory provider - if (*(uint32_t *)hIPCHandler != UMF_VERSION_CURRENT) { + umf_memory_provider_handle_t hProvider = hIPCHandler; + if (hProvider->ops.version != UMF_PROVIDER_OPS_VERSION_CURRENT) { // It is a temporary hack to verify that user passes correct IPC handler, // not a pool handle, as it was required in previous version. 
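        // (A note on why this heuristic works: a provider handle begins with
        // its ops structure, whose leading version field should match
        // UMF_PROVIDER_OPS_VERSION_CURRENT, whereas a mistakenly passed pool
        // handle is very unlikely to carry that value at this offset.)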
LOG_ERR("Invalid IPC handler."); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - umf_memory_provider_handle_t hProvider = hIPCHandler; void *base = NULL; umf_result_t ret = umfMemoryProviderOpenIPCHandle( @@ -145,19 +146,15 @@ umf_result_t umfOpenIPCHandle(umf_ipc_handler_handle_t hIPCHandler, } umf_result_t umfCloseIPCHandle(void *ptr) { - umf_alloc_info_t allocInfo; - umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + umf_ipc_info_t ipcInfo; + umf_result_t ret = umfMemoryTrackerGetIpcInfo(ptr, &ipcInfo); if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("cannot get alloc info for ptr = %p.", ptr); + LOG_ERR("cannot get IPC info for ptr = %p.", ptr); return ret; } - // We cannot use umfPoolGetMemoryProvider function because it returns - // upstream provider but we need tracking one - umf_memory_provider_handle_t hProvider = allocInfo.pool->provider; - - return umfMemoryProviderCloseIPCHandle(hProvider, allocInfo.base, - allocInfo.baseSize); + return umfMemoryProviderCloseIPCHandle(ipcInfo.provider, ipcInfo.base, + ipcInfo.baseSize); } umf_result_t umfPoolGetIPCHandler(umf_memory_pool_handle_t hPool, diff --git a/src/ipc_cache.c b/src/ipc_cache.c index 60072d4dfa..bf17a66a42 100644 --- a/src/ipc_cache.c +++ b/src/ipc_cache.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -22,41 +22,57 @@ #pragma warning(disable : 4702) #endif -struct ipc_handle_cache_entry_t; +struct ipc_opened_cache_entry_t; -typedef struct ipc_handle_cache_entry_t *hash_map_t; -typedef struct ipc_handle_cache_entry_t *lru_list_t; +typedef struct ipc_opened_cache_entry_t *hash_map_t; +typedef struct ipc_opened_cache_entry_t *lru_list_t; -typedef struct ipc_handle_cache_entry_t { +typedef struct ipc_opened_cache_entry_t { UT_hash_handle hh; - struct ipc_handle_cache_entry_t *next, *prev; - ipc_mapped_handle_cache_key_t key; + struct ipc_opened_cache_entry_t *next, *prev; + ipc_opened_cache_key_t key; uint64_t ref_count; uint64_t handle_id; hash_map_t *hash_table; // pointer to the hash table to which the entry belongs - ipc_mapped_handle_cache_value_t value; -} ipc_handle_cache_entry_t; + ipc_opened_cache_value_t value; +} ipc_opened_cache_entry_t; -typedef struct ipc_mapped_handle_cache_global_t { +typedef struct ipc_opened_cache_global_t { utils_mutex_t cache_lock; umf_ba_pool_t *cache_allocator; size_t max_size; size_t cur_size; lru_list_t lru_list; -} ipc_mapped_handle_cache_global_t; +} ipc_opened_cache_global_t; -typedef struct ipc_mapped_handle_cache_t { - ipc_mapped_handle_cache_global_t *global; +typedef struct ipc_opened_cache_t { + ipc_opened_cache_global_t *global; hash_map_t hash_table; - ipc_mapped_handle_cache_eviction_cb_t eviction_cb; -} ipc_mapped_handle_cache_t; - -ipc_mapped_handle_cache_global_t *IPC_MAPPED_CACHE_GLOBAL = NULL; + ipc_opened_cache_eviction_cb_t eviction_cb; +} ipc_opened_cache_t; + +ipc_opened_cache_global_t *IPC_OPENED_CACHE_GLOBAL = NULL; + +// Returns value of the UMF_MAX_OPENED_IPC_HANDLES environment variable +// or 0 if it is not set. 
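+// Illustrative example (value assumed): running with
+//   UMF_MAX_OPENED_IPC_HANDLES=1024
+// caps the opened-handles cache at 1024 entries; once the cache is full,
+// the least recently used entry with ref_count == 0 is evicted
+// (see umfIpcOpenedCacheGet() below).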
+static size_t umfIpcCacheGlobalInitMaxOpenedHandles(void) { + const char *max_size_str = getenv("UMF_MAX_OPENED_IPC_HANDLES"); + if (max_size_str) { + char *endptr; + size_t max_size = strtoul(max_size_str, &endptr, 10); + if (*endptr == '\0') { + return max_size; + } + LOG_ERR("Invalid value of UMF_MAX_OPENED_IPC_HANDLES: %s", + max_size_str); + } + return 0; +} umf_result_t umfIpcCacheGlobalInit(void) { umf_result_t ret = UMF_RESULT_SUCCESS; - ipc_mapped_handle_cache_global_t *cache_global = + ipc_opened_cache_global_t *cache_global = umf_ba_global_alloc(sizeof(*cache_global)); if (!cache_global) { LOG_ERR("Failed to allocate memory for the IPC cache global data"); @@ -71,19 +87,18 @@ umf_result_t umfIpcCacheGlobalInit(void) { } cache_global->cache_allocator = - umf_ba_create(sizeof(ipc_handle_cache_entry_t)); + umf_ba_create(sizeof(ipc_opened_cache_entry_t)); if (!cache_global->cache_allocator) { LOG_ERR("Failed to create IPC cache allocator"); ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; goto err_mutex_destroy; } - // TODO: make max_size configurable via environment variable - cache_global->max_size = 0; + cache_global->max_size = umfIpcCacheGlobalInitMaxOpenedHandles(); cache_global->cur_size = 0; cache_global->lru_list = NULL; - IPC_MAPPED_CACHE_GLOBAL = cache_global; + IPC_OPENED_CACHE_GLOBAL = cache_global; goto err_exit; err_mutex_destroy: @@ -97,15 +112,15 @@ umf_result_t umfIpcCacheGlobalInit(void) { #ifndef NDEBUG static size_t getGlobalLruListSize(lru_list_t lru_list) { size_t size = 0; - ipc_handle_cache_entry_t *tmp; + ipc_opened_cache_entry_t *tmp; DL_COUNT(lru_list, tmp, size); return size; } #endif /* NDEBUG */ void umfIpcCacheGlobalTearDown(void) { - ipc_mapped_handle_cache_global_t *cache_global = IPC_MAPPED_CACHE_GLOBAL; - IPC_MAPPED_CACHE_GLOBAL = NULL; + ipc_opened_cache_global_t *cache_global = IPC_OPENED_CACHE_GLOBAL; + IPC_OPENED_CACHE_GLOBAL = NULL; if (!cache_global) { return; @@ -119,31 +134,33 @@ void umfIpcCacheGlobalTearDown(void) { umf_ba_global_free(cache_global); } -ipc_mapped_handle_cache_handle_t umfIpcHandleMappedCacheCreate( - ipc_mapped_handle_cache_eviction_cb_t eviction_cb) { +ipc_opened_cache_handle_t +umfIpcOpenedCacheCreate(ipc_opened_cache_eviction_cb_t eviction_cb) { if (eviction_cb == NULL) { LOG_ERR("Eviction callback is NULL"); return NULL; } - ipc_mapped_handle_cache_t *cache = umf_ba_global_alloc(sizeof(*cache)); + ipc_opened_cache_t *cache = umf_ba_global_alloc(sizeof(*cache)); if (!cache) { LOG_ERR("Failed to allocate memory for the IPC cache"); return NULL; } - assert(IPC_MAPPED_CACHE_GLOBAL != NULL); + assert(IPC_OPENED_CACHE_GLOBAL != NULL); - cache->global = IPC_MAPPED_CACHE_GLOBAL; + cache->global = IPC_OPENED_CACHE_GLOBAL; cache->hash_table = NULL; cache->eviction_cb = eviction_cb; return cache; } -void umfIpcHandleMappedCacheDestroy(ipc_mapped_handle_cache_handle_t cache) { - ipc_handle_cache_entry_t *entry, *tmp; +void umfIpcOpenedCacheDestroy(ipc_opened_cache_handle_t cache) { + ipc_opened_cache_entry_t *entry, *tmp; + + utils_mutex_lock(&(cache->global->cache_lock)); HASH_ITER(hh, cache->hash_table, entry, tmp) { DL_DELETE(cache->global->lru_list, entry); HASH_DEL(cache->hash_table, entry); @@ -153,19 +170,19 @@ void umfIpcHandleMappedCacheDestroy(ipc_mapped_handle_cache_handle_t cache) { umf_ba_free(cache->global->cache_allocator, entry); } HASH_CLEAR(hh, cache->hash_table); + utils_mutex_unlock(&(cache->global->cache_lock)); umf_ba_global_free(cache); } -umf_result_t 
-umfIpcHandleMappedCacheGet(ipc_mapped_handle_cache_handle_t cache, - const ipc_mapped_handle_cache_key_t *key, - uint64_t handle_id, - ipc_mapped_handle_cache_value_t **retEntry) { - ipc_handle_cache_entry_t *entry = NULL; +umf_result_t umfIpcOpenedCacheGet(ipc_opened_cache_handle_t cache, + const ipc_opened_cache_key_t *key, + uint64_t handle_id, + ipc_opened_cache_value_t **retEntry) { + ipc_opened_cache_entry_t *entry = NULL; umf_result_t ret = UMF_RESULT_SUCCESS; bool evicted = false; - ipc_mapped_handle_cache_value_t evicted_value; + ipc_opened_cache_value_t evicted_value; if (!cache || !key || !retEntry) { LOG_ERR("Some arguments are NULL, cache=%p, key=%p, retEntry=%p", @@ -189,7 +206,19 @@ umfIpcHandleMappedCacheGet(ipc_mapped_handle_cache_handle_t cache, if (entry == NULL && cache->global->max_size != 0 && cache->global->cur_size >= cache->global->max_size) { // If max_size is set and the cache is full, evict the least recently used entry. - entry = cache->global->lru_list->prev; + // we need to search for the least recently used entry with ref_count == 0 + // The utlist implementation of the doubly-linked list keeps a tail pointer in head->prev + ipc_opened_cache_entry_t *candidate = cache->global->lru_list->prev; + do { + uint64_t ref_count = 0; + utils_atomic_load_acquire_u64(&candidate->ref_count, + &ref_count); + if (ref_count == 0) { + entry = candidate; + break; + } + candidate = candidate->prev; + } while (candidate != cache->global->lru_list->prev); } if (entry) { // we have eviction candidate @@ -230,7 +259,7 @@ umfIpcHandleMappedCacheGet(ipc_mapped_handle_cache_handle_t cache, exit: if (ret == UMF_RESULT_SUCCESS) { - utils_atomic_increment(&entry->ref_count); + utils_atomic_increment_u64(&entry->ref_count); *retEntry = &entry->value; } @@ -242,3 +271,20 @@ umfIpcHandleMappedCacheGet(ipc_mapped_handle_cache_handle_t cache, return ret; } + +umf_result_t +umfIpcHandleMappedCacheRelease(ipc_opened_cache_value_t *cacheValue) { + if (!cacheValue) { + LOG_ERR("cacheValue is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // get pointer to the entry + ipc_opened_cache_entry_t *entry = + (ipc_opened_cache_entry_t *)((char *)cacheValue - + offsetof(ipc_opened_cache_entry_t, value)); + // decrement the ref count + utils_atomic_decrement_u64(&entry->ref_count); + + return UMF_RESULT_SUCCESS; +} diff --git a/src/ipc_cache.h b/src/ipc_cache.h index 59ae287872..545c6e1e7e 100644 --- a/src/ipc_cache.h +++ b/src/ipc_cache.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -14,39 +14,39 @@ #include "utils_concurrency.h" -typedef struct ipc_mapped_handle_cache_key_t { +typedef struct ipc_opened_cache_key_t { void *remote_base_ptr; umf_memory_provider_handle_t local_provider; int remote_pid; -} ipc_mapped_handle_cache_key_t; +} ipc_opened_cache_key_t; -typedef struct ipc_mapped_handle_cache_value_t { +typedef struct ipc_opened_cache_value_t { void *mapped_base_ptr; size_t mapped_size; utils_mutex_t mmap_lock; -} ipc_mapped_handle_cache_value_t; +} ipc_opened_cache_value_t; -struct ipc_mapped_handle_cache_t; +struct ipc_opened_cache_t; -typedef struct ipc_mapped_handle_cache_t *ipc_mapped_handle_cache_handle_t; +typedef struct ipc_opened_cache_t *ipc_opened_cache_handle_t; umf_result_t umfIpcCacheGlobalInit(void); void umfIpcCacheGlobalTearDown(void); // define pointer to the eviction callback function -typedef void (*ipc_mapped_handle_cache_eviction_cb_t)( - const ipc_mapped_handle_cache_key_t *key, - const ipc_mapped_handle_cache_value_t *value); +typedef void (*ipc_opened_cache_eviction_cb_t)( + const ipc_opened_cache_key_t *key, const ipc_opened_cache_value_t *value); -ipc_mapped_handle_cache_handle_t umfIpcHandleMappedCacheCreate( - ipc_mapped_handle_cache_eviction_cb_t eviction_cb); +ipc_opened_cache_handle_t +umfIpcOpenedCacheCreate(ipc_opened_cache_eviction_cb_t eviction_cb); -void umfIpcHandleMappedCacheDestroy(ipc_mapped_handle_cache_handle_t cache); +void umfIpcOpenedCacheDestroy(ipc_opened_cache_handle_t cache); -umf_result_t -umfIpcHandleMappedCacheGet(ipc_mapped_handle_cache_handle_t cache, - const ipc_mapped_handle_cache_key_t *key, - uint64_t handle_id, - ipc_mapped_handle_cache_value_t **retEntry); +umf_result_t umfIpcOpenedCacheGet(ipc_opened_cache_handle_t cache, + const ipc_opened_cache_key_t *key, + uint64_t handle_id, + ipc_opened_cache_value_t **retEntry); +umf_result_t +umfIpcHandleMappedCacheRelease(ipc_opened_cache_value_t *cacheValue); #endif /* UMF_IPC_CACHE_H */ diff --git a/src/libumf.c b/src/libumf.c index b89e5c844d..e357b2583a 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -12,6 +12,9 @@ #include "base_alloc_global.h" #include "ipc_cache.h" #include "memspace_internal.h" +#include "pool/pool_scalable_internal.h" +#include "provider_cuda_internal.h" +#include "provider_level_zero_internal.h" #include "provider_tracking.h" #include "utils_common.h" #include "utils_log.h" @@ -21,10 +24,10 @@ umf_memory_tracker_handle_t TRACKER = NULL; -static unsigned long long umfRefCount = 0; +static uint64_t umfRefCount = 0; int umfInit(void) { - if (utils_fetch_and_add64(&umfRefCount, 1) == 0) { + if (utils_fetch_and_add_u64(&umfRefCount, 1) == 0) { utils_log_init(); TRACKER = umfMemoryTrackerCreate(); if (!TRACKER) { @@ -51,7 +54,7 @@ int umfInit(void) { } void umfTearDown(void) { - if (utils_fetch_and_add64(&umfRefCount, -1) == 1) { + if (utils_fetch_and_sub_u64(&umfRefCount, 1) == 1) { #if !defined(_WIN32) && !defined(UMF_NO_HWLOC) umfMemspaceHostAllDestroy(); umfMemspaceHighestCapacityDestroy(); @@ -79,6 +82,9 @@ void umfTearDown(void) { LOG_DEBUG("UMF base allocator destroyed"); fini_umfTearDown: + fini_ze_global_state(); + fini_cu_global_state(); + fini_tbb_global_state(); LOG_DEBUG("UMF library finalized"); } } diff --git a/src/libumf.def b/src/libumf.def index 82e32d4a13..ce8820a8fa 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -1,12 +1,12 @@ ;;;; Begin Copyright Notice -; Copyright (C) 2024 Intel Corporation +; Copyright (C) 2023-2025 Intel Corporation ; Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ;;;; End Copyright Notice LIBRARY UMF -VERSION 0.10 +VERSION 0.11 EXPORTS DllMain @@ -14,8 +14,6 @@ EXPORTS umfTearDown umfGetCurrentVersion umfCloseIPCHandle - umfCoarseMemoryProviderGetStats - umfCoarseMemoryProviderOps umfCUDAMemoryProviderOps umfCUDAMemoryProviderParamsCreate umfCUDAMemoryProviderParamsDestroy @@ -27,15 +25,16 @@ EXPORTS umfDevDaxMemoryProviderParamsDestroy umfDevDaxMemoryProviderParamsSetDeviceDax umfDevDaxMemoryProviderParamsSetProtection - umfFree umfFileMemoryProviderOps umfFileMemoryProviderParamsCreate umfFileMemoryProviderParamsDestroy umfFileMemoryProviderParamsSetPath umfFileMemoryProviderParamsSetProtection umfFileMemoryProviderParamsSetVisibility + umfFree umfGetIPCHandle umfGetLastFailedMemoryProvider + umfJemallocPoolOps umfLevelZeroMemoryProviderOps umfLevelZeroMemoryProviderParamsCreate umfLevelZeroMemoryProviderParamsDestroy @@ -105,10 +104,12 @@ EXPORTS umfPoolGetIPCHandler umfPoolGetIPCHandleSize umfPoolGetLastAllocationError + umfPoolGetTag umfPoolGetMemoryProvider umfPoolMalloc umfPoolMallocUsableSize umfPoolRealloc + umfPoolSetTag umfProxyPoolOps umfPutIPCHandle umfScalablePoolOps @@ -116,3 +117,22 @@ EXPORTS umfScalablePoolParamsDestroy umfScalablePoolParamsSetGranularity umfScalablePoolParamsSetKeepAllMemory +; Added in UMF_0.11 + umfCUDAMemoryProviderParamsSetAllocFlags + umfDisjointPoolOps + umfDisjointPoolParamsCreate + umfDisjointPoolParamsDestroy + umfDisjointPoolParamsSetCapacity + umfDisjointPoolParamsSetMaxPoolableSize + umfDisjointPoolParamsSetMinBucketSize + umfDisjointPoolParamsSetName + umfDisjointPoolParamsSetSharedLimits + umfDisjointPoolParamsSetSlabMinSize + umfDisjointPoolParamsSetTrace + umfDisjointPoolSharedLimitsCreate + umfDisjointPoolSharedLimitsDestroy + umfFixedMemoryProviderOps + umfFixedMemoryProviderParamsCreate + umfFixedMemoryProviderParamsDestroy + umfLevelZeroMemoryProviderParamsSetFreePolicy + umfLevelZeroMemoryProviderParamsSetDeviceOrdinal diff 
--git a/src/libumf.map b/src/libumf.map index 4755b6b814..6582fd0f8d 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -8,8 +8,6 @@ UMF_0.10 { umfTearDown; umfGetCurrentVersion; umfCloseIPCHandle; - umfCoarseMemoryProviderGetStats; - umfCoarseMemoryProviderOps; umfCUDAMemoryProviderOps; umfCUDAMemoryProviderParamsCreate; umfCUDAMemoryProviderParamsDestroy; @@ -21,15 +19,16 @@ UMF_0.10 { umfDevDaxMemoryProviderParamsDestroy; umfDevDaxMemoryProviderParamsSetDeviceDax; umfDevDaxMemoryProviderParamsSetProtection; - umfFree; umfFileMemoryProviderOps; umfFileMemoryProviderParamsCreate; umfFileMemoryProviderParamsDestroy; umfFileMemoryProviderParamsSetPath; umfFileMemoryProviderParamsSetProtection; umfFileMemoryProviderParamsSetVisibility; + umfFree; umfGetIPCHandle; umfGetLastFailedMemoryProvider; + umfJemallocPoolOps; umfLevelZeroMemoryProviderOps; umfLevelZeroMemoryProviderParamsCreate; umfLevelZeroMemoryProviderParamsDestroy; @@ -82,13 +81,13 @@ UMF_0.10 { umfOsMemoryProviderOps; umfOsMemoryProviderParamsCreate; umfOsMemoryProviderParamsDestroy; - umfOsMemoryProviderParamsSetProtection; - umfOsMemoryProviderParamsSetVisibility; - umfOsMemoryProviderParamsSetShmName; umfOsMemoryProviderParamsSetNumaList; umfOsMemoryProviderParamsSetNumaMode; umfOsMemoryProviderParamsSetPartSize; umfOsMemoryProviderParamsSetPartitions; + umfOsMemoryProviderParamsSetProtection; + umfOsMemoryProviderParamsSetShmName; + umfOsMemoryProviderParamsSetVisibility; umfPoolAlignedMalloc; umfPoolByPtr; umfPoolCalloc; @@ -100,9 +99,11 @@ UMF_0.10 { umfPoolGetIPCHandleSize; umfPoolGetLastAllocationError; umfPoolGetMemoryProvider; + umfPoolGetTag; umfPoolMalloc; umfPoolMallocUsableSize; umfPoolRealloc; + umfPoolSetTag; umfProxyPoolOps; umfPutIPCHandle; umfScalablePoolOps; @@ -113,3 +114,24 @@ UMF_0.10 { local: *; }; + +UMF_0.11 { + umfCUDAMemoryProviderParamsSetAllocFlags; + umfDisjointPoolOps; + umfDisjointPoolParamsCreate; + umfDisjointPoolParamsDestroy; + umfDisjointPoolParamsSetCapacity; + umfDisjointPoolParamsSetMaxPoolableSize; + umfDisjointPoolParamsSetMinBucketSize; + umfDisjointPoolParamsSetName; + umfDisjointPoolParamsSetSharedLimits; + umfDisjointPoolParamsSetSlabMinSize; + umfDisjointPoolParamsSetTrace; + umfDisjointPoolSharedLimitsCreate; + umfDisjointPoolSharedLimitsDestroy; + umfFixedMemoryProviderOps; + umfFixedMemoryProviderParamsCreate; + umfFixedMemoryProviderParamsDestroy; + umfLevelZeroMemoryProviderParamsSetFreePolicy; + umfLevelZeroMemoryProviderParamsSetDeviceOrdinal; +} UMF_0.10; diff --git a/src/libumf.rc.in b/src/libumf.rc.in index 7aba79e7ed..43bed1560f 100644 --- a/src/libumf.rc.in +++ b/src/libumf.rc.in @@ -1,4 +1,4 @@ -// Copyright (c) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -10,6 +10,8 @@ #define UMF_VERNUMBERS @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@,@UMF_VERSION_REVISION@ #define _UMF_VERSION "@UMF_VERSION@" +// Store our CMake vars in the "FileDescription" block, as the custom fields require special parsing. +#define _UMF_CMAKE_VARS "@UMF_ALL_CMAKE_VARIABLES@" #ifdef _DEBUG #define VERSION_DEBUG VS_FF_DEBUG @@ -49,9 +51,9 @@ BEGIN BLOCK "040904b0" // U.S. 
English, Unicode (0x04b0 == 1200) BEGIN VALUE "CompanyName", "Intel Corporation\0" - VALUE "FileDescription", "Unified Memory Framework (UMF) library\0" + VALUE "FileDescription", "Unified Memory Framework (UMF) library (build options: " _UMF_CMAKE_VARS ")\0" VALUE "FileVersion", _UMF_VERSION "\0" - VALUE "LegalCopyright", "Copyright 2024, Intel Corporation. All rights reserved.\0" + VALUE "LegalCopyright", "Copyright 2024-2025, Intel Corporation. All rights reserved.\0" VALUE "LegalTrademarks", "\0" VALUE "OriginalFilename", "umf.dll\0" VALUE "ProductName", "Unified Memory Framework (UMF)\0" diff --git a/src/memory_pool.c b/src/memory_pool.c index 4a85955efa..eb00545228 100644 --- a/src/memory_pool.c +++ b/src/memory_pool.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -38,14 +38,15 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - assert(ops->version == UMF_VERSION_CURRENT); + if (ops->version != UMF_POOL_OPS_VERSION_CURRENT) { + LOG_WARN("Memory Pool ops version \"%d\" is different than the current " + "version \"%d\"", + ops->version, UMF_POOL_OPS_VERSION_CURRENT); + } if (!(flags & UMF_POOL_CREATE_FLAG_DISABLE_TRACKING)) { // Wrap provider with memory tracking provider. - // Check if the provider supports the free() operation. - bool upstreamDoesNotFree = umfIsFreeOpDefault(provider); - ret = umfTrackingMemoryProviderCreate(provider, pool, &pool->provider, - upstreamDoesNotFree); + ret = umfTrackingMemoryProviderCreate(provider, pool, &pool->provider); if (ret != UMF_RESULT_SUCCESS) { goto err_provider_create; } @@ -55,6 +56,13 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, pool->flags = flags; pool->ops = *ops; + pool->tag = NULL; + + if (NULL == utils_mutex_init(&pool->lock)) { + LOG_ERR("Failed to initialize mutex for pool"); + ret = UMF_RESULT_ERROR_UNKNOWN; + goto err_lock_init; + } ret = ops->initialize(pool->provider, params, &pool->pool_priv); if (ret != UMF_RESULT_SUCCESS) { @@ -66,6 +74,8 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, return UMF_RESULT_SUCCESS; err_pool_init: + utils_mutex_destroy_not_free(&pool->lock); +err_lock_init: if (!(flags & UMF_POOL_CREATE_FLAG_DISABLE_TRACKING)) { umfMemoryProviderDestroy(pool->provider); } @@ -90,6 +100,8 @@ void umfPoolDestroy(umf_memory_pool_handle_t hPool) { umfMemoryProviderDestroy(hUpstreamProvider); } + utils_mutex_destroy_not_free(&hPool->lock); + LOG_INFO("Memory pool destroyed: %p", (void *)hPool); // TODO: this free keeps memory in base allocator, so it can lead to OOM in some scenarios (it should be optimized) @@ -99,6 +111,8 @@ void umfPoolDestroy(umf_memory_pool_handle_t hPool) { umf_result_t umfFree(void *ptr) { umf_memory_pool_handle_t hPool = umfPoolByPtr(ptr); if (hPool) { + LOG_DEBUG("calling umfPoolFree(pool=%p, ptr=%p) ...", (void *)hPool, + ptr); return umfPoolFree(hPool, ptr); } return UMF_RESULT_SUCCESS; @@ -175,3 +189,24 @@ umf_result_t umfPoolGetLastAllocationError(umf_memory_pool_handle_t hPool) { UMF_CHECK((hPool != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); return hPool->ops.get_last_allocation_error(hPool->pool_priv); } + +umf_result_t umfPoolSetTag(umf_memory_pool_handle_t hPool, void *tag, + void **oldTag) { + UMF_CHECK((hPool != NULL), 
UMF_RESULT_ERROR_INVALID_ARGUMENT); + utils_mutex_lock(&hPool->lock); + if (oldTag) { + *oldTag = hPool->tag; + } + hPool->tag = tag; + utils_mutex_unlock(&hPool->lock); + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfPoolGetTag(umf_memory_pool_handle_t hPool, void **tag) { + UMF_CHECK((hPool != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((tag != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + utils_mutex_lock(&hPool->lock); + *tag = hPool->tag; + utils_mutex_unlock(&hPool->lock); + return UMF_RESULT_SUCCESS; +} diff --git a/src/memory_pool_internal.h b/src/memory_pool_internal.h index 90f2f16298..ab3378163d 100644 --- a/src/memory_pool_internal.h +++ b/src/memory_pool_internal.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -22,6 +22,7 @@ extern "C" { #endif #include "base_alloc.h" +#include "utils_concurrency.h" typedef struct umf_memory_pool_t { void *pool_priv; @@ -30,6 +31,9 @@ typedef struct umf_memory_pool_t { // Memory provider used by the pool. umf_memory_provider_handle_t provider; + + utils_mutex_t lock; + void *tag; } umf_memory_pool_t; #ifdef __cplusplus diff --git a/src/memory_provider.c b/src/memory_provider.c index 883f1be263..ce6a10a207 100644 --- a/src/memory_provider.c +++ b/src/memory_provider.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -20,18 +20,6 @@ #include "memory_provider_internal.h" #include "utils_assert.h" -typedef struct umf_memory_provider_t { - umf_memory_provider_ops_t ops; - void *provider_priv; -} umf_memory_provider_t; - -static umf_result_t umfDefaultFree(void *provider, void *ptr, size_t size) { - (void)provider; - (void)ptr; - (void)size; - return UMF_RESULT_ERROR_NOT_SUPPORTED; -} - static umf_result_t umfDefaultPurgeLazy(void *provider, void *ptr, size_t size) { (void)provider; @@ -106,9 +94,6 @@ static umf_result_t umfDefaultCloseIPCHandle(void *provider, void *ptr, } void assignOpsExtDefaults(umf_memory_provider_ops_t *ops) { - if (!ops->ext.free) { - ops->ext.free = umfDefaultFree; - } if (!ops->ext.purge_lazy) { ops->ext.purge_lazy = umfDefaultPurgeLazy; } @@ -143,7 +128,7 @@ void assignOpsIpcDefaults(umf_memory_provider_ops_t *ops) { static bool validateOpsMandatory(const umf_memory_provider_ops_t *ops) { // Mandatory ops should be non-NULL - return ops->alloc && ops->get_recommended_page_size && + return ops->alloc && ops->free && ops->get_recommended_page_size && ops->get_min_page_size && ops->initialize && ops->finalize && ops->get_last_native_error && ops->get_name; } @@ -169,10 +154,6 @@ static bool validateOps(const umf_memory_provider_ops_t *ops) { validateOpsIpc(&(ops->ipc)); } -bool umfIsFreeOpDefault(umf_memory_provider_handle_t hProvider) { - return (hProvider->ops.ext.free == umfDefaultFree); -} - umf_result_t umfMemoryProviderCreate(const umf_memory_provider_ops_t *ops, void *params, umf_memory_provider_handle_t *hProvider) { @@ -181,14 +162,18 @@ umf_result_t umfMemoryProviderCreate(const umf_memory_provider_ops_t *ops, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } + if (ops->version != UMF_PROVIDER_OPS_VERSION_CURRENT) { + LOG_WARN("Memory Provider ops version \"%d\" is different than the " + "current version \"%d\"", + 
ops->version, UMF_PROVIDER_OPS_VERSION_CURRENT); + } + umf_memory_provider_handle_t provider = umf_ba_global_alloc(sizeof(umf_memory_provider_t)); if (!provider) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - assert(ops->version == UMF_VERSION_CURRENT); - provider->ops = *ops; assignOpsExtDefaults(&(provider->ops)); @@ -236,8 +221,7 @@ umf_result_t umfMemoryProviderAlloc(umf_memory_provider_handle_t hProvider, umf_result_t umfMemoryProviderFree(umf_memory_provider_handle_t hProvider, void *ptr, size_t size) { UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); - umf_result_t res = - hProvider->ops.ext.free(hProvider->provider_priv, ptr, size); + umf_result_t res = hProvider->ops.free(hProvider->provider_priv, ptr, size); checkErrorAndSetLastProvider(res, hProvider); return res; } diff --git a/src/memory_provider_get_last_failed.c b/src/memory_provider_get_last_failed.c index 9434eea976..09bd075e10 100644 --- a/src/memory_provider_get_last_failed.c +++ b/src/memory_provider_get_last_failed.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/memory_provider_internal.h b/src/memory_provider_internal.h index 49b2f2e531..dd1111a236 100644 --- a/src/memory_provider_internal.h +++ b/src/memory_provider_internal.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -18,9 +18,13 @@ extern "C" { #endif +typedef struct umf_memory_provider_t { + umf_memory_provider_ops_t ops; + void *provider_priv; +} umf_memory_provider_t; + void *umfMemoryProviderGetPriv(umf_memory_provider_handle_t hProvider); umf_memory_provider_handle_t *umfGetLastFailedMemoryProviderPtr(void); -bool umfIsFreeOpDefault(umf_memory_provider_handle_t hProvider); #ifdef __cplusplus } diff --git a/src/memspaces/memspace_numa.c b/src/memspaces/memspace_numa.c index 0028e394dc..83e65fc291 100644 --- a/src/memspaces/memspace_numa.c +++ b/src/memspaces/memspace_numa.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/memtarget.c b/src/memtarget.c index a897084608..8eb6e4e8cb 100644 --- a/src/memtarget.c +++ b/src/memtarget.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
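One consequence of the memory_provider.c hunks above is a provider-facing contract change: free() moved out of ops.ext, lost its umfDefaultFree fallback, and is now validated as mandatory, while a stale ops version only triggers a warning instead of an assert. A sketch of a conforming ops table (my_free is a placeholder; the remaining mandatory callbacks are elided for brevity):

```c
#include <umf/memory_provider_ops.h>

// free() uses the signature called above: ops.free(provider_priv, ptr, size).
static umf_result_t my_free(void *provider, void *ptr, size_t size) {
    (void)provider;
    (void)ptr;
    (void)size;
    return UMF_RESULT_SUCCESS;
}

static umf_memory_provider_ops_t MY_PROVIDER_OPS = {
    .version = UMF_PROVIDER_OPS_VERSION_CURRENT, // mismatches only warn now
    .free = my_free, // mandatory: validateOpsMandatory() rejects NULL
    // .initialize, .finalize, .alloc, .get_last_native_error, .get_name,
    // .get_recommended_page_size and .get_min_page_size are equally
    // mandatory and omitted here.
};
```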
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,6 +15,7 @@ #include "memtarget_internal.h" #include "memtarget_ops.h" #include "utils_concurrency.h" +#include "utils_log.h" umf_result_t umfMemtargetCreate(const umf_memtarget_ops_t *ops, void *params, umf_memtarget_handle_t *memoryTarget) { @@ -29,7 +30,11 @@ umf_result_t umfMemtargetCreate(const umf_memtarget_ops_t *ops, void *params, return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - assert(ops->version == UMF_VERSION_CURRENT); + if (ops->version != UMF_MEMTARGET_OPS_VERSION_CURRENT) { + LOG_WARN("Memtarget ops version \"%d\" is different than the current " + "version \"%d\"", + ops->version, UMF_MEMTARGET_OPS_VERSION_CURRENT); + } target->ops = ops; diff --git a/src/memtarget_internal.h b/src/memtarget_internal.h index c5b9a61c5c..85ec99b8e6 100644 --- a/src/memtarget_internal.h +++ b/src/memtarget_internal.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -16,7 +16,6 @@ extern "C" { #endif -struct umf_memtarget_ops_t; typedef struct umf_memtarget_ops_t umf_memtarget_ops_t; typedef struct umf_memtarget_t { diff --git a/src/memtarget_ops.h b/src/memtarget_ops.h index 75e16447e3..4bd9bb8997 100644 --- a/src/memtarget_ops.h +++ b/src/memtarget_ops.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -18,9 +18,14 @@ extern "C" { #endif +// Version of the Memtarget ops structure. +// NOTE: This is equal to the latest UMF version, in which the ops structure +// has been modified. +#define UMF_MEMTARGET_OPS_VERSION_CURRENT UMF_MAKE_VERSION(0, 11) + typedef struct umf_memtarget_ops_t { /// Version of the ops structure. - /// Should be initialized using UMF_VERSION_CURRENT + /// Should be initialized using UMF_MEMTARGET_OPS_VERSION_CURRENT uint32_t version; umf_result_t (*initialize)(void *params, void **memoryTarget); diff --git a/src/memtargets/memtarget_numa.c b/src/memtargets/memtarget_numa.c index f32774ebbc..88d8ac2a42 100644 --- a/src/memtargets/memtarget_numa.c +++ b/src/memtargets/memtarget_numa.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -390,7 +390,7 @@ static umf_result_t numa_compare(void *memTarget, void *otherMemTarget, } struct umf_memtarget_ops_t UMF_MEMTARGET_NUMA_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_MEMTARGET_OPS_VERSION_CURRENT, .initialize = numa_initialize, .finalize = numa_finalize, .pool_create_from_memspace = numa_pool_create_from_memspace, diff --git a/src/memtargets/memtarget_numa.h b/src/memtargets/memtarget_numa.h index 2d3e3fd704..6659d045ef 100644 --- a/src/memtargets/memtarget_numa.h +++ b/src/memtargets/memtarget_numa.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index bdd196b041..22aeab7833 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -8,47 +8,3 @@ if(UMF_BUILD_SHARED_LIBRARY) endif() set(POOL_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS}) - -# libumf_pool_disjoint -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - add_umf_library( - NAME disjoint_pool - TYPE STATIC - SRCS pool_disjoint.cpp ${POOL_EXTRA_SRCS} - LIBS ${POOL_EXTRA_LIBS}) - - target_compile_definitions(disjoint_pool - PRIVATE ${POOL_COMPILE_DEFINITIONS}) - - if(WINDOWS) - target_compile_options(disjoint_pool PRIVATE /DWIN32_LEAN_AND_MEAN - /DNOMINMAX) - endif() - - add_library(${PROJECT_NAME}::disjoint_pool ALIAS disjoint_pool) - - add_dependencies(disjoint_pool umf) - - target_link_libraries(disjoint_pool PRIVATE umf) - - target_include_directories( - disjoint_pool - PUBLIC $ - $) - - install(TARGETS disjoint_pool EXPORT ${PROJECT_NAME}-targets) -endif() - -# libumf_pool_jemalloc -if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) - add_umf_library( - NAME jemalloc_pool - TYPE STATIC - SRCS pool_jemalloc.c ${POOL_EXTRA_SRCS} - LIBS jemalloc ${POOL_EXTRA_LIBS}) - target_include_directories(jemalloc_pool PRIVATE ${JEMALLOC_INCLUDE_DIRS}) - target_compile_definitions(jemalloc_pool - PRIVATE ${POOL_COMPILE_DEFINITIONS}) - add_library(${PROJECT_NAME}::jemalloc_pool ALIAS jemalloc_pool) - install(TARGETS jemalloc_pool EXPORT ${PROJECT_NAME}-targets) -endif() diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c new file mode 100644 index 0000000000..ca4d2fab78 --- /dev/null +++ b/src/pool/pool_disjoint.c @@ -0,0 +1,1095 @@ +/* + * Copyright (C) 2022-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "base_alloc_global.h" +#include "pool_disjoint_internal.h" +#include "provider/provider_tracking.h" +#include "uthash/utlist.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" +#include "utils_math.h" + +// Temporary solution for disabling memory poisoning. This is needed because +// AddressSanitizer does not support memory poisoning for GPU allocations. +// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 +#ifndef POISON_MEMORY +#undef __SANITIZE_ADDRESS__ +#endif +#include "utils_sanitizers.h" + +// Forward declarations +static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); +static bool bucket_can_pool(bucket_t *bucket); +static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, + bool *from_pool); + +static __TLS umf_result_t TLS_last_allocation_error; + +// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is +// requested. The implementation distinguishes between allocations of size +// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. +// Allocation requests smaller than ChunkCutoff use chunks taken from a single +// coarse-grain allocation. 
Thus, for example, for a 64KB minimum allocation
+// size, and 8-byte allocations, only 1 in ~8000 requests results in a new
+// coarse-grain allocation. Freeing only marks a chunk of the larger
+// allocation as available; nothing is actually returned to the system. An
+// allocation is returned to the system only when all chunks in the larger
+// allocation are freed by the program. Allocations larger than ChunkCutOff use
+// a separate coarse-grain allocation for each request. These are subject to
+// "pooling". That is, when such an allocation is freed by the program it is
+// retained in a pool. The pool is available for future allocations, which means
+// there are fewer actual coarse-grain allocations/deallocations.
+
+// The largest size which is allocated via the allocator.
+// Allocations with size > CutOff bypass the pool and
+// go directly to the provider.
+static const size_t CutOff = (size_t)1 << 31; // 2GB
+
+static size_t bucket_slab_min_size(bucket_t *bucket) {
+    return bucket->pool->params.slab_min_size;
+}
+
+static size_t bucket_slab_alloc_size(bucket_t *bucket) {
+    return utils_max(bucket->size, bucket_slab_min_size(bucket));
+}
+
+static slab_t *create_slab(bucket_t *bucket) {
+    assert(bucket);
+
+    umf_result_t res = UMF_RESULT_SUCCESS;
+    umf_memory_provider_handle_t provider = bucket->pool->provider;
+
+    size_t num_chunks_total =
+        utils_max(bucket_slab_min_size(bucket) / bucket->size, 1);
+
+    // Calculate the number of 64-bit words needed.
+    size_t num_words =
+        (num_chunks_total + CHUNK_BITMAP_SIZE - 1) / CHUNK_BITMAP_SIZE;
+
+    slab_t *slab = umf_ba_global_alloc(sizeof(*slab) +
+                                       num_words * sizeof(slab->chunks[0]));
+    if (slab == NULL) {
+        LOG_ERR("allocation of new slab failed!");
+        return NULL;
+    }
+
+    slab->num_chunks_allocated = 0;
+    slab->bucket = bucket;
+
+    slab->iter.val = slab;
+    slab->iter.prev = slab->iter.next = NULL;
+
+    slab->num_chunks_total = num_chunks_total;
+    slab->num_words = num_words;
+
+    // set all chunks as free
+    memset(slab->chunks, ~0, num_words * sizeof(slab->chunks[0]));
+    if (num_chunks_total % CHUNK_BITMAP_SIZE) {
+        // clear remaining bits
+        slab->chunks[num_words - 1] =
+            ((1ULL << (num_chunks_total % CHUNK_BITMAP_SIZE)) - 1);
+    }
+
+    // if slab_min_size is not a multiple of bucket size, we would have some
+    // padding at the end of the slab
+    slab->slab_size = bucket_slab_alloc_size(bucket);
+
+    // NOTE: originally, slab memory was allocated without alignment; with
+    // aligned slabs, registering a slab is simpler and doesn't require a
+    // multimap (TODO: revisit - this note may no longer be accurate)
+    res = umfMemoryProviderAlloc(provider, slab->slab_size, 0, &slab->mem_ptr);
+    if (res != UMF_RESULT_SUCCESS) {
+        LOG_ERR("allocation of slab data failed!");
+        goto free_slab;
+    }
+
+    // the raw allocation is not available to the user, so mark it as
+    // inaccessible
+    utils_annotate_memory_inaccessible(slab->mem_ptr, slab->slab_size);
+
+    LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)bucket, slab->slab_size);
+    return slab;
+
+free_slab:
+    umf_ba_global_free(slab);
+    return NULL;
+}
+
+static void destroy_slab(slab_t *slab) {
+    LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)slab->bucket,
+              slab->slab_size);
+
+    umf_memory_provider_handle_t provider = slab->bucket->pool->provider;
+    umf_result_t res =
+        umfMemoryProviderFree(provider, slab->mem_ptr, slab->slab_size);
+    if (res != UMF_RESULT_SUCCESS) {
+        LOG_ERR("deallocation of slab data failed!");
+    }
+
+    umf_ba_global_free(slab);
+}
+
+static size_t slab_find_first_available_chunk_idx(const slab_t *slab) {
+    for (size_t i = 0; i <
slab->num_words; i++) { + // NOTE: free chunks are represented as set bits + uint64_t word = slab->chunks[i]; + if (word != 0) { + size_t bit_index = utils_lsb64(word); + size_t free_chunk = i * CHUNK_BITMAP_SIZE + bit_index; + return free_chunk; + } + } + + // No free chunk was found. + return SIZE_MAX; +} + +static void *slab_get_chunk(slab_t *slab) { + // free chunk must exist, otherwise we would have allocated another slab + const size_t chunk_idx = slab_find_first_available_chunk_idx(slab); + assert(chunk_idx != SIZE_MAX); + + void *free_chunk = + (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); + + // mark chunk as used + slab_set_chunk_bit(slab, chunk_idx, false); + slab->num_chunks_allocated += 1; + + return free_chunk; +} + +static void *slab_get(const slab_t *slab) { return slab->mem_ptr; } +static void *slab_get_end(const slab_t *slab) { + return (void *)((uintptr_t)slab->mem_ptr + + bucket_slab_min_size(slab->bucket)); +} + +static void slab_free_chunk(slab_t *slab, void *ptr) { + // This method should be called through bucket (since we might remove the + // slab as a result), therefore all locks are done on bucket level. + + // Make sure that we're in the right slab + assert(ptr >= slab_get(slab) && ptr < slab_get_end(slab)); + + // Get the chunk index + uintptr_t ptr_diff = (uintptr_t)ptr - (uintptr_t)slab->mem_ptr; + assert((ptr_diff % slab->bucket->size) == 0); + size_t chunk_idx = ptr_diff / slab->bucket->size; + + // Make sure that the chunk was allocated + assert(slab_read_chunk_bit(slab, chunk_idx) == 0 && "double free detected"); + slab_set_chunk_bit(slab, chunk_idx, true); + slab->num_chunks_allocated -= 1; +} + +static bool slab_has_avail(const slab_t *slab) { + return slab->num_chunks_allocated < slab->num_chunks_total; +} + +static umf_result_t pool_register_slab(disjoint_pool_t *pool, slab_t *slab) { + critnib *slabs = pool->known_slabs; + + // NOTE: changed vs original DisjointPool implementation - currently slab + // is already aligned to bucket size. 
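The chunk bitmap above treats a set bit as a free chunk; create_slab() clears the unused trailing bits of the last word, and the scan relies on utils_lsb64(). A standalone sketch of the same bookkeeping, with a portable bit scan standing in for utils_lsb64():

```c
#include <stdint.h>
#include <stdio.h>

#define WORD_BITS 64 // mirrors CHUNK_BITMAP_SIZE in the pool code

// Portable stand-in for utils_lsb64(): index of the lowest set bit.
static size_t lowest_set_bit(uint64_t word) {
    size_t bit = 0;
    while (((word >> bit) & 1ULL) == 0) {
        bit++;
    }
    return bit;
}

// Mirrors slab_find_first_available_chunk_idx(): set bit == free chunk.
static size_t find_first_free(const uint64_t *words, size_t num_words) {
    for (size_t i = 0; i < num_words; i++) {
        if (words[i] != 0) {
            return i * WORD_BITS + lowest_set_bit(words[i]);
        }
    }
    return SIZE_MAX; // slab is full
}

int main(void) {
    // 70 chunks need 2 words; start all-free, clearing the trailing bits of
    // the last word exactly as create_slab() does (70 % 64 == 6).
    uint64_t chunks[2] = {~0ULL, (1ULL << (70 % WORD_BITS)) - 1};
    chunks[0] &= ~1ULL; // allocate chunk 0 (clear its bit)
    printf("first free chunk: %zu\n", find_first_free(chunks, 2)); // prints 1
    return 0;
}
```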
+    void *slab_addr = slab_get(slab);
+    // TODO ASSERT_IS_ALIGNED((uintptr_t)slab_addr, bucket->size);
+    LOG_DEBUG("slab: %p, start: %p", (void *)slab, slab_addr);
+
+    // NOTE: we don't need to lock the slabs map as the critnib already has a
+    // lock inside it
+    int ret = critnib_insert(slabs, (uintptr_t)slab_addr, slab, 0);
+    umf_result_t res = UMF_RESULT_SUCCESS;
+    if (ret == ENOMEM) {
+        LOG_ERR("register failed because of out of memory!");
+        res = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+    } else if (ret == EEXIST) {
+        LOG_ERR("register failed because the address is already registered!");
+        res = UMF_RESULT_ERROR_UNKNOWN;
+    }
+
+    return res;
+}
+
+static umf_result_t pool_unregister_slab(disjoint_pool_t *pool, slab_t *slab) {
+    critnib *slabs = pool->known_slabs;
+
+    void *slab_addr = slab_get(slab);
+    // TODO ASSERT_IS_ALIGNED((uintptr_t)slab_addr, bucket->size);
+    LOG_DEBUG("slab: %p, start: %p", (void *)slab, slab_addr);
+
+    critnib_remove(slabs, (uintptr_t)slab_addr);
+
+    return UMF_RESULT_SUCCESS;
+}
+
+static bucket_t *
+create_bucket(size_t sz, disjoint_pool_t *pool,
+              umf_disjoint_pool_shared_limits_handle_t shared_limits) {
+
+    bucket_t *bucket = umf_ba_global_alloc(sizeof(*bucket));
+    if (bucket == NULL) {
+        LOG_ERR("allocation of new bucket failed!");
+        return NULL;
+    }
+
+    memset(bucket, 0, sizeof(*bucket));
+    bucket->size = sz;
+    bucket->pool = pool;
+    bucket->shared_limits = shared_limits;
+
+    utils_mutex_init(&bucket->bucket_lock);
+    return bucket;
+}
+
+static void destroy_bucket(bucket_t *bucket) {
+    // use an extra tmp to store the next iterator before destroying the slab
+    slab_list_item_t *it = NULL, *tmp = NULL;
+    LL_FOREACH_SAFE(bucket->available_slabs, it, tmp) {
+        LL_DELETE(bucket->available_slabs, it);
+        destroy_slab(it->val);
+    }
+
+    LL_FOREACH_SAFE(bucket->unavailable_slabs, it, tmp) {
+        LL_DELETE(bucket->unavailable_slabs, it);
+        destroy_slab(it->val);
+    }
+
+    utils_mutex_destroy_not_free(&bucket->bucket_lock);
+    umf_ba_global_free(bucket);
+}
+
+static size_t slab_get_num_free_chunks(const slab_t *slab) {
+    return slab->num_chunks_total - slab->num_chunks_allocated;
+}
+
+// NOTE: this function must be called under bucket->bucket_lock
+static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab,
+                              bool *to_pool) {
+    slab_free_chunk(slab, ptr);
+
+    // if the slab was previously full and now has a single available chunk,
+    // it should be moved to the list of available slabs
+    if (slab_get_num_free_chunks(slab) == 1) {
+        slab_list_item_t *slab_it = &slab->iter;
+        assert(slab_it->val != NULL);
+        DL_DELETE(bucket->unavailable_slabs, slab_it);
+        DL_PREPEND(bucket->available_slabs, slab_it);
+        bucket->available_slabs_num++;
+    }
+
+    // check if the slab is empty, and pool it if we can
+    if (slab->num_chunks_allocated == 0) {
+        // The slab is now empty.
+        // If the pool has capacity then put the slab in the pool.
+        // The to_pool parameter indicates whether the slab will be put in the
+        // pool or freed.
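Since pool_register_slab()/pool_unregister_slab() above lean on the critnib radix map that ships in the UMF tree, a minimal standalone sketch of the same calls (the key and payload here are illustrative):

```c
#include <stdint.h>
#include <stdio.h>
#include "critnib.h"

int main(void) {
    critnib *map = critnib_new();
    int payload = 42;
    // Keyed by address, exactly like the known_slabs registration above;
    // a non-zero return maps to ENOMEM/EEXIST as handled there.
    if (critnib_insert(map, (uintptr_t)0x1000, &payload, 0) != 0) {
        return 1;
    }
    // find_le returns the value with the greatest key <= the query, which is
    // how disjoint_pool_free() maps an interior pointer back to its slab.
    int *found = critnib_find_le(map, (uintptr_t)0x1008);
    printf("%d\n", found ? *found : -1); // prints 42
    critnib_remove(map, (uintptr_t)0x1000);
    critnib_delete(map);
    return 0;
}
```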
+ *to_pool = bucket_can_pool(bucket); + if (*to_pool == false) { + // remove slab + slab_list_item_t *slab_it = &slab->iter; + assert(slab_it->val != NULL); + pool_unregister_slab(bucket->pool, slab_it->val); + DL_DELETE(bucket->available_slabs, slab_it); + assert(bucket->available_slabs_num > 0); + bucket->available_slabs_num--; + destroy_slab(slab_it->val); + } + } else { + // return this chunk to the pool + *to_pool = true; + } +} + +// NOTE: this function must be called under bucket->bucket_lock +static void *bucket_get_free_chunk(bucket_t *bucket, bool *from_pool) { + slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, from_pool); + if (slab_it == NULL) { + return NULL; + } + + void *free_chunk = slab_get_chunk(slab_it->val); + + // if we allocated last free chunk from the slab and now it is full, move + // it to unavailable slabs and update its iterator + if (!(slab_has_avail(slab_it->val))) { + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); + } + + return free_chunk; +} + +static size_t bucket_chunk_cut_off(bucket_t *bucket) { + return bucket_slab_min_size(bucket) / 2; +} + +static slab_t *bucket_create_slab(bucket_t *bucket) { + slab_t *slab = create_slab(bucket); + if (slab == NULL) { + LOG_ERR("create_slab failed!") + return NULL; + } + + umf_result_t res = pool_register_slab(bucket->pool, slab); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("slab_reg failed!") + destroy_slab(slab); + return NULL; + } + + DL_PREPEND(bucket->available_slabs, &slab->iter); + bucket->available_slabs_num++; + bucket_update_stats(bucket, 1, 0); + + return slab; +} + +static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, + bool *from_pool) { + if (bucket->available_slabs == NULL) { + bucket_create_slab(bucket); + *from_pool = false; + } else { + slab_t *slab = bucket->available_slabs->val; + // Allocation from existing slab is treated as from pool for statistics. + *from_pool = true; + if (slab->num_chunks_allocated == 0) { + assert(bucket->chunked_slabs_in_pool > 0); + // If this was an empty slab, it was in the pool. + // Now it is no longer in the pool, so update count. + --bucket->chunked_slabs_in_pool; + uint64_t size_to_sub = bucket_slab_alloc_size(bucket); + uint64_t old_size = utils_fetch_and_sub_u64( + &bucket->shared_limits->total_size, size_to_sub); + (void)old_size; + assert(old_size >= size_to_sub); + bucket_update_stats(bucket, 1, -1); + } + } + + return bucket->available_slabs; +} + +static size_t bucket_max_pooled_slabs(bucket_t *bucket) { + // For small buckets where slabs are split to chunks, just one pooled slab is sufficient. + // For larger buckets, the capacity could be more and is adjustable. + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + return 1; + } else { + return bucket->pool->params.capacity; + } +} + +static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { + if (bucket->pool->params.pool_trace == 0) { + return; + } + + bucket->curr_slabs_in_use += in_use; + bucket->max_slabs_in_use = + utils_max(bucket->curr_slabs_in_use, bucket->max_slabs_in_use); + + bucket->curr_slabs_in_pool += in_pool; + bucket->max_slabs_in_pool = + utils_max(bucket->curr_slabs_in_pool, bucket->max_slabs_in_pool); + + // Increment or decrement current pool sizes based on whether + // slab was added to or removed from pool. 
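To make bucket_max_pooled_slabs() above concrete: buckets small enough to be chunked (size at most bucket_chunk_cut_off(), i.e. half the slab minimum size) pool at most one empty slab, while whole-slab buckets pool up to the configurable capacity. Restated standalone with illustrative numbers:

```c
#include <stddef.h>

// Standalone restatement of bucket_max_pooled_slabs().
static size_t max_pooled_slabs(size_t bucket_size, size_t slab_min_size,
                               size_t capacity) {
    size_t chunk_cut_off = slab_min_size / 2; // bucket_chunk_cut_off()
    return (bucket_size <= chunk_cut_off) ? 1 : capacity;
}

// With slab_min_size = 64 KiB and capacity = 4:
//   max_pooled_slabs(8 * 1024, 64 * 1024, 4)   == 1 (chunked bucket)
//   max_pooled_slabs(256 * 1024, 64 * 1024, 4) == 4 (whole-slab bucket)
```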
+ bucket->pool->params.cur_pool_size += + in_pool * bucket_slab_alloc_size(bucket); +} + +static bool bucket_can_pool(bucket_t *bucket) { + size_t new_free_slabs_in_bucket; + + new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; + + // we keep at most params.capacity slabs in the pool + if (bucket_max_pooled_slabs(bucket) >= new_free_slabs_in_bucket) { + + uint64_t size_to_add = bucket_slab_alloc_size(bucket); + size_t previous_size = utils_fetch_and_add_u64( + &bucket->shared_limits->total_size, size_to_add); + + if (previous_size + size_to_add <= bucket->shared_limits->max_size) { + ++bucket->chunked_slabs_in_pool; + bucket_update_stats(bucket, -1, 1); + return true; + } else { + uint64_t old = utils_fetch_and_sub_u64( + &bucket->shared_limits->total_size, size_to_add); + (void)old; + assert(old >= size_to_add); + } + } + + bucket_update_stats(bucket, -1, 0); + return false; +} + +static size_t size_to_idx(disjoint_pool_t *pool, size_t size) { + assert(size <= CutOff && "Unexpected size"); + assert(size > 0 && "Unexpected size"); + + size_t min_bucket_size = (size_t)1 << pool->min_bucket_size_exp; + if (size < min_bucket_size) { + return 0; + } + + // get the position of the leftmost set bit + size_t position = utils_msb64(size); + + bool is_power_of_2 = IS_POWER_OF_2(size); + bool larger_than_halfway_between_powers_of_2 = + !is_power_of_2 && + (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); + size_t index = (position - pool->min_bucket_size_exp) * 2 + + (int)(!is_power_of_2) + + (int)larger_than_halfway_between_powers_of_2; + + return index; +} + +static umf_disjoint_pool_shared_limits_t * +disjoint_pool_get_limits(disjoint_pool_t *pool) { + if (pool->params.shared_limits) { + return pool->params.shared_limits; + } else { + return pool->default_shared_limits; + } +} + +static bucket_t *disjoint_pool_find_bucket(disjoint_pool_t *pool, size_t size) { + size_t calculated_idx = size_to_idx(pool, size); + return pool->buckets[calculated_idx]; +} + +static void disjoint_pool_print_stats(disjoint_pool_t *pool) { + size_t high_bucket_size = 0; + size_t high_peak_slabs_in_use = 0; + const char *name = pool->params.name; + + LOG_DEBUG("\"%s\" pool memory statistics", name); + LOG_DEBUG("%14s %12s %12s %18s %20s %21s", "Bucket Size", "Allocs", "Frees", + "Allocs from Pool", "Peak Slabs in Use", "Peak Slabs in Pool"); + + for (size_t i = 0; i < pool->buckets_num; i++) { + bucket_t *bucket = pool->buckets[i]; + // lock bucket before accessing its stats + utils_mutex_lock(&bucket->bucket_lock); + + if (bucket->alloc_count) { + LOG_DEBUG("%14zu %12zu %12zu %18zu %20zu %21zu", bucket->size, + bucket->alloc_count, bucket->free_count, + bucket->alloc_pool_count, bucket->max_slabs_in_use, + bucket->max_slabs_in_pool); + high_bucket_size = + utils_max(bucket_slab_alloc_size(bucket), high_bucket_size); + } + + high_peak_slabs_in_use = + utils_max(bucket->max_slabs_in_use, high_peak_slabs_in_use); + + utils_mutex_unlock(&bucket->bucket_lock); + } + + LOG_DEBUG("current pool size: %" PRIu64, + disjoint_pool_get_limits(pool)->total_size); + LOG_DEBUG("suggested setting=;%c%s:%zu,%zu,64K", (char)tolower(name[0]), + (name + 1), high_bucket_size, high_peak_slabs_in_use); +} + +static void *disjoint_pool_allocate(disjoint_pool_t *pool, size_t size) { + if (size == 0) { + return NULL; + } + + void *ptr = NULL; + + if (size > pool->params.max_poolable_size) { + umf_result_t ret = + umfMemoryProviderAlloc(pool->provider, size, 0, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error 
= ret; + LOG_ERR("allocation from the memory provider failed"); + return NULL; + } + + utils_annotate_memory_undefined(ptr, size); + return ptr; + } + + bucket_t *bucket = disjoint_pool_find_bucket(pool, size); + + utils_mutex_lock(&bucket->bucket_lock); + + bool from_pool = false; + ptr = bucket_get_free_chunk(bucket, &from_pool); + + if (ptr == NULL) { + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + if (pool->params.pool_trace > 1) { + // update stats + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (pool->params.pool_trace > 2) { + LOG_DEBUG("Allocated %8zu %s bytes from %s -> %p", size, + pool->params.name, (from_pool ? "pool" : "provider"), ptr); + } + + VALGRIND_DO_MEMPOOL_ALLOC(pool, ptr, size); + utils_annotate_memory_undefined(ptr, bucket->size); + return ptr; +} + +umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, + void *params, void **ppPool) { + // TODO set defaults when user pass the NULL as params + if (!provider || !params || !ppPool) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_disjoint_pool_params_t *dp_params = + (umf_disjoint_pool_params_t *)params; + + // min_bucket_size parameter must be a power of 2 for bucket sizes + // to generate correctly. + if (!dp_params->min_bucket_size || + !IS_POWER_OF_2(dp_params->min_bucket_size)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + disjoint_pool_t *disjoint_pool = + umf_ba_global_alloc(sizeof(*disjoint_pool)); + if (disjoint_pool == NULL) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + VALGRIND_DO_CREATE_MEMPOOL(disjoint_pool, 0, 0); + + disjoint_pool->provider = provider; + disjoint_pool->params = *dp_params; + + disjoint_pool->known_slabs = critnib_new(); + if (disjoint_pool->known_slabs == NULL) { + goto err_free_disjoint_pool; + } + + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. + // Powers of 2 and the value halfway between the powers of 2. + size_t Size1 = disjoint_pool->params.min_bucket_size; + + // min_bucket_size cannot be larger than CutOff. + Size1 = utils_min(Size1, CutOff); + + // Buckets sized smaller than the bucket default size- 8 aren't needed. + Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + + // Calculate the exponent for min_bucket_size used for finding buckets. 
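To make the bucket ladder concrete: for min_bucket_size = 64 the sizes run 64, 96, 128, 192, 256, ... (powers of two plus their midpoints), and size_to_idx() defined earlier picks the first bucket that fits. A standalone restatement of that arithmetic, with a portable msb64 standing in for utils_msb64 and assuming size is between min_bucket_size and CutOff (the guards the real code handles separately):

```c
#include <stddef.h>
#include <stdint.h>

// Index of the highest set bit (portable stand-in for utils_msb64()).
static size_t msb64(uint64_t v) {
    size_t pos = 0;
    while (v >>= 1) {
        pos++;
    }
    return pos;
}

static size_t size_to_idx_demo(size_t size, size_t min_bucket_size_exp) {
    size_t position = msb64(size);
    int is_power_of_2 = (size & (size - 1)) == 0;
    int above_midpoint =
        !is_power_of_2 && (((size - 1) & ((uint64_t)1 << (position - 1))) != 0);
    return (position - min_bucket_size_exp) * 2 + !is_power_of_2 +
           above_midpoint;
}

// With min_bucket_size = 64 (exp = 6):
//   size_to_idx_demo(64, 6)  == 0 -> 64-byte bucket (exact power of two)
//   size_to_idx_demo(65, 6)  == 1 -> 96-byte bucket (at or below midpoint)
//   size_to_idx_demo(100, 6) == 2 -> 128-byte bucket (above the midpoint)
```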
+ disjoint_pool->min_bucket_size_exp = (size_t)utils_msb64(Size1); + disjoint_pool->default_shared_limits = + umfDisjointPoolSharedLimitsCreate(SIZE_MAX); + if (disjoint_pool->default_shared_limits == NULL) { + goto err_free_known_slabs; + } + + // count number of buckets, start from 1 + disjoint_pool->buckets_num = 1; + size_t Size2 = Size1 + Size1 / 2; + size_t ts2 = Size2, ts1 = Size1; + while (Size2 < CutOff) { + disjoint_pool->buckets_num += 2; + Size2 *= 2; + } + + disjoint_pool->buckets = umf_ba_global_alloc( + sizeof(*disjoint_pool->buckets) * disjoint_pool->buckets_num); + if (disjoint_pool->buckets == NULL) { + goto err_free_shared_limits; + } + + size_t i = 0; + Size1 = ts1; + Size2 = ts2; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { + disjoint_pool->buckets[i] = create_bucket( + Size1, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + disjoint_pool->buckets[i + 1] = create_bucket( + Size2, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + } + disjoint_pool->buckets[i] = create_bucket( + CutOff, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + + // check if all buckets were created successfully + for (i = 0; i < disjoint_pool->buckets_num; i++) { + if (disjoint_pool->buckets[i] == NULL) { + goto err_free_buckets; + } + } + + umf_result_t ret = umfMemoryProviderGetMinPageSize( + provider, NULL, &disjoint_pool->provider_min_page_size); + if (ret != UMF_RESULT_SUCCESS) { + disjoint_pool->provider_min_page_size = 0; + } + + *ppPool = (void *)disjoint_pool; + + return UMF_RESULT_SUCCESS; + +err_free_buckets: + for (i = 0; i < disjoint_pool->buckets_num; i++) { + if (disjoint_pool->buckets[i] != NULL) { + destroy_bucket(disjoint_pool->buckets[i]); + } + } + umf_ba_global_free(disjoint_pool->buckets); + +err_free_shared_limits: + umfDisjointPoolSharedLimitsDestroy(disjoint_pool->default_shared_limits); + +err_free_known_slabs: + critnib_delete(disjoint_pool->known_slabs); + +err_free_disjoint_pool: + umf_ba_global_free(disjoint_pool); + + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; +} + +void *disjoint_pool_malloc(void *pool, size_t size) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + void *ptr = disjoint_pool_allocate(hPool, size); + + return ptr; +} + +void *disjoint_pool_calloc(void *pool, size_t num, size_t size) { + (void)pool; + (void)num; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_realloc(void *pool, void *ptr, size_t size) { + (void)pool; + (void)ptr; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + + void *ptr = NULL; + + if (size == 0) { + return NULL; + } + + if (alignment <= 1) { + return disjoint_pool_allocate(pool, size); + } + + size_t aligned_size; + if (alignment <= disjoint_pool->provider_min_page_size) { + // This allocation will be served from a Bucket which size is multiple + // of Alignment and Slab address is aligned to provider_min_page_size + // so the address will be properly aligned. + aligned_size = (size > 1) ? ALIGN_UP_SAFE(size, alignment) : alignment; + } else { + // Slabs are only aligned to provider_min_page_size, we need to compensate + // for that in case the allocation is within pooling limit. 
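The size + alignment - 1 compensation applied just below works because any chunk of that size must contain an alignment-aligned address with at least size bytes after it. A small sketch of the align-up arithmetic (align_up is a portable stand-in for ALIGN_UP_SAFE; the numbers are illustrative):

```c
#include <stdint.h>

// Round value up to the next multiple of a power-of-two alignment.
static inline uintptr_t align_up(uintptr_t value, uintptr_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

// Example: size = 100, alignment = 256 gives aligned_size = 355, which the
// bucket ladder serves from the 384-byte bucket. For a chunk at 0x1040:
//   align_up(0x1040, 256) == 0x1100, an offset of 192 <= alignment - 1,
//   leaving 384 - 192 == 192 bytes for the 100 requested.
```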
+ // TODO: consider creating properly-aligned Slabs on demand + aligned_size = size + alignment - 1; + } + + // Check if requested allocation size is within pooling limit. + // If not, just request aligned pointer from the system. + if (aligned_size > disjoint_pool->params.max_poolable_size) { + + umf_result_t ret = umfMemoryProviderAlloc(disjoint_pool->provider, size, + alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("allocation from the memory provider failed"); + return NULL; + } + + assert(ptr); + utils_annotate_memory_undefined(ptr, size); + return ptr; + } + + bool from_pool = false; + bucket_t *bucket = disjoint_pool_find_bucket(pool, aligned_size); + + utils_mutex_lock(&bucket->bucket_lock); + + ptr = bucket_get_free_chunk(bucket, &from_pool); + + if (ptr == NULL) { + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + if (disjoint_pool->params.pool_trace > 1) { + // update stats + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } + } + + void *aligned_ptr = (void *)ALIGN_UP_SAFE((size_t)ptr, alignment); + size_t diff = (ptrdiff_t)aligned_ptr - (ptrdiff_t)ptr; + size_t real_size = bucket->size - diff; + VALGRIND_DO_MEMPOOL_ALLOC(disjoint_pool, aligned_ptr, real_size); + utils_annotate_memory_undefined(aligned_ptr, real_size); + + utils_mutex_unlock(&bucket->bucket_lock); + + if (disjoint_pool->params.pool_trace > 2) { + LOG_DEBUG("Allocated %8zu %s bytes aligned at %zu from %s -> %p", size, + disjoint_pool->params.name, alignment, + (from_pool ? "pool" : "provider"), ptr); + } + + return aligned_ptr; +} + +static size_t get_chunk_idx(void *ptr, slab_t *slab) { + return (((uintptr_t)ptr - (uintptr_t)slab->mem_ptr) / slab->bucket->size); +} + +static void *get_unaligned_ptr(size_t chunk_idx, slab_t *slab) { + return (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); +} + +size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + if (ptr == NULL) { + return 0; + } + + // check if given pointer is allocated inside any Disjoint Pool slab + slab_t *slab = + (slab_t *)critnib_find_le(disjoint_pool->known_slabs, (uintptr_t)ptr); + if (slab == NULL || ptr >= slab_get_end(slab)) { + // memory comes directly from the provider + umf_alloc_info_t allocInfo = {NULL, 0, NULL}; + umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + if (ret != UMF_RESULT_SUCCESS) { + return 0; + } + + return allocInfo.baseSize; + } + // Get the unaligned pointer + // NOTE: the base pointer slab->mem_ptr needn't to be aligned to bucket size + size_t chunk_idx = get_chunk_idx(ptr, slab); + void *unaligned_ptr = get_unaligned_ptr(chunk_idx, slab); + + ptrdiff_t diff = (ptrdiff_t)ptr - (ptrdiff_t)unaligned_ptr; + + return slab->bucket->size - diff; +} + +umf_result_t disjoint_pool_free(void *pool, void *ptr) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + if (ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + + // check if given pointer is allocated inside any Disjoint Pool slab + slab_t *slab = + (slab_t *)critnib_find_le(disjoint_pool->known_slabs, (uintptr_t)ptr); + + if (slab == NULL || ptr >= slab_get_end(slab)) { + + // regular free + umf_alloc_info_t allocInfo = {NULL, 0, NULL}; + umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("failed to get 
allocation info from the memory tracker"); + return ret; + } + + size_t size = allocInfo.baseSize; + umf_memory_provider_handle_t provider = disjoint_pool->provider; + ret = umfMemoryProviderFree(provider, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("deallocation from the memory provider failed"); + } + + return ret; + } + + bool to_pool = false; + + if (ptr < slab_get(slab) || ptr >= slab_get_end(slab)) { + assert(0); + return UMF_RESULT_ERROR_UNKNOWN; + } + + // The slab object won't be deleted until it's removed from the map which is + // protected by the lock, so it's safe to access it here. + + bucket_t *bucket = slab->bucket; + + utils_mutex_lock(&bucket->bucket_lock); + VALGRIND_DO_MEMPOOL_FREE(pool, ptr); + + // Get the unaligned pointer + // NOTE: the base pointer slab->mem_ptr needn't to be aligned to bucket size + size_t chunk_idx = get_chunk_idx(ptr, slab); + void *unaligned_ptr = get_unaligned_ptr(chunk_idx, slab); + + utils_annotate_memory_inaccessible(unaligned_ptr, bucket->size); + bucket_free_chunk(bucket, unaligned_ptr, slab, &to_pool); + + if (disjoint_pool->params.pool_trace > 1) { + bucket->free_count++; + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (disjoint_pool->params.pool_trace > 2) { + const char *name = disjoint_pool->params.name; + LOG_DEBUG("freed %s %p to %s, current total pool size: %" PRIu64 + ", current " + "pool size for %s: %zu", + name, ptr, (to_pool ? "pool" : "provider"), + disjoint_pool_get_limits(disjoint_pool)->total_size, name, + disjoint_pool->params.cur_pool_size); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { + (void)pool; + return TLS_last_allocation_error; +} + +// Define destructor for use with unique_ptr +void disjoint_pool_finalize(void *pool) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + if (hPool->params.pool_trace > 1) { + disjoint_pool_print_stats(hPool); + } + + for (size_t i = 0; i < hPool->buckets_num; i++) { + destroy_bucket(hPool->buckets[i]); + } + + VALGRIND_DO_DESTROY_MEMPOOL(hPool); + + umfDisjointPoolSharedLimitsDestroy(hPool->default_shared_limits); + critnib_delete(hPool->known_slabs); + + umf_ba_global_free(hPool); +} + +static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = disjoint_pool_initialize, + .finalize = disjoint_pool_finalize, + .malloc = disjoint_pool_malloc, + .calloc = disjoint_pool_calloc, + .realloc = disjoint_pool_realloc, + .aligned_malloc = disjoint_pool_aligned_malloc, + .malloc_usable_size = disjoint_pool_malloc_usable_size, + .free = disjoint_pool_free, + .get_last_allocation_error = disjoint_pool_get_last_allocation_error, +}; + +umf_memory_pool_ops_t *umfDisjointPoolOps(void) { + return &UMF_DISJOINT_POOL_OPS; +} + +umf_disjoint_pool_shared_limits_t * +umfDisjointPoolSharedLimitsCreate(size_t max_size) { + umf_disjoint_pool_shared_limits_t *ptr = umf_ba_global_alloc(sizeof(*ptr)); + if (ptr == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool shared limits"); + return NULL; + } + ptr->max_size = max_size; + ptr->total_size = 0; + return ptr; +} + +void umfDisjointPoolSharedLimitsDestroy( + umf_disjoint_pool_shared_limits_t *limits) { + umf_ba_global_free(limits); +} + +umf_result_t +umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { + static char *DEFAULT_NAME = "disjoint_pool"; + + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return 
UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+
+    umf_disjoint_pool_params_handle_t params =
+        umf_ba_global_alloc(sizeof(*params));
+    if (params == NULL) {
+        LOG_ERR("cannot allocate memory for disjoint pool params");
+        return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+    }
+
+    *params = (umf_disjoint_pool_params_t){
+        .slab_min_size = 0,
+        .max_poolable_size = 0,
+        .capacity = 0,
+        .min_bucket_size = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE,
+        .cur_pool_size = 0,
+        .pool_trace = 0,
+        .shared_limits = NULL,
+        .name = {0},
+    };
+
+    // copy the whole default name into the fixed-size name buffer
+    strncpy(params->name, DEFAULT_NAME, sizeof(params->name) - 1);
+
+    *hParams = params;
+
+    return UMF_RESULT_SUCCESS;
+}
+
+umf_result_t
+umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) {
+    // NOTE: dereferencing hParams when BA is already destroyed leads to crash
+    if (hParams && !umf_ba_global_is_destroyed()) {
+        umf_ba_global_free(hParams);
+    }
+
+    return UMF_RESULT_SUCCESS;
+}
+
+umf_result_t
+umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams,
+                                    size_t slabMinSize) {
+    if (!hParams) {
+        LOG_ERR("disjoint pool params handle is NULL");
+        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+
+    hParams->slab_min_size = slabMinSize;
+    return UMF_RESULT_SUCCESS;
+}
+
+umf_result_t umfDisjointPoolParamsSetMaxPoolableSize(
+    umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize) {
+    if (!hParams) {
+        LOG_ERR("disjoint pool params handle is NULL");
+        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+
+    hParams->max_poolable_size = maxPoolableSize;
+    return UMF_RESULT_SUCCESS;
+}
+
+umf_result_t
+umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams,
+                                 size_t maxCapacity) {
+    if (!hParams) {
+        LOG_ERR("disjoint pool params handle is NULL");
+        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+
+    hParams->capacity = maxCapacity;
+    return UMF_RESULT_SUCCESS;
+}
+
+umf_result_t
+umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams,
+                                      size_t minBucketSize) {
+    if (!hParams) {
+        LOG_ERR("disjoint pool params handle is NULL");
+        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+
+    // minBucketSize parameter must be a power of 2 and greater than 0.
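The validity check that follows uses the classic clear-lowest-set-bit trick; in isolation:

```c
#include <stdbool.h>
#include <stddef.h>

// Standalone version of the power-of-two test used below: clearing the
// lowest set bit of x leaves 0 exactly when x has a single bit set.
static bool is_valid_min_bucket_size(size_t x) {
    return x != 0 && (x & (x - 1)) == 0;
}

// is_valid_min_bucket_size(64) == true   (0b1000000 & 0b0111111 == 0)
// is_valid_min_bucket_size(96) == false  (0b1100000 & 0b1011111 == 0b1000000)
// is_valid_min_bucket_size(0)  == false  (guarded explicitly)
```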
+ if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { + LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->min_bucket_size = minBucketSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, + int poolTrace) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->pool_trace = poolTrace; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetSharedLimits( + umf_disjoint_pool_params_handle_t hParams, + umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->shared_limits = hSharedLimits; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, + const char *name) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + strncpy(hParams->name, name, sizeof(hParams->name) - 1); + return UMF_RESULT_SUCCESS; +} diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp deleted file mode 100644 index e0298b43df..0000000000 --- a/src/pool/pool_disjoint.cpp +++ /dev/null @@ -1,1313 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO: replace with logger? -#include - -#include "provider/provider_tracking.h" - -#include "../cpp_helpers.hpp" -#include "pool_disjoint.h" -#include "umf.h" -#include "utils_log.h" -#include "utils_math.h" -#include "utils_sanitizers.h" - -// Temporary solution for disabling memory poisoning. This is needed because -// AddressSanitizer does not support memory poisoning for GPU allocations. -// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 -#ifndef POISON_MEMORY -#define POISON_MEMORY 0 -#endif - -static inline void annotate_memory_inaccessible([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_inaccessible(ptr, size); -#endif -} - -static inline void annotate_memory_undefined([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_undefined(ptr, size); -#endif -} - -typedef struct umf_disjoint_pool_shared_limits_t { - size_t MaxSize; - std::atomic TotalSize; -} umf_disjoint_pool_shared_limits_t; - -// Configuration of Disjoint Pool -typedef struct umf_disjoint_pool_params_t { - // Minimum allocation size that will be requested from the memory provider. - size_t SlabMinSize; - - // Allocations up to this limit will be subject to chunking/pooling - size_t MaxPoolableSize; - - // When pooling, each bucket will hold a max of 'Capacity' unfreed slabs - size_t Capacity; - - // Holds the minimum bucket size valid for allocation of a memory type. - // This value must be a power of 2. - size_t MinBucketSize; - - // Holds size of the pool managed by the allocator. 
- size_t CurPoolSize; - - // Whether to print pool usage statistics - int PoolTrace; - - // Memory limits that can be shared between multitple pool instances, - // i.e. if multiple pools use the same SharedLimits sum of those pools' - // sizes cannot exceed MaxSize. - umf_disjoint_pool_shared_limits_handle_t SharedLimits; - - // Name used in traces - char *Name; -} umf_disjoint_pool_params_t; - -class DisjointPool { - public: - class AllocImpl; - using Config = umf_disjoint_pool_params_t; - - umf_result_t initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_handle_t parameters); - void *malloc(size_t size); - void *calloc(size_t, size_t); - void *realloc(void *, size_t); - void *aligned_malloc(size_t size, size_t alignment); - size_t malloc_usable_size(void *); - umf_result_t free(void *ptr); - umf_result_t get_last_allocation_error(); - - DisjointPool(); - ~DisjointPool(); - - private: - std::unique_ptr impl; -}; - -umf_disjoint_pool_shared_limits_t * -umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { - return new umf_disjoint_pool_shared_limits_t{MaxSize, 0}; -} - -void umfDisjointPoolSharedLimitsDestroy( - umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { - delete hSharedLimits; -} - -umf_result_t -umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { - static const char *DEFAULT_NAME = "disjoint_pool"; - - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_disjoint_pool_params_handle_t params = new umf_disjoint_pool_params_t{}; - if (params == nullptr) { - LOG_ERR("cannot allocate memory for disjoint pool params"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - params->SlabMinSize = 0; - params->MaxPoolableSize = 0; - params->Capacity = 0; - params->MinBucketSize = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; - params->CurPoolSize = 0; - params->PoolTrace = 0; - params->SharedLimits = nullptr; - params->Name = nullptr; - - umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); - if (ret != UMF_RESULT_SUCCESS) { - delete params; - return ret; - } - - *hParams = params; - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { - if (hParams) { - delete[] hParams->Name; - delete hParams; - } - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, - size_t slabMinSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->SlabMinSize = slabMinSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( - umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->MaxPoolableSize = maxPoolableSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, - size_t maxCapacity) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->Capacity = maxCapacity; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, - size_t minBucketSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - 
// minBucketSize parameter must be a power of 2 and greater than 0. - if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { - LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->MinBucketSize = minBucketSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, - int poolTrace) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->PoolTrace = poolTrace; - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfDisjointPoolParamsSetSharedLimits( - umf_disjoint_pool_params_handle_t hParams, - umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->SharedLimits = hSharedLimits; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, - const char *name) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - char *newName = new char[std::strlen(name) + 1]; - if (newName == nullptr) { - LOG_ERR("cannot allocate memory for disjoint pool name"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - delete[] hParams->Name; - hParams->Name = newName; - std::strcpy(hParams->Name, name); - - return UMF_RESULT_SUCCESS; -} - -// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is -// requested. The implementation distinguishes between allocations of size -// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. -// Allocation requests smaller than ChunkCutoff use chunks taken from a single -// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation -// size, and 8-byte allocations, only 1 in ~8000 requests results in a new -// coarse-grain allocation. Freeing results only in a chunk of a larger -// allocation to be marked as available and no real return to the system. An -// allocation is returned to the system only when all chunks in the larger -// allocation are freed by the program. Allocations larger than ChunkCutOff use -// a separate coarse-grain allocation for each request. These are subject to -// "pooling". That is, when such an allocation is freed by the program it is -// retained in a pool. The pool is available for future allocations, which means -// there are fewer actual coarse-grain allocations/deallocations. - -// The largest size which is allocated via the allocator. -// Allocations with size > CutOff bypass the pool and -// go directly to the provider. -static constexpr size_t CutOff = (size_t)1 << 31; // 2GB - -// Aligns the pointer down to the specified alignment -// (e.g. returns 8 for Size = 13, Alignment = 8) -static void *AlignPtrDown(void *Ptr, const size_t Alignment) { - return reinterpret_cast((reinterpret_cast(Ptr)) & - (~(Alignment - 1))); -} - -// Aligns the pointer up to the specified alignment -// (e.g. returns 16 for Size = 13, Alignment = 8) -static void *AlignPtrUp(void *Ptr, const size_t Alignment) { - void *AlignedPtr = AlignPtrDown(Ptr, Alignment); - // Special case when the pointer is already aligned - if (Ptr == AlignedPtr) { - return Ptr; - } - return static_cast(AlignedPtr) + Alignment; -} - -// Aligns the value up to the specified alignment -// (e.g. 
returns 16 for Size = 13, Alignment = 8) -static size_t AlignUp(size_t Val, size_t Alignment) { - assert(Alignment > 0); - return (Val + Alignment - 1) & (~(Alignment - 1)); -} - -typedef struct MemoryProviderError { - umf_result_t code; -} MemoryProviderError_t; - -class Bucket; - -// Represents the allocated memory block of size 'SlabMinSize' -// Internally, it splits the memory block into chunks. The number of -// chunks depends of the size of a Bucket which created the Slab. -// Note: Bucket's methods are responsible for thread safety of Slab access, -// so no locking happens here. -class Slab { - - // Pointer to the allocated memory of SlabMinSize bytes - void *MemPtr; - - // Represents the current state of each chunk: - // if the bit is set then the chunk is allocated - // the chunk is free for allocation otherwise - std::vector Chunks; - - // Total number of allocated chunks at the moment. - size_t NumAllocated = 0; - - // The bucket which the slab belongs to - Bucket &bucket; - - using ListIter = std::list>::iterator; - - // Store iterator to the corresponding node in avail/unavail list - // to achieve O(1) removal - ListIter SlabListIter; - - // Hints where to start search for free chunk in a slab - size_t FirstFreeChunkIdx = 0; - - // Return the index of the first available chunk, SIZE_MAX otherwise - size_t FindFirstAvailableChunkIdx() const; - - // Register/Unregister the slab in the global slab address map. - void regSlab(Slab &); - void unregSlab(Slab &); - static void regSlabByAddr(void *, Slab &); - static void unregSlabByAddr(void *, Slab &); - - public: - Slab(Bucket &); - ~Slab(); - - void setIterator(ListIter It) { SlabListIter = It; } - ListIter getIterator() const { return SlabListIter; } - - size_t getNumAllocated() const { return NumAllocated; } - - // Get pointer to allocation that is one piece of this slab. - void *getChunk(); - - // Get pointer to allocation that is this entire slab. - void *getSlab(); - - void *getPtr() const { return MemPtr; } - void *getEnd() const; - - size_t getChunkSize() const; - size_t getNumChunks() const { return Chunks.size(); } - - bool hasAvail(); - - Bucket &getBucket(); - const Bucket &getBucket() const; - - void freeChunk(void *Ptr); -}; - -class Bucket { - const size_t Size; - - // List of slabs which have at least 1 available chunk. - std::list> AvailableSlabs; - - // List of slabs with 0 available chunk. - std::list> UnavailableSlabs; - - // Protects the bucket and all the corresponding slabs - std::mutex BucketLock; - - // Reference to the allocator context, used access memory allocation - // routines, slab map and etc. - DisjointPool::AllocImpl &OwnAllocCtx; - - // For buckets used in chunked mode, a counter of slabs in the pool. - // For allocations that use an entire slab each, the entries in the Available - // list are entries in the pool.Each slab is available for a new - // allocation.The size of the Available list is the size of the pool. - // For allocations that use slabs in chunked mode, slabs will be in the - // Available list if any one or more of their chunks is free.The entire slab - // is not necessarily free, just some chunks in the slab are free. To - // implement pooling we will allow one slab in the Available list to be - // entirely empty. Normally such a slab would have been freed. But - // now we don't, and treat this slab as "in the pool". - // When a slab becomes entirely free we have to decide whether to return it - // to the provider or keep it allocated. 
A simple check for size of the - // Available list is not sufficient to check whether any slab has been - // pooled yet. We would have to traverse the entire Available list and check - // if any of them is entirely free. Instead we keep a counter of entirely - // empty slabs within the Available list to speed up the process of checking - // if a slab in this bucket is already pooled. - size_t chunkedSlabsInPool; - - // Statistics - size_t allocPoolCount; - size_t freeCount; - size_t currSlabsInUse; - size_t currSlabsInPool; - size_t maxSlabsInPool; - - public: - // Statistics - size_t allocCount; - size_t maxSlabsInUse; - - Bucket(size_t Sz, DisjointPool::AllocImpl &AllocCtx) - : Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0), - allocPoolCount(0), freeCount(0), currSlabsInUse(0), - currSlabsInPool(0), maxSlabsInPool(0), allocCount(0), - maxSlabsInUse(0) {} - - // Get pointer to allocation that is one piece of an available slab in this - // bucket. - void *getChunk(bool &FromPool); - - // Get pointer to allocation that is a full slab in this bucket. - void *getSlab(bool &FromPool); - - // Return the allocation size of this bucket. - size_t getSize() const { return Size; } - - // Free an allocation that is one piece of a slab in this bucket. - void freeChunk(void *Ptr, Slab &Slab, bool &ToPool); - - // Free an allocation that is a full slab in this bucket. - void freeSlab(Slab &Slab, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle(); - - DisjointPool::AllocImpl &getAllocCtx() { return OwnAllocCtx; } - - // Check whether an allocation to be freed can be placed in the pool. - bool CanPool(bool &ToPool); - - // The minimum allocation size for any slab. - size_t SlabMinSize(); - - // The allocation size for a slab in this bucket. - size_t SlabAllocSize(); - - // The minimum size of a chunk from this bucket's slabs. - size_t ChunkCutOff(); - - // The number of slabs in this bucket that can be in the pool. - size_t Capacity(); - - // The maximum allocation size subject to pooling. - size_t MaxPoolableSize(); - - // Update allocation count - void countAlloc(bool FromPool); - - // Update free count - void countFree(); - - // Update statistics of Available/Unavailable - void updateStats(int InUse, int InPool); - - // Print bucket statistics - void printStats(bool &TitlePrinted, const std::string &Label); - - private: - void onFreeChunk(Slab &, bool &ToPool); - - // Update statistics of pool usage, and indicate that an allocation was made - // from the pool. - void decrementPool(bool &FromPool); - - // Get a slab to be used for chunked allocations. - decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool); - - // Get a slab that will be used as a whole for a single allocation. - decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool); -}; - -class DisjointPool::AllocImpl { - // It's important for the map to be destroyed last after buckets and their - // slabs This is because slab's destructor removes the object from the map. 
- std::unordered_multimap KnownSlabs; - std::shared_timed_mutex KnownSlabsMapLock; - - // Handle to the memory provider - umf_memory_provider_handle_t MemHandle; - - // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - std::vector> Buckets; - - // Configuration for this instance - umf_disjoint_pool_params_t params; - - umf_disjoint_pool_shared_limits_t DefaultSharedLimits = { - (std::numeric_limits::max)(), 0}; - - // Used in algorithm for finding buckets - std::size_t MinBucketSizeExp; - - // Coarse-grain allocation min alignment - size_t ProviderMinPageSize; - - public: - AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_handle_t params) - : MemHandle{hProvider}, params(*params) { - - VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); - - // deep copy of the Name - this->params.Name = new char[std::strlen(params->Name) + 1]; - std::strcpy(this->params.Name, params->Name); - - // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. - // Powers of 2 and the value halfway between the powers of 2. - auto Size1 = this->params.MinBucketSize; - // MinBucketSize cannot be larger than CutOff. - Size1 = std::min(Size1, CutOff); - // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); - // Calculate the exponent for MinBucketSize used for finding buckets. - MinBucketSizeExp = (size_t)log2Utils(Size1); - auto Size2 = Size1 + Size1 / 2; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - Buckets.push_back(std::make_unique(Size1, *this)); - Buckets.push_back(std::make_unique(Size2, *this)); - } - Buckets.push_back(std::make_unique(CutOff, *this)); - - auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, - &ProviderMinPageSize); - if (ret != UMF_RESULT_SUCCESS) { - ProviderMinPageSize = 0; - } - } - - ~AllocImpl() { - VALGRIND_DO_DESTROY_MEMPOOL(this); - delete[] this->params.Name; - } - - void *allocate(size_t Size, size_t Alignment, bool &FromPool); - void *allocate(size_t Size, bool &FromPool); - void deallocate(void *Ptr, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle() { return MemHandle; } - - std::shared_timed_mutex &getKnownSlabsMapLock() { - return KnownSlabsMapLock; - } - std::unordered_multimap &getKnownSlabs() { - return KnownSlabs; - } - - size_t SlabMinSize() { return params.SlabMinSize; }; - - umf_disjoint_pool_params_t &getParams() { return params; } - - umf_disjoint_pool_shared_limits_t *getLimits() { - if (params.SharedLimits) { - return params.SharedLimits; - } else { - return &DefaultSharedLimits; - } - }; - - void printStats(bool &TitlePrinted, size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, const std::string &Label); - - private: - Bucket &findBucket(size_t Size); - std::size_t sizeToIdx(size_t Size); -}; - -static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, - size_t size, size_t alignment = 0) { - void *ptr; - auto ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } - annotate_memory_inaccessible(ptr, size); - return ptr; -} - -static void memoryProviderFree(umf_memory_provider_handle_t hProvider, - void *ptr) { - size_t size = 0; - - if (ptr) { - umf_alloc_info_t allocInfo = {NULL, 0, NULL}; - umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); - if (umf_result == UMF_RESULT_SUCCESS) { - size = allocInfo.baseSize; - } - } - - auto ret = umfMemoryProviderFree(hProvider, ptr, size); - if 
(ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } -} - -bool operator==(const Slab &Lhs, const Slab &Rhs) { - return Lhs.getPtr() == Rhs.getPtr(); -} - -std::ostream &operator<<(std::ostream &Os, const Slab &Slab) { - Os << "Slab<" << Slab.getPtr() << ", " << Slab.getEnd() << ", " - << Slab.getBucket().getSize() << ">"; - return Os; -} - -Slab::Slab(Bucket &Bkt) - : // In case bucket size is not a multiple of SlabMinSize, we would have - // some padding at the end of the slab. - Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0}, - bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} { - auto SlabSize = Bkt.SlabAllocSize(); - MemPtr = memoryProviderAlloc(Bkt.getMemHandle(), SlabSize); - regSlab(*this); -} - -Slab::~Slab() { - try { - unregSlab(*this); - } catch (std::exception &e) { - LOG_ERR("DisjointPool: unexpected error: %s", e.what()); - } - - try { - memoryProviderFree(bucket.getMemHandle(), MemPtr); - } catch (MemoryProviderError &e) { - LOG_ERR("DisjointPool: error from memory provider: %d", e.code); - - if (e.code == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { - const char *message = ""; - int error = 0; - - try { - umfMemoryProviderGetLastNativeError( - umfGetLastFailedMemoryProvider(), &message, &error); - LOG_ERR("Native error msg: %s, native error code: %d", message, - error); - } catch (...) { - // ignore any additional errors from logger - } - } - } -} - -// Return the index of the first available chunk, SIZE_MAX otherwise -size_t Slab::FindFirstAvailableChunkIdx() const { - // Use the first free chunk index as a hint for the search. - auto It = std::find_if(Chunks.begin() + FirstFreeChunkIdx, Chunks.end(), - [](auto x) { return !x; }); - if (It != Chunks.end()) { - return It - Chunks.begin(); - } - - return std::numeric_limits::max(); -} - -void *Slab::getChunk() { - // assert(NumAllocated != Chunks.size()); - - const size_t ChunkIdx = FindFirstAvailableChunkIdx(); - // Free chunk must exist, otherwise we would have allocated another slab - assert(ChunkIdx != (std::numeric_limits::max())); - - void *const FreeChunk = - (static_cast(getPtr())) + ChunkIdx * getChunkSize(); - Chunks[ChunkIdx] = true; - NumAllocated += 1; - - // Use the found index as the next hint - FirstFreeChunkIdx = ChunkIdx; - - return FreeChunk; -} - -void *Slab::getSlab() { return getPtr(); } - -Bucket &Slab::getBucket() { return bucket; } -const Bucket &Slab::getBucket() const { return bucket; } - -size_t Slab::getChunkSize() const { return bucket.getSize(); } - -void Slab::regSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - Map.insert({Addr, Slab}); -} - -void Slab::unregSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - - auto Slabs = Map.equal_range(Addr); - // At least the must get the current slab from the map. 
- assert(Slabs.first != Slabs.second && "Slab is not found"); - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - if (It->second == Slab) { - Map.erase(It); - return; - } - } - - assert(false && "Slab is not found"); -} - -void Slab::regSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - regSlabByAddr(StartAddr, Slab); - regSlabByAddr(EndAddr, Slab); -} - -void Slab::unregSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - unregSlabByAddr(StartAddr, Slab); - unregSlabByAddr(EndAddr, Slab); -} - -void Slab::freeChunk(void *Ptr) { - // This method should be called through bucket(since we might remove the slab - // as a result), therefore all locks are done on that level. - - // Make sure that we're in the right slab - assert(Ptr >= getPtr() && Ptr < getEnd()); - - // Even if the pointer p was previously aligned, it's still inside the - // corresponding chunk, so we get the correct index here. - auto ChunkIdx = (static_cast(Ptr) - static_cast(MemPtr)) / - getChunkSize(); - - // Make sure that the chunk was allocated - assert(Chunks[ChunkIdx] && "double free detected"); - - Chunks[ChunkIdx] = false; - NumAllocated -= 1; - - if (ChunkIdx < FirstFreeChunkIdx) { - FirstFreeChunkIdx = ChunkIdx; - } -} - -void *Slab::getEnd() const { - return static_cast(getPtr()) + bucket.SlabMinSize(); -} - -bool Slab::hasAvail() { return NumAllocated != getNumChunks(); } - -// If a slab was available in the pool then note that the current pooled -// size has reduced by the size of a slab in this bucket. -void Bucket::decrementPool(bool &FromPool) { - FromPool = true; - updateStats(1, -1); - OwnAllocCtx.getLimits()->TotalSize -= SlabAllocSize(); -} - -auto Bucket::getAvailFullSlab(bool &FromPool) - -> decltype(AvailableSlabs.begin()) { - // Return a slab that will be used for a single allocation. - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - FromPool = false; - updateStats(1, 0); - } else { - decrementPool(FromPool); - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getSlab(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailFullSlab(FromPool); - auto *FreeSlab = (*SlabIt)->getSlab(); - auto It = - UnavailableSlabs.insert(UnavailableSlabs.begin(), std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - return FreeSlab; -} - -void Bucket::freeSlab(Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - if (CanPool(ToPool)) { - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - (*It)->setIterator(It); - } else { - UnavailableSlabs.erase(SlabIter); - } -} - -auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { - - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - - updateStats(1, 0); - FromPool = false; - } else { - if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) { - // If this was an empty slab, it was in the pool. - // Now it is no longer in the pool, so update count. 
- --chunkedSlabsInPool; - decrementPool(FromPool); - } else { - // Allocation from existing slab is treated as from pool for statistics. - FromPool = true; - } - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getChunk(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailSlab(FromPool); - auto *FreeChunk = (*SlabIt)->getChunk(); - - // If the slab is full, move it to unavailable slabs and update its iterator - if (!((*SlabIt)->hasAvail())) { - auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), - std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - } - - return FreeChunk; -} - -void Bucket::freeChunk(void *Ptr, Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - - Slab.freeChunk(Ptr); - - onFreeChunk(Slab, ToPool); -} - -// The lock must be acquired before calling this method -void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) { - ToPool = true; - - // In case if the slab was previously full and now has 1 available - // chunk, it should be moved to the list of available slabs - if (Slab.getNumAllocated() == (Slab.getNumChunks() - 1)) { - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - - (*It)->setIterator(It); - } - - // Check if slab is empty, and pool it if we can. - if (Slab.getNumAllocated() == 0) { - // The slab is now empty. - // If pool has capacity then put the slab in the pool. - // The ToPool parameter indicates whether the Slab will be put in the - // pool or freed. - if (!CanPool(ToPool)) { - // Note: since the slab is stored as unique_ptr, just remove it from - // the list to destroy the object. - auto It = Slab.getIterator(); - assert(It != AvailableSlabs.end()); - AvailableSlabs.erase(It); - } - } -} - -bool Bucket::CanPool(bool &ToPool) { - size_t NewFreeSlabsInBucket; - // Check if this bucket is used in chunked form or as full slabs. - bool chunkedBucket = getSize() <= ChunkCutOff(); - if (chunkedBucket) { - NewFreeSlabsInBucket = chunkedSlabsInPool + 1; - } else { - NewFreeSlabsInBucket = AvailableSlabs.size() + 1; - } - if (Capacity() >= NewFreeSlabsInBucket) { - size_t PoolSize = OwnAllocCtx.getLimits()->TotalSize; - while (true) { - size_t NewPoolSize = PoolSize + SlabAllocSize(); - - if (OwnAllocCtx.getLimits()->MaxSize < NewPoolSize) { - break; - } - - if (OwnAllocCtx.getLimits()->TotalSize.compare_exchange_strong( - PoolSize, NewPoolSize)) { - if (chunkedBucket) { - ++chunkedSlabsInPool; - } - - updateStats(-1, 1); - ToPool = true; - return true; - } - } - } - - updateStats(-1, 0); - ToPool = false; - return false; -} - -umf_memory_provider_handle_t Bucket::getMemHandle() { - return OwnAllocCtx.getMemHandle(); -} - -size_t Bucket::SlabMinSize() { return OwnAllocCtx.getParams().SlabMinSize; } - -size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); } - -size_t Bucket::Capacity() { - // For buckets used in chunked mode, just one slab in pool is sufficient. - // For larger buckets, the capacity could be more and is adjustable. 
- if (getSize() <= ChunkCutOff()) { - return 1; - } else { - return OwnAllocCtx.getParams().Capacity; - } -} - -size_t Bucket::MaxPoolableSize() { - return OwnAllocCtx.getParams().MaxPoolableSize; -} - -size_t Bucket::ChunkCutOff() { return SlabMinSize() / 2; } - -void Bucket::countAlloc(bool FromPool) { - ++allocCount; - if (FromPool) { - ++allocPoolCount; - } -} - -void Bucket::countFree() { ++freeCount; } - -void Bucket::updateStats(int InUse, int InPool) { - if (OwnAllocCtx.getParams().PoolTrace == 0) { - return; - } - currSlabsInUse += InUse; - maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse); - currSlabsInPool += InPool; - maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool); - // Increment or decrement current pool sizes based on whether - // slab was added to or removed from pool. - OwnAllocCtx.getParams().CurPoolSize += InPool * SlabAllocSize(); -} - -void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { - if (allocCount) { - if (!TitlePrinted) { - std::cout << Label << " memory statistics\n"; - std::cout << std::setw(14) << "Bucket Size" << std::setw(12) - << "Allocs" << std::setw(12) << "Frees" << std::setw(18) - << "Allocs from Pool" << std::setw(20) - << "Peak Slabs in Use" << std::setw(21) - << "Peak Slabs in Pool" << std::endl; - TitlePrinted = true; - } - std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount - << std::setw(12) << freeCount << std::setw(18) - << allocPoolCount << std::setw(20) << maxSlabsInUse - << std::setw(21) << maxSlabsInPool << std::endl; - } -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - FromPool = false; - if (Size > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(Size); - - if (Size > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, Ptr, Size); - annotate_memory_undefined(Ptr, Bucket.getSize()); - - return Ptr; -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, - bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - if (Alignment <= 1) { - return allocate(Size, FromPool); - } - - size_t AlignedSize; - if (Alignment <= ProviderMinPageSize) { - // This allocation will be served from a Bucket which size is multiple - // of Alignment and Slab address is aligned to ProviderMinPageSize - // so the address will be properly aligned. - AlignedSize = (Size > 1) ? AlignUp(Size, Alignment) : Alignment; - } else { - // Slabs are only aligned to ProviderMinPageSize, we need to compensate - // for that in case the allocation is within pooling limit. - // TODO: consider creating properly-aligned Slabs on demand - AlignedSize = Size + Alignment - 1; - } - - // Check if requested allocation size is within pooling limit. - // If not, just request aligned pointer from the system. 
- FromPool = false; - if (AlignedSize > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size, Alignment); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(AlignedSize); - - if (AlignedSize > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, AlignPtrUp(Ptr, Alignment), Size); - annotate_memory_undefined(AlignPtrUp(Ptr, Alignment), Size); - return AlignPtrUp(Ptr, Alignment); -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { - assert(Size <= CutOff && "Unexpected size"); - assert(Size > 0 && "Unexpected size"); - - size_t MinBucketSize = (size_t)1 << MinBucketSizeExp; - if (Size < MinBucketSize) { - return 0; - } - - // Get the position of the leftmost set bit. - size_t position = getLeftmostSetBitPos(Size); - - auto isPowerOf2 = 0 == (Size & (Size - 1)); - auto largerThanHalfwayBetweenPowersOf2 = - !isPowerOf2 && bool((Size - 1) & (uint64_t(1) << (position - 1))); - auto index = (position - MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + - (int)largerThanHalfwayBetweenPowersOf2; - - return index; -} - -Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) { - auto calculatedIdx = sizeToIdx(Size); - assert((*(Buckets[calculatedIdx])).getSize() >= Size); - if (calculatedIdx > 0) { - assert((*(Buckets[calculatedIdx - 1])).getSize() < Size); - } - - return *(Buckets[calculatedIdx]); -} - -void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { - auto *SlabPtr = AlignPtrDown(Ptr, SlabMinSize()); - - // Lock the map on read - std::shared_lock Lk(getKnownSlabsMapLock()); - - ToPool = false; - auto Slabs = getKnownSlabs().equal_range(SlabPtr); - if (Slabs.first == Slabs.second) { - Lk.unlock(); - memoryProviderFree(getMemHandle(), Ptr); - return; - } - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - // The slab object won't be deleted until it's removed from the map which is - // protected by the lock, so it's safe to access it here. - auto &Slab = It->second; - if (Ptr >= Slab.getPtr() && Ptr < Slab.getEnd()) { - // Unlock the map before freeing the chunk, it may be locked on write - // there - Lk.unlock(); - auto &Bucket = Slab.getBucket(); - - if (getParams().PoolTrace > 1) { - Bucket.countFree(); - } - - VALGRIND_DO_MEMPOOL_FREE(this, Ptr); - annotate_memory_inaccessible(Ptr, Bucket.getSize()); - if (Bucket.getSize() <= Bucket.ChunkCutOff()) { - Bucket.freeChunk(Ptr, Slab, ToPool); - } else { - Bucket.freeSlab(Slab, ToPool); - } - - return; - } - } - - Lk.unlock(); - // There is a rare case when we have a pointer from system allocation next - // to some slab with an entry in the map. So we find a slab - // but the range checks fail. 
- memoryProviderFree(getMemHandle(), Ptr); -} - -void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, - size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, - const std::string &MTName) { - HighBucketSize = 0; - HighPeakSlabsInUse = 0; - for (auto &B : Buckets) { - (*B).printStats(TitlePrinted, MTName); - HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse); - if ((*B).allocCount) { - HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize); - } - } -} - -umf_result_t -DisjointPool::initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_handle_t parameters) { - if (!provider) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - // MinBucketSize parameter must be a power of 2 for bucket sizes - // to generate correctly. - if (!parameters->MinBucketSize || - !((parameters->MinBucketSize & (parameters->MinBucketSize - 1)) == 0)) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - impl = std::make_unique(provider, parameters); - return UMF_RESULT_SUCCESS; -} - -void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates - // whether slab is from Pool. - bool FromPool; - auto Ptr = impl->allocate(size, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" - << Ptr << std::endl; - } - return Ptr; -} - -void *DisjointPool::calloc(size_t, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::realloc(void *, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::aligned_malloc(size_t size, size_t alignment) { - bool FromPool; - auto Ptr = impl->allocate(size, alignment, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes aligned at " << alignment << " from " - << (FromPool ? "Pool" : "Provider") << " ->" << Ptr - << std::endl; - } - return Ptr; -} - -size_t DisjointPool::malloc_usable_size(void *) { - // Not supported - return 0; -} - -umf_result_t DisjointPool::free(void *ptr) try { - bool ToPool; - impl->deallocate(ptr, ToPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Freed " << MT << " " << ptr << " to " - << (ToPool ? 
"Pool" : "Provider") - << ", Current total pool size " - << impl->getLimits()->TotalSize.load() - << ", Current pool size for " << MT << " " - << impl->getParams().CurPoolSize << "\n"; - } - return UMF_RESULT_SUCCESS; -} catch (MemoryProviderError &e) { - return e.code; -} - -umf_result_t DisjointPool::get_last_allocation_error() { - return umf::getPoolLastStatusRef(); -} - -DisjointPool::DisjointPool() {} - -// Define destructor for use with unique_ptr -DisjointPool::~DisjointPool() { - bool TitlePrinted = false; - size_t HighBucketSize; - size_t HighPeakSlabsInUse; - if (impl->getParams().PoolTrace > 1) { - auto name = impl->getParams().Name; - try { // cannot throw in destructor - impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, - name); - if (TitlePrinted) { - std::cout << "Current Pool Size " - << impl->getLimits()->TotalSize.load() << std::endl; - std::cout << "Suggested Setting=;" - << std::string(1, (char)tolower(name[0])) - << std::string(name + 1) << ":" << HighBucketSize - << "," << HighPeakSlabsInUse << ",64K" << std::endl; - } - } catch (...) { // ignore exceptions - } - } -} - -static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = - umf::poolMakeCOps(); - -umf_memory_pool_ops_t *umfDisjointPoolOps(void) { - return &UMF_DISJOINT_POOL_OPS; -} diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h new file mode 100644 index 0000000000..7a63dd72ac --- /dev/null +++ b/src/pool/pool_disjoint_internal.h @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_POOL_DISJOINT_INTERNAL_H +#define UMF_POOL_DISJOINT_INTERNAL_H 1 + +#include + +#include + +#include "critnib/critnib.h" +#include "utils_concurrency.h" + +#define CHUNK_BITMAP_SIZE 64 + +typedef struct bucket_t bucket_t; +typedef struct slab_t slab_t; +typedef struct slab_list_item_t slab_list_item_t; +typedef struct disjoint_pool_t disjoint_pool_t; + +typedef struct bucket_t { + size_t size; + + // Linked list of slabs which have at least 1 available chunk. + // We always count available slabs as an optimization. + slab_list_item_t *available_slabs; + size_t available_slabs_num; + + // Linked list of slabs with 0 available chunks + slab_list_item_t *unavailable_slabs; + + // Protects the bucket and all the corresponding slabs + utils_mutex_t bucket_lock; + + // Reference to the allocator context, used to access memory allocation + // routines, slab map and etc. + disjoint_pool_t *pool; + + umf_disjoint_pool_shared_limits_handle_t shared_limits; + + // For buckets used in chunked mode, a counter of slabs in the pool. + // For allocations that use an entire slab each, the entries in the + // "available" list are entries in the pool. Each slab is available for a + // new allocation. The size of the available list is the size of the pool. + // + // For allocations that use slabs in chunked mode, slabs will be in the + // "available" list if any one or more of their chunks are free. The entire + // slab is not necessarily free, just some chunks in the slab are free. To + // implement pooling, we will allow one slab in the "available" list to be + // entirely empty, and treat this slab as "in the pool". + // When a slab becomes entirely free, we must decide whether to return it + // to the provider or keep it allocated. 
We keep a counter of entirely + // empty slabs within the "available" list to speed up the process of + // checking if a slab in this bucket is already pooled. + size_t chunked_slabs_in_pool; + + // Statistics + size_t alloc_count; + size_t alloc_pool_count; + size_t free_count; + size_t curr_slabs_in_use; + size_t curr_slabs_in_pool; + size_t max_slabs_in_pool; + size_t max_slabs_in_use; +} bucket_t; + +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + +// Represents the allocated memory block of size 'slab_min_size' +// Internally, it splits the memory block into chunks. The number of +// chunks depends on the size of a Bucket which created the Slab. +// Note: Bucket's methods are responsible for thread safety of Slab access, +// so no locking happens here. +typedef struct slab_t { + // Pointer to the allocated memory of slab_min_size bytes + void *mem_ptr; + size_t slab_size; + + size_t num_chunks_total; + + // Num of 64-bit words needed to store chunk state + size_t num_words; + + // Total number of allocated chunks at the moment. + size_t num_chunks_allocated; + + // The bucket which the slab belongs to + bucket_t *bucket; + + // Store iterator to the corresponding node in avail/unavail list + // to achieve O(1) removal + slab_list_item_t iter; + + // Represents the current state of each chunk: if the bit is clear, the + // chunk is allocated; otherwise, the chunk is free for allocation + uint64_t chunks[]; +} slab_t; + +typedef struct umf_disjoint_pool_shared_limits_t { + size_t max_size; + uint64_t total_size; // requires atomic access +} umf_disjoint_pool_shared_limits_t; + +typedef struct umf_disjoint_pool_params_t { + // Minimum allocation size that will be requested from the memory provider. + size_t slab_min_size; + + // Allocations up to this limit will be subject to chunking/pooling + size_t max_poolable_size; + + // When pooling, each bucket will hold a max of 'capacity' unfreed slabs + size_t capacity; + + // Holds the minimum bucket size valid for allocation of a memory type. + // This value must be a power of 2. + size_t min_bucket_size; + + // Holds size of the pool managed by the allocator. + size_t cur_pool_size; + + // Whether to print pool usage statistics + int pool_trace; + + // Memory limits that can be shared between multiple pool instances, + // i.e. if multiple pools use the same shared_limits sum of those pools' + // sizes cannot exceed max_size. 
+ umf_disjoint_pool_shared_limits_handle_t shared_limits; + + // Name used in traces + char name[64]; +} umf_disjoint_pool_params_t; + +typedef struct disjoint_pool_t { + // Keep the list of known slabs to quickly find required one during the + // free() + critnib *known_slabs; // (void *, slab_t *) + + // Handle to the memory provider + umf_memory_provider_handle_t provider; + + // Array of bucket_t* + bucket_t **buckets; + size_t buckets_num; + + // Configuration for this instance + umf_disjoint_pool_params_t params; + + umf_disjoint_pool_shared_limits_handle_t default_shared_limits; + + // Used in algorithm for finding buckets + size_t min_bucket_size_exp; + + // Coarse-grain allocation min alignment + size_t provider_min_page_size; +} disjoint_pool_t; + +static inline void slab_set_chunk_bit(slab_t *slab, size_t index, bool value) { + assert(index < slab->num_chunks_total && "Index out of range"); + + size_t word_index = index / CHUNK_BITMAP_SIZE; + unsigned bit_index = index % CHUNK_BITMAP_SIZE; + if (value) { + slab->chunks[word_index] |= (1ULL << bit_index); + } else { + slab->chunks[word_index] &= ~(1ULL << bit_index); + } +} + +static inline int slab_read_chunk_bit(const slab_t *slab, size_t index) { + assert(index < slab->num_chunks_total && "Index out of range"); + + size_t word_index = index / CHUNK_BITMAP_SIZE; + unsigned bit_index = index % CHUNK_BITMAP_SIZE; + return (slab->chunks[word_index] >> bit_index) & 1; +} + +#endif // UMF_POOL_DISJOINT_INTERNAL_H diff --git a/src/pool/pool_jemalloc.c b/src/pool/pool_jemalloc.c index 3ec7c78050..10e00dea51 100644 --- a/src/pool/pool_jemalloc.c +++ b/src/pool/pool_jemalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -20,33 +20,21 @@ #include #include -#include +#ifndef UMF_POOL_JEMALLOC_ENABLED -// The Windows version of jemalloc uses API with je_ prefix, -// while the Linux one does not. -#ifndef _WIN32 -#define je_mallocx mallocx -#define je_dallocx dallocx -#define je_rallocx rallocx -#define je_mallctl mallctl -#define je_malloc_usable_size malloc_usable_size -#endif +umf_memory_pool_ops_t *umfJemallocPoolOps(void) { return NULL; } + +#else + +#include #define MALLOCX_ARENA_MAX (MALLCTL_ARENAS_ALL - 1) typedef struct jemalloc_memory_pool_t { umf_memory_provider_handle_t provider; unsigned int arena_index; // index of jemalloc arena - // set to true if umfMemoryProviderFree() should never be called - bool disable_provider_free; } jemalloc_memory_pool_t; -// Configuration of Jemalloc Pool -typedef struct umf_jemalloc_pool_params_t { - /// Set to true if umfMemoryProviderFree() should never be called. 
- bool disable_provider_free; -} umf_jemalloc_pool_params_t; - static __TLS umf_result_t TLS_last_allocation_error; static jemalloc_memory_pool_t *pool_by_arena_index[MALLCTL_ARENAS_ALL]; @@ -59,52 +47,6 @@ static jemalloc_memory_pool_t *get_pool_by_arena_index(unsigned arena_ind) { return pool_by_arena_index[arena_ind]; } -umf_result_t -umfJemallocPoolParamsCreate(umf_jemalloc_pool_params_handle_t *hParams) { - if (!hParams) { - LOG_ERR("jemalloc pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_jemalloc_pool_params_t *params_data = - umf_ba_global_alloc(sizeof(*params_data)); - if (!params_data) { - LOG_ERR("cannot allocate memory for jemalloc poolparams"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - params_data->disable_provider_free = false; - - *hParams = (umf_jemalloc_pool_params_handle_t)params_data; - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfJemallocPoolParamsDestroy(umf_jemalloc_pool_params_handle_t hParams) { - if (!hParams) { - LOG_ERR("jemalloc pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_ba_global_free(hParams); - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfJemallocPoolParamsSetKeepAllMemory(umf_jemalloc_pool_params_handle_t hParams, - bool keepAllMemory) { - if (!hParams) { - LOG_ERR("jemalloc pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->disable_provider_free = keepAllMemory; - - return UMF_RESULT_SUCCESS; -} - // arena_extent_alloc - an extent allocation function conforms to the extent_alloc_t type and upon // success returns a pointer to size bytes of mapped memory on behalf of arena arena_ind such that // the extent's base address is a multiple of alignment, as well as setting *zero to indicate @@ -134,9 +76,7 @@ static void *arena_extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, } if (new_addr != NULL && ptr != new_addr) { - if (!pool->disable_provider_free) { - umfMemoryProviderFree(pool->provider, ptr, size); - } + umfMemoryProviderFree(pool->provider, ptr, size); return NULL; } @@ -170,10 +110,6 @@ static void arena_extent_destroy(extent_hooks_t *extent_hooks, void *addr, jemalloc_memory_pool_t *pool = get_pool_by_arena_index(arena_ind); - if (pool->disable_provider_free) { - return; - } - umf_result_t ret; ret = umfMemoryProviderFree(pool->provider, addr, size); if (ret != UMF_RESULT_SUCCESS) { @@ -196,10 +132,6 @@ static bool arena_extent_dalloc(extent_hooks_t *extent_hooks, void *addr, jemalloc_memory_pool_t *pool = get_pool_by_arena_index(arena_ind); - if (pool->disable_provider_free) { - return true; // opt-out from deallocation - } - umf_result_t ret; ret = umfMemoryProviderFree(pool->provider, addr, size); if (ret != UMF_RESULT_SUCCESS) { @@ -450,12 +382,10 @@ static void *op_aligned_alloc(void *pool, size_t size, size_t alignment) { static umf_result_t op_initialize(umf_memory_provider_handle_t provider, void *params, void **out_pool) { + (void)params; // unused assert(provider); assert(out_pool); - umf_jemalloc_pool_params_handle_t je_params = - (umf_jemalloc_pool_params_handle_t)params; - extent_hooks_t *pHooks = &arena_extent_hooks; size_t unsigned_size = sizeof(unsigned); int err; @@ -468,12 +398,6 @@ static umf_result_t op_initialize(umf_memory_provider_handle_t provider, pool->provider = provider; - if (je_params) { - pool->disable_provider_free = je_params->disable_provider_free; - } else { - pool->disable_provider_free = false; - } - unsigned arena_index; err = je_mallctl("arenas.create", (void 
*)&arena_index, &unsigned_size, NULL, 0); @@ -488,7 +412,7 @@ static umf_result_t op_initialize(umf_memory_provider_handle_t provider, err = je_mallctl(cmd, NULL, NULL, (void *)&pHooks, sizeof(void *)); if (err) { snprintf(cmd, sizeof(cmd), "arena.%u.destroy", arena_index); - je_mallctl(cmd, NULL, 0, NULL, 0); + (void)je_mallctl(cmd, NULL, 0, NULL, 0); LOG_ERR("Could not setup extent_hooks for newly created arena."); goto err_free_pool; } @@ -512,7 +436,7 @@ static void op_finalize(void *pool) { jemalloc_memory_pool_t *je_pool = (jemalloc_memory_pool_t *)pool; char cmd[64]; snprintf(cmd, sizeof(cmd), "arena.%u.destroy", je_pool->arena_index); - je_mallctl(cmd, NULL, 0, NULL, 0); + (void)je_mallctl(cmd, NULL, 0, NULL, 0); pool_by_arena_index[je_pool->arena_index] = NULL; umf_ba_global_free(je_pool); @@ -530,7 +454,7 @@ static umf_result_t op_get_last_allocation_error(void *pool) { } static umf_memory_pool_ops_t UMF_JEMALLOC_POOL_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = op_initialize, .finalize = op_finalize, .malloc = op_malloc, @@ -545,3 +469,4 @@ static umf_memory_pool_ops_t UMF_JEMALLOC_POOL_OPS = { umf_memory_pool_ops_t *umfJemallocPoolOps(void) { return &UMF_JEMALLOC_POOL_OPS; } +#endif /* UMF_POOL_JEMALLOC_ENABLED */ diff --git a/src/pool/pool_proxy.c b/src/pool/pool_proxy.c index 2269d9344b..eedddb0acb 100644 --- a/src/pool/pool_proxy.c +++ b/src/pool/pool_proxy.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -123,7 +123,7 @@ static umf_result_t proxy_get_last_allocation_error(void *pool) { } static umf_memory_pool_ops_t UMF_PROXY_POOL_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = proxy_pool_initialize, .finalize = proxy_pool_finalize, .malloc = proxy_malloc, diff --git a/src/pool/pool_scalable.c b/src/pool/pool_scalable.c index 6ee364344e..8a9fd88c11 100644 --- a/src/pool/pool_scalable.c +++ b/src/pool/pool_scalable.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -20,6 +20,7 @@ #include "base_alloc_global.h" #include "libumf.h" +#include "pool_scalable_internal.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_load_library.h" @@ -33,6 +34,7 @@ static __TLS umf_result_t TLS_last_allocation_error; static __TLS umf_result_t TLS_last_free_error; static const size_t DEFAULT_GRANULARITY = 2 * 1024 * 1024; // 2MB + typedef struct tbb_mem_pool_policy_t { raw_alloc_tbb_type pAlloc; raw_free_tbb_type pFree; @@ -66,7 +68,6 @@ typedef struct tbb_callbacks_t { typedef struct tbb_memory_pool_t { umf_memory_provider_handle_t mem_provider; void *tbb_pool; - tbb_callbacks_t tbb_callbacks; } tbb_memory_pool_t; typedef enum tbb_enums_t { @@ -82,6 +83,10 @@ typedef enum tbb_enums_t { TBB_POOL_SYMBOLS_MAX // it has to be the last one } tbb_enums_t; +static UTIL_ONCE_FLAG tbb_initialized = UTIL_ONCE_FLAG_INIT; +static int tbb_init_result = 0; +static tbb_callbacks_t tbb_callbacks = {0}; + static const char *tbb_symbol[TBB_POOL_SYMBOLS_MAX] = { #ifdef _WIN32 // symbols copied from oneTBB/src/tbbmalloc/def/win64-tbbmalloc.def @@ -109,46 +114,60 @@ static const char *tbb_symbol[TBB_POOL_SYMBOLS_MAX] = { #endif }; -static int init_tbb_callbacks(tbb_callbacks_t *tbb_callbacks) { - assert(tbb_callbacks); - +static void init_tbb_callbacks_once(void) { const char *lib_name = tbb_symbol[TBB_LIB_NAME]; - tbb_callbacks->lib_handle = utils_open_library(lib_name, 0); - if (!tbb_callbacks->lib_handle) { + tbb_callbacks.lib_handle = utils_open_library(lib_name, 0); + if (!tbb_callbacks.lib_handle) { LOG_ERR("%s required by Scalable Pool not found - install TBB malloc " "or make sure it is in the default search paths.", lib_name); - return -1; + tbb_init_result = -1; + return; } - - *(void **)&tbb_callbacks->pool_malloc = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_MALLOC], lib_name); - *(void **)&tbb_callbacks->pool_realloc = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_REALLOC], lib_name); - *(void **)&tbb_callbacks->pool_aligned_malloc = - utils_get_symbol_addr(tbb_callbacks->lib_handle, + *(void **)&tbb_callbacks.pool_malloc = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_MALLOC], lib_name); + *(void **)&tbb_callbacks.pool_realloc = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_REALLOC], lib_name); + *(void **)&tbb_callbacks.pool_aligned_malloc = + utils_get_symbol_addr(tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_ALIGNED_MALLOC], lib_name); - *(void **)&tbb_callbacks->pool_free = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_FREE], lib_name); - *(void **)&tbb_callbacks->pool_create_v1 = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_CREATE_V1], lib_name); - *(void **)&tbb_callbacks->pool_destroy = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_DESTROY], lib_name); - *(void **)&tbb_callbacks->pool_identify = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_IDENTIFY], lib_name); - *(void **)&tbb_callbacks->pool_msize = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_MSIZE], lib_name); - - if (!tbb_callbacks->pool_malloc || !tbb_callbacks->pool_realloc || - !tbb_callbacks->pool_aligned_malloc || !tbb_callbacks->pool_free || - !tbb_callbacks->pool_create_v1 || !tbb_callbacks->pool_destroy || - !tbb_callbacks->pool_identify) { - LOG_ERR("Could not find symbols in %s", 
lib_name); - utils_close_library(tbb_callbacks->lib_handle); - return -1; + *(void **)&tbb_callbacks.pool_free = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_FREE], lib_name); + *(void **)&tbb_callbacks.pool_create_v1 = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_CREATE_V1], lib_name); + *(void **)&tbb_callbacks.pool_destroy = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_DESTROY], lib_name); + *(void **)&tbb_callbacks.pool_identify = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_IDENTIFY], lib_name); + *(void **)&tbb_callbacks.pool_msize = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_MSIZE], lib_name); + + if (!tbb_callbacks.pool_malloc || !tbb_callbacks.pool_realloc || + !tbb_callbacks.pool_aligned_malloc || !tbb_callbacks.pool_free || + !tbb_callbacks.pool_create_v1 || !tbb_callbacks.pool_destroy || + !tbb_callbacks.pool_identify) { + LOG_FATAL("Could not find all TBB symbols in %s", lib_name); + if (utils_close_library(tbb_callbacks.lib_handle)) { + LOG_ERR("Could not close %s library", lib_name); + } + tbb_init_result = -1; } +} - return 0; +static int init_tbb_callbacks(void) { + utils_init_once(&tbb_initialized, init_tbb_callbacks_once); + return tbb_init_result; +} + +void fini_tbb_global_state(void) { + if (tbb_callbacks.lib_handle) { + if (!utils_close_library(tbb_callbacks.lib_handle)) { + tbb_callbacks.lib_handle = NULL; + LOG_DEBUG("TBB library closed"); + } else { + LOG_ERR("TBB library cannot be unloaded"); + } + } } static void *tbb_raw_alloc_wrapper(intptr_t pool_id, size_t *raw_bytes) { @@ -264,35 +283,41 @@ static umf_result_t tbb_pool_initialize(umf_memory_provider_handle_t provider, return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - int ret = init_tbb_callbacks(&pool_data->tbb_callbacks); + umf_result_t res = UMF_RESULT_SUCCESS; + int ret = init_tbb_callbacks(); if (ret != 0) { - LOG_ERR("loading TBB symbols failed"); - return UMF_RESULT_ERROR_UNKNOWN; + LOG_FATAL("loading TBB symbols failed"); + res = UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; + goto err_tbb_init; } pool_data->mem_provider = provider; - ret = pool_data->tbb_callbacks.pool_create_v1((intptr_t)pool_data, &policy, - &(pool_data->tbb_pool)); + ret = tbb_callbacks.pool_create_v1((intptr_t)pool_data, &policy, + &(pool_data->tbb_pool)); if (ret != 0 /* TBBMALLOC_OK */) { - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + res = UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + goto err_tbb_init; } *pool = (void *)pool_data; - return UMF_RESULT_SUCCESS; + return res; + +err_tbb_init: + umf_ba_global_free(pool_data); + return res; } static void tbb_pool_finalize(void *pool) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; - pool_data->tbb_callbacks.pool_destroy(pool_data->tbb_pool); - utils_close_library(pool_data->tbb_callbacks.lib_handle); + tbb_callbacks.pool_destroy(pool_data->tbb_pool); umf_ba_global_free(pool_data); } static void *tbb_malloc(void *pool, size_t size) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; TLS_last_allocation_error = UMF_RESULT_SUCCESS; - void *ptr = pool_data->tbb_callbacks.pool_malloc(pool_data->tbb_pool, size); + void *ptr = tbb_callbacks.pool_malloc(pool_data->tbb_pool, size); if (ptr == NULL) { if (TLS_last_allocation_error == UMF_RESULT_SUCCESS) { TLS_last_allocation_error = UMF_RESULT_ERROR_UNKNOWN; @@ -319,8 +344,7 @@ static void *tbb_calloc(void *pool, size_t num, size_t size) { static void *tbb_realloc(void *pool, void *ptr, 
size_t size) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; TLS_last_allocation_error = UMF_RESULT_SUCCESS; - void *new_ptr = - pool_data->tbb_callbacks.pool_realloc(pool_data->tbb_pool, ptr, size); + void *new_ptr = tbb_callbacks.pool_realloc(pool_data->tbb_pool, ptr, size); if (new_ptr == NULL) { if (TLS_last_allocation_error == UMF_RESULT_SUCCESS) { TLS_last_allocation_error = UMF_RESULT_ERROR_UNKNOWN; @@ -334,8 +358,8 @@ static void *tbb_realloc(void *pool, void *ptr, size_t size) { static void *tbb_aligned_malloc(void *pool, size_t size, size_t alignment) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; TLS_last_allocation_error = UMF_RESULT_SUCCESS; - void *ptr = pool_data->tbb_callbacks.pool_aligned_malloc( - pool_data->tbb_pool, size, alignment); + void *ptr = + tbb_callbacks.pool_aligned_malloc(pool_data->tbb_pool, size, alignment); if (ptr == NULL) { if (TLS_last_allocation_error == UMF_RESULT_SUCCESS) { TLS_last_allocation_error = UMF_RESULT_ERROR_UNKNOWN; @@ -360,7 +384,7 @@ static umf_result_t tbb_free(void *pool, void *ptr) { utils_annotate_release(pool); tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; - if (pool_data->tbb_callbacks.pool_free(pool_data->tbb_pool, ptr)) { + if (tbb_callbacks.pool_free(pool_data->tbb_pool, ptr)) { return UMF_RESULT_SUCCESS; } @@ -373,7 +397,7 @@ static umf_result_t tbb_free(void *pool, void *ptr) { static size_t tbb_malloc_usable_size(void *pool, void *ptr) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; - return pool_data->tbb_callbacks.pool_msize(pool_data->tbb_pool, ptr); + return tbb_callbacks.pool_msize(pool_data->tbb_pool, ptr); } static umf_result_t tbb_get_last_allocation_error(void *pool) { @@ -382,7 +406,7 @@ static umf_result_t tbb_get_last_allocation_error(void *pool) { } static umf_memory_pool_ops_t UMF_SCALABLE_POOL_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = tbb_pool_initialize, .finalize = tbb_pool_finalize, .malloc = tbb_malloc, diff --git a/src/pool/pool_scalable_internal.h b/src/pool/pool_scalable_internal.h new file mode 100644 index 0000000000..cfdc668fc6 --- /dev/null +++ b/src/pool/pool_scalable_internal.h @@ -0,0 +1,10 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +void fini_tbb_global_state(void); diff --git a/src/provider/provider_coarse.c b/src/provider/provider_coarse.c deleted file mode 100644 index c3027b91d7..0000000000 --- a/src/provider/provider_coarse.c +++ /dev/null @@ -1,1707 +0,0 @@ -/* - * Copyright (C) 2023-2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ - -#include -#include -#include -#include -#include -#include - -#include - -#include "base_alloc_global.h" -#include "memory_provider_internal.h" -#include "ravl.h" -#include "utils_common.h" -#include "utils_concurrency.h" -#include "utils_log.h" - -#define COARSE_BASE_NAME "coarse" - -#define IS_ORIGIN_OF_BLOCK(origin, block) \ - (((uintptr_t)(block)->data >= (uintptr_t)(origin)->data) && \ - ((uintptr_t)(block)->data + (block)->size <= \ - (uintptr_t)(origin)->data + (origin)->size)) - -typedef struct coarse_memory_provider_t { - umf_memory_provider_handle_t upstream_memory_provider; - - // destroy upstream_memory_provider in finalize() - bool destroy_upstream_memory_provider; - - // memory allocation strategy - coarse_memory_provider_strategy_t allocation_strategy; - - void *init_buffer; - - size_t used_size; - size_t alloc_size; - - // upstream_blocks - tree of all blocks allocated from the upstream provider - struct ravl *upstream_blocks; - - // all_blocks - tree of all blocks - sorted by an address of data - struct ravl *all_blocks; - - // free_blocks - tree of free blocks - sorted by a size of data, - // each node contains a pointer (ravl_free_blocks_head_t) - // to the head of the list of free blocks of the same size - struct ravl *free_blocks; - - struct utils_mutex_t lock; - - // Name of the provider with the upstream provider: - // "coarse ()" - // for example: "coarse (L0)" - char *name; - - // Set to true if the free() operation of the upstream memory provider is not supported - // (i.e. if (umfMemoryProviderFree(upstream_memory_provider, NULL, 0) == UMF_RESULT_ERROR_NOT_SUPPORTED) - bool disable_upstream_provider_free; -} coarse_memory_provider_t; - -typedef struct ravl_node ravl_node_t; - -typedef enum check_free_blocks_t { - CHECK_ONLY_THE_FIRST_BLOCK = 0, - CHECK_ALL_BLOCKS_OF_SIZE, -} check_free_blocks_t; - -typedef struct block_t { - size_t size; - unsigned char *data; - bool used; - - // Node in the list of free blocks of the same size pointing to this block. - // The list is located in the (coarse_provider->free_blocks) RAVL tree. - struct ravl_free_blocks_elem_t *free_list_ptr; -} block_t; - -// A general node in a RAVL tree. -// 1) coarse_provider->all_blocks RAVL tree (tree of all blocks - sorted by an address of data): -// key - pointer (block_t->data) to the beginning of the block data -// value - pointer (block_t) to the block of the allocation -// 2) coarse_provider->free_blocks RAVL tree (tree of free blocks - sorted by a size of data): -// key - size of the allocation (block_t->size) -// value - pointer (ravl_free_blocks_head_t) to the head of the list of free blocks of the same size -typedef struct ravl_data_t { - uintptr_t key; - void *value; -} ravl_data_t; - -// The head of the list of free blocks of the same size. 
-typedef struct ravl_free_blocks_head_t { - struct ravl_free_blocks_elem_t *head; -} ravl_free_blocks_head_t; - -// The node of the list of free blocks of the same size -typedef struct ravl_free_blocks_elem_t { - struct block_t *block; - struct ravl_free_blocks_elem_t *next; - struct ravl_free_blocks_elem_t *prev; -} ravl_free_blocks_elem_t; - -// The compare function of a RAVL tree -static int coarse_ravl_comp(const void *lhs, const void *rhs) { - const ravl_data_t *lhs_ravl = (const ravl_data_t *)lhs; - const ravl_data_t *rhs_ravl = (const ravl_data_t *)rhs; - - if (lhs_ravl->key < rhs_ravl->key) { - return -1; - } - - if (lhs_ravl->key > rhs_ravl->key) { - return 1; - } - - // lhs_ravl->key == rhs_ravl->key - return 0; -} - -static inline block_t *get_node_block(ravl_node_t *node) { - ravl_data_t *node_data = ravl_data(node); - assert(node_data); - assert(node_data->value); - return node_data->value; -} - -static inline ravl_node_t *get_node_prev(ravl_node_t *node) { - return ravl_node_predecessor(node); -} - -static inline ravl_node_t *get_node_next(ravl_node_t *node) { - return ravl_node_successor(node); -} - -#ifndef NDEBUG -static block_t *get_block_prev(ravl_node_t *node) { - ravl_node_t *ravl_prev = ravl_node_predecessor(node); - if (!ravl_prev) { - return NULL; - } - - return get_node_block(ravl_prev); -} - -static block_t *get_block_next(ravl_node_t *node) { - ravl_node_t *ravl_next = ravl_node_successor(node); - if (!ravl_next) { - return NULL; - } - - return get_node_block(ravl_next); -} -#endif /* NDEBUG */ - -static bool is_same_origin(struct ravl *upstream_blocks, block_t *block1, - block_t *block2) { - ravl_data_t rdata1 = {(uintptr_t)block1->data, NULL}; - ravl_node_t *ravl_origin1 = - ravl_find(upstream_blocks, &rdata1, RAVL_PREDICATE_LESS_EQUAL); - assert(ravl_origin1); - - block_t *origin1 = get_node_block(ravl_origin1); - assert(IS_ORIGIN_OF_BLOCK(origin1, block1)); - - return (IS_ORIGIN_OF_BLOCK(origin1, block2)); -} - -// The functions "coarse_ravl_*" handle lists of blocks: -// - coarse_provider->all_blocks and coarse_provider->upstream_blocks -// sorted by a pointer (block_t->data) to the beginning of the block data. -// -// coarse_ravl_add_new - allocate and add a new block to the tree -// and link this block to the next and the previous one. 
-static block_t *coarse_ravl_add_new(struct ravl *rtree, unsigned char *data, - size_t size, ravl_node_t **node) { - assert(rtree); - assert(data); - assert(size); - - // TODO add valgrind annotations - block_t *block = umf_ba_global_alloc(sizeof(*block)); - if (block == NULL) { - return NULL; - } - - block->data = data; - block->size = size; - block->free_list_ptr = NULL; - - ravl_data_t rdata = {(uintptr_t)block->data, block}; - assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL)); - int ret = ravl_emplace_copy(rtree, &rdata); - if (ret) { - umf_ba_global_free(block); - return NULL; - } - - ravl_node_t *new_node = ravl_find(rtree, &rdata, RAVL_PREDICATE_EQUAL); - assert(NULL != new_node); - - if (node) { - *node = new_node; - } - - return block; -} - -// coarse_ravl_find_node - find the node in the tree -static ravl_node_t *coarse_ravl_find_node(struct ravl *rtree, void *ptr) { - ravl_data_t data = {(uintptr_t)ptr, NULL}; - return ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL); -} - -// coarse_ravl_rm - remove the block from the tree -static block_t *coarse_ravl_rm(struct ravl *rtree, void *ptr) { - ravl_data_t data = {(uintptr_t)ptr, NULL}; - ravl_node_t *node; - node = ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL); - if (node) { - ravl_data_t *node_data = ravl_data(node); - assert(node_data); - block_t *block = node_data->value; - assert(block); - ravl_remove(rtree, node); - assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL)); - return block; - } - return NULL; -} - -// The functions "node_list_*" handle lists of free blocks of the same size. -// The heads (ravl_free_blocks_head_t) of those lists are stored in nodes of -// the coarse_provider->free_blocks RAVL tree. -// -// node_list_add - add a free block to the list of free blocks of the same size -static ravl_free_blocks_elem_t * -node_list_add(ravl_free_blocks_head_t *head_node, struct block_t *block) { - assert(head_node); - assert(block); - - ravl_free_blocks_elem_t *node = umf_ba_global_alloc(sizeof(*node)); - if (node == NULL) { - return NULL; - } - - if (head_node->head) { - head_node->head->prev = node; - } - - node->block = block; - node->next = head_node->head; - node->prev = NULL; - head_node->head = node; - - return node; -} - -// node_list_rm - remove the given free block from the list of free blocks of the same size -static block_t *node_list_rm(ravl_free_blocks_head_t *head_node, - ravl_free_blocks_elem_t *node) { - assert(head_node); - assert(node); - - if (!head_node->head) { - return NULL; - } - - if (node == head_node->head) { - assert(node->prev == NULL); - head_node->head = node->next; - } - - ravl_free_blocks_elem_t *node_next = node->next; - ravl_free_blocks_elem_t *node_prev = node->prev; - if (node_next) { - node_next->prev = node_prev; - } - - if (node_prev) { - node_prev->next = node_next; - } - - struct block_t *block = node->block; - block->free_list_ptr = NULL; - umf_ba_global_free(node); - - return block; -} - -// node_list_rm_first - remove the first free block from the list of free blocks of the same size only if it can be properly aligned -static block_t *node_list_rm_first(ravl_free_blocks_head_t *head_node, - size_t alignment) { - assert(head_node); - - if (!head_node->head) { - return NULL; - } - - ravl_free_blocks_elem_t *node = head_node->head; - assert(node->prev == NULL); - struct block_t *block = node->block; - - if (IS_NOT_ALIGNED(block->size, alignment)) { - return NULL; - } - - if (node->next) { - node->next->prev = NULL; - } - - head_node->head = node->next; - 
block->free_list_ptr = NULL; - umf_ba_global_free(node); - - return block; -} - -// node_list_rm_with_alignment - remove the first free block with the correct alignment from the list of free blocks of the same size -static block_t *node_list_rm_with_alignment(ravl_free_blocks_head_t *head_node, - size_t alignment) { - assert(head_node); - - if (!head_node->head) { - return NULL; - } - - assert(((ravl_free_blocks_elem_t *)head_node->head)->prev == NULL); - - ravl_free_blocks_elem_t *node; - for (node = head_node->head; node != NULL; node = node->next) { - if (IS_ALIGNED(node->block->size, alignment)) { - return node_list_rm(head_node, node); - } - } - - return NULL; -} - -// The functions "free_blocks_*" handle the coarse_provider->free_blocks RAVL tree -// sorted by a size of the allocation (block_t->size). -// This is a tree of heads (ravl_free_blocks_head_t) of lists of free blocks of the same size. -// -// free_blocks_add - add a free block to the list of free blocks of the same size -static int free_blocks_add(struct ravl *free_blocks, block_t *block) { - ravl_free_blocks_head_t *head_node = NULL; - int rv; - - ravl_data_t head_node_data = {(uintptr_t)block->size, NULL}; - ravl_node_t *node; - node = ravl_find(free_blocks, &head_node_data, RAVL_PREDICATE_EQUAL); - if (node) { - ravl_data_t *node_data = ravl_data(node); - assert(node_data); - head_node = node_data->value; - assert(head_node); - } else { // no head_node - head_node = umf_ba_global_alloc(sizeof(*head_node)); - if (!head_node) { - return -1; - } - - head_node->head = NULL; - - ravl_data_t data = {(uintptr_t)block->size, head_node}; - rv = ravl_emplace_copy(free_blocks, &data); - if (rv) { - umf_ba_global_free(head_node); - return -1; - } - } - - block->free_list_ptr = node_list_add(head_node, block); - if (!block->free_list_ptr) { - return -1; - } - - assert(block->free_list_ptr->block->size == block->size); - - return 0; -} - -// free_blocks_rm_ge - remove the first free block of a size greater or equal to the given size only if it can be properly aligned -// If it was the last block, the head node is freed and removed from the tree. -// It is used during memory allocation (looking for a free block). -static block_t *free_blocks_rm_ge(struct ravl *free_blocks, size_t size, - size_t alignment, - check_free_blocks_t check_blocks) { - ravl_data_t data = {(uintptr_t)size, NULL}; - ravl_node_t *node; - node = ravl_find(free_blocks, &data, RAVL_PREDICATE_GREATER_EQUAL); - if (!node) { - return NULL; - } - - ravl_data_t *node_data = ravl_data(node); - assert(node_data); - assert(node_data->key >= size); - - ravl_free_blocks_head_t *head_node = node_data->value; - assert(head_node); - - block_t *block; - switch (check_blocks) { - case CHECK_ONLY_THE_FIRST_BLOCK: - block = node_list_rm_first(head_node, alignment); - break; - case CHECK_ALL_BLOCKS_OF_SIZE: - block = node_list_rm_with_alignment(head_node, alignment); - break; - // wrong value of check_blocks - default: - abort(); - } - - if (head_node->head == NULL) { - umf_ba_global_free(head_node); - ravl_remove(free_blocks, node); - } - - return block; -} - -// free_blocks_rm_node - remove the free block pointed by the given node. -// If it was the last block, the head node is freed and removed from the tree. -// It is used during merging free blocks and destroying the coarse_provider->free_blocks tree. 
-static block_t *free_blocks_rm_node(struct ravl *free_blocks, - ravl_free_blocks_elem_t *node) { - assert(free_blocks); - assert(node); - size_t size = node->block->size; - ravl_data_t data = {(uintptr_t)size, NULL}; - ravl_node_t *ravl_node; - ravl_node = ravl_find(free_blocks, &data, RAVL_PREDICATE_EQUAL); - assert(ravl_node); - - ravl_data_t *node_data = ravl_data(ravl_node); - assert(node_data); - assert(node_data->key == size); - - ravl_free_blocks_head_t *head_node = node_data->value; - assert(head_node); - - block_t *block = node_list_rm(head_node, node); - - if (head_node->head == NULL) { - umf_ba_global_free(head_node); - ravl_remove(free_blocks, ravl_node); - } - - return block; -} - -// user_block_merge - merge two blocks from one of two lists of user blocks: all_blocks or free_blocks -static umf_result_t user_block_merge(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node1, ravl_node_t *node2, - bool used, ravl_node_t **merged_node) { - assert(node1); - assert(node2); - assert(node1 == get_node_prev(node2)); - assert(node2 == get_node_next(node1)); - assert(merged_node); - - *merged_node = NULL; - - struct ravl *upstream_blocks = coarse_provider->upstream_blocks; - struct ravl *all_blocks = coarse_provider->all_blocks; - struct ravl *free_blocks = coarse_provider->free_blocks; - - block_t *block1 = get_node_block(node1); - block_t *block2 = get_node_block(node2); - assert(block1->data < block2->data); - - bool same_used = ((block1->used == used) && (block2->used == used)); - bool contignous_data = (block1->data + block1->size == block2->data); - bool same_origin = is_same_origin(upstream_blocks, block1, block2); - - // check if blocks can be merged - if (!same_used || !contignous_data || !same_origin) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (block1->free_list_ptr) { - free_blocks_rm_node(free_blocks, block1->free_list_ptr); - block1->free_list_ptr = NULL; - } - - if (block2->free_list_ptr) { - free_blocks_rm_node(free_blocks, block2->free_list_ptr); - block2->free_list_ptr = NULL; - } - - // update the size - block1->size += block2->size; - - block_t *block_rm = coarse_ravl_rm(all_blocks, block2->data); - assert(block_rm == block2); - (void)block_rm; // WA for unused variable error - umf_ba_global_free(block2); - - *merged_node = node1; - - return UMF_RESULT_SUCCESS; -} - -// free_block_merge_with_prev - merge the given free block -// with the previous one if both are unused and have continuous data. -// Remove the merged block from the tree of free blocks. -static ravl_node_t * -free_block_merge_with_prev(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node) { - ravl_node_t *node_prev = get_node_prev(node); - if (!node_prev) { - return node; - } - - ravl_node_t *merged_node = NULL; - umf_result_t umf_result = - user_block_merge(coarse_provider, node_prev, node, false, &merged_node); - if (umf_result != UMF_RESULT_SUCCESS) { - return node; - } - - assert(merged_node != NULL); - - return merged_node; -} - -// free_block_merge_with_next - merge the given free block -// with the next one if both are unused and have continuous data. -// Remove the merged block from the tree of free blocks. 
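The three guards in user_block_merge() compress into a single predicate; spelled out as a sketch (is_same_origin() keeps the provider from coalescing memory that came from two separate upstream allocations, which must remain individually freeable):

    static bool blocks_can_merge(struct ravl *upstream_blocks, block_t *b1,
                                 block_t *b2, bool used) {
        bool same_used = (b1->used == used) && (b2->used == used);
        bool contiguous = (b1->data + b1->size == b2->data);
        return same_used && contiguous &&
               is_same_origin(upstream_blocks, b1, b2);
    }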
-static ravl_node_t * -free_block_merge_with_next(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node) { - ravl_node_t *node_next = get_node_next(node); - if (!node_next) { - return node; - } - - ravl_node_t *merged_node = NULL; - umf_result_t umf_result = - user_block_merge(coarse_provider, node, node_next, false, &merged_node); - if (umf_result != UMF_RESULT_SUCCESS) { - return node; - } - - assert(merged_node != NULL); - - return merged_node; -} - -// upstream_block_merge - merge the given two upstream blocks -static umf_result_t -upstream_block_merge(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node1, ravl_node_t *node2, - ravl_node_t **merged_node) { - assert(node1); - assert(node2); - assert(merged_node); - - *merged_node = NULL; - - umf_memory_provider_handle_t upstream_provider = - coarse_provider->upstream_memory_provider; - if (!upstream_provider) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - block_t *block1 = get_node_block(node1); - block_t *block2 = get_node_block(node2); - assert(block1->data < block2->data); - - bool contignous_data = (block1->data + block1->size == block2->data); - if (!contignous_data) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - // check if blocks can be merged by the upstream provider - umf_result_t merge_status = umfMemoryProviderAllocationMerge( - coarse_provider->upstream_memory_provider, block1->data, block2->data, - block1->size + block2->size); - if (merge_status != UMF_RESULT_SUCCESS) { - return merge_status; - } - - // update the size - block1->size += block2->size; - - struct ravl *upstream_blocks = coarse_provider->upstream_blocks; - block_t *block_rm = coarse_ravl_rm(upstream_blocks, block2->data); - assert(block_rm == block2); - (void)block_rm; // WA for unused variable error - umf_ba_global_free(block2); - - *merged_node = node1; - - return UMF_RESULT_SUCCESS; -} - -// upstream_block_merge_with_prev - merge the given upstream block -// with the previous one if both have continuous data. -// Remove the merged block from the tree of upstream blocks. -static ravl_node_t * -upstream_block_merge_with_prev(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node) { - assert(node); - - ravl_node_t *node_prev = get_node_prev(node); - if (!node_prev) { - return node; - } - - ravl_node_t *merged_node = NULL; - umf_result_t umf_result = - upstream_block_merge(coarse_provider, node_prev, node, &merged_node); - if (umf_result != UMF_RESULT_SUCCESS) { - return node; - } - - assert(merged_node != NULL); - - return merged_node; -} - -// upstream_block_merge_with_next - merge the given upstream block -// with the next one if both have continuous data. -// Remove the merged block from the tree of upstream blocks. 
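Seen from the public API, the same operation is a single call (hypothetical pointers; assumes low and high are adjacent halves of one earlier upstream allocation):

    // high must equal (char *)low + low_size for the merge to be legal.
    umf_result_t ret = umfMemoryProviderAllocationMerge(
        upstream_provider, low, high, low_size + high_size);
    if (ret != UMF_RESULT_SUCCESS) {
        // the two regions keep their separate identities upstream
    }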
-static ravl_node_t * -upstream_block_merge_with_next(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node) { - assert(node); - - ravl_node_t *node_next = get_node_next(node); - if (!node_next) { - return node; - } - - ravl_node_t *merged_node = NULL; - umf_result_t umf_result = - upstream_block_merge(coarse_provider, node, node_next, &merged_node); - if (umf_result != UMF_RESULT_SUCCESS) { - return node; - } - - assert(merged_node != NULL); - - return merged_node; -} - -#ifndef NDEBUG // begin of DEBUG code - -typedef struct debug_cb_args_t { - coarse_memory_provider_t *provider; - size_t sum_used; - size_t sum_blocks_size; - size_t num_all_blocks; - size_t num_free_blocks; - size_t num_alloc_blocks; - size_t sum_alloc_size; -} debug_cb_args_t; - -static void debug_verify_all_blocks_cb(void *data, void *arg) { - assert(data); - assert(arg); - - ravl_data_t *node_data = data; - block_t *block = node_data->value; - assert(block); - - debug_cb_args_t *cb_args = (debug_cb_args_t *)arg; - coarse_memory_provider_t *provider = cb_args->provider; - - ravl_node_t *node = - ravl_find(provider->all_blocks, data, RAVL_PREDICATE_EQUAL); - assert(node); - - block_t *block_next = get_block_next(node); - block_t *block_prev = get_block_prev(node); - - cb_args->num_all_blocks++; - if (!block->used) { - cb_args->num_free_blocks++; - } - - assert(block->data); - assert(block->size > 0); - - // There shouldn't be two adjacent unused blocks - // if they are continuous and have the same origin. - if (block_prev && !block_prev->used && !block->used && - (block_prev->data + block_prev->size == block->data)) { - assert(!is_same_origin(provider->upstream_blocks, block_prev, block)); - } - - if (block_next && !block_next->used && !block->used && - (block->data + block->size == block_next->data)) { - assert(!is_same_origin(provider->upstream_blocks, block, block_next)); - } - - // data addresses in the list are in ascending order - if (block_prev) { - assert(block_prev->data < block->data); - } - - if (block_next) { - assert(block->data < block_next->data); - } - - // two block's data should not overlap - if (block_next) { - assert((block->data + block->size) <= block_next->data); - } - - cb_args->sum_blocks_size += block->size; - if (block->used) { - cb_args->sum_used += block->size; - } -} - -static void debug_verify_upstream_blocks_cb(void *data, void *arg) { - assert(data); - assert(arg); - - ravl_data_t *node_data = data; - block_t *alloc = node_data->value; - assert(alloc); - - debug_cb_args_t *cb_args = (debug_cb_args_t *)arg; - coarse_memory_provider_t *provider = cb_args->provider; - - ravl_node_t *node = - ravl_find(provider->upstream_blocks, data, RAVL_PREDICATE_EQUAL); - assert(node); - - block_t *alloc_next = get_block_next(node); - block_t *alloc_prev = get_block_prev(node); - - cb_args->num_alloc_blocks++; - cb_args->sum_alloc_size += alloc->size; - - assert(alloc->data); - assert(alloc->size > 0); - - // data addresses in the list are in ascending order - if (alloc_prev) { - assert(alloc_prev->data < alloc->data); - } - - if (alloc_next) { - assert(alloc->data < alloc_next->data); - } - - // data should not overlap - if (alloc_next) { - assert((alloc->data + alloc->size) <= alloc_next->data); - } -} - -static umf_result_t -coarse_memory_provider_get_stats(void *provider, - coarse_memory_provider_stats_t *stats); - -static bool debug_check(coarse_memory_provider_t *provider) { - assert(provider); - - coarse_memory_provider_stats_t stats = {0}; - coarse_memory_provider_get_stats(provider, 
&stats); - - debug_cb_args_t cb_args = {0}; - cb_args.provider = provider; - - // verify the all_blocks list - ravl_foreach(provider->all_blocks, debug_verify_all_blocks_cb, &cb_args); - - assert(cb_args.num_all_blocks == stats.num_all_blocks); - assert(cb_args.num_free_blocks == stats.num_free_blocks); - assert(cb_args.sum_used == provider->used_size); - assert(cb_args.sum_blocks_size == provider->alloc_size); - assert(provider->alloc_size >= provider->used_size); - - // verify the upstream_blocks list - ravl_foreach(provider->upstream_blocks, debug_verify_upstream_blocks_cb, - &cb_args); - - assert(cb_args.sum_alloc_size == provider->alloc_size); - assert(cb_args.num_alloc_blocks == stats.num_upstream_blocks); - - return true; -} -#endif /* NDEBUG */ // end of DEBUG code - -static umf_result_t -coarse_add_upstream_block(coarse_memory_provider_t *coarse_provider, void *addr, - size_t size) { - ravl_node_t *alloc_node = NULL; - - block_t *alloc = coarse_ravl_add_new(coarse_provider->upstream_blocks, addr, - size, &alloc_node); - if (alloc == NULL) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - block_t *new_block = - coarse_ravl_add_new(coarse_provider->all_blocks, addr, size, NULL); - if (new_block == NULL) { - coarse_ravl_rm(coarse_provider->upstream_blocks, addr); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - // check if the new upstream block can be merged with its neighbours - alloc_node = upstream_block_merge_with_prev(coarse_provider, alloc_node); - alloc_node = upstream_block_merge_with_next(coarse_provider, alloc_node); - - new_block->used = true; - coarse_provider->alloc_size += size; - coarse_provider->used_size += size; - - return UMF_RESULT_SUCCESS; -} - -static umf_result_t -coarse_memory_provider_set_name(coarse_memory_provider_t *coarse_provider) { - if (coarse_provider->upstream_memory_provider == NULL) { - // COARSE_BASE_NAME will be used - coarse_provider->name = NULL; - return UMF_RESULT_SUCCESS; - } - - const char *up_name = - umfMemoryProviderGetName(coarse_provider->upstream_memory_provider); - if (!up_name) { - return UMF_RESULT_ERROR_UNKNOWN; - } - - size_t length = - strlen(COARSE_BASE_NAME) + strlen(up_name) + 3; // + 3 for " ()" - - coarse_provider->name = umf_ba_global_alloc(length + 1); // + 1 for '\0' - if (coarse_provider->name == NULL) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - sprintf(coarse_provider->name, "%s (%s)", COARSE_BASE_NAME, up_name); - - return UMF_RESULT_SUCCESS; -} - -// needed for coarse_memory_provider_initialize() -static umf_result_t coarse_memory_provider_alloc(void *provider, size_t size, - size_t alignment, - void **resultPtr); - -// needed for coarse_memory_provider_initialize() -static umf_result_t coarse_memory_provider_free(void *provider, void *ptr, - size_t bytes); - -static umf_result_t coarse_memory_provider_initialize(void *params, - void **provider) { - assert(provider); - - if (params == NULL) { - LOG_ERR("coarse provider parameters are missing"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - coarse_memory_provider_params_t *coarse_params = - (coarse_memory_provider_params_t *)params; - - // check params - if (!coarse_params->upstream_memory_provider == - !coarse_params->init_buffer) { - LOG_ERR("either upstream provider or init buffer has to be provided in " - "the parameters (exactly one of them)"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (coarse_params->init_buffer_size == 0 && - (coarse_params->immediate_init_from_upstream || - coarse_params->init_buffer != NULL)) { - 
LOG_ERR("init_buffer_size has to be greater than 0 if " - "immediate_init_from_upstream or init_buffer is set"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (coarse_params->init_buffer_size != 0 && - (!coarse_params->immediate_init_from_upstream && - coarse_params->init_buffer == NULL)) { - LOG_ERR("init_buffer_size is greater than 0 but none of " - "immediate_init_from_upstream nor init_buffer is set"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (coarse_params->destroy_upstream_memory_provider && - !coarse_params->upstream_memory_provider) { - LOG_ERR("destroy_upstream_memory_provider is true, but an upstream " - "provider is not provided"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - coarse_memory_provider_t *coarse_provider = - umf_ba_global_alloc(sizeof(*coarse_provider)); - if (!coarse_provider) { - LOG_ERR("out of the host memory"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - memset(coarse_provider, 0, sizeof(*coarse_provider)); - - coarse_provider->upstream_memory_provider = - coarse_params->upstream_memory_provider; - coarse_provider->destroy_upstream_memory_provider = - coarse_params->destroy_upstream_memory_provider; - coarse_provider->allocation_strategy = coarse_params->allocation_strategy; - coarse_provider->init_buffer = coarse_params->init_buffer; - - if (coarse_provider->upstream_memory_provider) { - coarse_provider->disable_upstream_provider_free = - umfIsFreeOpDefault(coarse_provider->upstream_memory_provider); - } else { - coarse_provider->disable_upstream_provider_free = false; - } - - umf_result_t umf_result = coarse_memory_provider_set_name(coarse_provider); - if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("name initialization failed"); - goto err_free_coarse_provider; - } - - // most of the error handling paths below set this error - umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - - coarse_provider->upstream_blocks = - ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); - if (coarse_provider->upstream_blocks == NULL) { - LOG_ERR("out of the host memory"); - goto err_free_name; - } - - coarse_provider->free_blocks = - ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); - if (coarse_provider->free_blocks == NULL) { - LOG_ERR("out of the host memory"); - goto err_delete_ravl_upstream_blocks; - } - - coarse_provider->all_blocks = - ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); - if (coarse_provider->all_blocks == NULL) { - LOG_ERR("out of the host memory"); - goto err_delete_ravl_free_blocks; - } - - coarse_provider->alloc_size = 0; - coarse_provider->used_size = 0; - - if (utils_mutex_init(&coarse_provider->lock) == NULL) { - LOG_ERR("lock initialization failed"); - umf_result = UMF_RESULT_ERROR_UNKNOWN; - goto err_delete_ravl_all_blocks; - } - - if (coarse_params->upstream_memory_provider && - coarse_params->immediate_init_from_upstream) { - // allocate and immediately deallocate memory using the upstream provider - void *init_buffer = NULL; - coarse_memory_provider_alloc( - coarse_provider, coarse_params->init_buffer_size, 0, &init_buffer); - if (init_buffer == NULL) { - goto err_destroy_mutex; - } - - coarse_memory_provider_free(coarse_provider, init_buffer, - coarse_params->init_buffer_size); - - } else if (coarse_params->init_buffer) { - umf_result = coarse_add_upstream_block(coarse_provider, - coarse_provider->init_buffer, - coarse_params->init_buffer_size); - if (umf_result != UMF_RESULT_SUCCESS) { - goto err_destroy_mutex; - } - - LOG_DEBUG("coarse_ALLOC (init_buffer) %zu used %zu alloc %zu", - 
coarse_params->init_buffer_size, coarse_provider->used_size, - coarse_provider->alloc_size); - - coarse_memory_provider_free(coarse_provider, - coarse_provider->init_buffer, - coarse_params->init_buffer_size); - } - - assert(coarse_provider->used_size == 0); - assert(coarse_provider->alloc_size == coarse_params->init_buffer_size); - assert(debug_check(coarse_provider)); - - *provider = coarse_provider; - - return UMF_RESULT_SUCCESS; - -err_destroy_mutex: - utils_mutex_destroy_not_free(&coarse_provider->lock); -err_delete_ravl_all_blocks: - ravl_delete(coarse_provider->all_blocks); -err_delete_ravl_free_blocks: - ravl_delete(coarse_provider->free_blocks); -err_delete_ravl_upstream_blocks: - ravl_delete(coarse_provider->upstream_blocks); -err_free_name: - umf_ba_global_free(coarse_provider->name); -err_free_coarse_provider: - umf_ba_global_free(coarse_provider); - return umf_result; -} - -static void coarse_ravl_cb_rm_upstream_blocks_node(void *data, void *arg) { - assert(data); - assert(arg); - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)arg; - ravl_data_t *node_data = data; - block_t *alloc = node_data->value; - assert(alloc); - - if (coarse_provider->upstream_memory_provider && - !coarse_provider->disable_upstream_provider_free) { - // We continue to deallocate alloc blocks even if the upstream provider doesn't return success. - umfMemoryProviderFree(coarse_provider->upstream_memory_provider, - alloc->data, alloc->size); - } - - assert(coarse_provider->alloc_size >= alloc->size); - coarse_provider->alloc_size -= alloc->size; - - umf_ba_global_free(alloc); -} - -static void coarse_ravl_cb_rm_all_blocks_node(void *data, void *arg) { - assert(data); - assert(arg); - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)arg; - ravl_data_t *node_data = data; - block_t *block = node_data->value; - assert(block); - - if (block->used) { - assert(coarse_provider->used_size >= block->size); - coarse_provider->used_size -= block->size; - } - - if (block->free_list_ptr) { - free_blocks_rm_node(coarse_provider->free_blocks, block->free_list_ptr); - } - - umf_ba_global_free(block); -} - -static void coarse_memory_provider_finalize(void *provider) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - utils_mutex_destroy_not_free(&coarse_provider->lock); - - ravl_foreach(coarse_provider->all_blocks, coarse_ravl_cb_rm_all_blocks_node, - coarse_provider); - assert(coarse_provider->used_size == 0); - - ravl_foreach(coarse_provider->upstream_blocks, - coarse_ravl_cb_rm_upstream_blocks_node, coarse_provider); - assert(coarse_provider->alloc_size == 0); - - ravl_delete(coarse_provider->upstream_blocks); - ravl_delete(coarse_provider->all_blocks); - ravl_delete(coarse_provider->free_blocks); - - umf_ba_global_free(coarse_provider->name); - - if (coarse_provider->destroy_upstream_memory_provider && - coarse_provider->upstream_memory_provider) { - umfMemoryProviderDestroy(coarse_provider->upstream_memory_provider); - } - - umf_ba_global_free(coarse_provider); -} - -static umf_result_t -create_aligned_block(coarse_memory_provider_t *coarse_provider, - size_t orig_size, size_t alignment, block_t **current) { - (void)orig_size; // unused in the Release version - int rv; - - block_t *curr = *current; - - // In case of non-zero alignment create an aligned block what would be further used. 
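/* Worked instance of the padding arithmetic below (made-up numbers):
 *   data = 0x1003, alignment = 0x1000 (4 KiB)
 *   aligned_data = ALIGN_UP(0x1003, 0x1000) = 0x2000
 *   padding      = 0x2000 - 0x1003         = 0xFFD
 * The 0xFFD-byte prefix goes back onto the free list as a block of its
 * own; the request is then served from the aligned block at 0x2000. */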
- uintptr_t orig_data = (uintptr_t)curr->data; - uintptr_t aligned_data = ALIGN_UP(orig_data, alignment); - size_t padding = aligned_data - orig_data; - if (alignment > 0 && padding > 0) { - block_t *aligned_block = coarse_ravl_add_new( - coarse_provider->all_blocks, curr->data + padding, - curr->size - padding, NULL); - if (aligned_block == NULL) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - curr->used = false; - curr->size = padding; - - rv = free_blocks_add(coarse_provider->free_blocks, curr); - if (rv) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - // use aligned block - *current = aligned_block; - assert((*current)->size >= orig_size); - } - - return UMF_RESULT_SUCCESS; -} - -// Split the current block and put the new block after the one that we use. -static umf_result_t -split_current_block(coarse_memory_provider_t *coarse_provider, block_t *curr, - size_t size) { - ravl_node_t *new_node = NULL; - - block_t *new_block = - coarse_ravl_add_new(coarse_provider->all_blocks, curr->data + size, - curr->size - size, &new_node); - if (new_block == NULL) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - new_block->used = false; - - int rv = - free_blocks_add(coarse_provider->free_blocks, get_node_block(new_node)); - if (rv) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return UMF_RESULT_SUCCESS; -} - -static block_t * -find_free_block(struct ravl *free_blocks, size_t size, size_t alignment, - coarse_memory_provider_strategy_t allocation_strategy) { - block_t *block; - - switch (allocation_strategy) { - case UMF_COARSE_MEMORY_STRATEGY_FASTEST: - // Always allocate a free block of the (size + alignment) size - // and later cut out the properly aligned part leaving two remaining parts. - return free_blocks_rm_ge(free_blocks, size + alignment, 0, - CHECK_ONLY_THE_FIRST_BLOCK); - - case UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE: - // First check if the first free block of the 'size' size has the correct alignment. - block = free_blocks_rm_ge(free_blocks, size, alignment, - CHECK_ONLY_THE_FIRST_BLOCK); - if (block) { - return block; - } - - // If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. - return free_blocks_rm_ge(free_blocks, size + alignment, 0, - CHECK_ONLY_THE_FIRST_BLOCK); - - case UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE: - // First look through all free blocks of the 'size' size - // and choose the first one with the correct alignment. - block = free_blocks_rm_ge(free_blocks, size, alignment, - CHECK_ALL_BLOCKS_OF_SIZE); - if (block) { - return block; - } - - // If none of them had the correct alignment, - // use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. 
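/* Tradeoff across the three strategies: FASTEST over-asks by `alignment`
 * bytes so the first greater-or-equal fit can always be aligned;
 * FASTEST_BUT_ONE first tries only the head of the first big-enough
 * size class; CHECK_ALL_SIZE scans that whole class before falling back
 * to over-asking. Search cost grows in that order, while wasted padding
 * tends to shrink. */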
- return free_blocks_rm_ge(free_blocks, size + alignment, 0, - CHECK_ONLY_THE_FIRST_BLOCK); - - // unknown memory allocation strategy - default: - abort(); - } -} - -static umf_result_t coarse_memory_provider_alloc(void *provider, size_t size, - size_t alignment, - void **resultPtr) { - umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN; - - if (resultPtr == NULL) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (utils_mutex_lock(&coarse_provider->lock) != 0) { - LOG_ERR("locking the lock failed"); - return UMF_RESULT_ERROR_UNKNOWN; - } - - assert(debug_check(coarse_provider)); - - // Find a block with greater or equal size using the given memory allocation strategy - block_t *curr = - find_free_block(coarse_provider->free_blocks, size, alignment, - coarse_provider->allocation_strategy); - - // If the block that we want to reuse has a greater size, split it. - // Try to merge the split part with the successor if it is not used. - enum { ACTION_NONE = 0, ACTION_USE, ACTION_SPLIT } action = ACTION_NONE; - - if (curr && curr->size > size) { - action = ACTION_SPLIT; - } else if (curr && curr->size == size) { - action = ACTION_USE; - } - - if (action) { // ACTION_SPLIT or ACTION_USE - assert(curr->used == false); - - // In case of non-zero alignment create an aligned block what would be further used. - if (alignment > 0) { - umf_result = - create_aligned_block(coarse_provider, size, alignment, &curr); - if (umf_result != UMF_RESULT_SUCCESS) { - utils_mutex_unlock(&coarse_provider->lock); - return umf_result; - } - } - - if (action == ACTION_SPLIT) { - // Split the current block and put the new block after the one that we use. - umf_result = split_current_block(coarse_provider, curr, size); - if (umf_result != UMF_RESULT_SUCCESS) { - utils_mutex_unlock(&coarse_provider->lock); - return umf_result; - } - - curr->size = size; - - LOG_DEBUG("coarse_ALLOC (split_block) %zu used %zu alloc %zu", size, - coarse_provider->used_size, coarse_provider->alloc_size); - - } else { // action == ACTION_USE - LOG_DEBUG("coarse_ALLOC (same_block) %zu used %zu alloc %zu", size, - coarse_provider->used_size, coarse_provider->alloc_size); - } - - curr->used = true; - *resultPtr = curr->data; - coarse_provider->used_size += size; - - assert(debug_check(coarse_provider)); - utils_mutex_unlock(&coarse_provider->lock); - - return UMF_RESULT_SUCCESS; - } - - // no suitable block found - try to get more memory from the upstream provider - umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - - if (coarse_provider->upstream_memory_provider == NULL) { - LOG_ERR("out of memory - no upstream memory provider given"); - goto err_unlock; - } - - umfMemoryProviderAlloc(coarse_provider->upstream_memory_provider, size, - alignment, resultPtr); - if (*resultPtr == NULL) { - LOG_ERR("out of memory - upstream memory provider allocation failed"); - goto err_unlock; - } - - ASSERT_IS_ALIGNED(((uintptr_t)(*resultPtr)), alignment); - - umf_result = coarse_add_upstream_block(coarse_provider, *resultPtr, size); - if (umf_result != UMF_RESULT_SUCCESS) { - if (!coarse_provider->disable_upstream_provider_free) { - umfMemoryProviderFree(coarse_provider->upstream_memory_provider, - *resultPtr, size); - } - goto err_unlock; - } - - LOG_DEBUG("coarse_ALLOC (upstream) %zu used %zu alloc %zu", size, - coarse_provider->used_size, coarse_provider->alloc_size); - - umf_result = UMF_RESULT_SUCCESS; - -err_unlock: - assert(debug_check(coarse_provider)); - 
utils_mutex_unlock(&coarse_provider->lock); - - return umf_result; -} - -static umf_result_t coarse_memory_provider_free(void *provider, void *ptr, - size_t bytes) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (utils_mutex_lock(&coarse_provider->lock) != 0) { - LOG_ERR("locking the lock failed"); - return UMF_RESULT_ERROR_UNKNOWN; - } - - assert(debug_check(coarse_provider)); - - ravl_node_t *node = coarse_ravl_find_node(coarse_provider->all_blocks, ptr); - if (node == NULL) { - // the block was not found - utils_mutex_unlock(&coarse_provider->lock); - LOG_ERR("memory block not found (ptr = %p, size = %zu)", ptr, bytes); - return UMF_RESULT_ERROR_UNKNOWN; - } - - block_t *block = get_node_block(node); - if (!block->used) { - // the block is already free - utils_mutex_unlock(&coarse_provider->lock); - LOG_ERR("the block is already free"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (bytes > 0 && bytes != block->size) { - // wrong size of allocation - utils_mutex_unlock(&coarse_provider->lock); - LOG_ERR("wrong size of allocation"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - LOG_DEBUG("coarse_FREE (return_block_to_pool) %zu used %zu alloc %zu", - block->size, coarse_provider->used_size - block->size, - coarse_provider->alloc_size); - - assert(coarse_provider->used_size >= block->size); - coarse_provider->used_size -= block->size; - - block->used = false; - - // Merge with prev and/or next block if they are unused and have continuous data. - node = free_block_merge_with_prev(coarse_provider, node); - node = free_block_merge_with_next(coarse_provider, node); - - int rv = - free_blocks_add(coarse_provider->free_blocks, get_node_block(node)); - if (rv) { - utils_mutex_unlock(&coarse_provider->lock); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - assert(debug_check(coarse_provider)); - utils_mutex_unlock(&coarse_provider->lock); - - return UMF_RESULT_SUCCESS; -} - -static void coarse_memory_provider_get_last_native_error(void *provider, - const char **ppMessage, - int32_t *pError) { - (void)provider; // unused - - if (ppMessage == NULL || pError == NULL) { - assert(0); - return; - } - - // Nothing more is needed here, since - // there is no UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC error used. 
-} - -static umf_result_t coarse_memory_provider_get_min_page_size(void *provider, - void *ptr, - size_t *pageSize) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (!coarse_provider->upstream_memory_provider) { - *pageSize = utils_get_page_size(); - return UMF_RESULT_SUCCESS; - } - - return umfMemoryProviderGetMinPageSize( - coarse_provider->upstream_memory_provider, ptr, pageSize); -} - -static umf_result_t -coarse_memory_provider_get_recommended_page_size(void *provider, size_t size, - size_t *pageSize) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (!coarse_provider->upstream_memory_provider) { - *pageSize = utils_get_page_size(); - return UMF_RESULT_SUCCESS; - } - - return umfMemoryProviderGetRecommendedPageSize( - coarse_provider->upstream_memory_provider, size, pageSize); -} - -static const char *coarse_memory_provider_get_name(void *provider) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (!coarse_provider->name) { - return COARSE_BASE_NAME; - } - - return coarse_provider->name; -} - -static void ravl_cb_count(void *data, void *arg) { - assert(arg); - (void)data; /* unused */ - - size_t *num_all_blocks = arg; - (*num_all_blocks)++; -} - -static void ravl_cb_count_free(void *data, void *arg) { - assert(data); - assert(arg); - - ravl_data_t *node_data = data; - assert(node_data); - ravl_free_blocks_head_t *head_node = node_data->value; - assert(head_node); - struct ravl_free_blocks_elem_t *free_block = head_node->head; - assert(free_block); - - size_t *num_all_blocks = arg; - while (free_block) { - (*num_all_blocks)++; - free_block = free_block->next; - } -} - -static umf_result_t -coarse_memory_provider_get_stats(void *provider, - coarse_memory_provider_stats_t *stats) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - // count blocks - size_t num_upstream_blocks = 0; - ravl_foreach(coarse_provider->upstream_blocks, ravl_cb_count, - &num_upstream_blocks); - - size_t num_all_blocks = 0; - ravl_foreach(coarse_provider->all_blocks, ravl_cb_count, &num_all_blocks); - - size_t num_free_blocks = 0; - ravl_foreach(coarse_provider->free_blocks, ravl_cb_count_free, - &num_free_blocks); - - stats->alloc_size = coarse_provider->alloc_size; - stats->used_size = coarse_provider->used_size; - stats->num_upstream_blocks = num_upstream_blocks; - stats->num_all_blocks = num_all_blocks; - stats->num_free_blocks = num_free_blocks; - - return UMF_RESULT_SUCCESS; -} - -static umf_result_t coarse_memory_provider_purge_lazy(void *provider, void *ptr, - size_t size) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - if (coarse_provider->upstream_memory_provider == NULL) { - LOG_ERR("no upstream memory provider given"); - return UMF_RESULT_ERROR_NOT_SUPPORTED; - } - - return umfMemoryProviderPurgeLazy(coarse_provider->upstream_memory_provider, - ptr, size); -} - -static umf_result_t coarse_memory_provider_purge_force(void *provider, - void *ptr, size_t size) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - if (coarse_provider->upstream_memory_provider == NULL) { - LOG_ERR("no upstream memory provider given"); - return UMF_RESULT_ERROR_NOT_SUPPORTED; - } - - return umfMemoryProviderPurgeForce( - coarse_provider->upstream_memory_provider, ptr, size); -} - -static umf_result_t 
coarse_memory_provider_allocation_split(void *provider, - void *ptr, - size_t totalSize, - size_t firstSize) { - umf_result_t umf_result; - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (utils_mutex_lock(&coarse_provider->lock) != 0) { - LOG_ERR("locking the lock failed"); - return UMF_RESULT_ERROR_UNKNOWN; - } - - assert(debug_check(coarse_provider)); - - ravl_node_t *node = coarse_ravl_find_node(coarse_provider->all_blocks, ptr); - if (node == NULL) { - LOG_ERR("memory block not found"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - block_t *block = get_node_block(node); - - if (block->size != totalSize) { - LOG_ERR("wrong totalSize"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - if (!block->used) { - LOG_ERR("block is not allocated"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - block_t *new_block = coarse_ravl_add_new(coarse_provider->all_blocks, - block->data + firstSize, - block->size - firstSize, NULL); - if (new_block == NULL) { - umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - goto err_mutex_unlock; - } - - block->size = firstSize; - new_block->used = true; - - assert(new_block->size == (totalSize - firstSize)); - - umf_result = UMF_RESULT_SUCCESS; - -err_mutex_unlock: - assert(debug_check(coarse_provider)); - utils_mutex_unlock(&coarse_provider->lock); - - return umf_result; -} - -static umf_result_t coarse_memory_provider_allocation_merge(void *provider, - void *lowPtr, - void *highPtr, - size_t totalSize) { - umf_result_t umf_result; - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (utils_mutex_lock(&coarse_provider->lock) != 0) { - LOG_ERR("locking the lock failed"); - return UMF_RESULT_ERROR_UNKNOWN; - } - - assert(debug_check(coarse_provider)); - - ravl_node_t *low_node = - coarse_ravl_find_node(coarse_provider->all_blocks, lowPtr); - if (low_node == NULL) { - LOG_ERR("the lowPtr memory block not found"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - block_t *low_block = get_node_block(low_node); - if (!low_block->used) { - LOG_ERR("the lowPtr block is not allocated"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - ravl_node_t *high_node = - coarse_ravl_find_node(coarse_provider->all_blocks, highPtr); - if (high_node == NULL) { - LOG_ERR("the highPtr memory block not found"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - block_t *high_block = get_node_block(high_node); - if (!high_block->used) { - LOG_ERR("the highPtr block is not allocated"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - if (get_node_next(low_node) != high_node) { - LOG_ERR("given pointers cannot be merged"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - if (get_node_prev(high_node) != low_node) { - LOG_ERR("given pointers cannot be merged"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - if (low_block->size + high_block->size != totalSize) { - LOG_ERR("wrong totalSize"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - if ((uintptr_t)highPtr != ((uintptr_t)lowPtr + low_block->size)) { - LOG_ERR("given pointers cannot be merged"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - ravl_node_t *merged_node = 
NULL; - - umf_result = user_block_merge(coarse_provider, low_node, high_node, true, - &merged_node); - if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("merging failed"); - goto err_mutex_unlock; - } - - assert(merged_node == low_node); - assert(low_block->size == totalSize); - - umf_result = UMF_RESULT_SUCCESS; - -err_mutex_unlock: - assert(debug_check(coarse_provider)); - utils_mutex_unlock(&coarse_provider->lock); - - return umf_result; -} - -umf_memory_provider_ops_t UMF_COARSE_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, - .initialize = coarse_memory_provider_initialize, - .finalize = coarse_memory_provider_finalize, - .alloc = coarse_memory_provider_alloc, - .get_last_native_error = coarse_memory_provider_get_last_native_error, - .get_recommended_page_size = - coarse_memory_provider_get_recommended_page_size, - .get_min_page_size = coarse_memory_provider_get_min_page_size, - .get_name = coarse_memory_provider_get_name, - .ext.free = coarse_memory_provider_free, - .ext.purge_lazy = coarse_memory_provider_purge_lazy, - .ext.purge_force = coarse_memory_provider_purge_force, - .ext.allocation_merge = coarse_memory_provider_allocation_merge, - .ext.allocation_split = coarse_memory_provider_allocation_split, - // TODO - /* - .ipc.get_ipc_handle_size = coarse_memory_provider_get_ipc_handle_size, - .ipc.get_ipc_handle = coarse_memory_provider_get_ipc_handle, - .ipc.put_ipc_handle = coarse_memory_provider_put_ipc_handle, - .ipc.open_ipc_handle = coarse_memory_provider_open_ipc_handle, - .ipc.close_ipc_handle = coarse_memory_provider_close_ipc_handle, - */ -}; - -umf_memory_provider_ops_t *umfCoarseMemoryProviderOps(void) { - return &UMF_COARSE_MEMORY_PROVIDER_OPS; -} - -coarse_memory_provider_stats_t -umfCoarseMemoryProviderGetStats(umf_memory_provider_handle_t provider) { - coarse_memory_provider_stats_t stats = {0}; - - if (provider == NULL) { - return stats; - } - - void *priv = umfMemoryProviderGetPriv(provider); - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)priv; - - if (utils_mutex_lock(&coarse_provider->lock) != 0) { - LOG_ERR("locking the lock failed"); - return stats; - } - - coarse_memory_provider_get_stats(priv, &stats); - - utils_mutex_unlock(&coarse_provider->lock); - - return stats; -} diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index ce2f1debb7..dd12d91847 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -12,8 +12,19 @@ #include #include +#include "provider_cuda_internal.h" +#include "utils_load_library.h" #include "utils_log.h" +static void *cu_lib_handle = NULL; + +void fini_cu_global_state(void) { + if (cu_lib_handle) { + utils_close_library(cu_lib_handle); + cu_lib_handle = NULL; + } +} + #if defined(UMF_NO_CUDA_PROVIDER) umf_result_t umfCUDAMemoryProviderParamsCreate( @@ -55,6 +66,14 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType( return UMF_RESULT_ERROR_NOT_SUPPORTED; } +umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags( + umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags) { + (void)hParams; + (void)flags; + LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) { // not supported LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!"); @@ -80,7 +99,6 @@ umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) { #include "utils_assert.h" #include "utils_common.h" #include "utils_concurrency.h" -#include "utils_load_library.h" #include "utils_log.h" #include "utils_sanitizers.h" @@ -89,13 +107,22 @@ typedef struct cu_memory_provider_t { CUdevice device; umf_usm_memory_type_t memory_type; size_t min_alignment; + unsigned int alloc_flags; } cu_memory_provider_t; // CUDA Memory Provider settings struct typedef struct umf_cuda_memory_provider_params_t { - void *cuda_context_handle; ///< Handle to the CUDA context - int cuda_device_handle; ///< Handle to the CUDA device - umf_usm_memory_type_t memory_type; ///< Allocation memory type + // Handle to the CUDA context + void *cuda_context_handle; + + // Handle to the CUDA device + int cuda_device_handle; + + // Allocation memory type + umf_usm_memory_type_t memory_type; + + // Allocation flags for cuMemHostAlloc/cuMemAllocManaged + unsigned int alloc_flags; } umf_cuda_memory_provider_params_t; typedef struct cu_ops_t { @@ -103,7 +130,7 @@ typedef struct cu_ops_t { size_t *granularity, const CUmemAllocationProp *prop, CUmemAllocationGranularity_flags option); CUresult (*cuMemAlloc)(CUdeviceptr *dptr, size_t bytesize); - CUresult (*cuMemAllocHost)(void **pp, size_t bytesize); + CUresult (*cuMemHostAlloc)(void **pp, size_t bytesize, unsigned int flags); CUresult (*cuMemAllocManaged)(CUdeviceptr *dptr, size_t bytesize, unsigned int flags); CUresult (*cuMemFree)(CUdeviceptr dptr); @@ -112,6 +139,7 @@ typedef struct cu_ops_t { CUresult (*cuGetErrorName)(CUresult error, const char **pStr); CUresult (*cuGetErrorString)(CUresult error, const char **pStr); CUresult (*cuCtxGetCurrent)(CUcontext *pctx); + CUresult (*cuCtxGetDevice)(CUdevice *device); CUresult (*cuCtxSetCurrent)(CUcontext ctx); CUresult (*cuIpcGetMemHandle)(CUipcMemHandle *pHandle, CUdeviceptr dptr); CUresult (*cuIpcOpenMemHandle)(CUdeviceptr *pdptr, CUipcMemHandle handle, @@ -150,8 +178,10 @@ static umf_result_t cu2umf_result(CUresult result) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; case CUDA_ERROR_INVALID_VALUE: case CUDA_ERROR_INVALID_HANDLE: - case CUDA_ERROR_INVALID_RESOURCE_TYPE: return UMF_RESULT_ERROR_INVALID_ARGUMENT; + case CUDA_ERROR_DEINITIALIZED: + LOG_ERR("CUDA driver has been deinitialized"); + return UMF_RESULT_ERROR_OUT_OF_RESOURCES; default: cu_store_last_native_error(result); return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; @@ -164,48 +194,61 @@ static void init_cu_global_state(void) { #else const char *lib_name = "libcuda.so"; #endif - 
// check if CUDA shared library is already loaded - // we pass 0 as a handle to search the global symbol table + // The CUDA shared library should already be loaded by the user + // of the CUDA provider. UMF just wants to reuse it + // and increase the reference count to the CUDA shared library. + void *lib_handle = + utils_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_NO_LOAD); + if (!lib_handle) { + LOG_ERR("Failed to open CUDA shared library"); + Init_cu_global_state_failed = true; + return; + } // NOTE: some symbols defined in the lib have _vX postfixes - it is // important to load the proper version of functions *(void **)&g_cu_ops.cuMemGetAllocationGranularity = - utils_get_symbol_addr(0, "cuMemGetAllocationGranularity", lib_name); + *(void **)&g_cu_ops.cuMemGetAllocationGranularity = utils_get_symbol_addr( + lib_handle, "cuMemGetAllocationGranularity", lib_name); *(void **)&g_cu_ops.cuMemAlloc = - utils_get_symbol_addr(0, "cuMemAlloc_v2", lib_name); - *(void **)&g_cu_ops.cuMemAllocHost = - utils_get_symbol_addr(0, "cuMemAllocHost_v2", lib_name); + utils_get_symbol_addr(lib_handle, "cuMemAlloc_v2", lib_name); + *(void **)&g_cu_ops.cuMemHostAlloc = + utils_get_symbol_addr(lib_handle, "cuMemHostAlloc", lib_name); *(void **)&g_cu_ops.cuMemAllocManaged = - utils_get_symbol_addr(0, "cuMemAllocManaged", lib_name); + utils_get_symbol_addr(lib_handle, "cuMemAllocManaged", lib_name); *(void **)&g_cu_ops.cuMemFree = - utils_get_symbol_addr(0, "cuMemFree_v2", lib_name); + utils_get_symbol_addr(lib_handle, "cuMemFree_v2", lib_name); *(void **)&g_cu_ops.cuMemFreeHost = - utils_get_symbol_addr(0, "cuMemFreeHost", lib_name); + utils_get_symbol_addr(lib_handle, "cuMemFreeHost", lib_name); *(void **)&g_cu_ops.cuGetErrorName = - utils_get_symbol_addr(0, "cuGetErrorName", lib_name); + utils_get_symbol_addr(lib_handle, "cuGetErrorName", lib_name); *(void **)&g_cu_ops.cuGetErrorString = - utils_get_symbol_addr(0, "cuGetErrorString", lib_name); + utils_get_symbol_addr(lib_handle, "cuGetErrorString", lib_name); *(void **)&g_cu_ops.cuCtxGetCurrent = - utils_get_symbol_addr(0, "cuCtxGetCurrent", lib_name); + utils_get_symbol_addr(lib_handle, "cuCtxGetCurrent", lib_name); + *(void **)&g_cu_ops.cuCtxGetDevice = + utils_get_symbol_addr(lib_handle, "cuCtxGetDevice", lib_name); *(void **)&g_cu_ops.cuCtxSetCurrent = - utils_get_symbol_addr(0, "cuCtxSetCurrent", lib_name); + utils_get_symbol_addr(lib_handle, "cuCtxSetCurrent", lib_name); *(void **)&g_cu_ops.cuIpcGetMemHandle = - utils_get_symbol_addr(0, "cuIpcGetMemHandle", lib_name); + utils_get_symbol_addr(lib_handle, "cuIpcGetMemHandle", lib_name); *(void **)&g_cu_ops.cuIpcOpenMemHandle = - utils_get_symbol_addr(0, "cuIpcOpenMemHandle_v2", lib_name); + utils_get_symbol_addr(lib_handle, "cuIpcOpenMemHandle_v2", lib_name); *(void **)&g_cu_ops.cuIpcCloseMemHandle = - utils_get_symbol_addr(0, "cuIpcCloseMemHandle", lib_name); + utils_get_symbol_addr(lib_handle, "cuIpcCloseMemHandle", lib_name); if (!g_cu_ops.cuMemGetAllocationGranularity || !g_cu_ops.cuMemAlloc || - !g_cu_ops.cuMemAllocHost || !g_cu_ops.cuMemAllocManaged || + !g_cu_ops.cuMemHostAlloc || !g_cu_ops.cuMemAllocManaged || !g_cu_ops.cuMemFree || !g_cu_ops.cuMemFreeHost || !g_cu_ops.cuGetErrorName || !g_cu_ops.cuGetErrorString || - !g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent || - !g_cu_ops.cuIpcGetMemHandle || !g_cu_ops.cuIpcOpenMemHandle || - !g_cu_ops.cuIpcCloseMemHandle) { - LOG_ERR("Required CUDA symbols not found."); + !g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxGetDevice || -
!g_cu_ops.cuCtxSetCurrent || !g_cu_ops.cuIpcGetMemHandle || + !g_cu_ops.cuIpcOpenMemHandle || !g_cu_ops.cuIpcCloseMemHandle) { + LOG_FATAL("Required CUDA symbols not found."); Init_cu_global_state_failed = true; + utils_close_library(lib_handle); + return; } + cu_lib_handle = lib_handle; } umf_result_t umfCUDAMemoryProviderParamsCreate( @@ -223,9 +266,31 @@ umf_result_t umfCUDAMemoryProviderParamsCreate( return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - params_data->cuda_context_handle = NULL; - params_data->cuda_device_handle = -1; + utils_init_once(&cu_is_initialized, init_cu_global_state); + if (Init_cu_global_state_failed) { + LOG_FATAL("Loading CUDA symbols failed"); + return UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; + } + + // initialize context and device to the current ones + CUcontext current_ctx = NULL; + CUresult cu_result = g_cu_ops.cuCtxGetCurrent(¤t_ctx); + if (cu_result == CUDA_SUCCESS) { + params_data->cuda_context_handle = current_ctx; + } else { + params_data->cuda_context_handle = NULL; + } + + CUdevice current_device = -1; + cu_result = g_cu_ops.cuCtxGetDevice(¤t_device); + if (cu_result == CUDA_SUCCESS) { + params_data->cuda_device_handle = current_device; + } else { + params_data->cuda_device_handle = -1; + } + params_data->memory_type = UMF_MEMORY_TYPE_UNKNOWN; + params_data->alloc_flags = 0; *hParams = params_data; @@ -276,6 +341,18 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType( return UMF_RESULT_SUCCESS; } +umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags( + umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags) { + if (!hParams) { + LOG_ERR("CUDA Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->alloc_flags = flags; + + return UMF_RESULT_SUCCESS; +} + static umf_result_t cu_memory_provider_initialize(void *params, void **provider) { if (params == NULL) { @@ -292,13 +369,19 @@ static umf_result_t cu_memory_provider_initialize(void *params, } if (cu_params->cuda_context_handle == NULL) { + LOG_ERR("Invalid context handle"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (cu_params->cuda_device_handle < 0) { + LOG_ERR("Invalid device handle"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } utils_init_once(&cu_is_initialized, init_cu_global_state); if (Init_cu_global_state_failed) { - LOG_ERR("Loading CUDA symbols failed"); - return UMF_RESULT_ERROR_UNKNOWN; + LOG_FATAL("Loading CUDA symbols failed"); + return UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } cu_memory_provider_t *cu_provider = @@ -313,7 +396,7 @@ static umf_result_t cu_memory_provider_initialize(void *params, CUmemAllocationProp allocProps = {0}; allocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE; allocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED; - allocProps.location.id = cu_provider->device; + allocProps.location.id = cu_params->cuda_device_handle; CUresult cu_result = g_cu_ops.cuMemGetAllocationGranularity( &min_alignment, &allocProps, CU_MEM_ALLOC_GRANULARITY_MINIMUM); if (cu_result != CUDA_SUCCESS) { @@ -326,6 +409,17 @@ static umf_result_t cu_memory_provider_initialize(void *params, cu_provider->memory_type = cu_params->memory_type; cu_provider->min_alignment = min_alignment; + // If the memory type is shared (CUDA managed), the allocation flags must + // be set. NOTE: we do not check here if the flags are valid - + // this will be done by CUDA runtime. 
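/* Example (hypothetical) of driving these flags from user code:
 *
 *   umf_cuda_memory_provider_params_handle_t params = NULL;
 *   umfCUDAMemoryProviderParamsCreate(&params);
 *   umfCUDAMemoryProviderParamsSetMemoryType(params, UMF_MEMORY_TYPE_HOST);
 *   umfCUDAMemoryProviderParamsSetAllocFlags(params, CU_MEMHOSTALLOC_PORTABLE);
 *
 * For UMF_MEMORY_TYPE_HOST the flags reach cuMemHostAlloc(); for
 * UMF_MEMORY_TYPE_SHARED they reach cuMemAllocManaged(), where the
 * CU_MEM_ATTACH_GLOBAL default below applies when no flag is set. */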
+ if (cu_params->memory_type == UMF_MEMORY_TYPE_SHARED && + cu_params->alloc_flags == 0) { + // the default setting is CU_MEM_ATTACH_GLOBAL + cu_provider->alloc_flags = CU_MEM_ATTACH_GLOBAL; + } else { + cu_provider->alloc_flags = cu_params->alloc_flags; + } + *provider = cu_provider; return UMF_RESULT_SUCCESS; @@ -382,7 +476,8 @@ static umf_result_t cu_memory_provider_alloc(void *provider, size_t size, CUresult cu_result = CUDA_SUCCESS; switch (cu_provider->memory_type) { case UMF_MEMORY_TYPE_HOST: { - cu_result = g_cu_ops.cuMemAllocHost(resultPtr, size); + cu_result = + g_cu_ops.cuMemHostAlloc(resultPtr, size, cu_provider->alloc_flags); break; } case UMF_MEMORY_TYPE_DEVICE: { @@ -391,7 +486,7 @@ static umf_result_t cu_memory_provider_alloc(void *provider, size_t size, } case UMF_MEMORY_TYPE_SHARED: { cu_result = g_cu_ops.cuMemAllocManaged((CUdeviceptr *)resultPtr, size, - CU_MEM_ATTACH_GLOBAL); + cu_provider->alloc_flags); break; } default: @@ -433,6 +528,14 @@ static umf_result_t cu_memory_provider_free(void *provider, void *ptr, cu_memory_provider_t *cu_provider = (cu_memory_provider_t *)provider; + // Remember current context and set the one from the provider + CUcontext restore_ctx = NULL; + umf_result_t umf_result = set_context(cu_provider->context, &restore_ctx); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("Failed to set CUDA context, ret = %d", umf_result); + return umf_result; + } + CUresult cu_result = CUDA_SUCCESS; switch (cu_provider->memory_type) { case UMF_MEMORY_TYPE_HOST: { @@ -451,6 +554,11 @@ static umf_result_t cu_memory_provider_free(void *provider, void *ptr, return UMF_RESULT_ERROR_UNKNOWN; } + umf_result = set_context(restore_ctx, &restore_ctx); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("Failed to restore CUDA context, ret = %d", umf_result); + } + return cu2umf_result(cu_result); } @@ -464,22 +572,41 @@ static void cu_memory_provider_get_last_native_error(void *provider, return; } - const char *error_name = 0; - const char *error_string = 0; - g_cu_ops.cuGetErrorName(TLS_last_native_error.native_error, &error_name); - g_cu_ops.cuGetErrorString(TLS_last_native_error.native_error, - &error_string); - + CUresult result; size_t buf_size = 0; - strncpy(TLS_last_native_error.msg_buff, error_name, TLS_MSG_BUF_LEN - 1); - buf_size = strlen(TLS_last_native_error.msg_buff); + const char *error_name = NULL; + const char *error_string = NULL; + + // If the error code is not recognized, + // CUDA_ERROR_INVALID_VALUE will be returned + // and error_name will be set to the NULL address. + result = g_cu_ops.cuGetErrorName(TLS_last_native_error.native_error, + &error_name); + if (result == CUDA_SUCCESS && error_name != NULL) { + strncpy(TLS_last_native_error.msg_buff, error_name, + TLS_MSG_BUF_LEN - 1); + } else { + strncpy(TLS_last_native_error.msg_buff, "cuGetErrorName() failed", + TLS_MSG_BUF_LEN - 1); + } + buf_size = strlen(TLS_last_native_error.msg_buff); strncat(TLS_last_native_error.msg_buff, " - ", TLS_MSG_BUF_LEN - buf_size - 1); buf_size = strlen(TLS_last_native_error.msg_buff); - strncat(TLS_last_native_error.msg_buff, error_string, - TLS_MSG_BUF_LEN - buf_size - 1); + // If the error code is not recognized, + // CUDA_ERROR_INVALID_VALUE will be returned + // and error_string will be set to the NULL address. 
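/* The TLS buffer ends up holding "<name> - <string>", e.g.
 * "CUDA_ERROR_OUT_OF_MEMORY - out of memory", with the literal
 * "cuGetErrorName() failed" / "cuGetErrorString() failed" fallbacks
 * substituted when the driver does not recognize the error code; every
 * copy is bounded by TLS_MSG_BUF_LEN. */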
+ result = g_cu_ops.cuGetErrorString(TLS_last_native_error.native_error, + &error_string); + if (result == CUDA_SUCCESS && error_string != NULL) { + strncat(TLS_last_native_error.msg_buff, error_string, + TLS_MSG_BUF_LEN - buf_size - 1); + } else { + strncat(TLS_last_native_error.msg_buff, "cuGetErrorString() failed", + TLS_MSG_BUF_LEN - buf_size - 1); + } *pError = TLS_last_native_error.native_error; *ppMessage = TLS_last_native_error.msg_buff; @@ -581,7 +708,10 @@ static umf_result_t cu_memory_provider_open_ipc_handle(void *provider, LOG_ERR("cuIpcOpenMemHandle() failed."); } - set_context(restore_ctx, &restore_ctx); + umf_result = set_context(restore_ctx, &restore_ctx); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("Failed to restore CUDA context, ret = %d", umf_result); + } return cu2umf_result(cu_result); } @@ -602,16 +732,16 @@ cu_memory_provider_close_ipc_handle(void *provider, void *ptr, size_t size) { return UMF_RESULT_SUCCESS; } -static struct umf_memory_provider_ops_t UMF_CUDA_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, +static umf_memory_provider_ops_t UMF_CUDA_MEMORY_PROVIDER_OPS = { + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = cu_memory_provider_initialize, .finalize = cu_memory_provider_finalize, .alloc = cu_memory_provider_alloc, + .free = cu_memory_provider_free, .get_last_native_error = cu_memory_provider_get_last_native_error, .get_recommended_page_size = cu_memory_provider_get_recommended_page_size, .get_min_page_size = cu_memory_provider_get_min_page_size, .get_name = cu_memory_provider_get_name, - .ext.free = cu_memory_provider_free, // TODO /* .ext.purge_lazy = cu_memory_provider_purge_lazy, diff --git a/src/provider/provider_cuda_internal.h b/src/provider/provider_cuda_internal.h new file mode 100644 index 0000000000..bc3d79d4aa --- /dev/null +++ b/src/provider/provider_cuda_internal.h @@ -0,0 +1,10 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +void fini_cu_global_state(void); diff --git a/src/provider/provider_devdax_memory.c b/src/provider/provider_devdax_memory.c index cb5a4af572..8e81971905 100644 --- a/src/provider/provider_devdax_memory.c +++ b/src/provider/provider_devdax_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -65,6 +65,7 @@ umf_result_t umfDevDaxMemoryProviderParamsSetProtection( #else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) #include "base_alloc_global.h" +#include "coarse.h" #include "libumf.h" #include "utils_common.h" #include "utils_concurrency.h" @@ -81,6 +82,7 @@ typedef struct devdax_memory_provider_t { size_t offset; // offset in the file used for memory mapping utils_mutex_t lock; // lock of ptr and offset unsigned protection; // combination of OS-specific protection flags + coarse_t *coarse; // coarse library handle } devdax_memory_provider_t; // DevDax Memory provider settings struct @@ -140,6 +142,12 @@ devdax_translate_params(umf_devdax_memory_provider_params_t *in_params, return UMF_RESULT_SUCCESS; } +static umf_result_t devdax_allocation_split_cb(void *provider, void *ptr, + size_t totalSize, + size_t firstSize); +static umf_result_t devdax_allocation_merge_cb(void *provider, void *lowPtr, + void *highPtr, size_t totalSize); + static umf_result_t devdax_initialize(void *params, void **provider) { umf_result_t ret; @@ -168,21 +176,42 @@ static umf_result_t devdax_initialize(void *params, void **provider) { memset(devdax_provider, 0, sizeof(*devdax_provider)); - ret = devdax_translate_params(in_params, devdax_provider); + coarse_params_t coarse_params = {0}; + coarse_params.provider = devdax_provider; + coarse_params.page_size = DEVDAX_PAGE_SIZE_2MB; + // The alloc callback is not available in case of the devdax provider + // because it is a fixed-size memory provider + // and the entire devdax memory is added as a single block + // to the coarse library. + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; // not available for the devdax provider + coarse_params.cb.split = devdax_allocation_split_cb; + coarse_params.cb.merge = devdax_allocation_merge_cb; + + coarse_t *coarse = NULL; + ret = coarse_new(&coarse_params, &coarse); if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("coarse_new() failed"); goto err_free_devdax_provider; } + devdax_provider->coarse = coarse; + + ret = devdax_translate_params(in_params, devdax_provider); + if (ret != UMF_RESULT_SUCCESS) { + goto err_coarse_delete; + } + devdax_provider->size = in_params->size; if (utils_copy_path(in_params->path, devdax_provider->path, PATH_MAX)) { - goto err_free_devdax_provider; + goto err_coarse_delete; } int fd = utils_devdax_open(in_params->path); if (fd == -1) { LOG_ERR("cannot open the device DAX: %s", in_params->path); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_free_devdax_provider; + goto err_coarse_delete; } bool is_dax = false; @@ -196,23 +225,26 @@ static umf_result_t devdax_initialize(void *params, void **provider) { LOG_PDEBUG("mapping the devdax failed (path=%s, size=%zu)", in_params->path, devdax_provider->size); ret = UMF_RESULT_ERROR_UNKNOWN; - goto err_free_devdax_provider; + goto err_coarse_delete; } if (!is_dax) { LOG_ERR("mapping the devdax with MAP_SYNC failed: %s", in_params->path); ret = UMF_RESULT_ERROR_UNKNOWN; - - if (devdax_provider->base) { - utils_munmap(devdax_provider->base, devdax_provider->size); - } - - goto err_free_devdax_provider; + goto err_unmap_devdax; } LOG_DEBUG("devdax memory mapped (path=%s, size=%zu, addr=%p)", in_params->path, devdax_provider->size, devdax_provider->base); + // add the entire devdax memory as a single block + ret = coarse_add_memory_fixed(coarse, devdax_provider->base, + devdax_provider->size); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("adding memory block failed"); + goto 
err_unmap_devdax; + } + if (utils_mutex_init(&devdax_provider->lock) == NULL) { LOG_ERR("lock init failed"); ret = UMF_RESULT_ERROR_UNKNOWN; @@ -224,7 +256,11 @@ static umf_result_t devdax_initialize(void *params, void **provider) { return UMF_RESULT_SUCCESS; err_unmap_devdax: - utils_munmap(devdax_provider->base, devdax_provider->size); + if (devdax_provider->base) { + utils_munmap(devdax_provider->base, devdax_provider->size); + } +err_coarse_delete: + coarse_delete(devdax_provider->coarse); err_free_devdax_provider: umf_ba_global_free(devdax_provider); return ret; @@ -234,78 +270,15 @@ static void devdax_finalize(void *provider) { devdax_memory_provider_t *devdax_provider = provider; utils_mutex_destroy_not_free(&devdax_provider->lock); utils_munmap(devdax_provider->base, devdax_provider->size); + coarse_delete(devdax_provider->coarse); umf_ba_global_free(devdax_provider); } -static int devdax_alloc_aligned(size_t length, size_t alignment, void *base, - size_t size, utils_mutex_t *lock, - void **out_addr, size_t *offset) { - assert(out_addr); - - if (utils_mutex_lock(lock)) { - LOG_ERR("locking file offset failed"); - return -1; - } - - uintptr_t ptr = (uintptr_t)base + *offset; - uintptr_t rest_of_div = alignment ? (ptr % alignment) : 0; - - if (alignment > 0 && rest_of_div > 0) { - ptr += alignment - rest_of_div; - } - - size_t new_offset = ptr - (uintptr_t)base + length; - - if (new_offset > size) { - utils_mutex_unlock(lock); - LOG_ERR("cannot allocate more memory than the device DAX size: %zu", - size); - return -1; - } - - *offset = new_offset; - *out_addr = (void *)ptr; - - utils_mutex_unlock(lock); - - return 0; -} - static umf_result_t devdax_alloc(void *provider, size_t size, size_t alignment, void **resultPtr) { - int ret; - - // alignment must be a power of two and a multiple or a divider of the page size - if (alignment && ((alignment & (alignment - 1)) || - ((alignment % DEVDAX_PAGE_SIZE_2MB) && - (DEVDAX_PAGE_SIZE_2MB % alignment)))) { - LOG_ERR("wrong alignment: %zu (not a power of 2 or a multiple or a " - "divider of the page size (%zu))", - alignment, DEVDAX_PAGE_SIZE_2MB); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (IS_NOT_ALIGNED(alignment, DEVDAX_PAGE_SIZE_2MB)) { - alignment = ALIGN_UP(alignment, DEVDAX_PAGE_SIZE_2MB); - } - devdax_memory_provider_t *devdax_provider = (devdax_memory_provider_t *)provider; - - void *addr = NULL; - errno = 0; - ret = devdax_alloc_aligned(size, alignment, devdax_provider->base, - devdax_provider->size, &devdax_provider->lock, - &addr, &devdax_provider->offset); - if (ret) { - devdax_store_last_native_error(UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED, 0); - LOG_ERR("memory allocation failed"); - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - } - - *resultPtr = addr; - - return UMF_RESULT_SUCCESS; + return coarse_alloc(devdax_provider->coarse, size, alignment, resultPtr); } static void devdax_get_last_native_error(void *provider, const char **ppMessage, @@ -391,6 +364,14 @@ static const char *devdax_get_name(void *provider) { static umf_result_t devdax_allocation_split(void *provider, void *ptr, size_t totalSize, size_t firstSize) { + devdax_memory_provider_t *devdax_provider = + (devdax_memory_provider_t *)provider; + return coarse_split(devdax_provider->coarse, ptr, totalSize, firstSize); +} + +static umf_result_t devdax_allocation_split_cb(void *provider, void *ptr, + size_t totalSize, + size_t firstSize) { (void)provider; (void)ptr; (void)totalSize; @@ -400,6 +381,14 @@ static umf_result_t devdax_allocation_split(void 
*provider, void *ptr, static umf_result_t devdax_allocation_merge(void *provider, void *lowPtr, void *highPtr, size_t totalSize) { + devdax_memory_provider_t *devdax_provider = + (devdax_memory_provider_t *)provider; + return coarse_merge(devdax_provider->coarse, lowPtr, highPtr, totalSize); +} + +static umf_result_t devdax_allocation_merge_cb(void *provider, void *lowPtr, + void *highPtr, + size_t totalSize) { (void)provider; (void)lowPtr; (void)highPtr; @@ -534,11 +523,18 @@ static umf_result_t devdax_close_ipc_handle(void *provider, void *ptr, return UMF_RESULT_SUCCESS; } +static umf_result_t devdax_free(void *provider, void *ptr, size_t size) { + devdax_memory_provider_t *devdax_provider = + (devdax_memory_provider_t *)provider; + return coarse_free(devdax_provider->coarse, ptr, size); +} + static umf_memory_provider_ops_t UMF_DEVDAX_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = devdax_initialize, .finalize = devdax_finalize, .alloc = devdax_alloc, + .free = devdax_free, .get_last_native_error = devdax_get_last_native_error, .get_recommended_page_size = devdax_get_recommended_page_size, .get_min_page_size = devdax_get_min_page_size, diff --git a/src/provider/provider_file_memory.c b/src/provider/provider_file_memory.c index 7c9ee38564..12a9233229 100644 --- a/src/provider/provider_file_memory.c +++ b/src/provider/provider_file_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -71,6 +71,7 @@ umf_result_t umfFileMemoryProviderParamsSetVisibility( #else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) #include "base_alloc_global.h" +#include "coarse.h" #include "critnib.h" #include "libumf.h" #include "utils_common.h" @@ -106,9 +107,11 @@ typedef struct file_memory_provider_t { // A critnib map storing (ptr, fd_offset + 1) pairs. We add 1 to fd_offset // in order to be able to store fd_offset equal 0, because // critnib_get() returns value or NULL, so a value cannot equal 0. - // It is needed mainly in the get_ipc_handle and open_ipc_handle hooks + // It is needed mainly in the ipc_get_handle and ipc_open_handle hooks // to mmap a specific part of a file. 
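 // Illustrative sketch of the (ptr, fd_offset + 1) encoding described
 // above (the names are the ones used in this file; error handling
 // omitted):
 //
 //   critnib_insert(fd_offset_map, (uintptr_t)ptr,
 //                  (void *)(fd_offset + 1), 0 /* update */);
 //   ...
 //   void *value = critnib_get(fd_offset_map, (uintptr_t)ptr);
 //   if (value != NULL) {
 //       size_t fd_offset = (size_t)value - 1; // undo the +1 bias
 //   }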
critnib *fd_offset_map; + + coarse_t *coarse; // coarse library handle } file_memory_provider_t; // File Memory Provider settings struct @@ -174,6 +177,14 @@ file_translate_params(umf_file_memory_provider_params_t *in_params, return UMF_RESULT_SUCCESS; } +static umf_result_t file_alloc_cb(void *provider, size_t size, size_t alignment, + void **resultPtr); +static umf_result_t file_allocation_split_cb(void *provider, void *ptr, + size_t totalSize, + size_t firstSize); +static umf_result_t file_allocation_merge_cb(void *provider, void *lowPtr, + void *highPtr, size_t totalSize); + static umf_result_t file_initialize(void *params, void **provider) { umf_result_t ret; @@ -241,10 +252,27 @@ static umf_result_t file_initialize(void *params, void **provider) { file_provider->page_size = utils_get_page_size(); } + coarse_params_t coarse_params = {0}; + coarse_params.provider = file_provider; + coarse_params.page_size = file_provider->page_size; + coarse_params.cb.alloc = file_alloc_cb; + coarse_params.cb.free = NULL; // not available for the file provider + coarse_params.cb.split = file_allocation_split_cb; + coarse_params.cb.merge = file_allocation_merge_cb; + + coarse_t *coarse = NULL; + ret = coarse_new(&coarse_params, &coarse); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("coarse_new() failed"); + goto err_close_fd; + } + + file_provider->coarse = coarse; + if (utils_mutex_init(&file_provider->lock) == NULL) { LOG_ERR("lock init failed"); ret = UMF_RESULT_ERROR_UNKNOWN; - goto err_close_fd; + goto err_coarse_delete; } file_provider->fd_offset_map = critnib_new(); @@ -269,6 +297,8 @@ static umf_result_t file_initialize(void *params, void **provider) { critnib_delete(file_provider->fd_offset_map); err_mutex_destroy_not_free: utils_mutex_destroy_not_free(&file_provider->lock); +err_coarse_delete: + coarse_delete(file_provider->coarse); err_close_fd: utils_close_fd(file_provider->fd); err_free_file_provider: @@ -293,6 +323,7 @@ static void file_finalize(void *provider) { utils_close_fd(file_provider->fd); critnib_delete(file_provider->fd_offset_map); critnib_delete(file_provider->mmaps); + coarse_delete(file_provider->coarse); umf_ba_global_free(file_provider); } @@ -373,8 +404,12 @@ static umf_result_t file_mmap_aligned(file_memory_provider_t *file_provider, "inserted a value to the map of memory mapping (addr=%p, size=%zu)", ptr, extended_size); - file_provider->base_mmap = ptr; - file_provider->size_mmap = extended_size; + // align the new pointer + uintptr_t aligned_ptr = ALIGN_UP_SAFE((uintptr_t)ptr, alignment); + size_t aligned_size = extended_size - (aligned_ptr - (uintptr_t)ptr); + + file_provider->base_mmap = (void *)aligned_ptr; + file_provider->size_mmap = aligned_size; file_provider->offset_mmap = 0; return UMF_RESULT_SUCCESS; @@ -394,6 +429,8 @@ static umf_result_t file_alloc_aligned(file_memory_provider_t *file_provider, return UMF_RESULT_ERROR_UNKNOWN; } + assert(file_provider->offset_mmap <= file_provider->size_mmap); + if (file_provider->size_mmap - file_provider->offset_mmap < size) { umf_result = file_mmap_aligned(file_provider, size, alignment); if (umf_result != UMF_RESULT_SUCCESS) { @@ -419,7 +456,8 @@ static umf_result_t file_alloc_aligned(file_memory_provider_t *file_provider, size_t new_offset_fd = file_provider->offset_fd + new_offset_mmap - file_provider->offset_mmap; - if (file_provider->size_mmap - new_offset_mmap < size) { + // new_offset_mmap can be greater than file_provider->size_mmap + if (file_provider->size_mmap < size + new_offset_mmap) { umf_result = 
file_mmap_aligned(file_provider, size, alignment); if (umf_result != UMF_RESULT_SUCCESS) { utils_mutex_unlock(&file_provider->lock); @@ -451,11 +489,19 @@ static umf_result_t file_alloc_aligned(file_memory_provider_t *file_provider, static umf_result_t file_alloc(void *provider, size_t size, size_t alignment, void **resultPtr) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + return coarse_alloc(file_provider->coarse, size, alignment, resultPtr); +} + +static umf_result_t file_alloc_cb(void *provider, size_t size, size_t alignment, + void **resultPtr) { umf_result_t umf_result; int ret; file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + *resultPtr = NULL; + // alignment must be a power of two and a multiple or a divider of the page size if (alignment && ((alignment & (alignment - 1)) || ((alignment % file_provider->page_size) && @@ -488,8 +534,15 @@ static umf_result_t file_alloc(void *provider, size_t size, size_t alignment, LOG_ERR("inserting a value to the file descriptor offset map failed " "(addr=%p, offset=%zu)", addr, alloc_offset_fd); + // We cannot undo the file_alloc_aligned() call here, + // because the file memory provider does not support the free operation. + return UMF_RESULT_ERROR_UNKNOWN; } + LOG_DEBUG("inserted a value to the file descriptor offset map (addr=%p, " + "offset=%zu)", + addr, alloc_offset_fd); + *resultPtr = addr; return UMF_RESULT_SUCCESS; @@ -576,10 +629,15 @@ static const char *file_get_name(void *provider) { return "FILE"; } -// This function is supposed to be thread-safe, so it should NOT be called concurrently -// with file_allocation_merge() with the same pointer. static umf_result_t file_allocation_split(void *provider, void *ptr, size_t totalSize, size_t firstSize) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + return coarse_split(file_provider->coarse, ptr, totalSize, firstSize); +} + +static umf_result_t file_allocation_split_cb(void *provider, void *ptr, + size_t totalSize, + size_t firstSize) { (void)totalSize; file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; @@ -589,29 +647,42 @@ static umf_result_t file_allocation_split(void *provider, void *ptr, void *value = critnib_get(file_provider->fd_offset_map, (uintptr_t)ptr); if (value == NULL) { - LOG_ERR("file_allocation_split(): getting a value from the file " - "descriptor offset map failed (addr=%p)", + LOG_ERR("getting a value from the file descriptor offset map failed " + "(addr=%p)", ptr); return UMF_RESULT_ERROR_UNKNOWN; } + LOG_DEBUG("split the value from the file descriptor offset map (addr=%p) " + "from size %zu to %zu + %zu", + ptr, totalSize, firstSize, totalSize - firstSize); + uintptr_t new_key = (uintptr_t)ptr + firstSize; void *new_value = (void *)((uintptr_t)value + firstSize); int ret = critnib_insert(file_provider->fd_offset_map, new_key, new_value, 0 /* update */); if (ret) { - LOG_ERR("file_allocation_split(): inserting a value to the file " - "descriptor offset map failed (addr=%p, offset=%zu)", + LOG_ERR("inserting a value to the file descriptor offset map failed " + "(addr=%p, offset=%zu)", (void *)new_key, (size_t)new_value - 1); return UMF_RESULT_ERROR_UNKNOWN; } + LOG_DEBUG("inserted a value to the file descriptor offset map (addr=%p, " + "offset=%zu)", + (void *)new_key, (size_t)new_value - 1); + return UMF_RESULT_SUCCESS; } -// It should NOT be called concurrently with file_allocation_split() with the same pointer. 
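// Illustrative example of the split/merge bookkeeping around this point:
// splitting a region tracked as (ptr, off + 1) at firstSize adds a second
// entry (ptr + firstSize, off + firstSize + 1), and the merge below only
// has to drop the highPtr entry again - the lowPtr entry already describes
// the start and the file offset of the merged range:
//
//   critnib_remove(file_provider->fd_offset_map, (uintptr_t)highPtr);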
static umf_result_t file_allocation_merge(void *provider, void *lowPtr, void *highPtr, size_t totalSize) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + return coarse_merge(file_provider->coarse, lowPtr, highPtr, totalSize); +} + +static umf_result_t file_allocation_merge_cb(void *provider, void *lowPtr, + void *highPtr, size_t totalSize) { (void)lowPtr; (void)totalSize; @@ -623,12 +694,16 @@ static umf_result_t file_allocation_merge(void *provider, void *lowPtr, void *value = critnib_remove(file_provider->fd_offset_map, (uintptr_t)highPtr); if (value == NULL) { - LOG_ERR("file_allocation_merge(): removing a value from the file " - "descriptor offset map failed (addr=%p)", + LOG_ERR("removing a value from the file descriptor offset map failed " + "(addr=%p)", highPtr); return UMF_RESULT_ERROR_UNKNOWN; } + LOG_DEBUG("removed a value from the file descriptor offset map (addr=%p) - " + "merged with %p", + highPtr, lowPtr); + return UMF_RESULT_SUCCESS; } @@ -662,9 +737,7 @@ static umf_result_t file_get_ipc_handle(void *provider, const void *ptr, void *value = critnib_get(file_provider->fd_offset_map, (uintptr_t)ptr); if (value == NULL) { - LOG_ERR("file_get_ipc_handle(): getting a value from the IPC cache " - "failed (addr=%p)", - ptr); + LOG_ERR("getting a value from the IPC cache failed (addr=%p)", ptr); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -776,11 +849,17 @@ static umf_result_t file_close_ipc_handle(void *provider, void *ptr, return UMF_RESULT_SUCCESS; } +static umf_result_t file_free(void *provider, void *ptr, size_t size) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + return coarse_free(file_provider->coarse, ptr, size); +} + static umf_memory_provider_ops_t UMF_FILE_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = file_initialize, .finalize = file_finalize, .alloc = file_alloc, + .free = file_free, .get_last_native_error = file_get_last_native_error, .get_recommended_page_size = file_get_recommended_page_size, .get_min_page_size = file_get_min_page_size, diff --git a/src/provider/provider_fixed_memory.c b/src/provider/provider_fixed_memory.c new file mode 100644 index 0000000000..eeeb8b7025 --- /dev/null +++ b/src/provider/provider_fixed_memory.c @@ -0,0 +1,337 @@ +/* + * Copyright (C) 2024-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "base_alloc_global.h" +#include "coarse.h" +#include "libumf.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" + +#define TLS_MSG_BUF_LEN 1024 + +typedef struct fixed_memory_provider_t { + void *base; // base address of memory + size_t size; // size of the memory region + coarse_t *coarse; // coarse library handle +} fixed_memory_provider_t; + +// Fixed Memory provider settings struct +typedef struct umf_fixed_memory_provider_params_t { + void *ptr; + size_t size; +} umf_fixed_memory_provider_params_t; + +typedef struct fixed_last_native_error_t { + int32_t native_error; + int errno_value; + char msg_buff[TLS_MSG_BUF_LEN]; +} fixed_last_native_error_t; + +static __TLS fixed_last_native_error_t TLS_last_native_error; + +// helper values used only in the Native_error_str array +#define _UMF_FIXED_RESULT_SUCCESS \ + (UMF_FIXED_RESULT_SUCCESS - UMF_FIXED_RESULT_SUCCESS) +#define _UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED \ + (UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED - UMF_FIXED_RESULT_SUCCESS) + +static const char *Native_error_str[] = { + [_UMF_FIXED_RESULT_SUCCESS] = "success", + [_UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED] = "force purging failed"}; + +static void fixed_store_last_native_error(int32_t native_error, + int errno_value) { + TLS_last_native_error.native_error = native_error; + TLS_last_native_error.errno_value = errno_value; +} + +static umf_result_t fixed_allocation_split_cb(void *provider, void *ptr, + size_t totalSize, + size_t firstSize) { + (void)provider; + (void)ptr; + (void)totalSize; + (void)firstSize; + return UMF_RESULT_SUCCESS; +} + +static umf_result_t fixed_allocation_merge_cb(void *provider, void *lowPtr, + void *highPtr, size_t totalSize) { + (void)provider; + (void)lowPtr; + (void)highPtr; + (void)totalSize; + return UMF_RESULT_SUCCESS; +} + +static umf_result_t fixed_initialize(void *params, void **provider) { + umf_result_t ret; + + if (params == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_fixed_memory_provider_params_t *in_params = + (umf_fixed_memory_provider_params_t *)params; + + fixed_memory_provider_t *fixed_provider = + umf_ba_global_alloc(sizeof(*fixed_provider)); + if (!fixed_provider) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + memset(fixed_provider, 0, sizeof(*fixed_provider)); + + coarse_params_t coarse_params = {0}; + coarse_params.provider = fixed_provider; + coarse_params.page_size = utils_get_page_size(); + // The alloc callback is not available in case of the fixed provider + // because it is a fixed-size memory provider + // and the entire memory is added as a single block + // to the coarse library. 
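 // (Contrast with the file provider earlier in this patch, which does
 // register a file_alloc_cb so that coarse can map more of the backing
 // file on demand; a caller-supplied fixed region has nothing to grow
 // from, hence the NULL callbacks below.)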
+ coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; // not available for the fixed provider + coarse_params.cb.split = fixed_allocation_split_cb; + coarse_params.cb.merge = fixed_allocation_merge_cb; + + coarse_t *coarse = NULL; + ret = coarse_new(&coarse_params, &coarse); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("coarse_new() failed"); + goto err_free_fixed_provider; + } + + fixed_provider->coarse = coarse; + + fixed_provider->base = in_params->ptr; + fixed_provider->size = in_params->size; + + // add the entire memory as a single block + ret = coarse_add_memory_fixed(coarse, fixed_provider->base, + fixed_provider->size); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("adding memory block failed"); + goto err_coarse_delete; + } + + *provider = fixed_provider; + + return UMF_RESULT_SUCCESS; + +err_coarse_delete: + coarse_delete(fixed_provider->coarse); +err_free_fixed_provider: + umf_ba_global_free(fixed_provider); + return ret; +} + +static void fixed_finalize(void *provider) { + fixed_memory_provider_t *fixed_provider = provider; + coarse_delete(fixed_provider->coarse); + umf_ba_global_free(fixed_provider); +} + +static umf_result_t fixed_alloc(void *provider, size_t size, size_t alignment, + void **resultPtr) { + fixed_memory_provider_t *fixed_provider = + (fixed_memory_provider_t *)provider; + + return coarse_alloc(fixed_provider->coarse, size, alignment, resultPtr); +} + +static void fixed_get_last_native_error(void *provider, const char **ppMessage, + int32_t *pError) { + (void)provider; // unused + + if (ppMessage == NULL || pError == NULL) { + assert(0); + return; + } + + *pError = TLS_last_native_error.native_error; + if (TLS_last_native_error.errno_value == 0) { + *ppMessage = Native_error_str[*pError - UMF_FIXED_RESULT_SUCCESS]; + return; + } + + const char *msg; + size_t len; + size_t pos = 0; + + msg = Native_error_str[*pError - UMF_FIXED_RESULT_SUCCESS]; + len = strlen(msg); + memcpy(TLS_last_native_error.msg_buff + pos, msg, len + 1); + pos += len; + + msg = ": "; + len = strlen(msg); + memcpy(TLS_last_native_error.msg_buff + pos, msg, len + 1); + pos += len; + + utils_strerror(TLS_last_native_error.errno_value, + TLS_last_native_error.msg_buff + pos, TLS_MSG_BUF_LEN - pos); + + *ppMessage = TLS_last_native_error.msg_buff; +} + +static umf_result_t fixed_get_recommended_page_size(void *provider, size_t size, + size_t *page_size) { + (void)provider; // unused + (void)size; // unused + + *page_size = utils_get_page_size(); + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t fixed_get_min_page_size(void *provider, void *ptr, + size_t *page_size) { + (void)ptr; // unused + + return fixed_get_recommended_page_size(provider, 0, page_size); +} + +static umf_result_t fixed_purge_lazy(void *provider, void *ptr, size_t size) { + (void)provider; // unused + (void)ptr; // unused + (void)size; // unused + // purge_lazy is unsupported in case of the fixed memory provider + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +static umf_result_t fixed_purge_force(void *provider, void *ptr, size_t size) { + (void)provider; // unused + errno = 0; + if (utils_purge(ptr, size, UMF_PURGE_FORCE)) { + fixed_store_last_native_error(UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED, + errno); + LOG_PERR("force purging failed"); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + return UMF_RESULT_SUCCESS; +} + +static const char *fixed_get_name(void *provider) { + (void)provider; // unused + return "FIXED"; +} + +static umf_result_t fixed_allocation_split(void *provider, void *ptr, + 
size_t totalSize, size_t firstSize) { + fixed_memory_provider_t *fixed_provider = + (fixed_memory_provider_t *)provider; + return coarse_split(fixed_provider->coarse, ptr, totalSize, firstSize); +} + +static umf_result_t fixed_allocation_merge(void *provider, void *lowPtr, + void *highPtr, size_t totalSize) { + fixed_memory_provider_t *fixed_provider = + (fixed_memory_provider_t *)provider; + return coarse_merge(fixed_provider->coarse, lowPtr, highPtr, totalSize); +} + +static umf_result_t fixed_free(void *provider, void *ptr, size_t size) { + fixed_memory_provider_t *fixed_provider = + (fixed_memory_provider_t *)provider; + return coarse_free(fixed_provider->coarse, ptr, size); +} + +static umf_memory_provider_ops_t UMF_FIXED_MEMORY_PROVIDER_OPS = { + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, + .initialize = fixed_initialize, + .finalize = fixed_finalize, + .alloc = fixed_alloc, + .free = fixed_free, + .get_last_native_error = fixed_get_last_native_error, + .get_recommended_page_size = fixed_get_recommended_page_size, + .get_min_page_size = fixed_get_min_page_size, + .get_name = fixed_get_name, + .ext.purge_lazy = fixed_purge_lazy, + .ext.purge_force = fixed_purge_force, + .ext.allocation_merge = fixed_allocation_merge, + .ext.allocation_split = fixed_allocation_split, + .ipc.get_ipc_handle_size = NULL, + .ipc.get_ipc_handle = NULL, + .ipc.put_ipc_handle = NULL, + .ipc.open_ipc_handle = NULL, + .ipc.close_ipc_handle = NULL}; + +umf_memory_provider_ops_t *umfFixedMemoryProviderOps(void) { + return &UMF_FIXED_MEMORY_PROVIDER_OPS; +} + +umf_result_t umfFixedMemoryProviderParamsCreate( + umf_fixed_memory_provider_params_handle_t *hParams, void *ptr, + size_t size) { + libumfInit(); + if (hParams == NULL) { + LOG_ERR("Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_fixed_memory_provider_params_handle_t params = + umf_ba_global_alloc(sizeof(*params)); + if (params == NULL) { + LOG_ERR("Allocating memory for the Memory Provider params failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_result_t ret = umfFixedMemoryProviderParamsSetMemory(params, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + umf_ba_global_free(params); + return ret; + } + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfFixedMemoryProviderParamsDestroy( + umf_fixed_memory_provider_params_handle_t hParams) { + if (hParams != NULL) { + umf_ba_global_free(hParams); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfFixedMemoryProviderParamsSetMemory( + umf_fixed_memory_provider_params_handle_t hParams, void *ptr, size_t size) { + + if (hParams == NULL) { + LOG_ERR("Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (ptr == NULL) { + LOG_ERR("Memory pointer is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (size == 0) { + LOG_ERR("Size must be greater than 0"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->ptr = ptr; + hParams->size = size; + return UMF_RESULT_SUCCESS; +} diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index 5f9c85a86d..af81e84bc0 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -14,8 +14,19 @@
 #include
 #include
+#include "provider_level_zero_internal.h"
+#include "utils_load_library.h"
 #include "utils_log.h"
+static void *ze_lib_handle = NULL;
+
+void fini_ze_global_state(void) {
+ if (ze_lib_handle) {
+ utils_close_library(ze_lib_handle);
+ ze_lib_handle = NULL;
+ }
+}
+
 #if defined(UMF_NO_LEVEL_ZERO_PROVIDER)
 umf_result_t umfLevelZeroMemoryProviderParamsCreate(
@@ -75,6 +86,22 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices(
 return UMF_RESULT_ERROR_NOT_SUPPORTED;
 }
+umf_result_t umfLevelZeroMemoryProviderParamsSetFreePolicy(
+ umf_level_zero_memory_provider_params_handle_t hParams,
+ umf_level_zero_memory_provider_free_policy_t policy) {
+ (void)hParams;
+ (void)policy;
+ return UMF_RESULT_ERROR_NOT_SUPPORTED;
+}
+
+umf_result_t umfLevelZeroMemoryProviderParamsSetDeviceOrdinal(
+ umf_level_zero_memory_provider_params_handle_t hParams,
+ uint32_t deviceOrdinal) {
+ (void)hParams;
+ (void)deviceOrdinal;
+ return UMF_RESULT_ERROR_NOT_SUPPORTED;
+}
+
 umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void) {
 // not supported
 LOG_ERR("L0 memory provider is disabled! (UMF_BUILD_LEVEL_ZERO_PROVIDER is "
@@ -89,7 +116,6 @@ umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void) {
 #include "utils_assert.h"
 #include "utils_common.h"
 #include "utils_concurrency.h"
-#include "utils_load_library.h"
 #include "utils_log.h"
 #include "utils_sanitizers.h"
 #include "ze_api.h"
@@ -107,6 +133,11 @@ typedef struct umf_level_zero_memory_provider_params_t {
 resident_device_handles; ///< Array of devices for which the memory should be made resident
 uint32_t
 resident_device_count; ///< Number of devices for which the memory should be made resident
+
+ umf_level_zero_memory_provider_free_policy_t
+ freePolicy; ///< Memory free policy
+
+ uint32_t device_ordinal;
 } umf_level_zero_memory_provider_params_t;
 typedef struct ze_memory_provider_t {
@@ -118,6 +149,12 @@ typedef struct ze_memory_provider_t {
 uint32_t resident_device_count;
 ze_device_properties_t device_properties;
+
+ ze_driver_memory_free_policy_ext_flags_t freePolicyFlags;
+
+ size_t min_page_size;
+
+ uint32_t device_ordinal;
 } ze_memory_provider_t;
 typedef struct ze_ops_t {
@@ -144,6 +181,11 @@ typedef struct ze_ops_t {
 size_t);
 ze_result_t (*zeDeviceGetProperties)(ze_device_handle_t,
 ze_device_properties_t *);
+ ze_result_t (*zeMemFreeExt)(ze_context_handle_t,
+ ze_memory_free_ext_desc_t *, void *);
+ ze_result_t (*zeMemGetAllocProperties)(ze_context_handle_t, const void *,
+ ze_memory_allocation_properties_t *,
+ ze_device_handle_t *);
 } ze_ops_t;
 static ze_ops_t g_ze_ops;
@@ -175,47 +217,63 @@ static void init_ze_global_state(void) {
 #else
 const char *lib_name = "libze_loader.so";
 #endif
- // check if Level Zero shared library is already loaded
- // we pass 0 as a handle to search the global symbol table
+ // The Level Zero shared library should already be loaded by the user
+ // of the Level Zero provider. UMF just wants to reuse it
+ // and increase the reference count of the Level Zero shared library.
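 // Roughly equivalent lookup on Linux, for illustration (assuming
 // utils_open_library() wraps the platform loader; error handling omitted):
 //
 //   void *h = dlopen("libze_loader.so", RTLD_LAZY | RTLD_NOLOAD);
 //   // h is non-NULL only if libze_loader is already mapped into the
 //   // process; on success dlopen() also bumps the library's reference
 //   // count, so it stays loaded until the matching close in
 //   // fini_ze_global_state().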
+ void *lib_handle = + utils_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_NO_LOAD); + if (!lib_handle) { + LOG_FATAL("Failed to open Level Zero shared library"); + Init_ze_global_state_failed = true; + return; + } + *(void **)&g_ze_ops.zeMemAllocHost = - utils_get_symbol_addr(0, "zeMemAllocHost", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemAllocHost", lib_name); *(void **)&g_ze_ops.zeMemAllocDevice = - utils_get_symbol_addr(0, "zeMemAllocDevice", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemAllocDevice", lib_name); *(void **)&g_ze_ops.zeMemAllocShared = - utils_get_symbol_addr(0, "zeMemAllocShared", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemAllocShared", lib_name); *(void **)&g_ze_ops.zeMemFree = - utils_get_symbol_addr(0, "zeMemFree", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemFree", lib_name); *(void **)&g_ze_ops.zeMemGetIpcHandle = - utils_get_symbol_addr(0, "zeMemGetIpcHandle", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemGetIpcHandle", lib_name); *(void **)&g_ze_ops.zeMemPutIpcHandle = - utils_get_symbol_addr(0, "zeMemPutIpcHandle", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemPutIpcHandle", lib_name); *(void **)&g_ze_ops.zeMemOpenIpcHandle = - utils_get_symbol_addr(0, "zeMemOpenIpcHandle", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemOpenIpcHandle", lib_name); *(void **)&g_ze_ops.zeMemCloseIpcHandle = - utils_get_symbol_addr(0, "zeMemCloseIpcHandle", lib_name); - *(void **)&g_ze_ops.zeContextMakeMemoryResident = - utils_get_symbol_addr(0, "zeContextMakeMemoryResident", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemCloseIpcHandle", lib_name); + *(void **)&g_ze_ops.zeContextMakeMemoryResident = utils_get_symbol_addr( + lib_handle, "zeContextMakeMemoryResident", lib_name); *(void **)&g_ze_ops.zeDeviceGetProperties = - utils_get_symbol_addr(0, "zeDeviceGetProperties", lib_name); + utils_get_symbol_addr(lib_handle, "zeDeviceGetProperties", lib_name); + *(void **)&g_ze_ops.zeMemFreeExt = + utils_get_symbol_addr(lib_handle, "zeMemFreeExt", lib_name); + *(void **)&g_ze_ops.zeMemGetAllocProperties = + utils_get_symbol_addr(lib_handle, "zeMemGetAllocProperties", lib_name); if (!g_ze_ops.zeMemAllocHost || !g_ze_ops.zeMemAllocDevice || !g_ze_ops.zeMemAllocShared || !g_ze_ops.zeMemFree || !g_ze_ops.zeMemGetIpcHandle || !g_ze_ops.zeMemOpenIpcHandle || !g_ze_ops.zeMemCloseIpcHandle || !g_ze_ops.zeContextMakeMemoryResident || - !g_ze_ops.zeDeviceGetProperties) { + !g_ze_ops.zeDeviceGetProperties || !g_ze_ops.zeMemGetAllocProperties) { // g_ze_ops.zeMemPutIpcHandle can be NULL because it was introduced // starting from Level Zero 1.6 - LOG_ERR("Required Level Zero symbols not found."); + LOG_FATAL("Required Level Zero symbols not found."); Init_ze_global_state_failed = true; + utils_close_library(lib_handle); + return; } + ze_lib_handle = lib_handle; } umf_result_t umfLevelZeroMemoryProviderParamsCreate( umf_level_zero_memory_provider_params_handle_t *hParams) { libumfInit(); if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -232,6 +290,8 @@ umf_result_t umfLevelZeroMemoryProviderParamsCreate( params->memory_type = UMF_MEMORY_TYPE_UNKNOWN; params->resident_device_handles = NULL; params->resident_device_count = 0; + params->freePolicy = UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT; + params->device_ordinal = 0; *hParams = params; @@ -249,12 +309,12 @@ umf_result_t 
umfLevelZeroMemoryProviderParamsSetContext( umf_level_zero_memory_provider_params_handle_t hParams, ze_context_handle_t hContext) { if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } if (!hContext) { - LOG_ERR("Level zero context handle is NULL"); + LOG_ERR("Level Zero context handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -267,7 +327,7 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetDevice( umf_level_zero_memory_provider_params_handle_t hParams, ze_device_handle_t hDevice) { if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -280,7 +340,7 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetMemoryType( umf_level_zero_memory_provider_params_handle_t hParams, umf_usm_memory_type_t memoryType) { if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -289,11 +349,23 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetMemoryType( return UMF_RESULT_SUCCESS; } +umf_result_t umfLevelZeroMemoryProviderParamsSetDeviceOrdinal( + umf_level_zero_memory_provider_params_handle_t hParams, + uint32_t deviceOrdinal) { + if (!hParams) { + LOG_ERR("Level Zero memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + hParams->device_ordinal = deviceOrdinal; + + return UMF_RESULT_SUCCESS; +} + umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( umf_level_zero_memory_provider_params_handle_t hParams, ze_device_handle_t *hDevices, uint32_t deviceCount) { if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -308,96 +380,31 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( return UMF_RESULT_SUCCESS; } -static umf_result_t ze_memory_provider_initialize(void *params, - void **provider) { - if (params == NULL) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_level_zero_memory_provider_params_handle_t ze_params = - (umf_level_zero_memory_provider_params_handle_t)params; - - if (!ze_params->level_zero_context_handle) { - LOG_ERR("Level Zero context handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if ((ze_params->memory_type == UMF_MEMORY_TYPE_HOST) == - (ze_params->level_zero_device_handle != NULL)) { - LOG_ERR("Level Zero device handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if ((bool)ze_params->resident_device_count && - (ze_params->resident_device_handles == NULL)) { - LOG_ERR("Resident devices handles array is NULL, but device_count is " - "not zero"); +umf_result_t umfLevelZeroMemoryProviderParamsSetFreePolicy( + umf_level_zero_memory_provider_params_handle_t hParams, + umf_level_zero_memory_provider_free_policy_t policy) { + if (!hParams) { + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - utils_init_once(&ze_is_initialized, init_ze_global_state); - if (Init_ze_global_state_failed) { - LOG_ERR("Loading Level Zero symbols failed"); - return UMF_RESULT_ERROR_UNKNOWN; - } - - ze_memory_provider_t *ze_provider = - umf_ba_global_alloc(sizeof(ze_memory_provider_t)); - if (!ze_provider) { - 
LOG_ERR("Cannot allocate memory for Level Zero Memory Provider"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - ze_provider->context = ze_params->level_zero_context_handle; - ze_provider->device = ze_params->level_zero_device_handle; - ze_provider->memory_type = (ze_memory_type_t)ze_params->memory_type; - - if (ze_provider->device) { - umf_result_t ret = ze2umf_result(g_ze_ops.zeDeviceGetProperties( - ze_provider->device, &ze_provider->device_properties)); - - if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("Cannot get device properties"); - umf_ba_global_free(ze_provider); - return ret; - } - } else { - memset(&ze_provider->device_properties, 0, - sizeof(ze_provider->device_properties)); - } - - if (ze_params->resident_device_count) { - ze_provider->resident_device_handles = umf_ba_global_alloc( - sizeof(ze_device_handle_t) * ze_params->resident_device_count); - if (!ze_provider->resident_device_handles) { - LOG_ERR("Cannot allocate memory for resident devices"); - umf_ba_global_free(ze_provider); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - ze_provider->resident_device_count = ze_params->resident_device_count; - - for (uint32_t i = 0; i < ze_provider->resident_device_count; i++) { - ze_provider->resident_device_handles[i] = - ze_params->resident_device_handles[i]; - } - } else { - ze_provider->resident_device_handles = NULL; - ze_provider->resident_device_count = 0; - } - - *provider = ze_provider; - + hParams->freePolicy = policy; return UMF_RESULT_SUCCESS; } -static void ze_memory_provider_finalize(void *provider) { - ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; - umf_ba_global_free(ze_provider->resident_device_handles); - - umf_ba_global_free(provider); +static ze_driver_memory_free_policy_ext_flags_t +umfFreePolicyToZePolicy(umf_level_zero_memory_provider_free_policy_t policy) { + switch (policy) { + case UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT: + return 0; + case UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_BLOCKING_FREE: + return ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_BLOCKING_FREE; + case UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFER_FREE: + return ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + default: + return 0; + } } - static bool use_relaxed_allocation(ze_memory_provider_t *ze_provider, size_t size) { assert(ze_provider); @@ -435,8 +442,7 @@ static umf_result_t ze_memory_provider_alloc(void *provider, size_t size, ? &relaxed_device_allocation_desc : NULL, .flags = 0, - .ordinal = 0 // TODO - }; + .ordinal = ze_provider->device_ordinal}; ze_result = g_ze_ops.zeMemAllocDevice(ze_provider->context, &dev_desc, size, alignment, ze_provider->device, resultPtr); @@ -453,8 +459,7 @@ static umf_result_t ze_memory_provider_alloc(void *provider, size_t size, ? 
&relaxed_device_allocation_desc : NULL, .flags = 0, - .ordinal = 0 // TODO - }; + .ordinal = ze_provider->device_ordinal}; ze_result = g_ze_ops.zeMemAllocShared(ze_provider->context, &dev_desc, &host_desc, size, alignment, ze_provider->device, resultPtr); @@ -492,10 +497,147 @@ static umf_result_t ze_memory_provider_free(void *provider, void *ptr, } ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; - ze_result_t ze_result = g_ze_ops.zeMemFree(ze_provider->context, ptr); + + if (ze_provider->freePolicyFlags == 0) { + return ze2umf_result(g_ze_ops.zeMemFree(ze_provider->context, ptr)); + } + + ze_memory_free_ext_desc_t desc = { + .stype = ZE_STRUCTURE_TYPE_MEMORY_FREE_EXT_DESC, + .pNext = NULL, + .freePolicy = ze_provider->freePolicyFlags}; + + return ze2umf_result( + g_ze_ops.zeMemFreeExt(ze_provider->context, &desc, ptr)); +} + +static umf_result_t query_min_page_size(ze_memory_provider_t *ze_provider, + size_t *min_page_size) { + assert(min_page_size); + + LOG_DEBUG("Querying minimum page size"); + + void *ptr; + umf_result_t result = ze_memory_provider_alloc(ze_provider, 1, 0, &ptr); + if (result != UMF_RESULT_SUCCESS) { + return result; + } + + ze_memory_allocation_properties_t properties = { + .stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES}; + ze_result_t ze_result = g_ze_ops.zeMemGetAllocProperties( + ze_provider->context, ptr, &properties, NULL); + + *min_page_size = properties.pageSize; + + ze_memory_provider_free(ze_provider, ptr, 1); + return ze2umf_result(ze_result); } +static void ze_memory_provider_finalize(void *provider) { + ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; + umf_ba_global_free(ze_provider->resident_device_handles); + + umf_ba_global_free(provider); +} + +static umf_result_t ze_memory_provider_initialize(void *params, + void **provider) { + if (params == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_level_zero_memory_provider_params_handle_t ze_params = + (umf_level_zero_memory_provider_params_handle_t)params; + + if (!ze_params->level_zero_context_handle) { + LOG_ERR("Level Zero context handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if ((ze_params->memory_type == UMF_MEMORY_TYPE_HOST) == + (ze_params->level_zero_device_handle != NULL)) { + LOG_ERR("Level Zero device handle should be set only for device and " + "shared memory types"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if ((bool)ze_params->resident_device_count && + (ze_params->resident_device_handles == NULL)) { + LOG_ERR("Resident devices handles array is NULL, but device_count is " + "not zero"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + utils_init_once(&ze_is_initialized, init_ze_global_state); + if (Init_ze_global_state_failed) { + LOG_FATAL("Loading Level Zero symbols failed"); + return UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; + } + + ze_memory_provider_t *ze_provider = + umf_ba_global_alloc(sizeof(ze_memory_provider_t)); + if (!ze_provider) { + LOG_ERR("Cannot allocate memory for Level Zero Memory Provider"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + ze_provider->context = ze_params->level_zero_context_handle; + ze_provider->device = ze_params->level_zero_device_handle; + ze_provider->memory_type = (ze_memory_type_t)ze_params->memory_type; + ze_provider->freePolicyFlags = + umfFreePolicyToZePolicy(ze_params->freePolicy); + ze_provider->min_page_size = 0; + ze_provider->device_ordinal = ze_params->device_ordinal; + + memset(&ze_provider->device_properties, 0, + 
sizeof(ze_provider->device_properties)); + ze_provider->device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + + if (ze_provider->device) { + umf_result_t ret = ze2umf_result(g_ze_ops.zeDeviceGetProperties( + ze_provider->device, &ze_provider->device_properties)); + + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("Cannot get device properties"); + umf_ba_global_free(ze_provider); + return ret; + } + } + + if (ze_params->resident_device_count) { + ze_provider->resident_device_handles = umf_ba_global_alloc( + sizeof(ze_device_handle_t) * ze_params->resident_device_count); + if (!ze_provider->resident_device_handles) { + LOG_ERR("Cannot allocate memory for resident devices"); + umf_ba_global_free(ze_provider); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + ze_provider->resident_device_count = ze_params->resident_device_count; + + for (uint32_t i = 0; i < ze_provider->resident_device_count; i++) { + ze_provider->resident_device_handles[i] = + ze_params->resident_device_handles[i]; + } + } else { + ze_provider->resident_device_handles = NULL; + ze_provider->resident_device_count = 0; + } + + umf_result_t result = + query_min_page_size(ze_provider, &ze_provider->min_page_size); + if (result != UMF_RESULT_SUCCESS) { + ze_memory_provider_finalize(ze_provider); + return result; + } + + *provider = ze_provider; + + return UMF_RESULT_SUCCESS; +} + static void ze_memory_provider_get_last_native_error(void *provider, const char **ppMessage, int32_t *pError) { @@ -512,11 +654,23 @@ static void ze_memory_provider_get_last_native_error(void *provider, static umf_result_t ze_memory_provider_get_min_page_size(void *provider, void *ptr, size_t *pageSize) { - (void)provider; - (void)ptr; + ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; + + if (!ptr) { + *pageSize = ze_provider->min_page_size; + return UMF_RESULT_SUCCESS; + } + + ze_memory_allocation_properties_t properties = { + .stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES}; + ze_result_t ze_result = g_ze_ops.zeMemGetAllocProperties( + ze_provider->context, ptr, &properties, NULL); + if (ze_result != ZE_RESULT_SUCCESS) { + return ze2umf_result(ze_result); + } + + *pageSize = properties.pageSize; - // TODO - *pageSize = 1024 * 64; return UMF_RESULT_SUCCESS; } @@ -543,12 +697,8 @@ static umf_result_t ze_memory_provider_purge_force(void *provider, void *ptr, static umf_result_t ze_memory_provider_get_recommended_page_size(void *provider, size_t size, size_t *pageSize) { - (void)provider; (void)size; - - // TODO - *pageSize = 1024 * 64; - return UMF_RESULT_SUCCESS; + return ze_memory_provider_get_min_page_size(provider, NULL, pageSize); } static const char *ze_memory_provider_get_name(void *provider) { @@ -693,16 +843,16 @@ ze_memory_provider_close_ipc_handle(void *provider, void *ptr, size_t size) { return UMF_RESULT_SUCCESS; } -static struct umf_memory_provider_ops_t UMF_LEVEL_ZERO_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, +static umf_memory_provider_ops_t UMF_LEVEL_ZERO_MEMORY_PROVIDER_OPS = { + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = ze_memory_provider_initialize, .finalize = ze_memory_provider_finalize, .alloc = ze_memory_provider_alloc, + .free = ze_memory_provider_free, .get_last_native_error = ze_memory_provider_get_last_native_error, .get_recommended_page_size = ze_memory_provider_get_recommended_page_size, .get_min_page_size = ze_memory_provider_get_min_page_size, .get_name = ze_memory_provider_get_name, - .ext.free = ze_memory_provider_free, .ext.purge_lazy = 
ze_memory_provider_purge_lazy, .ext.purge_force = ze_memory_provider_purge_force, .ext.allocation_merge = ze_memory_provider_allocation_merge, diff --git a/src/provider/provider_level_zero_internal.h b/src/provider/provider_level_zero_internal.h new file mode 100644 index 0000000000..7da299ffdf --- /dev/null +++ b/src/provider/provider_level_zero_internal.h @@ -0,0 +1,10 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +void fini_ze_global_state(void); diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index 4c19944a96..f0cd3abaed 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -934,7 +934,7 @@ static membind_t membindFirst(os_memory_provider_t *provider, void *addr, if (provider->mode == UMF_NUMA_MODE_INTERLEAVE) { assert(provider->part_size != 0); - size_t s = utils_fetch_and_add64(&provider->alloc_sum, size); + size_t s = utils_fetch_and_add_u64(&provider->alloc_sum, size); membind.node = (s / provider->part_size) % provider->nodeset_len; membind.bitmap = provider->nodeset[membind.node]; membind.bind_size = ALIGN_UP(provider->part_size, membind.page_size); @@ -1286,9 +1286,7 @@ static umf_result_t os_get_ipc_handle(void *provider, const void *ptr, void *value = critnib_get(os_provider->fd_offset_map, (uintptr_t)ptr); if (value == NULL) { - LOG_ERR("os_get_ipc_handle(): getting a value from the IPC cache " - "failed (addr=%p)", - ptr); + LOG_ERR("getting a value from the IPC cache failed (addr=%p)", ptr); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -1404,15 +1402,15 @@ static umf_result_t os_close_ipc_handle(void *provider, void *ptr, } static umf_memory_provider_ops_t UMF_OS_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = os_initialize, .finalize = os_finalize, .alloc = os_alloc, + .free = os_free, .get_last_native_error = os_get_last_native_error, .get_recommended_page_size = os_get_recommended_page_size, .get_min_page_size = os_get_min_page_size, .get_name = os_get_name, - .ext.free = os_free, .ext.purge_lazy = os_purge_lazy, .ext.purge_force = os_purge_force, .ext.allocation_merge = os_allocation_merge, diff --git a/src/provider/provider_os_memory_internal.h b/src/provider/provider_os_memory_internal.h index faf0de2473..4a603b1dad 100644 --- a/src/provider/provider_os_memory_internal.h +++ b/src/provider/provider_os_memory_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -58,7 +58,7 @@ typedef struct os_memory_provider_t { int numa_flags; // combination of hwloc flags size_t part_size; - size_t alloc_sum; // sum of all allocations - used for manual interleaving + uint64_t alloc_sum; // sum of all allocations - used for manual interleaving struct { unsigned weight; diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index e726feefb0..c5a4b5f1f4 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -1,72 +1,235 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * */ -#include "provider_tracking.h" +#include +#include +#include +#include +#include + +#include +#include +#include + #include "base_alloc_global.h" #include "critnib.h" #include "ipc_cache.h" #include "ipc_internal.h" +#include "memory_pool_internal.h" +#include "provider_tracking.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" -#include -#include -#include - -#include -#include -#include -#include -#include +// TODO: we need to support an arbitrary amount of layers in the future +#define MAX_LEVELS_OF_ALLOC_SEGMENT_MAP 8 uint64_t IPC_HANDLE_ID = 0; -typedef struct tracker_value_t { +struct umf_memory_tracker_t { + umf_ba_pool_t *alloc_info_allocator; + // Multilevel maps are needed to support the case + // when one memory pool acts as a memory provider + // for another memory pool (nested memory pooling). + critnib *alloc_segments_map[MAX_LEVELS_OF_ALLOC_SEGMENT_MAP]; + utils_mutex_t splitMergeMutex; + umf_ba_pool_t *ipc_info_allocator; + critnib *ipc_segments_map; +}; + +typedef struct tracker_alloc_info_t { umf_memory_pool_handle_t pool; size_t size; -} tracker_value_t; + // number of overlapping memory regions + // in the next level of map + // falling within the current range + size_t n_children; +} tracker_alloc_info_t; -static umf_result_t umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, - umf_memory_pool_handle_t pool, - const void *ptr, size_t size) { +typedef struct tracker_ipc_info_t { + size_t size; + umf_memory_provider_handle_t provider; + ipc_opened_cache_value_t *ipc_cache_value; +} tracker_ipc_info_t; + +// Get the most nested (on the highest level) allocation segment in the map with the `ptr` key. +// If `no_children` is set to 1, the function will return the entry +// only if it has no children on the higher level. +// The function returns the entry if found, otherwise NULL. 
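// Illustrative nesting scenario for the multilevel maps used below
// (hypothetical pools P1 and P2): P1 allocates a large slab from the OS
// provider, so the slab is tracked at level 0; P2 then uses P1 as its
// upstream memory provider, so a small allocation carved out of the slab
// is tracked at level 1 and increments n_children of the slab's level-0
// entry. A lookup for an address inside the small allocation descends the
// levels and returns the level-1 entry.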
+static tracker_alloc_info_t *get_most_nested_alloc_segment( + umf_memory_tracker_handle_t hTracker, const void *ptr, int *_level, + uintptr_t *_parent_key, tracker_alloc_info_t **_parent_value, + int no_children) { + assert(ptr); + + tracker_alloc_info_t *parent_value = NULL; + tracker_alloc_info_t *rvalue = NULL; + uintptr_t parent_key = 0; + uintptr_t rkey = 0; + uint64_t rsize = 0; + int level = 0; + int found = 0; + + do { + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + found = + critnib_find(hTracker->alloc_segments_map[level], (uintptr_t)ptr, + FIND_LE, (void *)&rkey, (void **)&rvalue); + if (!found || !rvalue) { + break; + } + + utils_atomic_load_acquire_u64((uint64_t *)&rvalue->size, &rsize); + + if (found && (uintptr_t)ptr < rkey + rsize) { + if (rvalue->n_children) { + if (level == MAX_LEVELS_OF_ALLOC_SEGMENT_MAP - 1) { + break; + } + level++; + parent_key = rkey; + parent_value = rvalue; + } + } + } while (found && ((uintptr_t)ptr < rkey + rsize) && rvalue->n_children); + + if (!rvalue || rkey != (uintptr_t)ptr) { + return NULL; + } + + if (no_children && (rvalue->n_children > 0)) { + return NULL; + } + + if (_level) { + *_level = level; + } + if (_parent_key) { + *_parent_key = parent_key; + } + if (_parent_value) { + *_parent_value = parent_value; + } + + assert(!no_children || rvalue->n_children == 0); + + return rvalue; +} + +static umf_result_t +umfMemoryTrackerAddAtLevel(umf_memory_tracker_handle_t hTracker, int level, + umf_memory_pool_handle_t pool, const void *ptr, + size_t size, uintptr_t parent_key, + tracker_alloc_info_t *parent_value) { assert(ptr); - tracker_value_t *value = umf_ba_alloc(hTracker->tracker_allocator); + umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN; + + tracker_alloc_info_t *value = umf_ba_alloc(hTracker->alloc_info_allocator); if (value == NULL) { - LOG_ERR("failed to allocate tracker value, ptr=%p, size=%zu", ptr, + LOG_ERR("failed to allocate a tracker value, ptr=%p, size=%zu", ptr, size); return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } value->pool = pool; - value->size = size; - - int ret = critnib_insert(hTracker->map, (uintptr_t)ptr, value, 0); + utils_atomic_store_release_u64(&value->size, size); + value->n_children = 0; + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + int ret = critnib_insert(hTracker->alloc_segments_map[level], + (uintptr_t)ptr, value, 0); if (ret == 0) { - LOG_DEBUG( - "memory region is added, tracker=%p, ptr=%p, pool=%p, size=%zu", - (void *)hTracker, ptr, (void *)pool, size); + LOG_DEBUG("memory region is added, tracker=%p, level=%i, pool=%p, " + "ptr=%p, size=%zu", + (void *)hTracker, level, (void *)pool, ptr, size); + + if (parent_value) { + parent_value->n_children++; + LOG_DEBUG( + "child #%zu added to memory region: tracker=%p, level=%i, " + "pool=%p, ptr=%p, size=%zu", + parent_value->n_children, (void *)hTracker, level - 1, + (void *)parent_value->pool, (void *)parent_key, + parent_value->size); + } return UMF_RESULT_SUCCESS; } + if (ret == ENOMEM) { + umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } - LOG_ERR("failed to insert tracker value, ret=%d, ptr=%p, pool=%p, size=%zu", - ret, ptr, (void *)pool, size); + LOG_ERR( + "failed to insert the tracker value: pool=%p, ptr=%p, size=%zu, ret=%d", + (void *)pool, ptr, size, ret); - umf_ba_free(hTracker->tracker_allocator, value); + umf_ba_free(hTracker->alloc_info_allocator, value); - if (ret == ENOMEM) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return umf_result; +} + +static umf_result_t umfMemoryTrackerAdd(umf_memory_tracker_handle_t 
hTracker,
+ umf_memory_pool_handle_t pool,
+ const void *ptr, size_t size) {
+ assert(ptr);
+
+ umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN;
+ tracker_alloc_info_t *parent_value = NULL;
+ tracker_alloc_info_t *rvalue = NULL;
+ uintptr_t parent_key = 0;
+ uintptr_t rkey = 0;
+ uint64_t rsize = 0;
+ int level = 0;
+ int found = 0;
+
+ // Find the most nested (on the highest level) entry
+ // in the critnib maps that contains the given 'ptr' pointer.
+ do {
+ assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP);
+ found =
+ critnib_find(hTracker->alloc_segments_map[level], (uintptr_t)ptr,
+ FIND_LE, (void *)&rkey, (void **)&rvalue);
+ if (!found || !rvalue) {
+ break;
+ }
+
+ utils_atomic_load_acquire_u64((uint64_t *)&rvalue->size, &rsize);
+
+ if ((uintptr_t)ptr < rkey + rsize) {
+ if (level == MAX_LEVELS_OF_ALLOC_SEGMENT_MAP - 1) {
+ // TODO: we need to support an arbitrary amount of layers in the future
+ LOG_ERR("tracker level is too high, ptr=%p, size=%zu", ptr,
+ size);
+ return UMF_RESULT_ERROR_OUT_OF_RESOURCES;
+ }
+ if (((uintptr_t)ptr + size) > (rkey + rsize)) {
+ LOG_ERR(
+ "cannot insert a value (pool=%p, ptr=%p, size=%zu) "
+ "into the tracker, because it exceeds the parent value "
+ "(pool=%p, ptr=%p, size=%zu)",
+ (void *)pool, ptr, size, (void *)rvalue->pool, (void *)rkey,
+ (size_t)rsize);
+ return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+ }
+ parent_key = rkey;
+ parent_value = rvalue;
+ level++;
+ }
+ } while (found && ((uintptr_t)ptr < rkey + rsize) && rvalue->n_children);
+
+ umf_result = umfMemoryTrackerAddAtLevel(hTracker, level, pool, ptr, size,
+ parent_key, parent_value);
+ if (umf_result != UMF_RESULT_SUCCESS) {
+ return umf_result;
+ }
- return UMF_RESULT_ERROR_UNKNOWN;
+ return UMF_RESULT_SUCCESS;
 }
 static umf_result_t umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker,
@@ -78,18 +241,103 @@ static umf_result_t umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker,
 // Every umfMemoryTrackerAdd(..., ptr, ...) should have a corresponding
 // umfMemoryTrackerRemove call with the same ptr value.
- void *value = critnib_remove(hTracker->map, (uintptr_t)ptr);
+ tracker_alloc_info_t *parent_value = NULL;
+ uintptr_t parent_key = 0;
+ int level = 0;
+
+ // Find the most nested (on the highest level) entry in the map
+ // with the `ptr` key and with no children - only such an entry can be removed.
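 // Illustrative consequence of the no_children requirement: a level-0
 // slab whose entry still has n_children > 0 cannot be removed before its
 // nested level-1 allocations; each child removal decrements the parent's
 // n_children (see below), after which the slab entry itself becomes
 // removable.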
+ tracker_alloc_info_t *value = get_most_nested_alloc_segment( + hTracker, ptr, &level, &parent_key, &parent_value, 1 /* no_children */); if (!value) { - LOG_ERR("pointer %p not found in the map", ptr); + LOG_ERR("pointer %p not found in the alloc_segments_map", ptr); return UMF_RESULT_ERROR_UNKNOWN; } - tracker_value_t *v = value; + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + value = critnib_remove(hTracker->alloc_segments_map[level], (uintptr_t)ptr); + assert(value); + + LOG_DEBUG("memory region removed: tracker=%p, level=%i, pool=%p, ptr=%p, " + "size=%zu", + (void *)hTracker, level, value->pool, ptr, value->size); - LOG_DEBUG("memory region removed: tracker=%p, ptr=%p, size=%zu", - (void *)hTracker, ptr, v->size); + if (parent_value) { + LOG_DEBUG( + "child #%zu removed from memory region: tracker=%p, level=%i, " + "pool=%p, ptr=%p, size=%zu", + parent_value->n_children, (void *)hTracker, level - 1, + (void *)parent_value->pool, (void *)parent_key, parent_value->size); + parent_value->n_children--; + } - umf_ba_free(hTracker->tracker_allocator, value); + umf_ba_free(hTracker->alloc_info_allocator, value); + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t +umfMemoryTrackerAddIpcSegment(umf_memory_tracker_handle_t hTracker, + const void *ptr, size_t size, + umf_memory_provider_handle_t provider, + ipc_opened_cache_value_t *cache_entry) { + assert(hTracker); + assert(provider); + assert(cache_entry); + + tracker_ipc_info_t *value = umf_ba_alloc(hTracker->ipc_info_allocator); + + if (value == NULL) { + LOG_ERR("failed to allocate tracker_ipc_info_t, ptr=%p, size=%zu", ptr, + size); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + value->size = size; + value->provider = provider; + value->ipc_cache_value = cache_entry; + + int ret = + critnib_insert(hTracker->ipc_segments_map, (uintptr_t)ptr, value, 0); + if (ret == 0) { + LOG_DEBUG("IPC memory region is added, tracker=%p, ptr=%p, size=%zu, " + "provider=%p, cache_entry=%p", + (void *)hTracker, ptr, size, provider, cache_entry); + return UMF_RESULT_SUCCESS; + } + + LOG_ERR("failed to insert tracker_ipc_info_t, ret=%d, ptr=%p, size=%zu, " + "provider=%p, cache_entry=%p", + ret, ptr, size, provider, cache_entry); + + umf_ba_free(hTracker->ipc_info_allocator, value); + + if (ret == ENOMEM) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return UMF_RESULT_ERROR_UNKNOWN; +} + +static umf_result_t +umfMemoryTrackerRemoveIpcSegment(umf_memory_tracker_handle_t hTracker, + const void *ptr) { + assert(ptr); + + void *value = critnib_remove(hTracker->ipc_segments_map, (uintptr_t)ptr); + + if (!value) { + LOG_ERR("pointer %p not found in the ipc_segments_map", ptr); + return UMF_RESULT_ERROR_UNKNOWN; + } + + tracker_ipc_info_t *v = value; + + LOG_DEBUG("IPC memory region removed: tracker=%p, ptr=%p, size=%zu, " + "provider=%p, cache_entry=%p", + (void *)hTracker, ptr, v->size, v->provider, v->ipc_cache_value); + + umf_ba_free(hTracker->ipc_info_allocator, value); return UMF_RESULT_SUCCESS; } @@ -117,14 +365,68 @@ umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, return UMF_RESULT_ERROR_NOT_SUPPORTED; } - if (TRACKER->map == NULL) { - LOG_ERR("tracker's map does not exist"); + if (TRACKER->alloc_segments_map[0] == NULL) { + LOG_ERR("tracker's alloc_segments_map does not exist"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + tracker_alloc_info_t *top_most_value = NULL; + tracker_alloc_info_t *rvalue = NULL; + uintptr_t top_most_key = 0; + uintptr_t rkey = 0; + int level = 0; + int found = 0; + + do { + assert(level 
< MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + found = critnib_find(TRACKER->alloc_segments_map[level], (uintptr_t)ptr, + FIND_LE, (void *)&rkey, (void **)&rvalue); + if (found && (uintptr_t)ptr < rkey + rvalue->size) { + top_most_key = rkey; + top_most_value = rvalue; + if (rvalue->n_children == 0 || + level == MAX_LEVELS_OF_ALLOC_SEGMENT_MAP - 1) { + break; + } + level++; + } + } while (found && (uintptr_t)ptr < rkey + rvalue->size && + rvalue->n_children); + + if (!top_most_value) { + LOG_DEBUG("pointer %p not found in the tracker, TRACKER=%p", ptr, + (void *)TRACKER); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + pAllocInfo->base = (void *)top_most_key; + pAllocInfo->baseSize = top_most_value->size; + pAllocInfo->pool = top_most_value->pool; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfMemoryTrackerGetIpcInfo(const void *ptr, + umf_ipc_info_t *pIpcInfo) { + assert(pIpcInfo); + + if (ptr == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (TRACKER == NULL) { + LOG_ERR("tracker does not exist"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + if (TRACKER->ipc_segments_map == NULL) { + LOG_ERR("tracker's ipc_segments_map does not exist"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } uintptr_t rkey; - tracker_value_t *rvalue; - int found = critnib_find(TRACKER->map, (uintptr_t)ptr, FIND_LE, + tracker_ipc_info_t *rvalue = NULL; + int found = critnib_find(TRACKER->ipc_segments_map, (uintptr_t)ptr, FIND_LE, (void *)&rkey, (void **)&rvalue); if (!found || (uintptr_t)ptr >= rkey + rvalue->size) { LOG_DEBUG("pointer %p not found in the tracker, TRACKER=%p", ptr, @@ -132,9 +434,9 @@ umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - pAllocInfo->base = (void *)rkey; - pAllocInfo->baseSize = rvalue->size; - pAllocInfo->pool = rvalue->pool; + pIpcInfo->base = (void *)rkey; + pIpcInfo->baseSize = rvalue->size; + pIpcInfo->provider = rvalue->provider; return UMF_RESULT_SUCCESS; } @@ -153,35 +455,44 @@ typedef struct umf_tracking_memory_provider_t { umf_memory_tracker_handle_t hTracker; umf_memory_pool_handle_t pool; critnib *ipcCache; - ipc_mapped_handle_cache_handle_t hIpcMappedCache; - - // the upstream provider does not support the free() operation - bool upstreamDoesNotFree; + ipc_opened_cache_handle_t hIpcMappedCache; } umf_tracking_memory_provider_t; typedef struct umf_tracking_memory_provider_t umf_tracking_memory_provider_t; static umf_result_t trackingAlloc(void *hProvider, size_t size, - size_t alignment, void **ptr) { + size_t alignment, void **_ptr) { umf_tracking_memory_provider_t *p = (umf_tracking_memory_provider_t *)hProvider; umf_result_t ret = UMF_RESULT_SUCCESS; + void *ptr; assert(p->hUpstream); - ret = umfMemoryProviderAlloc(p->hUpstream, size, alignment, ptr); - if (ret != UMF_RESULT_SUCCESS || !*ptr) { + *_ptr = NULL; + + ret = umfMemoryProviderAlloc(p->hUpstream, size, alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS || !ptr) { return ret; } - umf_result_t ret2 = umfMemoryTrackerAdd(p->hTracker, p->pool, *ptr, size); - if (ret2 != UMF_RESULT_SUCCESS) { + ret = umfMemoryTrackerAdd(p->hTracker, p->pool, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { LOG_ERR("failed to add allocated region to the tracker, ptr = %p, size " "= %zu, ret = %d", - *ptr, size, ret2); + ptr, size, ret); + umf_result_t ret2 = umfMemoryProviderFree(p->hUpstream, ptr, size); + if (ret2 != UMF_RESULT_SUCCESS) { + LOG_ERR("upstream provider failed to free the memory: ptr = %p, " + "size = %zu, ret = %d", + ptr, size, ret2); + } + return 
ret; } - return ret; + *_ptr = ptr; + + return UMF_RESULT_SUCCESS; } static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, @@ -190,30 +501,38 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, umf_result_t ret = UMF_RESULT_ERROR_UNKNOWN; umf_tracking_memory_provider_t *provider = (umf_tracking_memory_provider_t *)hProvider; + tracker_alloc_info_t *parent_value = NULL; + uintptr_t parent_key = 0; - tracker_value_t *splitValue = - umf_ba_alloc(provider->hTracker->tracker_allocator); + tracker_alloc_info_t *splitValue = + umf_ba_alloc(provider->hTracker->alloc_info_allocator); if (!splitValue) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } splitValue->pool = provider->pool; splitValue->size = firstSize; + splitValue->n_children = 0; int r = utils_mutex_lock(&provider->hTracker->splitMergeMutex); if (r) { goto err_lock; } - tracker_value_t *value = - (tracker_value_t *)critnib_get(provider->hTracker->map, (uintptr_t)ptr); + int level = 0; + + // Find the most nested (on the highest level) entry in the map + // with the `ptr` key and with no children - only such entry can be split. + tracker_alloc_info_t *value = get_most_nested_alloc_segment( + provider->hTracker, ptr, &level, &parent_key, &parent_value, + 1 /* no_children */); if (!value) { LOG_ERR("region for split is not found in the tracker"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; goto err; } if (value->size != totalSize) { - LOG_ERR("tracked size %zu does not match requested size to split: %zu", + LOG_ERR("tracked size=%zu does not match requested size to split: %zu", value->size, totalSize); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; goto err; @@ -226,39 +545,58 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, goto err; } + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + int cret = + critnib_insert(provider->hTracker->alloc_segments_map[level], + (uintptr_t)ptr, (void *)splitValue, 1 /* update */); + // this cannot fail since we know the element exists (nothing to allocate) + assert(cret == 0); + (void)cret; + void *highPtr = (void *)(((uintptr_t)ptr) + firstSize); size_t secondSize = totalSize - firstSize; // We'll have a duplicate entry for the range [highPtr, highValue->size] but this is fine, // the value is the same anyway and we forbid removing that range concurrently - ret = umfMemoryTrackerAdd(provider->hTracker, provider->pool, highPtr, - secondSize); + ret = umfMemoryTrackerAddAtLevel(provider->hTracker, level, provider->pool, + highPtr, secondSize, parent_key, + parent_value); if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("failed to add split region to the tracker, ptr = %p, size " - "= %zu, ret = %d", + LOG_ERR("failed to add the split region to the tracker, ptr=%p, " + "size=%zu, ret=%d", highPtr, secondSize, ret); + // revert the split + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + cret = critnib_insert(provider->hTracker->alloc_segments_map[level], + (uintptr_t)ptr, (void *)value, 1 /* update */); + // this cannot fail since we know the element exists (nothing to allocate) + assert(cret == 0); + (void)cret; // TODO: what now? should we rollback the split? This can only happen due to ENOMEM // so it's unlikely but probably the best solution would be to try to preallocate everything // (value and critnib nodes) before calling umfMemoryProviderAllocationSplit. 
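
The error path above reverts the in-place update before bailing out, so a failed insert of the second half leaves the tracker exactly as it was. The same update-then-insert-with-rollback shape in isolation, where map_update(), map_insert(), and free_value() are hypothetical stand-ins for the critnib_insert()/umf_ba_free() calls used here:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct seg { uintptr_t base; size_t size; } seg_t;

    int map_insert(void *map, uintptr_t base, seg_t *val);  // may fail (ENOMEM)
    void map_update(void *map, uintptr_t base, seg_t *val); // in-place, no alloc
    void free_value(seg_t *val);

    static int split_entry(void *map, uintptr_t base, seg_t *old_val,
                           seg_t *first_half, seg_t *second_half) {
        map_update(map, base, first_half);          // cannot fail: key exists
        if (map_insert(map, second_half->base, second_half) != 0) {
            map_update(map, base, old_val);         // revert the first step
            return -1;                              // report ENOMEM to caller
        }
        free_value(old_val);                        // only now retire old value
        return 0;
    }
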
goto err; } - int cret = critnib_insert(provider->hTracker->map, (uintptr_t)ptr, - (void *)splitValue, 1 /* update */); - // this cannot fail since we know the element exists (nothing to allocate) - assert(cret == 0); - (void)cret; - // free the original value - umf_ba_free(provider->hTracker->tracker_allocator, value); + umf_ba_free(provider->hTracker->alloc_info_allocator, value); utils_mutex_unlock(&provider->hTracker->splitMergeMutex); + LOG_DEBUG( + "split memory region (level=%i): ptr=%p, totalSize=%zu, firstSize=%zu", + level, ptr, totalSize, firstSize); + return UMF_RESULT_SUCCESS; err: utils_mutex_unlock(&provider->hTracker->splitMergeMutex); err_lock: - umf_ba_free(provider->hTracker->tracker_allocator, splitValue); + umf_ba_free(provider->hTracker->alloc_info_allocator, splitValue); + + LOG_ERR( + "failed to split memory region: ptr=%p, totalSize=%zu, firstSize=%zu", + ptr, totalSize, firstSize); + return ret; } @@ -268,8 +606,8 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, umf_tracking_memory_provider_t *provider = (umf_tracking_memory_provider_t *)hProvider; - tracker_value_t *mergedValue = - umf_ba_alloc(provider->hTracker->tracker_allocator); + tracker_alloc_info_t *mergedValue = + umf_ba_alloc(provider->hTracker->alloc_info_allocator); if (!mergedValue) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -277,37 +615,51 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, mergedValue->pool = provider->pool; mergedValue->size = totalSize; + mergedValue->n_children = 0; + + // any different negative values + int lowLevel = -2; + int highLevel = -1; int r = utils_mutex_lock(&provider->hTracker->splitMergeMutex); if (r) { goto err_lock; } - tracker_value_t *lowValue = (tracker_value_t *)critnib_get( - provider->hTracker->map, (uintptr_t)lowPtr); + tracker_alloc_info_t *lowValue = get_most_nested_alloc_segment( + provider->hTracker, lowPtr, &lowLevel, NULL, NULL, + 0 /* no_children */); // can have children if (!lowValue) { - LOG_ERR("no left value"); + LOG_FATAL("no left value"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err; + goto err_assert; } - tracker_value_t *highValue = (tracker_value_t *)critnib_get( - provider->hTracker->map, (uintptr_t)highPtr); + tracker_alloc_info_t *highValue = get_most_nested_alloc_segment( + provider->hTracker, highPtr, &highLevel, NULL, NULL, + 0 /* no_children */); // can have children if (!highValue) { - LOG_ERR("no right value"); + LOG_FATAL("no right value"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err; + goto err_assert; + } + if (lowLevel != highLevel) { + LOG_FATAL("tracker level mismatch"); + ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_assert; } if (lowValue->pool != highValue->pool) { - LOG_ERR("pool mismatch"); + LOG_FATAL("pool mismatch"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err; + goto err_assert; } if (lowValue->size + highValue->size != totalSize) { - LOG_ERR("lowValue->size + highValue->size != totalSize"); + LOG_FATAL("lowValue->size + highValue->size != totalSize"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err; + goto err_assert; } + mergedValue->n_children = lowValue->n_children + highValue->n_children; + ret = umfMemoryProviderAllocationMerge(provider->hUpstream, lowPtr, highPtr, totalSize); if (ret != UMF_RESULT_SUCCESS) { @@ -315,35 +667,51 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, goto not_merged; } + size_t lno = lowValue->n_children; + size_t hno = highValue->n_children; + // We'll have a 
duplicate entry for the range [highPtr, highValue->size] but this is fine, // the value is the same anyway and we forbid removing that range concurrently - int cret = critnib_insert(provider->hTracker->map, (uintptr_t)lowPtr, - (void *)mergedValue, 1 /* update */); + int cret = + critnib_insert(provider->hTracker->alloc_segments_map[lowLevel], + (uintptr_t)lowPtr, (void *)mergedValue, 1 /* update */); // this cannot fail since we know the element exists (nothing to allocate) assert(cret == 0); (void)cret; // free old value that we just replaced with mergedValue - umf_ba_free(provider->hTracker->tracker_allocator, lowValue); + umf_ba_free(provider->hTracker->alloc_info_allocator, lowValue); - void *erasedhighValue = - critnib_remove(provider->hTracker->map, (uintptr_t)highPtr); + void *erasedhighValue = critnib_remove( + provider->hTracker->alloc_segments_map[highLevel], (uintptr_t)highPtr); assert(erasedhighValue == highValue); - umf_ba_free(provider->hTracker->tracker_allocator, erasedhighValue); + umf_ba_free(provider->hTracker->alloc_info_allocator, erasedhighValue); utils_mutex_unlock(&provider->hTracker->splitMergeMutex); + LOG_DEBUG("merged memory regions (level=%i): lowPtr=%p (child=%zu), " + "highPtr=%p (child=%zu), totalSize=%zu", + lowLevel, lowPtr, lno, highPtr, hno, totalSize); + return UMF_RESULT_SUCCESS; -err: +err_assert: + LOG_FATAL("failed to merge memory regions: lowPtr=%p (level=%i), " + "highPtr=%p (level=%i), totalSize=%zu", + lowPtr, lowLevel, highPtr, highLevel, totalSize); assert(0); not_merged: utils_mutex_unlock(&provider->hTracker->splitMergeMutex); err_lock: - umf_ba_free(provider->hTracker->tracker_allocator, mergedValue); + umf_ba_free(provider->hTracker->alloc_info_allocator, mergedValue); + + LOG_ERR("failed to merge memory regions: lowPtr=%p (level=%i), highPtr=%p " + "(level=%i), totalSize=%zu", + lowPtr, lowLevel, highPtr, highLevel, totalSize); + return ret; } @@ -419,71 +787,56 @@ static umf_result_t trackingInitialize(void *params, void **ret) { return UMF_RESULT_SUCCESS; } -// TODO clearing the tracker is a temporary solution and should be removed. -// The tracker should be cleared using the provider's free() operation. -static void clear_tracker_for_the_pool(umf_memory_tracker_handle_t hTracker, - umf_memory_pool_handle_t pool, - bool upstreamDoesNotFree) { - uintptr_t rkey; - void *rvalue; +#ifndef NDEBUG +static void check_if_tracker_is_empty(umf_memory_tracker_handle_t hTracker, + umf_memory_pool_handle_t pool) { size_t n_items = 0; - uintptr_t last_key = 0; - - while (1 == critnib_find((critnib *)hTracker->map, last_key, FIND_G, &rkey, - &rvalue)) { - tracker_value_t *value = (tracker_value_t *)rvalue; - if (value->pool != pool && pool != NULL) { - last_key = rkey; - continue; - } - n_items++; + for (int i = 0; i < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP; i++) { + uintptr_t last_key = 0; + uintptr_t rkey; + tracker_alloc_info_t *rvalue; - void *removed_value = critnib_remove(hTracker->map, rkey); - assert(removed_value == rvalue); - umf_ba_free(hTracker->tracker_allocator, removed_value); + while (1 == critnib_find(hTracker->alloc_segments_map[i], last_key, + FIND_G, &rkey, (void **)&rvalue)) { + if (rvalue->pool == pool || pool == NULL) { + n_items++; + } - last_key = rkey; + last_key = rkey; + } } -#ifndef NDEBUG - // print error messages only if provider supports the free() operation - if (n_items && !upstreamDoesNotFree) { - if (pool) { - LOG_ERR( - "tracking provider of pool %p is not empty! 
(%zu items left)", - (void *)pool, n_items); - } else { - LOG_ERR("tracking provider is not empty! (%zu items left)", - n_items); + if (n_items) { + // Do not log the error if we are running in the proxy library, + // because it may need those resources till + // the very end of exiting the application. + if (!utils_is_running_in_proxy_lib()) { + if (pool) { + LOG_ERR("tracking provider of pool %p is not empty! (%zu items " + "left)", + (void *)pool, n_items); + } else { + LOG_ERR("tracking provider is not empty! (%zu items left)", + n_items); + } + +#ifdef UMF_DEVELOPER_MODE + assert(n_items == 0 && "tracking provider is not empty!"); +#endif } } -#else /* DEBUG */ - (void)upstreamDoesNotFree; // unused in DEBUG build - (void)n_items; // unused in DEBUG build -#endif /* DEBUG */ -} - -static void clear_tracker(umf_memory_tracker_handle_t hTracker) { - clear_tracker_for_the_pool(hTracker, NULL, false); } +#endif /* NDEBUG */ static void trackingFinalize(void *provider) { umf_tracking_memory_provider_t *p = (umf_tracking_memory_provider_t *)provider; - umfIpcHandleMappedCacheDestroy(p->hIpcMappedCache); + umfIpcOpenedCacheDestroy(p->hIpcMappedCache); critnib_delete(p->ipcCache); - // Do not clear the tracker if we are running in the proxy library, - // because it may need those resources till - // the very end of exiting the application. - if (!utils_is_running_in_proxy_lib()) { - clear_tracker_for_the_pool(p->hTracker, p->pool, - p->upstreamDoesNotFree); - } - umf_ba_global_free(provider); } @@ -576,7 +929,7 @@ static umf_result_t trackingGetIpcHandle(void *provider, const void *ptr, return ret; } - cache_value->handle_id = utils_atomic_increment(&IPC_HANDLE_ID); + cache_value->handle_id = utils_atomic_increment_u64(&IPC_HANDLE_ID); cache_value->ipcDataSize = ipcDataSize; int insRes = critnib_insert(p->ipcCache, (uintptr_t)ptr, @@ -625,21 +978,21 @@ static umf_result_t trackingPutIpcHandle(void *provider, } static void -ipcMappedCacheEvictionCallback(const ipc_mapped_handle_cache_key_t *key, - const ipc_mapped_handle_cache_value_t *value) { +ipcOpenedCacheEvictionCallback(const ipc_opened_cache_key_t *key, + const ipc_opened_cache_value_t *value) { umf_tracking_memory_provider_t *p = (umf_tracking_memory_provider_t *)key->local_provider; - // umfMemoryTrackerRemove should be called before umfMemoryProviderCloseIPCHandle + // umfMemoryTrackerRemoveIpcSegment should be called before umfMemoryProviderCloseIPCHandle // to avoid a race condition. If the order would be different, other thread - // could allocate the memory at address `ptr` before a call to umfMemoryTrackerRemove + // could allocate the memory at address `ptr` before a call to umfMemoryTrackerRemoveIpcSegment // resulting in inconsistent state. if (value->mapped_base_ptr) { - umf_result_t ret = - umfMemoryTrackerRemove(p->hTracker, value->mapped_base_ptr); + umf_result_t ret = umfMemoryTrackerRemoveIpcSegment( + p->hTracker, value->mapped_base_ptr); if (ret != UMF_RESULT_SUCCESS) { // DO NOT return an error here, because the tracking provider // cannot change behaviour of the upstream provider. 
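
check_if_tracker_is_empty() above walks each map with the usual critnib cursor idiom: repeatedly ask FIND_G for the smallest key strictly greater than the last one seen, starting from 0. The idiom in isolation (a sketch relying only on the critnib_find() call already used throughout this patch):

    // Visit every entry of a critnib map in ascending key order.
    static size_t count_entries(critnib *map) {
        uintptr_t last_key = 0;
        uintptr_t rkey;
        void *rvalue;
        size_t n = 0;
        while (1 == critnib_find(map, last_key, FIND_G, &rkey, &rvalue)) {
            n++;             // rvalue is the entry stored under key rkey
            last_key = rkey; // continue strictly after the key just seen
        }
        return n;
    }
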
- LOG_ERR("failed to remove the region from the tracker, ptr=%p, " + LOG_ERR("failed to remove the region from the IPC tracker, ptr=%p, " "size=%zu, ret = %d", value->mapped_base_ptr, value->mapped_size, ret); } @@ -652,12 +1005,13 @@ ipcMappedCacheEvictionCallback(const ipc_mapped_handle_cache_key_t *key, } } -static umf_result_t upstreamOpenIPCHandle(umf_tracking_memory_provider_t *p, - void *providerIpcData, - size_t bufferSize, void **ptr) { +static umf_result_t +upstreamOpenIPCHandle(umf_tracking_memory_provider_t *p, void *providerIpcData, + size_t bufferSize, + ipc_opened_cache_value_t *cache_entry) { void *mapped_ptr = NULL; assert(p != NULL); - assert(ptr != NULL); + assert(cache_entry != NULL); umf_result_t ret = umfMemoryProviderOpenIPCHandle( p->hUpstream, providerIpcData, &mapped_ptr); if (ret != UMF_RESULT_SUCCESS) { @@ -666,7 +1020,21 @@ static umf_result_t upstreamOpenIPCHandle(umf_tracking_memory_provider_t *p, } assert(mapped_ptr != NULL); - ret = umfMemoryTrackerAdd(p->hTracker, p->pool, mapped_ptr, bufferSize); + // Today umfMemoryTrackerAddIpcSegment requires the memory provider handle + // to know which tracking provider instance opened the IPC handle. + // The `p` points to the tracking provider private data. + // Because of that we get handle to the tracking provider instance + // using `p->pool->provider`. + // + // TODO: + // Today we always create a pool and get an IPC handler from the pool. + // And tracking provider is always created together with a pool. + // And the IPC handler is a tracking memory provider in fact. + // However, we are considering adding an API that allows IPC handler creation + // from scratch (without creating a memory pool). In that case, we will + // create a tracker provider without a pool. So p->pool might be NULL in the future. + ret = umfMemoryTrackerAddIpcSegment(p->hTracker, mapped_ptr, bufferSize, + p->pool->provider, cache_entry); if (ret != UMF_RESULT_SUCCESS) { LOG_ERR("failed to add IPC region to the tracker, ptr=%p, " "size=%zu, " @@ -681,7 +1049,8 @@ static umf_result_t upstreamOpenIPCHandle(umf_tracking_memory_provider_t *p, return ret; } - *ptr = mapped_ptr; + cache_entry->mapped_size = bufferSize; + utils_atomic_store_release_ptr(&(cache_entry->mapped_base_ptr), mapped_ptr); return UMF_RESULT_SUCCESS; } @@ -696,16 +1065,16 @@ static umf_result_t trackingOpenIpcHandle(void *provider, void *providerIpcData, umf_ipc_data_t *ipcUmfData = getIpcDataFromIpcHandle(providerIpcData); - // Compiler may add paddings to the ipc_mapped_handle_cache_key_t structure + // Compiler may add paddings to the ipc_opened_cache_key_t structure // so we need to zero it out to avoid false cache miss. 
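
The zeroing noted in the comment above matters because the cache key is compared as raw bytes: memset clears compiler-inserted padding as well as the named fields, so two logically equal keys also compare equal byte-for-byte. A minimal illustration with a hypothetical key type:

    #include <string.h>

    // Hypothetical key; the compiler may insert padding after 'remote_pid'.
    typedef struct cache_key_t {
        void *remote_base_ptr;
        int remote_pid;
    } cache_key_t;

    static void make_key(cache_key_t *key, void *base, int pid) {
        memset(key, 0, sizeof(*key)); // clears padding bytes, not just fields
        key->remote_base_ptr = base;
        key->remote_pid = pid;
        // a byte-wise compare/hash (e.g. memcmp) cannot see stale padding now
    }
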
- ipc_mapped_handle_cache_key_t key = {0}; + ipc_opened_cache_key_t key = {0}; key.remote_base_ptr = ipcUmfData->base; key.local_provider = provider; key.remote_pid = ipcUmfData->pid; - ipc_mapped_handle_cache_value_t *cache_entry = NULL; - ret = umfIpcHandleMappedCacheGet(p->hIpcMappedCache, &key, - ipcUmfData->handle_id, &cache_entry); + ipc_opened_cache_value_t *cache_entry = NULL; + ret = umfIpcOpenedCacheGet(p->hIpcMappedCache, &key, ipcUmfData->handle_id, + &cache_entry); if (ret != UMF_RESULT_SUCCESS) { LOG_ERR("failed to get cache entry"); return ret; @@ -714,57 +1083,60 @@ static umf_result_t trackingOpenIpcHandle(void *provider, void *providerIpcData, assert(cache_entry != NULL); void *mapped_ptr = NULL; - utils_atomic_load_acquire(&(cache_entry->mapped_base_ptr), &mapped_ptr); - if (mapped_ptr == NULL) { + utils_atomic_load_acquire_ptr(&(cache_entry->mapped_base_ptr), + (void **)&mapped_ptr); + if (mapped_ptr == NULL) { // new cache entry utils_mutex_lock(&(cache_entry->mmap_lock)); - utils_atomic_load_acquire(&(cache_entry->mapped_base_ptr), &mapped_ptr); + utils_atomic_load_acquire_ptr(&(cache_entry->mapped_base_ptr), + (void **)&mapped_ptr); if (mapped_ptr == NULL) { ret = upstreamOpenIPCHandle(p, providerIpcData, - ipcUmfData->baseSize, &mapped_ptr); - if (ret == UMF_RESULT_SUCCESS) { - // Put to the cache - cache_entry->mapped_size = ipcUmfData->baseSize; - utils_atomic_store_release(&(cache_entry->mapped_base_ptr), - mapped_ptr); - } + ipcUmfData->baseSize, cache_entry); } + mapped_ptr = cache_entry->mapped_base_ptr; utils_mutex_unlock(&(cache_entry->mmap_lock)); } if (ret == UMF_RESULT_SUCCESS) { + assert(mapped_ptr != NULL); *ptr = mapped_ptr; } return ret; } +static tracker_ipc_info_t *getTrackerIpcInfo(const void *ptr) { + assert(ptr); + + uintptr_t key = (uintptr_t)ptr; + tracker_ipc_info_t *value = critnib_get(TRACKER->ipc_segments_map, key); + + return value; +} + static umf_result_t trackingCloseIpcHandle(void *provider, void *ptr, size_t size) { (void)provider; - (void)ptr; - (void)size; - // We keep opened IPC handles in the p->hIpcMappedCache. - // IPC handle is closed when it is evicted from the cache - // or when cache is destroyed. - // - // TODO: today the size of the IPC cache is infinite. - // When the threshold for the cache size is implemented - // we need to introduce a reference counting mechanism. - // The trackingOpenIpcHandle will increment the refcount for the corresponding entry. - // The trackingCloseIpcHandle will decrement the refcount for the corresponding cache entry. 
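
With this change, closing is no longer deferred to cache eviction alone: trackingCloseIpcHandle (below) looks up the tracked IPC segment and releases its cache entry, the classic get/release refcount shape. A sketch of that shape, assuming a refcounted entry as the removed comment anticipates; the names are illustrative, the real entry is ipc_opened_cache_value_t and the release call is umfIpcHandleMappedCacheRelease():

    #include <stdint.h>

    typedef struct entry_t {
        uint64_t refcount;
        /* mapped_base_ptr, mapped_size, ... */
    } entry_t;

    void unmap_and_free(entry_t *e); // hypothetical teardown

    static void entry_get(entry_t *e) {
        utils_fetch_and_add_u64(&e->refcount, 1);  // previous value ignored
    }

    static void entry_release(entry_t *e) {
        // the fetch-and-sub helpers added later in this patch return the
        // *previous* value, so 1 means we just dropped the last reference
        if (utils_fetch_and_sub_u64(&e->refcount, 1) == 1) {
            unmap_and_free(e);
        }
    }
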
- return UMF_RESULT_SUCCESS; + tracker_ipc_info_t *trackerIpcInfo = getTrackerIpcInfo(ptr); + + if (!trackerIpcInfo) { + LOG_ERR("failed to get tracker ipc info, ptr=%p, size=%zu", ptr, size); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return umfIpcHandleMappedCacheRelease(trackerIpcInfo->ipc_cache_value); } umf_memory_provider_ops_t UMF_TRACKING_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = trackingInitialize, .finalize = trackingFinalize, .alloc = trackingAlloc, + .free = trackingFree, .get_last_native_error = trackingGetLastError, .get_min_page_size = trackingGetMinPageSize, .get_recommended_page_size = trackingGetRecommendedPageSize, .get_name = trackingName, - .ext.free = trackingFree, .ext.purge_force = trackingPurgeForce, .ext.purge_lazy = trackingPurgeLazy, .ext.allocation_split = trackingAllocationSplit, @@ -777,11 +1149,10 @@ umf_memory_provider_ops_t UMF_TRACKING_MEMORY_PROVIDER_OPS = { umf_result_t umfTrackingMemoryProviderCreate( umf_memory_provider_handle_t hUpstream, umf_memory_pool_handle_t hPool, - umf_memory_provider_handle_t *hTrackingProvider, bool upstreamDoesNotFree) { + umf_memory_provider_handle_t *hTrackingProvider) { umf_tracking_memory_provider_t params; params.hUpstream = hUpstream; - params.upstreamDoesNotFree = upstreamDoesNotFree; params.hTracker = TRACKER; if (!params.hTracker) { LOG_ERR("failed, TRACKER is NULL"); @@ -795,7 +1166,7 @@ umf_result_t umfTrackingMemoryProviderCreate( } params.hIpcMappedCache = - umfIpcHandleMappedCacheCreate(ipcMappedCacheEvictionCallback); + umfIpcOpenedCacheCreate(ipcOpenedCacheEvictionCallback); LOG_DEBUG("upstream=%p, tracker=%p, " "pool=%p, ipcCache=%p, hIpcMappedCache=%p", @@ -823,33 +1194,56 @@ umf_memory_tracker_handle_t umfMemoryTrackerCreate(void) { return NULL; } - umf_ba_pool_t *tracker_allocator = - umf_ba_create(sizeof(struct tracker_value_t)); - if (!tracker_allocator) { + memset(handle, 0, sizeof(struct umf_memory_tracker_t)); + + umf_ba_pool_t *alloc_info_allocator = + umf_ba_create(sizeof(struct tracker_alloc_info_t)); + if (!alloc_info_allocator) { goto err_free_handle; } - handle->tracker_allocator = tracker_allocator; + handle->alloc_info_allocator = alloc_info_allocator; void *mutex_ptr = utils_mutex_init(&handle->splitMergeMutex); if (!mutex_ptr) { - goto err_destroy_tracker_allocator; + goto err_destroy_alloc_info_allocator; } - handle->map = critnib_new(); - if (!handle->map) { - goto err_destroy_mutex; + int i; + for (i = 0; i < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP; i++) { + handle->alloc_segments_map[i] = critnib_new(); + if (!handle->alloc_segments_map[i]) { + goto err_destroy_alloc_segments_map; + } } - LOG_DEBUG("tracker created, handle=%p, segment map=%p", (void *)handle, - (void *)handle->map); + handle->ipc_info_allocator = + umf_ba_create(sizeof(struct tracker_ipc_info_t)); + if (!handle->ipc_info_allocator) { + goto err_destroy_alloc_segments_map; + } + + handle->ipc_segments_map = critnib_new(); + if (!handle->ipc_segments_map) { + goto err_destroy_ipc_info_allocator; + } + + LOG_DEBUG("tracker created, handle=%p, alloc_segments_map=%p", + (void *)handle, (void *)handle->alloc_segments_map); return handle; -err_destroy_mutex: +err_destroy_ipc_info_allocator: + umf_ba_destroy(handle->ipc_info_allocator); +err_destroy_alloc_segments_map: + for (i = 0; i < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP; i++) { + if (handle->alloc_segments_map[i]) { + critnib_delete(handle->alloc_segments_map[i]); + } + } 
utils_mutex_destroy_not_free(&handle->splitMergeMutex); -err_destroy_tracker_allocator: - umf_ba_destroy(tracker_allocator); +err_destroy_alloc_info_allocator: + umf_ba_destroy(alloc_info_allocator); err_free_handle: umf_ba_global_free(handle); return NULL; @@ -867,15 +1261,25 @@ void umfMemoryTrackerDestroy(umf_memory_tracker_handle_t handle) { return; } - clear_tracker(handle); +#ifndef NDEBUG + check_if_tracker_is_empty(handle, NULL); +#endif /* NDEBUG */ // We have to zero all inner pointers, // because the tracker handle can be copied // and used in many places. - critnib_delete(handle->map); - handle->map = NULL; + for (int i = 0; i < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP; i++) { + if (handle->alloc_segments_map[i]) { + critnib_delete(handle->alloc_segments_map[i]); + handle->alloc_segments_map[i] = NULL; + } + } utils_mutex_destroy_not_free(&handle->splitMergeMutex); - umf_ba_destroy(handle->tracker_allocator); - handle->tracker_allocator = NULL; + umf_ba_destroy(handle->alloc_info_allocator); + handle->alloc_info_allocator = NULL; + critnib_delete(handle->ipc_segments_map); + handle->ipc_segments_map = NULL; + umf_ba_destroy(handle->ipc_info_allocator); + handle->ipc_info_allocator = NULL; umf_ba_global_free(handle); } diff --git a/src/provider/provider_tracking.h b/src/provider/provider_tracking.h index 9444ee4757..842449be5c 100644 --- a/src/provider/provider_tracking.h +++ b/src/provider/provider_tracking.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -26,12 +26,7 @@ extern "C" { #endif -struct umf_memory_tracker_t { - umf_ba_pool_t *tracker_allocator; - critnib *map; - utils_mutex_t splitMergeMutex; -}; - +struct umf_memory_tracker_t; typedef struct umf_memory_tracker_t *umf_memory_tracker_handle_t; extern umf_memory_tracker_handle_t TRACKER; @@ -50,11 +45,20 @@ typedef struct umf_alloc_info_t { umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, umf_alloc_info_t *pAllocInfo); +typedef struct umf_ipc_info_t { + void *base; + size_t baseSize; + umf_memory_provider_handle_t provider; +} umf_ipc_info_t; + +umf_result_t umfMemoryTrackerGetIpcInfo(const void *ptr, + umf_ipc_info_t *pIpcInfo); + // Creates a memory provider that tracks each allocation/deallocation through umf_memory_tracker_handle_t and // forwards all requests to hUpstream memory Provider. hUpstream lifetime should be managed by the user of this function. umf_result_t umfTrackingMemoryProviderCreate( umf_memory_provider_handle_t hUpstream, umf_memory_pool_handle_t hPool, - umf_memory_provider_handle_t *hTrackingProvider, bool upstreamDoesNotFree); + umf_memory_provider_handle_t *hTrackingProvider); void umfTrackingMemoryProviderGetUpstreamProvider( umf_memory_provider_handle_t hTrackingProvider, diff --git a/src/proxy_lib/proxy_lib.c b/src/proxy_lib/proxy_lib.c index f8bae304d8..ccb60f8aba 100644 --- a/src/proxy_lib/proxy_lib.c +++ b/src/proxy_lib/proxy_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -128,13 +128,6 @@ static umf_memory_pool_handle_t Proxy_pool = NULL; // it protects us from recursion in umfPool*() static __TLS int was_called_from_umfPool = 0; -// This WA for the issue: -// https://github.com/oneapi-src/unified-memory-framework/issues/894 -// It protects us from a recursion in malloc_usable_size() -// when the JEMALLOC proxy_lib_pool is used. -// TODO remove this WA when the issue is fixed. -static __TLS int was_called_from_malloc_usable_size = 0; - /*****************************************************************************/ /*** The constructor and destructor of the proxy library *********************/ /*****************************************************************************/ @@ -145,7 +138,7 @@ static size_t get_size_threshold(void) { LOG_DEBUG("UMF_PROXY[size.threshold] = %s", str_threshold); long threshold = utils_get_size_threshold(str_threshold); if (threshold < 0) { - LOG_ERR("incorrect size threshold: %s", str_threshold); + LOG_FATAL("incorrect size threshold: %s", str_threshold); exit(-1); } @@ -170,6 +163,8 @@ static int get_system_allocator_symbols(void) { return 0; } + LOG_FATAL("Required system allocator's symbols not found."); + return -1; } #endif /* _WIN32 */ @@ -181,7 +176,7 @@ void proxy_lib_create_common(void) { umf_result = umfOsMemoryProviderParamsCreate(&os_params); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("creating OS memory provider params failed"); + LOG_FATAL("creating OS memory provider params failed"); exit(-1); } @@ -189,7 +184,7 @@ void proxy_lib_create_common(void) { size_t _threshold = get_size_threshold(); if (_threshold > 0) { if (get_system_allocator_symbols()) { - LOG_ERR("initialization of the system allocator failed!"); + LOG_FATAL("initialization of the system allocator failed!"); exit(-1); } @@ -204,12 +199,12 @@ void proxy_lib_create_common(void) { umf_result = umfOsMemoryProviderParamsSetVisibility(os_params, UMF_MEM_MAP_SHARED); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("setting visibility mode failed"); + LOG_FATAL("setting visibility mode failed"); exit(-1); } umf_result = umfOsMemoryProviderParamsSetShmName(os_params, NULL); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("setting shared memory name failed"); + LOG_FATAL("setting shared memory name failed"); exit(-1); } } else if (utils_env_var_has_str("UMF_PROXY", @@ -217,7 +212,7 @@ void proxy_lib_create_common(void) { umf_result = umfOsMemoryProviderParamsSetVisibility(os_params, UMF_MEM_MAP_SHARED); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("setting visibility mode failed"); + LOG_FATAL("setting visibility mode failed"); exit(-1); } @@ -226,7 +221,7 @@ void proxy_lib_create_common(void) { sprintf(shm_name, "umf_proxy_lib_shm_pid_%i", utils_getpid()); umf_result = umfOsMemoryProviderParamsSetShmName(os_params, shm_name); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("setting shared memory name failed"); + LOG_FATAL("setting shared memory name failed"); exit(-1); } @@ -240,14 +235,14 @@ void proxy_lib_create_common(void) { &OS_memory_provider); umfOsMemoryProviderParamsDestroy(os_params); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("creating OS memory provider failed"); + LOG_FATAL("creating OS memory provider failed"); exit(-1); } umf_result = umfPoolCreate(umfPoolManagerOps(), OS_memory_provider, NULL, 0, &Proxy_pool); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("creating UMF pool manager failed"); + LOG_FATAL("creating UMF pool manager failed"); exit(-1); } @@ 
-308,7 +303,9 @@ static inline void *ba_generic_realloc(umf_ba_linear_pool_t *pool, void *ptr, /*** The "LEAK" linear base allocator functions ******************************/ /*****************************************************************************/ -static void ba_leak_create(void) { Base_alloc_leak = umf_ba_linear_create(0); } +static void ba_leak_create(void) { + Base_alloc_leak = umf_ba_linear_create(4 * 1024 * 1024); +} // it does not implement destroy(), because we cannot destroy non-freed memory @@ -478,18 +475,15 @@ size_t malloc_usable_size(void *ptr) { return 0; // unsupported in case of the ba_leak allocator } - if (!was_called_from_malloc_usable_size && Proxy_pool && - (umfPoolByPtr(ptr) == Proxy_pool)) { - was_called_from_malloc_usable_size = 1; + if (Proxy_pool && (umfPoolByPtr(ptr) == Proxy_pool)) { was_called_from_umfPool = 1; size_t size = umfPoolMallocUsableSize(Proxy_pool, ptr); was_called_from_umfPool = 0; - was_called_from_malloc_usable_size = 0; return size; } #ifndef _WIN32 - if (!was_called_from_malloc_usable_size && Size_threshold_value) { + if (Size_threshold_value) { return System_malloc_usable_size(ptr); } #endif /* _WIN32 */ diff --git a/src/proxy_lib/proxy_lib.rc.in b/src/proxy_lib/proxy_lib.rc.in index dce151ec3e..a3eff71dea 100644 --- a/src/proxy_lib/proxy_lib.rc.in +++ b/src/proxy_lib/proxy_lib.rc.in @@ -1,4 +1,4 @@ -// Copyright (c) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -10,6 +10,8 @@ #define UMF_VERNUMBERS @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@,@UMF_VERSION_REVISION@ #define _UMF_VERSION "@UMF_VERSION@" +// Store our CMake vars in the "FileDescription" block, as the custom fields require special parsing. +#define _UMF_CMAKE_VARS "@UMF_ALL_CMAKE_VARIABLES@" #ifdef _DEBUG #define VERSION_DEBUG VS_FF_DEBUG @@ -49,9 +51,9 @@ BEGIN BLOCK "040904b0" // U.S. English, Unicode (0x04b0 == 1200) BEGIN VALUE "CompanyName", "Intel Corporation\0" - VALUE "FileDescription", "Unified Memory Framework (UMF) proxy library\0" + VALUE "FileDescription", "Unified Memory Framework (UMF) proxy library (build options: " _UMF_CMAKE_VARS ")\0" VALUE "FileVersion", _UMF_VERSION "\0" - VALUE "LegalCopyright", "Copyright 2024, Intel Corporation. All rights reserved.\0" + VALUE "LegalCopyright", "Copyright 2024-2025, Intel Corporation. All rights reserved.\0" VALUE "LegalTrademarks", "\0" VALUE "OriginalFilename", "umf_proxy.dll\0" VALUE "ProductName", "Unified Memory Framework (UMF)\0" diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index a0bff39fd8..976a2cb626 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,15 +7,14 @@ include(FindThreads) set(UMF_UTILS_SOURCES_COMMON utils_common.c utils_log.c utils_load_library.c) -set(UMF_UTILS_SOURCES_POSIX utils_posix_common.c utils_posix_concurrency.c - utils_posix_math.c) +set(UMF_UTILS_SOURCES_POSIX utils_posix_common.c utils_posix_concurrency.c) set(UMF_UTILS_SOURCES_LINUX utils_linux_common.c) set(UMF_UTILS_SOURCES_MACOSX utils_macosx_common.c) set(UMF_UTILS_SOURCES_WINDOWS utils_windows_common.c - utils_windows_concurrency.c utils_windows_math.c) + utils_windows_concurrency.c) if(UMF_USE_VALGRIND) if(UMF_USE_ASAN diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index eaf5420fc6..225c02d2c1 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -128,3 +128,6 @@ umf_result_t utils_translate_flags(unsigned in_flags, unsigned max, *out_flags = out_f; return UMF_RESULT_SUCCESS; } + +size_t utils_max(size_t a, size_t b) { return a > b ? a : b; } +size_t utils_min(size_t a, size_t b) { return a < b ? a : b; } diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index 9ef2b3cf13..fff44f390c 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -38,6 +38,8 @@ typedef enum umf_purge_advise_t { expression; \ } while (0) +#define IS_POWER_OF_2(value) ((value) != 0 && ((value) & ((value)-1)) == 0) + #define IS_ALIGNED(value, align) \ ((align == 0 || (((value) & ((align)-1)) == 0))) #define IS_NOT_ALIGNED(value, align) \ @@ -51,9 +53,6 @@ typedef enum umf_purge_advise_t { #define ASSERT_IS_ALIGNED(value, align) \ DO_WHILE_EXPRS(assert(IS_ALIGNED(value, align))) -#define VALGRIND_ANNOTATE_NEW_MEMORY(p, s) DO_WHILE_EMPTY -#define VALGRIND_HG_DRD_DISABLE_CHECKING(p, s) DO_WHILE_EMPTY - #ifdef _WIN32 /* Windows */ #define __TLS __declspec(thread) @@ -176,6 +175,10 @@ int utils_fallocate(int fd, long offset, long len); long utils_get_size_threshold(char *str_threshold); +size_t utils_max(size_t a, size_t b); + +size_t utils_min(size_t a, size_t b); + #ifdef __cplusplus } #endif diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 155184cc44..31e5793b96 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -10,7 +10,10 @@
 #ifndef UMF_UTILS_CONCURRENCY_H
 #define UMF_UTILS_CONCURRENCY_H 1
 
+#include
+#include
 #include
+#include
 
 #ifdef _WIN32
 #include <windows.h>
@@ -18,7 +21,7 @@
 #include "utils_windows_intrin.h"
 
 #pragma intrinsic(_BitScanForward64)
-#else
+#else /* !_WIN32 */
 #include <pthread.h>
 
 #ifndef __cplusplus
@@ -26,10 +29,18 @@
 #else /* __cplusplus */
 #include <atomic>
 #define _Atomic(X) std::atomic
+
+// TODO remove cpp code from this file
+using std::memory_order_acq_rel;
+using std::memory_order_acquire;
+using std::memory_order_relaxed;
+using std::memory_order_release;
+
 #endif /* __cplusplus */
-#endif /* _WIN32 */
+#endif /* !_WIN32 */
 
+#include "utils_common.h"
 #include "utils_sanitizers.h"
 
 #ifdef __cplusplus
@@ -45,11 +56,27 @@ typedef struct utils_mutex_t {
 } utils_mutex_t;
 
 size_t utils_mutex_get_size(void);
-utils_mutex_t *utils_mutex_init(void *ptr);
+utils_mutex_t *utils_mutex_init(utils_mutex_t *ptr);
 void utils_mutex_destroy_not_free(utils_mutex_t *m);
 int utils_mutex_lock(utils_mutex_t *mutex);
 int utils_mutex_unlock(utils_mutex_t *mutex);
 
+typedef struct utils_rwlock_t {
+#ifdef _WIN32
+    // Slim Reader/Writer lock
+    SRWLOCK lock;
+#else
+    pthread_rwlock_t rwlock;
+#endif
+} utils_rwlock_t;
+
+utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *ptr);
+void utils_rwlock_destroy_not_free(utils_rwlock_t *rwlock);
+int utils_read_lock(utils_rwlock_t *rwlock);
+int utils_write_lock(utils_rwlock_t *rwlock);
+int utils_read_unlock(utils_rwlock_t *rwlock);
+int utils_write_unlock(utils_rwlock_t *rwlock);
+
 #if defined(_WIN32)
 #define UTIL_ONCE_FLAG INIT_ONCE
 #define UTIL_ONCE_FLAG_INIT INIT_ONCE_STATIC_INIT
@@ -61,54 +88,141 @@ int utils_mutex_unlock(utils_mutex_t *mutex);
 void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void));
 
 #if defined(_WIN32)
-static __inline unsigned char utils_lssb_index(long long value) {
-    unsigned long ret;
-    _BitScanForward64(&ret, value);
-    return (unsigned char)ret;
+
+// There is no good way to do atomic_load on windows...
+static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) {
+    // NOTE: Windows cl complains about direct accessing 'ptr' which is next
+    // accessed using Interlocked* functions (warning 28112 - disabled)
+    ASSERT_IS_ALIGNED((uintptr_t)ptr, 8);
+
+    // On Windows, there is no equivalent to __atomic_load, so we use cmpxchg
+    // with 0, 0 here. This will always return the value under the pointer
+    // without writing anything.
+    LONG64 ret = InterlockedCompareExchange64((LONG64 volatile *)ptr, 0, 0);
+    *out = *(uint64_t *)&ret;
 }
-static __inline unsigned char utils_mssb_index(long long value) {
-    unsigned long ret;
-    _BitScanReverse64(&ret, value);
-    return (unsigned char)ret;
+
+static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) {
+    ASSERT_IS_ALIGNED((uintptr_t)ptr, 8);
+    uintptr_t ret = (uintptr_t)InterlockedCompareExchangePointer(ptr, 0, 0);
+    *(uintptr_t *)out = ret;
 }
 
-// There is no good way to do atomic_load on windows...
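
The InterlockedCompareExchange64(ptr, 0, 0) trick above expresses a read as a compare-and-swap: if *ptr is 0 it writes 0 (a no-op), otherwise the exchange fails, and either way the call returns the current value with full ordering. A usage sketch for the new typed helpers replacing the old macros (removed just below), assuming only the declarations above:

    #include <stdint.h>

    static uint64_t payload; // ordinary data, published before the flag
    static uint64_t ready;   // accessed only through the atomic helpers

    static void publish(uint64_t v) {
        payload = v;
        utils_atomic_store_release_u64(&ready, 1); // release: payload visible
    }

    static int try_consume(uint64_t *out) {
        uint64_t flag;
        utils_atomic_load_acquire_u64(&ready, &flag); // pairs with the release
        if (flag) {
            *out = payload; // safe: happens-after the publishing store
            return 1;
        }
        return 0;
    }
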
-#define utils_atomic_load_acquire(object, dest) \ - do { \ - *(LONG64 *)dest = \ - InterlockedOr64Acquire((LONG64 volatile *)object, 0); \ - } while (0) - -#define utils_atomic_store_release(object, desired) \ - InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) -#define utils_atomic_increment(object) \ - InterlockedIncrement64((LONG64 volatile *)object) -#define utils_atomic_decrement(object) \ - InterlockedDecrement64((LONG64 volatile *)object) -#define utils_fetch_and_add64(ptr, value) \ - InterlockedExchangeAdd64((LONG64 *)(ptr), value) -#else -#define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) -#define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) -#define utils_atomic_load_acquire(object, dest) \ - do { \ - utils_annotate_acquire((void *)object); \ - __atomic_load(object, dest, memory_order_acquire); \ - } while (0) - -#define utils_atomic_store_release(object, desired) \ - do { \ - __atomic_store_n(object, desired, memory_order_release); \ - utils_annotate_release((void *)object); \ - } while (0) - -#define utils_atomic_increment(object) \ - __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL) -#define utils_atomic_decrement(object) \ - __atomic_sub_fetch(object, 1, __ATOMIC_ACQ_REL) -#define utils_fetch_and_add64 __sync_fetch_and_add +static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + InterlockedExchangePointer(ptr, val); +} -#endif +static inline uint64_t utils_atomic_increment_u64(uint64_t *ptr) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return incremented value + return InterlockedIncrement64((LONG64 volatile *)ptr); +} + +static inline uint64_t utils_atomic_decrement_u64(uint64_t *ptr) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return decremented value + return InterlockedDecrement64((LONG64 volatile *)ptr); +} + +static inline uint64_t utils_fetch_and_add_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return the value that had previously been in *ptr + return InterlockedExchangeAdd64((LONG64 volatile *)(ptr), val); +} + +static inline uint64_t utils_fetch_and_sub_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return the value that had previously been in *ptr + // NOTE: on Windows there is no *Sub* version of InterlockedExchange + return InterlockedExchangeAdd64((LONG64 volatile *)(ptr), -(LONG64)val); +} + +static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected, + uint64_t *desired) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + LONG64 out = InterlockedCompareExchange64( + (LONG64 volatile *)ptr, *(LONG64 *)desired, *(LONG64 *)expected); + if (out == *(LONG64 *)expected) { + return true; + } + + // else + *expected = out; + return false; +} + +static inline void utils_atomic_store_release_u64(void *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + LONG64 out; + LONG64 desired = (LONG64)val; + LONG64 expected = 0; + while (expected != (out = InterlockedCompareExchange64( + (LONG64 volatile *)ptr, desired, expected))) { + expected = out; + } +} + +#else // !defined(_WIN32) + +static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + ASSERT_IS_ALIGNED((uintptr_t)out, 8); + __atomic_load(ptr, out, memory_order_acquire); + utils_annotate_acquire(ptr); +} + +static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + ASSERT_IS_ALIGNED((uintptr_t)out, 
8); + __atomic_load((uintptr_t *)ptr, (uintptr_t *)out, memory_order_acquire); + utils_annotate_acquire(ptr); +} + +static inline void utils_atomic_store_release_u64(void *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + __atomic_store_n((uintptr_t *)ptr, (uintptr_t)val, memory_order_release); +} + +static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + utils_annotate_release(ptr); + __atomic_store_n((uintptr_t *)ptr, (uintptr_t)val, memory_order_release); +} + +static inline uint64_t utils_atomic_increment_u64(uint64_t *val) { + ASSERT_IS_ALIGNED((uintptr_t)val, 8); + // return incremented value + return __atomic_add_fetch(val, 1, memory_order_acq_rel); +} + +static inline uint64_t utils_atomic_decrement_u64(uint64_t *val) { + ASSERT_IS_ALIGNED((uintptr_t)val, 8); + // return decremented value + return __atomic_sub_fetch(val, 1, memory_order_acq_rel); +} + +static inline uint64_t utils_fetch_and_add_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return the value that had previously been in *ptr + return __atomic_fetch_add(ptr, val, memory_order_acq_rel); +} + +static inline uint64_t utils_fetch_and_sub_u64(uint64_t *ptr, uint64_t val) { + // return the value that had previously been in *ptr + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + return __atomic_fetch_sub(ptr, val, memory_order_acq_rel); +} + +static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected, + uint64_t *desired) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + return __atomic_compare_exchange(ptr, expected, desired, 0 /* strong */, + memory_order_acq_rel, + memory_order_relaxed); +} + +#endif // !defined(_WIN32) #ifdef __cplusplus } diff --git a/src/utils/utils_level_zero.cpp b/src/utils/utils_level_zero.cpp index 833047dd79..f5a42b0fae 100644 --- a/src/utils/utils_level_zero.cpp +++ b/src/utils/utils_level_zero.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -60,6 +60,9 @@ struct libze_ops { const ze_device_mem_alloc_desc_t *, size_t, size_t, ze_device_handle_t, void **); ze_result_t (*zeMemFree)(ze_context_handle_t, void *); + ze_result_t (*zeDeviceGetMemoryProperties)( + ze_device_handle_t hDevice, uint32_t *pCount, + ze_device_memory_properties_t *pMemProperties); } libze_ops; #if USE_DLOPEN @@ -125,6 +128,9 @@ struct DlHandleCloser { libze_ops.zeMemFree = [](auto... args) { return noop_stub(args...); }; + libze_ops.zeDeviceGetMemoryProperties = [](auto... args) { + return noop_stub(args...); + }; utils_close_library(dlHandle); } } @@ -138,10 +144,15 @@ int InitLevelZeroOps() { const char *lib_name = "libze_loader.so"; #endif // Load Level Zero symbols - // NOTE that we use UMF_UTIL_OPEN_LIBRARY_GLOBAL which add all loaded symbols to the +#if OPEN_ZE_LIBRARY_GLOBAL + // NOTE UMF_UTIL_OPEN_LIBRARY_GLOBAL adds all loaded symbols to the // global symbol table. 
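
The NOTE above is the one subtlety of this loader path: a globally opened library makes its symbols visible to libraries dlopen'd later, which Level Zero components may rely on. The overall pattern (open, resolve each symbol, fail fast if one is missing) in a minimal C sketch using the same utils wrappers; the library and symbol names here are placeholders, not part of the patch:

    typedef int (*init_fn_t)(int flags);

    static init_fn_t load_init(void) {
        void *lib = utils_open_library("libexample.so",
                                       UMF_UTIL_OPEN_LIBRARY_GLOBAL);
        if (!lib) {
            return NULL; // library not found
        }
        // utils_get_symbol_addr logs and returns NULL if the symbol is missing
        return (init_fn_t)utils_get_symbol_addr(lib, "exampleInit",
                                                "libexample.so");
    }
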
+    int open_flags = UMF_UTIL_OPEN_LIBRARY_GLOBAL;
+#else
+    int open_flags = 0;
+#endif
     zeDlHandle = std::unique_ptr<void, DlHandleCloser>(
-        utils_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_GLOBAL));
+        utils_open_library(lib_name, open_flags));
     *(void **)&libze_ops.zeInit =
         utils_get_symbol_addr(zeDlHandle.get(), "zeInit", lib_name);
     if (libze_ops.zeInit == nullptr) {
@@ -265,6 +276,13 @@ int InitLevelZeroOps() {
         fprintf(stderr, "zeMemFree symbol not found in %s\n", lib_name);
         return -1;
     }
+    *(void **)&libze_ops.zeDeviceGetMemoryProperties = utils_get_symbol_addr(
+        zeDlHandle.get(), "zeDeviceGetMemoryProperties", lib_name);
+    if (libze_ops.zeDeviceGetMemoryProperties == nullptr) {
+        fprintf(stderr, "zeDeviceGetMemoryProperties symbol not found in %s\n",
+                lib_name);
+        return -1;
+    }
 
     return 0;
 }
@@ -292,6 +310,7 @@ int InitLevelZeroOps() {
     libze_ops.zeMemGetAllocProperties = zeMemGetAllocProperties;
     libze_ops.zeMemAllocDevice = zeMemAllocDevice;
     libze_ops.zeMemFree = zeMemFree;
+    libze_ops.zeDeviceGetMemoryProperties = zeDeviceGetMemoryProperties;
 
     return 0;
 }
@@ -330,12 +349,6 @@ int utils_ze_get_drivers(uint32_t *drivers_num_,
     ze_driver_handle_t *drivers = NULL;
     uint32_t drivers_num = 0;
 
-    ret = utils_ze_init_level_zero();
-    if (ret != 0) {
-        fprintf(stderr, "utils_ze_init_level_zero() failed!\n");
-        goto init_fail;
-    }
-
     ze_result = libze_ops.zeDriverGet(&drivers_num, NULL);
     if (ze_result != ZE_RESULT_SUCCESS) {
         fprintf(stderr, "zeDriverGet() failed!\n");
@@ -372,7 +385,6 @@ int utils_ze_get_drivers(uint32_t *drivers_num_,
         *drivers_ = NULL;
     }
 
-init_fail:
     return ret;
 }
 
@@ -383,12 +395,6 @@ int utils_ze_get_devices(ze_driver_handle_t driver, uint32_t *devices_num_,
     uint32_t devices_num = 0;
     ze_device_handle_t *devices = NULL;
 
-    ret = utils_ze_init_level_zero();
-    if (ret != 0) {
-        fprintf(stderr, "utils_ze_init_level_zero() failed!\n");
-        goto init_fail;
-    }
-
     ze_result = libze_ops.zeDeviceGet(driver, &devices_num, NULL);
     if (ze_result != ZE_RESULT_SUCCESS) {
         fprintf(stderr, "zeDeviceGet() failed!\n");
@@ -424,7 +430,7 @@ int utils_ze_get_devices(ze_driver_handle_t driver, uint32_t *devices_num_,
         free(devices);
         devices = NULL;
     }
-init_fail:
+
     return ret;
 }
 
@@ -745,3 +751,15 @@ ze_memory_type_t utils_ze_get_mem_type(ze_context_handle_t context, void *ptr) {
     libze_ops.zeMemGetAllocProperties(context, ptr, &alloc_props, &device);
     return alloc_props.type;
 }
+
+int64_t utils_ze_get_num_memory_properties(ze_device_handle_t device) {
+    uint32_t pCount = 0;
+    ze_result_t ze_result =
+        libze_ops.zeDeviceGetMemoryProperties(device, &pCount, nullptr);
+    if (ze_result != ZE_RESULT_SUCCESS) {
+        fprintf(stderr, "zeDeviceGetMemoryProperties() failed!\n");
+        return -1;
+    }
+
+    return static_cast<int64_t>(pCount);
+}
diff --git a/src/utils/utils_level_zero.h b/src/utils/utils_level_zero.h
index b29a4dc438..00f55b3510 100644
--- a/src/utils/utils_level_zero.h
+++ b/src/utils/utils_level_zero.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2024 Intel Corporation
+ * Copyright (C) 2024-2025 Intel Corporation
  *
  * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -16,7 +16,6 @@ extern "C" { #endif -int utils_ze_init_level_zero(void); int utils_ze_init_level_zero(void); int utils_ze_get_drivers(uint32_t *drivers_num_, ze_driver_handle_t **drivers_); @@ -45,6 +44,8 @@ int utils_ze_destroy_context(ze_context_handle_t context); ze_memory_type_t utils_ze_get_mem_type(ze_context_handle_t context, void *ptr); +int64_t utils_ze_get_num_memory_properties(ze_device_handle_t device); + #ifdef __cplusplus } #endif diff --git a/src/utils/utils_load_library.c b/src/utils/utils_load_library.c index ef0da450ba..d774fec840 100644 --- a/src/utils/utils_load_library.c +++ b/src/utils/utils_load_library.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -32,7 +32,11 @@ #ifdef _WIN32 void *utils_open_library(const char *filename, int userFlags) { - (void)userFlags; //unused for win + if (userFlags & UMF_UTIL_OPEN_LIBRARY_NO_LOAD) { + HMODULE hModule; + BOOL ret = GetModuleHandleEx(0, TEXT(filename), &hModule); + return ret ? hModule : NULL; + } return LoadLibrary(TEXT(filename)); } @@ -66,6 +70,9 @@ void *utils_open_library(const char *filename, int userFlags) { if (userFlags & UMF_UTIL_OPEN_LIBRARY_GLOBAL) { dlopenFlags |= RTLD_GLOBAL; } + if (userFlags & UMF_UTIL_OPEN_LIBRARY_NO_LOAD) { + dlopenFlags |= RTLD_NOLOAD; + } void *handle = dlopen(filename, dlopenFlags); if (handle == NULL) { diff --git a/src/utils/utils_load_library.h b/src/utils/utils_load_library.h index 3206183f55..5b6e71239f 100644 --- a/src/utils/utils_load_library.h +++ b/src/utils/utils_load_library.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -17,7 +17,10 @@ #ifdef __cplusplus extern "C" { #endif +// The symbols defined by this library will be made available for symbol resolution of subsequently loaded libraries. #define UMF_UTIL_OPEN_LIBRARY_GLOBAL 1 +// Don't load the library. utils_open_library succeeds if the library is already loaded. +#define UMF_UTIL_OPEN_LIBRARY_NO_LOAD 1 << 1 void *utils_open_library(const char *filename, int userFlags); int utils_close_library(void *handle); diff --git a/src/utils/utils_math.h b/src/utils/utils_math.h index c78be11367..0e58fc38df 100644 --- a/src/utils/utils_math.h +++ b/src/utils/utils_math.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -11,16 +11,58 @@ #define UMF_MATH_H 1 #include +#include #include +#include #ifdef __cplusplus extern "C" { #endif -size_t getLeftmostSetBitPos(size_t num); +#if defined(_WIN32) -// Logarithm is an index of the most significant non-zero bit. -static inline size_t log2Utils(size_t num) { return getLeftmostSetBitPos(num); } +#include "utils_windows_intrin.h" + +#pragma intrinsic(_BitScanReverse64) +#pragma intrinsic(_BitScanForward64) + +// Retrieves the position of the leftmost set bit. +// The position of the bit is counted from 0 +// e.g. for 01000011110 the position equals 9. 
+static inline size_t utils_msb64(uint64_t num) { + assert(num != 0 && + "Finding leftmost set bit when number equals zero is undefined"); + unsigned long index = 0; + _BitScanReverse64(&index, num); + return (size_t)index; +} + +static inline size_t utils_lsb64(uint64_t num) { + assert(num != 0 && + "Finding rightmost set bit when number equals zero is undefined"); + unsigned long index = 0; + _BitScanForward64(&index, num); + return (size_t)index; +} + +#else // !defined(_WIN32) + +// Retrieves the position of the leftmost set bit. +// The position of the bit is counted from 0 +// e.g. for 01000011110 the position equals 9. +static inline size_t utils_msb64(uint64_t num) { + assert(num != 0 && + "Finding leftmost set bit when number equals zero is undefined"); + return 63 - __builtin_clzll(num); +} + +static inline size_t utils_lsb64(uint64_t num) { + assert(num != 0 && + "Finding rightmost set bit when number equals zero is undefined"); + return __builtin_ctzll(num); +} + +#endif // !defined(_WIN32) #ifdef __cplusplus } diff --git a/src/utils/utils_posix_common.c b/src/utils/utils_posix_common.c index 4a60cbb1f2..613b8ea41d 100644 --- a/src/utils/utils_posix_common.c +++ b/src/utils/utils_posix_common.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -91,9 +91,8 @@ umf_result_t utils_duplicate_fd(int pid, int fd_in, int *fd_out) { return UMF_RESULT_ERROR_NOT_SUPPORTED; #else // pidfd_getfd(2) is used to obtain a duplicate of another process's file descriptor. - // Permission to duplicate another process's file descriptor - // is governed by a ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check (see ptrace(2)) - // that can be changed using the /proc/sys/kernel/yama/ptrace_scope interface. + // Calling prctl(PR_SET_PTRACER, getppid()) in a producer binary that creates IPC handle + // allows file descriptor duplication for parent process and its children. // pidfd_getfd(2) is supported since Linux 5.6 // pidfd_open(2) is supported since Linux 5.3 errno = 0; diff --git a/src/utils/utils_posix_concurrency.c b/src/utils/utils_posix_concurrency.c index fcf04ed952..44a3173611 100644 --- a/src/utils/utils_posix_concurrency.c +++ b/src/utils/utils_posix_concurrency.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -11,10 +11,11 @@ #include #include "utils_concurrency.h" +#include "utils_log.h" size_t utils_mutex_get_size(void) { return sizeof(pthread_mutex_t); } -utils_mutex_t *utils_mutex_init(void *ptr) { +utils_mutex_t *utils_mutex_init(utils_mutex_t *ptr) { pthread_mutex_t *mutex = (pthread_mutex_t *)ptr; int ret = pthread_mutex_init(mutex, NULL); return ret == 0 ? 
((utils_mutex_t *)mutex) : NULL; @@ -23,7 +24,9 @@ utils_mutex_t *utils_mutex_init(void *ptr) { void utils_mutex_destroy_not_free(utils_mutex_t *m) { pthread_mutex_t *mutex = (pthread_mutex_t *)m; int ret = pthread_mutex_destroy(mutex); - (void)ret; // TODO: add logging + if (ret) { + LOG_ERR("pthread_mutex_destroy failed"); + } } int utils_mutex_lock(utils_mutex_t *m) { @@ -37,3 +40,33 @@ int utils_mutex_unlock(utils_mutex_t *m) { void utils_init_once(UTIL_ONCE_FLAG *flag, void (*oneCb)(void)) { pthread_once(flag, oneCb); } + +utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *ptr) { + pthread_rwlock_t *rwlock = (pthread_rwlock_t *)ptr; + int ret = pthread_rwlock_init(rwlock, NULL); + return ret == 0 ? ((utils_rwlock_t *)rwlock) : NULL; +} + +void utils_rwlock_destroy_not_free(utils_rwlock_t *ptr) { + pthread_rwlock_t *rwlock = (pthread_rwlock_t *)ptr; + int ret = pthread_rwlock_destroy(rwlock); + if (ret) { + LOG_ERR("pthread_rwlock_destroy failed"); + } +} + +int utils_read_lock(utils_rwlock_t *rwlock) { + return pthread_rwlock_rdlock((pthread_rwlock_t *)rwlock); +} + +int utils_write_lock(utils_rwlock_t *rwlock) { + return pthread_rwlock_wrlock((pthread_rwlock_t *)rwlock); +} + +int utils_read_unlock(utils_rwlock_t *rwlock) { + return pthread_rwlock_unlock((pthread_rwlock_t *)rwlock); +} + +int utils_write_unlock(utils_rwlock_t *rwlock) { + return pthread_rwlock_unlock((pthread_rwlock_t *)rwlock); +} diff --git a/src/utils/utils_posix_math.c b/src/utils/utils_posix_math.c deleted file mode 100644 index 465b687725..0000000000 --- a/src/utils/utils_posix_math.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * - * Copyright (C) 2023 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - */ - -#include "utils_math.h" -#include - -// Retrieves the position of the leftmost set bit. -// The position of the bit is counted from 0 -// e.g. for 01000011110 the position equals 9. -size_t getLeftmostSetBitPos(size_t num) { - assert(num != 0 && - "Finding leftmost set bit when number equals zero is undefined"); - return (sizeof(num) * CHAR_BIT - 1) - __builtin_clzll(num); -} diff --git a/src/utils/utils_sanitizers.h b/src/utils/utils_sanitizers.h index 3498e4b705..f8896d0ae3 100644 --- a/src/utils/utils_sanitizers.h +++ b/src/utils/utils_sanitizers.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -168,6 +168,24 @@ static inline void utils_annotate_memory_inaccessible(void *ptr, size_t size) { #endif } +static inline void utils_annotate_memory_new(void *ptr, size_t size) { +#ifdef UMF_VG_DRD_ENABLED + ANNOTATE_NEW_MEMORY(ptr, size); +#else + (void)ptr; + (void)size; +#endif +} + +static inline void utils_annotate_memory_no_check(void *ptr, size_t size) { +#ifdef UMF_VG_HELGRIND_ENABLED + VALGRIND_HG_DISABLE_CHECKING(ptr, size); +#else + (void)ptr; + (void)size; +#endif +} + #ifdef __cplusplus } #endif diff --git a/src/utils/utils_windows_concurrency.c b/src/utils/utils_windows_concurrency.c index 696f4523bc..faa302be36 100644 --- a/src/utils/utils_windows_concurrency.c +++ b/src/utils/utils_windows_concurrency.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
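+ *
+ * The SRWLOCK-based rwlock wrappers added below mirror the pthread
+ * implementation in utils_posix_concurrency.c. A minimal usage sketch
+ * (the caller provides the storage, as with utils_mutex_t):
+ *
+ *   utils_rwlock_t lock;
+ *   if (utils_rwlock_init(&lock) == NULL) {
+ *       ... handle init failure (possible on POSIX only) ...
+ *   }
+ *   utils_read_lock(&lock);
+ *   ... concurrent readers ...
+ *   utils_read_unlock(&lock);
+ *   utils_write_lock(&lock);
+ *   ... exclusive writer ...
+ *   utils_write_unlock(&lock);
+ *   utils_rwlock_destroy_not_free(&lock);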
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -11,35 +11,61 @@
 size_t utils_mutex_get_size(void) { return sizeof(utils_mutex_t); }

-utils_mutex_t *utils_mutex_init(void *ptr) {
-    utils_mutex_t *mutex_internal = (utils_mutex_t *)ptr;
-    InitializeCriticalSection(&mutex_internal->lock);
-    return (utils_mutex_t *)mutex_internal;
+utils_mutex_t *utils_mutex_init(utils_mutex_t *mutex) {
+    InitializeCriticalSection(&mutex->lock);
+    return mutex;
 }

 void utils_mutex_destroy_not_free(utils_mutex_t *mutex) {
-    utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex;
-    DeleteCriticalSection(&mutex_internal->lock);
+    DeleteCriticalSection(&mutex->lock);
 }

 int utils_mutex_lock(utils_mutex_t *mutex) {
-    utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex;
-    EnterCriticalSection(&mutex_internal->lock);
+    EnterCriticalSection(&mutex->lock);

-    if (mutex_internal->lock.RecursionCount > 1) {
-        LeaveCriticalSection(&mutex_internal->lock);
+    if (mutex->lock.RecursionCount > 1) {
+        LeaveCriticalSection(&mutex->lock);
         /* deadlock detected */
-        return -1;
+        abort();
     }
     return 0;
 }

 int utils_mutex_unlock(utils_mutex_t *mutex) {
-    utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex;
-    LeaveCriticalSection(&mutex_internal->lock);
+    LeaveCriticalSection(&mutex->lock);
     return 0;
 }

+utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *rwlock) {
+    InitializeSRWLock(&rwlock->lock);
+    return rwlock; // never fails
+}
+
+void utils_rwlock_destroy_not_free(utils_rwlock_t *rwlock) {
+    // there is no call to destroy an SRW lock
+    (void)rwlock;
+}
+
+int utils_read_lock(utils_rwlock_t *rwlock) {
+    AcquireSRWLockShared(&rwlock->lock);
+    return 0; // never fails
+}
+
+int utils_write_lock(utils_rwlock_t *rwlock) {
+    AcquireSRWLockExclusive(&rwlock->lock);
+    return 0; // never fails
+}
+
+int utils_read_unlock(utils_rwlock_t *rwlock) {
+    ReleaseSRWLockShared(&rwlock->lock);
+    return 0; // never fails
+}
+
+int utils_write_unlock(utils_rwlock_t *rwlock) {
+    ReleaseSRWLockExclusive(&rwlock->lock);
+    return 0; // never fails
+}
+
 static BOOL CALLBACK initOnceCb(PINIT_ONCE InitOnce, PVOID Parameter,
                                 PVOID *lpContext) {
     (void)InitOnce; // unused
diff --git a/src/utils/utils_windows_math.c b/src/utils/utils_windows_math.c
deleted file mode 100644
index 07c4c9978b..0000000000
--- a/src/utils/utils_windows_math.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- *
- * Copyright (C) 2023 Intel Corporation
- *
- * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- */
-
-#include "utils_math.h"
-#include "utils_windows_intrin.h"
-
-#pragma intrinsic(_BitScanReverse)
-
-// Retrieves the position of the leftmost set bit.
-// The position of the bit is counted from 0
-// e.g. for 01000011110 the position equals 9.
-size_t getLeftmostSetBitPos(size_t num) {
-    assert(num != 0 &&
-           "Finding leftmost set bit when number equals zero is undefined");
-    unsigned long index = 0;
-    _BitScanReverse(&index, (unsigned long)num);
-    return (size_t)index;
-}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c8b854ba5d..20f982c65e 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,10 +1,16 @@
-# Copyright (C) 2022-2024 Intel Corporation
+# Copyright (C) 2022-2025 Intel Corporation
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
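+# Note: test targets and CTest entries defined in this file follow the
+# test_<name> convention established in build_umf_test()/add_umf_test()
+# below, e.g. add_umf_test(NAME ipc_negative ...) produces test_ipc_negative.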
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED YES)

+if(CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM")
+    # Compiler dependencies need to be in the library path or be linked
+    # statically
+    add_link_options(-static-intel)
+endif()
+
 include(FetchContent)
 FetchContent_Declare(
     googletest
@@ -25,8 +31,11 @@
 set(UMF_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(UMF_UTILS_DIR ${UMF_CMAKE_SOURCE_DIR}/src/utils)

 function(build_umf_test)
-    # Parameters: * NAME - a name of the test * SRCS - source files * LIBS -
-    # libraries to be linked with
+    # Parameters:
+    #
+    # * NAME - a name of the test
+    # * SRCS - source files
+    # * LIBS - libraries to be linked with
     set(oneValueArgs NAME)
     set(multiValueArgs SRCS LIBS)
     cmake_parse_arguments(
         ARG
         ""
         "${multiValueArgs}"
         ${ARGN})

-    set(TEST_NAME umf-${ARG_NAME})
-    set(TEST_TARGET_NAME umf_test-${ARG_NAME})
+    set(TEST_NAME test_${ARG_NAME})
+    set(TEST_TARGET_NAME test_${ARG_NAME})

     set(LIB_DIRS ${LIB_DIRS} ${LIBHWLOC_LIBRARY_DIRS})

-    if(UMF_BUILD_LIBUMF_POOL_JEMALLOC)
-        set(LIB_DIRS ${LIB_DIRS} ${JEMALLOC_LIBRARY_DIRS})
-    endif()
-
     if(UMF_BUILD_CUDA_PROVIDER)
+        set(INC_DIRS ${INC_DIRS} ${CUDA_INCLUDE_DIRS})
         set(LIB_DIRS ${LIB_DIRS} ${CUDA_LIBRARY_DIRS})
     endif()

+    if(UMF_BUILD_LEVEL_ZERO_PROVIDER)
+        set(INC_DIRS ${INC_DIRS} ${LEVEL_ZERO_INCLUDE_DIRS})
+    endif()
+
+    if(NOT UMF_DISABLE_HWLOC)
+        set(INC_DIRS ${INC_DIRS} ${LIBHWLOC_INCLUDE_DIRS})
+    endif()
+
+    if(UMF_POOL_JEMALLOC_ENABLED)
+        set(CPL_DEFS ${CPL_DEFS} UMF_POOL_JEMALLOC_ENABLED=1)
+    endif()
+
+    if(UMF_POOL_SCALABLE_ENABLED)
+        set(CPL_DEFS ${CPL_DEFS} UMF_POOL_SCALABLE_ENABLED=1)
+    endif()
+
     set(TEST_LIBS
         umf_test_common
         ${ARG_LIBS}
@@ -61,15 +83,7 @@
         SRCS ${ARG_SRCS}
         LIBS ${TEST_LIBS})

-    if(UMF_POOL_JEMALLOC_ENABLED)
-        target_compile_definitions(${TEST_TARGET_NAME}
-                                   PRIVATE UMF_POOL_JEMALLOC_ENABLED=1)
-    endif()
-
-    if(UMF_POOL_SCALABLE_ENABLED)
-        target_compile_definitions(${TEST_TARGET_NAME}
-                                   PRIVATE UMF_POOL_SCALABLE_ENABLED=1)
-    endif()
+    target_compile_definitions(${TEST_TARGET_NAME} PRIVATE ${CPL_DEFS})

     if(NOT MSVC)
         # Suppress 'cast discards const qualifier' warnings.
Parametrized GTEST @@ -81,6 +95,7 @@ function(build_umf_test) target_compile_options(${TEST_TARGET_NAME} PRIVATE -Werror) endif() endif() + target_link_directories(${TEST_TARGET_NAME} PRIVATE ${LIB_DIRS}) target_include_directories( @@ -88,16 +103,22 @@ function(build_umf_test) PRIVATE ${UMF_CMAKE_SOURCE_DIR}/include ${UMF_CMAKE_SOURCE_DIR}/src ${UMF_CMAKE_SOURCE_DIR}/src/base_alloc + ${UMF_CMAKE_SOURCE_DIR}/src/coarse ${UMF_CMAKE_SOURCE_DIR}/src/utils ${UMF_TEST_DIR}/common - ${UMF_TEST_DIR}) + ${UMF_TEST_DIR} + ${INC_DIRS}) endfunction() function(add_umf_test) - # Parameters: * NAME - a name of the test * SRCS - source files * LIBS - - # libraries to be linked with + # Parameters: + # + # * NAME - a name of the test + # * SRCS - source files + # * LIBS - libraries to be linked with + # * ENVS - environment variables set(oneValueArgs NAME) - set(multiValueArgs SRCS LIBS) + set(multiValueArgs SRCS LIBS ENVS) cmake_parse_arguments( ARG "" @@ -106,15 +127,12 @@ function(add_umf_test) ${ARGN}) build_umf_test( - NAME - ${ARG_NAME} - SRCS - ${ARG_SRCS} - LIBS - ${ARG_LIBS}) + NAME ${ARG_NAME} + SRCS ${ARG_SRCS} + LIBS ${ARG_LIBS}) - set(TEST_NAME umf-${ARG_NAME}) - set(TEST_TARGET_NAME umf_test-${ARG_NAME}) + set(TEST_NAME test_${ARG_NAME}) + set(TEST_TARGET_NAME test_${ARG_NAME}) add_test( NAME ${TEST_NAME} @@ -122,12 +140,30 @@ function(add_umf_test) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) set_tests_properties(${TEST_NAME} PROPERTIES LABELS "umf") + if(ARG_ENVS) + set_tests_properties(${TEST_NAME} PROPERTIES ENVIRONMENT ${ARG_ENVS}) + endif() if(WINDOWS) + # add PATH to DLL on Windows + set(DLL_PATH_LIST + "${DLL_PATH_LIST};PATH=path_list_append:${CMAKE_BINARY_DIR}/bin/;PATH=path_list_append:${CMAKE_BINARY_DIR}/bin/$/" + ) + # append PATH to DLLs set_property(TEST ${TEST_NAME} PROPERTY ENVIRONMENT_MODIFICATION "${DLL_PATH_LIST}") endif() + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is required + # because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so and tests + # should use it instead of system one. 
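+        # (ENVIRONMENT_MODIFICATION is applied by CTest at test run time;
+        # path_list_prepend therefore keeps any LD_LIBRARY_PATH entries the
+        # caller already has, rather than replacing them.)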
+ set_property( + TEST ${TEST_NAME} + PROPERTY ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib" + ) + endif() endfunction() add_subdirectory(common) @@ -156,10 +192,6 @@ if(UMF_BUILD_SHARED_LIBRARY) endif() endif() -if(UMF_POOL_JEMALLOC_ENABLED) - set(LIB_JEMALLOC_POOL jemalloc_pool) -endif() - if(UMF_BUILD_SHARED_LIBRARY) # if build as shared library, ba symbols won't be visible in tests set(BA_SOURCES_FOR_TEST ${BA_SOURCES}) @@ -179,6 +211,11 @@ add_umf_test( SRCS utils/utils_log.cpp ${UMF_UTILS_SOURCES} LIBS ${UMF_LOGGER_LIBS}) +add_umf_test( + NAME ctl + SRCS ctl/test.cpp ctl/ctl_debug.c ../src/ctl/ctl.c ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( NAME utils_common SRCS utils/utils.cpp @@ -192,40 +229,41 @@ if(LINUX) endif() add_umf_test( - NAME provider_coarse - SRCS provider_coarse.cpp ${BA_SOURCES_FOR_TEST} + NAME coarse_lib + SRCS coarse_lib.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST} coarse) + +add_umf_test( + NAME disjoint_pool + SRCS pools/disjoint_pool.cpp malloc_compliance_tests.cpp + ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - add_umf_test( - NAME disjointPool - SRCS pools/disjoint_pool.cpp malloc_compliance_tests.cpp - LIBS disjoint_pool) - add_umf_test( - NAME c_api_disjoint_pool - SRCS c_api/disjoint_pool.c - LIBS disjoint_pool) +add_umf_test( + NAME c_api_disjoint_pool + SRCS c_api/disjoint_pool.c ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + +if(LINUX AND (NOT UMF_DISABLE_HWLOC)) + # this test uses the file provider add_umf_test( - NAME disjointCoarseMallocPool - SRCS disjointCoarseMallocPool.cpp - LIBS disjoint_pool) + NAME disjoint_pool_file_prov + SRCS disjoint_pool_file_prov.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) endif() -if(UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_POOL_JEMALLOC_ENABLED +if(UMF_POOL_JEMALLOC_ENABLED AND UMF_POOL_SCALABLE_ENABLED AND (NOT UMF_DISABLE_HWLOC)) - add_umf_test( - NAME c_api_multi_pool - SRCS c_api/multi_pool.c - LIBS disjoint_pool jemalloc_pool ${JEMALLOC_LIBRARIES}) + add_umf_test(NAME c_api_multi_pool SRCS c_api/multi_pool.c) endif() if(UMF_POOL_JEMALLOC_ENABLED AND (NOT UMF_DISABLE_HWLOC)) add_umf_test( NAME jemalloc_pool SRCS pools/jemalloc_pool.cpp malloc_compliance_tests.cpp - LIBS jemalloc_pool) + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) endif() if(UMF_POOL_SCALABLE_ENABLED AND (NOT UMF_DISABLE_HWLOC)) @@ -248,13 +286,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_os_memory SRCS provider_os_memory.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL}) - if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - target_compile_definitions(umf_test-provider_os_memory - PRIVATE UMF_POOL_DISJOINT_ENABLED=1) - target_link_libraries(umf_test-provider_os_memory PRIVATE disjoint_pool) - endif() - + LIBS ${UMF_UTILS_FOR_TEST}) add_umf_test( NAME provider_os_memory_multiple_numa_nodes SRCS provider_os_memory_multiple_numa_nodes.cpp @@ -262,27 +294,27 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME memspace_numa SRCS memspaces/memspace_numa.cpp - LIBS ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME provider_os_memory_config SRCS provider_os_memory_config.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( 
NAME memspace_host_all SRCS memspaces/memspace_host_all.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME memspace_highest_capacity SRCS memspaces/memspace_highest_capacity.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME memspace_highest_bandwidth SRCS memspaces/memspace_highest_bandwidth.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME memspace_lowest_latency SRCS memspaces/memspace_lowest_latency.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME mempolicy SRCS memspaces/mempolicy.cpp @@ -294,7 +326,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME memtarget SRCS memspaces/memtarget.cpp - LIBS ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME provider_devdax_memory SRCS provider_devdax_memory.cpp @@ -302,7 +334,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_devdax_memory_ipc SRCS provider_devdax_memory_ipc.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL}) + LIBS ${UMF_UTILS_FOR_TEST}) add_umf_test( NAME provider_file_memory SRCS provider_file_memory.cpp @@ -310,18 +342,33 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_file_memory_ipc SRCS provider_file_memory_ipc.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL}) + LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( + NAME provider_fixed_memory + SRCS provider_fixed_memory.cpp + LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( + NAME provider_tracking + SRCS provider_tracking.cpp + LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( + NAME provider_tracking_fixture_tests + SRCS provider_tracking_fixture_tests.cpp malloc_compliance_tests.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) # This test requires Linux-only file memory provider if(UMF_POOL_JEMALLOC_ENABLED) add_umf_test( NAME jemalloc_coarse_file SRCS pools/jemalloc_coarse_file.cpp malloc_compliance_tests.cpp - LIBS jemalloc_pool) + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) add_umf_test( NAME jemalloc_coarse_devdax SRCS pools/jemalloc_coarse_devdax.cpp malloc_compliance_tests.cpp - LIBS jemalloc_pool) + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) endif() # This test requires Linux-only file memory provider @@ -368,18 +415,22 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) SRCS providers/provider_level_zero.cpp ${UMF_UTILS_DIR}/utils_level_zero.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST} ze_loader) - target_include_directories(umf_test-provider_level_zero - PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) add_umf_test( - NAME provider_level_zero_dlopen + NAME provider_level_zero_dlopen_global + SRCS providers/provider_level_zero.cpp + ${UMF_UTILS_DIR}/utils_level_zero.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + target_compile_definitions(test_provider_level_zero_dlopen_global + PUBLIC USE_DLOPEN=1 OPEN_ZE_LIBRARY_GLOBAL=1) + + add_umf_test( + NAME provider_level_zero_dlopen_local SRCS 
providers/provider_level_zero.cpp ${UMF_UTILS_DIR}/utils_level_zero.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) - target_compile_definitions(umf_test-provider_level_zero_dlopen - PUBLIC USE_DLOPEN=1) - target_include_directories(umf_test-provider_level_zero_dlopen - PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) + target_compile_definitions(test_provider_level_zero_dlopen_local + PUBLIC USE_DLOPEN=1 OPEN_ZE_LIBRARY_GLOBAL=0) endif() if(NOT UMF_BUILD_LEVEL_ZERO_PROVIDER) @@ -399,20 +450,22 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) SRCS providers/provider_cuda.cpp providers/cuda_helpers.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST} cuda) - target_include_directories(umf_test-provider_cuda - PRIVATE ${CUDA_INCLUDE_DIRS}) - target_link_directories(umf_test-provider_cuda PRIVATE - ${CUDA_LIBRARY_DIRS}) add_umf_test( - NAME provider_cuda_dlopen + NAME provider_cuda_dlopen_global + SRCS providers/provider_cuda.cpp providers/cuda_helpers.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + target_compile_definitions(test_provider_cuda_dlopen_global + PUBLIC USE_DLOPEN=1 OPEN_CU_LIBRARY_GLOBAL=1) + + add_umf_test( + NAME provider_cuda_dlopen_local SRCS providers/provider_cuda.cpp providers/cuda_helpers.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) - target_compile_definitions(umf_test-provider_cuda_dlopen - PUBLIC USE_DLOPEN=1) - target_include_directories(umf_test-provider_cuda_dlopen - PRIVATE ${CUDA_INCLUDE_DIRS}) + target_compile_definitions(test_provider_cuda_dlopen_local + PUBLIC USE_DLOPEN=1 OPEN_CU_LIBRARY_GLOBAL=0) else() message( STATUS @@ -453,10 +506,10 @@ if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY) # TODO enable this test on Windows if(LINUX) add_umf_test( - NAME test_proxy_lib_size_threshold + NAME proxy_lib_size_threshold SRCS ${BA_SOURCES_FOR_TEST} test_proxy_lib_size_threshold.cpp LIBS ${UMF_UTILS_FOR_TEST} umf_proxy) - set_property(TEST umf-test_proxy_lib_size_threshold + set_property(TEST test_proxy_lib_size_threshold PROPERTY ENVIRONMENT UMF_PROXY="size.threshold=64") endif() @@ -466,7 +519,7 @@ if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY) SRCS ${BA_SOURCES_FOR_TEST} memoryPoolAPI.cpp malloc_compliance_tests.cpp LIBS ${UMF_UTILS_FOR_TEST} umf_proxy) - target_compile_definitions(umf_test-proxy_lib_memoryPool + target_compile_definitions(test_proxy_lib_memoryPool PUBLIC UMF_PROXY_LIB_ENABLED=1) endif() @@ -475,11 +528,19 @@ add_umf_test( SRCS ipcAPI.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) +add_umf_test( + NAME ipc_max_opened_limit + SRCS ipcAPI.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST} + ENVS "UMF_MAX_OPENED_IPC_HANDLES=10") + add_umf_test(NAME ipc_negative SRCS ipc_negative.cpp) function(add_umf_ipc_test) - # Parameters: * TEST - a name of the test * SRC_DIR - source files directory - # path + # Parameters: + # + # * TEST - a name of the test + # * SRC_DIR - source files directory path set(oneValueArgs TEST SRC_DIR) cmake_parse_arguments( ARG @@ -488,7 +549,7 @@ function(add_umf_ipc_test) "" ${ARGN}) - set(TEST_NAME umf-${ARG_TEST}) + set(TEST_NAME test_${ARG_TEST}) if(DEFINED ARG_SRC_DIR) set(SRC_DIR ${ARG_SRC_DIR}) @@ -508,140 +569,90 @@ function(add_umf_ipc_test) if(NOT UMF_TESTS_FAIL_ON_SKIP) set_tests_properties(${TEST_NAME} PROPERTIES SKIP_RETURN_CODE 125) endif() + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is required + # because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so and tests + # should use it instead of system one. 
+ set_property( + TEST ${TEST_NAME} + PROPERTY ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib" + ) + endif() endfunction() if(LINUX) if(NOT UMF_DISABLE_HWLOC AND UMF_POOL_SCALABLE_ENABLED) build_umf_test( - NAME - ipc_os_prov_consumer - SRCS - ipc_os_prov_consumer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_os_prov_consumer + SRCS ipc_os_prov_consumer.c common/ipc_common.c + common/ipc_os_prov_common.c) build_umf_test( - NAME - ipc_os_prov_producer - SRCS - ipc_os_prov_producer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_os_prov_producer + SRCS ipc_os_prov_producer.c common/ipc_common.c + common/ipc_os_prov_common.c) add_umf_ipc_test(TEST ipc_os_prov_anon_fd) add_umf_ipc_test(TEST ipc_os_prov_shm) if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY) build_umf_test( - NAME - ipc_os_prov_proxy - SRCS - ipc_os_prov_proxy.c - common/ipc_common.c - LIBS - ${UMF_UTILS_FOR_TEST}) + NAME ipc_os_prov_proxy + SRCS ipc_os_prov_proxy.c common/ipc_common.c + LIBS ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_os_prov_proxy) endif() build_umf_test( - NAME - ipc_devdax_prov_consumer - SRCS - ipc_devdax_prov_consumer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_devdax_prov_consumer + SRCS ipc_devdax_prov_consumer.c common/ipc_common.c + common/ipc_os_prov_common.c) build_umf_test( - NAME - ipc_devdax_prov_producer - SRCS - ipc_devdax_prov_producer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_devdax_prov_producer + SRCS ipc_devdax_prov_producer.c common/ipc_common.c + common/ipc_os_prov_common.c) add_umf_ipc_test(TEST ipc_devdax_prov) build_umf_test( - NAME - ipc_file_prov_consumer - SRCS - ipc_file_prov_consumer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_file_prov_consumer + SRCS ipc_file_prov_consumer.c common/ipc_common.c + common/ipc_os_prov_common.c) build_umf_test( - NAME - ipc_file_prov_producer - SRCS - ipc_file_prov_producer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_file_prov_producer + SRCS ipc_file_prov_producer.c common/ipc_common.c + common/ipc_os_prov_common.c) add_umf_ipc_test(TEST ipc_file_prov) add_umf_ipc_test(TEST ipc_file_prov_fsdax) endif() # TODO add IPC tests for CUDA - if(UMF_BUILD_GPU_TESTS - AND UMF_BUILD_LEVEL_ZERO_PROVIDER - AND UMF_BUILD_LIBUMF_POOL_DISJOINT) + if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) build_umf_test( - NAME - ipc_level_zero_prov_consumer - SRCS - providers/ipc_level_zero_prov_consumer.c - common/ipc_common.c - providers/ipc_level_zero_prov_common.c - ${UMF_UTILS_DIR}/utils_level_zero.cpp - LIBS - ze_loader - disjoint_pool - ${UMF_UTILS_FOR_TEST}) + NAME ipc_level_zero_prov_consumer + SRCS providers/ipc_level_zero_prov_consumer.c common/ipc_common.c + providers/ipc_level_zero_prov_common.c + ${UMF_UTILS_DIR}/utils_level_zero.cpp + LIBS ze_loader ${UMF_UTILS_FOR_TEST}) build_umf_test( - NAME - ipc_level_zero_prov_producer - SRCS - providers/ipc_level_zero_prov_producer.c - common/ipc_common.c - providers/ipc_level_zero_prov_common.c - ${UMF_UTILS_DIR}/utils_level_zero.cpp - LIBS - ze_loader - disjoint_pool - ${UMF_UTILS_FOR_TEST}) - target_include_directories(umf_test-ipc_level_zero_prov_producer - PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) - target_include_directories(umf_test-ipc_level_zero_prov_consumer - PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) + NAME ipc_level_zero_prov_producer + SRCS providers/ipc_level_zero_prov_producer.c common/ipc_common.c + 
providers/ipc_level_zero_prov_common.c + ${UMF_UTILS_DIR}/utils_level_zero.cpp + LIBS ze_loader ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_level_zero_prov SRC_DIR providers) endif() - if(UMF_BUILD_GPU_TESTS - AND UMF_BUILD_CUDA_PROVIDER - AND UMF_BUILD_LIBUMF_POOL_DISJOINT) + if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) build_umf_test( - NAME - ipc_cuda_prov_consumer - SRCS - providers/ipc_cuda_prov_consumer.c - common/ipc_common.c - providers/ipc_cuda_prov_common.c - providers/cuda_helpers.cpp - LIBS - cuda - disjoint_pool - ${UMF_UTILS_FOR_TEST}) + NAME ipc_cuda_prov_consumer + SRCS providers/ipc_cuda_prov_consumer.c common/ipc_common.c + providers/ipc_cuda_prov_common.c providers/cuda_helpers.cpp + LIBS cuda ${UMF_UTILS_FOR_TEST}) build_umf_test( - NAME - ipc_cuda_prov_producer - SRCS - providers/ipc_cuda_prov_producer.c - common/ipc_common.c - providers/ipc_cuda_prov_common.c - providers/cuda_helpers.cpp - LIBS - cuda - disjoint_pool - ${UMF_UTILS_FOR_TEST}) - target_include_directories(umf_test-ipc_cuda_prov_producer - PRIVATE ${CUDA_INCLUDE_DIRS}) - target_include_directories(umf_test-ipc_cuda_prov_consumer - PRIVATE ${CUDA_INCLUDE_DIRS}) + NAME ipc_cuda_prov_producer + SRCS providers/ipc_cuda_prov_producer.c common/ipc_common.c + providers/ipc_cuda_prov_common.c providers/cuda_helpers.cpp + LIBS cuda ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_cuda_prov SRC_DIR providers) endif() else() @@ -658,7 +669,7 @@ if(LINUX LIBS dl) # append LD_LIBRARY_PATH to the libumf set_property( - TEST umf-init_teardown + TEST test_init_teardown PROPERTY ENVIRONMENT_MODIFICATION "LD_LIBRARY_PATH=path_list_append:${CMAKE_BINARY_DIR}/lib") endif() @@ -667,6 +678,7 @@ endif() # replace test_examples.sh with CMake script?) if(LINUX AND UMF_BUILD_SHARED_LIBRARY + AND UMF_BUILD_EXAMPLES AND NOT (UMF_USE_ASAN OR UMF_USE_UBSAN @@ -694,41 +706,34 @@ if(LINUX ) endif() - if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) + if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLES ${EXAMPLES} level_zero_shared_memory) else() message( STATUS - "GPU level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES, " - "UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT " - "to be turned ON - skipping") + "GPU level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES and " + "UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping") endif() if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_CUDA_PROVIDER AND UMF_CUDA_ENABLED) set(EXAMPLES ${EXAMPLES} cuda_shared_memory) else() message( STATUS - "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, " - "UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT " - "to be turned ON and installed CUDA libraries - skipping") + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES " + "and UMF_BUILD_CUDA_PROVIDER to be turned ON and installed CUDA " + "libraries - skipping") endif() # TODO add IPC examples for CUDA - if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) + if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLES ${EXAMPLES} ipc_level_zero) else() message( - STATUS - "IPC Level 0 example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" - ) + STATUS "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES and " + "UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping") 
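+        # (whether or not each example is enabled above, every name collected
+        # in EXAMPLES is later passed to the umf-standalone_examples test
+        # defined at the end of this file)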
endif() if(UMF_POOL_SCALABLE_ENABLED) @@ -745,7 +750,7 @@ if(LINUX else() message( STATUS - "The dram_and_fsdax example is supported on Linux only and requires UMF_BUILD_LIBUMF_POOL_JEMALLOC to be turned ON - skipping" + "The dram_and_fsdax example is supported on Linux only and requires the jemalloc pool, but it is disabled - skipping" ) endif() @@ -753,6 +758,13 @@ if(LINUX set(STANDALONE_CMAKE_OPTIONS "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" ) + if(JEMALLOC_INCLUDE_DIRS) + # add custom jemalloc installation + set(STANDALONE_CMAKE_OPTIONS + "${STANDALONE_CMAKE_OPTIONS} -DCMAKE_PREFIX_PATH=${JEMALLOC_INCLUDE_DIRS}/../" + ) + endif() + add_test( NAME umf-standalone_examples COMMAND @@ -762,5 +774,15 @@ if(LINUX "${CMAKE_INSTALL_PREFIX}" "${STANDALONE_CMAKE_OPTIONS}" ${EXAMPLES} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is + # required because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so + # and tests should use it instead of system one. + set_property( + TEST umf-standalone_examples + PROPERTY + ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib") + endif() endif() endif() diff --git a/test/c_api/disjoint_pool.c b/test/c_api/disjoint_pool.c index 4d4634def1..b529497c8f 100644 --- a/test/c_api/disjoint_pool.c +++ b/test/c_api/disjoint_pool.c @@ -1,10 +1,11 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include -#include "pool_disjoint.h" +#include + #include "provider_null.h" #include "test_helpers.h" #include "test_ut_asserts.h" diff --git a/test/c_api/test_ut_asserts.h b/test/c_api/test_ut_asserts.h index 834d39bda8..b73f0cd19e 100644 --- a/test/c_api/test_ut_asserts.h +++ b/test/c_api/test_ut_asserts.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -9,7 +9,7 @@ /* The project uses GTEST framework for testing, which is not supported in C - These asserts should NOT be used in other purposes than for testing C API + These asserts should NOT be used in other purposes than for testing C API */ #ifndef UMF_TEST_UT_ASSERTS_H diff --git a/test/coarse_lib.cpp b/test/coarse_lib.cpp new file mode 100644 index 0000000000..7611833899 --- /dev/null +++ b/test/coarse_lib.cpp @@ -0,0 +1,1410 @@ +/* + * Copyright (C) 2024-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
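+ *
+ * These tests drive the coarse allocator library directly. A minimal sketch
+ * of the lifecycle exercised below (error checking elided; statistics are
+ * queried via coarse_get_stats()):
+ *
+ *   coarse_t *ch = NULL;
+ *   coarse_new(&coarse_params, &ch);
+ *   coarse_add_memory_from_provider(ch, 20 * MB); // or coarse_add_memory_fixed()
+ *   void *ptr = NULL;
+ *   coarse_alloc(ch, 2 * MB, 0, &ptr);
+ *   coarse_free(ch, ptr, 2 * MB);
+ *   coarse_delete(ch);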
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include "coarse.h" +#include "provider.hpp" + +using umf_test::KB; +using umf_test::MB; +using umf_test::test; + +#define MOCKED_COARSE ((coarse_t *)0x01) +#define MOCKED_PROVIDER ((umf_memory_provider_handle_t)0x02) +#define INVALID_PTR ((void *)0x03) + +static umf_result_t alloc_cb(void *provider, size_t size, size_t alignment, + void **ptr) { + return umfMemoryProviderAlloc((umf_memory_provider_handle_t)provider, size, + alignment, ptr); +} + +static umf_result_t free_cb(void *provider, void *ptr, size_t size) { + return umfMemoryProviderFree((umf_memory_provider_handle_t)provider, ptr, + size); +} + +static umf_result_t split_cb(void *provider, void *ptr, size_t totalSize, + size_t firstSize) { + if (provider == NULL || ptr == NULL || (firstSize >= totalSize) || + firstSize == 0 || totalSize == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t merge_cb(void *provider, void *lowPtr, void *highPtr, + size_t totalSize) { + if (provider == NULL || lowPtr == NULL || highPtr == NULL || + totalSize == 0 || ((uintptr_t)highPtr <= (uintptr_t)lowPtr) || + ((uintptr_t)highPtr - (uintptr_t)lowPtr >= totalSize)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t alloc_cb_fails(void *provider, size_t size, + size_t alignment, void **ptr) { + (void)provider; //unused + (void)size; //unused + (void)alignment; //unused + (void)ptr; //unused + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; +} + +static umf_result_t free_cb_fails(void *provider, void *ptr, size_t size) { + (void)provider; //unused + (void)ptr; //unused + (void)size; //unused + return UMF_RESULT_ERROR_USER_SPECIFIC; +} + +static umf_result_t split_cb_fails(void *provider, void *ptr, size_t totalSize, + size_t firstSize) { + (void)provider; //unused + (void)ptr; //unused + (void)totalSize; //unused + (void)firstSize; //unused + return UMF_RESULT_ERROR_USER_SPECIFIC; +} + +static umf_result_t merge_cb_fails(void *provider, void *lowPtr, void *highPtr, + size_t totalSize) { + (void)provider; //unused + (void)lowPtr; //unused + (void)highPtr; //unused + (void)totalSize; //unused + return UMF_RESULT_ERROR_USER_SPECIFIC; +} + +static void coarse_params_set_default(coarse_params_t *coarse_params, + umf_memory_provider_handle_t provider, + coarse_strategy_t allocation_strategy) { + memset(coarse_params, 0, sizeof(*coarse_params)); + coarse_params->provider = provider; + coarse_params->allocation_strategy = allocation_strategy; + coarse_params->cb.split = split_cb; + coarse_params->cb.merge = merge_cb; + coarse_params->page_size = utils_get_page_size(); + + if (provider) { + coarse_params->cb.alloc = alloc_cb; + coarse_params->cb.free = free_cb; + } +} + +umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS = + umf_test::providerMakeCOps(); + +struct CoarseWithMemoryStrategyTest + : umf_test::test, + ::testing::WithParamInterface { + void SetUp() override { + test::SetUp(); + allocation_strategy = this->GetParam(); + coarse_params_set_default(&coarse_params, MOCKED_PROVIDER, + allocation_strategy); + } + + coarse_t *coarse_handle = nullptr; + coarse_params_t coarse_params; + coarse_strategy_t allocation_strategy; + umf_result_t umf_result; +}; + +INSTANTIATE_TEST_SUITE_P( + CoarseWithMemoryStrategyTest, CoarseWithMemoryStrategyTest, + ::testing::Values(UMF_COARSE_MEMORY_STRATEGY_FASTEST, + UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, + 
UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE)); + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_provider) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t alloc_size = 20 * MB; + void *ptr; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_free(ch, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // test double free + umf_result = coarse_free(ch, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_fixed_memory) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB + coarse_params.page_size; + std::vector buffer(buff_size, 0); + void *buf = (void *)ALIGN_UP_SAFE((uintptr_t)buffer.data(), + coarse_params.page_size); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_free(ch, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // test double free + umf_result = coarse_free(ch, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + 
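+    // (invariant checked throughout: used_size tracks live allocations only,
+    // while alloc_size stays equal to the memory added to the allocator)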
ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_fixed_memory_various) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB + coarse_params.page_size; + std::vector buffer(buff_size, 0); + void *buf = (void *)ALIGN_UP_SAFE((uintptr_t)buffer.data(), + coarse_params.page_size); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // free NULL + umf_result = coarse_free(ch, nullptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // free invalid pointer + umf_result = coarse_free(ch, INVALID_PTR, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // wrong alignment (3 bytes) + ptr = nullptr; + umf_result = coarse_alloc(ch, 2 * MB, 3, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ALIGNMENT); + ASSERT_EQ(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // not freed allocation + // coarse_delete() prints LOG_WARN() in Debug mode + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_split_merge) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + const size_t alloc_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + /* test coarse_split */ + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_split(ch, ptr, 2 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_free(ch, (ptr + 1 * MB), 1 * MB); + ASSERT_EQ(umf_result, 
UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 1 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_free(ch, ptr, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + /* test coarse_merge */ + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_split(ch, ptr, 2 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB), 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_free(ch, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(coarse_handle); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +// negative tests + +// NULL parameters +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_no_params) { + umf_result = coarse_new(nullptr, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_handle, nullptr); +} + +// no provider +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_no_provider) { + coarse_params.provider = NULL; + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_handle, nullptr); +} + +// no page size +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_no_page_size) { + coarse_params.page_size = 0; + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_handle, nullptr); +} + +// no split callback +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_no_split_cb) { + coarse_params.cb.split = NULL; + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_handle, nullptr); +} + +// no merge callback +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_no_merge_cb) { + coarse_params.cb.merge = NULL; + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_handle, nullptr); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_alloc_invalid) { + void *ptr = nullptr; + + umf_result = coarse_alloc(nullptr, MB, 0, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(ptr, nullptr); + + umf_result = coarse_alloc(nullptr, MB, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(ptr, nullptr); + + umf_result = coarse_alloc(MOCKED_COARSE, MB, 0, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(ptr, nullptr); +} + 
+TEST_P(CoarseWithMemoryStrategyTest, coarseTest_free_invalid) { + // coarse handle is NULL + umf_result = coarse_free(nullptr, nullptr, MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_delete_null) { + coarse_delete(nullptr); +} + +TEST_P(CoarseWithMemoryStrategyTest, + coarseTest_add_memory_from_provider_null_0) { + umf_result = coarse_add_memory_from_provider(nullptr, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_add_memory_fixed_null_0) { + umf_result = coarse_add_memory_fixed(nullptr, nullptr, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_null_stats) { + ASSERT_EQ(coarse_get_stats(nullptr).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(nullptr).used_size, 0); + ASSERT_EQ(coarse_get_stats(nullptr).num_all_blocks, 0); + ASSERT_EQ(coarse_get_stats(nullptr).num_free_blocks, 0); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_split_merge_negative) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + const size_t alloc_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + /* test coarse_split */ + + umf_result = coarse_alloc(ch, 6 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 6 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + // firstSize >= totalSize + umf_result = coarse_split(ch, ptr, 6 * MB, 6 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // firstSize == 0 + umf_result = coarse_split(ch, ptr, 6 * MB, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // totalSize == 0 + umf_result = coarse_split(ch, ptr, 0, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // wrong totalSize + umf_result = coarse_split(ch, ptr, 5 * MB, 1 * KB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // memory block not found + umf_result = coarse_split(ch, ptr + 1, 6 * MB, 1 * KB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = coarse_free(ch, ptr, 6 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // split freed block + umf_result = coarse_split(ch, ptr, alloc_size, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + /* test coarse_merge */ + + umf_result = coarse_alloc(ch, 6 * MB, 0, 
(void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 6 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + // split (6 * MB) block into (1 * MB) + (5 * MB) + umf_result = coarse_split(ch, ptr, 6 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 6 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + // split (5 * MB) block into (2 * MB) + (3 * MB) + umf_result = coarse_split(ch, (ptr + 1 * MB), 5 * MB, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 6 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 4); + + // now we have 3 used blocks: (1 * MB) + (2 * MB) + (3 * MB) + + // highPtr <= lowPtr + umf_result = coarse_merge(ch, (ptr + 1 * MB), ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // highPtr - lowPtr >= totalSize + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB), 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // low ptr does not exist + umf_result = coarse_merge(ch, ptr + 1, (ptr + 1 * MB), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // high ptr does not exist + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB + 1), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // low_block->size + high_block->size != totalSize + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB), 5 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // not adjacent blocks + umf_result = coarse_merge(ch, ptr, (ptr + 3 * MB), 4 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // free the 2 MB block in the middle + umf_result = coarse_free(ch, (ptr + 1 * MB), 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 4 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 4); + + // now we have 3 blocks: (1 * MB) used + (2 * MB) freed + (3 * MB) used + + // the low ptr block is not allocated + umf_result = coarse_merge(ch, (ptr + 1 * MB), (ptr + 3 * MB), 5 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // the high ptr block is not allocated + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = coarse_free(ch, ptr, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 3 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_free(ch, (ptr + 3 * MB), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(coarse_handle); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_alloc_cb_fails) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, 
nullptr); + + coarse_params.provider = malloc_memory_provider; + coarse_params.cb.alloc = alloc_cb_fails; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t alloc_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_free_cb_fails) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + coarse_params.cb.free = free_cb_fails; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t alloc_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_split_cb_fails) { + if (coarse_params.allocation_strategy == + UMF_COARSE_MEMORY_STRATEGY_FASTEST) { + // This test is designed for the UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE + // and UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE strategies, + // because the UMF_COARSE_MEMORY_STRATEGY_FASTEST strategy + // looks always for a block of size greater by the page size. 
+ return; + } + + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + coarse_params.cb.split = split_cb_fails; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + void *ptr = nullptr; + const size_t alloc_size = 20 * MB; + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // coarse_alloc(alloc_size / 2, alignment = 0) + umf_result = coarse_alloc(ch, alloc_size / 2, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_USER_SPECIFIC); + ASSERT_EQ(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // coarse_alloc(alloc_size / 2, alignment = 2 * MB) + umf_result = coarse_alloc(ch, alloc_size / 2, 2 * MB, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_USER_SPECIFIC); + ASSERT_EQ(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // coarse_alloc(alloc_size, alignment = 0) - OK + umf_result = coarse_alloc(ch, alloc_size, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + umf_result = coarse_split(ch, ptr, alloc_size, alloc_size / 2); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_USER_SPECIFIC); + + ASSERT_EQ(coarse_get_stats(ch).used_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + umf_result = coarse_free(ch, ptr, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(coarse_handle); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_merge_cb_fails) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 10 * MB + coarse_params.page_size; + std::vector<char> buffer(buff_size, 0); + void *buf = (void *)ALIGN_UP_SAFE((uintptr_t)buffer.data(), + coarse_params.page_size); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; + coarse_params.cb.merge = merge_cb_fails; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 *
MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + /* test coarse_merge */ + umf_result = coarse_alloc(ch, 3 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 3 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_split(ch, ptr, 3 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 3 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_USER_SPECIFIC); + ASSERT_EQ(coarse_get_stats(ch).used_size, 3 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_free(ch, ptr, 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_get_stats(ch).used_size, 3 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_free(ch, ptr, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_free(ch, (ptr + 1 * MB), 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + coarse_delete(coarse_handle); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_fixed_memory_alloc_set) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB; + std::vector<char> buffer(buff_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.free = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_fixed_memory_free_set) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB; + std::vector<char> buffer(buff_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.alloc = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + 
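// NOTE (editorial, an inference from this test suite rather than documented
+ // behavior): the three coarseTest_fixed_memory_* tests together suggest that
+ // coarse_add_memory_fixed() is only supported when BOTH cb.alloc and cb.free
+ // are left unset; here only cb.alloc was cleared, so the call is rejected
+ // with UMF_RESULT_ERROR_NOT_SUPPORTED and the stats below stay at zero.
+ +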
ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_fixed_memory_alloc_free_set) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB; + std::vector<char> buffer(buff_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_provider_alloc_not_set) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + coarse_params.cb.alloc = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t alloc_size = 20 * MB; + void *ptr; + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_alloc(ch, 2 * MB, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + ASSERT_EQ(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_alloc(ch, 2 * MB, 2 * MB, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + ASSERT_EQ(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic) { + if (coarse_params.allocation_strategy == + UMF_COARSE_MEMORY_STRATEGY_FASTEST) { + // This test is designed for the UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE + // and UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE strategies, + // because the UMF_COARSE_MEMORY_STRATEGY_FASTEST strategy + // always looks for a block one page larger than the requested size. 
+ return; + } + + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t init_buffer_size = 20 * MB; + void *p1, *p2; + + umf_result = coarse_add_memory_from_provider(ch, init_buffer_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // alloc 2x 2MB + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p1, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&p2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p2, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 4 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + ASSERT_NE(p1, p2); + + // swap pointers to get p1 < p2 + if (p1 > p2) { + std::swap(p1, p2); + } + + // free + alloc first block + // the block should be reused + // currently there is no purging, so the alloc size shouldn't change + // there should be no block merging between used and not-used blocks + umf_result = coarse_free(ch, p1, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p1, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 4 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + // free all allocs + // overall alloc size shouldn't change + // block p2 should merge with the prev free block p1 + // and the remaining init block + umf_result = coarse_free(ch, p1, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + umf_result = coarse_free(ch, p2, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // test allocations with alignment + // TODO: what about holes? 
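+ // For example: with alignment = 128 every returned pointer must have its
+ // seven low address bits clear, which is exactly what the
+ // ((uintptr_t)p & 127) == 0 checks below verify; the odd size (1 * MB - 4)
+ // additionally exercises requests that are not a multiple of the alignment.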
+ umf_result = coarse_alloc(ch, 1 * MB - 4, 128, (void **)&p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p1, nullptr); + ASSERT_EQ((uintptr_t)p1 & 127, 0); + + umf_result = coarse_alloc(ch, 1 * MB - 4, 128, (void **)&p2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p2, nullptr); + ASSERT_EQ((uintptr_t)p2 & 127, 0); + + umf_result = coarse_free(ch, p1, 1 * MB - 4); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = coarse_free(ch, p2, 1 * MB - 4); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // alloc whole buffer + // after this, there should be a single block + umf_result = coarse_alloc(ch, init_buffer_size, 0, (void **)&p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p1, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // free all memory + umf_result = coarse_free(ch, p1, init_buffer_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // alloc 2 MB block - the init block should be split + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p1, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + // alloc additional 2 MB + // the unused block should be reused + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&p2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p2, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 4 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + ASSERT_NE(p1, p2); + + // make sure that p1 < p2 + if (p1 > p2) { + std::swap(p1, p2); + } + + // free blocks in order: p2, p1 + // block p1 should merge with the next block p2 + coarse_free(ch, p2, 2 * MB); + coarse_free(ch, p1, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // alloc 10x 2 MB - this should occupy all allocated memory + constexpr int allocs_size = 10; + void *allocs[allocs_size] = {0}; + for (int i = 0; i < allocs_size; i++) { + ASSERT_EQ(coarse_get_stats(ch).used_size, i * 2 * MB); + umf_result = coarse_alloc(ch, 2 * MB, 0, &allocs[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(allocs[i], nullptr); + } + ASSERT_EQ(coarse_get_stats(ch).used_size, 20 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + // there should be no blocks with free memory + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, allocs_size); + + // free all memory + for (int i = 0; i < allocs_size; i++) { + umf_result = coarse_free(ch, allocs[i], 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_simple1) { + if (coarse_params.allocation_strategy == + UMF_COARSE_MEMORY_STRATEGY_FASTEST) { + // This test is designed for the UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE + // and UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE strategies, + // because the UMF_COARSE_MEMORY_STRATEGY_FASTEST strategy + // always looks
for a block one page larger than the requested size. + return; + } + + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t init_buffer_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, init_buffer_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // test 1 + + size_t s1 = 74659 * KB; + size_t s2 = 8206 * KB; + + size_t max_alloc_size = 0; + + const int nreps = 2; + const int nptrs = 6; + + // s1 + for (int j = 0; j < nreps; j++) { + void *t[nptrs] = {0}; + for (int i = 0; i < nptrs; i++) { + umf_result = coarse_alloc(ch, s1, 0, &t[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(t[i], nullptr); + } + + size_t alloc_size = coarse_get_stats(ch).alloc_size; + if (alloc_size > max_alloc_size) { + max_alloc_size = alloc_size; + } + + for (int i = 0; i < nptrs; i++) { + umf_result = coarse_free(ch, t[i], s1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + } + + // s2 + for (int j = 0; j < nreps; j++) { + void *t[nptrs] = {0}; + for (int i = 0; i < nptrs; i++) { + umf_result = coarse_alloc(ch, s2, 0, &t[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(t[i], nullptr); + } + + // all s2 allocations should fit into the single block left after freeing s1 + ASSERT_LE(coarse_get_stats(ch).alloc_size, max_alloc_size); + + for (int i = 0; i < nptrs; i++) { + umf_result = coarse_free(ch, t[i], s2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + } + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_simple2) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t init_buffer_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, init_buffer_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // test + double sizes[] = {2, 4, 0.5, 1, 8, 0.25}; + size_t alignment[] = {0, 4, 0, 16, 32, 128}; + for (int i = 0; i < 6; i++) { + size_t s = (size_t)(sizes[i] * MB); + void *t[8] = {0}; + for (int j = 0; j < 8; j++) { + umf_result = coarse_alloc(ch, s, alignment[i], &t[j]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(t[j], nullptr); + } + + for (int j = 0; j < 8; j++) { + umf_result = coarse_free(ch, t[j], s); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + } + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + 
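+// Editorial sketch (illustrative only, not part of the test suite): the
+// minimal coarse lifecycle that the tests above exercise, using only calls
+// demonstrated in this file and the fixture's coarse_params defaults:
+//
+//   coarse_t *ch = NULL;
+//   coarse_new(&coarse_params, &ch);              // create an instance
+//   coarse_add_memory_from_provider(ch, 20 * MB); // seed it with memory
+//   void *p = NULL;
+//   coarse_alloc(ch, 2 * MB, 0, &p);              // size, alignment, out-pointer
+//   coarse_free(ch, p, 2 * MB);                   // the caller passes the size back
+//   coarse_delete(ch);                            // tear the instance down
+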
+TEST_P(CoarseWithMemoryStrategyTest, coarseTest_alignment_provider) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t alloc_size = 40 * MB; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + const int niter = 10; + const int size = 1 * MB; + void *ptr[niter] = {0}; + + for (int i = 0; i < niter; i++) { + umf_result = coarse_alloc(ch, size, 0, &ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr[i], nullptr); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, niter + 1); + + for (int i = 0; i < niter; i += 2) { + umf_result = coarse_free(ch, ptr[i], size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ptr[i] = nullptr; + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size / 2); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, niter + 1); + + for (int i = 0; i < niter; i += 2) { + ASSERT_EQ(ptr[i], nullptr); + umf_result = coarse_alloc(ch, size, 2 * MB, &ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr[i], nullptr); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size); + + for (int i = 0; i < niter; i++) { + umf_result = coarse_free(ch, ptr[i], size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_alignment_fixed_memory) { + // preallocate some memory and initialize the vector with zeros + const size_t alloc_size = 40 * MB + coarse_params.page_size; + std::vector<char> buffer(alloc_size, 0); + void *buf = (void *)ALIGN_UP_SAFE((uintptr_t)buffer.data(), + coarse_params.page_size); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + + umf_result = coarse_add_memory_fixed(ch, buf, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + const int niter = 10; + const int size = 1 * MB; + void *ptr[niter] = {0}; + + for (int i = 0; i < niter; i++) { + umf_result = coarse_alloc(ch, size, 0, &ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr[i], nullptr); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, niter + 1); + + for (int i = 0; i < niter; i
+= 2) { + umf_result = coarse_free(ch, ptr[i], size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ptr[i] = nullptr; + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size / 2); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, niter + 1); + + for (int i = 0; i < niter; i += 2) { + ASSERT_EQ(ptr[i], nullptr); + umf_result = coarse_alloc(ch, size, 2 * MB, &ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr[i], nullptr); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size); + + for (int i = 0; i < niter; i++) { + umf_result = coarse_free(ch, ptr[i], size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, + coarseTest_basic_non_aligned_fixed_memory) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB + coarse_params.page_size; + std::vector<char> buffer(buff_size, 0); + + void *buf_aligned = (void *)ALIGN_UP_SAFE((uintptr_t)buffer.data(), + coarse_params.page_size); + ASSERT_NE(buf_aligned, nullptr); + + void *buf_non_aligned = (void *)((uintptr_t)buf_aligned + 64); + size_t buf_non_aligned_size = + buff_size - ((uintptr_t)buf_non_aligned - (uintptr_t)buffer.data()); + buf_non_aligned_size = + ALIGN_DOWN(buf_non_aligned_size, coarse_params.page_size); + + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + + umf_result = + coarse_add_memory_fixed(ch, buf_non_aligned, buf_non_aligned_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buf_non_aligned_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + umf_result = coarse_alloc(ch, buf_non_aligned_size, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + ASSERT_EQ(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buf_non_aligned_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); +} diff --git a/test/common/ipc_common.c b/test/common/ipc_common.c index 140927079b..bf116a6779 100644 --- a/test/common/ipc_common.c +++ b/test/common/ipc_common.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -9,6 +9,7 @@ #include #include #include +#include <sys/prctl.h> #include #include @@ -126,8 +127,7 @@ int run_consumer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, umf_result = umfMemoryProviderCreate(provider_ops, provider_params, &provider); if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, - "[consumer] ERROR: creating OS memory provider failed\n"); + fprintf(stderr, "[consumer] ERROR: creating memory provider failed\n"); return -1; } @@ -336,12 +336,17 @@ int run_producer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, int producer_socket = -1; char consumer_message[MSG_SIZE]; + ret = prctl(PR_SET_PTRACER, getppid()); + if (ret == -1) { + perror("PR_SET_PTRACER may not be supported.
prctl() call failed"); + goto err_end; + } + // create OS memory provider umf_result = umfMemoryProviderCreate(provider_ops, provider_params, &provider); if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, - "[producer] ERROR: creating OS memory provider failed\n"); + fprintf(stderr, "[producer] ERROR: creating memory provider failed\n"); return -1; } @@ -528,6 +533,7 @@ int run_producer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, err_umfMemoryProviderDestroy: umfMemoryProviderDestroy(provider); +err_end: if (ret == 0) { fprintf(stderr, "[producer] Shutting down (status OK) ...\n"); } else if (ret == 1) { diff --git a/test/common/pool.hpp b/test/common/pool.hpp index 9a5739085a..558b9d665a 100644 --- a/test/common/pool.hpp +++ b/test/common/pool.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -19,10 +19,11 @@ #include #include +#include #include "base.hpp" -#include "cpp_helpers.hpp" #include "provider.hpp" +#include "utils/cpp_helpers.hpp" namespace umf_test { @@ -37,7 +38,7 @@ createPoolChecked(umf_memory_pool_ops_t *ops, } auto wrapPoolUnique(umf_memory_pool_handle_t hPool) { - return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); + return umf_test::pool_unique_handle_t(hPool, &umfPoolDestroy); } bool isReallocSupported(umf_memory_pool_handle_t hPool) { @@ -148,7 +149,50 @@ struct malloc_pool : public pool_base_t { }; umf_memory_pool_ops_t MALLOC_POOL_OPS = - umf::poolMakeCOps(); + umf_test::poolMakeCOps(); + +static constexpr size_t DEFAULT_DISJOINT_SLAB_MIN_SIZE = 4096; +static constexpr size_t DEFAULT_DISJOINT_MAX_POOLABLE_SIZE = 4096; +static constexpr size_t DEFAULT_DISJOINT_CAPACITY = 4; +static constexpr size_t DEFAULT_DISJOINT_MIN_BUCKET_SIZE = 64; + +inline void *defaultDisjointPoolConfig() { + umf_disjoint_pool_params_handle_t config = nullptr; + umf_result_t res = umfDisjointPoolParamsCreate(&config); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to create pool params"); + } + res = umfDisjointPoolParamsSetSlabMinSize(config, + DEFAULT_DISJOINT_SLAB_MIN_SIZE); + if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); + throw std::runtime_error("Failed to set slab min size"); + } + res = umfDisjointPoolParamsSetMaxPoolableSize( + config, DEFAULT_DISJOINT_MAX_POOLABLE_SIZE); + if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); + throw std::runtime_error("Failed to set max poolable size"); + } + res = umfDisjointPoolParamsSetCapacity(config, DEFAULT_DISJOINT_CAPACITY); + if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); + throw std::runtime_error("Failed to set capacity"); + } + res = umfDisjointPoolParamsSetMinBucketSize( + config, DEFAULT_DISJOINT_MIN_BUCKET_SIZE); + if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); + throw std::runtime_error("Failed to set min bucket size"); + } + + return config; +} + +inline umf_result_t defaultDisjointPoolConfigDestroy(void *config) { + return umfDisjointPoolParamsDestroy( + static_cast(config)); +} } // namespace umf_test diff --git a/test/common/pool_null.c b/test/common/pool_null.c index c34bcfc169..40d6626797 100644 --- a/test/common/pool_null.c +++ b/test/common/pool_null.c @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache 
License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -64,7 +64,7 @@ static umf_result_t nullGetLastStatus(void *pool) { } umf_memory_pool_ops_t UMF_NULL_POOL_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = nullInitialize, .finalize = nullFinalize, .malloc = nullMalloc, diff --git a/test/common/pool_trace.c b/test/common/pool_trace.c index 29329f31c0..9a9e010193 100644 --- a/test/common/pool_trace.c +++ b/test/common/pool_trace.c @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -90,7 +90,7 @@ static umf_result_t traceGetLastStatus(void *pool) { } umf_memory_pool_ops_t UMF_TRACE_POOL_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = traceInitialize, .finalize = traceFinalize, .malloc = traceMalloc, diff --git a/test/common/provider.hpp b/test/common/provider.hpp index 148f34dc89..38fe7336ec 100644 --- a/test/common/provider.hpp +++ b/test/common/provider.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,8 +15,8 @@ #include "base.hpp" #include "base_alloc_global.h" -#include "cpp_helpers.hpp" #include "test_helpers.h" +#include "utils/cpp_helpers.hpp" namespace umf_test { @@ -29,7 +29,8 @@ createProviderChecked(umf_memory_provider_ops_t *ops, void *params) { } auto wrapProviderUnique(umf_memory_provider_handle_t hProvider) { - return umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); + return umf_test::provider_unique_handle_t(hProvider, + &umfMemoryProviderDestroy); } typedef struct provider_base_t { @@ -97,7 +98,7 @@ typedef struct provider_base_t { } provider_base_t; umf_memory_provider_ops_t BASE_PROVIDER_OPS = - umf::providerMakeCOps<provider_base_t>(); + umf_test::providerMakeCOps<provider_base_t>(); struct provider_ba_global : public provider_base_t { umf_result_t alloc(size_t size, size_t align, void **ptr) noexcept { @@ -127,7 +128,7 @@ struct provider_ba_global : public provider_base_t { }; umf_memory_provider_ops_t BA_GLOBAL_PROVIDER_OPS = - umf::providerMakeCOps<provider_ba_global>(); + umf_test::providerMakeCOps<provider_ba_global>(); struct provider_mock_out_of_mem : public provider_base_t { provider_ba_global helper_prov; @@ -152,7 +153,7 @@ struct provider_mock_out_of_mem : public provider_base_t { }; umf_memory_provider_ops_t MOCK_OUT_OF_MEM_PROVIDER_OPS = - umf::providerMakeCOps<provider_mock_out_of_mem>(); + umf_test::providerMakeCOps<provider_mock_out_of_mem>(); } // namespace umf_test diff --git a/test/common/provider_null.c b/test/common/provider_null.c index 5db389e895..b4e54f9764 100644 --- a/test/common/provider_null.c +++ b/test/common/provider_null.c @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -130,15 +130,15 @@ static umf_result_t nullCloseIpcHandle(void *provider, void *ptr, size_t size) { } umf_memory_provider_ops_t UMF_NULL_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = nullInitialize, .finalize = nullFinalize, .alloc = nullAlloc, + .free = nullFree, .get_last_native_error = nullGetLastError, .get_recommended_page_size = nullGetRecommendedPageSize, .get_min_page_size = nullGetPageSize, .get_name = nullName, - .ext.free = nullFree, .ext.purge_lazy = nullPurgeLazy, .ext.purge_force = nullPurgeForce, .ext.allocation_merge = nullAllocationMerge, diff --git a/test/common/provider_trace.c b/test/common/provider_trace.c index 219dde5cd7..20f44e8683 100644 --- a/test/common/provider_trace.c +++ b/test/common/provider_trace.c @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -191,15 +191,15 @@ static umf_result_t traceCloseIpcHandle(void *provider, void *ptr, } umf_memory_provider_ops_t UMF_TRACE_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = traceInitialize, .finalize = traceFinalize, .alloc = traceAlloc, + .free = traceFree, .get_last_native_error = traceGetLastError, .get_recommended_page_size = traceGetRecommendedPageSize, .get_min_page_size = traceGetPageSize, .get_name = traceName, - .ext.free = traceFree, .ext.purge_lazy = tracePurgeLazy, .ext.purge_force = tracePurgeForce, .ext.allocation_merge = traceAllocationMerge, diff --git a/test/common/test_helpers.c b/test/common/test_helpers.c index 71f018d0f7..d69ca35353 100644 --- a/test/common/test_helpers.c +++ b/test/common/test_helpers.c @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // This file contains tests for UMF pool API diff --git a/test/ctl/config.txt b/test/ctl/config.txt new file mode 100644 index 0000000000..5d4f9c62bd --- /dev/null +++ b/test/ctl/config.txt @@ -0,0 +1 @@ +debug.heap.alloc_pattern=321 \ No newline at end of file diff --git a/test/ctl/ctl_debug.c b/test/ctl/ctl_debug.c new file mode 100644 index 0000000000..711cb5e179 --- /dev/null +++ b/test/ctl/ctl_debug.c @@ -0,0 +1,126 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * ctl_debug.c -- implementation of the debug CTL namespace + */ + +#include "ctl_debug.h" + +static struct ctl *ctl_debug; + +static int alloc_pattern = 0; +static int enable_logging = 0; +static int log_level = 0; + +struct ctl *get_debug_ctl(void) { return ctl_debug; } + +/* + * CTL_WRITE_HANDLER(alloc_pattern) -- sets the alloc_pattern field in heap + */ +static int CTL_WRITE_HANDLER(alloc_pattern)(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int arg_in = *(int *)arg; + alloc_pattern = arg_in; + return 0; +} + +/* + * CTL_READ_HANDLER(alloc_pattern) -- returns alloc_pattern heap field + */ +static int CTL_READ_HANDLER(alloc_pattern)(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int *arg_out = arg; + *arg_out = alloc_pattern; + return 0; +} + +static int CTL_WRITE_HANDLER(enable_logging)(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int arg_in = *(int *)arg; + enable_logging = arg_in; + return 0; +} + +static int CTL_READ_HANDLER(enable_logging)(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int *arg_out = arg; + *arg_out = enable_logging; + return 0; +} + +static int CTL_WRITE_HANDLER(log_level)(void *ctx, enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int arg_in = *(int *)arg; + log_level = arg_in; + return 0; +} + +static int CTL_READ_HANDLER(log_level)(void *ctx, enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int *arg_out = arg; + *arg_out = log_level; + return 0; +} + +static const struct ctl_argument CTL_ARG(alloc_pattern) = CTL_ARG_LONG_LONG; + +static const struct ctl_argument CTL_ARG(enable_logging) = CTL_ARG_BOOLEAN; + +static const struct ctl_argument CTL_ARG(log_level) = CTL_ARG_INT; + +static const struct ctl_node CTL_NODE(heap)[] = {CTL_LEAF_RW(alloc_pattern), + CTL_LEAF_RW(enable_logging), + CTL_LEAF_RW(log_level), + + CTL_NODE_END}; + +static const struct ctl_node CTL_NODE(debug)[] = {CTL_CHILD(heap), + + CTL_NODE_END}; + +/* + * debug_ctl_register -- registers ctl nodes for "debug" module + */ +void debug_ctl_register(struct ctl *ctl) { CTL_REGISTER_MODULE(ctl, debug); } + +void initialize_debug_ctl(void) { + ctl_debug = ctl_new(); + debug_ctl_register(ctl_debug); +} + +void deinitialize_debug_ctl(void) { ctl_delete(ctl_debug); } diff --git a/test/ctl/ctl_debug.h b/test/ctl/ctl_debug.h new file mode 100644 index 0000000000..9dd8bade5b --- /dev/null +++ b/test/ctl/ctl_debug.h @@ -0,0 +1,32 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * ctl_debug.h -- definitions for CTL test + */ + +#ifndef UMF_CTL_DEBUG_H +#define UMF_CTL_DEBUG_H 1 + +#include "../src/ctl/ctl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void debug_ctl_register(struct ctl *ctl); +struct ctl *get_debug_ctl(void); +void initialize_debug_ctl(void); +void deinitialize_debug_ctl(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/test/ctl/test.cpp b/test/ctl/test.cpp new file mode 100644 index 0000000000..c35759c673 --- /dev/null +++ b/test/ctl/test.cpp @@ -0,0 +1,93 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include "../common/base.hpp" +#include "ctl/ctl.h" +#include "ctl/ctl_debug.h" + +using namespace umf_test; + +TEST_F(test, ctl_debug_read_from_string) { + initialize_debug_ctl(); + auto ctl_handler = get_debug_ctl(); + ctl_load_config_from_string(ctl_handler, NULL, + "debug.heap.alloc_pattern=1"); + + int value = 0; + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.alloc_pattern", CTL_QUERY_READ, &value); + ASSERT_EQ(value, 1); + + // Test setting alloc_pattern to 2 + ctl_load_config_from_string(ctl_handler, NULL, + "debug.heap.alloc_pattern=2"); + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.alloc_pattern", CTL_QUERY_READ, &value); + ASSERT_EQ(value, 2); + + // Test setting alloc_pattern to 0 + ctl_load_config_from_string(ctl_handler, NULL, + "debug.heap.alloc_pattern=0"); + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.alloc_pattern", CTL_QUERY_READ, &value); + ASSERT_EQ(value, 0); + + // Negative test: non-existent configuration + ASSERT_NE(ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.non_existent", CTL_QUERY_READ, &value), + 0); + + // Negative test: invalid path + ASSERT_NE(ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "invalid.path.alloc_pattern", CTL_QUERY_READ, &value), + 0); + + debug_ctl_register(ctl_handler); + deinitialize_debug_ctl(); +} + +int ctl_config_write_to_file(const char *filename, const char *data) { + FILE *file = fopen(filename == NULL ? "config.txt" : filename, "w+"); + if (file == NULL) { + return -1; + } + fputs(data, file); + fclose(file); + return 0; +} + +TEST_F(test, ctl_debug_read_from_file) { +#ifndef _WIN32 + ASSERT_EQ(ctl_config_write_to_file( + "config.txt", "debug.heap.alloc_pattern=321;\ndebug.heap." 
+ "enable_logging=1;\ndebug.heap.log_level=5;\n"), + 0); + initialize_debug_ctl(); + auto ctl_handler = get_debug_ctl(); + ASSERT_EQ(ctl_load_config_from_file(ctl_handler, NULL, "config.txt"), 0); + + int value = 0; + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.alloc_pattern", CTL_QUERY_READ, &value); + ASSERT_EQ(value, 321); + + value = 0; + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, "debug.heap.log_level", + CTL_QUERY_READ, &value); + ASSERT_EQ(value, 5); + + value = 0; + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.enable_logging", CTL_QUERY_READ, &value); + ASSERT_EQ(value, 1); + + debug_ctl_register(ctl_handler); + deinitialize_debug_ctl(); +#endif +} diff --git a/test/disjointCoarseMallocPool.cpp b/test/disjointCoarseMallocPool.cpp deleted file mode 100644 index 32e1d24f36..0000000000 --- a/test/disjointCoarseMallocPool.cpp +++ /dev/null @@ -1,580 +0,0 @@ -/* - * Copyright (C) 2023-2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ - -#include - -#include "provider.hpp" - -#include -#include - -using umf_test::KB; -using umf_test::MB; -using umf_test::test; - -#define GetStats umfCoarseMemoryProviderGetStats - -umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS = - umf::providerMakeCOps(); - -struct CoarseWithMemoryStrategyTest - : umf_test::test, - ::testing::WithParamInterface { - void SetUp() override { - test::SetUp(); - allocation_strategy = this->GetParam(); - } - - coarse_memory_provider_strategy_t allocation_strategy; -}; - -INSTANTIATE_TEST_SUITE_P( - CoarseWithMemoryStrategyTest, CoarseWithMemoryStrategyTest, - ::testing::Values(UMF_COARSE_MEMORY_STRATEGY_FASTEST, - UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, - UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE)); - -TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_basic) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.destroy_upstream_memory_provider = true; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = nullptr; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; - umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(disjoint_pool_params, nullptr); - umf_result = - umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 4096); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - 
umf_result = - umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 4096); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 4); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = - umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 64); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - - umf_memory_pool_handle_t pool; - umf_result = umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, - disjoint_pool_params, - UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(pool, nullptr); - - umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - - // test - - umf_memory_provider_handle_t prov = NULL; - umf_result = umfPoolGetMemoryProvider(pool, &prov); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(prov, nullptr); - - // alloc 2x 2MB - void *p1 = umfPoolMalloc(pool, 2 * MB); - ASSERT_NE(p1, nullptr); - ASSERT_EQ(GetStats(prov).used_size, 2 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 2); - - void *p2 = umfPoolMalloc(pool, 2 * MB); - ASSERT_NE(p2, nullptr); - ASSERT_EQ(GetStats(prov).used_size, 4 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 3); - ASSERT_NE(p1, p2); - - // swap pointers to get p1 < p2 - if (p1 > p2) { - std::swap(p1, p2); - } - - // free + alloc first block - // the block should be reused - // currently there is no purging, so the alloc size shouldn't change - // there should be no block merging between used and not-used blocks - umf_result = umfPoolFree(pool, p1); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(prov).used_size, 2 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 3); - - p1 = umfPoolMalloc(pool, 2 * MB); - ASSERT_EQ(GetStats(prov).used_size, 4 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 3); - - // free all allocs - // overall alloc size shouldn't change - // block p2 should merge with the prev free block p1 - // and the remaining init block - umf_result = umfPoolFree(pool, p1); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(prov).num_all_blocks, 3); - umf_result = umfPoolFree(pool, p2); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(prov).used_size, 0 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 1); - - // test allocations with alignment - // TODO: what about holes? 
- p1 = umfPoolAlignedMalloc(pool, 1 * MB - 4, 128); - ASSERT_NE(p1, nullptr); - ASSERT_EQ((uintptr_t)p1 & 127, 0); - p2 = umfPoolAlignedMalloc(pool, 1 * MB - 4, 128); - ASSERT_NE(p2, nullptr); - ASSERT_EQ((uintptr_t)p1 & 127, 0); - umf_result = umfPoolFree(pool, p1); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfPoolFree(pool, p2); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - - // alloc whole buffer - // after this, there should be one single block - p1 = umfPoolMalloc(pool, init_buffer_size); - ASSERT_EQ(GetStats(prov).used_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 1); - - // free all memory - // alloc 2 MB block - the init block should be split - umf_result = umfPoolFree(pool, p1); - p1 = umfPoolMalloc(pool, 2 * MB); - ASSERT_EQ(GetStats(prov).used_size, 2 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 2); - - // alloc additional 2 MB - // the non-used block should be used - p2 = umfPoolMalloc(pool, 2 * MB); - ASSERT_EQ(GetStats(prov).used_size, 4 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 3); - ASSERT_NE(p1, p2); - - // make sure that p1 < p2 - if (p1 > p2) { - std::swap(p1, p2); - } - - // free blocks in order: p2, p1 - // block p1 should merge with the next block p2 - // swap pointers to get p1 < p2 - umfPoolFree(pool, p2); - umfPoolFree(pool, p1); - ASSERT_EQ(GetStats(prov).used_size, 0 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 1); - - // alloc 10x 2 MB - this should occupy all allocated memory - constexpr int allocs_size = 10; - void *allocs[allocs_size] = {0}; - for (int i = 0; i < allocs_size; i++) { - ASSERT_EQ(GetStats(prov).used_size, i * 2 * MB); - allocs[i] = umfPoolMalloc(pool, 2 * MB); - ASSERT_NE(allocs[i], nullptr); - } - ASSERT_EQ(GetStats(prov).used_size, 20 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - // there should be no block with the free memory - ASSERT_EQ(GetStats(prov).num_all_blocks, allocs_size); - - // free all memory - for (int i = 0; i < allocs_size; i++) { - umf_result = umfPoolFree(pool, allocs[i]); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - } - - ASSERT_EQ(GetStats(prov).num_all_blocks, 1); - ASSERT_EQ(GetStats(prov).used_size, 0 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - - umfPoolDestroy(pool); - // Both coarse_memory_provider and malloc_memory_provider - // have already been destroyed by umfPoolDestroy(), because: - // UMF_POOL_CREATE_FLAG_OWN_PROVIDER was set in umfPoolCreate() and - // coarse_memory_provider_params.destroy_upstream_memory_provider = true; -} - -TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple1) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = - 
malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = NULL; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; - umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(disjoint_pool_params, nullptr); - umf_result = - umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 4096); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = - umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 4096); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 4); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = - umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 64); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - - umf_memory_pool_handle_t pool; - umf_result = - umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, - disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(pool, nullptr); - - umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params); - - umf_memory_provider_handle_t prov = NULL; - umfPoolGetMemoryProvider(pool, &prov); - ASSERT_NE(prov, nullptr); - - // test 1 - - size_t s1 = 74659 * KB; - size_t s2 = 8206 * KB; - - size_t max_alloc_size = 0; - - const int nreps = 2; - const int nptrs = 6; - - // s1 - for (int j = 0; j < nreps; j++) { - void *t[nptrs] = {0}; - for (int i = 0; i < nptrs; i++) { - t[i] = umfPoolMalloc(pool, s1); - ASSERT_NE(t[i], nullptr); - } - - if (max_alloc_size == 0) { - max_alloc_size = GetStats(prov).alloc_size; - } - - for (int i = 0; i < nptrs; i++) { - umf_result = umfPoolFree(pool, t[i]); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - } - } - - // s2 - for (int j = 0; j < nreps; j++) { - void *t[nptrs] = {0}; - for (int i = 0; i < nptrs; i++) { - t[i] = umfPoolMalloc(pool, s2); - ASSERT_NE(t[i], nullptr); - } - - // all s2 should fit into single block leaved after freeing s1 - ASSERT_LE(GetStats(prov).alloc_size, max_alloc_size); - - for (int i = 0; i < nptrs; i++) { - umf_result = umfPoolFree(pool, t[i]); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - } - } - - umfPoolDestroy(pool); - umfMemoryProviderDestroy(coarse_memory_provider); - umfMemoryProviderDestroy(malloc_memory_provider); -} - -TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple2) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - 
coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = NULL; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; - umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(disjoint_pool_params, nullptr); - umf_result = - umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 4096); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = - umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 4096); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 4); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = - umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 64); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - - umf_memory_pool_handle_t pool; - umf_result = - umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, - disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(pool, nullptr); - - umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - - // test - double sizes[] = {2, 4, 0.5, 1, 8, 0.25}; - size_t alignment[] = {0, 4, 0, 16, 32, 128}; - for (int i = 0; i < 6; i++) { - size_t s = (size_t)(sizes[i] * MB); - void *t[8] = {0}; - for (int j = 0; j < 8; j++) { - t[j] = umfPoolAlignedMalloc(pool, s, alignment[i]); - ASSERT_NE(t[j], nullptr); - } - - for (int j = 0; j < 8; j++) { - umf_result = umfPoolFree(pool, t[j]); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - } - } - - umfPoolDestroy(pool); - umfMemoryProviderDestroy(coarse_memory_provider); - umfMemoryProviderDestroy(malloc_memory_provider); -} - -struct alloc_ptr_size { - void *ptr; - size_t size; - - bool operator<(const alloc_ptr_size &other) const { - if (ptr == other.ptr) { - return size < other.size; - } - return ptr < other.ptr; - } -}; - -TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMMapPool_random) { - umf_result_t umf_result; - - const size_t init_buffer_size = 200 * MB; - - // preallocate some memory and initialize the vector with zeros - std::vector buffer(init_buffer_size, 0); - void *buf = (void *)buffer.data(); - ASSERT_NE(buf, nullptr); - - const unsigned char alloc_check_val = 11; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = NULL; - coarse_memory_provider_params.immediate_init_from_upstream = false; - coarse_memory_provider_params.init_buffer = buf; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = 
-    umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(),
-                                         &coarse_memory_provider_params,
-                                         &coarse_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_NE(coarse_memory_provider, nullptr);
-
-    umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL;
-    umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_NE(disjoint_pool_params, nullptr);
-    umf_result =
-        umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 1024);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    umf_result =
-        umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 1024);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 2);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    umf_result =
-        umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 16);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-
-    umf_memory_pool_handle_t pool;
-    umf_result =
-        umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider,
-                      disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_NE(pool, nullptr);
-
-    umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-
-    // set constant seed so each test run will have the same scenario
-    uint32_t seed = 1234;
-    std::mt19937 mt(seed);
-
-    // different sizes to alloc
-    std::vector<size_t> sizes = {15,        49,     588,      1025,
-                                 2 * KB,    5 * KB, 160 * KB, 511 * KB,
-                                 1000 * KB, MB,     3 * MB,   7 * MB};
-    std::uniform_int_distribution<int> sizes_dist(0, (int)(sizes.size() - 1));
-
-    // each alloc would be done few times
-    std::vector<size_t> counts = {1, 3, 4, 8, 9, 11};
-    std::uniform_int_distribution<int> counts_dist(0, (int)(counts.size() - 1));
-
-    // action to take will be random
-    // alloc = <0, .5), free = <.5, 1)
-    std::uniform_real_distribution<float> actions_dist(0, 1);
-
-    std::set<alloc_ptr_size> allocs;
-    const int nreps = 100;
-
-    for (size_t i = 0; i < nreps; i++) {
-        size_t count = counts[counts_dist(mt)];
-        float action = actions_dist(mt);
-
-        if (action < 0.5) {
-            size_t size = sizes[sizes_dist(mt)];
-            std::cout << "size: " << size << " count: " << count
-                      << " action: alloc" << std::endl;
-
-            // alloc
-            for (size_t j = 0; j < count; j++) {
-                void *ptr = umfPoolMalloc(pool, size);
-                ASSERT_NE(ptr, nullptr);
-
-                if (ptr == nullptr) {
-                    break;
-                }
-
-                // check if first and last bytes are empty and fill them with control data
-                ASSERT_EQ(((unsigned char *)ptr)[0], 0);
-                ASSERT_EQ(((unsigned char *)ptr)[size - 1], 0);
-                ((unsigned char *)ptr)[0] = alloc_check_val;
-                ((unsigned char *)ptr)[size - 1] = alloc_check_val;
-
-                allocs.insert({ptr, size});
-            }
-        } else {
-            std::cout << "count: " << count << " action: free" << std::endl;
-
-            // free random allocs
-            for (size_t j = 0; j < count; j++) {
-                if (allocs.size() == 0) {
-                    continue;
-                }
-
-                std::uniform_int_distribution<int> free_dist(
-                    0, (int)(allocs.size() - 1));
-                size_t free_id = free_dist(mt);
-                auto it = allocs.begin();
-                std::advance(it, free_id);
-                auto [ptr, size] = (*it);
-                ASSERT_NE(ptr, nullptr);
-
-                // check if control bytes are set and clean them
-
-                ASSERT_EQ(((unsigned char *)ptr)[0], alloc_check_val);
-                ASSERT_EQ(((unsigned char *)ptr)[size - 1], alloc_check_val);
-                ((unsigned char *)ptr)[0] = 0;
-                ((unsigned char *)ptr)[size - 1] = 0;
-
-                umf_result_t ret = umfPoolFree(pool, ptr);
-                ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-
-                allocs.erase(it);
-            }
-        }
-    }
-
-    std::cout << "cleanup" << std::endl;
-
-    while (allocs.size()) {
-        umf_result_t ret = umfPoolFree(pool, (*allocs.begin()).ptr);
-        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-        allocs.erase(allocs.begin());
-    }
-
-    umfPoolDestroy(pool);
-    umfMemoryProviderDestroy(coarse_memory_provider);
-}
diff --git a/test/disjoint_pool_file_prov.cpp b/test/disjoint_pool_file_prov.cpp
new file mode 100644
index 0000000000..58e15f5714
--- /dev/null
+++ b/test/disjoint_pool_file_prov.cpp
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2023-2025 Intel Corporation
+ *
+ * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+*/
+
+#include <random>
+
+#include <umf/pools/pool_disjoint.h>
+#include <umf/providers/provider_file_memory.h>
+
+#include "coarse.h"
+#include "provider.hpp"
+
+using umf_test::KB;
+using umf_test::MB;
+using umf_test::test;
+
+#define FILE_PATH ((char *)"tmp_file")
+
+umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS =
+    umf_test::providerMakeCOps();
+
+struct FileWithMemoryStrategyTest
+    : umf_test::test,
+      ::testing::WithParamInterface<coarse_strategy_t> {
+    void SetUp() override {
+        test::SetUp();
+        allocation_strategy = this->GetParam();
+    }
+
+    coarse_strategy_t allocation_strategy;
+};
+
+INSTANTIATE_TEST_SUITE_P(
+    FileWithMemoryStrategyTest, FileWithMemoryStrategyTest,
+    ::testing::Values(UMF_COARSE_MEMORY_STRATEGY_FASTEST,
+                      UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE,
+                      UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE));
+
+TEST_P(FileWithMemoryStrategyTest, disjointFileMallocPool_simple1) {
+    umf_memory_provider_handle_t malloc_memory_provider = nullptr;
+    umf_result_t umf_result;
+
+    umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS,
+                                         nullptr, &malloc_memory_provider);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(malloc_memory_provider, nullptr);
+
+    umf_file_memory_provider_params_handle_t file_params = nullptr;
+    umf_result = umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(file_params, nullptr);
+
+    umf_memory_provider_handle_t file_memory_provider;
+    umf_result = umfMemoryProviderCreate(umfFileMemoryProviderOps(),
+                                         file_params, &file_memory_provider);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(file_memory_provider, nullptr);
+
+    umf_result = umfFileMemoryProviderParamsDestroy(file_params);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    umf_disjoint_pool_params_handle_t disjoint_pool_params = nullptr;
+    umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(disjoint_pool_params, nullptr);
+    umf_result =
+        umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 4096);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result =
+        umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 4096);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 4);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result =
+        umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 64);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    umf_memory_pool_handle_t pool;
+    umf_result =
+        umfPoolCreate(umfDisjointPoolOps(), file_memory_provider,
+                      disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(pool, nullptr);
+
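+    // the pool keeps its own copy of the parameters, so they can be destroyed now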
+    umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params);
+
+    umf_memory_provider_handle_t prov = nullptr;
+    umfPoolGetMemoryProvider(pool, &prov);
+    ASSERT_NE(prov, nullptr);
+
+    // test 1
+
+    size_t s1 = 74659 * KB;
+    size_t s2 = 8206 * KB;
+
+    const int nreps = 2;
+    const int nptrs = 6;
+
+    // s1
+    for (int j = 0; j < nreps; j++) {
+        void *t[nptrs] = {0};
+        for (int i = 0; i < nptrs; i++) {
+            t[i] = umfPoolMalloc(pool, s1);
+            ASSERT_NE(t[i], nullptr);
+        }
+
+        for (int i = 0; i < nptrs; i++) {
+            umf_result = umfPoolFree(pool, t[i]);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+        }
+    }
+
+    // s2
+    for (int j = 0; j < nreps; j++) {
+        void *t[nptrs] = {0};
+        for (int i = 0; i < nptrs; i++) {
+            t[i] = umfPoolMalloc(pool, s2);
+            ASSERT_NE(t[i], nullptr);
+        }
+
+        for (int i = 0; i < nptrs; i++) {
+            umf_result = umfPoolFree(pool, t[i]);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+        }
+    }
+
+    umfPoolDestroy(pool);
+    umfMemoryProviderDestroy(file_memory_provider);
+    umfMemoryProviderDestroy(malloc_memory_provider);
+}
+
+TEST_P(FileWithMemoryStrategyTest, disjointFileMallocPool_simple2) {
+    umf_memory_provider_handle_t malloc_memory_provider = nullptr;
+    umf_result_t umf_result;
+
+    umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS,
+                                         nullptr, &malloc_memory_provider);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(malloc_memory_provider, nullptr);
+
+    umf_file_memory_provider_params_handle_t file_params = nullptr;
+    umf_result = umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(file_params, nullptr);
+
+    umf_memory_provider_handle_t file_memory_provider;
+    umf_result = umfMemoryProviderCreate(umfFileMemoryProviderOps(),
+                                         file_params, &file_memory_provider);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(file_memory_provider, nullptr);
+
+    umf_result = umfFileMemoryProviderParamsDestroy(file_params);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    umf_disjoint_pool_params_handle_t disjoint_pool_params = nullptr;
+    umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(disjoint_pool_params, nullptr);
+    umf_result =
+        umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 4096);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result =
+        umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 4096);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 4);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result =
+        umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 64);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    umf_memory_pool_handle_t pool;
+    umf_result =
+        umfPoolCreate(umfDisjointPoolOps(), file_memory_provider,
+                      disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(pool, nullptr);
+
+    umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    // test
+    double sizes[] = {2, 4, 0.5, 1, 8, 0.25};
+    size_t alignment[] = {0, 4, 0, 16, 32, 128};
+    for (int i = 0; i < 6; i++) {
+        size_t s = (size_t)(sizes[i] * MB);
+        void *t[8] = {0};
+        for (int j = 0; j < 8; j++) {
+            t[j] = umfPoolAlignedMalloc(pool, s, alignment[i]);
+            ASSERT_NE(t[j], nullptr);
+        }
+
+        for (int j = 0; j < 8; j++) {
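+            // release the blocks in the same order they were allocated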
+            umf_result = umfPoolFree(pool, t[j]);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+        }
+    }
+
+    umfPoolDestroy(pool);
+    umfMemoryProviderDestroy(file_memory_provider);
+    umfMemoryProviderDestroy(malloc_memory_provider);
+}
+
+struct alloc_ptr_size {
+    void *ptr;
+    size_t size;
+
+    bool operator<(const alloc_ptr_size &other) const {
+        if (ptr == other.ptr) {
+            return size < other.size;
+        }
+        return ptr < other.ptr;
+    }
+};
+
+TEST_P(FileWithMemoryStrategyTest, disjointFileMMapPool_random) {
+    umf_result_t umf_result;
+
+    const size_t init_buffer_size = 200 * MB;
+
+    // preallocate some memory and initialize the vector with zeros
+    std::vector<unsigned char> buffer(init_buffer_size, 0);
+    void *buf = (void *)buffer.data();
+    ASSERT_NE(buf, nullptr);
+
+    const unsigned char alloc_check_val = 11;
+
+    umf_file_memory_provider_params_handle_t file_params = nullptr;
+    umf_result = umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(file_params, nullptr);
+
+    umf_memory_provider_handle_t file_memory_provider;
+    umf_result = umfMemoryProviderCreate(umfFileMemoryProviderOps(),
+                                         file_params, &file_memory_provider);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(file_memory_provider, nullptr);
+
+    umf_result = umfFileMemoryProviderParamsDestroy(file_params);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    umf_disjoint_pool_params_handle_t disjoint_pool_params = nullptr;
+    umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(disjoint_pool_params, nullptr);
+    umf_result =
+        umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 1024);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result =
+        umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 1024);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 2);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result =
+        umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 16);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    umf_memory_pool_handle_t pool;
+    umf_result =
+        umfPoolCreate(umfDisjointPoolOps(), file_memory_provider,
+                      disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(pool, nullptr);
+
+    umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    // set constant seed so each test run will have the same scenario
+    uint32_t seed = 1234;
+    std::mt19937 mt(seed);
+
+    // different sizes to alloc
+    std::vector<size_t> sizes = {15,        49,     588,      1025,
+                                 2 * KB,    5 * KB, 160 * KB, 511 * KB,
+                                 1000 * KB, MB,     3 * MB,   7 * MB};
+    std::uniform_int_distribution<int> sizes_dist(0, (int)(sizes.size() - 1));
+
+    // each alloc will be repeated a few times
+    std::vector<size_t> counts = {1, 3, 4, 8, 9, 11};
+    std::uniform_int_distribution<int> counts_dist(0, (int)(counts.size() - 1));
+
+    // action to take will be random
+    // alloc = <0, .5), free = <.5, 1)
+    std::uniform_real_distribution<float> actions_dist(0, 1);
+
+    std::set<alloc_ptr_size> allocs;
+    const int nreps = 100;
+
+    for (size_t i = 0; i < nreps; i++) {
+        size_t count = counts[counts_dist(mt)];
+        float action = actions_dist(mt);
+
+        if (action < 0.5) {
+            size_t size = sizes[sizes_dist(mt)];
+            std::cout << "size: " << size << " count: " << count
+                      << " action: alloc" << std::endl;
+
+            // alloc
+            for (size_t j = 0; j < count; j++) {
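+                // umfPoolCalloc zero-fills the block, so the sentinel checks below expect 0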
+                void *ptr = umfPoolCalloc(pool, 1, size);
+                if (ptr == nullptr) {
+                    break;
+                }
+
+                // check if first and last bytes are empty and fill them with control data
+                ASSERT_EQ(((unsigned char *)ptr)[0], 0);
+                ASSERT_EQ(((unsigned char *)ptr)[size - 1], 0);
+                ((unsigned char *)ptr)[0] = alloc_check_val;
+                ((unsigned char *)ptr)[size - 1] = alloc_check_val;
+
+                allocs.insert({ptr, size});
+            }
+        } else {
+            std::cout << "count: " << count << " action: free" << std::endl;
+
+            // free random allocs
+            for (size_t j = 0; j < count; j++) {
+                if (allocs.size() == 0) {
+                    continue;
+                }
+
+                std::uniform_int_distribution<int> free_dist(
+                    0, (int)(allocs.size() - 1));
+                size_t free_id = free_dist(mt);
+                auto it = allocs.begin();
+                std::advance(it, free_id);
+                auto [ptr, size] = (*it);
+                ASSERT_NE(ptr, nullptr);
+
+                // check if control bytes are set and clean them
+
+                ASSERT_EQ(((unsigned char *)ptr)[0], alloc_check_val);
+                ASSERT_EQ(((unsigned char *)ptr)[size - 1], alloc_check_val);
+                ((unsigned char *)ptr)[0] = 0;
+                ((unsigned char *)ptr)[size - 1] = 0;
+
+                umf_result_t ret = umfPoolFree(pool, ptr);
+                ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+
+                allocs.erase(it);
+            }
+        }
+    }
+
+    std::cout << "cleanup" << std::endl;
+
+    while (allocs.size()) {
+        umf_result_t ret = umfPoolFree(pool, (*allocs.begin()).ptr);
+        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+        allocs.erase(allocs.begin());
+    }
+
+    umfPoolDestroy(pool);
+    umfMemoryProviderDestroy(file_memory_provider);
+}
diff --git a/test/ipcAPI.cpp b/test/ipcAPI.cpp
index 4df32a1c9b..c0642dd760 100644
--- a/test/ipcAPI.cpp
+++ b/test/ipcAPI.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 // This file contains tests for UMF pool API
@@ -109,11 +109,12 @@
 provider_mock_ipc::allocations_mutex_type provider_mock_ipc::alloc_mutex;
 provider_mock_ipc::allocations_map_type provider_mock_ipc::allocations;

 static umf_memory_provider_ops_t IPC_MOCK_PROVIDER_OPS =
-    umf::providerMakeCOps();
+    umf_test::providerMakeCOps();

 HostMemoryAccessor hostMemoryAccessor;

 INSTANTIATE_TEST_SUITE_P(umfIpcTestSuite, umfIpcTest,
                          ::testing::Values(ipcTestParams{
-                             umfProxyPoolOps(), nullptr, &IPC_MOCK_PROVIDER_OPS,
-                             nullptr, &hostMemoryAccessor, false}));
+                             umfProxyPoolOps(), nullptr, nullptr,
+                             &IPC_MOCK_PROVIDER_OPS, nullptr, nullptr,
+                             &hostMemoryAccessor}));
diff --git a/test/ipcFixtures.hpp b/test/ipcFixtures.hpp
index 8dca83f10e..cf31ff7584 100644
--- a/test/ipcFixtures.hpp
+++ b/test/ipcFixtures.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2024-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -15,12 +15,15 @@
 #include 
 #include 
+#include 
 #include 
 #include 
+#include 
 #include 

 class MemoryAccessor {
   public:
+    virtual ~MemoryAccessor() = default;
     virtual void fill(void *ptr, size_t size, const void *pattern,
                       size_t pattern_size) = 0;
     virtual void copy(void *dst_ptr, void *src_ptr, size_t size) = 0;
@@ -46,40 +49,73 @@
     }
 };

+typedef void *(*pfnPoolParamsCreate)();
+typedef umf_result_t (*pfnPoolParamsDestroy)(void *);
+
+typedef void *(*pfnProviderParamsCreate)();
+typedef umf_result_t (*pfnProviderParamsDestroy)(void *);
+
 // ipcTestParams:
-// pool_ops, pool_params, provider_ops, provider_params, memoryAccessor, free_not_supp
-// free_not_supp (bool) - provider does not support the free() op
+// pool_ops, pfnPoolParamsCreate, pfnPoolParamsDestroy,
+// provider_ops, pfnProviderParamsCreate, pfnProviderParamsDestroy,
+// memoryAccessor
 using ipcTestParams =
-    std::tuple<umf_memory_pool_ops_t *, void *, umf_memory_provider_ops_t *,
-               void *, MemoryAccessor *, bool>;
+    std::tuple<umf_memory_pool_ops_t *, pfnPoolParamsCreate,
+               pfnPoolParamsDestroy, umf_memory_provider_ops_t *,
+               pfnProviderParamsCreate, pfnProviderParamsDestroy,
+               MemoryAccessor *>;

 struct umfIpcTest : umf_test::test,
                     ::testing::WithParamInterface<ipcTestParams> {
     umfIpcTest() {}
+    size_t getOpenedIpcCacheSize() {
+        const char *max_size_str = getenv("UMF_MAX_OPENED_IPC_HANDLES");
+        if (max_size_str) {
+            char *endptr;
+            size_t max_size = strtoul(max_size_str, &endptr, 10);
+            EXPECT_EQ(*endptr, '\0');
+            if (*endptr == '\0') {
+                return max_size;
+            }
+        }
+        return 0;
+    }
     void SetUp() override {
         test::SetUp();
-        auto [pool_ops, pool_params, provider_ops, provider_params, accessor,
-              free_not_supp] = this->GetParam();
+        auto [pool_ops, pool_params_create, pool_params_destroy, provider_ops,
+              provider_params_create, provider_params_destroy, accessor] =
+            this->GetParam();
         poolOps = pool_ops;
-        poolParams = pool_params;
+        poolParamsCreate = pool_params_create;
+        poolParamsDestroy = pool_params_destroy;
         providerOps = provider_ops;
-        providerParams = provider_params;
+        providerParamsCreate = provider_params_create;
+        providerParamsDestroy = provider_params_destroy;
         memAccessor = accessor;
-        freeNotSupported = free_not_supp;
+        openedIpcCacheSize = getOpenedIpcCacheSize();
     }

     void TearDown() override { test::TearDown(); }

-    umf::pool_unique_handle_t makePool() {
+    umf_test::pool_unique_handle_t makePool() {
         // TODO: The function is similar to poolCreateExt function
         // from memoryPool.hpp
         umf_memory_provider_handle_t hProvider = NULL;
         umf_memory_pool_handle_t hPool = NULL;

+        void *providerParams = nullptr;
+        if (providerParamsCreate) {
+            providerParams = providerParamsCreate();
+        }
+
         auto ret =
             umfMemoryProviderCreate(providerOps, providerParams, &hProvider);
         EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

+        if (providerParamsDestroy) {
+            providerParamsDestroy(providerParams);
+        }
+
         auto trace = [](void *trace_context, const char *name) {
             stats_type *stat = static_cast<stats_type *>(trace_context);
             if (std::strcmp(name, "alloc") == 0) {
@@ -98,11 +134,20 @@
         umf_memory_provider_handle_t hTraceProvider =
             traceProviderCreate(hProvider, true, (void *)&stat, trace);

+        void *poolParams = nullptr;
+        if (poolParamsCreate) {
+            poolParams = poolParamsCreate();
+        }
+
         ret = umfPoolCreate(poolOps, hTraceProvider, poolParams,
                             UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool);
         EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

-        return umf::pool_unique_handle_t(hPool, &umfPoolDestroy);
+        if (poolParamsDestroy) {
+            poolParamsDestroy(poolParams);
+        }
+
+        return umf_test::pool_unique_handle_t(hPool, &umfPoolDestroy);
     }

     struct stats_type {
@@ -120,25 +165,275 @@
     static constexpr int NTHREADS = 10;
     stats_type stat;
     MemoryAccessor *memAccessor = nullptr;
+
     umf_memory_pool_ops_t *poolOps = nullptr;
-    void *poolParams = nullptr;
+    pfnPoolParamsCreate poolParamsCreate = nullptr;
+    pfnPoolParamsDestroy poolParamsDestroy = nullptr;
+
     umf_memory_provider_ops_t *providerOps = nullptr;
-    void *providerParams = nullptr;
-    bool freeNotSupported = false;
-};
+    pfnProviderParamsCreate providerParamsCreate = nullptr;
+    pfnProviderParamsDestroy providerParamsDestroy = nullptr;
+    size_t openedIpcCacheSize = 0;
+
+    void concurrentGetConcurrentPutHandles(bool shuffle) {
+        std::vector<void *> ptrs;
+        constexpr size_t ALLOC_SIZE = 100;
+        constexpr size_t NUM_POINTERS = 100;
+        umf_test::pool_unique_handle_t pool = makePool();
+        ASSERT_NE(pool.get(), nullptr);
+
+        for (size_t i = 0; i < NUM_POINTERS; ++i) {
+            void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE);
+            EXPECT_NE(ptr, nullptr);
+            ptrs.push_back(ptr);
+        }
+
+        std::array<std::vector<umf_ipc_handle_t>, NTHREADS> ipcHandles;
+
+        umf_test::syncthreads_barrier syncthreads(NTHREADS);
+
+        auto getHandlesFn = [shuffle, &ipcHandles, &ptrs,
+                             &syncthreads](size_t tid) {
+            // Each thread gets a copy of the pointers to shuffle them
+            std::vector<void *> localPtrs = ptrs;
+            if (shuffle) {
+                std::random_device rd;
+                std::mt19937 g(rd());
+                std::shuffle(localPtrs.begin(), localPtrs.end(), g);
+            }
+            syncthreads();
+            for (void *ptr : localPtrs) {
+                umf_ipc_handle_t ipcHandle;
+                size_t handleSize;
+                umf_result_t ret =
+                    umfGetIPCHandle(ptr, &ipcHandle, &handleSize);
+                ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+                ipcHandles[tid].push_back(ipcHandle);
+            }
+        };
+
+        umf_test::parallel_exec(NTHREADS, getHandlesFn);
+
+        auto putHandlesFn = [&ipcHandles, &syncthreads](size_t tid) {
+            syncthreads();
+            for (umf_ipc_handle_t ipcHandle : ipcHandles[tid]) {
+                umf_result_t ret = umfPutIPCHandle(ipcHandle);
+                EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+            }
+        };
+
+        umf_test::parallel_exec(NTHREADS, putHandlesFn);

-static inline umf_result_t
-get_umf_result_of_free(bool freeNotSupported, umf_result_t expected_result) {
-    if (freeNotSupported) {
-        return UMF_RESULT_ERROR_NOT_SUPPORTED;
+        for (void *ptr : ptrs) {
+            umf_result_t ret = umfPoolFree(pool.get(), ptr);
+            EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+        }
+
+        pool.reset(nullptr);
+        EXPECT_EQ(stat.putCount, stat.getCount);
     }

-    return expected_result;
-}
+    void concurrentGetPutHandles(bool shuffle) {
+        std::vector<void *> ptrs;
+        constexpr size_t ALLOC_SIZE = 100;
+        constexpr size_t NUM_POINTERS = 100;
+        umf_test::pool_unique_handle_t pool = makePool();
+        ASSERT_NE(pool.get(), nullptr);
+
+        for (size_t i = 0; i < NUM_POINTERS; ++i) {
+            void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE);
+            EXPECT_NE(ptr, nullptr);
+            ptrs.push_back(ptr);
+        }
+
+        umf_test::syncthreads_barrier syncthreads(NTHREADS);
+
+        auto getPutHandlesFn = [shuffle, &ptrs, &syncthreads](size_t) {
+            // Each thread gets a copy of the pointers to shuffle them
+            std::vector<void *> localPtrs = ptrs;
+            if (shuffle) {
+                std::random_device rd;
+                std::mt19937 g(rd());
+                std::shuffle(localPtrs.begin(), localPtrs.end(), g);
+            }
+            syncthreads();
+            for (void *ptr : localPtrs) {
+                umf_ipc_handle_t ipcHandle;
+                size_t handleSize;
+                umf_result_t ret =
+                    umfGetIPCHandle(ptr, &ipcHandle, &handleSize);
+                ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+                ret = umfPutIPCHandle(ipcHandle);
+                EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+            }
+        };
+
+        umf_test::parallel_exec(NTHREADS, getPutHandlesFn);
+
+        for (void *ptr : ptrs) {
+            umf_result_t ret = umfPoolFree(pool.get(), ptr);
+            EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+        }
+
+        pool.reset(nullptr);
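+        // every successful get must be balanced by exactly one put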
+        EXPECT_EQ(stat.putCount, stat.getCount);
+    }
+
+    void concurrentOpenConcurrentCloseHandles(bool shuffle) {
+        umf_result_t ret;
+        std::vector<void *> ptrs;
+        constexpr size_t ALLOC_SIZE = 100;
+        constexpr size_t NUM_POINTERS = 100;
+        umf_test::pool_unique_handle_t pool = makePool();
+        ASSERT_NE(pool.get(), nullptr);
+
+        for (size_t i = 0; i < NUM_POINTERS; ++i) {
+            void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE);
+            EXPECT_NE(ptr, nullptr);
+            ptrs.push_back(ptr);
+        }
+
+        std::vector<umf_ipc_handle_t> ipcHandles;
+        for (size_t i = 0; i < NUM_POINTERS; ++i) {
+            umf_ipc_handle_t ipcHandle;
+            size_t handleSize;
+            ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize);
+            ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+            ipcHandles.push_back(ipcHandle);
+        }
+
+        std::array<std::vector<void *>, NTHREADS> openedIpcHandles;
+        umf_ipc_handler_handle_t ipcHandler = nullptr;
+        ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler);
+        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+        ASSERT_NE(ipcHandler, nullptr);
+
+        umf_test::syncthreads_barrier syncthreads(NTHREADS);
+
+        auto openHandlesFn = [shuffle, &ipcHandles, &openedIpcHandles,
+                              &syncthreads, ipcHandler](size_t tid) {
+            // Each thread gets a copy of the pointers to shuffle them
+            std::vector<umf_ipc_handle_t> localIpcHandles = ipcHandles;
+            if (shuffle) {
+                std::random_device rd;
+                std::mt19937 g(rd());
+                std::shuffle(localIpcHandles.begin(), localIpcHandles.end(), g);
+            }
+            syncthreads();
+            for (auto ipcHandle : localIpcHandles) {
+                void *ptr;
+                umf_result_t ret =
+                    umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr);
+                ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+                openedIpcHandles[tid].push_back(ptr);
+            }
+        };
+
+        umf_test::parallel_exec(NTHREADS, openHandlesFn);
+
+        auto closeHandlesFn = [&openedIpcHandles, &syncthreads](size_t tid) {
+            syncthreads();
+            for (void *ptr : openedIpcHandles[tid]) {
+                umf_result_t ret = umfCloseIPCHandle(ptr);
+                EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+            }
+        };
+
+        umf_test::parallel_exec(NTHREADS, closeHandlesFn);
+
+        for (auto ipcHandle : ipcHandles) {
+            ret = umfPutIPCHandle(ipcHandle);
+            EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+        }
+
+        for (void *ptr : ptrs) {
+            ret = umfPoolFree(pool.get(), ptr);
+            EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+        }
+
+        pool.reset(nullptr);
+        EXPECT_EQ(stat.getCount, stat.allocCount);
+        EXPECT_EQ(stat.putCount, stat.getCount);
+        EXPECT_EQ(stat.openCount, stat.allocCount);
+        EXPECT_EQ(stat.openCount, stat.closeCount);
+    }
+
+    void concurrentOpenCloseHandles(bool shuffle) {
+        umf_result_t ret;
+        std::vector<void *> ptrs;
+        constexpr size_t ALLOC_SIZE = 100;
+        constexpr size_t NUM_POINTERS = 100;
+        umf_test::pool_unique_handle_t pool = makePool();
+        ASSERT_NE(pool.get(), nullptr);
+
+        for (size_t i = 0; i < NUM_POINTERS; ++i) {
+            void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE);
+            EXPECT_NE(ptr, nullptr);
+            ptrs.push_back(ptr);
+        }
+
+        std::vector<umf_ipc_handle_t> ipcHandles;
+        for (size_t i = 0; i < NUM_POINTERS; ++i) {
+            umf_ipc_handle_t ipcHandle;
+            size_t handleSize;
+            ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize);
+            ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+            ipcHandles.push_back(ipcHandle);
+        }
+
+        umf_ipc_handler_handle_t ipcHandler = nullptr;
+        ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler);
+        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+        ASSERT_NE(ipcHandler, nullptr);
+
+        umf_test::syncthreads_barrier syncthreads(NTHREADS);
+
+        auto openCloseHandlesFn = [shuffle, &ipcHandles, &syncthreads,
+                                   ipcHandler](size_t) {
+            // Each thread gets a copy of the pointers to shuffle them
+            std::vector<umf_ipc_handle_t> localIpcHandles = ipcHandles;
+            if (shuffle) {
+                std::random_device rd;
+                std::mt19937 g(rd());
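+                // shuffling gives each thread a different open/close order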
+                std::shuffle(localIpcHandles.begin(), localIpcHandles.end(),
+                             g);
+            }
+            syncthreads();
+            for (auto ipcHandle : localIpcHandles) {
+                void *ptr;
+                umf_result_t ret =
+                    umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr);
+                ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+                ret = umfCloseIPCHandle(ptr);
+                EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+            }
+        };
+
+        umf_test::parallel_exec(NTHREADS, openCloseHandlesFn);
+
+        for (auto ipcHandle : ipcHandles) {
+            ret = umfPutIPCHandle(ipcHandle);
+            EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+        }
+
+        for (void *ptr : ptrs) {
+            ret = umfPoolFree(pool.get(), ptr);
+            EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+        }
+
+        pool.reset(nullptr);
+        EXPECT_EQ(stat.getCount, stat.allocCount);
+        EXPECT_EQ(stat.putCount, stat.getCount);
+        if (openedIpcCacheSize == 0) {
+            EXPECT_EQ(stat.openCount, stat.allocCount);
+        }
+        EXPECT_EQ(stat.openCount, stat.closeCount);
+    }
+};

 TEST_P(umfIpcTest, GetIPCHandleSize) {
     size_t size = 0;
-    umf::pool_unique_handle_t pool = makePool();
+    umf_test::pool_unique_handle_t pool = makePool();
+    ASSERT_NE(pool.get(), nullptr);

     umf_result_t ret = umfPoolGetIPCHandleSize(pool.get(), &size);
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
@@ -150,7 +445,9 @@
     umf_result_t ret = umfPoolGetIPCHandleSize(nullptr, &size);
     EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);

-    umf::pool_unique_handle_t pool = makePool();
+    umf_test::pool_unique_handle_t pool = makePool();
+    ASSERT_NE(pool.get(), nullptr);
+
     ret = umfPoolGetIPCHandleSize(pool.get(), nullptr);
     EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);
 }
@@ -166,7 +463,9 @@
     ret = umfGetIPCHandle(ptr, &ipcHandle, &handleSize);
     EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);

-    umf::pool_unique_handle_t pool = makePool();
+    umf_test::pool_unique_handle_t pool = makePool();
+    ASSERT_NE(pool.get(), nullptr);
+
     ptr = umfPoolMalloc(pool.get(), SIZE);
     EXPECT_NE(ptr, nullptr);
@@ -177,8 +476,7 @@
     EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);

     ret = umfFree(ptr);
-    EXPECT_EQ(ret,
-              get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS));
+    EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
 }

 TEST_P(umfIpcTest, CloseIPCHandleInvalidPtr) {
@@ -190,7 +488,9 @@
 TEST_P(umfIpcTest, BasicFlow) {
     constexpr size_t SIZE = 100;
     std::vector<int> expected_data(SIZE);
-    umf::pool_unique_handle_t pool = makePool();
+    umf_test::pool_unique_handle_t pool = makePool();
+    ASSERT_NE(pool.get(), nullptr);
+
     int *ptr = (int *)umfPoolMalloc(pool.get(), SIZE * sizeof(int));
     EXPECT_NE(ptr, nullptr);
@@ -244,8 +544,7 @@
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

     ret = umfPoolFree(pool.get(), ptr);
-    EXPECT_EQ(ret,
-              get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS));
+    EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

     pool.reset(nullptr);
     EXPECT_EQ(stat.getCount, 1);
@@ -254,73 +553,11 @@
     EXPECT_EQ(stat.closeCount, stat.openCount);
 }

-TEST_P(umfIpcTest, GetPoolByOpenedHandle) {
-    constexpr size_t SIZE = 100;
-    constexpr size_t NUM_ALLOCS = 100;
-    constexpr size_t NUM_POOLS = 4;
-    void *ptrs[NUM_ALLOCS];
-    void *openedPtrs[NUM_POOLS][NUM_ALLOCS];
-    std::vector<umf::pool_unique_handle_t> pools_to_open;
-    umf::pool_unique_handle_t pool = makePool();
-
-    for (size_t i = 0; i < NUM_POOLS; ++i) {
-        pools_to_open.push_back(makePool());
-    }
-
-    for (size_t i = 0; i < NUM_ALLOCS; ++i) {
-        void *ptr = umfPoolMalloc(pool.get(), SIZE);
-        ASSERT_NE(ptr, nullptr);
-        ptrs[i] = ptr;
-    }
-
-    for (size_t i = 0; i < NUM_ALLOCS; ++i) {
-        umf_ipc_handle_t ipcHandle = nullptr;
-        size_t handleSize = 0;
-        umf_result_t ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize);
-        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-
-        for (size_t pool_id = 0; pool_id < NUM_POOLS; pool_id++) {
-            void *ptr = nullptr;
-            umf_ipc_handler_handle_t ipcHandler = nullptr;
-            ret =
-                umfPoolGetIPCHandler(pools_to_open[pool_id].get(), &ipcHandler);
-            ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-            ASSERT_NE(ipcHandler, nullptr);
-
-            ret = umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr);
-            ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-            openedPtrs[pool_id][i] = ptr;
-        }
-
-        ret = umfPutIPCHandle(ipcHandle);
-        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-    }
-
-    for (size_t pool_id = 0; pool_id < NUM_POOLS; pool_id++) {
-        for (size_t i = 0; i < NUM_ALLOCS; ++i) {
-            umf_memory_pool_handle_t openedPool =
-                umfPoolByPtr(openedPtrs[pool_id][i]);
-            EXPECT_EQ(openedPool, pools_to_open[pool_id].get());
-        }
-    }
-
-    for (size_t pool_id = 0; pool_id < NUM_POOLS; pool_id++) {
-        for (size_t i = 0; i < NUM_ALLOCS; ++i) {
-            umf_result_t ret = umfCloseIPCHandle(openedPtrs[pool_id][i]);
-            EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
-        }
-    }
-
-    for (size_t i = 0; i < NUM_ALLOCS; ++i) {
-        umf_result_t ret = umfFree(ptrs[i]);
-        EXPECT_EQ(ret,
-                  get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS));
-    }
-}
-
 TEST_P(umfIpcTest, AllocFreeAllocTest) {
     constexpr size_t SIZE = 64 * 1024;
-    umf::pool_unique_handle_t pool = makePool();
+    umf_test::pool_unique_handle_t pool = makePool();
+    ASSERT_NE(pool.get(), nullptr);
+
     umf_ipc_handler_handle_t ipcHandler = nullptr;

     umf_result_t ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler);
@@ -346,8 +583,7 @@
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

     ret = umfPoolFree(pool.get(), ptr);
-    EXPECT_EQ(ret,
-              get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS));
+    EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

     ptr = umfPoolMalloc(pool.get(), SIZE);
     ASSERT_NE(ptr, nullptr);
@@ -369,8 +605,7 @@
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

     ret = umfPoolFree(pool.get(), ptr);
-    EXPECT_EQ(ret,
-              get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS));
+    EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

     pool.reset(nullptr);
     EXPECT_EQ(stat.getCount, stat.putCount);
@@ -381,8 +616,10 @@
 TEST_P(umfIpcTest, openInTwoIpcHandlers) {
     constexpr size_t SIZE = 100;
     std::vector<int> expected_data(SIZE);
-    umf::pool_unique_handle_t pool1 = makePool();
-    umf::pool_unique_handle_t pool2 = makePool();
+    umf_test::pool_unique_handle_t pool1 = makePool();
+    ASSERT_NE(pool1.get(), nullptr);
+    umf_test::pool_unique_handle_t pool2 = makePool();
+    ASSERT_NE(pool2.get(), nullptr);

     umf_ipc_handler_handle_t ipcHandler1 = nullptr;
     umf_ipc_handler_handle_t ipcHandler2 = nullptr;
@@ -432,8 +669,7 @@
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

     ret = umfPoolFree(pool1.get(), ptr);
-    EXPECT_EQ(ret,
-              get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS));
+    EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

     pool1.reset(nullptr);
     pool2.reset(nullptr);
@@ -443,123 +679,102 @@
     EXPECT_EQ(stat.closeCount, stat.openCount);
 }

-TEST_P(umfIpcTest, ConcurrentGetPutHandles) {
-    std::vector<void *> ptrs;
-    constexpr size_t ALLOC_SIZE = 100;
-    constexpr size_t NUM_POINTERS = 100;
-    umf::pool_unique_handle_t pool = makePool();
-
-    for (size_t i = 0; i < NUM_POINTERS; ++i) {
-        void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE);
-        EXPECT_NE(ptr, nullptr);
-        ptrs.push_back(ptr);
-    }
-
-    std::array<std::vector<umf_ipc_handle_t>, NTHREADS> ipcHandles;
+TEST_P(umfIpcTest, ConcurrentGetConcurrentPutHandles) {
+    concurrentGetConcurrentPutHandles(false);
+}

-    umf_test::syncthreads_barrier syncthreads(NTHREADS);
+TEST_P(umfIpcTest, ConcurrentGetConcurrentPutHandlesShuffled) {
+    concurrentGetConcurrentPutHandles(true);
+}

-    auto getHandlesFn = [&ipcHandles, &ptrs, &syncthreads](size_t tid) {
-        syncthreads();
-        for (void *ptr : ptrs) {
-            umf_ipc_handle_t ipcHandle;
-            size_t handleSize;
-            umf_result_t ret = umfGetIPCHandle(ptr, &ipcHandle, &handleSize);
-            ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-            ipcHandles[tid].push_back(ipcHandle);
-        }
-    };
+TEST_P(umfIpcTest, ConcurrentGetPutHandles) { concurrentGetPutHandles(false); }

-    umf_test::parallel_exec(NTHREADS, getHandlesFn);
+TEST_P(umfIpcTest, ConcurrentGetPutHandlesShuffled) {
+    concurrentGetPutHandles(true);
+}

-    auto putHandlesFn = [&ipcHandles, &syncthreads](size_t tid) {
-        syncthreads();
-        for (umf_ipc_handle_t ipcHandle : ipcHandles[tid]) {
-            umf_result_t ret = umfPutIPCHandle(ipcHandle);
-            EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
-        }
-    };
+TEST_P(umfIpcTest, ConcurrentOpenConcurrentCloseHandles) {
+    concurrentOpenConcurrentCloseHandles(false);
+}

-    umf_test::parallel_exec(NTHREADS, putHandlesFn);
+TEST_P(umfIpcTest, ConcurrentOpenConcurrentCloseHandlesShuffled) {
+    concurrentOpenConcurrentCloseHandles(true);
+}

-    for (void *ptr : ptrs) {
-        umf_result_t ret = umfPoolFree(pool.get(), ptr);
-        EXPECT_EQ(ret,
-                  get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS));
-    }
+TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) {
+    concurrentOpenCloseHandles(false);
+}

-    pool.reset(nullptr);
-    EXPECT_EQ(stat.putCount, stat.getCount);
+TEST_P(umfIpcTest, ConcurrentOpenCloseHandlesShuffled) {
+    concurrentOpenCloseHandles(true);
 }

-TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) {
-    umf_result_t ret;
-    std::vector<void *> ptrs;
-    constexpr size_t ALLOC_SIZE = 100;
-    constexpr size_t NUM_POINTERS = 100;
-    umf::pool_unique_handle_t pool = makePool();
-
-    for (size_t i = 0; i < NUM_POINTERS; ++i) {
-        void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE);
-        EXPECT_NE(ptr, nullptr);
-        ptrs.push_back(ptr);
+TEST_P(umfIpcTest, ConcurrentDestroyIpcHandlers) {
+    constexpr size_t SIZE = 100;
+    constexpr size_t NUM_ALLOCS = 100;
+    constexpr size_t NUM_POOLS = 10;
+    void *ptrs[NUM_ALLOCS];
+    void *openedPtrs[NUM_POOLS][NUM_ALLOCS];
+    std::vector<umf_test::pool_unique_handle_t> consumerPools;
+    umf_test::pool_unique_handle_t producerPool = makePool();
+    ASSERT_NE(producerPool.get(), nullptr);
+
+    for (size_t i = 0; i < NUM_POOLS; ++i) {
+        consumerPools.push_back(makePool());
     }

-    std::array<umf_ipc_handle_t, NUM_POINTERS> ipcHandles;
-    for (size_t i = 0; i < NUM_POINTERS; ++i) {
-        umf_ipc_handle_t ipcHandle;
-        size_t handleSize;
-        ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize);
-        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-        ipcHandles[i] = ipcHandle;
+    for (size_t i = 0; i < NUM_ALLOCS; ++i) {
+        void *ptr = umfPoolMalloc(producerPool.get(), SIZE);
+        ASSERT_NE(ptr, nullptr);
+        ptrs[i] = ptr;
     }

-    std::array<std::vector<void *>, NTHREADS> openedIpcHandles;
-    umf_ipc_handler_handle_t ipcHandler = nullptr;
-    ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler);
-    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-    ASSERT_NE(ipcHandler, nullptr);
+    for (size_t i = 0; i < NUM_ALLOCS; ++i) {
+        umf_ipc_handle_t ipcHandle = nullptr;
+        size_t handleSize = 0;
+        umf_result_t ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize);
+        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);

-    umf_test::syncthreads_barrier syncthreads(NTHREADS);
+        for (size_t poolId = 0; poolId < NUM_POOLS; poolId++) {
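+            // open each handle in every consumer pool so each pool's IPC cache gets an entry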
+            void *ptr = nullptr;
+            umf_ipc_handler_handle_t ipcHandler = nullptr;
+            ret =
+                umfPoolGetIPCHandler(consumerPools[poolId].get(), &ipcHandler);
+            ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+            ASSERT_NE(ipcHandler, nullptr);

-    auto openHandlesFn = [&ipcHandles, &openedIpcHandles, &syncthreads,
-                          ipcHandler](size_t tid) {
-        syncthreads();
-        for (auto ipcHandle : ipcHandles) {
-            void *ptr;
-            umf_result_t ret = umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr);
+            ret = umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr);
             ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-            openedIpcHandles[tid].push_back(ptr);
+            openedPtrs[poolId][i] = ptr;
         }
-    };

-    umf_test::parallel_exec(NTHREADS, openHandlesFn);
+        ret = umfPutIPCHandle(ipcHandle);
+        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+    }

-    auto closeHandlesFn = [&openedIpcHandles, &syncthreads](size_t tid) {
-        syncthreads();
-        for (void *ptr : openedIpcHandles[tid]) {
-            umf_result_t ret = umfCloseIPCHandle(ptr);
+    for (size_t poolId = 0; poolId < NUM_POOLS; poolId++) {
+        for (size_t i = 0; i < NUM_ALLOCS; ++i) {
+            umf_result_t ret = umfCloseIPCHandle(openedPtrs[poolId][i]);
             EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
         }
-    };
-
-    umf_test::parallel_exec(NTHREADS, closeHandlesFn);
+    }

-    for (auto ipcHandle : ipcHandles) {
-        ret = umfPutIPCHandle(ipcHandle);
+    for (size_t i = 0; i < NUM_ALLOCS; ++i) {
+        umf_result_t ret = umfFree(ptrs[i]);
         EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
     }

-    for (void *ptr : ptrs) {
-        ret = umfPoolFree(pool.get(), ptr);
-        EXPECT_EQ(ret,
-                  get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS));
-    }
+    // Destroy pools in parallel to cause IPC cache cleanup in parallel.
+    umf_test::syncthreads_barrier syncthreads(NUM_POOLS);
+    auto poolDestroyFn = [&consumerPools, &syncthreads](size_t tid) {
+        syncthreads();
+        consumerPools[tid].reset(nullptr);
+    };
+    umf_test::parallel_exec(NUM_POOLS, poolDestroyFn);
+
+    producerPool.reset(nullptr);

-    pool.reset(nullptr);
-    EXPECT_EQ(stat.getCount, stat.allocCount);
     EXPECT_EQ(stat.putCount, stat.getCount);
-    EXPECT_EQ(stat.openCount, stat.allocCount);
     EXPECT_EQ(stat.openCount, stat.closeCount);
 }
diff --git a/test/ipc_devdax_prov.sh b/test/ipc_devdax_prov.sh
index 7c5ba36752..43f177c713 100755
--- a/test/ipc_devdax_prov.sh
+++ b/test/ipc_devdax_prov.sh
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 #
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -25,10 +25,10 @@
 PORT=$(( 1024 + ( $$ % ( 65535 - 1024 ))))

 UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes"

 echo "Starting ipc_devdax_prov CONSUMER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_devdax_prov_consumer $PORT &
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_devdax_prov_consumer $PORT &

 echo "Waiting 1 sec ..."
 sleep 1

 echo "Starting ipc_devdax_prov PRODUCER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_devdax_prov_producer $PORT
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_devdax_prov_producer $PORT
diff --git a/test/ipc_file_prov.sh b/test/ipc_file_prov.sh
index b3e3091a8f..ffb849f25a 100755
--- a/test/ipc_file_prov.sh
+++ b/test/ipc_file_prov.sh
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 #
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -9,7 +9,12 @@

 set -e

-FILE_NAME="/tmp/umf_file_provider_$$"
+FILE_BASE="/tmp/umf_file_provider"
+
+# remove old SHM files left over from previous runs (e.g. after a crash)
+rm -f ${FILE_BASE}*
+
+FILE_NAME="${FILE_BASE}_$$"

 # port should be a number from the range <1024, 65535>
 PORT=$(( 1024 + ( $$ % ( 65535 - 1024 ))))
@@ -20,13 +25,13 @@
 UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes"

 rm -f ${FILE_NAME}

 echo "Starting ipc_file_prov CONSUMER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_consumer $PORT ${FILE_NAME}_consumer &
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_file_prov_consumer $PORT ${FILE_NAME}_consumer &

 echo "Waiting 1 sec ..."
 sleep 1

 echo "Starting ipc_file_prov PRODUCER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_producer $PORT ${FILE_NAME}_producer
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_file_prov_producer $PORT ${FILE_NAME}_producer

 # remove the SHM file
 rm -f ${FILE_NAME}
diff --git a/test/ipc_file_prov_fsdax.sh b/test/ipc_file_prov_fsdax.sh
index 4e908869b7..314d0aa667 100755
--- a/test/ipc_file_prov_fsdax.sh
+++ b/test/ipc_file_prov_fsdax.sh
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 #
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -31,13 +31,13 @@
 UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes"

 rm -f ${FILE_NAME}

 echo "Starting ipc_file_prov_fsdax CONSUMER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_consumer $PORT $FILE_NAME &
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_file_prov_consumer $PORT $FILE_NAME &

 echo "Waiting 1 sec ..."
 sleep 1

 echo "Starting ipc_file_prov_fsdax PRODUCER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_producer $PORT $FILE_NAME_2
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_file_prov_producer $PORT $FILE_NAME_2

 # remove the SHM file
 rm -f ${FILE_NAME}
diff --git a/test/ipc_os_prov_anon_fd.sh b/test/ipc_os_prov_anon_fd.sh
index c5738e9893..4e9a0f8327 100755
--- a/test/ipc_os_prov_anon_fd.sh
+++ b/test/ipc_os_prov_anon_fd.sh
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 #
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -12,28 +12,13 @@

 set -e

 # port should be a number from the range <1024, 65535>
 PORT=$(( 1024 + ( $$ % ( 65535 - 1024 ))))

-# The ipc_os_prov_anon_fd example requires using pidfd_getfd(2)
-# to obtain a duplicate of another process's file descriptor.
-# Permission to duplicate another process's file descriptor
-# is governed by a ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check (see ptrace(2))
-# that can be changed using the /proc/sys/kernel/yama/ptrace_scope interface.
-PTRACE_SCOPE_FILE="/proc/sys/kernel/yama/ptrace_scope"
-VAL=0
-if [ -f $PTRACE_SCOPE_FILE ]; then
-    PTRACE_SCOPE_VAL=$(cat $PTRACE_SCOPE_FILE)
-    if [ $PTRACE_SCOPE_VAL -ne $VAL ]; then
-        echo "SKIP: ptrace_scope is not set to 0 (classic ptrace permissions) - skipping the test"
-        exit 125 # skip code defined in CMakeLists.txt
-    fi
-fi
-
 UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes"

 echo "Starting ipc_os_prov_anon_fd CONSUMER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_consumer $PORT &
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_os_prov_consumer $PORT &

 echo "Waiting 1 sec ..."
 sleep 1

 echo "Starting ipc_os_prov_anon_fd PRODUCER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_producer $PORT
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_os_prov_producer $PORT
diff --git a/test/ipc_os_prov_proxy.sh b/test/ipc_os_prov_proxy.sh
index 86b95a2356..9bd02dad84 100755
--- a/test/ipc_os_prov_proxy.sh
+++ b/test/ipc_os_prov_proxy.sh
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 #
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -17,10 +17,10 @@
 LD_PRELOAD_VAL="../lib/libumf_proxy.so"
 PORT=$(( 1024 + ( $$ % ( 65535 - 1024 ))))

 echo "Starting CONSUMER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_consumer $PORT &
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_os_prov_consumer $PORT &

 echo "Waiting 1 sec ..."
 sleep 1

 echo "Starting ipc_os_prov_proxy PRODUCER on port $PORT ..."
-LD_PRELOAD=$LD_PRELOAD_VAL UMF_LOG=$UMF_LOG_VAL UMF_PROXY=$UMF_PROXY_VAL ./umf_test-ipc_os_prov_proxy $PORT
+LD_PRELOAD=$LD_PRELOAD_VAL UMF_LOG=$UMF_LOG_VAL UMF_PROXY=$UMF_PROXY_VAL ./test_ipc_os_prov_proxy $PORT
diff --git a/test/ipc_os_prov_shm.sh b/test/ipc_os_prov_shm.sh
index efa2de35ae..7bde3c6135 100755
--- a/test/ipc_os_prov_shm.sh
+++ b/test/ipc_os_prov_shm.sh
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 #
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -20,13 +20,13 @@
 UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes"

 rm -f /dev/shm/${SHM_NAME}

 echo "Starting ipc_os_prov_shm CONSUMER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_consumer $PORT &
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_os_prov_consumer $PORT &

 echo "Waiting 1 sec ..."
 sleep 1

 echo "Starting ipc_os_prov_shm PRODUCER on port $PORT ..."
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_producer $PORT $SHM_NAME
+UMF_LOG=$UMF_LOG_VAL ./test_ipc_os_prov_producer $PORT $SHM_NAME

 # remove the SHM file
 rm -f /dev/shm/${SHM_NAME}
diff --git a/test/malloc_compliance_tests.cpp b/test/malloc_compliance_tests.cpp
index 06e3b5dd78..b91bde1f6d 100644
--- a/test/malloc_compliance_tests.cpp
+++ b/test/malloc_compliance_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
diff --git a/test/memoryPoolAPI.cpp b/test/memoryPoolAPI.cpp
index 1c6d83f2af..e8071a2d81 100644
--- a/test/memoryPoolAPI.cpp
+++ b/test/memoryPoolAPI.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 // This file contains tests for UMF pool API
@@ -12,6 +12,7 @@
 #include "test_helpers.h"

 #include 
+#include 
 #include 

 #ifdef UMF_PROXY_LIB_ENABLED
@@ -124,7 +125,7 @@
             return UMF_RESULT_SUCCESS;
         }
     };
-    umf_memory_pool_ops_t pool_ops = umf::poolMakeCOps();
+    umf_memory_pool_ops_t pool_ops = umf_test::poolMakeCOps();

     umf_memory_pool_handle_t hPool;
     auto ret = umfPoolCreate(&pool_ops, hProvider, nullptr, flags, &hPool);
@@ -178,18 +179,135 @@
     ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
 }

+struct tagTest : umf_test::test {
+    void SetUp() override {
+        test::SetUp();
+        provider = umf_test::wrapProviderUnique(nullProviderCreate());
+        pool = umf_test::wrapPoolUnique(
+            createPoolChecked(umfProxyPoolOps(), provider.get(), nullptr));
+    }
+
+    umf_test::provider_unique_handle_t provider;
+    umf_test::pool_unique_handle_t pool;
+};
+
+TEST_F(tagTest, SetAndGet) {
+    umf_result_t ret = umfPoolSetTag(pool.get(), (void *)0x99, nullptr);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+
+    void *tag;
+    ret = umfPoolGetTag(pool.get(), &tag);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+    ASSERT_EQ(tag, (void *)0x99);
+
+    void *oldTag;
+    ret = umfPoolSetTag(pool.get(), (void *)0x100, &oldTag);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+    ASSERT_EQ(oldTag, (void *)0x99);
+
+    ret = umfPoolGetTag(pool.get(), &tag);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+    ASSERT_EQ(tag, (void *)0x100);
+}
+
+TEST_F(tagTest, SetAndGetNull) {
+    umf_result_t ret = umfPoolSetTag(pool.get(), nullptr, nullptr);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+
+    void *tag;
+    ret = umfPoolGetTag(pool.get(), &tag);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+    ASSERT_EQ(tag, nullptr);
+}
+
+TEST_F(tagTest, NoSetAndGet) {
+    void *tag;
+    umf_result_t ret = umfPoolGetTag(pool.get(), &tag);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+    ASSERT_EQ(tag, nullptr);
+}
+
+TEST_F(tagTest, SetAndGetMt) {
+    static constexpr size_t NUM_THREADS = 8;
+    static constexpr size_t NUM_OPS_PER_THREAD = 16;
+
+    std::vector<std::thread> threads;
+
+    auto encodeTag = [](size_t thread, size_t op) -> void * {
+        return reinterpret_cast<void *>(thread * NUM_OPS_PER_THREAD + op);
+    };
+
+    auto decodeTag = [](void *tag) -> std::pair<size_t, size_t> {
+        auto op = reinterpret_cast<size_t>(tag) & (NUM_OPS_PER_THREAD - 1);
+        auto thread = reinterpret_cast<size_t>(tag) / NUM_OPS_PER_THREAD;
+        return {thread, op};
+    };
+
+    for (size_t i = 0; i < NUM_THREADS; i++) {
+        threads.emplace_back([this, i, encodeTag, decodeTag] {
+            for (size_t j = 0; j < NUM_OPS_PER_THREAD; j++) {
+                void *oldTag;
+                umf_result_t ret =
+                    umfPoolSetTag(pool.get(), encodeTag(i, j), &oldTag);
+                ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+
+                void *queriedTag;
+                ret = umfPoolGetTag(pool.get(), &queriedTag);
+                ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+
+                auto [t1, op1] = decodeTag(oldTag);
+                auto [t2, op2] = decodeTag(queriedTag);
+                // if the tag was set by the same thread, the op part should be the same or higher
+                ASSERT_TRUE(t1 != t2 || op2 >= op1);
+            }
+        });
+    }
+
+    for (auto &thread : threads) {
+        thread.join();
+    }
+
+    void *tag;
+    auto ret = umfPoolGetTag(pool.get(), &tag);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+
+    auto [t, op] = decodeTag(tag);
+    ASSERT_TRUE(t < NUM_THREADS);
+    ASSERT_TRUE(op == NUM_OPS_PER_THREAD - 1);
+}
+
+TEST_F(tagTest, SetAndGetInvalidPtr) {
+    umf_result_t ret = umfPoolSetTag(pool.get(), nullptr, nullptr);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+
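+    // a null out-parameter must be rejected, not dereferenced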
+    ret = umfPoolGetTag(pool.get(), nullptr);
+    ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);
+}
+
+TEST_F(tagTest, SetAndGetInvalidPool) {
+    umf_result_t ret =
+        umfPoolSetTag(nullptr, reinterpret_cast<void *>(0x1), nullptr);
+    ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);
+
+    void *tag;
+    ret = umfPoolGetTag(nullptr, &tag);
+    ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);
+}
+
 INSTANTIATE_TEST_SUITE_P(
     mallocPoolTest, umfPoolTest,
-    ::testing::Values(poolCreateExtParams{&MALLOC_POOL_OPS, nullptr,
-                                          &UMF_NULL_PROVIDER_OPS, nullptr,
-                                          nullptr},
-                      poolCreateExtParams{umfProxyPoolOps(), nullptr,
-                                          &BA_GLOBAL_PROVIDER_OPS, nullptr,
-                                          nullptr}));
+    ::testing::Values(
+        poolCreateExtParams{&MALLOC_POOL_OPS, nullptr, nullptr,
+                            &UMF_NULL_PROVIDER_OPS, nullptr, nullptr},
+        poolCreateExtParams{umfProxyPoolOps(), nullptr, nullptr,
+                            &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr},
+        poolCreateExtParams{umfDisjointPoolOps(), defaultDisjointPoolConfig,
+                            defaultDisjointPoolConfigDestroy,
+                            &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr}));

 INSTANTIATE_TEST_SUITE_P(mallocMultiPoolTest, umfMultiPoolTest,
                          ::testing::Values(poolCreateExtParams{
-                             umfProxyPoolOps(), nullptr,
+                             umfProxyPoolOps(), nullptr, nullptr,
                              &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr}));

 INSTANTIATE_TEST_SUITE_P(umfPoolWithCreateFlagsTest, umfPoolWithCreateFlagsTest,
@@ -252,7 +370,8 @@
             return *errorToReturn;
         }
     };
-    umf_memory_pool_ops_t pool_ops = umf::poolMakeCOps();
+    umf_memory_pool_ops_t pool_ops =
+        umf_test::poolMakeCOps();

     umf_memory_pool_handle_t hPool;
     auto ret = umfPoolCreate(&pool_ops, hProvider, (void *)&this->GetParam(), 0,
@@ -302,7 +421,7 @@
         const char *name;
     };
     umf_memory_provider_ops_t provider_ops =
-        umf::providerMakeCOps();
+        umf_test::providerMakeCOps();

     auto providerUnique1 = wrapProviderUnique(
         createProviderChecked(&provider_ops, (void *)"provider1"));
diff --git a/test/memoryProviderAPI.cpp b/test/memoryProviderAPI.cpp
index 866ae6dae3..720f11b413 100644
--- a/test/memoryProviderAPI.cpp
+++ b/test/memoryProviderAPI.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 // This file contains tests for UMF provider API
@@ -89,19 +89,6 @@
 TEST_F(test, memoryProviderTrace) {
     ASSERT_EQ(calls.size(), ++call_count);
 }

-TEST_F(test, memoryProviderOpsNullFreeField) {
-    umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS;
-    provider_ops.ext.free = nullptr;
-    umf_memory_provider_handle_t hProvider;
-    auto ret = umfMemoryProviderCreate(&provider_ops, nullptr, &hProvider);
-    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-
-    ret = umfMemoryProviderFree(hProvider, nullptr, 0);
-    ASSERT_EQ(ret, UMF_RESULT_ERROR_NOT_SUPPORTED);
-
-    umfMemoryProviderDestroy(hProvider);
-}
-
 TEST_F(test, memoryProviderOpsNullPurgeLazyField) {
     umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS;
     provider_ops.ext.purge_lazy = nullptr;
@@ -204,6 +191,14 @@
 TEST_F(test, memoryProviderOpsNullAllocField) {
     ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);
 }

+TEST_F(test, memoryProviderOpsNullFreeField) {
+    umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS;
+    provider_ops.free = nullptr;
+    umf_memory_provider_handle_t hProvider;
+    auto ret = umfMemoryProviderCreate(&provider_ops, nullptr, &hProvider);
+    ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);
+}
+
 TEST_F(test, memoryProviderOpsNullGetLastNativeErrorField) {
     umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS;
     provider_ops.get_last_native_error = nullptr;
@@ -340,7 +335,7 @@
 TEST_P(providerInitializeTest, errorPropagation) {
         }
     };
     umf_memory_provider_ops_t provider_ops =
-        umf::providerMakeCOps();
+        umf_test::providerMakeCOps();

     umf_memory_provider_handle_t hProvider;
     auto ret = umfMemoryProviderCreate(&provider_ops, (void *)&this->GetParam(),
diff --git a/test/memspaces/mempolicy.cpp b/test/memspaces/mempolicy.cpp
index 97948bfbb7..7b9c4891d3 100644
--- a/test/memspaces/mempolicy.cpp
+++ b/test/memspaces/mempolicy.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2024-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -7,11 +7,7 @@
 #include "provider_os_memory_internal.h"

 os_memory_provider_t *providerGetPriv(umf_memory_provider_handle_t hProvider) {
-    // hack to have access to fields in structure defined in memory_provider.c
-    struct umf_memory_provider_t {
-        umf_memory_provider_ops_t ops;
-        void *provider_priv;
-    } *provider = (struct umf_memory_provider_t *)hProvider;
+    umf_memory_provider_t *provider = (umf_memory_provider_t *)hProvider;

     return (os_memory_provider_t *)provider->provider_priv;
 }
diff --git a/test/poolFixtures.hpp b/test/poolFixtures.hpp
index e5ec85012c..5f39e021d0 100644
--- a/test/poolFixtures.hpp
+++ b/test/poolFixtures.hpp
@@ -1,16 +1,10 @@
-// Copyright (C) 2023 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 #ifndef UMF_TEST_POOL_FIXTURES_HPP
 #define UMF_TEST_POOL_FIXTURES_HPP 1

-#include "pool.hpp"
-#include "provider.hpp"
-#include "umf/providers/provider_coarse.h"
-#include "umf/providers/provider_devdax_memory.h"
-#include "utils/utils_sanitizers.h"
-
 #include 
 #include 
 #include 
@@ -18,21 +12,39 @@
 #include 
 #include 

+#include 
+#include 
+#include 
+
 #include "../malloc_compliance_tests.hpp"
+#include "pool.hpp"
+#include "provider.hpp"
+#include "utils/utils_sanitizers.h"
+
+typedef void *(*pfnPoolParamsCreate)();
+typedef umf_result_t (*pfnPoolParamsDestroy)(void *);
+
+typedef void *(*pfnProviderParamsCreate)();
+typedef umf_result_t (*pfnProviderParamsDestroy)(void *);

 using poolCreateExtParams =
-    std::tuple<umf_memory_pool_ops_t *, void *, umf_memory_provider_ops_t *,
-               void *, void *>;
+    std::tuple<umf_memory_pool_ops_t *, pfnPoolParamsCreate,
+               pfnPoolParamsDestroy, umf_memory_provider_ops_t *,
+               pfnProviderParamsCreate, pfnProviderParamsDestroy>;

-umf::pool_unique_handle_t poolCreateExtUnique(poolCreateExtParams params) {
-    auto [pool_ops, pool_params, provider_ops, provider_params, coarse_params] =
-        params;
+umf_test::pool_unique_handle_t poolCreateExtUnique(poolCreateExtParams params) {
+    auto [pool_ops, poolParamsCreate, poolParamsDestroy, provider_ops,
+          providerParamsCreate, providerParamsDestroy] = params;

     umf_memory_provider_handle_t upstream_provider = nullptr;
     umf_memory_provider_handle_t provider = nullptr;
     umf_memory_pool_handle_t hPool = nullptr;
     umf_result_t ret;

+    void *provider_params = NULL;
+    if (providerParamsCreate) {
+        provider_params = providerParamsCreate();
+    }
     ret = umfMemoryProviderCreate(provider_ops, provider_params,
                                   &upstream_provider);
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
@@ -40,28 +52,28 @@

     provider = upstream_provider;

-    if (coarse_params) {
-        coarse_memory_provider_params_t *coarse_memory_provider_params =
-            (coarse_memory_provider_params_t *)coarse_params;
-        coarse_memory_provider_params->upstream_memory_provider =
-            upstream_provider;
-        coarse_memory_provider_params->destroy_upstream_memory_provider = true;
-
-        umf_memory_provider_handle_t coarse_provider = nullptr;
-        ret = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(),
-                                      coarse_params, &coarse_provider);
-        EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
-        EXPECT_NE(coarse_provider, nullptr);
-
-        provider = coarse_provider;
+    void *pool_params = NULL;
+    if (poolParamsCreate) {
+        pool_params = poolParamsCreate();
     }

+    // NOTE: we set the UMF_POOL_CREATE_FLAG_OWN_PROVIDER flag here so the pool
+    // will destroy the provider when it is destroyed
     ret = umfPoolCreate(pool_ops, provider, pool_params,
                         UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool);
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
     EXPECT_NE(hPool, nullptr);

-    return umf::pool_unique_handle_t(hPool, &umfPoolDestroy);
+    // we do not need params anymore
+    if (poolParamsDestroy) {
+        poolParamsDestroy(pool_params);
+    }
+
+    if (providerParamsDestroy) {
+        providerParamsDestroy(provider_params);
+    }
+
+    return umf_test::pool_unique_handle_t(hPool, &umfPoolDestroy);
 }

 struct umfPoolTest : umf_test::test,
@@ -74,7 +86,7 @@

     void TearDown() override { test::TearDown(); }

-    umf::pool_unique_handle_t pool;
+    umf_test::pool_unique_handle_t pool;

     static constexpr int NTHREADS = 5;
     static constexpr std::array nonAlignedAllocSizes = {5, 7, 23, 55,
@@ -94,7 +106,7 @@

     void TearDown() override { test::TearDown(); }

-    std::vector<umf::pool_unique_handle_t> pools;
+    std::vector<umf_test::pool_unique_handle_t> pools;
 };

 struct umfMemTest
@@ -111,7 +123,7 @@

     void TearDown() override { test::TearDown(); }

-    umf::pool_unique_handle_t pool;
 struct umfPoolTest : umf_test::test,
@@ -74,7 +86,7 @@ struct umfPoolTest : umf_test::test,

     void TearDown() override { test::TearDown(); }

-    umf::pool_unique_handle_t pool;
+    umf_test::pool_unique_handle_t pool;

     static constexpr int NTHREADS = 5;
     static constexpr std::array nonAlignedAllocSizes = {5, 7, 23, 55,
@@ -94,7 +106,7 @@ struct umfMultiPoolTest : umf_test::test,

     void TearDown() override { test::TearDown(); }

-    std::vector pools;
+    std::vector pools;
 };

 struct umfMemTest
@@ -111,7 +123,7 @@ struct umfMemTest

     void TearDown() override { test::TearDown(); }

-    umf::pool_unique_handle_t pool;
+    umf_test::pool_unique_handle_t pool;
     int expectedRecycledPoolAllocs;
 };

@@ -124,7 +136,8 @@ TEST_P(umfPoolTest, allocFree) {
     auto *ptr = umfPoolMalloc(pool.get(), allocSize);
     ASSERT_NE(ptr, nullptr);
     std::memset(ptr, 0, allocSize);
-    umfPoolFree(pool.get(), ptr);
+    umf_result_t umf_result = umfPoolFree(pool.get(), ptr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
 }

@@ -132,10 +145,33 @@ TEST_P(umfPoolTest, allocFreeNonAlignedSizes) {
         auto *ptr = umfPoolMalloc(pool.get(), allocSize);
         ASSERT_NE(ptr, nullptr);
         std::memset(ptr, 0, allocSize);
-        umfPoolFree(pool.get(), ptr);
+        umf_result_t umf_result = umfPoolFree(pool.get(), ptr);
+        ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
     }
 }

+TEST_P(umfPoolTest, allocFreeAligned) {
+// ::aligned_alloc(alignment=4096, size=1) does not work under sanitizers for an unknown reason
+#if defined(_WIN32) || defined(__SANITIZE_ADDRESS__) ||                        \
+    defined(__SANITIZE_THREAD__)
+    // TODO: implement support for Windows
+    GTEST_SKIP();
+#else
+    if (!umf_test::isAlignedAllocSupported(pool.get())) {
+        GTEST_SKIP();
+    }
+
+    size_t alignment = 4 * 1024; // 4kB
+    void *ptr = umfPoolAlignedMalloc(pool.get(), 1, alignment);
+    ASSERT_NE(ptr, nullptr);
+    ASSERT_TRUE(reinterpret_cast(ptr) % alignment == 0);
+    *(reinterpret_cast(ptr)) = (unsigned char)0xFF;
+
+    umf_result_t umf_result = umfPoolFree(pool.get(), ptr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+#endif
+}
+
 TEST_P(umfPoolTest, reallocFree) {
     if (!umf_test::isReallocSupported(pool.get())) {
         GTEST_SKIP();
@@ -148,7 +184,8 @@ TEST_P(umfPoolTest, reallocFree) {
     auto *new_ptr = umfPoolRealloc(pool.get(), ptr, allocSize * multiplier);
     ASSERT_NE(new_ptr, nullptr);
     std::memset(new_ptr, 0, allocSize * multiplier);
-    umfPoolFree(pool.get(), new_ptr);
+    umf_result_t umf_result = umfPoolFree(pool.get(), new_ptr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
 }

@@ -162,7 +199,8 @@ TEST_P(umfPoolTest, callocFree) {
     for (size_t i = 0; i < num; ++i) {
         ASSERT_EQ(((int *)ptr)[i], 0);
     }
-    umfPoolFree(pool.get(), ptr);
+    umf_result_t umf_result = umfPoolFree(pool.get(), ptr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
 }

 void pow2AlignedAllocHelper(umf_memory_pool_handle_t pool) {
@@ -183,9 +221,31 @@ void pow2AlignedAllocHelper(umf_memory_pool_handle_t pool) {
         }

         for (auto &ptr : allocs) {
-            umfPoolFree(pool, ptr);
+            umf_result_t umf_result = umfPoolFree(pool, ptr);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+        }
+    }
+
+// ::aligned_alloc(alignment=4096, size=1) does not work under sanitizers for an unknown reason
+#if !defined(__SANITIZE_ADDRESS__) && !defined(__SANITIZE_THREAD__)
+    // the same checks for size = 1
+    for (size_t alignment = 1; alignment <= maxAlignment; alignment <<= 1) {
+        std::vector allocs;
+
+        for (size_t alloc = 0; alloc < numAllocs; alloc++) {
+            auto *ptr = umfPoolAlignedMalloc(pool, 1, alignment);
+            ASSERT_NE(ptr, nullptr);
+            ASSERT_TRUE(reinterpret_cast(ptr) % alignment == 0);
+            *(reinterpret_cast(ptr)) = (unsigned char)0xFF;
+            allocs.push_back(ptr);
+        }
+
+        for (auto &ptr : allocs) {
+            umf_result_t umf_result = umfPoolFree(pool, ptr);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
         }
     }
+#endif
 }

 TEST_P(umfPoolTest, pow2AlignedAlloc) {
@@ -215,7 +275,8 @@ TEST_P(umfPoolTest, multiThreadedMallocFree) {
         }

         for (auto allocation : allocations) {
-            umfPoolFree(inPool, allocation);
+            umf_result_t umf_result = umfPoolFree(inPool, allocation);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
         }
     };
@@ -268,7 +329,8 @@ TEST_P(umfPoolTest, multiThreadedReallocFree) {
         for (auto allocation : allocations) {
             auto *ptr =
                 umfPoolRealloc(inPool, allocation, allocSize * multiplier);
-            umfPoolFree(inPool, ptr);
+            umf_result_t umf_result = umfPoolFree(inPool, ptr);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
         }
     };

@@ -298,7 +360,8 @@ TEST_P(umfPoolTest, multiThreadedCallocFree) {
         }

         for (auto allocation : allocations) {
-            umfPoolFree(inPool, allocation);
+            umf_result_t umf_result = umfPoolFree(inPool, allocation);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
         }
     };

@@ -323,7 +386,8 @@ TEST_P(umfPoolTest, multiThreadedMallocFreeRandomSizes) {
         }

         for (auto allocation : allocations) {
-            umfPoolFree(inPool, allocation);
+            umf_result_t umf_result = umfPoolFree(inPool, allocation);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
         }
     };

@@ -363,7 +427,8 @@ TEST_P(umfMemTest, outOfMem) {
     ASSERT_NE(allocations.back(), nullptr);

     for (int i = 0; i < expectedRecycledPoolAllocs; i++) {
-        umfPoolFree(hPool, allocations.back());
+        umf_result_t umf_result = umfPoolFree(hPool, allocations.back());
+        ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
         allocations.pop_back();
     }

@@ -373,7 +438,8 @@ TEST_P(umfMemTest, outOfMem) {
     }

     for (auto allocation : allocations) {
-        umfPoolFree(hPool, allocation);
+        umf_result_t umf_result = umfPoolFree(hPool, allocation);
+        ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
     }
 }

@@ -442,26 +508,183 @@ TEST_P(umfPoolTest, allocMaxSize) {
 }

 TEST_P(umfPoolTest, mallocUsableSize) {
+    [[maybe_unused]] auto pool_ops = std::get<0>(this->GetParam());
+#ifdef _WIN32
+    if (pool_ops == &umf_test::MALLOC_POOL_OPS) {
+        GTEST_SKIP()
+            << "Windows Malloc Pool does not support umfPoolAlignedMalloc";
+    }
+#endif
+    if (!umf_test::isAlignedAllocSupported(pool.get())) {
+        GTEST_SKIP();
+    }
 #ifdef __SANITIZE_ADDRESS__
-    // Sanitizer replaces malloc_usable_size implementation with its own
-    GTEST_SKIP()
-        << "This test is invalid with AddressSanitizer instrumentation";
-#else
+    if (pool_ops == &umf_test::MALLOC_POOL_OPS) {
+        // Sanitizer replaces malloc_usable_size implementation with its own
+        GTEST_SKIP()
+            << "This test is invalid with AddressSanitizer instrumentation";
+    }
+#endif
+    for (size_t allocSize :
+         {32, 64, 1 << 6, 1 << 10, 1 << 13, 1 << 16, 1 << 19}) {
+        for (size_t alignment : {0, 1 << 6, 1 << 8, 1 << 12}) {
+            if (alignment >= allocSize) {
+                continue;
+            }
+            void *ptr = nullptr;
+            if (alignment == 0) {
+                ptr = umfPoolMalloc(pool.get(), allocSize);
+            } else {
+                ptr = umfPoolAlignedMalloc(pool.get(), allocSize, alignment);
+            }
+            ASSERT_NE(ptr, nullptr);
+            size_t result = umfPoolMallocUsableSize(pool.get(), ptr);
+            ASSERT_TRUE(result == 0 || result >= allocSize);

-    for (size_t allocSize : {32, 48, 1024, 8192}) {
-        char *ptr = static_cast(umfPoolMalloc(pool.get(), allocSize));
-        ASSERT_NE(ptr, nullptr);
-        size_t result = umfPoolMallocUsableSize(pool.get(), ptr);
-        ASSERT_TRUE(result == 0 || result >= allocSize);
+            // Make sure we can write to this memory
+            memset(ptr, 123, result);

-        // Make sure we can write to this memory
-        for (size_t i = 0; i < result; i++) {
-            ptr[i] = 123;
+            umf_result_t umf_result = umfPoolFree(pool.get(), ptr);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
         }
+    }
+}

-        umfPoolFree(pool.get(), ptr);
+TEST_P(umfPoolTest, umfPoolAlignedMalloc) {
+#ifdef _WIN32
+    // TODO: implement support for Windows
+    GTEST_SKIP() << "umfPoolAlignedMalloc() is not supported on Windows";
+#else /* !_WIN32 */
+    umf_result_t umf_result;
+    void *ptr = nullptr;
+    const size_t size = 2 * 1024 * 1024; // 2MB
+
+    umf_memory_pool_handle_t pool_get = pool.get();
+
+    if (!umf_test::isAlignedAllocSupported(pool_get)) {
+        GTEST_SKIP();
     }
-#endif
+
+    ptr = umfPoolAlignedMalloc(pool_get, size, utils_get_page_size());
+    ASSERT_NE(ptr, nullptr);
+
+    umf_result = umfPoolFree(pool_get, ptr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+#endif /* !_WIN32 */
+}
+
+TEST_P(umfPoolTest, pool_from_ptr_whole_size_success) {
+#ifdef _WIN32
+    // TODO: implement support for Windows
+    GTEST_SKIP() << "umfPoolAlignedMalloc() is not supported on Windows";
+#else /* !_WIN32 */
+    umf_result_t umf_result;
+    size_t size_of_pool_from_ptr;
+    void *ptr_for_pool = nullptr;
+    void *ptr = nullptr;
+
+    umf_memory_pool_handle_t pool_get = pool.get();
+    const size_t size_of_first_alloc = 2 * 1024 * 1024; // 2MB
+
+    if (!umf_test::isAlignedAllocSupported(pool_get)) {
+        GTEST_SKIP();
+    }
+
+    ptr_for_pool = umfPoolAlignedMalloc(pool_get, size_of_first_alloc,
+                                        utils_get_page_size());
+    ASSERT_NE(ptr_for_pool, nullptr);
+
+    // Create provider parameters
+    size_of_pool_from_ptr = size_of_first_alloc; // whole size
+    umf_fixed_memory_provider_params_handle_t params = nullptr;
+    umf_result = umfFixedMemoryProviderParamsCreate(&params, ptr_for_pool,
+                                                    size_of_pool_from_ptr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(params, nullptr);
+
+    umf_memory_provider_handle_t providerFromPtr = nullptr;
+    umf_result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params,
+                                         &providerFromPtr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(providerFromPtr, nullptr);
+
+    umf_memory_pool_handle_t poolFromPtr = nullptr;
+    umf_result = umfPoolCreate(umfProxyPoolOps(), providerFromPtr, nullptr, 0,
+                               &poolFromPtr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    ptr = umfPoolMalloc(poolFromPtr, size_of_pool_from_ptr);
+    ASSERT_NE(ptr, nullptr);
+
+    memset(ptr, 0xFF, size_of_pool_from_ptr);
+
+    umf_result = umfPoolFree(poolFromPtr, ptr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    umfPoolDestroy(poolFromPtr);
+    umfMemoryProviderDestroy(providerFromPtr);
+    umfFixedMemoryProviderParamsDestroy(params);
+
+    umf_result = umfPoolFree(pool_get, ptr_for_pool);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+#endif /* !_WIN32 */
+}
+
+TEST_P(umfPoolTest, pool_from_ptr_half_size_success) {
+#ifdef _WIN32
+    // TODO: implement support for Windows
+    GTEST_SKIP() << "umfPoolAlignedMalloc() is not supported on Windows";
+#else /* !_WIN32 */
+    umf_result_t umf_result;
+    size_t size_of_pool_from_ptr;
+    void *ptr_for_pool = nullptr;
+    void *ptr = nullptr;
+
+    umf_memory_pool_handle_t pool_get = pool.get();
+    const size_t size_of_first_alloc = 2 * 1024 * 1024; // 2MB
+
+    if (!umf_test::isAlignedAllocSupported(pool_get)) {
+        GTEST_SKIP();
+    }
+
+    ptr_for_pool = umfPoolAlignedMalloc(pool_get, size_of_first_alloc,
+                                        utils_get_page_size());
+    ASSERT_NE(ptr_for_pool, nullptr);
+
+    // Create provider parameters
+    size_of_pool_from_ptr = size_of_first_alloc / 2; // half size
+    umf_fixed_memory_provider_params_handle_t params = nullptr;
+    umf_result = umfFixedMemoryProviderParamsCreate(&params, ptr_for_pool,
+                                                    size_of_pool_from_ptr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(params, nullptr);
+
+    umf_memory_provider_handle_t providerFromPtr = nullptr;
+    umf_result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params,
+                                         &providerFromPtr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(providerFromPtr, nullptr);
+
+    umf_memory_pool_handle_t poolFromPtr = nullptr;
+    umf_result = umfPoolCreate(umfProxyPoolOps(), providerFromPtr, nullptr, 0,
+                               &poolFromPtr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    ptr = umfPoolMalloc(poolFromPtr, size_of_pool_from_ptr);
+    ASSERT_NE(ptr, nullptr);
+
+    memset(ptr, 0xFF, size_of_pool_from_ptr);
+
+    umf_result = umfPoolFree(poolFromPtr, ptr);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+    umfPoolDestroy(poolFromPtr);
+    umfMemoryProviderDestroy(providerFromPtr);
+    umfFixedMemoryProviderParamsDestroy(params);
+
+    umf_result = umfPoolFree(pool_get, ptr_for_pool);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+#endif /* !_WIN32 */
 }

 #endif /* UMF_TEST_POOL_FIXTURES_HPP */
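The two pool_from_ptr_* tests exercise the same three-step pattern: carve a provider out of an existing allocation with the fixed memory provider, then put a proxy pool on top. A condensed sketch of that flow, with error handling reduced to asserts (the helper name is ours, not part of the fixture; teardown order mirrors the tests above — pool, provider, params):

    #include <cassert>

    // Build a pool that serves allocations from caller-owned memory.
    static umf_memory_pool_handle_t
    poolFromBuffer(void *buf, size_t size,
                   umf_memory_provider_handle_t *out_provider,
                   umf_fixed_memory_provider_params_handle_t *out_params) {
        umf_result_t res =
            umfFixedMemoryProviderParamsCreate(out_params, buf, size);
        assert(res == UMF_RESULT_SUCCESS);

        res = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), *out_params,
                                      out_provider);
        assert(res == UMF_RESULT_SUCCESS);

        umf_memory_pool_handle_t pool = nullptr;
        res = umfPoolCreate(umfProxyPoolOps(), *out_provider, nullptr, 0, &pool);
        assert(res == UMF_RESULT_SUCCESS);
        return pool;
    }

The whole_size variant hands the entire first allocation to the new pool; half_size shows the fixed provider is equally happy managing just a prefix of it.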
diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp
index 319997c823..9bdef4f131 100644
--- a/test/pools/disjoint_pool.cpp
+++ b/test/pools/disjoint_pool.cpp
@@ -1,76 +1,157 @@
-// Copyright (C) 2023-2024 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 #include
+#include
+
 #include "pool.hpp"
+#include "pool/pool_disjoint_internal.h"
 #include "poolFixtures.hpp"
-#include "pool_disjoint.h"
 #include "provider.hpp"
 #include "provider_null.h"
 #include "provider_trace.h"

-using disjoint_params_unique_handle_t =
-    std::unique_ptr;
+using umf_test::test;
+using namespace umf_test;

-static constexpr size_t DEFAULT_DISJOINT_SLAB_MIN_SIZE = 4096;
-static constexpr size_t DEFAULT_DISJOINT_MAX_POOLABLE_SIZE = 4096;
-static constexpr size_t DEFAULT_DISJOINT_CAPACITY = 4;
-static constexpr size_t DEFAULT_DISJOINT_MIN_BUCKET_SIZE = 64;
+TEST_F(test, internals) {
+    static umf_result_t expectedResult = UMF_RESULT_SUCCESS;
+    struct memory_provider : public umf_test::provider_base_t {
+        umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept {
+            *ptr = umf_ba_global_aligned_alloc(size, alignment);
+            return UMF_RESULT_SUCCESS;
+        }

-disjoint_params_unique_handle_t poolConfig() {
-    umf_disjoint_pool_params_handle_t config = nullptr;
-    umf_result_t res = umfDisjointPoolParamsCreate(&config);
-    if (res != UMF_RESULT_SUCCESS) {
-        throw std::runtime_error("Failed to create pool params");
-    }
-    res = umfDisjointPoolParamsSetSlabMinSize(config,
-                                              DEFAULT_DISJOINT_SLAB_MIN_SIZE);
-    if (res != UMF_RESULT_SUCCESS) {
-        throw std::runtime_error("Failed to set slab min size");
-    }
-    res = umfDisjointPoolParamsSetMaxPoolableSize(
-        config, DEFAULT_DISJOINT_MAX_POOLABLE_SIZE);
-    if (res != UMF_RESULT_SUCCESS) {
-        throw std::runtime_error("Failed to set max poolable size");
-    }
-    res = umfDisjointPoolParamsSetCapacity(config, DEFAULT_DISJOINT_CAPACITY);
-    if (res != UMF_RESULT_SUCCESS) {
-        throw std::runtime_error("Failed to set capacity");
-    }
-    res = umfDisjointPoolParamsSetMinBucketSize(
-        config, DEFAULT_DISJOINT_MIN_BUCKET_SIZE);
-    if (res != UMF_RESULT_SUCCESS) {
-        throw std::runtime_error("Failed to set min bucket size");
+        umf_result_t free(void *ptr, [[maybe_unused]] size_t size) noexcept {
+            // do the actual free only when we expect the success
+            if (expectedResult == UMF_RESULT_SUCCESS) {
+                umf_ba_global_free(ptr);
+            }
+            return expectedResult;
+        }
+
+        umf_result_t
+        get_min_page_size([[maybe_unused]] void *ptr,
+                          [[maybe_unused]] size_t *pageSize) noexcept {
+            *pageSize = 1024;
+            return UMF_RESULT_SUCCESS;
+        }
+    };
+    umf_memory_provider_ops_t provider_ops =
+        umf_test::providerMakeCOps();
+
+    auto providerUnique =
+        wrapProviderUnique(createProviderChecked(&provider_ops, nullptr));
+
+    umf_memory_provider_handle_t provider_handle;
+    provider_handle = providerUnique.get();
+
+    umf_disjoint_pool_params_handle_t params =
+        (umf_disjoint_pool_params_handle_t)defaultDisjointPoolConfig();
+    // set to maximum tracing
+    params->pool_trace = 3;
+    params->max_poolable_size = 1024 * 1024;
+
+    // in the "internals" test we use the ops interface to manipulate the
+    // pool structure directly
+    umf_memory_pool_ops_t *ops = umfDisjointPoolOps();
+    EXPECT_NE(ops, nullptr);
+
+    disjoint_pool_t *pool;
+    umf_result_t res = ops->initialize(provider_handle, params, (void **)&pool);
+    EXPECT_EQ(res, UMF_RESULT_SUCCESS);
+    EXPECT_NE(pool, nullptr);
+    EXPECT_EQ(pool->provider_min_page_size, 1024);
+
+    // check the bucket sizes
+    size_t expected_size = DEFAULT_DISJOINT_MIN_BUCKET_SIZE;
+    EXPECT_EQ(pool->buckets[0]->size, expected_size);
+    EXPECT_EQ(pool->buckets[pool->buckets_num - 1]->size,
+              (size_t)1 << 31); // 2GB
+    for (size_t i = 0; i < pool->buckets_num; i++) {
+        bucket_t *bucket = pool->buckets[i];
+        EXPECT_NE(bucket, nullptr);
+        EXPECT_EQ(bucket->size, expected_size);
+
+        // assuming DEFAULT_DISJOINT_MIN_BUCKET_SIZE = 64, expected bucket
+        // sizes are: 64, 96, 128, 192, 256, ..., 2GB
+        if (i % 2 == 0) {
+            expected_size += expected_size / 2;
+        } else {
+            expected_size = DEFAULT_DISJOINT_MIN_BUCKET_SIZE << ((i + 1) / 2);
+        }
+    }

-    return disjoint_params_unique_handle_t(config,
-                                           &umfDisjointPoolParamsDestroy);
+    // test small allocations
+    size_t size = 8;
+    void *ptr = ops->malloc(pool, size);
+    EXPECT_NE(ptr, nullptr);
+
+    // get the bucket - because of the small size, this should be the first
+    // bucket in the pool
+    bucket_t *bucket = pool->buckets[0];
+    EXPECT_NE(bucket, nullptr);
+
+    // check bucket stats
+    EXPECT_EQ(bucket->alloc_count, 1);
+
+    // the first allocation will always use external memory (newly added to
+    // the pool) and this is counted as an allocation from outside the pool
+    EXPECT_EQ(bucket->alloc_pool_count, 0);
+    EXPECT_EQ(bucket->curr_slabs_in_use, 1);
+
+    // check the slab - there should be only a single slab allocated
+    EXPECT_NE(bucket->available_slabs, nullptr);
+    EXPECT_EQ(bucket->available_slabs_num, 1);
+    EXPECT_EQ(bucket->available_slabs->next, nullptr);
+    slab_t *slab = bucket->available_slabs->val;
+
+    // check slab stats
+    EXPECT_GE(slab->slab_size, params->slab_min_size);
+    EXPECT_GE(slab->num_chunks_total, slab->slab_size / bucket->size);
+
+    // check the allocation in the slab
+    EXPECT_EQ(slab_read_chunk_bit(slab, 0), false);
+    EXPECT_EQ(slab_read_chunk_bit(slab, 1), true);
+
+    // TODO:
+    // * multiple alloc + free from a single bucket
+    // * alignments
+    // * full slab alloc
+    // * slab overflow
+    // * chunked slabs
+    // * multiple alloc + free from different buckets
+    // * alloc something outside the pool (> MaxPoolableSize)
+    // * test capacity
+    // * check minBucketSize
+    // * test large objects
+    // * check available_slabs_num
+
+    // cleanup
+    ops->finalize(pool);
+    umfDisjointPoolParamsDestroy(params);
 }
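The bucket loop above encodes the disjoint pool's size ladder: buckets alternate between 1.5x the previous power of two and the next power of two. A standalone illustration of the same recurrence (plain C++, independent of the pool internals):

    #include <cstddef>
    #include <cstdio>

    int main() {
        const size_t min_bucket_size = 64; // DEFAULT_DISJOINT_MIN_BUCKET_SIZE
        size_t expected = min_bucket_size;
        // prints 64, 96, 128, 192, 256, 384, 512, ... up to 2GB
        for (size_t i = 0; expected <= ((size_t)1 << 31); i++) {
            printf("bucket[%zu] = %zu\n", i, expected);
            if (i % 2 == 0) {
                expected += expected / 2; // 64 -> 96, 128 -> 192, ...
            } else {
                expected = min_bucket_size << ((i + 1) / 2); // 96 -> 128, ...
            }
        }
        return 0;
    }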
-using umf_test::test;
-using namespace umf_test;
-
 TEST_F(test, freeErrorPropagation) {
     static umf_result_t expectedResult = UMF_RESULT_SUCCESS;
     struct memory_provider : public umf_test::provider_base_t {
-        umf_result_t alloc(size_t size, size_t, void **ptr) noexcept {
-            *ptr = malloc(size);
+        umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept {
+            *ptr = umf_ba_global_aligned_alloc(size, alignment);
             return UMF_RESULT_SUCCESS;
         }

         umf_result_t free(void *ptr, [[maybe_unused]] size_t size) noexcept {
             // do the actual free only when we expect the success
             if (expectedResult == UMF_RESULT_SUCCESS) {
-                ::free(ptr);
+                umf_ba_global_free(ptr);
             }
             return expectedResult;
         }
     };
     umf_memory_provider_ops_t provider_ops =
-        umf::providerMakeCOps();
+        umf_test::providerMakeCOps();

     auto providerUnique =
         wrapProviderUnique(createProviderChecked(&provider_ops, nullptr));
@@ -79,17 +160,21 @@ TEST_F(test, freeErrorPropagation) {
     provider_handle = providerUnique.get();

     // force all allocations to go to memory provider
-    disjoint_params_unique_handle_t params = poolConfig();
-    umf_result_t retp =
-        umfDisjointPoolParamsSetMaxPoolableSize(params.get(), 0);
+    umf_disjoint_pool_params_handle_t params;
+    umf_result_t retp = umfDisjointPoolParamsCreate(&params);
+    EXPECT_EQ(retp, UMF_RESULT_SUCCESS);
+    retp = umfDisjointPoolParamsSetMaxPoolableSize(params, 0);
     EXPECT_EQ(retp, UMF_RESULT_SUCCESS);

     umf_memory_pool_handle_t pool = NULL;
-    retp = umfPoolCreate(umfDisjointPoolOps(), provider_handle, params.get(), 0,
-                         &pool);
+    retp =
+        umfPoolCreate(umfDisjointPoolOps(), provider_handle, params, 0, &pool);
     EXPECT_EQ(retp, UMF_RESULT_SUCCESS);
     auto poolHandle = umf_test::wrapPoolUnique(pool);

+    retp = umfDisjointPoolParamsDestroy(params);
+    EXPECT_EQ(retp, UMF_RESULT_SUCCESS);
+
     static constexpr size_t size = 1024;
     void *ptr = umfPoolMalloc(pool, size);

@@ -109,26 +194,26 @@ TEST_F(test, sharedLimits) {
     static size_t numFrees = 0;

     struct memory_provider : public umf_test::provider_base_t {
-        umf_result_t alloc(size_t size, size_t, void **ptr) noexcept {
-            *ptr = malloc(size);
+        umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept {
+            *ptr = umf_ba_global_aligned_alloc(size, alignment);
             numAllocs++;
             return UMF_RESULT_SUCCESS;
         }

         umf_result_t free(void *ptr, [[maybe_unused]] size_t size) noexcept {
-            ::free(ptr);
+            umf_ba_global_free(ptr);
             numFrees++;
             return UMF_RESULT_SUCCESS;
         }
     };
     umf_memory_provider_ops_t provider_ops =
-        umf::providerMakeCOps();
+        umf_test::providerMakeCOps();

     static constexpr size_t SlabMinSize = 1024;
     static constexpr size_t MaxSize = 4 * SlabMinSize;

-    disjoint_params_unique_handle_t config = poolConfig();
-    umf_result_t ret =
-        umfDisjointPoolParamsSetSlabMinSize(config.get(), SlabMinSize);
+    umf_disjoint_pool_params_handle_t params =
+        (umf_disjoint_pool_params_handle_t)defaultDisjointPoolConfig();
+    umf_result_t ret = umfDisjointPoolParamsSetSlabMinSize(params, SlabMinSize);
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

     auto limits =
@@ -137,7 +222,7 @@ TEST_F(test, sharedLimits) {
         umfDisjointPoolSharedLimitsCreate(MaxSize),
         &umfDisjointPoolSharedLimitsDestroy);

-    ret = umfDisjointPoolParamsSetSharedLimits(config.get(), limits.get());
+    ret = umfDisjointPoolParamsSetSharedLimits(params, limits.get());
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

     auto provider =
@@ -145,16 +230,19 @@ TEST_F(test, sharedLimits) {

     umf_memory_pool_handle_t pool1 = NULL;
     umf_memory_pool_handle_t pool2 = NULL;
-    ret = umfPoolCreate(umfDisjointPoolOps(), provider.get(),
-                        (void *)config.get(), 0, &pool1);
+    ret =
+        umfPoolCreate(umfDisjointPoolOps(), provider.get(), params, 0, &pool1);
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
     auto poolHandle1 = umf_test::wrapPoolUnique(pool1);

-    ret = umfPoolCreate(umfDisjointPoolOps(), provider.get(),
-                        (void *)config.get(), 0, &pool2);
+    ret =
+        umfPoolCreate(umfDisjointPoolOps(), provider.get(), params, 0, &pool2);
     EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
     auto poolHandle2 = umf_test::wrapPoolUnique(pool2);

+    ret = umfDisjointPoolParamsDestroy(params);
+    EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
+
     EXPECT_EQ(0, numAllocs);
     EXPECT_EQ(0, numFrees);

@@ -239,24 +327,25 @@ TEST_F(test, disjointPoolInvalidBucketSize) {
     umfDisjointPoolParamsDestroy(params);
 }

-disjoint_params_unique_handle_t defaultPoolConfig = poolConfig();
 INSTANTIATE_TEST_SUITE_P(disjointPoolTests, umfPoolTest,
                          ::testing::Values(poolCreateExtParams{
-                             umfDisjointPoolOps(),
-                             (void *)defaultPoolConfig.get(),
+                             umfDisjointPoolOps(), defaultDisjointPoolConfig,
+                             defaultDisjointPoolConfigDestroy,
                              &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr}));

-INSTANTIATE_TEST_SUITE_P(disjointPoolTests, umfMemTest,
-                         ::testing::Values(std::make_tuple(
-                             poolCreateExtParams{
-                                 umfDisjointPoolOps(),
-                                 (void *)defaultPoolConfig.get(),
-                                 &MOCK_OUT_OF_MEM_PROVIDER_OPS,
-                                 (void *)&DEFAULT_DISJOINT_CAPACITY, nullptr},
-                             static_cast(DEFAULT_DISJOINT_CAPACITY) / 2)));
+void *memProviderParams() { return (void *)&DEFAULT_DISJOINT_CAPACITY; }
+
+INSTANTIATE_TEST_SUITE_P(
+    disjointPoolTests, umfMemTest,
+    ::testing::Values(std::make_tuple(
+        poolCreateExtParams{umfDisjointPoolOps(), defaultDisjointPoolConfig,
+                            defaultDisjointPoolConfigDestroy,
+                            &MOCK_OUT_OF_MEM_PROVIDER_OPS, memProviderParams,
+                            nullptr},
+        static_cast(DEFAULT_DISJOINT_CAPACITY) / 2)));

 INSTANTIATE_TEST_SUITE_P(disjointMultiPoolTests, umfMultiPoolTest,
                          ::testing::Values(poolCreateExtParams{
-                             umfDisjointPoolOps(),
-                             (void *)defaultPoolConfig.get(),
+                             umfDisjointPoolOps(), defaultDisjointPoolConfig,
+                             defaultDisjointPoolConfigDestroy,
                              &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr}));
diff --git a/test/pools/jemalloc_coarse_devdax.cpp b/test/pools/jemalloc_coarse_devdax.cpp
index 350e053ab7..53d2a41b37 100644
--- a/test/pools/jemalloc_coarse_devdax.cpp
+++ b/test/pools/jemalloc_coarse_devdax.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2024-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

@@ -7,18 +7,20 @@
 #include "pool_coarse.hpp"

-using devdax_params_unique_handle_t =
-    std::unique_ptr;
-
-devdax_params_unique_handle_t create_devdax_params() {
+bool devDaxEnvSet() {
     char *path = getenv("UMF_TESTS_DEVDAX_PATH");
     char *size = getenv("UMF_TESTS_DEVDAX_SIZE");
     if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) {
-        return devdax_params_unique_handle_t(
-            nullptr, &umfDevDaxMemoryProviderParamsDestroy);
+        return false;
     }

+    return true;
+}
+
+void *createDevDaxParams() {
+    char *path = getenv("UMF_TESTS_DEVDAX_PATH");
+    char *size = getenv("UMF_TESTS_DEVDAX_SIZE");
+
     umf_devdax_memory_provider_params_handle_t params = NULL;
     umf_result_t res =
         umfDevDaxMemoryProviderParamsCreate(&params, path, atol(size));
@@ -27,18 +29,15 @@
             "Failed to create DevDax Memory Provider params");
     }

-    return devdax_params_unique_handle_t(params,
-                                         &umfDevDaxMemoryProviderParamsDestroy);
+    return params;
 }

-auto coarseParams = umfCoarseMemoryProviderParamsDefault();
-auto devdaxParams = create_devdax_params();
-
 static std::vector poolParamsList =
-    devdaxParams.get()
+    devDaxEnvSet()
         ? std::vector{poolCreateExtParams{
-              umfJemallocPoolOps(), nullptr, umfDevDaxMemoryProviderOps(),
-              devdaxParams.get(), &coarseParams}}
+              umfJemallocPoolOps(), nullptr, nullptr,
+              umfDevDaxMemoryProviderOps(), createDevDaxParams,
+              (pfnProviderParamsDestroy)umfDevDaxMemoryProviderParamsDestroy}}
         : std::vector{};

 INSTANTIATE_TEST_SUITE_P(jemallocCoarseDevDaxTest, umfPoolTest,
diff --git a/test/pools/jemalloc_coarse_file.cpp b/test/pools/jemalloc_coarse_file.cpp
index 74ad36d56e..dcd03898ee 100644
--- a/test/pools/jemalloc_coarse_file.cpp
+++ b/test/pools/jemalloc_coarse_file.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2024-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

@@ -7,27 +7,25 @@
 #include "pool_coarse.hpp"

-using file_params_unique_handle_t =
-    std::unique_ptr;
-
-file_params_unique_handle_t get_file_params_default(char *path) {
+void *getFileParamsDefault() {
     umf_file_memory_provider_params_handle_t file_params = NULL;
-    umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path);
+    umf_result_t res =
+        umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH);
     if (res != UMF_RESULT_SUCCESS) {
         throw std::runtime_error(
             "Failed to create File Memory Provider params");
     }

-    return file_params_unique_handle_t(file_params,
-                                       &umfFileMemoryProviderParamsDestroy);
+    return file_params;
 }

-auto coarseParams = umfCoarseMemoryProviderParamsDefault();
-file_params_unique_handle_t fileParams = get_file_params_default(FILE_PATH);
+umf_result_t destroyFileParams(void *params) {
+    return umfFileMemoryProviderParamsDestroy(
+        (umf_file_memory_provider_params_handle_t)params);
+}

 INSTANTIATE_TEST_SUITE_P(jemallocCoarseFileTest, umfPoolTest,
                          ::testing::Values(poolCreateExtParams{
-                             umfJemallocPoolOps(), nullptr,
-                             umfFileMemoryProviderOps(), fileParams.get(),
-                             &coarseParams}));
+                             umfJemallocPoolOps(), nullptr, nullptr,
+                             umfFileMemoryProviderOps(), getFileParamsDefault,
+                             destroyFileParams}));
diff --git a/test/pools/jemalloc_pool.cpp b/test/pools/jemalloc_pool.cpp
index 4dddbcd32b..8112f36bf4 100644
--- a/test/pools/jemalloc_pool.cpp
+++ b/test/pools/jemalloc_pool.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

@@ -11,26 +11,60 @@
 using umf_test::test;
 using namespace umf_test;

-using os_params_unique_handle_t =
-    std::unique_ptr;
+using void_unique_ptr = std::unique_ptr;

-os_params_unique_handle_t createOsMemoryProviderParams() {
+void *createOsMemoryProviderParams() {
     umf_os_memory_provider_params_handle_t params = nullptr;
     umf_result_t res = umfOsMemoryProviderParamsCreate(&params);
     if (res != UMF_RESULT_SUCCESS) {
         throw std::runtime_error("Failed to create os memory provider params");
     }

-    return os_params_unique_handle_t(params, &umfOsMemoryProviderParamsDestroy);
+    return params;
 }
-auto defaultParams = createOsMemoryProviderParams();

-INSTANTIATE_TEST_SUITE_P(jemallocPoolTest, umfPoolTest,
-                         ::testing::Values(poolCreateExtParams{
-                             umfJemallocPoolOps(), nullptr,
-                             umfOsMemoryProviderOps(), defaultParams.get(),
-                             nullptr}));
+umf_result_t destroyOsMemoryProviderParams(void *params) {
+    return umfOsMemoryProviderParamsDestroy(
+        (umf_os_memory_provider_params_handle_t)params);
+}
+
+void *createFixedMemoryProviderParams() {
+    // Allocate a memory buffer to use with the fixed memory provider.
+    // The umfPoolTest.malloc_compliance test requires a lot of memory.
+    size_t memory_size = (1UL << 31);
+    static void_unique_ptr memory_buffer =
+        void_unique_ptr(malloc(memory_size), free);
+    if (memory_buffer.get() == NULL) {
+        throw std::runtime_error(
+            "Failed to allocate memory for Fixed memory provider");
+    }
+
+    umf_fixed_memory_provider_params_handle_t params = nullptr;
+    umf_result_t res = umfFixedMemoryProviderParamsCreate(
+        &params, memory_buffer.get(), memory_size);
+    if (res != UMF_RESULT_SUCCESS) {
+        throw std::runtime_error(
+            "Failed to create Fixed memory provider params");
+    }
+
+    return params;
+}
+
+umf_result_t destroyFixedMemoryProviderParams(void *params) {
+    return umfFixedMemoryProviderParamsDestroy(
+        (umf_fixed_memory_provider_params_handle_t)params);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    jemallocPoolTest, umfPoolTest,
+    ::testing::Values(poolCreateExtParams{umfJemallocPoolOps(), nullptr,
+                                          nullptr, umfOsMemoryProviderOps(),
+                                          createOsMemoryProviderParams,
+                                          destroyOsMemoryProviderParams},
+                      poolCreateExtParams{umfJemallocPoolOps(), nullptr,
+                                          nullptr, umfFixedMemoryProviderOps(),
+                                          createFixedMemoryProviderParams,
+                                          destroyFixedMemoryProviderParams}));
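createFixedMemoryProviderParams() above keeps its backing buffer in a function-local static so the memory outlives every pool instantiated from it; the fixed memory provider works in place on caller-owned memory and never frees it. The same rule in miniature (size and names are illustrative, not part of the patch):

    #include <memory>
    #include <stdexcept>

    void *createSmallFixedParams() {
        constexpr size_t kSize = 1 << 20; // 1 MiB, just for illustration
        // static: must outlive the provider and any pool built on it
        static std::unique_ptr<char[]> backing{new char[kSize]};

        umf_fixed_memory_provider_params_handle_t params = nullptr;
        if (umfFixedMemoryProviderParamsCreate(&params, backing.get(), kSize) !=
            UMF_RESULT_SUCCESS) {
            throw std::runtime_error("Failed to create fixed provider params");
        }
        return params;
    }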
 // this test makes sure that jemalloc does not use
 // memory provider to allocate metadata (and hence
@@ -42,142 +76,46 @@ TEST_F(test, metadataNotAllocatedUsingProvider) {

     // set coarse grain allocations to PROT_NONE so that we can be sure
     // jemalloc does not touch any of the allocated memory
-    umf_os_memory_provider_params_handle_t params = nullptr;
-    umf_result_t res = umfOsMemoryProviderParamsCreate(&params);
-    ASSERT_EQ(res, UMF_RESULT_SUCCESS);
-    res = umfOsMemoryProviderParamsSetProtection(params, UMF_PROTECTION_NONE);
-    ASSERT_EQ(res, UMF_RESULT_SUCCESS);
-
-    auto pool =
-        poolCreateExtUnique({umfJemallocPoolOps(), nullptr,
-                             umfOsMemoryProviderOps(), params, nullptr});
-
-    res = umfOsMemoryProviderParamsDestroy(params);
-    ASSERT_EQ(res, UMF_RESULT_SUCCESS);
-
-    std::vector> allocs;
-    for (size_t i = 0; i < numAllocs; i++) {
-        allocs.emplace_back(
-            umfPoolMalloc(pool.get(), allocSize),
-            [pool = pool.get()](void *ptr) { umfPoolFree(pool, ptr); });
-    }
-}
-
-using jemallocPoolParams = bool;
-struct umfJemallocPoolParamsTest
-    : umf_test::test,
-      ::testing::WithParamInterface {
-
-    struct validation_params_t {
-        bool keep_all_memory;
-    };
-
-    struct provider_validator : public umf_test::provider_ba_global {
-        using base_provider = umf_test::provider_ba_global;
-
-        umf_result_t initialize(validation_params_t *params) {
-            EXPECT_NE(params, nullptr);
-            expected_params = params;
-            return UMF_RESULT_SUCCESS;
+    auto providerParamsCreate = []() {
+        umf_os_memory_provider_params_handle_t params = nullptr;
+        umf_result_t res = umfOsMemoryProviderParamsCreate(&params);
+        if (res != UMF_RESULT_SUCCESS) {
+            throw std::runtime_error(
+                "Failed to create OS Memory Provider params");
         }
-        umf_result_t free(void *ptr, size_t size) {
-            EXPECT_EQ(expected_params->keep_all_memory, false);
-            return base_provider::free(ptr, size);
+        res =
+            umfOsMemoryProviderParamsSetProtection(params, UMF_PROTECTION_NONE);
+        if (res != UMF_RESULT_SUCCESS) {
+            throw std::runtime_error(
+                "Failed to set OS Memory Provider params protection");
         }
-
-        validation_params_t *expected_params;
+        return (void *)params;
     };

-    static constexpr umf_memory_provider_ops_t VALIDATOR_PROVIDER_OPS =
-        umf::providerMakeCOps();
-
-    umfJemallocPoolParamsTest() : expected_params{false}, params(nullptr) {}
-    void SetUp() override {
-        test::SetUp();
-        expected_params.keep_all_memory = this->GetParam();
-        umf_result_t ret = umfJemallocPoolParamsCreate(&params);
-        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-        ret = umfJemallocPoolParamsSetKeepAllMemory(
-            params, expected_params.keep_all_memory);
-        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-    }
-
-    void TearDown() override {
-        umfJemallocPoolParamsDestroy(params);
-        test::TearDown();
-    }
-
-    umf::pool_unique_handle_t makePool() {
-        umf_memory_provider_handle_t hProvider = nullptr;
-        umf_memory_pool_handle_t hPool = nullptr;
-
-        auto ret = umfMemoryProviderCreate(&VALIDATOR_PROVIDER_OPS,
-                                           &expected_params, &hProvider);
-        EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
-
-        ret = umfPoolCreate(umfJemallocPoolOps(), hProvider, params,
-                            UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool);
-        EXPECT_EQ(ret, UMF_RESULT_SUCCESS);
-
-        return umf::pool_unique_handle_t(hPool, &umfPoolDestroy);
-    }
-
-    void allocFreeFlow() {
-        static const size_t ALLOC_SIZE = 128;
-        static const size_t NUM_ALLOCATIONS = 100;
-        std::vector ptrs;
-
-        auto pool = makePool();
-        ASSERT_NE(pool, nullptr);
-
-        for (size_t i = 0; i < NUM_ALLOCATIONS; ++i) {
-            auto *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE);
-            ASSERT_NE(ptr, nullptr);
-            ptrs.push_back(ptr);
+    auto providerParamsDestroy = [](void *params) {
+        umf_result_t res = umfOsMemoryProviderParamsDestroy(
+            (umf_os_memory_provider_params_handle_t)params);
+        if (res != UMF_RESULT_SUCCESS) {
+            throw std::runtime_error(
+                "Failed to destroy OS Memory Provider params");
         }
+        return res;
+    };

-        for (size_t i = 0; i < NUM_ALLOCATIONS; ++i) {
-            auto ret = umfPoolFree(pool.get(), ptrs[i]);
-            ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-        }
+    auto pool = poolCreateExtUnique({
+        umfJemallocPoolOps(),
+        nullptr,
+        nullptr,
+        umfOsMemoryProviderOps(),
+        (pfnProviderParamsCreate)providerParamsCreate,
+        (pfnProviderParamsDestroy)providerParamsDestroy,
+    });

-        // Now pool can call free during pool destruction
-        expected_params.keep_all_memory = false;
+    std::vector> allocs;
+    for (size_t i = 0; i < numAllocs; i++) {
+        allocs.emplace_back(
+            umfPoolMalloc(pool.get(), allocSize),
+            [pool = pool.get()](void *ptr) { umfPoolFree(pool, ptr); });
     }
-
-    validation_params_t expected_params;
-    umf_jemalloc_pool_params_handle_t params;
-};
-
-TEST_P(umfJemallocPoolParamsTest, allocFree) { allocFreeFlow(); }
-
-TEST_P(umfJemallocPoolParamsTest, updateParams) {
-    expected_params.keep_all_memory = !expected_params.keep_all_memory;
-    umf_result_t ret = umfJemallocPoolParamsSetKeepAllMemory(
-        params, expected_params.keep_all_memory);
-    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
-
-    allocFreeFlow();
 }
-
-TEST_P(umfJemallocPoolParamsTest, invalidParams) {
-    umf_result_t ret = umfJemallocPoolParamsCreate(nullptr);
-    ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-
-    ret = umfJemallocPoolParamsSetKeepAllMemory(nullptr, true);
-    ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-
-    ret = umfJemallocPoolParamsSetKeepAllMemory(nullptr, false);
-    ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-
-    ret = umfJemallocPoolParamsDestroy(nullptr);
-    ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-}
-
-GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfJemallocPoolParamsTest);
-
-/* TODO: enable this test after the issue #903 is fixed.
-(https://github.com/oneapi-src/unified-memory-framework/issues/903)
-INSTANTIATE_TEST_SUITE_P(jemallocPoolTest, umfJemallocPoolParamsTest,
-                         testing::Values(false, true));
-*/
diff --git a/test/pools/pool_base_alloc.cpp b/test/pools/pool_base_alloc.cpp
index 7c9a3701a7..441ab37ec3 100644
--- a/test/pools/pool_base_alloc.cpp
+++ b/test/pools/pool_base_alloc.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

@@ -17,17 +17,17 @@ struct base_alloc_pool : public umf_test::pool_base_t {

     void *malloc(size_t size) noexcept { return umf_ba_global_alloc(size); }
     void *calloc(size_t, size_t) noexcept {
-        umf::getPoolLastStatusRef() =
+        umf_test::getPoolLastStatusRef() =
             UMF_RESULT_ERROR_NOT_SUPPORTED;
         return NULL;
     }
     void *realloc(void *, size_t) noexcept {
-        umf::getPoolLastStatusRef() =
+        umf_test::getPoolLastStatusRef() =
             UMF_RESULT_ERROR_NOT_SUPPORTED;
         return NULL;
     }
     void *aligned_malloc(size_t, size_t) noexcept {
-        umf::getPoolLastStatusRef() =
+        umf_test::getPoolLastStatusRef() =
             UMF_RESULT_ERROR_NOT_SUPPORTED;
         return NULL;
     }
@@ -39,13 +39,14 @@ struct base_alloc_pool : public umf_test::pool_base_t {
         return UMF_RESULT_SUCCESS;
     }
     umf_result_t get_last_allocation_error() {
-        return umf::getPoolLastStatusRef();
+        return umf_test::getPoolLastStatusRef();
     }
 };

-umf_memory_pool_ops_t BA_POOL_OPS = umf::poolMakeCOps();
+umf_memory_pool_ops_t BA_POOL_OPS =
+    umf_test::poolMakeCOps();

 INSTANTIATE_TEST_SUITE_P(baPool, umfPoolTest,
                          ::testing::Values(poolCreateExtParams{
-                             &BA_POOL_OPS, nullptr,
+                             &BA_POOL_OPS, nullptr, nullptr,
                              &umf_test::BASE_PROVIDER_OPS, nullptr, nullptr}));
diff --git a/test/pools/pool_coarse.hpp b/test/pools/pool_coarse.hpp
index 7baa612f1e..b1efb4fee9 100644
--- a/test/pools/pool_coarse.hpp
+++ b/test/pools/pool_coarse.hpp
@@ -5,8 +5,6 @@
 #ifndef UMF_TEST_POOL_COARSE_HPP
 #define UMF_TEST_POOL_COARSE_HPP 1

-#include "umf/providers/provider_coarse.h"
-
 #include "pool.hpp"
 #include "poolFixtures.hpp"

diff --git a/test/pools/scalable_coarse_devdax.cpp b/test/pools/scalable_coarse_devdax.cpp
index 1bf77c61ce..86c5804022 100644
--- a/test/pools/scalable_coarse_devdax.cpp
+++ b/test/pools/scalable_coarse_devdax.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2024-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

@@ -7,18 +7,20 @@
 #include "pool_coarse.hpp"

-using devdax_params_unique_handle_t =
-    std::unique_ptr;
-
-devdax_params_unique_handle_t create_devdax_params() {
+bool devDaxEnvSet() {
     char *path = getenv("UMF_TESTS_DEVDAX_PATH");
     char *size = getenv("UMF_TESTS_DEVDAX_SIZE");
     if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) {
-        return devdax_params_unique_handle_t(
-            nullptr, &umfDevDaxMemoryProviderParamsDestroy);
+        return false;
     }

+    return true;
+}
+
+void *createDevDaxParams() {
+    char *path = getenv("UMF_TESTS_DEVDAX_PATH");
+    char *size = getenv("UMF_TESTS_DEVDAX_SIZE");
+
     umf_devdax_memory_provider_params_handle_t params = NULL;
     umf_result_t res =
         umfDevDaxMemoryProviderParamsCreate(&params, path, atol(size));
@@ -27,19 +29,20 @@ devdax_params_unique_handle_t create_devdax_params() {
             "Failed to create DevDax Memory Provider params");
     }

-    return devdax_params_unique_handle_t(params,
-                                         &umfDevDaxMemoryProviderParamsDestroy);
+    return params;
 }

-auto coarseParams = umfCoarseMemoryProviderParamsDefault();
-auto devdaxParams = create_devdax_params();
+umf_result_t destroyDevDaxParams(void *params) {
+    return umfDevDaxMemoryProviderParamsDestroy(
+        (umf_devdax_memory_provider_params_handle_t)params);
+}

 static std::vector poolParamsList =
-    devdaxParams.get()
-        ? std::vector{poolCreateExtParams{
-              umfScalablePoolOps(), nullptr, umfDevDaxMemoryProviderOps(),
-              devdaxParams.get(), &coarseParams}}
-        : std::vector{};
+    devDaxEnvSet() ? std::vector{poolCreateExtParams{
+                         umfScalablePoolOps(), nullptr, nullptr,
+                         umfDevDaxMemoryProviderOps(), createDevDaxParams,
+                         destroyDevDaxParams}}
+                   : std::vector{};

 INSTANTIATE_TEST_SUITE_P(scalableCoarseDevDaxTest, umfPoolTest,
                          ::testing::ValuesIn(poolParamsList));
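The same boilerplate now repeats across the devdax, file, OS and fixed-provider test files: a void *-returning create function plus a destroy wrapper that casts back to the typed handle. A hypothetical generic adapter (not part of this patch) shows how the pattern could be folded into one template:

    // Adapts a typed params-destroy function to the void *-based
    // pfnProviderParamsDestroy slot of poolCreateExtParams.
    template <typename ParamsHandle, umf_result_t (*destroyFn)(ParamsHandle)>
    umf_result_t destroyParamsAdapter(void *params) {
        return destroyFn((ParamsHandle)params);
    }

    // e.g. instead of a hand-written destroyDevDaxParams():
    //   destroyParamsAdapter<umf_devdax_memory_provider_params_handle_t,
    //                        umfDevDaxMemoryProviderParamsDestroy>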
diff --git a/test/pools/scalable_coarse_file.cpp b/test/pools/scalable_coarse_file.cpp
index b45c112be2..a5fd5b46a6 100644
--- a/test/pools/scalable_coarse_file.cpp
+++ b/test/pools/scalable_coarse_file.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2024 Intel Corporation
+// Copyright (C) 2024-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

@@ -7,27 +7,25 @@
 #include "pool_coarse.hpp"

-using file_params_unique_handle_t =
-    std::unique_ptr;
-
-file_params_unique_handle_t get_file_params_default(char *path) {
+void *getFileParamsDefault() {
     umf_file_memory_provider_params_handle_t file_params = NULL;
-    umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path);
+    umf_result_t res =
+        umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH);
     if (res != UMF_RESULT_SUCCESS) {
         throw std::runtime_error(
             "Failed to create File Memory Provider params");
     }

-    return file_params_unique_handle_t(file_params,
-                                       &umfFileMemoryProviderParamsDestroy);
+    return file_params;
 }

-auto coarseParams = umfCoarseMemoryProviderParamsDefault();
-file_params_unique_handle_t fileParams = get_file_params_default(FILE_PATH);
+umf_result_t destroyFileParams(void *params) {
+    return umfFileMemoryProviderParamsDestroy(
+        (umf_file_memory_provider_params_handle_t)params);
+}

 INSTANTIATE_TEST_SUITE_P(scalableCoarseFileTest, umfPoolTest,
                          ::testing::Values(poolCreateExtParams{
-                             umfScalablePoolOps(), nullptr,
-                             umfFileMemoryProviderOps(), fileParams.get(),
-                             &coarseParams}));
+                             umfScalablePoolOps(), nullptr, nullptr,
+                             umfFileMemoryProviderOps(), getFileParamsDefault,
+                             destroyFileParams}));
diff --git a/test/pools/scalable_pool.cpp b/test/pools/scalable_pool.cpp
index 3edacd965c..54c0128a40 100644
--- a/test/pools/scalable_pool.cpp
+++ b/test/pools/scalable_pool.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2023 Intel Corporation
+// Copyright (C) 2023-2025 Intel Corporation
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

@@ -9,26 +9,26 @@
 #include "poolFixtures.hpp"
 #include "provider.hpp"

-using os_params_unique_handle_t =
-    std::unique_ptr;
-
-os_params_unique_handle_t createOsMemoryProviderParams() {
+void *createOsMemoryProviderParams() {
     umf_os_memory_provider_params_handle_t params = nullptr;
     umf_result_t res = umfOsMemoryProviderParamsCreate(&params);
     if (res != UMF_RESULT_SUCCESS) {
         throw std::runtime_error("Failed to create os memory provider params");
     }

-    return os_params_unique_handle_t(params, &umfOsMemoryProviderParamsDestroy);
+    return params;
+}
+
+umf_result_t destroyOsMemoryProviderParams(void *params) {
+    return umfOsMemoryProviderParamsDestroy(
+        (umf_os_memory_provider_params_handle_t)params);
 }

-auto defaultParams = createOsMemoryProviderParams();
-INSTANTIATE_TEST_SUITE_P(scalablePoolTest, umfPoolTest,
-                         ::testing::Values(poolCreateExtParams{
-                             umfScalablePoolOps(), nullptr,
-                             umfOsMemoryProviderOps(), defaultParams.get(),
-                             nullptr}));
+INSTANTIATE_TEST_SUITE_P(
+    scalablePoolTest, umfPoolTest,
+    ::testing::Values(poolCreateExtParams{
+        umfScalablePoolOps(), nullptr, nullptr, umfOsMemoryProviderOps(),
+        createOsMemoryProviderParams, destroyOsMemoryProviderParams}));

 using scalablePoolParams = std::tuple;
 struct umfScalablePoolParamsTest
@@ -61,7 +61,7 @@ struct umfScalablePoolParamsTest
     };

     static constexpr umf_memory_provider_ops_t VALIDATOR_PROVIDER_OPS =
-        umf::providerMakeCOps();
+        umf_test::providerMakeCOps();

     umfScalablePoolParamsTest() : expected_params{0, false}, params(nullptr) {}
     void SetUp() override {
@@ -82,7 +82,7 @@ struct umfScalablePoolParamsTest
         test::TearDown();
     }

-    umf::pool_unique_handle_t makePool() {
+    umf_test::pool_unique_handle_t makePool() {
         umf_memory_provider_handle_t hProvider = nullptr;
         umf_memory_pool_handle_t hPool = nullptr;
@@ -94,7 +94,7 @@ struct umfScalablePoolParamsTest
                             UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool);
         EXPECT_EQ(ret, UMF_RESULT_SUCCESS);

-        return umf::pool_unique_handle_t(hPool, &umfPoolDestroy);
+        return umf_test::pool_unique_handle_t(hPool, &umfPoolDestroy);
     }

     void allocFreeFlow() {
diff --git a/test/provider_coarse.cpp b/test/provider_coarse.cpp
deleted file mode 100644
index c2de4c06a9..0000000000
--- a/test/provider_coarse.cpp
+++ /dev/null
@@ -1,668 +0,0 @@
-/*
- * Copyright (C) 2023-2024 Intel Corporation
- *
- * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-*/
-
-#include
-
-#include "provider.hpp"
-
-#include
-
-using umf_test::KB;
-using umf_test::MB;
-using umf_test::test;
-
-#define GetStats umfCoarseMemoryProviderGetStats
-
-#define UPSTREAM_NAME "umf_ba_global"
-#define BASE_NAME "coarse"
-#define COARSE_NAME BASE_NAME " (" UPSTREAM_NAME ")"
-
-umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS =
-    umf::providerMakeCOps();
-
-struct CoarseWithMemoryStrategyTest
-    : umf_test::test,
-      ::testing::WithParamInterface {
-    void SetUp() override {
-        test::SetUp();
-        allocation_strategy = this->GetParam();
-    }
-
-    coarse_memory_provider_strategy_t allocation_strategy;
-};
-
-INSTANTIATE_TEST_SUITE_P(
-    CoarseWithMemoryStrategyTest, CoarseWithMemoryStrategyTest,
-    ::testing::Values(UMF_COARSE_MEMORY_STRATEGY_FASTEST,
-                      UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE,
-                      UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE));
-
-TEST_F(test, coarseProvider_name_upstream) {
-    umf_memory_provider_handle_t malloc_memory_provider;
-    umf_result_t umf_result;
-
-    umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL,
-                                         &malloc_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_NE(malloc_memory_provider, nullptr);
-
-    const size_t init_buffer_size = 20 * MB;
-
-    coarse_memory_provider_params_t coarse_memory_provider_params;
-    // make sure there are no undefined members - prevent a UB
-    memset(&coarse_memory_provider_params, 0,
-           sizeof(coarse_memory_provider_params));
-    coarse_memory_provider_params.upstream_memory_provider =
-        malloc_memory_provider;
-    coarse_memory_provider_params.destroy_upstream_memory_provider = true;
-    coarse_memory_provider_params.immediate_init_from_upstream = true;
-    coarse_memory_provider_params.init_buffer = nullptr;
-    coarse_memory_provider_params.init_buffer_size = init_buffer_size;
-
-    umf_memory_provider_handle_t coarse_memory_provider;
-    umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(),
-                                         &coarse_memory_provider_params,
-                                         &coarse_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_NE(coarse_memory_provider, nullptr);
-
-    size_t minPageSize = 0;
-    umf_result = umfMemoryProviderGetMinPageSize(coarse_memory_provider,
-                                                 nullptr, &minPageSize);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN);
-    ASSERT_EQ(minPageSize, 0);
-
-    size_t pageSize = 0;
-    umf_result = umfMemoryProviderGetRecommendedPageSize(
-        coarse_memory_provider, minPageSize, &pageSize);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN);
-    ASSERT_EQ(pageSize, minPageSize);
-
-    ASSERT_EQ(
-        strcmp(umfMemoryProviderGetName(coarse_memory_provider), COARSE_NAME),
-        0);
-
-    umfMemoryProviderDestroy(coarse_memory_provider);
-    // malloc_memory_provider has already been destroyed
-    // by umfMemoryProviderDestroy(coarse_memory_provider), because:
-    // coarse_memory_provider_params.destroy_upstream_memory_provider = true;
-}
-
-TEST_F(test, coarseProvider_name_no_upstream) {
-    umf_result_t umf_result;
-
-    const size_t init_buffer_size = 20 * MB;
-
-    // preallocate some memory and initialize the vector with zeros
-    std::vector buffer(init_buffer_size, 0);
-    void *buf = (void *)buffer.data();
-    ASSERT_NE(buf, nullptr);
-
-    coarse_memory_provider_params_t coarse_memory_provider_params;
-    // make sure there are no undefined members - prevent a UB
-    memset(&coarse_memory_provider_params, 0,
-           sizeof(coarse_memory_provider_params));
-    coarse_memory_provider_params.upstream_memory_provider = nullptr;
-    coarse_memory_provider_params.immediate_init_from_upstream = false;
-    coarse_memory_provider_params.init_buffer = buf;
-    coarse_memory_provider_params.init_buffer_size = init_buffer_size;
-
-    umf_memory_provider_handle_t coarse_memory_provider;
-    umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(),
-                                         &coarse_memory_provider_params,
-                                         &coarse_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_NE(coarse_memory_provider, nullptr);
-
-    size_t minPageSize = 0;
-    umf_result = umfMemoryProviderGetMinPageSize(coarse_memory_provider,
-                                                 nullptr, &minPageSize);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_GT(minPageSize, 0);
-
-    size_t pageSize = 0;
-    umf_result = umfMemoryProviderGetRecommendedPageSize(
-        coarse_memory_provider, minPageSize, &pageSize);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_GE(pageSize, minPageSize);
-
-    ASSERT_EQ(
-        strcmp(umfMemoryProviderGetName(coarse_memory_provider), BASE_NAME), 0);
-
-    umfMemoryProviderDestroy(coarse_memory_provider);
-}
-
-// negative tests
-
-TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_null_stats) {
-    ASSERT_EQ(GetStats(nullptr).alloc_size, 0);
-    ASSERT_EQ(GetStats(nullptr).used_size, 0);
-    ASSERT_EQ(GetStats(nullptr).num_upstream_blocks, 0);
-    ASSERT_EQ(GetStats(nullptr).num_all_blocks, 0);
-    ASSERT_EQ(GetStats(nullptr).num_free_blocks, 0);
-}
-
-// wrong NULL parameters
-TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_NULL_params) {
-    umf_result_t umf_result;
-
-    umf_memory_provider_handle_t coarse_memory_provider = nullptr;
-    umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), nullptr,
-                                         &coarse_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-    ASSERT_EQ(coarse_memory_provider, nullptr);
-}
-
-// wrong parameters: given no upstream_memory_provider
-// nor init_buffer while exactly one of them must be set
-TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_0) {
-    umf_result_t umf_result;
-
-    coarse_memory_provider_params_t coarse_memory_provider_params;
-    // make sure there are no undefined members - prevent a UB
-    memset(&coarse_memory_provider_params, 0,
-           sizeof(coarse_memory_provider_params));
-    coarse_memory_provider_params.allocation_strategy = allocation_strategy;
-    coarse_memory_provider_params.upstream_memory_provider = nullptr;
-    coarse_memory_provider_params.immediate_init_from_upstream = false;
-    coarse_memory_provider_params.init_buffer = nullptr;
-    coarse_memory_provider_params.init_buffer_size = 0;
-
-    umf_memory_provider_handle_t coarse_memory_provider = nullptr;
-    umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(),
-                                         &coarse_memory_provider_params,
-                                         &coarse_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-    ASSERT_EQ(coarse_memory_provider, nullptr);
-}
-
-// wrong parameters: given both an upstream_memory_provider
-// and an init_buffer while only one of them is allowed
-TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_1) {
-    umf_memory_provider_handle_t malloc_memory_provider;
-    umf_result_t umf_result;
-
-    umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL,
-                                         &malloc_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_NE(malloc_memory_provider, nullptr);
-
-    const size_t init_buffer_size = 20 * MB;
-
-    // preallocate some memory and initialize the vector with zeros
-    std::vector buffer(init_buffer_size, 0);
-    void *buf = (void *)buffer.data();
-    ASSERT_NE(buf, nullptr);
-
-    coarse_memory_provider_params_t coarse_memory_provider_params;
-    // make sure there are no undefined members - prevent a UB
-    memset(&coarse_memory_provider_params, 0,
-           sizeof(coarse_memory_provider_params));
-    coarse_memory_provider_params.allocation_strategy = allocation_strategy;
-    coarse_memory_provider_params.upstream_memory_provider =
-        malloc_memory_provider;
-    coarse_memory_provider_params.immediate_init_from_upstream = true;
-    coarse_memory_provider_params.init_buffer = buf;
-    coarse_memory_provider_params.init_buffer_size = init_buffer_size;
-
-    umf_memory_provider_handle_t coarse_memory_provider = nullptr;
-    umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(),
-                                         &coarse_memory_provider_params,
-                                         &coarse_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-    ASSERT_EQ(coarse_memory_provider, nullptr);
-
-    umfMemoryProviderDestroy(malloc_memory_provider);
-}
-
-// wrong parameters: init_buffer_size must not equal 0 when immediate_init_from_upstream is true
-TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_2) {
-    umf_memory_provider_handle_t malloc_memory_provider;
-    umf_result_t umf_result;
-
-    umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL,
-                                         &malloc_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_NE(malloc_memory_provider, nullptr);
-
-    coarse_memory_provider_params_t coarse_memory_provider_params;
-    // make sure there are no undefined members - prevent a UB
-    memset(&coarse_memory_provider_params, 0,
-           sizeof(coarse_memory_provider_params));
-    coarse_memory_provider_params.allocation_strategy = allocation_strategy;
-    coarse_memory_provider_params.upstream_memory_provider =
-        malloc_memory_provider;
-    coarse_memory_provider_params.immediate_init_from_upstream = true;
-    coarse_memory_provider_params.init_buffer = nullptr;
-    coarse_memory_provider_params.init_buffer_size = 0;
-
-    umf_memory_provider_handle_t coarse_memory_provider = nullptr;
-    umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(),
-                                         &coarse_memory_provider_params,
-                                         &coarse_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-    ASSERT_EQ(coarse_memory_provider, nullptr);
-
-    umfMemoryProviderDestroy(malloc_memory_provider);
-}
-
-// wrong parameters: init_buffer_size must not equal 0 when init_buffer is not NULL
-TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_3) {
-    umf_result_t umf_result;
-
-    const size_t init_buffer_size = 20 * MB;
-
-    // preallocate some memory and initialize the vector with zeros
-    std::vector buffer(init_buffer_size, 0);
-    void *buf = (void *)buffer.data();
-    ASSERT_NE(buf, nullptr);
-
-    coarse_memory_provider_params_t coarse_memory_provider_params;
-    // make sure there are no undefined members - prevent a UB
-    memset(&coarse_memory_provider_params, 0,
-           sizeof(coarse_memory_provider_params));
-    coarse_memory_provider_params.allocation_strategy = allocation_strategy;
-    coarse_memory_provider_params.upstream_memory_provider = nullptr;
-    coarse_memory_provider_params.immediate_init_from_upstream = false;
-    coarse_memory_provider_params.init_buffer = buf;
-    coarse_memory_provider_params.init_buffer_size = 0;
-
-    umf_memory_provider_handle_t coarse_memory_provider = nullptr;
-    umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(),
-                                         &coarse_memory_provider_params,
-                                         &coarse_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-    ASSERT_EQ(coarse_memory_provider, nullptr);
-}
-
-// wrong parameters: init_buffer_size must equal 0 when init_buffer is NULL and immediate_init_from_upstream is false
-TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_4) {
-    umf_memory_provider_handle_t malloc_memory_provider;
-    umf_result_t umf_result;
-
-    umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL,
-                                         &malloc_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
-    ASSERT_NE(malloc_memory_provider, nullptr);
-
-    coarse_memory_provider_params_t coarse_memory_provider_params;
-    // make sure there are no undefined members - prevent a UB
-    memset(&coarse_memory_provider_params, 0,
-           sizeof(coarse_memory_provider_params));
-    coarse_memory_provider_params.allocation_strategy = allocation_strategy;
-    coarse_memory_provider_params.upstream_memory_provider =
-        malloc_memory_provider;
-    coarse_memory_provider_params.immediate_init_from_upstream = false;
-    coarse_memory_provider_params.init_buffer = NULL;
-    coarse_memory_provider_params.init_buffer_size = 20 * MB;
-
-    umf_memory_provider_handle_t coarse_memory_provider = nullptr;
-    umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(),
-                                         &coarse_memory_provider_params,
-                                         &coarse_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-    ASSERT_EQ(coarse_memory_provider, nullptr);
-
-    umfMemoryProviderDestroy(malloc_memory_provider);
-}
-
-// wrong parameters: destroy_upstream_memory_provider is true, but an upstream provider is not provided
-TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_5) {
-    umf_result_t umf_result;
-
-    const size_t init_buffer_size = 20 * MB;
-
-    // preallocate some memory and initialize the vector with zeros
-    std::vector buffer(init_buffer_size, 0);
-    void *buf = (void *)buffer.data();
-    ASSERT_NE(buf, nullptr);
-
-    coarse_memory_provider_params_t coarse_memory_provider_params;
-    // make sure there are no undefined members - prevent a UB
-    memset(&coarse_memory_provider_params, 0,
-           sizeof(coarse_memory_provider_params));
-    coarse_memory_provider_params.allocation_strategy = allocation_strategy;
-    coarse_memory_provider_params.upstream_memory_provider = nullptr;
-    coarse_memory_provider_params.destroy_upstream_memory_provider = true;
-    coarse_memory_provider_params.immediate_init_from_upstream = false;
-    coarse_memory_provider_params.init_buffer = buf;
-    coarse_memory_provider_params.init_buffer_size = init_buffer_size;
-
-    umf_memory_provider_handle_t coarse_memory_provider = nullptr;
-    umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(),
-                                         &coarse_memory_provider_params,
-                                         &coarse_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT);
-    ASSERT_EQ(coarse_memory_provider, nullptr);
-}
-
-TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_split_merge) {
-    umf_memory_provider_handle_t malloc_memory_provider;
-    umf_result_t umf_result;
-
-    umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL,
-                                         &malloc_memory_provider);
-    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = NULL; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - umf_memory_provider_handle_t cp = coarse_memory_provider; - char *ptr = nullptr; - - ASSERT_EQ(GetStats(cp).used_size, 0 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 1); - - /* test umfMemoryProviderAllocationSplit */ - umf_result = umfMemoryProviderAlloc(cp, 2 * MB, 0, (void **)&ptr); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(ptr, nullptr); - ASSERT_EQ(GetStats(cp).used_size, 2 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 2); - - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 2 * MB, 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 2 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 3); - - umf_result = umfMemoryProviderFree(cp, (ptr + 1 * MB), 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 1 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 2); - - umf_result = umfMemoryProviderFree(cp, ptr, 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 0); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 1); - - /* test umfMemoryProviderAllocationMerge */ - umf_result = umfMemoryProviderAlloc(cp, 2 * MB, 0, (void **)&ptr); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(ptr, nullptr); - ASSERT_EQ(GetStats(cp).used_size, 2 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 2); - - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 2 * MB, 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 2 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 3); - - umf_result = - umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 1 * MB), 2 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 2 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 2); - - umf_result = umfMemoryProviderFree(cp, ptr, 2 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 0); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 1); - - umfMemoryProviderDestroy(coarse_memory_provider); - umfMemoryProviderDestroy(malloc_memory_provider); -} - -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_split_merge_negative) { - umf_memory_provider_handle_t 
malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = NULL; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - umf_memory_provider_handle_t cp = coarse_memory_provider; - char *ptr = nullptr; - - ASSERT_EQ(GetStats(cp).used_size, 0 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 1); - - /* test umfMemoryProviderAllocationSplit */ - umf_result = umfMemoryProviderAlloc(cp, 6 * MB, 0, (void **)&ptr); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(ptr, nullptr); - ASSERT_EQ(GetStats(cp).used_size, 6 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 2); - - // firstSize >= totalSize - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 6 * MB, 6 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // firstSize == 0 - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 6 * MB, 0); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // wrong totalSize - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 5 * MB, 1 * KB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - /* test umfMemoryProviderAllocationMerge */ - // split (6 * MB) block into (1 * MB) + (5 * MB) - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 6 * MB, 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 6 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 3); - - // split (5 * MB) block into (2 * MB) + (3 * MB) - umf_result = - umfMemoryProviderAllocationSplit(cp, (ptr + 1 * MB), 5 * MB, 2 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 6 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 4); - - // now we have 3 blocks: (1 * MB) + (2 * MB) + (3 * MB) - - // highPtr <= lowPtr - umf_result = - umfMemoryProviderAllocationMerge(cp, (ptr + 1 * MB), ptr, 2 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // highPtr - lowPtr >= totalSize - umf_result = - umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 1 * MB), 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // low_block->size + high_block->size != totalSize - umf_result = - umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 1 * MB), 5 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // not adjacent blocks - umf_result = - umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 3 * MB), 4 * MB); - ASSERT_EQ(umf_result, 
UMF_RESULT_ERROR_INVALID_ARGUMENT); - - umf_result = umfMemoryProviderFree(cp, ptr, 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 5 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 4); - - umf_result = umfMemoryProviderFree(cp, (ptr + 1 * MB), 2 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 3 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 3); - - umf_result = umfMemoryProviderFree(cp, (ptr + 3 * MB), 3 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 0); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 1); - - umfMemoryProviderDestroy(coarse_memory_provider); - umfMemoryProviderDestroy(malloc_memory_provider); -} - -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_purge_no_upstream) { - umf_result_t umf_result; - - const size_t init_buffer_size = 20 * MB; - - // preallocate some memory and initialize the vector with zeros - std::vector buffer(init_buffer_size, 0); - void *buf = (void *)buffer.data(); - ASSERT_NE(buf, nullptr); - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = nullptr; - coarse_memory_provider_params.immediate_init_from_upstream = false; - coarse_memory_provider_params.init_buffer = buf; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider = nullptr; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - // umfMemoryProviderPurgeLazy - // provider == NULL - umf_result = umfMemoryProviderPurgeLazy(nullptr, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // ptr == NULL - umf_result = umfMemoryProviderPurgeLazy(coarse_memory_provider, nullptr, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // no upstream_memory_provider - umf_result = - umfMemoryProviderPurgeLazy(coarse_memory_provider, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); - - // umfMemoryProviderPurgeForce - // provider == NULL - umf_result = umfMemoryProviderPurgeForce(nullptr, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // ptr == NULL - umf_result = - umfMemoryProviderPurgeForce(coarse_memory_provider, nullptr, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // no upstream_memory_provider - umf_result = - umfMemoryProviderPurgeForce(coarse_memory_provider, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); - - umfMemoryProviderDestroy(coarse_memory_provider); -} - -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_purge_with_upstream) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t 
init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = NULL; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - // umfMemoryProviderPurgeLazy - // provider == NULL - umf_result = umfMemoryProviderPurgeLazy(nullptr, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // ptr == NULL - umf_result = umfMemoryProviderPurgeLazy(coarse_memory_provider, nullptr, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // malloc_memory_provider returns UMF_RESULT_ERROR_UNKNOWN - umf_result = - umfMemoryProviderPurgeLazy(coarse_memory_provider, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN); - - // umfMemoryProviderPurgeForce - // provider == NULL - umf_result = umfMemoryProviderPurgeForce(nullptr, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // ptr == NULL - umf_result = - umfMemoryProviderPurgeForce(coarse_memory_provider, nullptr, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // malloc_memory_provider returns UMF_RESULT_ERROR_UNKNOWN - umf_result = - umfMemoryProviderPurgeForce(coarse_memory_provider, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN); - - umfMemoryProviderDestroy(coarse_memory_provider); - umfMemoryProviderDestroy(malloc_memory_provider); -} diff --git a/test/provider_devdax_memory.cpp b/test/provider_devdax_memory.cpp index 0fd0705da4..6efeef90cb 100644 --- a/test/provider_devdax_memory.cpp +++ b/test/provider_devdax_memory.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

@@ -11,8 +11,8 @@

 #include "base.hpp"

-#include "cpp_helpers.hpp"
 #include "test_helpers.h"
+#include "utils/cpp_helpers.hpp"

 #include
 #include

@@ -46,7 +46,7 @@ static int compare_native_error_str(const char *message, int error) {
 using providerCreateExtParams = std::tuple;

 static void providerCreateExt(providerCreateExtParams params,
-                              umf::provider_unique_handle_t *handle) {
+                              umf_test::provider_unique_handle_t *handle) {
     umf_memory_provider_handle_t hProvider = nullptr;
     auto [provider_ops, provider_params] = params;

@@ -55,8 +55,8 @@ static void providerCreateExt(providerCreateExtParams params,
     ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
     ASSERT_NE(hProvider, nullptr);

-    *handle =
-        umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy);
+    *handle = umf_test::provider_unique_handle_t(hProvider,
+                                                 &umfMemoryProviderDestroy);
 }

 struct umfProviderTest
@@ -74,7 +74,7 @@ struct umfProviderTest

     void TearDown() override { test::TearDown(); }

-    umf::provider_unique_handle_t provider;
+    umf_test::provider_unique_handle_t provider;
     size_t page_size;
     size_t page_plus_64;
 };
@@ -100,7 +100,7 @@ static void test_alloc_free_success(umf_memory_provider_handle_t provider,
     }

     umf_result = umfMemoryProviderFree(provider, ptr, size);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
 }

 static void verify_last_native_error(umf_memory_provider_handle_t provider,
@@ -162,9 +162,10 @@ TEST_F(test, test_if_mapped_with_MAP_SYNC) {
     bool flag_found = is_mapped_with_MAP_SYNC(path, buf, size);

     umf_result = umfMemoryProviderFree(hProvider, buf, size);
-    ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);

     umfMemoryProviderDestroy(hProvider);
+    umfDevDaxMemoryProviderParamsDestroy(params);

     // fail test if the "sf" flag was not found
     ASSERT_EQ(flag_found, true);
@@ -233,34 +234,40 @@ TEST_P(umfProviderTest, purge_force) {
     test_alloc_free_success(provider.get(), page_size, 0, PURGE_FORCE);
 }

+TEST_P(umfProviderTest, purge_force_unaligned_alloc) {
+    void *ptr;
+    auto ret = umfMemoryProviderAlloc(provider.get(), page_plus_64, 0, &ptr);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+    test_alloc_free_success(provider.get(), page_size, 0, PURGE_FORCE);
+    umfMemoryProviderFree(provider.get(), ptr, page_plus_64);
+}

 // negative tests using test_alloc_failure

 TEST_P(umfProviderTest, alloc_page64_align_page_minus_1_WRONG_ALIGNMENT_1) {
     test_alloc_failure(provider.get(), page_plus_64, page_size - 1,
-                       UMF_RESULT_ERROR_INVALID_ARGUMENT, 0);
+                       UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0);
 }

 TEST_P(umfProviderTest, alloc_page64_align_one_half_pages_WRONG_ALIGNMENT_2) {
     test_alloc_failure(provider.get(), page_plus_64,
                        page_size + (page_size / 2),
-                       UMF_RESULT_ERROR_INVALID_ARGUMENT, 0);
+                       UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0);
 }

 TEST_P(umfProviderTest, alloc_page64_WRONG_ALIGNMENT_3_pages) {
     test_alloc_failure(provider.get(), page_plus_64, 3 * page_size,
-                       UMF_RESULT_ERROR_INVALID_ARGUMENT, 0);
+                       UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0);
 }

 TEST_P(umfProviderTest, alloc_3_pages_WRONG_ALIGNMENT_3_pages) {
     test_alloc_failure(provider.get(), 3 * page_size, 3 * page_size,
-                       UMF_RESULT_ERROR_INVALID_ARGUMENT, 0);
+                       UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0);
 }

 TEST_P(umfProviderTest, alloc_WRONG_SIZE) {
     size_t size = (size_t)(-1) & ~(page_size - 1);
     test_alloc_failure(provider.get(), size, 0,
-                       UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC,
-                       UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED);
+
UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY, 0); } // other positive tests @@ -295,12 +302,12 @@ TEST_P(umfProviderTest, get_name) { TEST_P(umfProviderTest, free_size_0_ptr_not_null) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), INVALID_PTR, 0); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); } TEST_P(umfProviderTest, free_NULL) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), nullptr, 0); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } // other negative tests @@ -308,7 +315,7 @@ TEST_P(umfProviderTest, free_NULL) { TEST_P(umfProviderTest, free_INVALID_POINTER_SIZE_GT_0) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), INVALID_PTR, page_plus_64); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); } TEST_P(umfProviderTest, purge_lazy_INVALID_POINTER) { diff --git a/test/provider_devdax_memory_ipc.cpp b/test/provider_devdax_memory_ipc.cpp index 3941f66e90..47b389c95f 100644 --- a/test/provider_devdax_memory_ipc.cpp +++ b/test/provider_devdax_memory_ipc.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,16 +15,21 @@ using umf_test::test; -using devdax_params_unique_handle_t = - std::unique_ptr; +bool devDaxEnvSet() { + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); + if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) { + return false; + } -devdax_params_unique_handle_t create_devdax_params() { + return true; +} + +void *defaultDevDaxParamsCreate() { char *path = getenv("UMF_TESTS_DEVDAX_PATH"); char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) { - return devdax_params_unique_handle_t( - nullptr, &umfDevDaxMemoryProviderParamsDestroy); + return nullptr; } umf_devdax_memory_provider_params_handle_t params = NULL; @@ -35,32 +40,34 @@ devdax_params_unique_handle_t create_devdax_params() { "Failed to create DevDax Memory Provider params"); } - return devdax_params_unique_handle_t(params, - &umfDevDaxMemoryProviderParamsDestroy); + return params; } -auto defaultDevDaxParams = create_devdax_params(); +umf_result_t defaultDevDaxParamsDestroy(void *params) { + return umfDevDaxMemoryProviderParamsDestroy( + (umf_devdax_memory_provider_params_handle_t)params); +} HostMemoryAccessor hostAccessor; static std::vector getIpcProxyPoolTestParamsList(void) { std::vector ipcProxyPoolTestParamsList = {}; - if (!defaultDevDaxParams.get()) { + if (!devDaxEnvSet()) { // return empty list to skip the test return ipcProxyPoolTestParamsList; } ipcProxyPoolTestParamsList = { - {umfProxyPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - defaultDevDaxParams.get(), &hostAccessor, true}, + {umfProxyPoolOps(), nullptr, nullptr, umfDevDaxMemoryProviderOps(), + defaultDevDaxParamsCreate, defaultDevDaxParamsDestroy, &hostAccessor}, #ifdef UMF_POOL_JEMALLOC_ENABLED - {umfJemallocPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - defaultDevDaxParams.get(), &hostAccessor, false}, + {umfJemallocPoolOps(), nullptr, nullptr, umfDevDaxMemoryProviderOps(), + defaultDevDaxParamsCreate, defaultDevDaxParamsDestroy, &hostAccessor}, #endif #ifdef UMF_POOL_SCALABLE_ENABLED - 
{umfScalablePoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - defaultDevDaxParams.get(), &hostAccessor, false}, + {umfScalablePoolOps(), nullptr, nullptr, umfDevDaxMemoryProviderOps(), + defaultDevDaxParamsCreate, defaultDevDaxParamsDestroy, &hostAccessor}, #endif }; diff --git a/test/provider_file_memory.cpp b/test/provider_file_memory.cpp index d3124aa11f..bcc9d26453 100644 --- a/test/provider_file_memory.cpp +++ b/test/provider_file_memory.cpp @@ -1,11 +1,11 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "base.hpp" -#include "cpp_helpers.hpp" #include "test_helpers.h" +#include "utils/cpp_helpers.hpp" #ifndef _WIN32 #include "test_helpers_linux.h" #endif @@ -42,7 +42,7 @@ static int compare_native_error_str(const char *message, int error) { using providerCreateExtParams = std::tuple; static void providerCreateExt(providerCreateExtParams params, - umf::provider_unique_handle_t *handle) { + umf_test::provider_unique_handle_t *handle) { umf_memory_provider_handle_t hProvider = nullptr; auto [provider_ops, provider_params] = params; @@ -51,8 +51,8 @@ static void providerCreateExt(providerCreateExtParams params, ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_NE(hProvider, nullptr); - *handle = - umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); + *handle = umf_test::provider_unique_handle_t(hProvider, + &umfMemoryProviderDestroy); } struct FileProviderParamsDefault @@ -70,7 +70,7 @@ struct FileProviderParamsDefault void TearDown() override { test::TearDown(); } - umf::provider_unique_handle_t provider; + umf_test::provider_unique_handle_t provider; size_t page_size; size_t page_plus_64; }; @@ -98,7 +98,7 @@ static void test_alloc_free_success(umf_memory_provider_handle_t provider, } umf_result = umfMemoryProviderFree(provider, ptr, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } static void verify_last_native_error(umf_memory_provider_handle_t provider, @@ -159,9 +159,10 @@ TEST_F(test, test_if_mapped_with_MAP_SYNC) { bool flag_found = is_mapped_with_MAP_SYNC(path, buf, size); umf_result = umfMemoryProviderFree(hProvider, buf, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); umfMemoryProviderDestroy(hProvider); + umfFileMemoryProviderParamsDestroy(params); // fail test if the "sf" flag was not found ASSERT_EQ(flag_found, true); @@ -244,10 +245,10 @@ TEST_P(FileProviderParamsDefault, two_allocations) { memset(ptr2, 0x22, size); umf_result = umfMemoryProviderFree(provider.get(), ptr1, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); umf_result = umfMemoryProviderFree(provider.get(), ptr2, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } TEST_P(FileProviderParamsDefault, alloc_page64_align_0) { @@ -366,12 +367,12 @@ TEST_P(FileProviderParamsDefault, get_name) { TEST_P(FileProviderParamsDefault, free_size_0_ptr_not_null) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), INVALID_PTR, 0); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); } TEST_P(FileProviderParamsDefault, free_NULL) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), nullptr, 0); - 
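The hunks in these test files move `cpp_helpers.hpp` under `utils/` and rename its namespace from `umf` to `umf_test`. The helper type itself is not shown in this diff; presumably it is a `std::unique_ptr` with the UMF destroy function as its deleter, along these lines (the exact definition may differ):

```cpp
// Sketch of what umf_test::provider_unique_handle_t presumably looks like;
// cpp_helpers.hpp is not part of this diff, so treat this as an assumption.
#include <memory>
#include <type_traits>
#include <umf/memory_provider.h>

using provider_unique_handle_t =
    std::unique_ptr<std::remove_pointer_t<umf_memory_provider_handle_t>,
                    decltype(&umfMemoryProviderDestroy)>;

// Usage, as in providerCreateExt() above:
//   provider_unique_handle_t handle(hProvider, &umfMemoryProviderDestroy);
// umfMemoryProviderDestroy(hProvider) then runs exactly once, on scope exit,
// which is why the fixtures no longer destroy providers in TearDown().
```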
ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } // other negative tests @@ -449,7 +450,7 @@ TEST_F(test, set_null_path) { TEST_P(FileProviderParamsDefault, free_INVALID_POINTER_SIZE_GT_0) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), INVALID_PTR, page_plus_64); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); } TEST_P(FileProviderParamsDefault, purge_lazy_INVALID_POINTER) { @@ -512,7 +513,7 @@ TEST_P(FileProviderParamsShared, IPC_base_success_test) { ASSERT_EQ(ret, 0); umf_result = umfMemoryProviderFree(provider.get(), ptr, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } TEST_P(FileProviderParamsShared, IPC_file_not_exist) { @@ -552,5 +553,5 @@ TEST_P(FileProviderParamsShared, IPC_file_not_exist) { ASSERT_EQ(new_ptr, nullptr); umf_result = umfMemoryProviderFree(provider.get(), ptr, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } diff --git a/test/provider_file_memory_ipc.cpp b/test/provider_file_memory_ipc.cpp index ee7ab6c8ff..90623a1793 100644 --- a/test/provider_file_memory_ipc.cpp +++ b/test/provider_file_memory_ipc.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -17,13 +17,10 @@ using umf_test::test; #define FILE_PATH ((char *)"tmp_file") -using file_params_unique_handle_t = - std::unique_ptr; - -file_params_unique_handle_t get_file_params_shared(char *path) { +void *createFileParamsShared() { umf_file_memory_provider_params_handle_t file_params = NULL; - umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + umf_result_t res = + umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH); if (res != UMF_RESULT_SUCCESS) { throw std::runtime_error( "Failed to create File Memory Provider params"); @@ -37,20 +34,21 @@ file_params_unique_handle_t get_file_params_shared(char *path) { "Memory Provider params"); } - return file_params_unique_handle_t(file_params, - &umfFileMemoryProviderParamsDestroy); + return file_params; } -file_params_unique_handle_t file_params_shared = - get_file_params_shared(FILE_PATH); +umf_result_t destroyFileParamsShared(void *params) { + return umfFileMemoryProviderParamsDestroy( + (umf_file_memory_provider_params_handle_t)params); +} -file_params_unique_handle_t get_file_params_fsdax(char *path) { +void *createFileParamsFSDAX() { umf_file_memory_provider_params_handle_t file_params = NULL; - umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + umf_result_t res = umfFileMemoryProviderParamsCreate( + &file_params, getenv("UMF_TESTS_FSDAX_PATH")); if (res != UMF_RESULT_SUCCESS) { //test will be skipped. 
- return file_params_unique_handle_t(nullptr, - &umfFileMemoryProviderParamsDestroy); + return nullptr; } res = umfFileMemoryProviderParamsSetVisibility(file_params, @@ -61,26 +59,27 @@ file_params_unique_handle_t get_file_params_fsdax(char *path) { "Memory Provider params"); } - return file_params_unique_handle_t(file_params, - &umfFileMemoryProviderParamsDestroy); + return file_params; } -file_params_unique_handle_t file_params_fsdax = - get_file_params_fsdax(getenv("UMF_TESTS_FSDAX_PATH")); +umf_result_t destroyFileParamsFSDAX(void *params) { + return umfFileMemoryProviderParamsDestroy( + (umf_file_memory_provider_params_handle_t)params); +} HostMemoryAccessor hostAccessor; static std::vector ipcManyPoolsTestParamsList = { // TODO: enable it when sizes of allocations in ipcFixtures.hpp are fixed // {umfProxyPoolOps(), nullptr, umfFileMemoryProviderOps(), -// file_params_shared.get(), &hostAccessor, true}, +// file_params_shared.get(), &hostAccessor}, #ifdef UMF_POOL_JEMALLOC_ENABLED - {umfJemallocPoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_shared.get(), &hostAccessor, false}, + {umfJemallocPoolOps(), nullptr, nullptr, umfFileMemoryProviderOps(), + createFileParamsShared, destroyFileParamsShared, &hostAccessor}, #endif #ifdef UMF_POOL_SCALABLE_ENABLED - {umfScalablePoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_shared.get(), &hostAccessor, false}, + {umfScalablePoolOps(), nullptr, nullptr, umfFileMemoryProviderOps(), + createFileParamsShared, destroyFileParamsShared, &hostAccessor}, #endif }; @@ -96,14 +95,14 @@ static std::vector getIpcFsDaxTestParamsList(void) { ipcFsDaxTestParamsList = { // TODO: enable it when sizes of allocations in ipcFixtures.hpp are fixed // {umfProxyPoolOps(), nullptr, umfFileMemoryProviderOps(), -// file_params_fsdax.get(), &hostAccessor, true}, +// file_params_fsdax.get(), &hostAccessor}, #ifdef UMF_POOL_JEMALLOC_ENABLED - {umfJemallocPoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_fsdax.get(), &hostAccessor, false}, + {umfJemallocPoolOps(), nullptr, nullptr, umfFileMemoryProviderOps(), + createFileParamsFSDAX, destroyFileParamsFSDAX, &hostAccessor}, #endif #ifdef UMF_POOL_SCALABLE_ENABLED - {umfScalablePoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_fsdax.get(), &hostAccessor, false}, + {umfScalablePoolOps(), nullptr, nullptr, umfFileMemoryProviderOps(), + createFileParamsFSDAX, destroyFileParamsFSDAX, &hostAccessor}, #endif }; diff --git a/test/provider_fixed_memory.cpp b/test/provider_fixed_memory.cpp new file mode 100644 index 0000000000..dac651435a --- /dev/null +++ b/test/provider_fixed_memory.cpp @@ -0,0 +1,501 @@ +// Copyright (C) 2024-2025 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
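The IPC-test refactor above replaces file-scope `unique_ptr` params, which were built at static-initialization time, with create/destroy callbacks stored in the test params, so each test case constructs and tears down its own provider params. A sketch of such a pair, modeled on `createFileParamsFSDAX`/`destroyFileParamsFSDAX` above; the explicit `getenv()` guard mirrors `devDaxEnvSet()` from the devdax IPC tests and is an addition here, not part of the patch, and the `UMF_MEM_MAP_SHARED` visibility setup is omitted for brevity.

```cpp
// Sketch of a params factory pair as used in the test-params lists above.
// Returning nullptr from the creator makes the fixture skip the test.
#include <stdlib.h>
#include <umf/providers/provider_file_memory.h>

void *createParamsFromEnv() {
    const char *path = getenv("UMF_TESTS_FSDAX_PATH");
    if (path == NULL || path[0] == '\0') {
        return nullptr; // test will be skipped
    }
    umf_file_memory_provider_params_handle_t params = nullptr;
    if (umfFileMemoryProviderParamsCreate(&params, path) !=
        UMF_RESULT_SUCCESS) {
        return nullptr;
    }
    return params;
}

umf_result_t destroyParamsFromEnv(void *params) {
    return umfFileMemoryProviderParamsDestroy(
        (umf_file_memory_provider_params_handle_t)params);
}
```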
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "base.hpp"
+
+#include "test_helpers.h"
+#include "utils/cpp_helpers.hpp"
+#ifndef _WIN32
+#include "test_helpers_linux.h"
+#endif
+
+#include
+#include
+#include
+
+using umf_test::test;
+
+#define FIXED_BUFFER_SIZE (10 * utils_get_page_size())
+#define INVALID_PTR ((void *)0x01)
+
+typedef enum purge_t {
+    PURGE_NONE = 0,
+    PURGE_LAZY = 1,
+    PURGE_FORCE = 2,
+} purge_t;
+
+static const char *Native_error_str[] = {
+    "success",              // UMF_FIXED_RESULT_SUCCESS
+    "force purging failed", // UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED
+};
+
+// Test helpers
+
+static int compare_native_error_str(const char *message, int error) {
+    const char *error_str = Native_error_str[error - UMF_FIXED_RESULT_SUCCESS];
+    size_t len = strlen(error_str);
+    return strncmp(message, error_str, len);
+}
+
+using providerCreateExtParams = std::tuple;
+
+static void providerCreateExt(providerCreateExtParams params,
+                              umf_test::provider_unique_handle_t *handle) {
+    umf_memory_provider_handle_t hProvider = nullptr;
+    auto [provider_ops, provider_params] = params;
+
+    auto ret =
+        umfMemoryProviderCreate(provider_ops, provider_params, &hProvider);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+    ASSERT_NE(hProvider, nullptr);
+
+    *handle = umf_test::provider_unique_handle_t(hProvider,
+                                                 &umfMemoryProviderDestroy);
+}
+
+struct FixedProviderTest
+    : umf_test::test,
+      ::testing::WithParamInterface {
+    void SetUp() override {
+        test::SetUp();
+
+        // Allocate a memory buffer to use with the fixed memory provider
+        memory_size = FIXED_BUFFER_SIZE; // Allocate 10 pages
+        memory_buffer = malloc(memory_size);
+        ASSERT_NE(memory_buffer, nullptr);
+
+        // Create provider parameters
+        umf_fixed_memory_provider_params_handle_t params = nullptr;
+        umf_result_t res = umfFixedMemoryProviderParamsCreate(
+            &params, memory_buffer, memory_size);
+        ASSERT_EQ(res, UMF_RESULT_SUCCESS);
+        ASSERT_NE(params, nullptr);
+
+        providerCreateExt(std::make_tuple(umfFixedMemoryProviderOps(), params),
+                          &provider);
+
+        umfFixedMemoryProviderParamsDestroy(params);
+        umf_result_t umf_result =
+            umfMemoryProviderGetMinPageSize(provider.get(), NULL, &page_size);
+        ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+        page_plus_64 = page_size + 64;
+    }
+
+    void TearDown() override {
+        if (memory_buffer) {
+            free(memory_buffer);
+            memory_buffer = nullptr;
+        }
+        test::TearDown();
+    }
+
+    void test_alloc_free_success(size_t size, size_t alignment, purge_t purge) {
+        void *ptr = nullptr;
+        auto provider = this->provider.get();
+
+        umf_result_t umf_result =
+            umfMemoryProviderAlloc(provider, size, alignment, &ptr);
+        ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+        ASSERT_NE(ptr, nullptr);
+
+        memset(ptr, 0xFF, size);
+
+        if (purge == PURGE_LAZY) {
+            umf_result = umfMemoryProviderPurgeLazy(provider, ptr, size);
+            ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED);
+        } else if (purge == PURGE_FORCE) {
+            umf_result = umfMemoryProviderPurgeForce(provider, ptr, size);
+            ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+        }
+
+        umf_result = umfMemoryProviderFree(provider, ptr, size);
+        ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    }
+
+    void verify_last_native_error(int32_t err) {
+        const char *message;
+        int32_t error;
+        auto provider = this->provider.get();
+        umfMemoryProviderGetLastNativeError(provider, &message, &error);
+        ASSERT_EQ(error, err);
+        ASSERT_EQ(compare_native_error_str(message, error), 0);
+    }
+
+    void test_alloc_failure(size_t size, size_t alignment, umf_result_t result,
+                            int32_t err) {
+        void *ptr = nullptr;
+ auto provider = this->provider.get(); + + umf_result_t umf_result = + umfMemoryProviderAlloc(provider, size, alignment, &ptr); + ASSERT_EQ(umf_result, result); + ASSERT_EQ(ptr, nullptr); + + if (umf_result == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { + verify_last_native_error(err); + } + } + + umf_test::provider_unique_handle_t provider; + size_t page_size; + size_t page_plus_64; + void *memory_buffer = nullptr; + size_t memory_size = 0; +}; + +// TESTS + +// Positive tests using test_alloc_free_success + +INSTANTIATE_TEST_SUITE_P(fixedProviderTest, FixedProviderTest, + ::testing::Values(providerCreateExtParams{ + umfFixedMemoryProviderOps(), nullptr})); + +TEST_P(FixedProviderTest, create_destroy) { + // Creation and destruction are handled in SetUp and TearDown +} + +TEST_F(test, create_no_params) { + umf_memory_provider_handle_t provider = nullptr; + auto result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), nullptr, + &provider); + ASSERT_EQ(result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(provider, nullptr); +} + +TEST_P(FixedProviderTest, two_allocations) { + umf_result_t umf_result; + void *ptr1 = nullptr; + void *ptr2 = nullptr; + size_t size = page_plus_64; + size_t alignment = page_size; + + umf_result = umfMemoryProviderAlloc(provider.get(), size, alignment, &ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + umf_result = umfMemoryProviderAlloc(provider.get(), size, alignment, &ptr2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr2, nullptr); + + ASSERT_NE(ptr1, ptr2); + if ((uintptr_t)ptr1 > (uintptr_t)ptr2) { + ASSERT_GT((uintptr_t)ptr1 - (uintptr_t)ptr2, size); + } else { + ASSERT_GT((uintptr_t)ptr2 - (uintptr_t)ptr1, size); + } + + memset(ptr1, 0x11, size); + memset(ptr2, 0x22, size); + + umf_result = umfMemoryProviderFree(provider.get(), ptr1, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfMemoryProviderFree(provider.get(), ptr2, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_P(FixedProviderTest, alloc_page64_align_0) { + test_alloc_free_success(page_plus_64, 0, PURGE_NONE); +} + +TEST_P(FixedProviderTest, alloc_page64_align_page_div_2) { + test_alloc_free_success(page_plus_64, page_size / 2, PURGE_NONE); +} + +TEST_P(FixedProviderTest, purge_lazy) { + test_alloc_free_success(page_size, 0, PURGE_LAZY); +} + +TEST_P(FixedProviderTest, purge_force) { + test_alloc_free_success(page_size, 0, PURGE_FORCE); +} + +// Negative tests using test_alloc_failure + +TEST_P(FixedProviderTest, alloc_WRONG_SIZE) { + test_alloc_failure((size_t)-1, 0, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY, 0); +} + +TEST_P(FixedProviderTest, alloc_page64_WRONG_ALIGNMENT_3_pages) { + test_alloc_failure(page_plus_64, 3 * page_size, + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +TEST_P(FixedProviderTest, alloc_3pages_WRONG_ALIGNMENT_3pages) { + test_alloc_failure(3 * page_size, 3 * page_size, + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +TEST_P(FixedProviderTest, alloc_page64_align_page_plus_1_WRONG_ALIGNMENT_1) { + test_alloc_failure(page_plus_64, page_size + 1, + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +TEST_P(FixedProviderTest, alloc_page64_align_one_half_pages_WRONG_ALIGNMENT_2) { + test_alloc_failure(page_plus_64, page_size + (page_size / 2), + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +// Other positive tests + +TEST_P(FixedProviderTest, get_min_page_size) { + size_t min_page_size; + umf_result_t umf_result = umfMemoryProviderGetMinPageSize( + provider.get(), nullptr, &min_page_size); + 
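The negative tests above encode the error taxonomy this patch settles on: a misaligned request (alignment not a power of two, such as `3 * page_size` or `page_size + 1`) now fails with `UMF_RESULT_ERROR_INVALID_ALIGNMENT` rather than the generic invalid-argument error, and a request no provider can satisfy maps to `UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY`. In caller terms, a sketch; `provider` and `page_size` are assumed to come from a fixture like the one above:

```cpp
// Expectations the tests above encode. Asserts stand in for GTest checks.
void *ptr = nullptr;

// alignment that is not a power of two -> dedicated alignment error
umf_result_t ret =
    umfMemoryProviderAlloc(provider, page_size + 64, 3 * page_size, &ptr);
// expected: ret == UMF_RESULT_ERROR_INVALID_ALIGNMENT, ptr stays nullptr

// a size the backing buffer can never satisfy -> out of host memory,
// not a provider-specific error as before
ret = umfMemoryProviderAlloc(provider, (size_t)-1, 0, &ptr);
// expected: ret == UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY
```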
ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_LE(min_page_size, page_size); +} + +TEST_P(FixedProviderTest, get_recommended_page_size) { + size_t min_page_size; + umf_result_t umf_result = umfMemoryProviderGetMinPageSize( + provider.get(), nullptr, &min_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_LE(min_page_size, page_size); + + size_t recommended_page_size; + umf_result = umfMemoryProviderGetRecommendedPageSize( + provider.get(), 0, &recommended_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(recommended_page_size, min_page_size); +} + +TEST_P(FixedProviderTest, get_name) { + const char *name = umfMemoryProviderGetName(provider.get()); + ASSERT_STREQ(name, "FIXED"); +} + +TEST_P(FixedProviderTest, free_size_0_ptr_not_null) { + umf_result_t umf_result = + umfMemoryProviderFree(provider.get(), INVALID_PTR, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(FixedProviderTest, free_NULL) { + umf_result_t umf_result = umfMemoryProviderFree(provider.get(), nullptr, 0); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +// Other negative tests + +TEST_P(FixedProviderTest, free_INVALID_POINTER_SIZE_GT_0) { + umf_result_t umf_result = + umfMemoryProviderFree(provider.get(), INVALID_PTR, page_plus_64); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(FixedProviderTest, purge_lazy_INVALID_POINTER) { + umf_result_t umf_result = + umfMemoryProviderPurgeLazy(provider.get(), INVALID_PTR, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_P(FixedProviderTest, purge_force_INVALID_POINTER) { + umf_result_t umf_result = + umfMemoryProviderPurgeForce(provider.get(), INVALID_PTR, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC); + + verify_last_native_error(UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED); +} + +// Params tests + +TEST_F(test, params_null_handle) { + constexpr size_t memory_size = 100; + char memory_buffer[memory_size]; + umf_result_t umf_result = + umfFixedMemoryProviderParamsCreate(nullptr, memory_buffer, memory_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = umfFixedMemoryProviderParamsDestroy(nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_F(test, create_with_null_ptr) { + constexpr size_t memory_size = 100; + umf_fixed_memory_provider_params_handle_t wrong_params = nullptr; + umf_result_t umf_result = + umfFixedMemoryProviderParamsCreate(&wrong_params, nullptr, memory_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(wrong_params, nullptr); +} + +TEST_F(test, create_with_zero_size) { + constexpr size_t memory_size = 100; + char memory_buffer[memory_size]; + umf_fixed_memory_provider_params_handle_t wrong_params = nullptr; + umf_result_t umf_result = + umfFixedMemoryProviderParamsCreate(&wrong_params, memory_buffer, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(wrong_params, nullptr); +} + +TEST_P(FixedProviderTest, alloc_size_exceeds_buffer) { + size_t size = memory_size + page_size; + test_alloc_failure(size, 0, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY, 0); +} + +TEST_P(FixedProviderTest, merge) { + umf_result_t umf_result; + void *ptr1 = nullptr; + void *ptr2 = nullptr; + size_t size = page_size; + size_t alignment = page_size; + + umf_result = umfMemoryProviderAlloc(provider.get(), size, alignment, &ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + umf_result = umfMemoryProviderAlloc(provider.get(), size, 
alignment, &ptr2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr2, nullptr); + + ASSERT_EQ((uintptr_t)ptr2 - (uintptr_t)ptr1, size); + + memset(ptr1, 0x11, size); + memset(ptr2, 0x22, size); + + size_t merged_size = size * 2; + umf_result = umfMemoryProviderAllocationMerge(provider.get(), ptr1, ptr2, + merged_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfMemoryProviderFree(provider.get(), ptr1, merged_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_P(FixedProviderTest, split) { + umf_result_t umf_result; + void *ptr1 = nullptr; + void *ptr2 = nullptr; + size_t size = page_size; + size_t alignment = page_size; + + umf_result = + umfMemoryProviderAlloc(provider.get(), size * 2, alignment, &ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + umf_result = + umfMemoryProviderAllocationSplit(provider.get(), ptr1, size * 2, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ptr2 = (void *)((uintptr_t)ptr1 + size); + memset(ptr1, 0x11, size); + + umf_result = umfMemoryProviderFree(provider.get(), ptr1, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + memset(ptr2, 0x22, size); + umf_result = umfMemoryProviderFree(provider.get(), ptr2, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_P(FixedProviderTest, pool_from_ptr_whole_size_success) { + umf_result_t umf_result; + size_t size_of_first_alloc; + size_t size_of_pool_from_ptr; + void *ptr_for_pool = nullptr; + void *ptr = nullptr; + + umf_memory_pool_handle_t proxyFixedPool = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), provider.get(), nullptr, 0, + &proxyFixedPool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + size_of_first_alloc = FIXED_BUFFER_SIZE - (2 * page_size); + ptr_for_pool = umfPoolMalloc(proxyFixedPool, size_of_first_alloc); + ASSERT_NE(ptr_for_pool, nullptr); + + // Create provider parameters + size_of_pool_from_ptr = size_of_first_alloc; // whole size + umf_fixed_memory_provider_params_handle_t params = nullptr; + umf_result = umfFixedMemoryProviderParamsCreate(¶ms, ptr_for_pool, + size_of_pool_from_ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + umf_memory_provider_handle_t providerFromPtr = nullptr; + umf_result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, + &providerFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(providerFromPtr, nullptr); + + umf_memory_pool_handle_t poolFromPtr = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), providerFromPtr, nullptr, 0, + &poolFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ptr = umfPoolMalloc(poolFromPtr, size_of_pool_from_ptr); + ASSERT_NE(ptr, nullptr); + + memset(ptr, 0xFF, size_of_pool_from_ptr); + + umf_result = umfPoolFree(poolFromPtr, ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(poolFromPtr); + umfMemoryProviderDestroy(providerFromPtr); + umfFixedMemoryProviderParamsDestroy(params); + + umf_result = umfPoolFree(proxyFixedPool, ptr_for_pool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(proxyFixedPool); +} + +TEST_P(FixedProviderTest, pool_from_ptr_half_size_success) { + umf_result_t umf_result; + size_t size_of_first_alloc; + size_t size_of_pool_from_ptr; + void *ptr_for_pool = nullptr; + void *ptr = nullptr; + + umf_memory_pool_handle_t proxyFixedPool = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), provider.get(), nullptr, 0, + &proxyFixedPool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + size_of_first_alloc 
= FIXED_BUFFER_SIZE - (2 * page_size); + ptr_for_pool = umfPoolMalloc(proxyFixedPool, size_of_first_alloc); + ASSERT_NE(ptr_for_pool, nullptr); + + // Create provider parameters + size_of_pool_from_ptr = size_of_first_alloc / 2; // half size + umf_fixed_memory_provider_params_handle_t params = nullptr; + umf_result = umfFixedMemoryProviderParamsCreate(¶ms, ptr_for_pool, + size_of_pool_from_ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + umf_memory_provider_handle_t providerFromPtr = nullptr; + umf_result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, + &providerFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(providerFromPtr, nullptr); + + umf_memory_pool_handle_t poolFromPtr = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), providerFromPtr, nullptr, 0, + &poolFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ptr = umfPoolMalloc(poolFromPtr, size_of_pool_from_ptr); + ASSERT_NE(ptr, nullptr); + + memset(ptr, 0xFF, size_of_pool_from_ptr); + + umf_result = umfPoolFree(poolFromPtr, ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(poolFromPtr); + umfMemoryProviderDestroy(providerFromPtr); + umfFixedMemoryProviderParamsDestroy(params); + + umf_result = umfPoolFree(proxyFixedPool, ptr_for_pool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(proxyFixedPool); +} diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index 57bce46d24..f3552b9236 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -1,18 +1,16 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "base.hpp" -#include "cpp_helpers.hpp" #include "ipcFixtures.hpp" #include "test_helpers.h" +#include "utils/cpp_helpers.hpp" #include -#include -#if (defined UMF_POOL_DISJOINT_ENABLED) #include -#endif +#include #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif @@ -49,7 +47,7 @@ static int compare_native_error_str(const char *message, int error) { using providerCreateExtParams = std::tuple; static void providerCreateExt(providerCreateExtParams params, - umf::provider_unique_handle_t *handle) { + umf_test::provider_unique_handle_t *handle) { umf_memory_provider_handle_t hProvider = nullptr; auto [provider_ops, provider_params] = params; @@ -58,8 +56,8 @@ static void providerCreateExt(providerCreateExtParams params, ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_NE(hProvider, nullptr); - *handle = - umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); + *handle = umf_test::provider_unique_handle_t(hProvider, + &umfMemoryProviderDestroy); } struct umfProviderTest @@ -77,7 +75,7 @@ struct umfProviderTest void TearDown() override { test::TearDown(); } - umf::provider_unique_handle_t provider; + umf_test::provider_unique_handle_t provider; size_t page_size; size_t page_plus_64; }; @@ -407,11 +405,7 @@ TEST_P(umfProviderTest, close_ipc_handle_wrong_visibility) { GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); -using os_params_unique_handle_t = - std::unique_ptr; - -os_params_unique_handle_t osMemoryProviderParamsShared() { +void *createOsMemoryProviderParamsShared() { umf_os_memory_provider_params_handle_t params = nullptr; umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); if (res != UMF_RESULT_SUCCESS) { @@ -422,18 +416,17 @@ os_params_unique_handle_t 
osMemoryProviderParamsShared() { throw std::runtime_error("Failed to set protection"); } - return os_params_unique_handle_t(params, &umfOsMemoryProviderParamsDestroy); + return params; } -auto os_params = osMemoryProviderParamsShared(); -HostMemoryAccessor hostAccessor; +umf_result_t destroyOsMemoryProviderParamsShared(void *params) { + return umfOsMemoryProviderParamsDestroy( + static_cast(params)); +} -#if (defined UMF_POOL_DISJOINT_ENABLED) -using disjoint_params_unique_handle_t = - std::unique_ptr; +HostMemoryAccessor hostAccessor; -disjoint_params_unique_handle_t disjointPoolParams() { +void *createDisjointPoolParams() { umf_disjoint_pool_params_handle_t params = nullptr; umf_result_t res = umfDisjointPoolParamsCreate(¶ms); if (res != UMF_RESULT_SUCCESS) { @@ -441,35 +434,41 @@ disjoint_params_unique_handle_t disjointPoolParams() { } res = umfDisjointPoolParamsSetSlabMinSize(params, 4096); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(params); throw std::runtime_error("Failed to set slab min size"); } res = umfDisjointPoolParamsSetMaxPoolableSize(params, 4096); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(params); throw std::runtime_error("Failed to set max poolable size"); } res = umfDisjointPoolParamsSetCapacity(params, 4); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(params); throw std::runtime_error("Failed to set capacity"); } res = umfDisjointPoolParamsSetMinBucketSize(params, 64); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(params); throw std::runtime_error("Failed to set min bucket size"); } - return disjoint_params_unique_handle_t(params, - &umfDisjointPoolParamsDestroy); + return params; +} + +umf_result_t destroyDisjointPoolParams(void *params) { + return umfDisjointPoolParamsDestroy( + static_cast(params)); } -disjoint_params_unique_handle_t disjointParams = disjointPoolParams(); -#endif static std::vector ipcTestParamsList = { -#if (defined UMF_POOL_DISJOINT_ENABLED) - {umfDisjointPoolOps(), disjointParams.get(), umfOsMemoryProviderOps(), - os_params.get(), &hostAccessor, false}, -#endif + {umfDisjointPoolOps(), createDisjointPoolParams, destroyDisjointPoolParams, + umfOsMemoryProviderOps(), createOsMemoryProviderParamsShared, + destroyOsMemoryProviderParamsShared, &hostAccessor}, #ifdef UMF_POOL_JEMALLOC_ENABLED - {umfJemallocPoolOps(), nullptr, umfOsMemoryProviderOps(), os_params.get(), - &hostAccessor, false}, + {umfJemallocPoolOps(), nullptr, nullptr, umfOsMemoryProviderOps(), + createOsMemoryProviderParamsShared, destroyOsMemoryProviderParamsShared, + &hostAccessor}, #endif }; diff --git a/test/provider_os_memory_multiple_numa_nodes.cpp b/test/provider_os_memory_multiple_numa_nodes.cpp index e493a427ca..cfc58f2f06 100644 --- a/test/provider_os_memory_multiple_numa_nodes.cpp +++ b/test/provider_os_memory_multiple_numa_nodes.cpp @@ -674,17 +674,17 @@ TEST_P(testNumaSplit, checkModeSplit) { auto [required_numa_nodes, pages, in, out] = param; umf_result_t umf_result; - umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; - - umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - std::vector numa_nodes = get_available_numa_nodes(); if (numa_nodes.size() < required_numa_nodes) { GTEST_SKIP_("Not enough numa nodes"); } + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, 
UMF_RESULT_SUCCESS);
+
     ASSERT_EQ(out.size(), pages)
         << "Wrong test input - out array size doesn't match page count";

diff --git a/test/provider_tracking.cpp b/test/provider_tracking.cpp
new file mode 100644
index 0000000000..55acc452cf
--- /dev/null
+++ b/test/provider_tracking.cpp
@@ -0,0 +1,374 @@
+// Copyright (C) 2025 Intel Corporation
+// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "base.hpp"
+
+#include "test_helpers.h"
+#include "utils/cpp_helpers.hpp"
+#ifndef _WIN32
+#include "test_helpers_linux.h"
+#endif
+
+#include
+#include
+#include
+
+using umf_test::test;
+
+#define FIXED_BUFFER_SIZE (512 * utils_get_page_size())
+#define INVALID_PTR ((void *)0x01)
+
+using providerCreateExtParams = std::tuple;
+
+static void providerCreateExt(providerCreateExtParams params,
+                              umf_test::provider_unique_handle_t *handle) {
+    umf_memory_provider_handle_t hProvider = nullptr;
+    auto [provider_ops, provider_params] = params;
+
+    auto ret =
+        umfMemoryProviderCreate(provider_ops, provider_params, &hProvider);
+    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+    ASSERT_NE(hProvider, nullptr);
+
+    *handle = umf_test::provider_unique_handle_t(hProvider,
+                                                 &umfMemoryProviderDestroy);
+}
+
+struct TrackingProviderTest
+    : umf_test::test,
+      ::testing::WithParamInterface {
+    void SetUp() override {
+        test::SetUp();
+
+        // Allocate a memory buffer to use with the fixed memory provider
+        memory_size = FIXED_BUFFER_SIZE;
+        memory_buffer = malloc(memory_size);
+        ASSERT_NE(memory_buffer, nullptr);
+
+        // Create provider parameters
+        umf_fixed_memory_provider_params_handle_t params = nullptr;
+        umf_result_t res = umfFixedMemoryProviderParamsCreate(
+            &params, memory_buffer, memory_size);
+        ASSERT_EQ(res, UMF_RESULT_SUCCESS);
+        ASSERT_NE(params, nullptr);
+
+        providerCreateExt(std::make_tuple(umfFixedMemoryProviderOps(), params),
+                          &provider);
+
+        umfFixedMemoryProviderParamsDestroy(params);
+        umf_result_t umf_result =
+            umfMemoryProviderGetMinPageSize(provider.get(), NULL, &page_size);
+        ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+        page_plus_64 = page_size + 64;
+
+        umf_memory_pool_handle_t hPool = nullptr;
+        umf_result = umfPoolCreate(umfProxyPoolOps(), provider.get(), nullptr,
+                                   0, &hPool);
+        ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+
+        pool = umf_test::pool_unique_handle_t(hPool, &umfPoolDestroy);
+    }
+
+    void TearDown() override {
+        if (memory_buffer) {
+            free(memory_buffer);
+            memory_buffer = nullptr;
+        }
+        test::TearDown();
+    }
+
+    umf_test::provider_unique_handle_t provider;
+    umf_test::pool_unique_handle_t pool;
+    size_t page_size;
+    size_t page_plus_64;
+    void *memory_buffer = nullptr;
+    size_t memory_size = 0;
+};
+
+static void
+createPoolFromAllocation(void *ptr0, size_t size1,
+                         umf_memory_provider_handle_t *_providerFromPtr,
+                         umf_memory_pool_handle_t *_poolFromPtr) {
+    umf_result_t umf_result;
+
+    // Create provider parameters
+    umf_fixed_memory_provider_params_handle_t params = nullptr;
+    umf_result = umfFixedMemoryProviderParamsCreate(&params, ptr0, size1);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(params, nullptr);
+
+    umf_memory_provider_handle_t provider1 = nullptr;
+    umf_result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params,
+                                         &provider1);
+    ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS);
+    ASSERT_NE(provider1, nullptr);
+
+    umf_memory_pool_handle_t pool1 = nullptr;
+    umf_result =
+        umfPoolCreate(umfProxyPoolOps(), provider1, nullptr, 0, &pool1);
+    ASSERT_EQ(umf_result,
UMF_RESULT_SUCCESS); + + umfFixedMemoryProviderParamsDestroy(params); + + *_providerFromPtr = provider1; + *_poolFromPtr = pool1; +} + +// TESTS + +INSTANTIATE_TEST_SUITE_P(trackingProviderTest, TrackingProviderTest, + ::testing::Values(providerCreateExtParams{ + umfFixedMemoryProviderOps(), nullptr})); + +TEST_P(TrackingProviderTest, create_destroy) { + // Creation and destruction are handled in SetUp and TearDown +} + +TEST_P(TrackingProviderTest, whole_size_success) { + umf_result_t umf_result; + size_t size0; + size_t size1; + void *ptr0 = nullptr; + void *ptr1 = nullptr; + + umf_memory_pool_handle_t pool0 = pool.get(); + + size0 = FIXED_BUFFER_SIZE - (2 * page_size); + ptr0 = umfPoolAlignedMalloc(pool0, size0, utils_get_page_size()); + ASSERT_NE(ptr0, nullptr); + + size1 = size0; // whole size + + umf_memory_provider_handle_t provider1 = nullptr; + umf_memory_pool_handle_t pool1 = nullptr; + createPoolFromAllocation(ptr0, size1, &provider1, &pool1); + + ptr1 = umfPoolMalloc(pool1, size1); + ASSERT_NE(ptr1, nullptr); + + umf_result = umfPoolFree(pool1, ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(pool1); + umfMemoryProviderDestroy(provider1); + + umf_result = umfPoolFree(pool0, ptr0); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_P(TrackingProviderTest, half_size_success) { + umf_result_t umf_result; + size_t size0; + size_t size1; + void *ptr0 = nullptr; + void *ptr1 = nullptr; + + umf_memory_pool_handle_t pool0 = pool.get(); + + size0 = FIXED_BUFFER_SIZE - (2 * page_size); + ptr0 = umfPoolAlignedMalloc(pool0, size0, utils_get_page_size()); + ASSERT_NE(ptr0, nullptr); + + size1 = size0 / 2; // half size + + umf_memory_provider_handle_t provider1 = nullptr; + umf_memory_pool_handle_t pool1 = nullptr; + createPoolFromAllocation(ptr0, size1, &provider1, &pool1); + + ptr1 = umfPoolMalloc(pool1, size1); + ASSERT_NE(ptr1, nullptr); + + umf_result = umfPoolFree(pool1, ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(pool1); + umfMemoryProviderDestroy(provider1); + + umf_result = umfPoolFree(pool0, ptr0); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_P(TrackingProviderTest, failure_exceeding_size) { + umf_result_t umf_result; + size_t size0; + size_t size1; + void *ptr0 = nullptr; + void *ptr1 = nullptr; + + umf_memory_pool_handle_t pool0 = pool.get(); + + size0 = FIXED_BUFFER_SIZE - (2 * page_size); + ptr0 = umfPoolAlignedMalloc(pool0, size0, utils_get_page_size()); + ASSERT_NE(ptr0, nullptr); + + size1 = FIXED_BUFFER_SIZE - page_size; // exceeding size + + umf_memory_provider_handle_t provider1 = nullptr; + umf_memory_pool_handle_t pool1 = nullptr; + createPoolFromAllocation(ptr0, size1, &provider1, &pool1); + + ptr1 = umfPoolMalloc(pool1, size1); + ASSERT_EQ(ptr1, nullptr); + + umfPoolDestroy(pool1); + umfMemoryProviderDestroy(provider1); + + umf_result = umfPoolFree(pool0, ptr0); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +#define MAX_ARRAY 9 +#define TEST_LEVEL_SUCCESS 7 +#define TEST_LEVEL_FAILURE 8 + +TEST_P(TrackingProviderTest, success_max_levels) { + umf_result_t umf_result; + size_t size; + void *ptr[MAX_ARRAY] = {0}; + umf_memory_provider_handle_t providers[MAX_ARRAY] = {0}; + umf_memory_pool_handle_t pools[MAX_ARRAY] = {0}; + + size = FIXED_BUFFER_SIZE - (2 * page_size); + pools[0] = pool.get(); + + for (int i = 0; i < TEST_LEVEL_SUCCESS; i++) { + fprintf(stderr, "Alloc #%d\n", i); + ptr[i] = umfPoolAlignedMalloc(pools[i], size, utils_get_page_size()); + ASSERT_NE(ptr[i], nullptr); + + 
createPoolFromAllocation(ptr[i], size, &providers[i + 1], + &pools[i + 1]); + } + + int s = TEST_LEVEL_SUCCESS; + fprintf(stderr, "Alloc #%d\n", s); + ptr[s] = umfPoolAlignedMalloc(pools[s], size, utils_get_page_size()); + ASSERT_NE(ptr[s], nullptr); + + fprintf(stderr, "Free #%d\n", s); + umf_result = umfPoolFree(pools[s], ptr[s]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + for (int i = TEST_LEVEL_SUCCESS - 1; i >= 0; i--) { + umfPoolDestroy(pools[i + 1]); + umfMemoryProviderDestroy(providers[i + 1]); + + fprintf(stderr, "Free #%d\n", i); + umf_result = umfPoolFree(pools[i], ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } +} + +TEST_P(TrackingProviderTest, failure_exceeding_levels) { + umf_result_t umf_result; + size_t size; + void *ptr[MAX_ARRAY] = {0}; + umf_memory_provider_handle_t providers[MAX_ARRAY] = {0}; + umf_memory_pool_handle_t pools[MAX_ARRAY] = {0}; + + size = FIXED_BUFFER_SIZE - (2 * page_size); + pools[0] = pool.get(); + + for (int i = 0; i < TEST_LEVEL_FAILURE; i++) { + fprintf(stderr, "Alloc #%d\n", i); + ptr[i] = umfPoolAlignedMalloc(pools[i], size, utils_get_page_size()); + ASSERT_NE(ptr[i], nullptr); + + createPoolFromAllocation(ptr[i], size, &providers[i + 1], + &pools[i + 1]); + } + + // tracker level is too high + int f = TEST_LEVEL_FAILURE; + fprintf(stderr, "Alloc #%d\n", f); + ptr[f] = umfPoolAlignedMalloc(pools[f], size, utils_get_page_size()); + ASSERT_EQ(ptr[f], nullptr); + + for (int i = TEST_LEVEL_FAILURE - 1; i >= 0; i--) { + umfPoolDestroy(pools[i + 1]); + umfMemoryProviderDestroy(providers[i + 1]); + + fprintf(stderr, "Free #%d\n", i); + umf_result = umfPoolFree(pools[i], ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } +} + +TEST_P(TrackingProviderTest, reverted_free_half_size) { + umf_result_t umf_result; + size_t size0; + size_t size1; + void *ptr0 = nullptr; + void *ptr1 = nullptr; + + umf_memory_pool_handle_t pool0 = pool.get(); + + size0 = FIXED_BUFFER_SIZE - (2 * page_size); + ptr0 = umfPoolAlignedMalloc(pool0, size0, utils_get_page_size()); + ASSERT_NE(ptr0, nullptr); + + umf_memory_provider_handle_t provider1 = nullptr; + umf_memory_pool_handle_t pool1 = nullptr; + createPoolFromAllocation(ptr0, size0, &provider1, &pool1); + + size1 = size0 / 2; // half size + + ptr1 = umfPoolMalloc(pool1, size1); + ASSERT_NE(ptr1, nullptr); + + // Freeing the "busy" pointer from the first pool is undefined behavior. + // Currently it fails if the sizes differ. + // see: https://github.com/oneapi-src/unified-memory-framework/pull/1161 + umf_result = umfPoolFree(pool0, ptr0); + + umf_result = umfPoolFree(pool1, ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(pool1); + umfMemoryProviderDestroy(provider1); + + // The pointer could have already been freed above, + // so we cannot verify the result here.
+ umf_result = umfPoolFree(pool0, ptr0); +} + +TEST_P(TrackingProviderTest, reverted_free_the_same_size) { + umf_result_t umf_result; + size_t size0; + size_t size1; + void *ptr0 = nullptr; + void *ptr1 = nullptr; + + umf_memory_pool_handle_t pool0 = pool.get(); + + size0 = FIXED_BUFFER_SIZE - (2 * page_size); + ptr0 = umfPoolAlignedMalloc(pool0, size0, utils_get_page_size()); + ASSERT_NE(ptr0, nullptr); + + umf_memory_provider_handle_t provider1 = nullptr; + umf_memory_pool_handle_t pool1 = nullptr; + createPoolFromAllocation(ptr0, size0, &provider1, &pool1); + + size1 = size0; // the same size + + ptr1 = umfPoolMalloc(pool1, size1); + ASSERT_NE(ptr1, nullptr); + + // Freeing the "busy" pointer from the first pool is undefined behavior. + // Currently it succeeds if the sizes are equal. + // see: https://github.com/oneapi-src/unified-memory-framework/pull/1161 + umf_result = umfPoolFree(pool0, ptr0); + + // try to free the pointer from the second pool (the same size) + umf_result = umfPoolFree(pool1, ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(pool1); + umfMemoryProviderDestroy(provider1); + + // The pointer could have already been freed above, + // so we cannot verify the result here. + umf_result = umfPoolFree(pool0, ptr0); +} diff --git a/test/provider_tracking_fixture_tests.cpp b/test/provider_tracking_fixture_tests.cpp new file mode 100644 index 0000000000..d81d4f8b1d --- /dev/null +++ b/test/provider_tracking_fixture_tests.cpp @@ -0,0 +1,91 @@ +// Copyright (C) 2025 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include <umf/memory_provider.h> +#include <umf/pools/pool_proxy.h> +#include <umf/providers/provider_file_memory.h> + +#include "base.hpp" +#include "provider.hpp" + +#include "test_helpers.h" +#include "utils/cpp_helpers.hpp" +#ifndef _WIN32 +#include "test_helpers_linux.h" +#endif + +#include "poolFixtures.hpp" + +#define FILE_PATH ((char *)"tmp_file") + +struct provider_from_pool : public umf_test::provider_base_t { + umf_memory_pool_handle_t pool; + umf_result_t initialize(umf_memory_pool_handle_t _pool) noexcept { + if (!_pool) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + pool = _pool; + return UMF_RESULT_SUCCESS; + } + umf_result_t alloc(size_t size, size_t align, void **ptr) noexcept { + *ptr = umfPoolAlignedMalloc(pool, size, align); + return (*ptr) ?
UMF_RESULT_SUCCESS + : UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + umf_result_t free(void *ptr, size_t) noexcept { + return umfPoolFree(pool, ptr); + } + const char *get_name() noexcept { return "provider_from_pool"; } + + virtual ~provider_from_pool() { + if (pool) { + umfPoolDestroy(pool); + pool = nullptr; + } + } +}; + +umf_memory_provider_ops_t PROVIDER_FROM_POOL_OPS = + umf_test::providerMakeCOps<provider_from_pool, umf_memory_pool_t>(); + +static void *providerFromPoolParamsCreate(void) { + umf_file_memory_provider_params_handle_t paramsFile = NULL; + umf_result_t umf_result = + umfFileMemoryProviderParamsCreate(&paramsFile, FILE_PATH); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_NE(paramsFile, nullptr); + + umf_memory_provider_handle_t providerFile = nullptr; + umf_result = umfMemoryProviderCreate(umfFileMemoryProviderOps(), paramsFile, + &providerFile); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_NE(providerFile, nullptr); + + umf_memory_pool_handle_t poolProxyFile = nullptr; + umf_result = + umfPoolCreate(umfProxyPoolOps(), providerFile, nullptr, + UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &poolProxyFile); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_NE(poolProxyFile, nullptr); + + umf_result = umfFileMemoryProviderParamsDestroy(paramsFile); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + paramsFile = nullptr; + + return poolProxyFile; +} + +// TESTS + +INSTANTIATE_TEST_SUITE_P(TrackingProviderPoolTest, umfPoolTest, + ::testing::Values(poolCreateExtParams{ + umfProxyPoolOps(), nullptr, nullptr, + &PROVIDER_FROM_POOL_OPS, + providerFromPoolParamsCreate, nullptr})); + +INSTANTIATE_TEST_SUITE_P(TrackingProviderMultiPoolTest, umfMultiPoolTest, + ::testing::Values(poolCreateExtParams{ + umfProxyPoolOps(), nullptr, nullptr, + &PROVIDER_FROM_POOL_OPS, + providerFromPoolParamsCreate, nullptr})); diff --git a/test/providers/cuda_helpers.cpp b/test/providers/cuda_helpers.cpp index 9c41d9382f..3e81c184ff 100644 --- a/test/providers/cuda_helpers.cpp +++ b/test/providers/cuda_helpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -22,7 +22,7 @@ struct libcu_ops { CUresult (*cuDeviceGet)(CUdevice *device, int ordinal); CUresult (*cuMemAlloc)(CUdeviceptr *dptr, size_t size); CUresult (*cuMemFree)(CUdeviceptr dptr); - CUresult (*cuMemAllocHost)(void **pp, size_t size); + CUresult (*cuMemHostAlloc)(void **pp, size_t size, unsigned int flags); CUresult (*cuMemAllocManaged)(CUdeviceptr *dptr, size_t bytesize, unsigned int flags); CUresult (*cuMemFreeHost)(void *p); @@ -34,6 +34,7 @@ struct libcu_ops { CUresult (*cuPointerGetAttributes)(unsigned int numAttributes, CUpointer_attribute *attributes, void **data, CUdeviceptr ptr); + CUresult (*cuMemHostGetFlags)(unsigned int *pFlags, void *p); CUresult (*cuStreamSynchronize)(CUstream hStream); CUresult (*cuCtxSynchronize)(void); } libcu_ops; @@ -69,7 +70,7 @@ struct DlHandleCloser { libcu_ops.cuMemFree = [](auto... args) { return noop_stub(args...); }; - libcu_ops.cuMemAllocHost = [](auto... args) { + libcu_ops.cuMemHostAlloc = [](auto... args) { return noop_stub(args...); }; libcu_ops.cuMemAllocManaged = [](auto... args) { @@ -90,6 +91,9 @@ struct DlHandleCloser { libcu_ops.cuPointerGetAttributes = [](auto... args) { return noop_stub(args...); }; + libcu_ops.cuMemHostGetFlags = [](auto... args) { + return noop_stub(args...); + }; libcu_ops.cuStreamSynchronize = [](auto...
args) { return noop_stub(args...); }; @@ -109,10 +113,15 @@ int InitCUDAOps() { const char *lib_name = "libcuda.so"; #endif // CUDA symbols - // NOTE that we use UMF_UTIL_OPEN_LIBRARY_GLOBAL which add all loaded - // symbols to the global symbol table. +#if OPEN_CU_LIBRARY_GLOBAL + // NOTE UMF_UTIL_OPEN_LIBRARY_GLOBAL adds all loaded symbols to the + // global symbol table. + int open_flags = UMF_UTIL_OPEN_LIBRARY_GLOBAL; +#else + int open_flags = 0; +#endif cuDlHandle = std::unique_ptr<void, DlHandleCloser>( - utils_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_GLOBAL)); + utils_open_library(lib_name, open_flags)); // NOTE: some symbols defined in the lib have _vX postfixes - this is // important to load the proper version of functions @@ -164,10 +173,10 @@ int InitCUDAOps() { fprintf(stderr, "cuMemFree_v2 symbol not found in %s\n", lib_name); return -1; } - *(void **)&libcu_ops.cuMemAllocHost = - utils_get_symbol_addr(cuDlHandle.get(), "cuMemAllocHost_v2", lib_name); - if (libcu_ops.cuMemAllocHost == nullptr) { - fprintf(stderr, "cuMemAllocHost_v2 symbol not found in %s\n", lib_name); + *(void **)&libcu_ops.cuMemHostAlloc = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemHostAlloc", lib_name); + if (libcu_ops.cuMemHostAlloc == nullptr) { + fprintf(stderr, "cuMemHostAlloc symbol not found in %s\n", lib_name); return -1; } *(void **)&libcu_ops.cuMemAllocManaged = @@ -208,6 +217,12 @@ int InitCUDAOps() { lib_name); return -1; } + *(void **)&libcu_ops.cuMemHostGetFlags = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemHostGetFlags", lib_name); + if (libcu_ops.cuMemHostGetFlags == nullptr) { + fprintf(stderr, "cuMemHostGetFlags symbol not found in %s\n", lib_name); + return -1; + } *(void **)&libcu_ops.cuStreamSynchronize = utils_get_symbol_addr( cuDlHandle.get(), "cuStreamSynchronize", lib_name); if (libcu_ops.cuStreamSynchronize == nullptr) { @@ -236,7 +251,7 @@ int InitCUDAOps() { libcu_ops.cuCtxSetCurrent = cuCtxSetCurrent; libcu_ops.cuDeviceGet = cuDeviceGet; libcu_ops.cuMemAlloc = cuMemAlloc; - libcu_ops.cuMemAllocHost = cuMemAllocHost; + libcu_ops.cuMemHostAlloc = cuMemHostAlloc; libcu_ops.cuMemAllocManaged = cuMemAllocManaged; libcu_ops.cuMemFree = cuMemFree; libcu_ops.cuMemFreeHost = cuMemFreeHost; @@ -244,6 +259,7 @@ int InitCUDAOps() { libcu_ops.cuMemcpy = cuMemcpy; libcu_ops.cuPointerGetAttribute = cuPointerGetAttribute; libcu_ops.cuPointerGetAttributes = cuPointerGetAttributes; + libcu_ops.cuMemHostGetFlags = cuMemHostGetFlags; libcu_ops.cuStreamSynchronize = cuStreamSynchronize; libcu_ops.cuCtxSynchronize = cuCtxSynchronize; @@ -251,7 +267,7 @@ int InitCUDAOps() { } #endif // USE_DLOPEN -static CUresult set_context(CUcontext required_ctx, CUcontext *restore_ctx) { +CUresult set_context(CUcontext required_ctx, CUcontext *restore_ctx) { CUcontext current_ctx = NULL; CUresult cu_result = libcu_ops.cuCtxGetCurrent(&current_ctx); if (cu_result != CUDA_SUCCESS) { @@ -259,7 +275,10 @@ static CUresult set_context(CUcontext required_ctx, CUcontext *restore_ctx) { return cu_result; } - *restore_ctx = current_ctx; + if (restore_ctx != NULL) { + *restore_ctx = current_ctx; + } + if (current_ctx != required_ctx) { cu_result = libcu_ops.cuCtxSetCurrent(required_ctx); if (cu_result != CUDA_SUCCESS) { @@ -370,6 +389,17 @@ umf_usm_memory_type_t get_mem_type(CUcontext context, void *ptr) { return UMF_MEMORY_TYPE_UNKNOWN; } +unsigned int get_mem_host_alloc_flags(void *ptr) { + unsigned int flags; + CUresult res = libcu_ops.cuMemHostGetFlags(&flags, ptr); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuMemHostGetFlags()
failed!\n"); + return 0; + } + + return flags; +} + CUcontext get_mem_context(void *ptr) { CUcontext context; CUresult res = libcu_ops.cuPointerGetAttribute( @@ -382,6 +412,18 @@ CUcontext get_mem_context(void *ptr) { return context; } +int get_mem_device(void *ptr) { + int device; + CUresult res = libcu_ops.cuPointerGetAttribute( + &device, CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL, (CUdeviceptr)ptr); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuPointerGetAttribute() failed!\n"); + return -1; + } + + return device; +} + CUcontext get_current_context() { CUcontext context; CUresult res = libcu_ops.cuCtxGetCurrent(&context); @@ -403,7 +445,7 @@ void init_cuda_once() { InitResult = init_cuda_lib(); } -int init_cuda() { +int init_cuda(void) { utils_init_once(&cuda_init_flag, init_cuda_once); return InitResult; @@ -412,12 +454,6 @@ int init_cuda() { int get_cuda_device(CUdevice *device) { CUdevice cuDevice = -1; - int ret = init_cuda(); - if (ret != 0) { - fprintf(stderr, "init_cuda() failed!\n"); - return ret; - } - CUresult res = libcu_ops.cuDeviceGet(&cuDevice, 0); if (res != CUDA_SUCCESS || cuDevice < 0) { return -1; @@ -430,12 +466,6 @@ int get_cuda_device(CUdevice *device) { int create_context(CUdevice device, CUcontext *context) { CUcontext cuContext = nullptr; - int ret = init_cuda(); - if (ret != 0) { - fprintf(stderr, "init_cuda() failed!\n"); - return ret; - } - CUresult res = libcu_ops.cuCtxCreate(&cuContext, 0, device); if (res != CUDA_SUCCESS || cuContext == nullptr) { return -1; diff --git a/test/providers/cuda_helpers.h b/test/providers/cuda_helpers.h index fc06c1fcff..944e6dbef8 100644 --- a/test/providers/cuda_helpers.h +++ b/test/providers/cuda_helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -26,10 +26,14 @@ extern "C" { #endif +int init_cuda(void); + int get_cuda_device(CUdevice *device); int create_context(CUdevice device, CUcontext *context); +CUresult set_context(CUcontext required_ctx, CUcontext *restore_ctx); + int destroy_context(CUcontext context); int cuda_fill(CUcontext context, CUdevice device, void *ptr, size_t size, @@ -40,8 +44,12 @@ int cuda_copy(CUcontext context, CUdevice device, void *dst_ptr, umf_usm_memory_type_t get_mem_type(CUcontext context, void *ptr); +unsigned int get_mem_host_alloc_flags(void *ptr); + CUcontext get_mem_context(void *ptr); +int get_mem_device(void *ptr); + CUcontext get_current_context(); #ifdef __cplusplus diff --git a/test/providers/ipc_cuda_prov.sh b/test/providers/ipc_cuda_prov.sh index 1e9b6b05d4..bb4be94747 100755 --- a/test/providers/ipc_cuda_prov.sh +++ b/test/providers/ipc_cuda_prov.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,10 +15,10 @@ PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" echo "Starting ipc_cuda_prov CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_cuda_prov_consumer $PORT & +UMF_LOG=$UMF_LOG_VAL ./test_ipc_cuda_prov_consumer $PORT & echo "Waiting 1 sec ..." sleep 1 echo "Starting ipc_cuda_prov PRODUCER on port $PORT ..." 
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_cuda_prov_producer $PORT +UMF_LOG=$UMF_LOG_VAL ./test_ipc_cuda_prov_producer $PORT diff --git a/test/providers/ipc_cuda_prov_consumer.c b/test/providers/ipc_cuda_prov_consumer.c index 1aeb5b15cb..3d4a707072 100644 --- a/test/providers/ipc_cuda_prov_consumer.c +++ b/test/providers/ipc_cuda_prov_consumer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -25,7 +25,13 @@ int main(int argc, char *argv[]) { CUdevice hDevice = -1; CUcontext hContext = NULL; - int ret = get_cuda_device(&hDevice); + int ret = init_cuda(); + if (ret != 0) { + fprintf(stderr, "init_cuda() failed!\n"); + return -1; + } + + ret = get_cuda_device(&hDevice); if (ret != 0) { fprintf(stderr, "get_cuda_device() failed!\n"); return -1; diff --git a/test/providers/ipc_cuda_prov_producer.c b/test/providers/ipc_cuda_prov_producer.c index c2cd1d1325..a7421da06c 100644 --- a/test/providers/ipc_cuda_prov_producer.c +++ b/test/providers/ipc_cuda_prov_producer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -25,7 +25,13 @@ int main(int argc, char *argv[]) { CUdevice hDevice = -1; CUcontext hContext = NULL; - int ret = get_cuda_device(&hDevice); + int ret = init_cuda(); + if (ret != 0) { + fprintf(stderr, "init_cuda() failed!\n"); + return -1; + } + + ret = get_cuda_device(&hDevice); if (ret != 0) { fprintf(stderr, "get_cuda_device() failed!\n"); return -1; diff --git a/test/providers/ipc_level_zero_prov.sh b/test/providers/ipc_level_zero_prov.sh index d6bcef4f3a..cebd909328 100755 --- a/test/providers/ipc_level_zero_prov.sh +++ b/test/providers/ipc_level_zero_prov.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -12,28 +12,13 @@ set -e # port should be a number from the range <1024, 65535> PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) -# The ipc_level_zero_prov test requires using pidfd_getfd(2) -# to obtain a duplicate of another process's file descriptor. -# Permission to duplicate another process's file descriptor -# is governed by a ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check (see ptrace(2)) -# that can be changed using the /proc/sys/kernel/yama/ptrace_scope interface. -PTRACE_SCOPE_FILE="/proc/sys/kernel/yama/ptrace_scope" -VAL=0 -if [ -f $PTRACE_SCOPE_FILE ]; then - PTRACE_SCOPE_VAL=$(cat $PTRACE_SCOPE_FILE) - if [ $PTRACE_SCOPE_VAL -ne $VAL ]; then - echo "SKIP: ptrace_scope is not set to 0 (classic ptrace permissions) - skipping the test" - exit 125 # skip code defined in CMakeLists.txt - fi -fi - UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" echo "Starting ipc_level_zero_prov CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_level_zero_prov_consumer $PORT & +UMF_LOG=$UMF_LOG_VAL ./test_ipc_level_zero_prov_consumer $PORT & echo "Waiting 1 sec ..." sleep 1 echo "Starting ipc_level_zero_prov PRODUCER on port $PORT ..." 
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_level_zero_prov_producer $PORT +UMF_LOG=$UMF_LOG_VAL ./test_ipc_level_zero_prov_producer $PORT diff --git a/test/providers/ipc_level_zero_prov_consumer.c b/test/providers/ipc_level_zero_prov_consumer.c index 8ec0648e4f..5fb2128815 100644 --- a/test/providers/ipc_level_zero_prov_consumer.c +++ b/test/providers/ipc_level_zero_prov_consumer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -27,7 +27,13 @@ int main(int argc, char *argv[]) { ze_device_handle_t hDevice = NULL; ze_context_handle_t hContext = NULL; - int ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver); + int ret = utils_ze_init_level_zero(); + if (ret != 0) { + fprintf(stderr, "utils_ze_init_level_zero() failed!\n"); + return -1; + } + + ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver); if (ret != 0 || hDriver == NULL) { fprintf(stderr, "utils_ze_find_driver_with_gpu() failed!\n"); return -1; diff --git a/test/providers/ipc_level_zero_prov_producer.c b/test/providers/ipc_level_zero_prov_producer.c index 2a8fedc374..e6ffcf2ed6 100644 --- a/test/providers/ipc_level_zero_prov_producer.c +++ b/test/providers/ipc_level_zero_prov_producer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -27,7 +27,13 @@ int main(int argc, char *argv[]) { ze_device_handle_t hDevice = NULL; ze_context_handle_t hContext = NULL; - int ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver); + int ret = utils_ze_init_level_zero(); + if (ret != 0) { + fprintf(stderr, "utils_ze_init_level_zero() failed!\n"); + return -1; + } + + ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver); if (ret != 0 || hDriver == NULL) { fprintf(stderr, "utils_ze_find_driver_with_gpu() failed!\n"); return -1; diff --git a/test/providers/provider_cuda.cpp b/test/providers/provider_cuda.cpp index 4f1d359113..a7e5dbe5a0 100644 --- a/test/providers/provider_cuda.cpp +++ b/test/providers/provider_cuda.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -39,7 +39,13 @@ class CUDATestHelper { }; CUDATestHelper::CUDATestHelper() { - int ret = get_cuda_device(&hDevice_); + int ret = init_cuda(); + if (ret != 0) { + fprintf(stderr, "init_cuda() failed!\n"); + return; + } + + ret = get_cuda_device(&hDevice_); if (ret != 0) { fprintf(stderr, "get_cuda_device() failed!\n"); return; @@ -52,47 +58,46 @@ CUDATestHelper::CUDATestHelper() { } } -using cuda_params_unique_handle_t = - std::unique_ptr<umf_cuda_memory_provider_params_t, decltype(&umfCUDAMemoryProviderParamsDestroy)>; - -cuda_params_unique_handle_t +umf_cuda_memory_provider_params_handle_t create_cuda_prov_params(CUcontext context, CUdevice device, - umf_usm_memory_type_t memory_type) { + umf_usm_memory_type_t memory_type, unsigned int flags) { umf_cuda_memory_provider_params_handle_t params = nullptr; umf_result_t res = umfCUDAMemoryProviderParamsCreate(&params); if (res != UMF_RESULT_SUCCESS) { - return cuda_params_unique_handle_t(nullptr, - &umfCUDAMemoryProviderParamsDestroy); + return nullptr; } res = umfCUDAMemoryProviderParamsSetContext(params, context); if (res != UMF_RESULT_SUCCESS) { umfCUDAMemoryProviderParamsDestroy(params); - return cuda_params_unique_handle_t(nullptr, - &umfCUDAMemoryProviderParamsDestroy); - ; + return nullptr; } res = umfCUDAMemoryProviderParamsSetDevice(params, device); if (res != UMF_RESULT_SUCCESS) { umfCUDAMemoryProviderParamsDestroy(params); - return cuda_params_unique_handle_t(nullptr, - &umfCUDAMemoryProviderParamsDestroy); - ; + return nullptr; } res = umfCUDAMemoryProviderParamsSetMemoryType(params, memory_type); if (res != UMF_RESULT_SUCCESS) { umfCUDAMemoryProviderParamsDestroy(params); - return cuda_params_unique_handle_t(nullptr, - &umfCUDAMemoryProviderParamsDestroy); - ; + return nullptr; + } + + res = umfCUDAMemoryProviderParamsSetAllocFlags(params, flags); + if (res != UMF_RESULT_SUCCESS) { + umfCUDAMemoryProviderParamsDestroy(params); + return nullptr; } - return cuda_params_unique_handle_t(params, - &umfCUDAMemoryProviderParamsDestroy); + return params; +} + +umf_result_t destroyCuParams(void *params) { + return umfCUDAMemoryProviderParamsDestroy( + (umf_cuda_memory_provider_params_handle_t)params); } class CUDAMemoryAccessor : public MemoryAccessor { @@ -126,30 +131,55 @@ class CUDAMemoryAccessor : public MemoryAccessor { CUcontext hContext_; }; -using CUDAProviderTestParams = - std::tuple<umf_cuda_memory_provider_params_handle_t, CUcontext, umf_usm_memory_type_t, MemoryAccessor *>; - struct umfCUDAProviderTest : umf_test::test, - ::testing::WithParamInterface<CUDAProviderTestParams> { + ::testing::WithParamInterface<umf_usm_memory_type_t> { void SetUp() override { test::SetUp(); - auto [cuda_params, cu_context, memory_type, accessor] = - this->GetParam(); - params = cuda_params; - memAccessor = accessor; - expected_context = cu_context; + umf_usm_memory_type_t memory_type = this->GetParam(); + + memAccessor = nullptr; + expected_context = cudaTestHelper.get_test_context(); + expected_device = cudaTestHelper.get_test_device(); + params = create_cuda_prov_params(cudaTestHelper.get_test_context(), + cudaTestHelper.get_test_device(), + memory_type, 0 /* alloc flags */); + ASSERT_NE(expected_context, nullptr); + ASSERT_GE(expected_device, 0); + + switch (memory_type) { + case UMF_MEMORY_TYPE_DEVICE: + memAccessor = std::make_unique<CUDAMemoryAccessor>( + cudaTestHelper.get_test_context(), + cudaTestHelper.get_test_device()); + break; + case UMF_MEMORY_TYPE_SHARED: + case UMF_MEMORY_TYPE_HOST: + memAccessor = std::make_unique<HostMemoryAccessor>(); + break; + case UMF_MEMORY_TYPE_UNKNOWN: + break; + } + expected_memory_type = memory_type; } - void TearDown() override { test::TearDown(); } + void TearDown() override { + if (params) { +
destroyCuParams(params); + } - umf_cuda_memory_provider_params_handle_t params; - MemoryAccessor *memAccessor = nullptr; - CUcontext expected_context; + test::TearDown(); + } + + CUDATestHelper cudaTestHelper; + umf_cuda_memory_provider_params_handle_t params = nullptr; + + std::unique_ptr<MemoryAccessor> memAccessor = nullptr; + CUcontext expected_context = nullptr; + int expected_device = -1; umf_usm_memory_type_t expected_memory_type; }; @@ -300,6 +330,44 @@ TEST_P(umfCUDAProviderTest, getPageSizeInvalidArgs) { umfMemoryProviderDestroy(provider); } +TEST_P(umfCUDAProviderTest, cudaProviderDefaultParams) { + umf_cuda_memory_provider_params_handle_t defaultParams = nullptr; + umf_result_t umf_result = umfCUDAMemoryProviderParamsCreate(&defaultParams); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfCUDAMemoryProviderParamsSetMemoryType(defaultParams, + expected_memory_type); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // NOTE: we intentionally do not set any context and device params + + umf_memory_provider_handle_t provider = nullptr; + umf_result = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), + defaultParams, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + // do single alloc and check if the context and device id of allocated + // memory are correct + + void *ptr = nullptr; + umf_result = umfMemoryProviderAlloc(provider, 128, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + CUcontext actual_mem_context = get_mem_context(ptr); + ASSERT_EQ(actual_mem_context, expected_context); + + int actual_device = get_mem_device(ptr); + ASSERT_EQ(actual_device, expected_device); + + umf_result = umfMemoryProviderFree(provider, ptr, 128); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); + umfCUDAMemoryProviderParamsDestroy(defaultParams); +} + TEST_P(umfCUDAProviderTest, cudaProviderNullParams) { umf_result_t res = umfCUDAMemoryProviderParamsCreate(nullptr); EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); @@ -315,36 +383,199 @@ TEST_P(umfCUDAProviderTest, cudaProviderNullParams) { EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); } -// TODO add tests that mixes CUDA Memory Provider and Disjoint Pool +TEST_P(umfCUDAProviderTest, multiContext) { + CUdevice device; + int ret = get_cuda_device(&device); + ASSERT_EQ(ret, 0); + + // create two CUDA contexts and two providers + CUcontext ctx1, ctx2; + ret = create_context(device, &ctx1); + ASSERT_EQ(ret, 0); + ret = create_context(device, &ctx2); + ASSERT_EQ(ret, 0); + + umf_cuda_memory_provider_params_handle_t params1 = + create_cuda_prov_params(ctx1, device, UMF_MEMORY_TYPE_HOST, 0); + ASSERT_NE(params1, nullptr); + umf_memory_provider_handle_t provider1; + umf_result_t umf_result = umfMemoryProviderCreate( + umfCUDAMemoryProviderOps(), params1, &provider1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider1, nullptr); + umf_result = umfCUDAMemoryProviderParamsDestroy(params1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_cuda_memory_provider_params_handle_t params2 = + create_cuda_prov_params(ctx2, device, UMF_MEMORY_TYPE_HOST, 0); + ASSERT_NE(params2, nullptr); + umf_memory_provider_handle_t provider2; + umf_result = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), params2, + &provider2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider2, nullptr); + umf_result = umfCUDAMemoryProviderParamsDestroy(params2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // use the providers +
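// each provider binds its own context, so a free must succeed no matter which context is current +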
// allocate from 1, then from 2, then free 1, then free 2 + void *ptr1, *ptr2; + const int size = 128; + // NOTE: we use ctx1 here + umf_result = umfMemoryProviderAlloc(provider1, size, 0, &ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + // NOTE: we use ctx2 here + umf_result = umfMemoryProviderAlloc(provider2, size, 0, &ptr2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr2, nullptr); + + // even if we change the context, we should be able to free the memory + ret = set_context(ctx2, NULL); + ASSERT_EQ(ret, 0); + // free memory from ctx1 + umf_result = umfMemoryProviderFree(provider1, ptr1, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ret = set_context(ctx1, NULL); + ASSERT_EQ(ret, 0); + umf_result = umfMemoryProviderFree(provider2, ptr2, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // cleanup + umfMemoryProviderDestroy(provider2); + umfMemoryProviderDestroy(provider1); + ret = destroy_context(ctx1); + ASSERT_EQ(ret, 0); + ret = destroy_context(ctx2); + ASSERT_EQ(ret, 0); +} + +struct umfCUDAProviderAllocFlagsTest + : umf_test::test, + ::testing::WithParamInterface< + std::tuple<umf_usm_memory_type_t, unsigned int>> { + + void SetUp() override { + test::SetUp(); + + get_cuda_device(&device); + create_context(device, &context); + } + + void TearDown() override { + destroy_context(context); + + test::TearDown(); + } + + CUdevice device; + CUcontext context; +}; + +TEST_P(umfCUDAProviderAllocFlagsTest, cudaAllocFlags) { + auto [memory_type, test_flags] = this->GetParam(); -CUDATestHelper cudaTestHelper; + umf_cuda_memory_provider_params_handle_t test_params = + create_cuda_prov_params(context, device, memory_type, test_flags); + + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = umfMemoryProviderCreate( + umfCUDAMemoryProviderOps(), test_params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); -cuda_params_unique_handle_t cuParams_device_memory = create_cuda_prov_params( - cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), - UMF_MEMORY_TYPE_DEVICE); -cuda_params_unique_handle_t cuParams_shared_memory = create_cuda_prov_params( - cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), - UMF_MEMORY_TYPE_SHARED); -cuda_params_unique_handle_t cuParams_host_memory = create_cuda_prov_params( - cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), - UMF_MEMORY_TYPE_HOST); + void *ptr = nullptr; + umf_result = umfMemoryProviderAlloc(provider, 128, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + if (memory_type == UMF_MEMORY_TYPE_HOST) { + // check if the memory allocation flag is set correctly + unsigned int flags = get_mem_host_alloc_flags(ptr); + ASSERT_TRUE(flags & test_flags); + } + + umf_result = umfMemoryProviderFree(provider, ptr, 128); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); + umfCUDAMemoryProviderParamsDestroy(test_params); +} + +TEST_P(umfCUDAProviderAllocFlagsTest, reuseParams) { + auto [memory_type, test_flags] = this->GetParam(); + + // first, create a provider for SHARED memory type with empty alloc flags, + // and then reuse the test_params to create a provider with the test flags + umf_cuda_memory_provider_params_handle_t test_params = + create_cuda_prov_params(context, device, UMF_MEMORY_TYPE_SHARED, 0); + + umf_memory_provider_handle_t provider = nullptr; + + umf_result_t umf_result = umfMemoryProviderCreate( + umfCUDAMemoryProviderOps(), test_params,
&provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + void *ptr = nullptr; + umf_result = umfMemoryProviderAlloc(provider, 128, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + umf_result = umfMemoryProviderFree(provider, ptr, 128); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); + + // reuse the test_params to create a provider with the test flags + umf_result = + umfCUDAMemoryProviderParamsSetMemoryType(test_params, memory_type); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = + umfCUDAMemoryProviderParamsSetAllocFlags(test_params, test_flags); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), + test_params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + umf_result = umfMemoryProviderAlloc(provider, 128, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + if (memory_type == UMF_MEMORY_TYPE_HOST) { + // check if the memory allocation flag is set correctly + unsigned int flags = get_mem_host_alloc_flags(ptr); + ASSERT_TRUE(flags & test_flags); + } + + umf_result = umfMemoryProviderFree(provider, ptr, 128); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); + + umfCUDAMemoryProviderParamsDestroy(test_params); +} + +// TODO add tests that mix CUDA Memory Provider and Disjoint Pool -CUDAMemoryAccessor cuAccessor(cudaTestHelper.get_test_context(), - cudaTestHelper.get_test_device()); -HostMemoryAccessor hostAccessor; +INSTANTIATE_TEST_SUITE_P(umfCUDAProviderTestSuite, umfCUDAProviderTest, + ::testing::Values(UMF_MEMORY_TYPE_DEVICE, + UMF_MEMORY_TYPE_SHARED, + UMF_MEMORY_TYPE_HOST)); INSTANTIATE_TEST_SUITE_P( - umfCUDAProviderTestSuite, umfCUDAProviderTest, + umfCUDAProviderAllocFlagsTestSuite, umfCUDAProviderAllocFlagsTest, ::testing::Values( - CUDAProviderTestParams{cuParams_device_memory.get(), - cudaTestHelper.get_test_context(), - UMF_MEMORY_TYPE_DEVICE, &cuAccessor}, - CUDAProviderTestParams{cuParams_shared_memory.get(), - cudaTestHelper.get_test_context(), - UMF_MEMORY_TYPE_SHARED, &hostAccessor}, - CUDAProviderTestParams{cuParams_host_memory.get(), - cudaTestHelper.get_test_context(), - UMF_MEMORY_TYPE_HOST, &hostAccessor})); + std::make_tuple(UMF_MEMORY_TYPE_SHARED, CU_MEM_ATTACH_GLOBAL), + std::make_tuple(UMF_MEMORY_TYPE_SHARED, CU_MEM_ATTACH_HOST), + std::make_tuple(UMF_MEMORY_TYPE_HOST, CU_MEMHOSTALLOC_PORTABLE), + std::make_tuple(UMF_MEMORY_TYPE_HOST, CU_MEMHOSTALLOC_DEVICEMAP), + std::make_tuple(UMF_MEMORY_TYPE_HOST, CU_MEMHOSTALLOC_WRITECOMBINED))); // TODO: add IPC API GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); diff --git a/test/providers/provider_cuda_not_impl.cpp b/test/providers/provider_cuda_not_impl.cpp index 30fc373ca3..4054c26a83 100644 --- a/test/providers/provider_cuda_not_impl.cpp +++ b/test/providers/provider_cuda_not_impl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -26,6 +26,9 @@ TEST_F(test, cuda_provider_not_implemented) { UMF_MEMORY_TYPE_DEVICE); ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + result = umfCUDAMemoryProviderParamsSetAllocFlags(hParams, 0); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + umf_memory_provider_ops_t *ops = umfCUDAMemoryProviderOps(); ASSERT_EQ(ops, nullptr); } diff --git a/test/providers/provider_level_zero.cpp b/test/providers/provider_level_zero.cpp index d0584777be..47b62cc945 100644 --- a/test/providers/provider_level_zero.cpp +++ b/test/providers/provider_level_zero.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -42,7 +42,13 @@ class LevelZeroTestHelper { LevelZeroTestHelper::LevelZeroTestHelper() { uint32_t driver_idx = 0; - int ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver_); + int ret = utils_ze_init_level_zero(); + if (ret != 0) { + fprintf(stderr, "utils_ze_init_level_zero() failed!\n"); + return; + } + + ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver_); if (ret != 0 || hDriver_ == NULL) { fprintf(stderr, "utils_ze_find_driver_with_gpu() failed!\n"); return; @@ -61,11 +67,7 @@ LevelZeroTestHelper::LevelZeroTestHelper() { } } -using level_zero_params_unique_handle_t = - std::unique_ptr<umf_level_zero_memory_provider_params_t, decltype(&umfLevelZeroMemoryProviderParamsDestroy)>; - -level_zero_params_unique_handle_t +umf_level_zero_memory_provider_params_handle_t create_level_zero_prov_params(ze_context_handle_t context, ze_device_handle_t device, umf_usm_memory_type_t memory_type) { umf_level_zero_memory_provider_params_handle_t params = nullptr; umf_result_t res = umfLevelZeroMemoryProviderParamsCreate(&params); if (res != UMF_RESULT_SUCCESS) { - return level_zero_params_unique_handle_t( - nullptr, &umfLevelZeroMemoryProviderParamsDestroy); + return nullptr; } res = umfLevelZeroMemoryProviderParamsSetContext(params, context); if (res != UMF_RESULT_SUCCESS) { umfLevelZeroMemoryProviderParamsDestroy(params); - return level_zero_params_unique_handle_t( - nullptr, &umfLevelZeroMemoryProviderParamsDestroy); - ; + return nullptr; } res = umfLevelZeroMemoryProviderParamsSetDevice(params, device); if (res != UMF_RESULT_SUCCESS) { umfLevelZeroMemoryProviderParamsDestroy(params); - return level_zero_params_unique_handle_t( - nullptr, &umfLevelZeroMemoryProviderParamsDestroy); - ; + return nullptr; } res = umfLevelZeroMemoryProviderParamsSetMemoryType(params, memory_type); if (res != UMF_RESULT_SUCCESS) { umfLevelZeroMemoryProviderParamsDestroy(params); - return level_zero_params_unique_handle_t( - nullptr, &umfLevelZeroMemoryProviderParamsDestroy); - ; + return nullptr; } - return level_zero_params_unique_handle_t( - params, &umfLevelZeroMemoryProviderParamsDestroy); + return params; +} + +umf_result_t destroyL0Params(void *params) { + return umfLevelZeroMemoryProviderParamsDestroy( + static_cast<umf_level_zero_memory_provider_params_handle_t>(params)); } struct LevelZeroProviderInit : public test, - public ::testing::WithParamInterface<umf_usm_memory_type_t> {}; + public ::testing::WithParamInterface<umf_usm_memory_type_t> { + LevelZeroTestHelper l0TestHelper; +}; INSTANTIATE_TEST_SUITE_P(, LevelZeroProviderInit, ::testing::Values(UMF_MEMORY_TYPE_HOST, UMF_MEMORY_TYPE_DEVICE, UMF_MEMORY_TYPE_SHARED)); -LevelZeroTestHelper l0TestHelper; - TEST_P(LevelZeroProviderInit, FailNullContext) { umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); ASSERT_NE(ops, nullptr); @@ -168,12 +167,14 @@
TEST_P(LevelZeroProviderInit, FailNullDevice) { umfLevelZeroMemoryProviderParamsDestroy(hParams); } -TEST_F(test, FailNonNullDevice) { +TEST_P(LevelZeroProviderInit, FailNonNullDevice) { + if (GetParam() != UMF_MEMORY_TYPE_HOST) { + GTEST_SKIP() << "Host memory does not require device handle"; + } umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); ASSERT_NE(ops, nullptr); - auto memory_type = UMF_MEMORY_TYPE_HOST; - + auto memory_type = GetParam(); umf_level_zero_memory_provider_params_handle_t hParams = nullptr; umf_result_t result = umfLevelZeroMemoryProviderParamsCreate(&hParams); ASSERT_EQ(result, UMF_RESULT_SUCCESS); @@ -237,33 +238,43 @@ class LevelZeroMemoryAccessor : public MemoryAccessor { ze_context_handle_t hContext_; }; -using LevelZeroProviderTestParams = - std::tuple<umf_level_zero_memory_provider_params_handle_t, ze_context_handle_t, umf_usm_memory_type_t, MemoryAccessor *>; - struct umfLevelZeroProviderTest : umf_test::test, - ::testing::WithParamInterface<LevelZeroProviderTestParams> { + ::testing::WithParamInterface<umf_usm_memory_type_t> { void SetUp() override { test::SetUp(); - auto [l0_params, ze_context, memory_type, accessor] = this->GetParam(); - params = l0_params; - memAccessor = accessor; - hContext = ze_context; + umf_usm_memory_type_t memory_type = this->GetParam(); + + params = nullptr; + memAccessor = nullptr; + hContext = l0TestHelper.get_test_context(); ASSERT_NE(hContext, nullptr); switch (memory_type) { case UMF_MEMORY_TYPE_DEVICE: zeMemoryTypeExpected = ZE_MEMORY_TYPE_DEVICE; + params = create_level_zero_prov_params( + l0TestHelper.get_test_context(), l0TestHelper.get_test_device(), + memory_type); + memAccessor = std::make_unique<LevelZeroMemoryAccessor>( + l0TestHelper.get_test_context(), + l0TestHelper.get_test_device()); break; case UMF_MEMORY_TYPE_SHARED: zeMemoryTypeExpected = ZE_MEMORY_TYPE_SHARED; + params = create_level_zero_prov_params( + l0TestHelper.get_test_context(), l0TestHelper.get_test_device(), + memory_type); + memAccessor = std::make_unique<HostMemoryAccessor>(); break; case UMF_MEMORY_TYPE_HOST: zeMemoryTypeExpected = ZE_MEMORY_TYPE_HOST; + params = create_level_zero_prov_params( + l0TestHelper.get_test_context(), nullptr, memory_type); + memAccessor = std::make_unique<HostMemoryAccessor>(); break; case UMF_MEMORY_TYPE_UNKNOWN: zeMemoryTypeExpected = ZE_MEMORY_TYPE_UNKNOWN; @@ -273,10 +284,18 @@ struct umfLevelZeroProviderTest ASSERT_NE(zeMemoryTypeExpected, ZE_MEMORY_TYPE_UNKNOWN); } - void TearDown() override { test::TearDown(); } + void TearDown() override { + if (params) { + destroyL0Params(params); + } - umf_level_zero_memory_provider_params_handle_t params; - MemoryAccessor *memAccessor = nullptr; + test::TearDown(); + } + + LevelZeroTestHelper l0TestHelper; + umf_level_zero_memory_provider_params_handle_t params = nullptr; + + std::unique_ptr<MemoryAccessor> memAccessor = nullptr; ze_context_handle_t hContext = nullptr; ze_memory_type_t zeMemoryTypeExpected = ZE_MEMORY_TYPE_UNKNOWN; }; @@ -340,6 +359,18 @@ TEST_P(umfLevelZeroProviderTest, getPageSize) { ASSERT_GE(recommendedPageSize, minPageSize); + void *ptr = nullptr; + umf_result = umfMemoryProviderAlloc(provider, 1, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + size_t actualPageSize = 0; + umf_result = + umfMemoryProviderGetMinPageSize(provider, ptr, &actualPageSize); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(actualPageSize, minPageSize); + + umf_result = umfMemoryProviderFree(provider, ptr, 1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umfMemoryProviderDestroy(provider); } @@ -414,41 +445,60 @@ TEST_P(umfLevelZeroProviderTest, levelZeroProviderNullParams) { res = umfLevelZeroMemoryProviderParamsSetMemoryType(nullptr, UMF_MEMORY_TYPE_DEVICE); EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res =
umfLevelZeroMemoryProviderParamsSetDeviceOrdinal(nullptr, 0); + EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(umfLevelZeroProviderTest, setDeviceOrdinalValid) { + int64_t numProps = + utils_ze_get_num_memory_properties(l0TestHelper.get_test_device()); + ASSERT_GE(numProps, 0); + + for (uint32_t ordinal = 0; ordinal < static_cast<uint32_t>(numProps); + ordinal++) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t res = + umfLevelZeroMemoryProviderParamsSetDeviceOrdinal(params, ordinal); + EXPECT_EQ(res, UMF_RESULT_SUCCESS); + + res = umfMemoryProviderCreate(umfLevelZeroMemoryProviderOps(), params, + &provider); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + size_t size = 1024; + void *ptr = nullptr; + res = umfMemoryProviderAlloc(provider, size, 0, &ptr); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + res = umfMemoryProviderFree(provider, ptr, size); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); + } } // TODO add tests that mix Level Zero Memory Provider and Disjoint Pool -level_zero_params_unique_handle_t l0Params_device_memory = - create_level_zero_prov_params(l0TestHelper.get_test_context(), - l0TestHelper.get_test_device(), - UMF_MEMORY_TYPE_DEVICE); -level_zero_params_unique_handle_t l0Params_shared_memory = - create_level_zero_prov_params(l0TestHelper.get_test_context(), - l0TestHelper.get_test_device(), - UMF_MEMORY_TYPE_SHARED); -level_zero_params_unique_handle_t l0Params_host_memory = - create_level_zero_prov_params(l0TestHelper.get_test_context(), nullptr, - UMF_MEMORY_TYPE_HOST); +INSTANTIATE_TEST_SUITE_P(umfLevelZeroProviderTestSuite, + umfLevelZeroProviderTest, + ::testing::Values(UMF_MEMORY_TYPE_DEVICE, + UMF_MEMORY_TYPE_SHARED, + UMF_MEMORY_TYPE_HOST)); + +LevelZeroTestHelper l0TestHelper; + +void *createL0ParamsDeviceMemory() { + return create_level_zero_prov_params(l0TestHelper.get_test_context(), + l0TestHelper.get_test_device(), + UMF_MEMORY_TYPE_DEVICE); +} LevelZeroMemoryAccessor l0Accessor((ze_context_handle_t)l0TestHelper.get_test_context(), (ze_device_handle_t)l0TestHelper.get_test_device()); - -HostMemoryAccessor hostAccessor; - -INSTANTIATE_TEST_SUITE_P( - umfLevelZeroProviderTestSuite, umfLevelZeroProviderTest, - ::testing::Values( - LevelZeroProviderTestParams{l0Params_device_memory.get(), - l0TestHelper.get_test_context(), - UMF_MEMORY_TYPE_DEVICE, &l0Accessor}, - LevelZeroProviderTestParams{l0Params_shared_memory.get(), - l0TestHelper.get_test_context(), - UMF_MEMORY_TYPE_SHARED, &hostAccessor}, - LevelZeroProviderTestParams{l0Params_host_memory.get(), - l0TestHelper.get_test_context(), - UMF_MEMORY_TYPE_HOST, &hostAccessor})); - // TODO: it looks like there is some problem with IPC implementation in Level // Zero on Windows.
Issue: #494 #ifdef _WIN32 @@ -457,6 +507,6 @@ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); INSTANTIATE_TEST_SUITE_P( umfLevelZeroProviderTestSuite, umfIpcTest, ::testing::Values(ipcTestParams{ - umfProxyPoolOps(), nullptr, umfLevelZeroMemoryProviderOps(), - l0Params_device_memory.get(), &l0Accessor, false})); + umfProxyPoolOps(), nullptr, nullptr, umfLevelZeroMemoryProviderOps(), + createL0ParamsDeviceMemory, destroyL0Params, &l0Accessor})); #endif diff --git a/test/providers/provider_level_zero_not_impl.cpp b/test/providers/provider_level_zero_not_impl.cpp index bea1acbe79..4948bd66f1 100644 --- a/test/providers/provider_level_zero_not_impl.cpp +++ b/test/providers/provider_level_zero_not_impl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -31,6 +31,13 @@ TEST_F(test, level_zero_provider_not_implemented) { hDevices, 1); ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + result = umfLevelZeroMemoryProviderParamsSetFreePolicy( + hParams, UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + result = umfLevelZeroMemoryProviderParamsSetDeviceOrdinal(hParams, 0); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); ASSERT_EQ(ops, nullptr); } diff --git a/test/supp/drd-test_disjoint_pool.supp b/test/supp/drd-test_disjoint_pool.supp new file mode 100644 index 0000000000..2a5548d27e --- /dev/null +++ b/test/supp/drd-test_disjoint_pool.supp @@ -0,0 +1,7 @@ +{ + False-positive ConflictingAccess in critnib_insert + drd:ConflictingAccess + fun:utils_atomic_store_release_ptr + fun:critnib_insert + ... +} diff --git a/test/supp/drd-test_ipc.supp b/test/supp/drd-test_ipc.supp new file mode 100644 index 0000000000..fbdbd0183f --- /dev/null +++ b/test/supp/drd-test_ipc.supp @@ -0,0 +1,34 @@ +{ + Conditional variable destruction false-positive + drd:CondErr + ... + fun:pthread_cond_destroy@* + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} diff --git a/test/supp/drd-test_ipc_max_opened_limit.supp b/test/supp/drd-test_ipc_max_opened_limit.supp new file mode 100644 index 0000000000..fbdbd0183f --- /dev/null +++ b/test/supp/drd-test_ipc_max_opened_limit.supp @@ -0,0 +1,34 @@ +{ + Conditional variable destruction false-positive + drd:CondErr + ... + fun:pthread_cond_destroy@* + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... 
+} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} diff --git a/test/supp/drd-test_jemalloc_coarse_devdax.supp b/test/supp/drd-test_jemalloc_coarse_devdax.supp new file mode 100644 index 0000000000..8d8746861c --- /dev/null +++ b/test/supp/drd-test_jemalloc_coarse_devdax.supp @@ -0,0 +1,15 @@ +{ + False-positive ConflictingAccess in jemalloc + drd:ConflictingAccess + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in critnib_insert + drd:ConflictingAccess + fun:utils_atomic_store_release_ptr + fun:critnib_insert + ... +} diff --git a/test/supp/drd-test_jemalloc_coarse_file.supp b/test/supp/drd-test_jemalloc_coarse_file.supp new file mode 100644 index 0000000000..8d8746861c --- /dev/null +++ b/test/supp/drd-test_jemalloc_coarse_file.supp @@ -0,0 +1,15 @@ +{ + False-positive ConflictingAccess in jemalloc + drd:ConflictingAccess + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in critnib_insert + drd:ConflictingAccess + fun:utils_atomic_store_release_ptr + fun:critnib_insert + ... +} diff --git a/test/supp/drd-test_jemalloc_pool.supp b/test/supp/drd-test_jemalloc_pool.supp new file mode 100644 index 0000000000..cb6179f875 --- /dev/null +++ b/test/supp/drd-test_jemalloc_pool.supp @@ -0,0 +1,7 @@ +{ + False-positive ConflictingAccess in jemalloc + drd:ConflictingAccess + ... + fun:je_* + ... +} diff --git a/test/supp/drd-test_provider_devdax_memory_ipc.supp b/test/supp/drd-test_provider_devdax_memory_ipc.supp new file mode 100644 index 0000000000..31608d30ca --- /dev/null +++ b/test/supp/drd-test_provider_devdax_memory_ipc.supp @@ -0,0 +1,37 @@ +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + False-positive ConflictingAccess in jemalloc + drd:ConflictingAccess + fun:atomic_* + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in tbbmalloc + drd:ConflictingAccess + ... + fun:tbb_pool_finalize + ... +} diff --git a/test/supp/drd-test_provider_file_memory_ipc.supp b/test/supp/drd-test_provider_file_memory_ipc.supp new file mode 100644 index 0000000000..9883001f7c --- /dev/null +++ b/test/supp/drd-test_provider_file_memory_ipc.supp @@ -0,0 +1,62 @@ +{ + Conditional variable destruction false-positive + drd:CondErr + ... + fun:pthread_cond_destroy@* + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... 
+} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + False-positive ConflictingAccess in jemalloc + drd:ConflictingAccess + fun:atomic_* + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in tbbmalloc + drd:ConflictingAccess + ... + fun:tbb_pool_finalize + ... +} diff --git a/test/supp/drd-test_provider_os_memory.supp b/test/supp/drd-test_provider_os_memory.supp new file mode 100644 index 0000000000..31608d30ca --- /dev/null +++ b/test/supp/drd-test_provider_os_memory.supp @@ -0,0 +1,37 @@ +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + False-positive ConflictingAccess in jemalloc + drd:ConflictingAccess + fun:atomic_* + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in tbbmalloc + drd:ConflictingAccess + ... + fun:tbb_pool_finalize + ... +} diff --git a/test/supp/drd-umf_test-scalable_coarse_devdax.supp b/test/supp/drd-test_scalable_coarse_devdax.supp similarity index 100% rename from test/supp/drd-umf_test-scalable_coarse_devdax.supp rename to test/supp/drd-test_scalable_coarse_devdax.supp diff --git a/test/supp/drd-umf_test-scalable_coarse_file.supp b/test/supp/drd-test_scalable_coarse_file.supp similarity index 100% rename from test/supp/drd-umf_test-scalable_coarse_file.supp rename to test/supp/drd-test_scalable_coarse_file.supp diff --git a/test/supp/drd-umf_test-scalable_pool.supp b/test/supp/drd-test_scalable_pool.supp similarity index 100% rename from test/supp/drd-umf_test-scalable_pool.supp rename to test/supp/drd-test_scalable_pool.supp diff --git a/test/supp/drd-umf_test-ipc.supp b/test/supp/drd-umf_test-ipc.supp deleted file mode 100644 index 76844585d1..0000000000 --- a/test/supp/drd-umf_test-ipc.supp +++ /dev/null @@ -1,7 +0,0 @@ -{ - Conditional variable destruction false-positive - drd:CondErr - ... - fun:pthread_cond_destroy@* - ... -} diff --git a/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp b/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp deleted file mode 100644 index fd071432b6..0000000000 --- a/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp +++ /dev/null @@ -1,34 +0,0 @@ -{ - False-positive ConflictingAccess in libjemalloc.so - drd:ConflictingAccess - obj:*/libjemalloc.so* - ... - fun:mallocx - ... -} - -{ - False-positive ConflictingAccess in libjemalloc.so - drd:ConflictingAccess - obj:*/libjemalloc.so* - ... - fun:op_free - ... -} - -{ - False-positive ConflictingAccess in libjemalloc.so - drd:ConflictingAccess - obj:*/libjemalloc.so* - ... - fun:__nptl_deallocate_tsd - ... -} - -{ - False-positive ConflictingAccess in critnib_insert - drd:ConflictingAccess - fun:store - fun:critnib_insert - ... 
-} diff --git a/test/supp/drd-umf_test-jemalloc_coarse_file.supp b/test/supp/drd-umf_test-jemalloc_coarse_file.supp deleted file mode 100644 index fd071432b6..0000000000 --- a/test/supp/drd-umf_test-jemalloc_coarse_file.supp +++ /dev/null @@ -1,34 +0,0 @@ -{ - False-positive ConflictingAccess in libjemalloc.so - drd:ConflictingAccess - obj:*/libjemalloc.so* - ... - fun:mallocx - ... -} - -{ - False-positive ConflictingAccess in libjemalloc.so - drd:ConflictingAccess - obj:*/libjemalloc.so* - ... - fun:op_free - ... -} - -{ - False-positive ConflictingAccess in libjemalloc.so - drd:ConflictingAccess - obj:*/libjemalloc.so* - ... - fun:__nptl_deallocate_tsd - ... -} - -{ - False-positive ConflictingAccess in critnib_insert - drd:ConflictingAccess - fun:store - fun:critnib_insert - ... -} diff --git a/test/supp/drd-umf_test-jemalloc_pool.supp b/test/supp/drd-umf_test-jemalloc_pool.supp deleted file mode 100644 index 965ef38844..0000000000 --- a/test/supp/drd-umf_test-jemalloc_pool.supp +++ /dev/null @@ -1,6 +0,0 @@ -{ - Conflicting Access in libjemalloc.so - internal issue of libjemalloc - drd:ConflictingAccess - obj:*libjemalloc.so* - ... -} diff --git a/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp b/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp deleted file mode 100644 index cd44bb49ae..0000000000 --- a/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp +++ /dev/null @@ -1,8 +0,0 @@ -{ - [false-positive] Double check locking pattern in trackingOpenIpcHandle - drd:ConflictingAccess - fun:trackingOpenIpcHandle - fun:umfMemoryProviderOpenIPCHandle - fun:umfOpenIPCHandle - ... -} diff --git a/test/supp/drd-umf_test-provider_file_memory_ipc.supp b/test/supp/drd-umf_test-provider_file_memory_ipc.supp deleted file mode 100644 index 7fce241167..0000000000 --- a/test/supp/drd-umf_test-provider_file_memory_ipc.supp +++ /dev/null @@ -1,16 +0,0 @@ -{ - Conditional variable destruction false-positive - drd:CondErr - ... - fun:pthread_cond_destroy@* - ... -} - -{ - [false-positive] Double check locking pattern in trackingOpenIpcHandle - drd:ConflictingAccess - fun:trackingOpenIpcHandle - fun:umfMemoryProviderOpenIPCHandle - fun:umfOpenIPCHandle - ... -} diff --git a/test/supp/drd-umf_test-provider_os_memory.supp b/test/supp/drd-umf_test-provider_os_memory.supp deleted file mode 100644 index cd44bb49ae..0000000000 --- a/test/supp/drd-umf_test-provider_os_memory.supp +++ /dev/null @@ -1,8 +0,0 @@ -{ - [false-positive] Double check locking pattern in trackingOpenIpcHandle - drd:ConflictingAccess - fun:trackingOpenIpcHandle - fun:umfMemoryProviderOpenIPCHandle - fun:umfOpenIPCHandle - ... -} diff --git a/test/supp/helgrind-umf_test-disjointPool.supp b/test/supp/helgrind-test_disjoint_pool.supp similarity index 53% rename from test/supp/helgrind-umf_test-disjointPool.supp rename to test/supp/helgrind-test_disjoint_pool.supp index 3ada32736c..65dfdd2c78 100644 --- a/test/supp/helgrind-umf_test-disjointPool.supp +++ b/test/supp/helgrind-test_disjoint_pool.supp @@ -29,25 +29,9 @@ } { - Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument") - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:*glibcxx_rwlock_wrlock*pthread_rwlock_t - ... -} - -{ - Incompatibility with helgrind's implementation ("pthread_rwlock_unlock with a pthread_mutex_t* argument") - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:*glibcxx_rwlock_unlock*pthread_rwlock_t - ... 
diff --git a/test/supp/helgrind-umf_test-disjointPool.supp b/test/supp/helgrind-test_disjoint_pool.supp
similarity index 53%
rename from test/supp/helgrind-umf_test-disjointPool.supp
rename to test/supp/helgrind-test_disjoint_pool.supp
index 3ada32736c..65dfdd2c78 100644
--- a/test/supp/helgrind-umf_test-disjointPool.supp
+++ b/test/supp/helgrind-test_disjoint_pool.supp
@@ -29,25 +29,9 @@
 }
 
 {
-   Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument")
-   Helgrind:Misc
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:*glibcxx_rwlock_wrlock*pthread_rwlock_t
-   ...
-}
-
-{
-   Incompatibility with helgrind's implementation ("pthread_rwlock_unlock with a pthread_mutex_t* argument")
-   Helgrind:Misc
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:*glibcxx_rwlock_unlock*pthread_rwlock_t
-   ...
-}
-
-{
-   Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument")
-   Helgrind:Misc
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:*glibcxx_rwlock_rdlock*pthread_rwlock_t*
+   False-positive Race in critnib_insert
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:critnib_insert
    ...
 }
diff --git a/test/supp/helgrind-test_ipc.supp b/test/supp/helgrind-test_ipc.supp
new file mode 100644
index 0000000000..25ae87ea43
--- /dev/null
+++ b/test/supp/helgrind-test_ipc.supp
@@ -0,0 +1,54 @@
+{
+   False-positive race in critnib_insert (lack of instrumentation)
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:critnib_insert
+   ...
+}
+
+{
+   False-positive race in critnib_find (lack of instrumentation)
+   Helgrind:Race
+   fun:find_predecessor
+   fun:find_le
+   fun:critnib_find
+   ...
+}
+
+{
+   [false-positive] Double check locking pattern in trackingOpenIpcHandle
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:upstreamOpenIPCHandle
+   fun:trackingOpenIpcHandle
+   fun:umfMemoryProviderOpenIPCHandle
+   fun:umfOpenIPCHandle
+   ...
+}
+
+{
+   [false-positive] Double check locking pattern in trackingOpenIpcHandle
+   Helgrind:Race
+   fun:utils_atomic_load_acquire_ptr
+   fun:trackingOpenIpcHandle
+   fun:umfMemoryProviderOpenIPCHandle
+   fun:umfOpenIPCHandle
+   ...
+}
+
+{
+   [false-positive] umfMemoryProviderGetIPCHandle
+   Helgrind:Race
+   fun:trackingGetIpcHandle
+   fun:umfMemoryProviderGetIPCHandle
+   fun:umfGetIPCHandle
+}
+
+{
+   [false-positive] umfMemoryProviderGetIPCHandle
+   Helgrind:Race
+   fun:memmove
+   fun:trackingGetIpcHandle
+   fun:umfMemoryProviderGetIPCHandle
+   fun:umfGetIPCHandle
+}
diff --git a/test/supp/helgrind-test_ipc_max_opened_limit.supp b/test/supp/helgrind-test_ipc_max_opened_limit.supp
new file mode 100644
index 0000000000..25ae87ea43
--- /dev/null
+++ b/test/supp/helgrind-test_ipc_max_opened_limit.supp
@@ -0,0 +1,54 @@
+{
+   False-positive race in critnib_insert (lack of instrumentation)
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:critnib_insert
+   ...
+}
+
+{
+   False-positive race in critnib_find (lack of instrumentation)
+   Helgrind:Race
+   fun:find_predecessor
+   fun:find_le
+   fun:critnib_find
+   ...
+}
+
+{
+   [false-positive] Double check locking pattern in trackingOpenIpcHandle
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:upstreamOpenIPCHandle
+   fun:trackingOpenIpcHandle
+   fun:umfMemoryProviderOpenIPCHandle
+   fun:umfOpenIPCHandle
+   ...
+}
+
+{
+   [false-positive] Double check locking pattern in trackingOpenIpcHandle
+   Helgrind:Race
+   fun:utils_atomic_load_acquire_ptr
+   fun:trackingOpenIpcHandle
+   fun:umfMemoryProviderOpenIPCHandle
+   fun:umfOpenIPCHandle
+   ...
+}
+
+{
+   [false-positive] umfMemoryProviderGetIPCHandle
+   Helgrind:Race
+   fun:trackingGetIpcHandle
+   fun:umfMemoryProviderGetIPCHandle
+   fun:umfGetIPCHandle
+}
+
+{
+   [false-positive] umfMemoryProviderGetIPCHandle
+   Helgrind:Race
+   fun:memmove
+   fun:trackingGetIpcHandle
+   fun:umfMemoryProviderGetIPCHandle
+   fun:umfGetIPCHandle
+}
diff --git a/test/supp/helgrind-test_jemalloc_coarse_devdax.supp b/test/supp/helgrind-test_jemalloc_coarse_devdax.supp
new file mode 100644
index 0000000000..2f4980f519
--- /dev/null
+++ b/test/supp/helgrind-test_jemalloc_coarse_devdax.supp
@@ -0,0 +1,15 @@
+{
+   False-positive Race in jemalloc
+   Helgrind:Race
+   ...
+   fun:je_*
+   ...
+}
+
+{
+   False-positive Race in critnib_insert
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:critnib_insert
+   ...
+}
diff --git a/test/supp/helgrind-test_jemalloc_coarse_file.supp b/test/supp/helgrind-test_jemalloc_coarse_file.supp
new file mode 100644
index 0000000000..2f4980f519
--- /dev/null
+++ b/test/supp/helgrind-test_jemalloc_coarse_file.supp
@@ -0,0 +1,15 @@
+{
+   False-positive Race in jemalloc
+   Helgrind:Race
+   ...
+   fun:je_*
+   ...
+}
+
+{
+   False-positive Race in critnib_insert
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:critnib_insert
+   ...
+}
diff --git a/test/supp/helgrind-test_jemalloc_pool.supp b/test/supp/helgrind-test_jemalloc_pool.supp
new file mode 100644
index 0000000000..98d748feac
--- /dev/null
+++ b/test/supp/helgrind-test_jemalloc_pool.supp
@@ -0,0 +1,7 @@
+{
+   False-positive Race in jemalloc
+   Helgrind:Race
+   ...
+   fun:je_*
+   ...
+}
diff --git a/test/supp/helgrind-test_provider_devdax_memory_ipc.supp b/test/supp/helgrind-test_provider_devdax_memory_ipc.supp
new file mode 100644
index 0000000000..63e7d626c2
--- /dev/null
+++ b/test/supp/helgrind-test_provider_devdax_memory_ipc.supp
@@ -0,0 +1,37 @@
+{
+   [false-positive] Double check locking pattern in trackingOpenIpcHandle
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:upstreamOpenIPCHandle
+   fun:trackingOpenIpcHandle
+   fun:umfMemoryProviderOpenIPCHandle
+   fun:umfOpenIPCHandle
+   ...
+}
+
+{
+   [false-positive] Double check locking pattern in trackingOpenIpcHandle
+   Helgrind:Race
+   fun:utils_atomic_load_acquire_ptr
+   fun:trackingOpenIpcHandle
+   fun:umfMemoryProviderOpenIPCHandle
+   fun:umfOpenIPCHandle
+   ...
+}
+
+{
+   False-positive ConflictingAccess in jemalloc
+   Helgrind:Race
+   fun:atomic_*
+   ...
+   fun:je_*
+   ...
+}
+
+{
+   False-positive ConflictingAccess in tbbmalloc
+   Helgrind:Race
+   ...
+   fun:tbb_pool_finalize
+   ...
+}
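The critnib_insert/critnib_find entries above are all variations of one theme: critnib is a lock-free structure whose writers publish nodes with a release store while readers walk the tree without locks. A sketch of the publish/lookup pairing follows, assuming utils_atomic_store_release_ptr wraps a C11 release store; the helper is not part of this patch, so the code below is illustrative rather than the real critnib implementation.

/* Hypothetical lock-free publish/lookup; not the actual critnib code. */
#include <stdatomic.h>
#include <stddef.h>

struct node {
    long key;
    long value;
};

static _Atomic(struct node *) slot; /* a single tree slot, for brevity */

/* writer: fill the node completely, then publish it with release order */
void publish(struct node *n, long key, long value) {
    n->key = key;
    n->value = value;
    atomic_store_explicit(&slot, n, memory_order_release);
}

/* reader: the acquire load pairs with the release store, so the fields
 * written before publication are guaranteed visible without any lock */
int lookup(long key, long *out) {
    struct node *n = atomic_load_explicit(&slot, memory_order_acquire);
    if (n == NULL || n->key != key) {
        return 0;
    }
    *out = n->value;
    return 1;
}

Helgrind builds its happens-before graph from the synchronization primitives it intercepts (pthread mutexes, condition variables, and so on); raw atomics inside a project-local helper create ordering it cannot see, hence the "lack of instrumentation" wording in the suppression names.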
diff --git a/test/supp/helgrind-test_provider_file_memory_ipc.supp b/test/supp/helgrind-test_provider_file_memory_ipc.supp
new file mode 100644
index 0000000000..11791e4ed2
--- /dev/null
+++ b/test/supp/helgrind-test_provider_file_memory_ipc.supp
@@ -0,0 +1,71 @@
+{
+   [false-positive] Double check locking pattern in trackingOpenIpcHandle
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:upstreamOpenIPCHandle
+   fun:trackingOpenIpcHandle
+   fun:umfMemoryProviderOpenIPCHandle
+   fun:umfOpenIPCHandle
+   ...
+}
+
+{
+   [false-positive] Double check locking pattern in trackingOpenIpcHandle
+   Helgrind:Race
+   fun:utils_atomic_load_acquire_ptr
+   fun:trackingOpenIpcHandle
+   fun:umfMemoryProviderOpenIPCHandle
+   fun:umfOpenIPCHandle
+   ...
+}
+
+{
+   False-positive race in critnib_insert (lack of instrumentation)
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:critnib_insert
+   ...
+}
+
+{
+   False-positive race in critnib_find (lack of instrumentation)
+   Helgrind:Race
+   fun:find_predecessor
+   fun:find_le
+   fun:critnib_find
+   ...
+}
+
+{
+   False-positive ConflictingAccess in jemalloc
+   Helgrind:Race
+   fun:atomic_*
+   ...
+   fun:je_*
+   ...
+}
+
+{
+   False-positive ConflictingAccess in tbbmalloc
+   Helgrind:Race
+   ...
+   fun:tbb_pool_finalize
+   ...
+}
+
+{
+   [false-positive] trackingGetIpcHandle
+   Helgrind:Race
+   fun:trackingGetIpcHandle
+   fun:umfMemoryProviderGetIPCHandle
+   fun:umfGetIPCHandle
+}
+
+{
+   [false-positive] trackingGetIpcHandle
+   Helgrind:Race
+   fun:memmove
+   fun:trackingGetIpcHandle
+   fun:umfMemoryProviderGetIPCHandle
+   fun:umfGetIPCHandle
+}
diff --git a/test/supp/helgrind-test_provider_os_memory.supp b/test/supp/helgrind-test_provider_os_memory.supp
new file mode 100644
index 0000000000..63e7d626c2
--- /dev/null
+++ b/test/supp/helgrind-test_provider_os_memory.supp
@@ -0,0 +1,37 @@
+{
+   [false-positive] Double check locking pattern in trackingOpenIpcHandle
+   Helgrind:Race
+   fun:utils_atomic_store_release_ptr
+   fun:upstreamOpenIPCHandle
+   fun:trackingOpenIpcHandle
+   fun:umfMemoryProviderOpenIPCHandle
+   fun:umfOpenIPCHandle
+   ...
+}
+
+{
+   [false-positive] Double check locking pattern in trackingOpenIpcHandle
+   Helgrind:Race
+   fun:utils_atomic_load_acquire_ptr
+   fun:trackingOpenIpcHandle
+   fun:umfMemoryProviderOpenIPCHandle
+   fun:umfOpenIPCHandle
+   ...
+}
+
+{
+   False-positive ConflictingAccess in jemalloc
+   Helgrind:Race
+   fun:atomic_*
+   ...
+   fun:je_*
+   ...
+}
+
+{
+   False-positive ConflictingAccess in tbbmalloc
+   Helgrind:Race
+   ...
+   fun:tbb_pool_finalize
+   ...
+}
diff --git a/test/supp/helgrind-umf_test-scalable_coarse_devdax.supp b/test/supp/helgrind-test_scalable_coarse_devdax.supp
similarity index 100%
rename from test/supp/helgrind-umf_test-scalable_coarse_devdax.supp
rename to test/supp/helgrind-test_scalable_coarse_devdax.supp
diff --git a/test/supp/helgrind-umf_test-scalable_coarse_file.supp b/test/supp/helgrind-test_scalable_coarse_file.supp
similarity index 100%
rename from test/supp/helgrind-umf_test-scalable_coarse_file.supp
rename to test/supp/helgrind-test_scalable_coarse_file.supp
diff --git a/test/supp/helgrind-umf_test-scalable_pool.supp b/test/supp/helgrind-test_scalable_pool.supp
similarity index 100%
rename from test/supp/helgrind-umf_test-scalable_pool.supp
rename to test/supp/helgrind-test_scalable_pool.supp
diff --git a/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp b/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp
deleted file mode 100644
index 2f669eb311..0000000000
--- a/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-   Incompatibility with helgrind's implementation (pthread_mutex_lock with a pthread_rwlock_t* argument)
-   Helgrind:Misc
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:_ZL20__gthread_mutex_lockP15pthread_mutex_t
-   ...
-}
-
-{
-   Incompatibility with helgrind's implementation (pthread_mutex_unlock with a pthread_rwlock_t* argument)
-   Helgrind:Misc
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:_ZL22__gthread_mutex_unlockP15pthread_mutex_t
-   ...
-}
-
-{
-   Incompatibility with helgrind's implementation (lock order "0xA before 0xB" violated)
-   Helgrind:LockOrder
-   obj:*vgpreload_helgrind-amd64-linux.so
-   fun:_ZStL23__glibcxx_rwlock_wrlockP16pthread_rwlock_t
-   fun:_ZNSt22__shared_mutex_pthread4lockEv
-   ...
-}
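The memmove frames in the trackingGetIpcHandle entries above come from copying the upstream provider's handle bytes into the tracking provider's own, larger IPC handle. The patch only shows the call stacks, not the structures, so the layout below is a guess made purely for illustration:

/* Hypothetical layout; the real UMF structures are not shown in this patch. */
#include <string.h>

typedef struct tracking_ipc_handle_t {
    unsigned long long base; /* illustrative bookkeeping fields */
    unsigned long long len;
    char provider_data[];    /* upstream provider's handle bytes follow */
} tracking_ipc_handle_t;

void fill_tracking_handle(tracking_ipc_handle_t *dst, const void *upstream,
                          size_t upstream_size) {
    /* when two threads request a handle for the same allocation, the tools
     * see overlapping unsynchronized accesses here and report a race */
    memmove(dst->provider_data, upstream, upstream_size);
}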
diff --git a/test/supp/helgrind-umf_test-ipc.supp b/test/supp/helgrind-umf_test-ipc.supp
deleted file mode 100644
index e46140c197..0000000000
--- a/test/supp/helgrind-umf_test-ipc.supp
+++ /dev/null
@@ -1,16 +0,0 @@
-{
-   False-positive race in critnib_insert (lack of instrumentation)
-   Helgrind:Race
-   fun:store
-   fun:critnib_insert
-   ...
-}
-
-{
-   False-positive race in critnib_find (lack of instrumentation)
-   Helgrind:Race
-   fun:find_predecessor
-   fun:find_le
-   fun:critnib_find
-   ...
-}
diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp b/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp
deleted file mode 100644
index 18774f387c..0000000000
--- a/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-   False-positive Race in libjemalloc.so
-   Helgrind:Race
-   obj:*/libjemalloc.so*
-   ...
-   fun:mallocx
-   ...
-}
-
-{
-   False-positive Race in libjemalloc.so
-   Helgrind:Race
-   obj:*/libjemalloc.so*
-   ...
-   fun:op_free
-   ...
-}
-
-{
-   False-positive Race in libjemalloc.so
-   Helgrind:Race
-   obj:*/libjemalloc.so*
-   ...
-   fun:__nptl_deallocate_tsd
-   ...
-}
-
-{
-   False-positive Race in critnib_insert
-   Helgrind:Race
-   fun:store
-   fun:critnib_insert
-   ...
-}
diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp b/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp
deleted file mode 100644
index 18774f387c..0000000000
--- a/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-   False-positive Race in libjemalloc.so
-   Helgrind:Race
-   obj:*/libjemalloc.so*
-   ...
-   fun:mallocx
-   ...
-}
-
-{
-   False-positive Race in libjemalloc.so
-   Helgrind:Race
-   obj:*/libjemalloc.so*
-   ...
-   fun:op_free
-   ...
-}
-
-{
-   False-positive Race in libjemalloc.so
-   Helgrind:Race
-   obj:*/libjemalloc.so*
-   ...
-   fun:__nptl_deallocate_tsd
-   ...
-}
-
-{
-   False-positive Race in critnib_insert
-   Helgrind:Race
-   fun:store
-   fun:critnib_insert
-   ...
-}
diff --git a/test/supp/helgrind-umf_test-jemalloc_pool.supp b/test/supp/helgrind-umf_test-jemalloc_pool.supp
deleted file mode 100644
index 8068b023dc..0000000000
--- a/test/supp/helgrind-umf_test-jemalloc_pool.supp
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-   Race in libjemalloc.so - internal issue of libjemalloc
-   Helgrind:Race
-   obj:*libjemalloc.so*
-   ...
-}
diff --git a/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp b/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp
deleted file mode 100644
index 4fcd2786cf..0000000000
--- a/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-   [false-positive] Double check locking pattern in trackingOpenIpcHandle
-   Helgrind:Race
-   fun:trackingOpenIpcHandle
-   fun:umfMemoryProviderOpenIPCHandle
-   fun:umfOpenIPCHandle
-   ...
-}
diff --git a/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp b/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp
deleted file mode 100644
index 4194f48479..0000000000
--- a/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp
+++ /dev/null
@@ -1,25 +0,0 @@
-{
-   [false-positive] Double check locking pattern in trackingOpenIpcHandle
-   Helgrind:Race
-   fun:trackingOpenIpcHandle
-   fun:umfMemoryProviderOpenIPCHandle
-   fun:umfOpenIPCHandle
-   ...
-}
-
-{
-   False-positive race in critnib_insert (lack of instrumentation)
-   Helgrind:Race
-   fun:store
-   fun:critnib_insert
-   ...
-}
-
-{
-   False-positive race in critnib_find (lack of instrumentation)
-   Helgrind:Race
-   fun:find_predecessor
-   fun:find_le
-   fun:critnib_find
-   ...
-}
diff --git a/test/supp/helgrind-umf_test-provider_os_memory.supp b/test/supp/helgrind-umf_test-provider_os_memory.supp
deleted file mode 100644
index 4fcd2786cf..0000000000
--- a/test/supp/helgrind-umf_test-provider_os_memory.supp
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-   [false-positive] Double check locking pattern in trackingOpenIpcHandle
-   Helgrind:Race
-   fun:trackingOpenIpcHandle
-   fun:umfMemoryProviderOpenIPCHandle
-   fun:umfOpenIPCHandle
-   ...
-}
diff --git a/test/supp/memcheck-test_jemalloc_coarse_devdax.supp b/test/supp/memcheck-test_jemalloc_coarse_devdax.supp
new file mode 100644
index 0000000000..f719032779
--- /dev/null
+++ b/test/supp/memcheck-test_jemalloc_coarse_devdax.supp
@@ -0,0 +1,7 @@
+{
+   False-positive invalid write of size 8
+   Memcheck:Addr8
+   ...
+   fun:je_*
+   ...
+}
diff --git a/test/supp/memcheck-test_jemalloc_coarse_file.supp b/test/supp/memcheck-test_jemalloc_coarse_file.supp
new file mode 100644
index 0000000000..f719032779
--- /dev/null
+++ b/test/supp/memcheck-test_jemalloc_coarse_file.supp
@@ -0,0 +1,7 @@
+{
+   False-positive invalid write of size 8
+   Memcheck:Addr8
+   ...
+   fun:je_*
+   ...
+}
diff --git a/test/supp/memcheck-umf_test-jemalloc_pool.supp b/test/supp/memcheck-test_jemalloc_pool.supp
similarity index 100%
rename from test/supp/memcheck-umf_test-jemalloc_pool.supp
rename to test/supp/memcheck-test_jemalloc_pool.supp
diff --git a/test/supp/memcheck-umf_test-scalable_pool.supp b/test/supp/memcheck-test_scalable_pool.supp
similarity index 100%
rename from test/supp/memcheck-umf_test-scalable_pool.supp
rename to test/supp/memcheck-test_scalable_pool.supp
diff --git a/test/test_installation.py b/test/test_installation.py
index 49a382969c..4cf7890001 100644
--- a/test/test_installation.py
+++ b/test/test_installation.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 #
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -28,7 +28,8 @@ class UmfInstaller:
         proxy (bool): Determines whether the proxy library should be built together with the UMF library
         pools (List[str]): A list of enabled pools during the UMF compilation
         umf_version (Version): UMF version currently being built and installed
-        match_list (List[str]): A list of relative paths of files that should be installed
+        umfd_lib (bool): Determines if the UMF was built with the umfd library
+        hwloc (bool): Determines if hwloc is installed and should be checked
     """
 
     def __init__(
@@ -41,6 +42,8 @@ def __init__(
         proxy: bool,
         pools: List[str],
         umf_version: Version,
+        umfd_lib: bool,
+        hwloc: bool,
     ):
         self.workspace_dir = workspace_dir
         self.build_dir = build_dir
@@ -50,6 +53,8 @@ def __init__(
         self.proxy = proxy
         self.pools = pools
         self.umf_version = umf_version
+        self.umfd_lib = umfd_lib
+        self.hwloc = hwloc
         self.match_list = self._create_match_list()
 
     def _create_match_list(self) -> List[str]:
@@ -78,6 +83,8 @@ def _create_match_list(self) -> List[str]:
             bin.append("bin")
             if self.shared_library:
                 bin.append("bin/umf.dll")
+                if self.umfd_lib:
+                    bin.append("bin/umfd.dll")
                 if self.proxy:
                     bin.append("bin/umf_proxy.dll")
@@ -97,10 +104,15 @@ def _create_match_list(self) -> List[str]:
             f"lib/cmake/umf/umf-targets-{self.build_type}.cmake",
             "lib/cmake/umf/umf-targets.cmake",
         ]
+
         for pool in self.pools:
             lib.append(f"lib/{lib_prefix}{pool}.{lib_ext_static}")
+        if platform.system() == "Windows" and self.hwloc:
+            lib.append(f"lib/{lib_prefix}hwloc.{lib_ext_static}")
         if self.shared_library:
             lib.append(f"lib/{lib_prefix}umf.{lib_ext_shared}")
+            if platform.system() == "Windows" and self.umfd_lib:
+                lib.append(f"lib/{lib_prefix}umfd.{lib_ext_shared}")
 
             if platform.system() == "Linux":
                 lib.append(
@@ -114,6 +126,8 @@ def _create_match_list(self) -> List[str]:
                 lib.append(f"lib/{lib_prefix}umf.{self.umf_version}.{lib_ext_shared}")
         else:
             lib.append(f"lib/{lib_prefix}umf.{lib_ext_static}")
+            if self.umfd_lib and platform.system() == "Windows":
+                lib.append(f"lib/{lib_prefix}umfd.{lib_ext_static}")
 
         if self.proxy:
             lib.append(f"lib/{lib_prefix}umf_proxy.{lib_ext_shared}")
@@ -127,7 +141,6 @@ def _create_match_list(self) -> List[str]:
                 f"lib/{lib_prefix}umf_proxy.{self.umf_version.major}.{lib_ext_shared}"
             )
 
-        share = []
         share = [
             "share",
             "share/doc",
@@ -279,19 +292,19 @@ def parse_arguments(self) -> argparse.Namespace:
             help="Add this argument if the proxy library should be built together with the UMF library",
         )
         self.parser.add_argument(
-            "--disjoint-pool",
-            action="store_true",
-            help="Add this argument if the UMF was built with Disjoint Pool enabled",
+            "--umf-version",
+            action="store",
+            help="Current version of the UMF, e.g. 1.0.0",
         )
         self.parser.add_argument(
-            "--jemalloc-pool",
+            "--umfd-lib",
             action="store_true",
-            help="Add this argument if the UMF was built with Jemalloc Pool enabled",
+            help="Add this argument if the UMF was built with the umfd library",
        )
         self.parser.add_argument(
-            "--umf-version",
-            action="store",
-            help="Current version of the UMF, e.g. 1.0.0",
+            "--hwloc",
+            action="store_true",
+            help="Add this argument if hwloc is installed and should be checked",
         )
 
         return self.parser.parse_args()
@@ -304,10 +317,6 @@ def run(self) -> None:
         build_dir = Path(workspace_dir, self.args.build_dir)
         install_dir = Path(workspace_dir, self.args.install_dir)
         pools = []
-        if self.args.disjoint_pool:
-            pools.append("disjoint_pool")
-        if self.args.jemalloc_pool:
-            pools.append("jemalloc_pool")
 
         umf_version = Version(self.args.umf_version)
 
@@ -320,6 +329,8 @@ def run(self) -> None:
             self.args.proxy,
             pools,
             umf_version,
+            self.args.umfd_lib,
+            self.args.hwloc,
         )
 
         print("Installation test - BEGIN", flush=True)
diff --git a/test/test_valgrind.sh b/test/test_valgrind.sh
index 9f84cf0d32..2e4f655f64 100755
--- a/test/test_valgrind.sh
+++ b/test/test_valgrind.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2025 Intel Corporation
 # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
@@ -8,11 +8,16 @@ set -e
 WORKSPACE=$1
 BUILD_DIR=$2
 TOOL=$3
+TESTS=$4
 
 function print_usage() {
-    echo "$(basename $0) - run all UMF tests under a valgrind tool (memcheck, drd or helgrind)"
-    echo "This script looks for './test/umf_test-*' test executables in the UMF build directory."
-    echo "Usage: $(basename $0) <workspace_dir> <build_dir> <memcheck|drd|helgrind>"
+    echo "$(basename $0) - run UMF tests and examples under a valgrind tool (memcheck, drd or helgrind)"
+    echo "Usage: $(basename $0) <workspace_dir> <build_dir> <memcheck|drd|helgrind> [tests_examples]"
+    echo "Where:"
+    echo
+    echo "tests_examples - (optional) list of tests or examples to be run (paths relative to the build directory)."
+    echo "                 If it is empty, all tests (./test/test_*) and examples (./examples/umf_example_*)"
+    echo "                 found in <build_dir> will be run."
 }
 
 if ! valgrind --version > /dev/null; then
@@ -32,8 +37,8 @@ if [ ! -f $WORKSPACE/README.md ]; then
     exit 1
 fi
 
-if [ $(ls -1 ${BUILD_DIR}/test/umf_test-* 2>/dev/null | wc -l) -eq 0 ]; then
-    echo -e "error: UMF tests ./test/umf_test-* not found in the build directory: ${BUILD_DIR}\n"
+if [ $(ls -1 ${BUILD_DIR}/test/test_* 2>/dev/null | wc -l) -eq 0 ]; then
+    echo -e "error: UMF tests ./test/test_* not found in the build directory: ${BUILD_DIR}\n"
     print_usage
     exit 1
 fi
@@ -58,33 +63,44 @@ esac
 WORKSPACE=$(realpath $WORKSPACE)
 BUILD_DIR=$(realpath $BUILD_DIR)
 
-cd ${BUILD_DIR}/test/
+cd ${BUILD_DIR}
 
 mkdir -p cpuid
 echo "Gathering data for hwloc so it can be run under valgrind:"
-hwloc-gather-cpuid ./cpuid
+hwloc-gather-cpuid ./cpuid >/dev/null
 echo
 
 echo "Working directory: $(pwd)"
 echo "Running: \"valgrind $OPTION\" for the following tests:"
 
 ANY_TEST_FAILED=0
-rm -f umf_test-*.log umf_test-*.err
+PATH_TESTS="./test/test_*"
+PATH_EXAMPLES="./examples/umf_example_*"
+
+rm -f ${PATH_TESTS}.log ${PATH_TESTS}.err ${PATH_EXAMPLES}.log ${PATH_EXAMPLES}.err
+
+[ "$TESTS" = "" ] && TESTS=$(ls -1 ${PATH_TESTS} ${PATH_EXAMPLES})
 
-for test in $(ls -1 umf_test-*); do
+for test in $TESTS; do
+    if [ ! -f $test ]; then
+        echo
+        echo "error: the $test (${BUILD_DIR}/$test) file does not exist"
+        exit 1
+    fi
     [ ! -x $test ] && continue
     echo "$test - starting ..."
     echo -n "$test "
     LOG=${test}.log
     ERR=${test}.err
-    SUP="${WORKSPACE}/test/supp/${TOOL}-${test}.supp"
+    NAME=$(basename $test)
+    SUP="${WORKSPACE}/test/supp/${TOOL}-${NAME}.supp"
     OPT_SUP=""
-    [ -f ${SUP} ] && OPT_SUP="--suppressions=${SUP}" && echo -n "(${TOOL}-${test}.supp) "
+    [ -f ${SUP} ] && OPT_SUP="--suppressions=${SUP}" && echo -n "($(basename ${SUP})) "
 
     # skip tests incompatible with valgrind
     FILTER=""
     case $test in
-    umf_test-disjointPool)
+    ./test/test_disjointPool)
        if [ "$TOOL" = "helgrind" ]; then
            # skip because of the assert in helgrind:
            # Helgrind: hg_main.c:308 (lockN_acquire_reader): Assertion 'lk->kind == LK_rdwr' failed.
@@ -92,53 +108,61 @@ for test in $(ls -1 umf_test-*); do
            continue;
        fi
        ;;
-    umf_test-ipc_os_prov_*)
+    ./test/test_ipc_os_prov_*)
        echo "- SKIPPED"
        continue; # skip testing helper binaries used by the ipc_os_prov_* tests
        ;;
-    umf_test-ipc_devdax_prov_*)
+    ./test/test_ipc_devdax_prov_*)
        echo "- SKIPPED"
        continue; # skip testing helper binaries used by the ipc_devdax_prov_* tests
        ;;
-    umf_test-ipc_file_prov_*)
+    ./test/test_ipc_file_prov_*)
        echo "- SKIPPED"
        continue; # skip testing helper binaries used by the ipc_file_prov_* tests
        ;;
-    umf_test-memspace_host_all)
+    ./test/test_memspace_host_all)
        FILTER='--gtest_filter="-*allocsSpreadAcrossAllNumaNodes"'
        ;;
-    umf_test-provider_os_memory)
+    ./test/test_provider_os_memory)
        FILTER='--gtest_filter="-osProviderTest/umfIpcTest*"'
        ;;
-    umf_test-provider_os_memory_config)
+    ./test/test_provider_os_memory_config)
        FILTER='--gtest_filter="-*protection_flag_none:*protection_flag_read:*providerConfigTestNumaMode*"'
        ;;
-    umf_test-memspace_highest_capacity)
+    ./test/test_memspace_highest_capacity)
        FILTER='--gtest_filter="-*highestCapacityVerify*"'
        ;;
-    umf_test-provider_os_memory_multiple_numa_nodes)
+    ./test/test_provider_os_memory_multiple_numa_nodes)
        FILTER='--gtest_filter="-testNuma.checkModeInterleave*:testNumaNodesAllocations/testNumaOnEachNode.checkNumaNodesAllocations*:testNumaNodesAllocations/testNumaOnEachNode.checkModePreferred*:testNumaNodesAllocations/testNumaOnEachNode.checkModeInterleaveSingleNode*:testNumaNodesAllocationsAllCpus/testNumaOnEachCpu.checkModePreferredEmptyNodeset*:testNumaNodesAllocationsAllCpus/testNumaOnEachCpu.checkModeLocal*"'
        ;;
-    umf_test-memspace_highest_bandwidth)
+    ./test/test_memspace_highest_bandwidth)
        FILTER='--gtest_filter="-*allocLocalMt*"'
        ;;
-    umf_test-memspace_lowest_latency)
+    ./test/test_memspace_lowest_latency)
        FILTER='--gtest_filter="-*allocLocalMt*"'
        ;;
-    umf_test-memoryPool)
+    ./test/test_memoryPool)
        FILTER='--gtest_filter="-*allocMaxSize*"'
        ;;
+    ./examples/umf_example_ipc_ipcapi_*)
+       echo "- SKIPPED"
+       continue; # skip testing helper binaries used by the umf_example_ipc_ipcapi_* examples
+       ;;
     esac
     [ "$FILTER" != "" ] && echo -n "($FILTER) "
 
     LAST_TEST_FAILED=0
-
-    if ! HWLOC_CPUID_PATH=./cpuid valgrind $OPTION $OPT_SUP --gen-suppressions=all ./$test $FILTER >$LOG 2>&1; then
+    set +e
+    HWLOC_CPUID_PATH=./cpuid valgrind $OPTION $OPT_SUP --gen-suppressions=all $test $FILTER >$LOG 2>&1
+    RET=$?
+    set -e
+    # 125 is the return code when the test is skipped
+    if [ $RET -ne 0 -a $RET -ne 125 ]; then
        LAST_TEST_FAILED=1
        ANY_TEST_FAILED=1
-       echo "(valgrind FAILED) "
-       echo "Command: HWLOC_CPUID_PATH=./cpuid valgrind $OPTION $OPT_SUP --gen-suppressions=all ./$test $FILTER >$LOG 2>&1"
+       echo "(valgrind FAILED RV=$RET) "
+       echo "Command: HWLOC_CPUID_PATH=./cpuid valgrind $OPTION $OPT_SUP --gen-suppressions=all $test $FILTER >$LOG 2>&1"
        echo "Output:"
        cat $LOG
        echo "====================="
 
@@ -147,7 +171,7 @@
     # grep for "ERROR SUMMARY" with errors (there can be many lines with "ERROR SUMMARY")
     grep -e "ERROR SUMMARY:" $LOG | grep -v -e "ERROR SUMMARY: 0 errors from 0 contexts" > $ERR || true
     if [ $LAST_TEST_FAILED -eq 0 -a $(cat $ERR | wc -l) -eq 0 ]; then
-        echo "- OK"
+        [ $RET -eq 0 ] && echo "- OK" || echo "- SKIPPED"
         rm -f $LOG $ERR
     else
         echo "- FAILED!"
@@ -164,11 +188,33 @@
 echo
 echo "======================================================================"
 echo
 
-for log in $(ls -1 umf_test-*.log); do
+LOG_FILES=""
+NT=$(ls -1 ${PATH_TESTS}.log 2>/dev/null | wc -l)
+if [ $NT -gt 0 ]; then
+    LOG_FILES="$LOG_FILES $(ls -1 ${PATH_TESTS}.log | xargs)"
+fi
+NE=$(ls -1 ${PATH_EXAMPLES}.log 2>/dev/null | wc -l)
+if [ $NE -gt 0 ]; then
+    LOG_FILES="$LOG_FILES $(ls -1 ${PATH_EXAMPLES}.log | xargs)"
+fi
+if [ $(($NT + $NE)) -eq 0 ]; then
+    echo
+    echo "FATAL ERROR: no log files found, but number of failed tests equals $ANY_TEST_FAILED!"
+    echo
+    exit 1
+fi
+
+for log in $LOG_FILES; do
     echo ">>>>>>> LOG $log"
     cat $log
     echo
     echo
 done
 
+if [ $(($NT + $NE)) -ne $ANY_TEST_FAILED ]; then
+    echo
+    echo "ERROR: incorrect number of log files: ANY_TEST_FAILED=$ANY_TEST_FAILED != ($NT + $NE)"
+    echo
+fi
+
 exit 1
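One behavioral detail of the loop above: a non-zero exit status no longer fails the run unconditionally, since 125 is treated as "skipped" (the comment in the hunk spells this out). A test binary can therefore opt out at runtime, sketched here in C; the environment variable and skip condition are made up for illustration, only the 125 convention is taken from the script:

/* Hypothetical skip-aware test main; only the exit code 125 is real,
 * per the "125 is the return code when the test is skipped" comment
 * in test_valgrind.sh above. */
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    if (getenv("UMF_TESTS_DEVDAX_PATH") == NULL) { /* made-up prerequisite */
        fprintf(stderr, "SKIP: no devdax device configured\n");
        return 125; /* reported by the script as "- SKIPPED", not a failure */
    }
    /* ... the actual test body would run here ... */
    return 0;
}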
diff --git a/src/cpp_helpers.hpp b/test/utils/cpp_helpers.hpp
similarity index 94%
rename from src/cpp_helpers.hpp
rename to test/utils/cpp_helpers.hpp
index 6316ccbc7e..037c633c17 100644
--- a/src/cpp_helpers.hpp
+++ b/test/utils/cpp_helpers.hpp
@@ -1,14 +1,14 @@
 /*
  *
- * Copyright (C) 2023-2024 Intel Corporation
+ * Copyright (C) 2023-2025 Intel Corporation
  *
  * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  *
  */
 
-#ifndef UMF_HELPERS_HPP
-#define UMF_HELPERS_HPP 1
+#ifndef UMF_TEST_HELPERS_HPP
+#define UMF_TEST_HELPERS_HPP 1
 
 #include <umf/base.h>
 #include <umf/memory_pool.h>
@@ -22,7 +22,7 @@
 #include <functional>
 #include <memory>
 
-namespace umf {
+namespace umf_test {
 
 using pool_unique_handle_t =
     std::unique_ptr<umf_memory_pool_t,
@@ -62,5 +62,5 @@ template <typename T> umf_memory_pool_ops_t poolOpsBase() {
     umf_memory_pool_ops_t ops{};
-    ops.version = UMF_VERSION_CURRENT;
+    ops.version = UMF_POOL_OPS_VERSION_CURRENT;
     ops.finalize = [](void *obj) { delete reinterpret_cast<T *>(obj); };
     UMF_ASSIGN_OP(ops, T, malloc, ((void *)nullptr));
     UMF_ASSIGN_OP(ops, T, calloc, ((void *)nullptr));
@@ -81,10 +81,10 @@ template <typename T> umf_memory_pool_ops_t poolOpsBase() {
 
 template <typename T> constexpr umf_memory_provider_ops_t providerOpsBase() {
     umf_memory_provider_ops_t ops{};
-    ops.version = UMF_VERSION_CURRENT;
+    ops.version = UMF_PROVIDER_OPS_VERSION_CURRENT;
     ops.finalize = [](void *obj) { delete reinterpret_cast<T *>(obj); };
     UMF_ASSIGN_OP(ops, T, alloc, UMF_RESULT_ERROR_UNKNOWN);
-    UMF_ASSIGN_OP(ops.ext, T, free, UMF_RESULT_ERROR_UNKNOWN);
+    UMF_ASSIGN_OP(ops, T, free, UMF_RESULT_ERROR_UNKNOWN);
     UMF_ASSIGN_OP_NORETURN(ops, T, get_last_native_error);
     UMF_ASSIGN_OP(ops, T, get_recommended_page_size, UMF_RESULT_ERROR_UNKNOWN);
     UMF_ASSIGN_OP(ops, T, get_min_page_size, UMF_RESULT_ERROR_UNKNOWN);
@@ -162,6 +162,6 @@ template <typename T> umf_result_t &getPoolLastStatusRef() {
     return last_status;
 }
 
-} // namespace umf
+} // namespace umf_test
 
-#endif /* UMF_HELPERS_HPP */
+#endif /* UMF_TEST_HELPERS_HPP */
diff --git a/third_party/requirements.txt b/third_party/requirements.txt
index 6a8be6e46a..467ea1e033 100644
--- a/third_party/requirements.txt
+++ b/third_party/requirements.txt
@@ -6,12 +6,15 @@ black==24.3.0
 # Tests
 packaging==24.2
 # Generating HTML documentation
-pygments==2.18.0
+pygments==2.19.1
 sphinxcontrib_applehelp==2.0.0
 sphinxcontrib_devhelp==2.0.0
 sphinxcontrib_htmlhelp==2.1.0
 sphinxcontrib_serializinghtml==2.0.0
 sphinxcontrib_qthelp==2.0.0
-breathe==4.35.0
+breathe==4.36.0
 sphinx==8.1.3
 sphinx_book_theme==1.1.3
+# Spelling check in documentation
+pyenchant==3.2.2
+sphinxcontrib-spelling==8.0.1
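The cpp_helpers.hpp hunks above also change which version constant gets stamped into the ops tables: UMF_POOL_OPS_VERSION_CURRENT and UMF_PROVIDER_OPS_VERSION_CURRENT replace the single UMF_VERSION_CURRENT, i.e. pool and provider ops are now versioned independently of the library itself. A generic sketch of that pattern follows; the struct, macro, and check are illustrative, not the UMF definitions:

/* Hypothetical versioned ops table; mirrors the idea, not the UMF API. */
#include <stddef.h>
#include <stdio.h>

#define OPS_MAKE_VERSION(major, minor) (((major) << 16) | (minor))
#define POOL_OPS_VERSION_CURRENT OPS_MAKE_VERSION(1, 0)

typedef struct pool_ops_t {
    unsigned version;        /* always readable, whatever the rest looks like */
    void *(*malloc)(size_t size);
    void (*free)(void *ptr); /* promoted to a first-class member, echoing the
                                ops.ext -> ops change in the hunk above */
} pool_ops_t;

int validate_ops(const pool_ops_t *ops) {
    if (ops->version != POOL_OPS_VERSION_CURRENT) {
        fprintf(stderr, "unsupported ops version: %u\n", ops->version);
        return -1;
    }
    return 0;
}

Stamping the table with the interface version it was compiled against lets the library detect a mismatched caller before dereferencing any callback.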