From 6351b0ab48aa14d8ee238d2b3f1fef52cc3531d3 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 12 Jan 2024 10:03:54 +0700 Subject: [PATCH 001/281] update "CMakeLists.txt" --- CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 652e3b9..ae733e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,10 +3,6 @@ cmake_minimum_required(VERSION 3.8.0) project(TensorArray) include(GNUInstallDirs) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/build-temp/archive) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/build-temp/library) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/build-temp/runtime) -set(CMAKE_OUTPUT ${CMAKE_CURRENT_LIST_DIR}/build-temp/bin) set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) add_subdirectory("src/tensor_array/core") From d50ec931754d8e793ec92ba80d6cd2c3c56723fa Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 12 Jan 2024 11:52:28 +0700 Subject: [PATCH 002/281] temp delete "devcontainer.json" --- .devcontainer/devcontainer.json | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index 08d2ac4..0000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "image": "mcr.microsoft.com/devcontainers/universal:2", - "features": { - "ghcr.io/devcontainers/features/nvidia-cuda:1": { - "instalNvtx": true, - "installToolkit": true, - "cudaVersion": "12.3" - } - } -} From 718cc7068941acd43bab93574c72848be00c410e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 13 Jan 2024 04:23:45 +0000 Subject: [PATCH 003/281] add docker --- .devcontainer/Dockerfile | 10 ++++++++++ .devcontainer/devcontainer.json | 33 
+++++++++++++++++++++++++++++++++ .dockerignore | 5 +++++ 3 files changed, 48 insertions(+) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100644 .dockerignore diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000..decec8d --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,10 @@ +FROM nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04 + +RUN apt-get update && apt-get -y install cmake + +# [Optional] Uncomment this section to install additional vcpkg ports. +# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " + +# [Optional] Uncomment this section to install additional packages. +# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ +# && apt-get -y install --no-install-recommends diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..db63d85 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,33 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/cpp +{ + "name": "C++", + "build": { + "dockerfile": "Dockerfile" + }, + + "runArgs": [ + "--gpus", + "all" + ], + + "hostRequirements": { + "gpu": "optional" + }, + + "features": { + "ghcr.io/devcontainers/features/git:1": {} + } + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + + // Use 'postCreateCommand' to run commands after the container is created. + // "postCreateCommand": "gcc -v", + + // Configure tool-specific properties. + // "customizations": {}, + + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
+ // "remoteUser": "root" +} diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..bc78b51 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +build/ +lib/ +bin/ +include/ +build-temp/ From 733f604e137bc6eb71022de9afc416e6be9d104e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 13 Jan 2024 11:25:09 +0700 Subject: [PATCH 004/281] Create docker-image.yml --- .github/workflows/docker-image.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/workflows/docker-image.yml diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 0000000..eac633f --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,18 @@ +name: Docker Image CI + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Build the Docker image + run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) From c00d53bd597b0cefabe6e37521ebf9319bffa8f0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 13 Jan 2024 11:59:19 +0700 Subject: [PATCH 005/281] Update docker-image.yml --- .github/workflows/docker-image.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index eac633f..ef92c63 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -15,4 +15,4 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) + run: docker build . 
--file .devcontainer/Dockerfile --tag my-image-name:$(date +%s) From a5b8e5762d164d1326f09c3e5a69a57d917bb143 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 13 Jan 2024 08:06:02 +0000 Subject: [PATCH 006/281] add more "Docker" --- .devcontainer/devcontainer.json | 9 +++++++++ .github/workflows/docker-image.yml | 2 +- Dockerfile | 23 +++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 Dockerfile diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index db63d85..4732d30 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -11,6 +11,15 @@ "all" ], + "customizations": { + "vscode": { + "extensions": [ + "ms-vscode.cpptools-extension-pack", + "ms-vscode.cmake-tools" + ] + } + }, + "hostRequirements": { "gpu": "optional" }, diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index ef92c63..eac633f 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -15,4 +15,4 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file .devcontainer/Dockerfile --tag my-image-name:$(date +%s) + run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7244284 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +FROM nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04 + +RUN apt-get update && apt-get -y install cmake + +# [Optional] Uncomment this section to install additional vcpkg ports. +# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " + +# [Optional] Uncomment this section to install additional packages. 
+# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ +# && apt-get -y install --no-install-recommends + +WORKDIR /tensor-array +COPY src/ ./src/ +COPY CMakeLists.txt ./ +COPY Config.cmake.in ./ +WORKDIR /tensor-array + +WORKDIR /tensor-array/build + +RUN cmake .. +RUN make install + +WORKDIR /tensor-array From 268744a694faf2400484ffec9dc786cf20083904 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 13 Jan 2024 15:34:57 +0700 Subject: [PATCH 007/281] Create docker-publish.yml --- .github/workflows/docker-publish.yml | 98 ++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 .github/workflows/docker-publish.yml diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 0000000..7d6200b --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,98 @@ +name: Docker + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +on: + schedule: + - cron: '32 5 * * *' + push: + branches: [ "master" ] + # Publish semver tags as releases. + tags: [ 'v*.*.*' ] + pull_request: + branches: [ "master" ] + +env: + # Use docker.io for Docker Hub if empty + REGISTRY: ghcr.io + # github.repository as / + IMAGE_NAME: ${{ github.repository }} + + +jobs: + build: + + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. 
+ id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Install the cosign tool except on PR + # https://github.com/sigstore/cosign-installer + - name: Install cosign + if: github.event_name != 'pull_request' + uses: sigstore/cosign-installer@6e04d228eb30da1757ee4e1dd75a0ec73a653e06 #v3.1.1 + with: + cosign-release: 'v2.1.1' + + # Set up BuildKit Docker container builder to be able to build + # multi-platform images and export cache + # https://github.com/docker/setup-buildx-action + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0 + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + - name: Log into registry ${{ env.REGISTRY }} + if: github.event_name != 'pull_request' + uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract metadata (tags, labels) for Docker + # https://github.com/docker/metadata-action + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + # Build and push Docker image with Buildx (don't push on PR) + # https://github.com/docker/build-push-action + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 + with: + context: . + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + # Sign the resulting Docker image digest except on PRs. + # This will only write to the public Rekor transparency log when the Docker + # repository is public to avoid leaking data. 
If you would like to publish + # transparency data even for private images, pass --force to cosign below. + # https://github.com/sigstore/cosign + - name: Sign the published Docker image + if: ${{ github.event_name != 'pull_request' }} + env: + # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable + TAGS: ${{ steps.meta.outputs.tags }} + DIGEST: ${{ steps.build-and-push.outputs.digest }} + # This step uses the identity token to provision an ephemeral certificate + # against the sigstore community Fulcio instance. + run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST} From 9566959b4b5d2647db13c7f898035a99e767bc19 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 13 Jan 2024 09:17:34 +0000 Subject: [PATCH 008/281] try CUDA 12.1 --- .github/workflows/cmake-single-platform.yml | 2 +- .github/workflows/codeql.yml | 2 +- scripts/actions/install-cuda-ubuntu.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index e69bac7..29766ef 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - cuda-version: [ "12.3" ] + cuda-version: [ "12.1", "12.3" ] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 89997e7..9c90d2a 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -38,7 +38,7 @@ jobs: fail-fast: false matrix: language: [ 'c-cpp' ] - cuda-version: [ "12.3" ] + cuda-version: [ "12.1", "12.3" ] # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both # Use only 'javascript-typescript' to analyze code 
written in JavaScript, TypeScript or both diff --git a/scripts/actions/install-cuda-ubuntu.sh b/scripts/actions/install-cuda-ubuntu.sh index 45d85dd..f45dcf4 100644 --- a/scripts/actions/install-cuda-ubuntu.sh +++ b/scripts/actions/install-cuda-ubuntu.sh @@ -74,7 +74,7 @@ echo "CUDA_PACKAGES ${CUDA_PACKAGES}" CPU_ARCH="x86_64" PIN_FILENAME="cuda-${LINUX_ID}${LINUX_VERSION}.pin" PIN_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/${PIN_FILENAME}" -KERYRING_PACKAGE_FILENAME="cuda-keyring_1.0-1_all.deb" +KERYRING_PACKAGE_FILENAME="cuda-keyring_1.1-1_all.deb" KEYRING_PACKAGE_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/${KERYRING_PACKAGE_FILENAME}" REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/" From 5fae056f5c4a9b13eb928561a539509a81dff18d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 13 Jan 2024 09:22:41 +0000 Subject: [PATCH 009/281] changes "CUDA 12.1" to "CUDA 12.2" --- .github/workflows/cmake-single-platform.yml | 2 +- .github/workflows/codeql.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index 29766ef..ac6f29f 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - cuda-version: [ "12.1", "12.3" ] + cuda-version: [ "12.1", "12.2" ] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 9c90d2a..9a00be2 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -38,7 +38,7 @@ jobs: fail-fast: false matrix: language: [ 'c-cpp' ] - cuda-version: [ "12.1", "12.3" ] + cuda-version: [ "12.1", "12.2" ] # CodeQL supports [ 'c-cpp', 'csharp', 
'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both From ddc68dbbe68ff772bde345a0bed44a088192b79d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 13 Jan 2024 09:23:58 +0000 Subject: [PATCH 010/281] _ --- .github/workflows/cmake-single-platform.yml | 2 +- .github/workflows/codeql.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index ac6f29f..05991d2 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - cuda-version: [ "12.1", "12.2" ] + cuda-version: [ "12.2", "12.3" ] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 9a00be2..d374858 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -38,7 +38,7 @@ jobs: fail-fast: false matrix: language: [ 'c-cpp' ] - cuda-version: [ "12.1", "12.2" ] + cuda-version: [ "12.2", "12.3" ] # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both From 857f6177089f67c9a9e7b7ac1222bd64609f04a5 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sun, 14 Jan 2024 11:26:49 +0000 Subject: [PATCH 011/281] add reduce function --- .devcontainer/{ => CUDA}/Dockerfile | 0 .devcontainer/{ => CUDA}/devcontainer.json | 0 .devcontainer/OpenMP/Dockerfile | 18 + .devcontainer/OpenMP/devcontainer.json | 32 ++ 
.devcontainer/OpenMP/reinstall-cmake.sh | 59 ++++ .dockerignore | 2 + .vscode/settings.json | 3 +- src/tensor_array/core/CMakeLists.txt | 21 +- src/tensor_array/core/tensor.hh | 6 + src/tensor_array/core/tensor_cast.cu | 22 +- src/tensor_array/core/tensor_reduce.cu | 363 +++++++++++++++++++++ 11 files changed, 507 insertions(+), 19 deletions(-) rename .devcontainer/{ => CUDA}/Dockerfile (100%) rename .devcontainer/{ => CUDA}/devcontainer.json (100%) create mode 100644 .devcontainer/OpenMP/Dockerfile create mode 100644 .devcontainer/OpenMP/devcontainer.json create mode 100644 .devcontainer/OpenMP/reinstall-cmake.sh create mode 100644 src/tensor_array/core/tensor_reduce.cu diff --git a/.devcontainer/Dockerfile b/.devcontainer/CUDA/Dockerfile similarity index 100% rename from .devcontainer/Dockerfile rename to .devcontainer/CUDA/Dockerfile diff --git a/.devcontainer/devcontainer.json b/.devcontainer/CUDA/devcontainer.json similarity index 100% rename from .devcontainer/devcontainer.json rename to .devcontainer/CUDA/devcontainer.json diff --git a/.devcontainer/OpenMP/Dockerfile b/.devcontainer/OpenMP/Dockerfile new file mode 100644 index 0000000..82e43f9 --- /dev/null +++ b/.devcontainer/OpenMP/Dockerfile @@ -0,0 +1,18 @@ +FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-22.04 + +ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.22.2" + +# Optionally install the cmake for vcpkg +COPY ./reinstall-cmake.sh /tmp/ + +RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ + chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ + fi \ + && rm -f /tmp/reinstall-cmake.sh + +# [Optional] Uncomment this section to install additional vcpkg ports. +# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " + +# [Optional] Uncomment this section to install additional packages. 
+# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ +# && apt-get -y install --no-install-recommends diff --git a/.devcontainer/OpenMP/devcontainer.json b/.devcontainer/OpenMP/devcontainer.json new file mode 100644 index 0000000..3dba291 --- /dev/null +++ b/.devcontainer/OpenMP/devcontainer.json @@ -0,0 +1,32 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/cpp +{ + "name": "C++", + "build": { + "dockerfile": "Dockerfile" + }, + + "customizations": { + "vscode": { + "extensions": [ + "ms-vscode.cpptools-extension-pack", + "ms-vscode.cmake-tools" + ] + } + } + + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + + // Use 'postCreateCommand' to run commands after the container is created. + // "postCreateCommand": "gcc -v", + + // Configure tool-specific properties. + // "customizations": {}, + + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "root" +} diff --git a/.devcontainer/OpenMP/reinstall-cmake.sh b/.devcontainer/OpenMP/reinstall-cmake.sh new file mode 100644 index 0000000..408b81d --- /dev/null +++ b/.devcontainer/OpenMP/reinstall-cmake.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +#------------------------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information. 
+#------------------------------------------------------------------------------------------------------------- +# +set -e + +CMAKE_VERSION=${1:-"none"} + +if [ "${CMAKE_VERSION}" = "none" ]; then + echo "No CMake version specified, skipping CMake reinstallation" + exit 0 +fi + +# Cleanup temporary directory and associated files when exiting the script. +cleanup() { + EXIT_CODE=$? + set +e + if [[ -n "${TMP_DIR}" ]]; then + echo "Executing cleanup of tmp files" + rm -Rf "${TMP_DIR}" + fi + exit $EXIT_CODE +} +trap cleanup EXIT + + +echo "Installing CMake..." +apt-get -y purge --auto-remove cmake +mkdir -p /opt/cmake + +architecture=$(dpkg --print-architecture) +case "${architecture}" in + arm64) + ARCH=aarch64 ;; + amd64) + ARCH=x86_64 ;; + *) + echo "Unsupported architecture ${architecture}." + exit 1 + ;; +esac + +CMAKE_BINARY_NAME="cmake-${CMAKE_VERSION}-linux-${ARCH}.sh" +CMAKE_CHECKSUM_NAME="cmake-${CMAKE_VERSION}-SHA-256.txt" +TMP_DIR=$(mktemp -d -t cmake-XXXXXXXXXX) + +echo "${TMP_DIR}" +cd "${TMP_DIR}" + +curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_BINARY_NAME}" -O +curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_CHECKSUM_NAME}" -O + +sha256sum -c --ignore-missing "${CMAKE_CHECKSUM_NAME}" +sh "${TMP_DIR}/${CMAKE_BINARY_NAME}" --prefix=/opt/cmake --skip-license + +ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake +ln -s /opt/cmake/bin/ctest /usr/local/bin/ctest diff --git a/.dockerignore b/.dockerignore index bc78b51..3805f8a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,3 +3,5 @@ lib/ bin/ include/ build-temp/ +.git/ +.vscode/ diff --git a/.vscode/settings.json b/.vscode/settings.json index 821aaac..a838ff0 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -59,6 +59,7 @@ "stop_token": "cpp", "streambuf": "cpp", "thread": "cpp", - "typeindex": "cpp" + "typeindex": "cpp", + "cassert": "cpp" } } \ No newline at end of file diff --git 
a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index fe9cede..6dfc3a4 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -1,26 +1,31 @@ cmake_minimum_required(VERSION 3.8.0) +file(GLOB TensorArray_inc "*.hh") + +install( + FILES ${TensorArray_inc} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor_array/core + COMPONENT headers) + +find_package(CUDAToolkit) +if(CUDAToolkit_FOUND) enable_language(CUDA C CXX) -set(CMAKE_CUDA_COMPILER nvcc) set(CMAKE_CUDA_STANDARD 17) set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION TRUE) set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-g -G") # enable cuda-gdb file(GLOB TensorArray_src "*.cc" "*.cu") -file(GLOB TensorArray_inc "*.hh") # file(MAKE_DIRECTORY "include/tensor_array/core") -install( - FILES ${TensorArray_inc} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor_array/core - COMPONENT headers) - add_library(tensorarray_core SHARED ${TensorArray_src}) +target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) + +endif(CUDAToolkit_FOUND) + add_library(TensorArray::Core ALIAS tensorarray_core) -target_link_libraries(tensorarray_core PRIVATE cublas) install( TARGETS tensorarray_core diff --git a/src/tensor_array/core/tensor.hh b/src/tensor_array/core/tensor.hh index 48b2670..d3e82df 100644 --- a/src/tensor_array/core/tensor.hh +++ b/src/tensor_array/core/tensor.hh @@ -243,6 +243,12 @@ namespace tensor_array #ifdef TENSOR_CONTENT friend Tensor tensor_rand(const std::initializer_list&, unsigned int); + friend Tensor reduce_sum(const Tensor&); + + friend Tensor reduce_max(const Tensor&); + + friend Tensor reduce_min(const Tensor&); + friend Tensor add(const Tensor&, const Tensor&, bool); friend Tensor power(const Tensor&, const Tensor&, bool); diff --git a/src/tensor_array/core/tensor_cast.cu b/src/tensor_array/core/tensor_cast.cu index 3c3da08..cbf7c57 100644 --- a/src/tensor_array/core/tensor_cast.cu +++ 
b/src/tensor_array/core/tensor_cast.cu @@ -34,22 +34,24 @@ limitations under the License. #define END(...) END_(__VA_ARGS__) #define END_(...) __VA_ARGS__##_END -#define USING_DATA_TYPE_NVIDIA_FLOAT_8 (__nv_fp8_e5m2)(__nv_fp8_e4m3) +#define USING_DATA_TYPE_NVIDIA_FLOAT_8 (__nv_fp8_e4m3)(__nv_fp8_e5m2) #define USING_DATA_TYPE_NVIDIA_FLOAT (__half)(__nv_bfloat16) #define USING_DATA_TYPE_FLOAT (float)(double) -#define USING_DATA_TYPE_SINT (int8_t)(int16_t)(int32_t)(int64_t) -#define USING_DATA_TYPE_UINT (uint8_t)(uint16_t)(uint32_t)(uint64_t) +#define USING_DATA_TYPE_SINT (int16_t)(int32_t)(int64_t) +#define USING_DATA_TYPE_UINT (uint16_t)(uint32_t)(uint64_t) -#define USING_DATA_TYPE_CAST_TO \ -(bool) \ +#define USING_DATA_TYPE_CAST_FROM \ +(__nv_fp8_e4m3) \ USING_DATA_TYPE_SINT \ USING_DATA_TYPE_UINT \ USING_DATA_TYPE_FLOAT \ USING_DATA_TYPE_NVIDIA_FLOAT -#define USING_DATA_TYPE_CAST_FROM \ -USING_DATA_TYPE_CAST_TO \ -USING_DATA_TYPE_NVIDIA_FLOAT_8 +#define USING_DATA_TYPE_CAST_TO \ +(bool) \ +(int8_t) \ +(uint8_t) \ +USING_DATA_TYPE_CAST_FROM namespace tensor_array { @@ -85,7 +87,7 @@ namespace tensor_array #define ADD_CODE(TYPE) \ if(this->get_buffer().type() == typeid(TYPE)) \ type_casting<<>>(out_ptr, static_cast(base_of_this.data()), total_size); - LOOP(USING_DATA_TYPE_CAST_TO); + LOOP(USING_DATA_TYPE_CAST_FROM); #undef ADD_CODE cuda_status = cudaDeviceSynchronize(); cuda_status = cudaGetLastError(); @@ -108,7 +110,7 @@ type_casting<<>>(out_ptr, static_cast(base_of_ #define ADD_CODE(TYPE) \ if(dtype == typeid(TYPE)) \ return this->cast(is_derive); - LOOP(USING_DATA_TYPE_CAST_FROM); + LOOP(USING_DATA_TYPE_CAST_TO); #undef ADD_CODE throw std::exception(); } diff --git a/src/tensor_array/core/tensor_reduce.cu b/src/tensor_array/core/tensor_reduce.cu new file mode 100644 index 0000000..8d31b06 --- /dev/null +++ b/src/tensor_array/core/tensor_reduce.cu @@ -0,0 +1,363 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the 
"License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef TENSOR_CONTENT +#define TENSOR_CONTENT +#include "tensor.hh" +#undef TENSOR_CONTENT +#endif + +#define USING_DATA_TYPE_NVIDIA_FLOAT_8 (__nv_fp8_e5m2)(__nv_fp8_e4m3) +#define USING_DATA_TYPE_NVIDIA_FLOAT (__half)(__nv_bfloat16) +#define USING_DATA_TYPE_FLOAT (float)(double) +#define USING_DATA_TYPE_SINT (int8_t)(int16_t)(int32_t)(int64_t) +#define USING_DATA_TYPE_UINT (uint8_t)(uint16_t)(uint32_t)(uint64_t) +#define USING_DATA_TYPE USING_DATA_TYPE_SINT USING_DATA_TYPE_UINT USING_DATA_TYPE_FLOAT + +#define LOOP(seq) END(A seq) +#define BODY(x) ADD_CODE(x) +#define A(x) BODY(x) B +#define B(x) BODY(x) A +#define A_END +#define B_END +#define END(...) END_(__VA_ARGS__) +#define END_(...) 
__VA_ARGS__##_END + + +namespace tensor_array +{ + namespace value + { + using namespace devices; + template + __device__ void warp_reduce_sum(volatile T *sdata, unsigned int tid) + { + if constexpr (blockSize >= 1024) + sdata[tid] += sdata[tid + 512]; + if constexpr (blockSize >= 512) + sdata[tid] += sdata[tid + 256]; + if constexpr (blockSize >= 256) + sdata[tid] += sdata[tid + 128]; + if constexpr (blockSize >= 128) + sdata[tid] += sdata[tid + 64]; + if constexpr (blockSize >= 64) + sdata[tid] += sdata[tid + 32]; + if constexpr (blockSize >= 32) + sdata[tid] += sdata[tid + 16]; + if constexpr (blockSize >= 16) + sdata[tid] += sdata[tid + 8]; + if constexpr (blockSize >= 8) + sdata[tid] += sdata[tid + 4]; + if constexpr (blockSize >= 4) + sdata[tid] += sdata[tid + 2]; + if constexpr (blockSize >= 2) + sdata[tid] += sdata[tid + 1]; + } + + template + __device__ void warp_reduce_max(volatile T *sdata, volatile unsigned int *sindex, unsigned int tid) + { + if constexpr (blockSize >= 1024) + if (sdata[tid] < sdata[tid + 512]) + { + sdata[tid] = sdata[tid + 512]; + sindex[tid] = sindex[tid + 512]; + } + if constexpr (blockSize >= 512) + if (sdata[tid] < sdata[tid + 256]) + { + sdata[tid] = sdata[tid + 256]; + sindex[tid] = sindex[tid + 256]; + } + if constexpr (blockSize >= 256) + if (sdata[tid] < sdata[tid + 128]) + { + sdata[tid] = sdata[tid + 128]; + sindex[tid] = sindex[tid + 128]; + } + if constexpr (blockSize >= 128) + if (sdata[tid] < sdata[tid + 64]) + { + sdata[tid] = sdata[tid + 64]; + sindex[tid] = sindex[tid + 64]; + } + if constexpr (blockSize >= 64) + if (sdata[tid] < sdata[tid + 32]) + { + sdata[tid] = sdata[tid + 32]; + sindex[tid] = sindex[tid + 32]; + } + if constexpr (blockSize >= 32) + if (sdata[tid] < sdata[tid + 16]) + { + sdata[tid] = sdata[tid + 16]; + sindex[tid] = sindex[tid + 16]; + } + if constexpr (blockSize >= 16) + if (sdata[tid] < sdata[tid + 8]) + { + sdata[tid] = sdata[tid + 8]; + sindex[tid] = sindex[tid + 8]; + } + if constexpr 
(blockSize >= 8) + if (sdata[tid] < sdata[tid + 4]) + { + sdata[tid] = sdata[tid + 4]; + sindex[tid] = sindex[tid + 4]; + } + if constexpr (blockSize >= 4) + if (sdata[tid] < sdata[tid + 2]) + { + sdata[tid] = sdata[tid + 2]; + sindex[tid] = sindex[tid + 2]; + } + if constexpr (blockSize >= 2) + if (sdata[tid] < sdata[tid + 1]) + { + sdata[tid] = sdata[tid + 1]; + sindex[tid] = sindex[tid + 1]; + } + } + + template + __device__ void warp_reduce_min(volatile T *sdata, volatile unsigned int *sindex, unsigned int tid) + { + if constexpr (blockSize >= 1024) + if (sdata[tid] > sdata[tid + 512]) + { + sdata[tid] = sdata[tid + 512]; + sindex[tid] = sindex[tid + 512]; + } + if constexpr (blockSize >= 512) + if (sdata[tid] > sdata[tid + 256]) + { + sdata[tid] = sdata[tid + 256]; + sindex[tid] = sindex[tid + 256]; + } + if constexpr (blockSize >= 256) + if (sdata[tid] > sdata[tid + 128]) + { + sdata[tid] = sdata[tid + 128]; + sindex[tid] = sindex[tid + 128]; + } + if constexpr (blockSize >= 128) + if (sdata[tid] > sdata[tid + 64]) + { + sdata[tid] = sdata[tid + 64]; + sindex[tid] = sindex[tid + 64]; + } + if constexpr (blockSize >= 64) + if (sdata[tid] > sdata[tid + 32]) + { + sdata[tid] = sdata[tid + 32]; + sindex[tid] = sindex[tid + 32]; + } + if constexpr (blockSize >= 32) + if (sdata[tid] > sdata[tid + 16]) + { + sdata[tid] = sdata[tid + 16]; + sindex[tid] = sindex[tid + 16]; + } + if constexpr (blockSize >= 16) + if (sdata[tid] > sdata[tid + 8]) + { + sdata[tid] = sdata[tid + 8]; + sindex[tid] = sindex[tid + 8]; + } + if constexpr (blockSize >= 8) + if (sdata[tid] > sdata[tid + 4]) + { + sdata[tid] = sdata[tid + 4]; + sindex[tid] = sindex[tid + 4]; + } + if constexpr (blockSize >= 4) + if (sdata[tid] > sdata[tid + 2]) + { + sdata[tid] = sdata[tid + 2]; + sindex[tid] = sindex[tid + 2]; + } + if constexpr (blockSize >= 2) + if (sdata[tid] > sdata[tid + 1]) + { + sdata[tid] = sdata[tid + 1]; + sindex[tid] = sindex[tid + 1]; + } + } + + template + __global__ void 
array_reduce_sum(T *g_odata, const T *g_idata, unsigned int n) + { + __shared__ T sdata[blockSize]; + unsigned int tid = threadIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + sdata[tid] = 0; + if (blockIdx.x * blockDim.x + tid < n) + sdata[tid] += g_idata[blockIdx.x * blockDim.x + tid]; + __syncthreads(); + if (tid < 512) + warp_reduce_sum(sdata, tid); + if (tid == 0) g_odata[blockIdx.x] = sdata[0]; + } + + template + __global__ void array_reduce_max(T *g_odata, unsigned int *g_oindex, const T *g_idata, unsigned int n) + { + __shared__ T sdata[blockSize]; + __shared__ unsigned int sindex[blockSize]; + unsigned int tid = threadIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + sdata[tid] = std::numeric_limits::min(); + if (blockIdx.x * blockDim.x + tid < n) + sdata[tid] = g_idata[blockIdx.x * blockDim.x + tid]; + __syncthreads(); + if (tid < 512) + warp_reduce_max(sdata, sindex, tid); + if (tid == 0) + { + g_odata[blockIdx.x] = sdata[0]; + g_oindex[blockIdx.x] = sindex[0]; + } + } + + template + __global__ void array_reduce_min(T *g_odata, unsigned int *g_oindex, const T *g_idata, unsigned int n) + { + __shared__ T sdata[blockSize]; + __shared__ unsigned int sindex[blockSize]; + unsigned int tid = threadIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + sdata[tid] = std::numeric_limits::max(); + if (blockIdx.x * blockDim.x + tid < n) + sdata[tid] = g_idata[blockIdx.x * blockDim.x + tid]; + __syncthreads(); + if (tid < 512) + warp_reduce_min(sdata, sindex, tid); + if (tid == 0) + { + g_odata[blockIdx.x] = sdata[0]; + g_oindex[blockIdx.x] = sindex[0]; + } + } + + bool equal_dim_size(const TensorBase&, const TensorBase&); + Tensor multiply(const Tensor&, const Tensor&, bool, const DataBuffer&); + + Tensor reduce_sum(const Tensor& a) + { + std::vector> temp; + temp.push_back(std::make_pair(a, Derivation(values(a.get_buffer().shape(), 1).tensor_cast(a.get_buffer().type(), false), multiply))); + cudaError_t cuda_status; + TensorBase other_buf; + 
void* c_ptr; + devices::Device this_cuda{ devices::CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + const TensorBase& base_a = a.get_buffer(); + cuda_status = cudaMalloc(&c_ptr, base_a.data_size()); + device_memcpy(&c_ptr, this_cuda, base_a.data(), base_a.get_device(), base_a.data_size()); + std::vector shape_c = a.get_buffer().shape(); + std::size_t c_size = a.get_buffer().data_size() / get_sizeof_type(base_a.type()); + constexpr unsigned int thread_value = 1024U; + dim3 block_dim(shape_c[shape_c.size() - 1]); + dim3 grid_dim(c_size / block_dim.x); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE)) \ +{ \ +array_reduce_sum<<>>(static_cast(c_ptr), static_cast(c_ptr), c_size); \ +cuda_status = cudaDeviceSynchronize(); \ +other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ +} + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaFree(c_ptr); + return Tensor(std::move(other_buf)); + } + + Tensor reduce_max(const Tensor& a) + { + std::vector> temp; + temp.push_back(std::make_pair(a, Derivation(values(a.get_buffer().shape(), 1).tensor_cast(a.get_buffer().type(), false), multiply))); + cudaError_t cuda_status; + TensorBase other_buf; + void* c_ptr; + devices::Device this_cuda{ devices::CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + const TensorBase& base_a = a.get_buffer(); + cuda_status = cudaMalloc(&c_ptr, base_a.data_size()); + device_memcpy(&c_ptr, this_cuda, base_a.data(), base_a.get_device(), base_a.data_size()); + std::vector shape_c = a.get_buffer().shape(); + std::size_t c_size = a.get_buffer().data_size() / get_sizeof_type(base_a.type()); + constexpr unsigned int thread_value = 1024U; + dim3 block_dim(shape_c[shape_c.size() - 1]); + dim3 grid_dim(c_size / block_dim.x); +#define 
ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE)) \ +{ \ +array_reduce_sum<<>>(static_cast(c_ptr), static_cast(c_ptr), c_size); \ +cuda_status = cudaDeviceSynchronize(); \ +other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ +} + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaFree(c_ptr); + return Tensor(std::move(other_buf)); + } + + Tensor reduce_min(const Tensor& a) + { + std::vector> temp; + temp.push_back(std::make_pair(a, Derivation(values(a.get_buffer().shape(), 1).tensor_cast(a.get_buffer().type(), false), multiply))); + cudaError_t cuda_status; + TensorBase other_buf; + void* c_ptr; + devices::Device this_cuda{ devices::CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + const TensorBase& base_a = a.get_buffer(); + cuda_status = cudaMalloc(&c_ptr, base_a.data_size()); + device_memcpy(&c_ptr, this_cuda, base_a.data(), base_a.get_device(), base_a.data_size()); + std::vector shape_c = a.get_buffer().shape(); + std::size_t c_size = a.get_buffer().data_size() / get_sizeof_type(base_a.type()); + constexpr unsigned int thread_value = 1024U; + dim3 block_dim(shape_c[shape_c.size() - 1]); + dim3 grid_dim(c_size / block_dim.x); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE)) \ +{ \ +array_reduce_sum<<>>(static_cast(c_ptr), static_cast(c_ptr), c_size); \ +cuda_status = cudaDeviceSynchronize(); \ +other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ +} + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaFree(c_ptr); + return Tensor(std::move(other_buf)); + } + } +} From 25da88c920e03fb9dd56f494d134c1ee85e5a45c Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 17 Jan 2024 05:45:10 +0000 Subject: [PATCH 012/281] update --- .devcontainer/{OpenMP => CPU}/Dockerfile | 0 .../{OpenMP => CPU}/devcontainer.json 
| 0 .../{OpenMP => CPU}/reinstall-cmake.sh | 0 .devcontainer/CUDA/devcontainer.json | 2 +- src/tensor_array/core/CMakeLists.txt | 3 +- .../core/{devices.cu => devices.cc} | 2 +- src/tensor_array/core/devices.hh | 2 +- src/tensor_array/core/tensor.cc | 8 +- src/tensor_array/core/tensor.hh | 9 +- .../core/{tensor_blas.cu => tensor_blas.cc} | 0 src/tensor_array/core/tensor_reduce.cu | 437 +++++++++--------- 11 files changed, 233 insertions(+), 230 deletions(-) rename .devcontainer/{OpenMP => CPU}/Dockerfile (100%) rename .devcontainer/{OpenMP => CPU}/devcontainer.json (100%) rename .devcontainer/{OpenMP => CPU}/reinstall-cmake.sh (100%) rename src/tensor_array/core/{devices.cu => devices.cc} (100%) rename src/tensor_array/core/{tensor_blas.cu => tensor_blas.cc} (100%) diff --git a/.devcontainer/OpenMP/Dockerfile b/.devcontainer/CPU/Dockerfile similarity index 100% rename from .devcontainer/OpenMP/Dockerfile rename to .devcontainer/CPU/Dockerfile diff --git a/.devcontainer/OpenMP/devcontainer.json b/.devcontainer/CPU/devcontainer.json similarity index 100% rename from .devcontainer/OpenMP/devcontainer.json rename to .devcontainer/CPU/devcontainer.json diff --git a/.devcontainer/OpenMP/reinstall-cmake.sh b/.devcontainer/CPU/reinstall-cmake.sh similarity index 100% rename from .devcontainer/OpenMP/reinstall-cmake.sh rename to .devcontainer/CPU/reinstall-cmake.sh diff --git a/.devcontainer/CUDA/devcontainer.json b/.devcontainer/CUDA/devcontainer.json index 4732d30..f9f175c 100644 --- a/.devcontainer/CUDA/devcontainer.json +++ b/.devcontainer/CUDA/devcontainer.json @@ -1,7 +1,7 @@ // For format details, see https://aka.ms/devcontainer.json. 
For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/cpp { - "name": "C++", + "name": "CUDA", "build": { "dockerfile": "Dockerfile" }, diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 6dfc3a4..eee3d4b 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -11,10 +11,9 @@ find_package(CUDAToolkit) if(CUDAToolkit_FOUND) enable_language(CUDA C CXX) -set(CMAKE_CUDA_STANDARD 17) set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION TRUE) -set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "-g -G") # enable cuda-gdb +set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--default-stream per-thread") file(GLOB TensorArray_src "*.cc" "*.cu") diff --git a/src/tensor_array/core/devices.cu b/src/tensor_array/core/devices.cc similarity index 100% rename from src/tensor_array/core/devices.cu rename to src/tensor_array/core/devices.cc index c3a1694..d5d38d2 100644 --- a/src/tensor_array/core/devices.cu +++ b/src/tensor_array/core/devices.cc @@ -14,11 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +#include #include "devices.hh" #include #include #include -#include namespace tensor_array { diff --git a/src/tensor_array/core/devices.hh b/src/tensor_array/core/devices.hh index ca49598..50eb102 100644 --- a/src/tensor_array/core/devices.hh +++ b/src/tensor_array/core/devices.hh @@ -33,7 +33,7 @@ namespace tensor_array enum DeviceType { CPU, - CUDA + CUDA, }; struct Device diff --git a/src/tensor_array/core/tensor.cc b/src/tensor_array/core/tensor.cc index 9b85cac..c3e9326 100644 --- a/src/tensor_array/core/tensor.cc +++ b/src/tensor_array/core/tensor.cc @@ -140,7 +140,7 @@ namespace tensor_array void Tensor::TensorContent::reset_grad() { - std::lock_guard tensor_lock(this->tensor_mutex); + std::lock_guard tensor_lock(this->tensor_mutex); this->grad = zeros(this->buf.shape()).tensor_cast(this->buf.type()).get_buffer(); } @@ -159,7 +159,7 @@ namespace tensor_array this->TensorContent::calc_grad(grad); if (this->can_calc_grad && this->forward_back.empty()) { - std::lock_guard tensor_lock(this->tensor_mutex); + std::lock_guard tensor_lock(this->tensor_mutex); std::forward_list thread_list; for (auto& dat : this->derive_data) if (this->derive_multithread) @@ -174,7 +174,7 @@ namespace tensor_array void Tensor::TensorContent::calc_grad(const Tensor& grad) { - std::lock_guard tensor_lock(this->tensor_mutex); + std::lock_guard tensor_lock(this->tensor_mutex); this->grad = add(this->grad, grad, false).get_buffer(); } @@ -468,8 +468,10 @@ temp_check_data_type = TEMP(temp.first) < TEMP(temp_tensor); return this->tensor_data.use_count(); } + std::mutex calc_grad_mutex; void Tensor::calc_grad() { + std::lock_guard calc_grad_lock(calc_grad_mutex); this->tensor_data->reset_grad(); this->tensor_data->calc_grad(values(this->get_buffer().shape(), 1.f).tensor_cast(this->get_buffer().type())); } diff --git a/src/tensor_array/core/tensor.hh b/src/tensor_array/core/tensor.hh index d3e82df..553f311 100644 --- a/src/tensor_array/core/tensor.hh +++ b/src/tensor_array/core/tensor.hh 
@@ -238,16 +238,13 @@ namespace tensor_array Tensor cosh() const; Tensor tanh() const; Tensor sigmoid() const; + Tensor reduce_sum(unsigned char) const; + Tensor reduce_max(unsigned char) const; + Tensor reduce_min(unsigned char) const; Tensor log() const; #ifdef TENSOR_CONTENT friend Tensor tensor_rand(const std::initializer_list&, unsigned int); - - friend Tensor reduce_sum(const Tensor&); - - friend Tensor reduce_max(const Tensor&); - - friend Tensor reduce_min(const Tensor&); friend Tensor add(const Tensor&, const Tensor&, bool); diff --git a/src/tensor_array/core/tensor_blas.cu b/src/tensor_array/core/tensor_blas.cc similarity index 100% rename from src/tensor_array/core/tensor_blas.cu rename to src/tensor_array/core/tensor_blas.cc diff --git a/src/tensor_array/core/tensor_reduce.cu b/src/tensor_array/core/tensor_reduce.cu index 8d31b06..4a35a51 100644 --- a/src/tensor_array/core/tensor_reduce.cu +++ b/src/tensor_array/core/tensor_reduce.cu @@ -51,244 +51,208 @@ namespace tensor_array namespace value { using namespace devices; - template - __device__ void warp_reduce_sum(volatile T *sdata, unsigned int tid) + + template + __device__ void warp_reduce_sum(T (*sdata)[BatchBlockSize][DimBlockSize][ContentBlockSize], unsigned int value) { - if constexpr (blockSize >= 1024) - sdata[tid] += sdata[tid + 512]; - if constexpr (blockSize >= 512) - sdata[tid] += sdata[tid + 256]; - if constexpr (blockSize >= 256) - sdata[tid] += sdata[tid + 128]; - if constexpr (blockSize >= 128) - sdata[tid] += sdata[tid + 64]; - if constexpr (blockSize >= 64) - sdata[tid] += sdata[tid + 32]; - if constexpr (blockSize >= 32) - sdata[tid] += sdata[tid + 16]; - if constexpr (blockSize >= 16) - sdata[tid] += sdata[tid + 8]; - if constexpr (blockSize >= 8) - sdata[tid] += sdata[tid + 4]; - if constexpr (blockSize >= 4) - sdata[tid] += sdata[tid + 2]; - if constexpr (blockSize >= 2) - sdata[tid] += sdata[tid + 1]; + (*sdata)[threadIdx.x][threadIdx.z][threadIdx.y] += 
(*sdata)[threadIdx.x][threadIdx.z + value][threadIdx.y]; } - template - __device__ void warp_reduce_max(volatile T *sdata, volatile unsigned int *sindex, unsigned int tid) + template + __device__ void warp_reduce_functions + ( + void(*func)(T (*)[BatchBlockSize][DimBlockSize][ContentBlockSize], unsigned int), + T (*sdata)[BatchBlockSize][DimBlockSize][ContentBlockSize], + Args ... args + ) { - if constexpr (blockSize >= 1024) - if (sdata[tid] < sdata[tid + 512]) - { - sdata[tid] = sdata[tid + 512]; - sindex[tid] = sindex[tid + 512]; - } - if constexpr (blockSize >= 512) - if (sdata[tid] < sdata[tid + 256]) - { - sdata[tid] = sdata[tid + 256]; - sindex[tid] = sindex[tid + 256]; - } - if constexpr (blockSize >= 256) - if (sdata[tid] < sdata[tid + 128]) - { - sdata[tid] = sdata[tid + 128]; - sindex[tid] = sindex[tid + 128]; - } - if constexpr (blockSize >= 128) - if (sdata[tid] < sdata[tid + 64]) - { - sdata[tid] = sdata[tid + 64]; - sindex[tid] = sindex[tid + 64]; - } - if constexpr (blockSize >= 64) - if (sdata[tid] < sdata[tid + 32]) - { - sdata[tid] = sdata[tid + 32]; - sindex[tid] = sindex[tid + 32]; - } - if constexpr (blockSize >= 32) - if (sdata[tid] < sdata[tid + 16]) - { - sdata[tid] = sdata[tid + 16]; - sindex[tid] = sindex[tid + 16]; - } - if constexpr (blockSize >= 16) - if (sdata[tid] < sdata[tid + 8]) - { - sdata[tid] = sdata[tid + 8]; - sindex[tid] = sindex[tid + 8]; - } - if constexpr (blockSize >= 8) - if (sdata[tid] < sdata[tid + 4]) - { - sdata[tid] = sdata[tid + 4]; - sindex[tid] = sindex[tid + 4]; - } - if constexpr (blockSize >= 4) - if (sdata[tid] < sdata[tid + 2]) - { - sdata[tid] = sdata[tid + 2]; - sindex[tid] = sindex[tid + 2]; - } - if constexpr (blockSize >= 2) - if (sdata[tid] < sdata[tid + 1]) - { - sdata[tid] = sdata[tid + 1]; - sindex[tid] = sindex[tid + 1]; - } + if constexpr (DimBlockSize >= 1024) if (threadIdx.z < 512) func(sdata, 512, args...); + if constexpr (DimBlockSize >= 512) if (threadIdx.z < 256) func(sdata, 256, args...); 
+ if constexpr (DimBlockSize >= 256) if (threadIdx.z < 128) func(sdata, 128, args...); + if constexpr (DimBlockSize >= 128) if (threadIdx.z < 64) func(sdata, 64, args...); + if constexpr (DimBlockSize >= 64) if (threadIdx.z < 32) func(sdata, 32, args...); + if constexpr (DimBlockSize >= 32) if (threadIdx.z < 16) func(sdata, 16, args...); + if constexpr (DimBlockSize >= 16) if (threadIdx.z < 8) func(sdata, 8, args...); + if constexpr (DimBlockSize >= 8) if (threadIdx.z < 4) func(sdata, 4, args...); + if constexpr (DimBlockSize >= 4) if (threadIdx.z < 2) func(sdata, 2, args...); + if constexpr (DimBlockSize >= 2) if (threadIdx.z < 1) func(sdata, 1, args...); } - template - __device__ void warp_reduce_min(volatile T *sdata, volatile unsigned int *sindex, unsigned int tid) + template + __device__ void warp_reduce_max(T (*sdata)[BatchBlockSize][DimBlockSize][ContentBlockSize], unsigned int value, unsigned int (*sindex)[BatchBlockSize][DimBlockSize][ContentBlockSize]) { - if constexpr (blockSize >= 1024) - if (sdata[tid] > sdata[tid + 512]) - { - sdata[tid] = sdata[tid + 512]; - sindex[tid] = sindex[tid + 512]; - } - if constexpr (blockSize >= 512) - if (sdata[tid] > sdata[tid + 256]) - { - sdata[tid] = sdata[tid + 256]; - sindex[tid] = sindex[tid + 256]; - } - if constexpr (blockSize >= 256) - if (sdata[tid] > sdata[tid + 128]) - { - sdata[tid] = sdata[tid + 128]; - sindex[tid] = sindex[tid + 128]; - } - if constexpr (blockSize >= 128) - if (sdata[tid] > sdata[tid + 64]) - { - sdata[tid] = sdata[tid + 64]; - sindex[tid] = sindex[tid + 64]; - } - if constexpr (blockSize >= 64) - if (sdata[tid] > sdata[tid + 32]) - { - sdata[tid] = sdata[tid + 32]; - sindex[tid] = sindex[tid + 32]; - } - if constexpr (blockSize >= 32) - if (sdata[tid] > sdata[tid + 16]) - { - sdata[tid] = sdata[tid + 16]; - sindex[tid] = sindex[tid + 16]; - } - if constexpr (blockSize >= 16) - if (sdata[tid] > sdata[tid + 8]) - { - sdata[tid] = sdata[tid + 8]; - sindex[tid] = sindex[tid + 8]; - } - if 
constexpr (blockSize >= 8) - if (sdata[tid] > sdata[tid + 4]) - { - sdata[tid] = sdata[tid + 4]; - sindex[tid] = sindex[tid + 4]; - } - if constexpr (blockSize >= 4) - if (sdata[tid] > sdata[tid + 2]) - { - sdata[tid] = sdata[tid + 2]; - sindex[tid] = sindex[tid + 2]; - } - if constexpr (blockSize >= 2) - if (sdata[tid] > sdata[tid + 1]) - { - sdata[tid] = sdata[tid + 1]; - sindex[tid] = sindex[tid + 1]; - } + if (sdata[threadIdx.x][threadIdx.z][threadIdx.y] < sdata[threadIdx.x][threadIdx.z + value][threadIdx.y]) + { + sdata[threadIdx.x][threadIdx.z + value][threadIdx.y] = sdata[threadIdx.x][threadIdx.z + value][threadIdx.y]; + sindex[threadIdx.x][threadIdx.z + value][threadIdx.y] = sindex[threadIdx.x][threadIdx.z + value][threadIdx.y]; + } } - template - __global__ void array_reduce_sum(T *g_odata, const T *g_idata, unsigned int n) + template + __device__ void warp_reduce_min(T (*sdata)[BatchBlockSize][DimBlockSize][ContentBlockSize], unsigned int value, unsigned int (*sindex)[BatchBlockSize][DimBlockSize][ContentBlockSize]) { - __shared__ T sdata[blockSize]; - unsigned int tid = threadIdx.x; - unsigned int gridSize = blockDim.x * gridDim.x; - sdata[tid] = 0; - if (blockIdx.x * blockDim.x + tid < n) - sdata[tid] += g_idata[blockIdx.x * blockDim.x + tid]; + if (sdata[threadIdx.x][threadIdx.z][threadIdx.y] > sdata[threadIdx.x][threadIdx.z + value][threadIdx.y]) + { + sdata[threadIdx.x][threadIdx.z][threadIdx.y] = sdata[threadIdx.x][threadIdx.z + value][threadIdx.y]; + sindex[threadIdx.x][threadIdx.z][threadIdx.y] = sindex[threadIdx.x][threadIdx.z + value][threadIdx.y]; + } + } + + template + __global__ void array_reduce_sum(T *g_odata, const T *g_idata, unsigned int batch_size, unsigned int n, unsigned int content_size) + { + __shared__ T sdata[BatchBlockSize][BlockSize][ContentBlockSize]; + unsigned int batch_id = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int content_id = blockIdx.y * blockDim.y + threadIdx.y; + unsigned int tid = threadIdx.z; + unsigned 
int gridSize = blockDim.z * gridDim.z; + sdata[threadIdx.x][threadIdx.z][threadIdx.y] = 0; + if (batch_id < batch_size && blockIdx.z * blockDim.z + tid < n && content_id < content_size) + sdata[threadIdx.x][threadIdx.z][threadIdx.y] += + g_idata + [ + batch_id * n * content_size + + tid * content_size + + content_id + ]; __syncthreads(); if (tid < 512) - warp_reduce_sum(sdata, tid); - if (tid == 0) g_odata[blockIdx.x] = sdata[0]; + warp_reduce_functions(&warp_reduce_sum, &sdata); + if (tid == 0) + g_odata[ + batch_id * blockDim.z * content_size + + tid * content_size + + content_id + ] = sdata[threadIdx.x][threadIdx.z][threadIdx.y]; } - template - __global__ void array_reduce_max(T *g_odata, unsigned int *g_oindex, const T *g_idata, unsigned int n) + template + __global__ void array_reduce_max(T *g_odata, unsigned int *g_oindex, const T *g_idata, unsigned int batch_size, unsigned int n, unsigned int content_size) { - __shared__ T sdata[blockSize]; - __shared__ unsigned int sindex[blockSize]; - unsigned int tid = threadIdx.x; - unsigned int gridSize = blockDim.x * gridDim.x; - sdata[tid] = std::numeric_limits::min(); - if (blockIdx.x * blockDim.x + tid < n) - sdata[tid] = g_idata[blockIdx.x * blockDim.x + tid]; + __shared__ T sdata[BatchBlockSize][BlockSize][ContentBlockSize]; + __shared__ unsigned int sindex[BatchBlockSize][BlockSize][ContentBlockSize]; + unsigned int batch_id = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int content_id = blockIdx.y * blockDim.y + threadIdx.y; + unsigned int tid = threadIdx.z; + unsigned int gridSize = blockDim.z * gridDim.z; + sdata[threadIdx.x][threadIdx.z][threadIdx.y] = -std::numeric_limits::infinity(); + sindex[threadIdx.x][threadIdx.z][threadIdx.y] = threadIdx.z; + if (batch_id < batch_size && blockIdx.z * blockDim.z + tid < n && content_id < content_size) + sdata[threadIdx.x][threadIdx.z][threadIdx.y] = + g_idata + [ + batch_id * n * content_size + + tid * content_size + + content_id + ]; __syncthreads(); if (tid < 
512) - warp_reduce_max(sdata, sindex, tid); + warp_reduce_functions(&warp_reduce_max, &sdata, &sindex); if (tid == 0) { - g_odata[blockIdx.x] = sdata[0]; - g_oindex[blockIdx.x] = sindex[0]; + g_odata[ + batch_id * blockDim.z * content_size + + blockIdx.z * content_size + + content_id + ] = sdata[threadIdx.x][threadIdx.z][threadIdx.y]; + g_oindex[ + batch_id * blockDim.z * content_size + + blockIdx.z * content_size + + content_id + ] = sindex[threadIdx.x][threadIdx.z][threadIdx.y]; } } - template - __global__ void array_reduce_min(T *g_odata, unsigned int *g_oindex, const T *g_idata, unsigned int n) + template + __global__ void array_reduce_min(T *g_odata, unsigned int *g_oindex, const T *g_idata, unsigned int batch_size, unsigned int n, unsigned int content_size) { - __shared__ T sdata[blockSize]; - __shared__ unsigned int sindex[blockSize]; - unsigned int tid = threadIdx.x; - unsigned int gridSize = blockDim.x * gridDim.x; - sdata[tid] = std::numeric_limits::max(); - if (blockIdx.x * blockDim.x + tid < n) - sdata[tid] = g_idata[blockIdx.x * blockDim.x + tid]; + __shared__ T sdata[BatchBlockSize][BlockSize][ContentBlockSize]; + __shared__ unsigned int sindex[BatchBlockSize][BlockSize][ContentBlockSize]; + unsigned int batch_id = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int content_id = blockIdx.y * blockDim.y + threadIdx.y; + unsigned int tid = threadIdx.z; + unsigned int gridSize = blockDim.z * gridDim.z; + sdata[threadIdx.x][threadIdx.z][threadIdx.y] = std::numeric_limits::infinity(); + sindex[threadIdx.x][threadIdx.z][threadIdx.y] = threadIdx.z; + if (batch_id < batch_size && blockIdx.z * blockDim.z + tid < n && content_id < content_size) + sdata[threadIdx.x][threadIdx.z][threadIdx.y] = + g_idata + [ + batch_id * n * content_size + + tid * content_size + + content_id + ]; __syncthreads(); if (tid < 512) - warp_reduce_min(sdata, sindex, tid); + warp_reduce_functions(&warp_reduce_min, &sdata, &sindex); if (tid == 0) { - g_odata[blockIdx.x] = sdata[0]; - 
g_oindex[blockIdx.x] = sindex[0]; + g_odata[ + batch_id * blockDim.z * content_size + + blockIdx.z * content_size + + content_id + ] = sdata[threadIdx.x][threadIdx.z][threadIdx.y]; + g_oindex[ + batch_id * blockDim.z * content_size + + blockIdx.z * content_size + + content_id + ] = sindex[threadIdx.x][threadIdx.z][threadIdx.y]; } } bool equal_dim_size(const TensorBase&, const TensorBase&); - Tensor multiply(const Tensor&, const Tensor&, bool, const DataBuffer&); + Tensor derive_reduce_sum(const Tensor& a, const Tensor& b, bool is_derive, const DataBuffer& databuf) + { + return multiply(a, b, is_derive, databuf); + } - Tensor reduce_sum(const Tensor& a) + Tensor Tensor::reduce_sum(unsigned char dim) const { + std::vector shape_c = this->get_buffer().shape(); + assert(dim < shape_c.size()); std::vector> temp; - temp.push_back(std::make_pair(a, Derivation(values(a.get_buffer().shape(), 1).tensor_cast(a.get_buffer().type(), false), multiply))); + temp.push_back(std::make_pair(*this, Derivation(values(shape_c, 1).tensor_cast(this->get_buffer().type(), false), derive_reduce_sum))); cudaError_t cuda_status; TensorBase other_buf; + void* c_ptr; devices::Device this_cuda{ devices::CUDA }; cuda_status = cudaGetDevice(&this_cuda.index); cudaDeviceProp cu_dev_prop; cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - const TensorBase& base_a = a.get_buffer(); + const TensorBase& base_a = this->get_buffer(); cuda_status = cudaMalloc(&c_ptr, base_a.data_size()); device_memcpy(&c_ptr, this_cuda, base_a.data(), base_a.get_device(), base_a.data_size()); - std::vector shape_c = a.get_buffer().shape(); - std::size_t c_size = a.get_buffer().data_size() / get_sizeof_type(base_a.type()); - constexpr unsigned int thread_value = 1024U; - dim3 block_dim(shape_c[shape_c.size() - 1]); - dim3 grid_dim(c_size / block_dim.x); + + unsigned int dim_x = 1; + for (unsigned char i = 0; i < dim; i++) + dim_x *= shape_c[i]; + + unsigned int dim_y = 1; + for (unsigned char i = dim+1; 
i < shape_c.size(); i++) + dim_y *= shape_c[i]; + + constexpr unsigned int thread_value_x = 8U; + constexpr unsigned int thread_value_y = 16U; + constexpr unsigned int thread_value_z = 8U; + dim3 block_dim(thread_value_x, thread_value_y, thread_value_z); + dim3 grid_dim + ( + dim_x / block_dim.x + (dim_x % block_dim.x ? 1U : 0U), + dim_y / block_dim.y + (dim_y % block_dim.y ? 1U : 0U), + shape_c[dim] / block_dim.z + (shape_c[dim] % block_dim.z ? 1U : 0U) + ); #define ADD_CODE(TYPE) \ -if(a.get_buffer().type() == typeid(TYPE)) \ +if(base_a.type() == typeid(TYPE)) \ { \ -array_reduce_sum<<>>(static_cast(c_ptr), static_cast(c_ptr), c_size); \ +while (shape_c[dim] > 1) \ +{ \ +array_reduce_sum<<>>(static_cast(c_ptr), static_cast(c_ptr), dim_x, shape_c[dim], dim_y); \ cuda_status = cudaDeviceSynchronize(); \ -other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ +shape_c[dim] = grid_dim.z; \ +grid_dim.z = grid_dim.z / block_dim.z + (grid_dim.z % block_dim.z ? 1U : 0U); \ +} \ +other_buf = TensorBase(typeid(TYPE), shape_c, c_ptr, this_cuda); \ } LOOP(USING_DATA_TYPE); #undef ADD_CODE @@ -296,10 +260,10 @@ other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); return Tensor(std::move(other_buf)); } - Tensor reduce_max(const Tensor& a) + Tensor Tensor::reduce_max(unsigned char dim) const { - std::vector> temp; - temp.push_back(std::make_pair(a, Derivation(values(a.get_buffer().shape(), 1).tensor_cast(a.get_buffer().type(), false), multiply))); + std::vector shape_c = this->get_buffer().shape(); + assert(dim < shape_c.size()); cudaError_t cuda_status; TensorBase other_buf; void* c_ptr; @@ -307,31 +271,52 @@ other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); cuda_status = cudaGetDevice(&this_cuda.index); cudaDeviceProp cu_dev_prop; cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - const TensorBase& base_a = a.get_buffer(); + const TensorBase& base_a = this->get_buffer(); 
cuda_status = cudaMalloc(&c_ptr, base_a.data_size()); device_memcpy(&c_ptr, this_cuda, base_a.data(), base_a.get_device(), base_a.data_size()); - std::vector shape_c = a.get_buffer().shape(); - std::size_t c_size = a.get_buffer().data_size() / get_sizeof_type(base_a.type()); - constexpr unsigned int thread_value = 1024U; - dim3 block_dim(shape_c[shape_c.size() - 1]); - dim3 grid_dim(c_size / block_dim.x); + + unsigned int dim_x = 1; + for (unsigned char i = 0; i < dim; i++) + dim_x *= shape_c[i]; + + unsigned int dim_y = 1; + for (unsigned char i = dim+1; i < shape_c.size(); i++) + dim_y *= shape_c[i]; + + constexpr unsigned int thread_value_x = 8U; + constexpr unsigned int thread_value_y = 16U; + constexpr unsigned int thread_value_z = 8U; + dim3 block_dim(thread_value_x, thread_value_y, thread_value_z); + dim3 grid_dim + ( + dim_x / block_dim.x + (dim_x % block_dim.x ? 1U : 0U), + dim_y / block_dim.y + (dim_y % block_dim.y ? 1U : 0U), + shape_c[dim] / block_dim.z + (shape_c[dim] % block_dim.z ? 1U : 0U) + ); #define ADD_CODE(TYPE) \ -if(a.get_buffer().type() == typeid(TYPE)) \ +if(base_a.type() == typeid(TYPE)) \ +{ \ +while (shape_c[dim] > 1) \ { \ -array_reduce_sum<<>>(static_cast(c_ptr), static_cast(c_ptr), c_size); \ +array_reduce_sum<<>>(static_cast(c_ptr), static_cast(c_ptr), dim_x, shape_c[dim], dim_y); \ cuda_status = cudaDeviceSynchronize(); \ -other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ +shape_c[dim] = grid_dim.z; \ +grid_dim.z = grid_dim.z / block_dim.z + (grid_dim.z % block_dim.z ? 
1U : 0U); \ +} \ +other_buf = TensorBase(typeid(TYPE), base_a.shape(), c_ptr, this_cuda); \ } LOOP(USING_DATA_TYPE); #undef ADD_CODE + std::vector> temp; + temp.push_back(std::make_pair(*this, Derivation(values(this->get_buffer().shape(), 1).tensor_cast(this->get_buffer().type(), false), derive_reduce_sum))); cuda_status = cudaFree(c_ptr); return Tensor(std::move(other_buf)); } - Tensor reduce_min(const Tensor& a) + Tensor Tensor::reduce_min(unsigned char dim) const { - std::vector> temp; - temp.push_back(std::make_pair(a, Derivation(values(a.get_buffer().shape(), 1).tensor_cast(a.get_buffer().type(), false), multiply))); + std::vector shape_c = this->get_buffer().shape(); + assert(dim < shape_c.size()); cudaError_t cuda_status; TensorBase other_buf; void* c_ptr; @@ -339,23 +324,43 @@ other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); cuda_status = cudaGetDevice(&this_cuda.index); cudaDeviceProp cu_dev_prop; cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - const TensorBase& base_a = a.get_buffer(); + const TensorBase& base_a = this->get_buffer(); cuda_status = cudaMalloc(&c_ptr, base_a.data_size()); device_memcpy(&c_ptr, this_cuda, base_a.data(), base_a.get_device(), base_a.data_size()); - std::vector shape_c = a.get_buffer().shape(); - std::size_t c_size = a.get_buffer().data_size() / get_sizeof_type(base_a.type()); - constexpr unsigned int thread_value = 1024U; - dim3 block_dim(shape_c[shape_c.size() - 1]); - dim3 grid_dim(c_size / block_dim.x); + unsigned int dim_x = 1; + for (unsigned char i = 0; i < dim; i++) + dim_x *= shape_c[i]; + + unsigned int dim_y = 1; + for (unsigned char i = dim+1; i < shape_c.size(); i++) + dim_y *= shape_c[i]; + + constexpr unsigned int thread_value_x = 8U; + constexpr unsigned int thread_value_y = 16U; + constexpr unsigned int thread_value_z = 8U; + dim3 block_dim(thread_value_x, thread_value_y, thread_value_z); + dim3 grid_dim + ( + dim_x / block_dim.x + (dim_x % block_dim.x ? 
1U : 0U), + dim_y / block_dim.y + (dim_y % block_dim.y ? 1U : 0U), + shape_c[dim] / block_dim.z + (shape_c[dim] % block_dim.z ? 1U : 0U) + ); #define ADD_CODE(TYPE) \ -if(a.get_buffer().type() == typeid(TYPE)) \ +if(base_a.type() == typeid(TYPE)) \ +{ \ +while (shape_c[dim] > 1) \ { \ -array_reduce_sum<<>>(static_cast(c_ptr), static_cast(c_ptr), c_size); \ +array_reduce_sum<<>>(static_cast(c_ptr), static_cast(c_ptr), dim_x, shape_c[dim], dim_y); \ cuda_status = cudaDeviceSynchronize(); \ -other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ +shape_c[dim] = grid_dim.z; \ +grid_dim.z = grid_dim.z / block_dim.z + (grid_dim.z % block_dim.z ? 1U : 0U); \ +} \ +other_buf = TensorBase(typeid(TYPE), base_a.shape(), c_ptr, this_cuda); \ } LOOP(USING_DATA_TYPE); #undef ADD_CODE + std::vector> temp; + temp.push_back(std::make_pair(*this, Derivation(values(this->get_buffer().shape(), 1).tensor_cast(this->get_buffer().type(), false), derive_reduce_sum))); cuda_status = cudaFree(c_ptr); return Tensor(std::move(other_buf)); } From 7214394d4f89d8abf1f14a0b026db4056d7138bd Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 23 Jan 2024 05:02:13 +0000 Subject: [PATCH 013/281] _ --- .../core/{data_type_wrapper.cu => data_type_wrapper.cc} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/tensor_array/core/{data_type_wrapper.cu => data_type_wrapper.cc} (100%) diff --git a/src/tensor_array/core/data_type_wrapper.cu b/src/tensor_array/core/data_type_wrapper.cc similarity index 100% rename from src/tensor_array/core/data_type_wrapper.cu rename to src/tensor_array/core/data_type_wrapper.cc From 12f72fa3908d071003ed3a24e453236acd979147 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 26 Jan 2024 06:16:33 +0000 Subject: [PATCH 014/281] changed matmul --- README.md | 3 +++ src/tensor_array/core/tensor_blas.cc | 5 ++++- 2 files changed, 7 insertions(+), 1 
deletion(-) diff --git a/README.md b/README.md index c845edb..a2c4f17 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ # Tensor-Array ![C++](https://img.shields.io/badge/C%2B%2B-17-blue) +[![Docker Image Size with architecture (latest by date/latest semver)](https://img.shields.io/docker/image-size/noobwastaken/tensor-array) +](https://hub.docker.com/repository/docker/noobwastaken/tensor-array/general) + A C++ Tensor library that can be used to work with machine learning or deep learning project. diff --git a/src/tensor_array/core/tensor_blas.cc b/src/tensor_array/core/tensor_blas.cc index a260ccb..1bb16e4 100644 --- a/src/tensor_array/core/tensor_blas.cc +++ b/src/tensor_array/core/tensor_blas.cc @@ -207,7 +207,10 @@ namespace tensor_array c_ptr, convert_cuda_type(c_type), shape_a.end()[-2], 1, batch_size, convert_cuda_type(c_type), CUBLAS_GEMM_DEFAULT); blasStat = cublasDestroy(blasHandle); - TensorBase value_buf(c_type, { batch_size, shape_a.end()[-2] , shape_b.end()[-1] }, c_ptr, this_cuda); + std::vector out_dims = shape_a; + out_dims[out_dims.size() - 1] = shape_b.end()[-1]; + + TensorBase value_buf(c_type, out_dims, c_ptr, this_cuda); cudaStat = cudaFree(c_ptr); return Tensor(std::move(value_buf), std::move(temp)); } From ea239d0d4c340b3d248bbd256a1ee4be5d8bbbd0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:14:26 +0700 Subject: [PATCH 015/281] Create SECURITY.md security? 
--- SECURITY.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..c26c3f5 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,10 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +| ------- | ------------------ | +| 2024 | :white_check_mark: | +| 2023 | :x: | + +## Reporting a Vulnerability From b3f462b198de6208a54f51079df28f42cbcf65b1 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:15:41 +0700 Subject: [PATCH 016/281] Create dependabot.yml --- .github/dependabot.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..5990d9c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. 
+# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" From 9b2837eaf01b73c8560eff49de2536d78b5375b4 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:18:50 +0700 Subject: [PATCH 017/281] Delete .github/dependabot.yml --- .github/dependabot.yml | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 5990d9c..0000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,11 +0,0 @@ -# To get started with Dependabot version updates, you'll need to specify which -# package ecosystems to update and where the package manifests are located. 
-# Please see the documentation for all configuration options: -# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file - -version: 2 -updates: - - package-ecosystem: "" # See documentation for possible values - directory: "/" # Location of package manifests - schedule: - interval: "weekly" From 42db8ba190798335fea4262009575352809cbadf Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:22:18 +0700 Subject: [PATCH 018/281] Update cmake-single-platform.yml --- .github/workflows/cmake-single-platform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index 05991d2..608b760 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - cuda-version: [ "12.2", "12.3" ] + cuda-version: [ "12.9" ] steps: - uses: actions/checkout@v3 From e01961f65eff4dd08446d0f6df40e2c86d0371b2 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:33:31 +0700 Subject: [PATCH 019/281] Update devices.cc --- src/tensor_array/core/devices.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tensor_array/core/devices.cc b/src/tensor_array/core/devices.cc index d5d38d2..825e602 100644 --- a/src/tensor_array/core/devices.cc +++ b/src/tensor_array/core/devices.cc @@ -54,7 +54,7 @@ namespace tensor_array } else { - void* temp_data = std::malloc(count); + void* temp_data = malloc(count); device_memcpy(temp_data, DEVICE_CPU_0, src, src_dev, count); device_memcpy(dst, dst_dev, temp_data, DEVICE_CPU_0, count); std::free(temp_data); @@ -84,7 +84,7 @@ namespace tensor_array } else { - void* temp_data = std::malloc(count); + void* temp_data = malloc(count); 
device_memcpy(temp_data, DEVICE_CPU_0, src, src_dev, count, stream); device_memcpy(dst, dst_dev, temp_data, DEVICE_CPU_0, count, stream); std::free(temp_data); @@ -149,7 +149,7 @@ void* operator new(size_t count, tensor_array::devices::Device dev) switch (dev.dev_t) { case tensor_array::devices::CPU: - m_alloc_dat = std::malloc(count); + m_alloc_dat = malloc(count); break; case tensor_array::devices::CUDA: { @@ -173,7 +173,7 @@ void* operator new(size_t count, tensor_array::devices::Device dev, void* stream switch (dev.dev_t) { case tensor_array::devices::CPU: - m_alloc_dat = std::malloc(count); + m_alloc_dat = malloc(count); break; case tensor_array::devices::CUDA: { From da92684ae8305d8ed22ba5860bca58b2cf410aa2 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:35:13 +0700 Subject: [PATCH 020/281] Update codeql.yml --- .github/workflows/codeql.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index d374858..47fd3b7 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -38,7 +38,7 @@ jobs: fail-fast: false matrix: language: [ 'c-cpp' ] - cuda-version: [ "12.2", "12.3" ] + cuda-version: [ "12.9" ] # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both From 87e766dfb9c625a0569e47aa293ad42ca5a335a3 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:40:27 +0700 Subject: [PATCH 021/281] Update devices.cc --- src/tensor_array/core/devices.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/tensor_array/core/devices.cc b/src/tensor_array/core/devices.cc index 825e602..6be0bef 100644 --- 
a/src/tensor_array/core/devices.cc +++ b/src/tensor_array/core/devices.cc @@ -16,6 +16,7 @@ limitations under the License. #include #include "devices.hh" +#include #include #include #include @@ -54,7 +55,7 @@ namespace tensor_array } else { - void* temp_data = malloc(count); + void* temp_data = std::malloc(count); device_memcpy(temp_data, DEVICE_CPU_0, src, src_dev, count); device_memcpy(dst, dst_dev, temp_data, DEVICE_CPU_0, count); std::free(temp_data); @@ -84,7 +85,7 @@ namespace tensor_array } else { - void* temp_data = malloc(count); + void* temp_data = std::malloc(count); device_memcpy(temp_data, DEVICE_CPU_0, src, src_dev, count, stream); device_memcpy(dst, dst_dev, temp_data, DEVICE_CPU_0, count, stream); std::free(temp_data); @@ -149,13 +150,13 @@ void* operator new(size_t count, tensor_array::devices::Device dev) switch (dev.dev_t) { case tensor_array::devices::CPU: - m_alloc_dat = malloc(count); + m_alloc_dat = std::malloc(count); break; case tensor_array::devices::CUDA: { cudaError_t cuda_status = cudaGetDevice(&temp); cuda_status = cudaSetDevice(dev.index); - cuda_status = cudaMalloc(&m_alloc_dat, count); + cuda_status = cudamalloc(&m_alloc_dat, count); cuda_status = cudaSetDevice(temp); } break; @@ -173,13 +174,13 @@ void* operator new(size_t count, tensor_array::devices::Device dev, void* stream switch (dev.dev_t) { case tensor_array::devices::CPU: - m_alloc_dat = malloc(count); + m_alloc_dat = std::malloc(count); break; case tensor_array::devices::CUDA: { cudaError_t cuda_status = cudaGetDevice(&temp); cuda_status = cudaSetDevice(dev.index); - cuda_status = cudaMallocAsync(&m_alloc_dat, count, static_cast(stream)); + cuda_status = cudamallocAsync(&m_alloc_dat, count, static_cast(stream)); cuda_status = cudaSetDevice(temp); } break; From b2701fd8985860ee28d936921cb661227f5e0310 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:44:02 +0700 Subject: [PATCH 022/281] Update devices.cc --- 
src/tensor_array/core/devices.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tensor_array/core/devices.cc b/src/tensor_array/core/devices.cc index 6be0bef..4786101 100644 --- a/src/tensor_array/core/devices.cc +++ b/src/tensor_array/core/devices.cc @@ -156,7 +156,7 @@ void* operator new(size_t count, tensor_array::devices::Device dev) { cudaError_t cuda_status = cudaGetDevice(&temp); cuda_status = cudaSetDevice(dev.index); - cuda_status = cudamalloc(&m_alloc_dat, count); + cuda_status = cudaMalloc(&m_alloc_dat, count); cuda_status = cudaSetDevice(temp); } break; @@ -180,7 +180,7 @@ void* operator new(size_t count, tensor_array::devices::Device dev, void* stream { cudaError_t cuda_status = cudaGetDevice(&temp); cuda_status = cudaSetDevice(dev.index); - cuda_status = cudamallocAsync(&m_alloc_dat, count, static_cast(stream)); + cuda_status = cudaMallocAsync(&m_alloc_dat, count, static_cast(stream)); cuda_status = cudaSetDevice(temp); } break; From 8aeef616b9a912aba0678da44a8dd0b949778318 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:47:06 +0700 Subject: [PATCH 023/281] Update devices.cc --- src/tensor_array/core/devices.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tensor_array/core/devices.cc b/src/tensor_array/core/devices.cc index 4786101..fce05f8 100644 --- a/src/tensor_array/core/devices.cc +++ b/src/tensor_array/core/devices.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include #include "devices.hh" +#include #include #include #include From 45ac92f9339c89e2368bbfc6ae104d5ead72baf0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 13:56:52 +0700 Subject: [PATCH 024/281] Update tensor.cu --- src/tensor_array/core/tensor.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tensor_array/core/tensor.cu b/src/tensor_array/core/tensor.cu index 9db96a8..5587200 100644 --- a/src/tensor_array/core/tensor.cu +++ b/src/tensor_array/core/tensor.cu @@ -323,7 +323,7 @@ namespace tensor_array cudaStat = cudaGetLastError(); if (cudaStat != cudaSuccess) { - printf("CUDA error: %s\n", cudaGetErrorString(cudaStat)); + std::printf("CUDA error: %s\n", cudaGetErrorString(cudaStat)); } TensorBase value_buf(a.get_buffer().type(), { shape_a.begin()[0], shape_a.begin()[2], shape_a.begin()[1], shape_a.end()[-1]}, c_ptr, this_cuda); cudaStat = cudaFree(c_ptr); @@ -381,7 +381,7 @@ return values0(list_dim, value); \ cudaStatus = cudaGetLastError(); if (cudaStatus != cudaSuccess) { - printf("CUDA error: %s\n", cudaGetErrorString(cudaStatus)); + std::printf("CUDA error: %s\n", cudaGetErrorString(cudaStatus)); } TensorBase other_buf(typeid(float), list_dim, dev_ptr, this_cuda); cudaStatus = cudaFree(dev_ptr); @@ -416,7 +416,7 @@ arr_more_than<<>>(c_ptr, static_cast(base_a.da cuda_status = cudaGetLastError(); if (cuda_status != cudaSuccess) { - printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); + std::printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); } TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); cuda_status = cudaFree(c_ptr); From 9afdd6779664b81f3f4f7b15bc62c6c7ff1adaea Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 14:03:12 +0700 Subject: [PATCH 025/281] Update tensor_cast.cu --- src/tensor_array/core/tensor_cast.cu | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/src/tensor_array/core/tensor_cast.cu b/src/tensor_array/core/tensor_cast.cu index cbf7c57..da01ab5 100644 --- a/src/tensor_array/core/tensor_cast.cu +++ b/src/tensor_array/core/tensor_cast.cu @@ -19,6 +19,7 @@ limitations under the License. #include #include #include +#include #ifndef TENSOR_CONTENT #define TENSOR_CONTENT #include "tensor.hh" @@ -93,7 +94,7 @@ type_casting<<>>(out_ptr, static_cast(base_of_ cuda_status = cudaGetLastError(); if (cuda_status != cudaSuccess) { - printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); + std::printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); } std::type_index test = typeid(T); if (dynamic_type_size.find(test) == dynamic_type_size.end()) From 323740c722cf84bf41082f731cc2803c006e0038 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 14:04:45 +0700 Subject: [PATCH 026/281] Update tensor_convolution.cu --- src/tensor_array/core/tensor_convolution.cu | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/tensor_array/core/tensor_convolution.cu b/src/tensor_array/core/tensor_convolution.cu index ae2489f..38e1489 100644 --- a/src/tensor_array/core/tensor_convolution.cu +++ b/src/tensor_array/core/tensor_convolution.cu @@ -17,6 +17,7 @@ limitations under the License. 
#include #include #include +#include #include #include #include @@ -178,7 +179,7 @@ kernel_derive_conv_padding<<>>(static_cast(out_ptr), cuda_status = cudaGetLastError(); if (cuda_status != cudaSuccess) { - printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); + std::printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); } TensorBase value_buf(a.get_buffer().type(), new_shape, out_ptr, this_cuda); cuda_status = cudaFree(out_ptr); @@ -262,7 +263,7 @@ kernel_conv_padding<<>>(static_cast(out_ptr), static cuda_status = cudaGetLastError(); if (cuda_status != cudaSuccess) { - printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); + std::printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); } TensorBase value_buf(a.get_buffer().type(), new_shape, out_ptr, this_cuda); cuda_status = cudaFree(out_ptr); @@ -390,7 +391,7 @@ kernel_col2im<<>>(static_cast(out_ptr), static_cast< cuda_status = cudaGetLastError(); if (cuda_status != cudaSuccess) { - printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); + std::printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); } TensorBase value_buf(a.get_buffer().type(), new_shape, out_ptr, this_cuda); cuda_status = cudaFree(out_ptr); @@ -514,7 +515,7 @@ kernel_im2col<<>>(static_cast(out_ptr), static_cast< cuda_status = cudaGetLastError(); if (cuda_status != cudaSuccess) { - printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); + std::printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); } TensorBase value_buf(a.get_buffer().type(), new_shape, out_ptr, this_cuda); cuda_status = cudaFree(out_ptr); From 614320a8d253f1af0ca49e5787a1c8c64ba7984a Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 14:15:20 +0700 Subject: [PATCH 027/281] Update Dockerfile --- .devcontainer/CUDA/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index decec8d..c9fb107 
100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04 +FROM nvcr.io/nvidia/cuda RUN apt-get update && apt-get -y install cmake From 3e785ea5e60555adab8b5d6d1464c32156c357f3 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 17 Jun 2025 14:35:24 +0700 Subject: [PATCH 028/281] Update Dockerfile From 7429aa0e10f4e6a6098dd3cf686b48c61e07ede8 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 09:51:40 +0700 Subject: [PATCH 029/281] Update Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 7244284..03447db 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04 +FROM nvcr.io/nvidia/cuda RUN apt-get update && apt-get -y install cmake From a8ad0bbf3a65a0554c9f7ba5d80d7bc419636e0d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 10:08:57 +0700 Subject: [PATCH 030/281] cuda 12.9.1 --- .devcontainer/CUDA/Dockerfile | 2 +- Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index c9fb107..6d66d86 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda +FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update && apt-get -y install cmake diff --git a/Dockerfile b/Dockerfile index 7244284..17bbcb3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.3.1-devel-ubuntu22.04 +FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update && apt-get -y install cmake From acb729b65d59dd01cfef7cd24f3f8f898ccf293b Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: 
Tue, 24 Jun 2025 10:13:52 +0700 Subject: [PATCH 031/281] Update Dockerfile --- Dockerfile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4640897..17bbcb3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,4 @@ -<<<<<<< HEAD FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 -======= -FROM nvcr.io/nvidia/cuda ->>>>>>> 7429aa0e10f4e6a6098dd3cf686b48c61e07ede8 RUN apt-get update && apt-get -y install cmake From cbdd0423c5834d0f41145f5c9821b9007a172dea Mon Sep 17 00:00:00 2001 From: Noob Date: Tue, 24 Jun 2025 05:30:10 +0000 Subject: [PATCH 032/281] Update .gitlab-ci.yml file --- .gitlab-ci.yml | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..c09a24d --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,49 @@ +# This file is a template, and might need editing before it works on your project. +# This is a sample GitLab CI/CD configuration file that should run without any modifications. +# It demonstrates a basic 3 stage CI/CD pipeline. Instead of real tests or scripts, +# it uses echo commands to simulate the pipeline execution. +# +# A pipeline is composed of independent jobs that run scripts, grouped into stages. +# Stages run in sequential order, but jobs within stages run in parallel. +# +# For more information, see: https://docs.gitlab.com/ee/ci/yaml/#stages +# +# You can copy and paste this template into a new `.gitlab-ci.yml` file. +# You should not add this template to an existing `.gitlab-ci.yml` file by using the `include:` keyword. 
+# +# To contribute improvements to CI/CD templates, please follow the Development guide at: +# https://docs.gitlab.com/development/cicd/templates/ +# This specific template is located at: +# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Getting-Started.gitlab-ci.yml + +stages: # List of stages for jobs, and their order of execution + - build + - test + - deploy + +build-job: # This job runs in the build stage, which runs first. + stage: build + script: + - echo "Compiling the code..." + - echo "Compile complete." + +unit-test-job: # This job runs in the test stage. + stage: test # It only starts when the job in the build stage completes successfully. + script: + - echo "Running unit tests... This will take about 60 seconds." + - sleep 60 + - echo "Code coverage is 90%" + +lint-test-job: # This job also runs in the test stage. + stage: test # It can run at the same time as unit-test-job (in parallel). + script: + - echo "Linting code... This will take about 10 seconds." + - sleep 10 + - echo "No lint issues found." + +deploy-job: # This job runs in the deploy stage. + stage: deploy # It only runs when *both* jobs in the test stage complete successfully. + environment: production + script: + - echo "Deploying application..." + - echo "Application successfully deployed." From 0bfc2e67811b49ecca87b817887c5257b3517674 Mon Sep 17 00:00:00 2001 From: Noob Date: Tue, 24 Jun 2025 05:36:51 +0000 Subject: [PATCH 033/281] Update .gitlab-ci.yml file --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c09a24d..4bdb4f5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -47,3 +47,4 @@ deploy-job: # This job runs in the deploy stage. script: - echo "Deploying application..." - echo "Application successfully deployed." 
+ From c17df4e7a60dc9057e41f158edbd58053c02bdfb Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:14:27 +0700 Subject: [PATCH 034/281] commit --- .devcontainer/CUDA/Dockerfile | 5 ++++- Dockerfile | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index 6d66d86..981ec63 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -1,6 +1,9 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 -RUN apt-get update && apt-get -y install cmake +RUN apt-get update +RUN apt-get upgrade -y +RUN apt-get -y install cmake + # [Optional] Uncomment this section to install additional vcpkg ports. # RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " diff --git a/Dockerfile b/Dockerfile index 17bbcb3..f9db01c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,8 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 -RUN apt-get update && apt-get -y install cmake +RUN apt-get update +RUN apt-get upgrade -y +RUN apt-get -y install cmake # [Optional] Uncomment this section to install additional vcpkg ports. 
# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " From e9a5f6e2498a4d850792df9e99594e1ae3395bf5 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:21:50 +0700 Subject: [PATCH 035/281] test --- .devcontainer/CUDA/Dockerfile | 1 + Dockerfile | 1 + 2 files changed, 2 insertions(+) diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index 981ec63..aed4468 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -2,6 +2,7 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y +ENV DEBIAN_FRONTEND=noninteractive RUN apt-get -y install cmake diff --git a/Dockerfile b/Dockerfile index f9db01c..3720b4b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,7 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y +ENV DEBIAN_FRONTEND=noninteractive RUN apt-get -y install cmake # [Optional] Uncomment this section to install additional vcpkg ports. 
From f75b3952d510ee52c624cb4db01a159b4588851d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:43:23 +0700 Subject: [PATCH 036/281] test2 --- src/tensor_array/core/CMakeLists.txt | 8 +++----- src/tensor_array/layers/CMakeLists.txt | 9 ++++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index eee3d4b..2c6827b 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -19,15 +19,13 @@ file(GLOB TensorArray_src "*.cc" "*.cu") # file(MAKE_DIRECTORY "include/tensor_array/core") -add_library(tensorarray_core SHARED ${TensorArray_src}) -target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) +add_library(TensorArray::Core SHARED ${TensorArray_src}) +target_link_libraries(TensorArray::Core PRIVATE CUDA::cublas) endif(CUDAToolkit_FOUND) -add_library(TensorArray::Core ALIAS tensorarray_core) - install( - TARGETS tensorarray_core + TARGETS TensorArray::Core EXPORT TensorArrayTargets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries diff --git a/src/tensor_array/layers/CMakeLists.txt b/src/tensor_array/layers/CMakeLists.txt index cef320d..bf4f563 100644 --- a/src/tensor_array/layers/CMakeLists.txt +++ b/src/tensor_array/layers/CMakeLists.txt @@ -10,14 +10,13 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor_array/layers COMPONENT headers) -add_library(tensorarray_layers SHARED ${TensorArray_src}) -add_library(TensorArray::Layers ALIAS tensorarray_layers) +add_library(TensorArray::Layers SHARED ${TensorArray_src}) -target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_layers TensorArray::Core) +target_include_directories(TensorArray::Layers PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_link_libraries(TensorArray::Layers TensorArray::Core) install( - TARGETS tensorarray_layers + TARGETS 
TensorArray::Layers EXPORT TensorArrayTargets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries From cc32a4ea2d4f99fe36097c45fbf2724991af6f1d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:48:22 +0700 Subject: [PATCH 037/281] test1 --- src/tensor_array/core/CMakeLists.txt | 8 +++++--- src/tensor_array/layers/CMakeLists.txt | 9 +++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 2c6827b..eee3d4b 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -19,13 +19,15 @@ file(GLOB TensorArray_src "*.cc" "*.cu") # file(MAKE_DIRECTORY "include/tensor_array/core") -add_library(TensorArray::Core SHARED ${TensorArray_src}) -target_link_libraries(TensorArray::Core PRIVATE CUDA::cublas) +add_library(tensorarray_core SHARED ${TensorArray_src}) +target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) endif(CUDAToolkit_FOUND) +add_library(TensorArray::Core ALIAS tensorarray_core) + install( - TARGETS TensorArray::Core + TARGETS tensorarray_core EXPORT TensorArrayTargets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries diff --git a/src/tensor_array/layers/CMakeLists.txt b/src/tensor_array/layers/CMakeLists.txt index bf4f563..cef320d 100644 --- a/src/tensor_array/layers/CMakeLists.txt +++ b/src/tensor_array/layers/CMakeLists.txt @@ -10,13 +10,14 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor_array/layers COMPONENT headers) -add_library(TensorArray::Layers SHARED ${TensorArray_src}) +add_library(tensorarray_layers SHARED ${TensorArray_src}) +add_library(TensorArray::Layers ALIAS tensorarray_layers) -target_include_directories(TensorArray::Layers PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(TensorArray::Layers TensorArray::Core) +target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) 
+target_link_libraries(tensorarray_layers TensorArray::Core) install( - TARGETS TensorArray::Layers + TARGETS tensorarray_layers EXPORT TensorArrayTargets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries From 0de76d3510d14bfd98553b105da97f8221ec4cf6 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:58:45 +0700 Subject: [PATCH 038/281] test 3 --- src/tensor_array/core/CMakeLists.txt | 4 ++-- src/tensor_array/layers/CMakeLists.txt | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index eee3d4b..5aa955d 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -24,8 +24,6 @@ target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) endif(CUDAToolkit_FOUND) -add_library(TensorArray::Core ALIAS tensorarray_core) - install( TARGETS tensorarray_core EXPORT TensorArrayTargets @@ -33,3 +31,5 @@ install( COMPONENT libraries ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries) + +add_library(TensorArray::Core ALIAS tensorarray_core) diff --git a/src/tensor_array/layers/CMakeLists.txt b/src/tensor_array/layers/CMakeLists.txt index cef320d..80e222b 100644 --- a/src/tensor_array/layers/CMakeLists.txt +++ b/src/tensor_array/layers/CMakeLists.txt @@ -11,7 +11,6 @@ install( COMPONENT headers) add_library(tensorarray_layers SHARED ${TensorArray_src}) -add_library(TensorArray::Layers ALIAS tensorarray_layers) target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) target_link_libraries(tensorarray_layers TensorArray::Core) @@ -23,3 +22,5 @@ install( COMPONENT libraries ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT libraries) + +add_library(TensorArray::Layers ALIAS tensorarray_layers) From 32019936ccd84e7b4a09041190b2b99b2246f97e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 
24 Jun 2025 21:28:17 +0700 Subject: [PATCH 039/281] test 1 --- src/tensor_array/core/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 5aa955d..702f464 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -22,8 +22,6 @@ file(GLOB TensorArray_src "*.cc" "*.cu") add_library(tensorarray_core SHARED ${TensorArray_src}) target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) -endif(CUDAToolkit_FOUND) - install( TARGETS tensorarray_core EXPORT TensorArrayTargets @@ -33,3 +31,6 @@ install( COMPONENT libraries) add_library(TensorArray::Core ALIAS tensorarray_core) + +endif(CUDAToolkit_FOUND) + From 0491c80614eca105baaad4a6e0705799e5cf8482 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 21:36:09 +0700 Subject: [PATCH 040/281] test 12.9.0 --- .devcontainer/CUDA/Dockerfile | 2 +- Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index aed4468..a8de487 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 +FROM nvcr.io/nvidia/cuda:12.9.0-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y diff --git a/Dockerfile b/Dockerfile index 3720b4b..b2796f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 +FROM nvcr.io/nvidia/cuda:12.9.0-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y From 9ff4836f23cab8166c53e91b55974084cd86a88a Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 21:47:41 +0700 Subject: [PATCH 041/281] temp change to cmake --- src/tensor_array/core/CMakeLists.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) 
diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 702f464..3176436 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -17,10 +17,18 @@ set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--default-stream per-thread") file(GLOB TensorArray_src "*.cc" "*.cu") +else() +enable_language(C CXX) +file(GLOB TensorArray_src "*.cc") +endif() + # file(MAKE_DIRECTORY "include/tensor_array/core") add_library(tensorarray_core SHARED ${TensorArray_src}) + +if(CUDAToolkit_FOUND) target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) +endif(CUDAToolkit_FOUND) install( TARGETS tensorarray_core @@ -31,6 +39,3 @@ install( COMPONENT libraries) add_library(TensorArray::Core ALIAS tensorarray_core) - -endif(CUDAToolkit_FOUND) - From d7e2f507c5915ae25fdd283dfb2f0e1accee3e8e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 22:00:23 +0700 Subject: [PATCH 042/281] test --- src/tensor_array/core/CMakeLists.txt | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 3176436..6475c2c 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.8.0) file(GLOB TensorArray_inc "*.hh") - +enable_language(CUDA C CXX) install( FILES ${TensorArray_inc} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor_array/core @@ -9,18 +9,13 @@ install( find_package(CUDAToolkit) if(CUDAToolkit_FOUND) -enable_language(CUDA C CXX) set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION TRUE) set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--default-stream per-thread") file(GLOB TensorArray_src "*.cc" "*.cu") - -else() -enable_language(C CXX) -file(GLOB TensorArray_src "*.cc") -endif() +endif(CUDAToolkit_FOUND) # file(MAKE_DIRECTORY "include/tensor_array/core") From 
9e396d23ddbf385ee57753ee9c306525d0906750 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 22:20:20 +0700 Subject: [PATCH 043/281] test 101 --- .devcontainer/CUDA/Dockerfile | 2 +- CMakeLists.txt | 2 +- Dockerfile | 2 +- src/tensor_array/core/CMakeLists.txt | 12 ++++-------- src/tensor_array/layers/CMakeLists.txt | 2 +- 5 files changed, 8 insertions(+), 12 deletions(-) diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index a8de487..aed4468 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.9.0-devel-ubuntu20.04 +FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y diff --git a/CMakeLists.txt b/CMakeLists.txt index ae733e0..0f6207d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.8.0) +cmake_minimum_required(VERSION 3.10) project(TensorArray) diff --git a/Dockerfile b/Dockerfile index b2796f8..3720b4b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.9.0-devel-ubuntu20.04 +FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 6475c2c..cd068c4 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -1,29 +1,25 @@ -cmake_minimum_required(VERSION 3.8.0) +cmake_minimum_required(VERSION 3.10) file(GLOB TensorArray_inc "*.hh") -enable_language(CUDA C CXX) + install( FILES ${TensorArray_inc} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor_array/core COMPONENT headers) -find_package(CUDAToolkit) -if(CUDAToolkit_FOUND) +enable_language(CUDA) set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION TRUE) set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--default-stream per-thread") file(GLOB TensorArray_src "*.cc" 
"*.cu") -endif(CUDAToolkit_FOUND) # file(MAKE_DIRECTORY "include/tensor_array/core") add_library(tensorarray_core SHARED ${TensorArray_src}) -if(CUDAToolkit_FOUND) -target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) -endif(CUDAToolkit_FOUND) +cuda_add_cublas_to_target(tensorarray_core) install( TARGETS tensorarray_core diff --git a/src/tensor_array/layers/CMakeLists.txt b/src/tensor_array/layers/CMakeLists.txt index 80e222b..fa4d1d0 100644 --- a/src/tensor_array/layers/CMakeLists.txt +++ b/src/tensor_array/layers/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.8.0) +cmake_minimum_required(VERSION 3.10) enable_language(C CXX) From b12a8157b5cfcd0bd13c50627a7c21e58f33c746 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 22:27:34 +0700 Subject: [PATCH 044/281] minor change --- src/tensor_array/core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index cd068c4..f9f7a42 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -19,7 +19,7 @@ file(GLOB TensorArray_src "*.cc" "*.cu") add_library(tensorarray_core SHARED ${TensorArray_src}) -cuda_add_cublas_to_target(tensorarray_core) +target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) install( TARGETS tensorarray_core From aebc94bd89e3867a9e3f14642aeee718d62e5d62 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 22:32:40 +0700 Subject: [PATCH 045/281] test 102 --- src/tensor_array/core/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index f9f7a42..54f9102 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -17,9 +17,9 @@ file(GLOB TensorArray_src "*.cc" "*.cu") # 
file(MAKE_DIRECTORY "include/tensor_array/core") -add_library(tensorarray_core SHARED ${TensorArray_src}) +cuda_add_library(tensorarray_core SHARED ${TensorArray_src}) -target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) +cuda_add_cublas_to_target(tensorarray_core) install( TARGETS tensorarray_core From f0ee589d3f582ba72c3977ba4d6cac5b0dbc0c2a Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 22:50:57 +0700 Subject: [PATCH 046/281] test find --- src/tensor_array/core/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 54f9102..d607310 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -17,9 +17,10 @@ file(GLOB TensorArray_src "*.cc" "*.cu") # file(MAKE_DIRECTORY "include/tensor_array/core") -cuda_add_library(tensorarray_core SHARED ${TensorArray_src}) +add_library(tensorarray_core SHARED ${TensorArray_src}) -cuda_add_cublas_to_target(tensorarray_core) +FindCUDAToolkit() +target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) install( TARGETS tensorarray_core From f71615284e7489fa9f7d3608d7ec404a78b8edaf Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 24 Jun 2025 22:59:13 +0700 Subject: [PATCH 047/281] include FindCUDA --- src/tensor_array/core/CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index d607310..6853f13 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -9,6 +9,8 @@ install( enable_language(CUDA) +include(FindCUDA) + set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION TRUE) set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--default-stream per-thread") @@ -17,10 +19,9 @@ file(GLOB TensorArray_src 
"*.cc" "*.cu") # file(MAKE_DIRECTORY "include/tensor_array/core") -add_library(tensorarray_core SHARED ${TensorArray_src}) +cuda_add_library(tensorarray_core ${TensorArray_src} SHARED) -FindCUDAToolkit() -target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) +cuda_add_cublas_to_target(tensorarray_core) install( TARGETS tensorarray_core From 72aa8357008c6098e24494ea1d487815a5cf4ca0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 13:57:20 +0700 Subject: [PATCH 048/281] #include --- src/tensor_array/core/tensor.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tensor_array/core/tensor.hh b/src/tensor_array/core/tensor.hh index 553f311..fd16f54 100644 --- a/src/tensor_array/core/tensor.hh +++ b/src/tensor_array/core/tensor.hh @@ -16,6 +16,7 @@ limitations under the License. #include #include +#include #include "tensorbase.hh" #pragma once From df064e2705608f2cf16fcbebe1adb1a3608ab7b7 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 14:23:30 +0700 Subject: [PATCH 049/281] test 105 --- src/tensor_array/core/CMakeLists.txt | 6 +++--- src/tensor_array/core/tensor.hh | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 6853f13..e4d2d69 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -11,9 +11,9 @@ enable_language(CUDA) include(FindCUDA) -set(CMAKE_CUDA_ARCHITECTURES 52 75 89) -set(CMAKE_CUDA_SEPARABLE_COMPILATION TRUE) -set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} "--default-stream per-thread") +# set(CMAKE_CUDA_ARCHITECTURES 52 75 89) +set(CUDA_SEPARABLE_COMPILATION ON) +list(APPEND CUDA_NVCC_FLAGS "--default-stream per-thread") file(GLOB TensorArray_src "*.cc" "*.cu") diff --git a/src/tensor_array/core/tensor.hh b/src/tensor_array/core/tensor.hh index fd16f54..553f311 100644 --- 
a/src/tensor_array/core/tensor.hh +++ b/src/tensor_array/core/tensor.hh @@ -16,7 +16,6 @@ limitations under the License. #include #include -#include #include "tensorbase.hh" #pragma once From 3eb1dbe3e9722ee5840be895b7381732e4e432dc Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 14:37:51 +0700 Subject: [PATCH 050/281] standard c17 --- CMakeLists.txt | 7 +++++++ src/tensor_array/core/CMakeLists.txt | 4 ++++ src/tensor_array/layers/CMakeLists.txt | 2 -- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f6207d..f148729 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,13 @@ project(TensorArray) include(GNUInstallDirs) set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) +set(CMAKE_C_STANDARD 17) +set(CMAKE_C_STANDARD_REQUIRED ON) +set(CMAKE_C_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + add_subdirectory("src/tensor_array/core") add_subdirectory("src/tensor_array/layers") diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index e4d2d69..99f5ebb 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -11,6 +11,10 @@ enable_language(CUDA) include(FindCUDA) +set(CMAKE_CUDA_STANDARD 17) +set(CMAKE_CUDA_STANDARD_REQUIRED ON) +set(CMAKE_CUDA_EXTENSIONS OFF) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CUDA_SEPARABLE_COMPILATION ON) list(APPEND CUDA_NVCC_FLAGS "--default-stream per-thread") diff --git a/src/tensor_array/layers/CMakeLists.txt b/src/tensor_array/layers/CMakeLists.txt index fa4d1d0..f9ae87a 100644 --- a/src/tensor_array/layers/CMakeLists.txt +++ b/src/tensor_array/layers/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.10) -enable_language(C CXX) - file(GLOB TensorArray_src "*.cc") file(GLOB TensorArray_inc "*.hh") From e5507e31f99993c61ee130d507db53da7c2c3df8 Mon Sep 17 00:00:00 
2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 14:46:27 +0700 Subject: [PATCH 051/281] changes c++ 17 --- CMakeLists.txt | 7 ------- src/tensor_array/core/CMakeLists.txt | 16 ++++++++++++---- src/tensor_array/layers/CMakeLists.txt | 8 ++++++++ 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f148729..0f6207d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,13 +5,6 @@ project(TensorArray) include(GNUInstallDirs) set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) -set(CMAKE_C_STANDARD 17) -set(CMAKE_C_STANDARD_REQUIRED ON) -set(CMAKE_C_EXTENSIONS OFF) -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS OFF) - add_subdirectory("src/tensor_array/core") add_subdirectory("src/tensor_array/layers") diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 99f5ebb..496ed44 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -11,10 +11,6 @@ enable_language(CUDA) include(FindCUDA) -set(CMAKE_CUDA_STANDARD 17) -set(CMAKE_CUDA_STANDARD_REQUIRED ON) -set(CMAKE_CUDA_EXTENSIONS OFF) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CUDA_SEPARABLE_COMPILATION ON) list(APPEND CUDA_NVCC_FLAGS "--default-stream per-thread") @@ -27,6 +23,18 @@ cuda_add_library(tensorarray_core ${TensorArray_src} SHARED) cuda_add_cublas_to_target(tensorarray_core) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) + +set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) + +set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) +set_property(TARGET tensorarray_core PROPERTY 
CUDA_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) + install( TARGETS tensorarray_core EXPORT TensorArrayTargets diff --git a/src/tensor_array/layers/CMakeLists.txt b/src/tensor_array/layers/CMakeLists.txt index f9ae87a..6ffa2a3 100644 --- a/src/tensor_array/layers/CMakeLists.txt +++ b/src/tensor_array/layers/CMakeLists.txt @@ -13,6 +13,14 @@ add_library(tensorarray_layers SHARED ${TensorArray_src}) target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) target_link_libraries(tensorarray_layers TensorArray::Core) +set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) + +set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) + install( TARGETS tensorarray_layers EXPORT TensorArrayTargets From 6f5add310cc6815fe9196cfcf189afa299ef6d76 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 14:53:28 +0700 Subject: [PATCH 052/281] test 110 --- src/tensor_array/core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 496ed44..08f0108 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -13,7 +13,7 @@ include(FindCUDA) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CUDA_SEPARABLE_COMPILATION ON) -list(APPEND CUDA_NVCC_FLAGS "--default-stream per-thread") +# list(APPEND CUDA_NVCC_FLAGS "--default-stream per-thread") file(GLOB TensorArray_src "*.cc" "*.cu") From 450e5c69541dc37696dd383359f1052decd5564e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 
2025 15:22:59 +0700 Subject: [PATCH 053/281] test 1 --- src/tensor_array/core/CMakeLists.txt | 12 ------------ src/tensor_array/core/tensor.hh | 4 ++-- src/tensor_array/layers/CMakeLists.txt | 8 -------- 3 files changed, 2 insertions(+), 22 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 08f0108..f7d6030 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -23,18 +23,6 @@ cuda_add_library(tensorarray_core ${TensorArray_src} SHARED) cuda_add_cublas_to_target(tensorarray_core) -set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) -set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - install( TARGETS tensorarray_core EXPORT TensorArrayTargets diff --git a/src/tensor_array/core/tensor.hh b/src/tensor_array/core/tensor.hh index 553f311..5990e26 100644 --- a/src/tensor_array/core/tensor.hh +++ b/src/tensor_array/core/tensor.hh @@ -60,14 +60,14 @@ namespace tensor_array data(create_mem_101(sizeof(T), &data)), data_size(sizeof(T)) { - static_assert(std::is_trivially_copyable_v, "Requied default constructor"); + static_assert(std::is_trivially_copyable, "Requied default constructor"); } template constexpr DataBuffer(const std::initializer_list &data) : data(create_mem_101(sizeof(T) * data.size(), data.begin())), data_size(sizeof(T) * data.size()) { - static_assert(std::is_trivially_copyable_v, "Requied default constructor"); + static_assert(std::is_trivially_copyable, "Requied 
default constructor"); } DataBuffer(); DataBuffer(std::nullptr_t); diff --git a/src/tensor_array/layers/CMakeLists.txt b/src/tensor_array/layers/CMakeLists.txt index 6ffa2a3..f9ae87a 100644 --- a/src/tensor_array/layers/CMakeLists.txt +++ b/src/tensor_array/layers/CMakeLists.txt @@ -13,14 +13,6 @@ add_library(tensorarray_layers SHARED ${TensorArray_src}) target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) target_link_libraries(tensorarray_layers TensorArray::Core) -set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) - install( TARGETS tensorarray_layers EXPORT TensorArrayTargets From efac13baf5fbdd244db8a3c5a3c7aa94584f92f9 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 15:27:54 +0700 Subject: [PATCH 054/281] ... 
--- src/tensor_array/core/CMakeLists.txt | 12 ++++++++++++ src/tensor_array/core/tensor.hh | 4 ++-- src/tensor_array/layers/CMakeLists.txt | 8 ++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index f7d6030..e671cc3 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -23,6 +23,18 @@ cuda_add_library(tensorarray_core ${TensorArray_src} SHARED) cuda_add_cublas_to_target(tensorarray_core) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) + +set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) + +set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 14) +set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) + install( TARGETS tensorarray_core EXPORT TensorArrayTargets diff --git a/src/tensor_array/core/tensor.hh b/src/tensor_array/core/tensor.hh index 5990e26..553f311 100644 --- a/src/tensor_array/core/tensor.hh +++ b/src/tensor_array/core/tensor.hh @@ -60,14 +60,14 @@ namespace tensor_array data(create_mem_101(sizeof(T), &data)), data_size(sizeof(T)) { - static_assert(std::is_trivially_copyable, "Requied default constructor"); + static_assert(std::is_trivially_copyable_v, "Requied default constructor"); } template constexpr DataBuffer(const std::initializer_list &data) : data(create_mem_101(sizeof(T) * data.size(), data.begin())), data_size(sizeof(T) * data.size()) { - static_assert(std::is_trivially_copyable, "Requied default constructor"); + static_assert(std::is_trivially_copyable_v, "Requied default constructor"); } DataBuffer(); 
DataBuffer(std::nullptr_t); diff --git a/src/tensor_array/layers/CMakeLists.txt b/src/tensor_array/layers/CMakeLists.txt index f9ae87a..6ffa2a3 100644 --- a/src/tensor_array/layers/CMakeLists.txt +++ b/src/tensor_array/layers/CMakeLists.txt @@ -13,6 +13,14 @@ add_library(tensorarray_layers SHARED ${TensorArray_src}) target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) target_link_libraries(tensorarray_layers TensorArray::Core) +set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) + +set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) + install( TARGETS tensorarray_layers EXPORT TensorArrayTargets From 15c41d6314d1d98f9facda011484b5ed29b41ae4 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 15:46:02 +0700 Subject: [PATCH 055/281] test --- src/tensor_array/core/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index e671cc3..a831bf4 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -9,7 +9,7 @@ install( enable_language(CUDA) -include(FindCUDA) +include(FindCUDAToolkit) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CUDA_SEPARABLE_COMPILATION ON) @@ -19,9 +19,9 @@ file(GLOB TensorArray_src "*.cc" "*.cu") # file(MAKE_DIRECTORY "include/tensor_array/core") -cuda_add_library(tensorarray_core ${TensorArray_src} SHARED) +add_library(tensorarray_core SHARED ${TensorArray_src}) -cuda_add_cublas_to_target(tensorarray_core) +target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) set_property(TARGET tensorarray_core PROPERTY 
C_STANDARD 11) set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) From baf28413a1fa1363989a59f7253c629b1f132cab Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 15:50:47 +0700 Subject: [PATCH 056/281] cmake 3.18 --- CMakeLists.txt | 2 +- src/tensor_array/core/CMakeLists.txt | 6 +++--- src/tensor_array/layers/CMakeLists.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f6207d..e10cf74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.18) project(TensorArray) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index a831bf4..4dc04f0 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.18) file(GLOB TensorArray_inc "*.hh") @@ -12,7 +12,7 @@ enable_language(CUDA) include(FindCUDAToolkit) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) -set(CUDA_SEPARABLE_COMPILATION ON) +set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CUDA_NVCC_FLAGS "--default-stream per-thread") file(GLOB TensorArray_src "*.cc" "*.cu") @@ -31,7 +31,7 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) -set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 14) +set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) diff --git a/src/tensor_array/layers/CMakeLists.txt b/src/tensor_array/layers/CMakeLists.txt index 6ffa2a3..c0f8fe0 100644 --- a/src/tensor_array/layers/CMakeLists.txt +++ 
b/src/tensor_array/layers/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.18) file(GLOB TensorArray_src "*.cc") file(GLOB TensorArray_inc "*.hh") From 0fa74ba30005c55714c86b34ff36c0e7867d5fff Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:05:53 +0700 Subject: [PATCH 057/281] test --- .devcontainer/CPU/Dockerfile | 2 +- .devcontainer/CUDA/Dockerfile | 6 +++--- Dockerfile | 7 +++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.devcontainer/CPU/Dockerfile b/.devcontainer/CPU/Dockerfile index 82e43f9..e01b9b5 100644 --- a/.devcontainer/CPU/Dockerfile +++ b/.devcontainer/CPU/Dockerfile @@ -1,6 +1,6 @@ FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-22.04 -ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.22.2" +ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg COPY ./reinstall-cmake.sh /tmp/ diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index aed4468..103a5c9 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -1,9 +1,9 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 -RUN apt-get update -RUN apt-get upgrade -y +RUN apt update +RUN apt upgrade -y ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get -y install cmake +RUN apt -y install cmake # [Optional] Uncomment this section to install additional vcpkg ports. diff --git a/Dockerfile b/Dockerfile index 3720b4b..99d3826 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,8 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 -RUN apt-get update -RUN apt-get upgrade -y -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get -y install cmake +RUN apt update +RUN apt upgrade -y +RUN apt -y install cmake # [Optional] Uncomment this section to install additional vcpkg ports. 
# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " From cc6ba0b5f592e5ca55c500e4fbd134f9a64cec63 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:08:25 +0700 Subject: [PATCH 058/281] test 2 --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 99d3826..dbeaf2e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,7 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt update RUN apt upgrade -y +ENV DEBIAN_FRONTEND=noninteractive RUN apt -y install cmake # [Optional] Uncomment this section to install additional vcpkg ports. From a82223c5fe7303c24c671046b6b87bb4a6114b92 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:11:01 +0700 Subject: [PATCH 059/281] test 5 --- .devcontainer/CUDA/Dockerfile | 8 ++++---- Dockerfile | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index 103a5c9..251ffd1 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -1,9 +1,9 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 -RUN apt update -RUN apt upgrade -y -ENV DEBIAN_FRONTEND=noninteractive -RUN apt -y install cmake +RUN apt-get update +RUN apt-get upgrade -y +# ENV DEBIAN_FRONTEND=noninteractive +RUN snap -y install cmake # [Optional] Uncomment this section to install additional vcpkg ports. diff --git a/Dockerfile b/Dockerfile index dbeaf2e..c5a27b9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,9 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 -RUN apt update -RUN apt upgrade -y -ENV DEBIAN_FRONTEND=noninteractive -RUN apt -y install cmake +RUN apt-get update +RUN apt-get upgrade -y +# ENV DEBIAN_FRONTEND=noninteractive +RUN snap -y install cmake # [Optional] Uncomment this section to install additional vcpkg ports. 
# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " From e67c4ed628b979bce6e81018d5c5f0c66bce6da1 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 16:14:55 +0700 Subject: [PATCH 060/281] changes --- .devcontainer/CUDA/Dockerfile | 4 ++-- Dockerfile | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index 251ffd1..ee54f8a 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -2,8 +2,8 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y -# ENV DEBIAN_FRONTEND=noninteractive -RUN snap -y install cmake +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get -y install --no-install-recommends cmake # [Optional] Uncomment this section to install additional vcpkg ports. diff --git a/Dockerfile b/Dockerfile index c5a27b9..3dc543d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,8 +2,8 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y -# ENV DEBIAN_FRONTEND=noninteractive -RUN snap -y install cmake +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get -y install --no-install-recommends cmake # [Optional] Uncomment this section to install additional vcpkg ports. 
# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " From 5a1b1b53261d076e74dc7a59afdd8819066a1bd7 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 17:44:59 +0700 Subject: [PATCH 061/281] test 101 --- .devcontainer/CPU/Dockerfile | 2 +- .devcontainer/CUDA/Dockerfile | 14 ++++++++++---- Dockerfile | 14 ++++++++++---- .../packages-install}/reinstall-cmake.sh | 0 4 files changed, 21 insertions(+), 9 deletions(-) rename {.devcontainer/CPU => scripts/packages-install}/reinstall-cmake.sh (100%) diff --git a/.devcontainer/CPU/Dockerfile b/.devcontainer/CPU/Dockerfile index e01b9b5..cf68114 100644 --- a/.devcontainer/CPU/Dockerfile +++ b/.devcontainer/CPU/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-22.04 ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg -COPY ./reinstall-cmake.sh /tmp/ +COPY ./../script/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index ee54f8a..e20e237 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -1,9 +1,15 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 -RUN apt-get update -RUN apt-get upgrade -y -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get -y install --no-install-recommends cmake +ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" + +# Optionally install the cmake for vcpkg +COPY ./../script/packages-install/reinstall-cmake.sh /tmp/ + +RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ + chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ + fi \ + && rm -f /tmp/reinstall-cmake.sh + # [Optional] Uncomment this section to install additional vcpkg ports. 
diff --git a/Dockerfile b/Dockerfile index 3dc543d..c646b3a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,15 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 -RUN apt-get update -RUN apt-get upgrade -y -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get -y install --no-install-recommends cmake +ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" + +# Optionally install the cmake for vcpkg +COPY ./script/packages-install/reinstall-cmake.sh /tmp/ + +RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ + chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ + fi \ + && rm -f /tmp/reinstall-cmake.sh + # [Optional] Uncomment this section to install additional vcpkg ports. # RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " diff --git a/.devcontainer/CPU/reinstall-cmake.sh b/scripts/packages-install/reinstall-cmake.sh similarity index 100% rename from .devcontainer/CPU/reinstall-cmake.sh rename to scripts/packages-install/reinstall-cmake.sh From e4b875ef599694861ce405d68b3b96e557ecd9c2 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 17:46:30 +0700 Subject: [PATCH 062/281] test 102 --- .devcontainer/CPU/Dockerfile | 2 +- .devcontainer/CUDA/Dockerfile | 2 +- Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.devcontainer/CPU/Dockerfile b/.devcontainer/CPU/Dockerfile index cf68114..fff5cd7 100644 --- a/.devcontainer/CPU/Dockerfile +++ b/.devcontainer/CPU/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-22.04 ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg -COPY ./../script/packages-install/reinstall-cmake.sh /tmp/ +COPY ../script/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ diff 
--git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index e20e237..285ae1c 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -3,7 +3,7 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg -COPY ./../script/packages-install/reinstall-cmake.sh /tmp/ +COPY ../script/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ diff --git a/Dockerfile b/Dockerfile index c646b3a..bc59de3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg -COPY ./script/packages-install/reinstall-cmake.sh /tmp/ +COPY /script/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ From 6cb0fc7c5a20bf9a3369659cea94224e313d0952 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 17:47:31 +0700 Subject: [PATCH 063/281] test 105 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index bc59de3..1efb4de 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg -COPY /script/packages-install/reinstall-cmake.sh /tmp/ +COPY script/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ From 
9f5df9c8fbad200a08052db1cea2036a91aff548 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 17:50:21 +0700 Subject: [PATCH 064/281] test 110 --- .devcontainer/CPU/Dockerfile | 2 +- .devcontainer/CUDA/Dockerfile | 2 +- Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.devcontainer/CPU/Dockerfile b/.devcontainer/CPU/Dockerfile index fff5cd7..d7f66c9 100644 --- a/.devcontainer/CPU/Dockerfile +++ b/.devcontainer/CPU/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-22.04 ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg -COPY ../script/packages-install/reinstall-cmake.sh /tmp/ +COPY ../scripts/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index 285ae1c..4b1b099 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -3,7 +3,7 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg -COPY ../script/packages-install/reinstall-cmake.sh /tmp/ +COPY ../scripts/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ diff --git a/Dockerfile b/Dockerfile index 1efb4de..48c58d4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg -COPY script/packages-install/reinstall-cmake.sh /tmp/ +COPY scripts/packages-install/reinstall-cmake.sh /tmp/ RUN if [ 
"${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ From 23eb072a7f2b339c273425c0fde2a99e8ad7e54f Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 18:13:42 +0700 Subject: [PATCH 065/281] test 111 --- .devcontainer/CUDA/Dockerfile | 4 ++++ Dockerfile | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index 4b1b099..91aaedb 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -1,5 +1,9 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 +RUN apt-get update +RUN apt-get upgrade -y +RUN apt-get install curl + ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg diff --git a/Dockerfile b/Dockerfile index 48c58d4..67ad11c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,9 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 +RUN apt-get update +RUN apt-get upgrade -y +RUN apt-get install curl + ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" # Optionally install the cmake for vcpkg From 2d6a9a56d05be5ae5a50bd5ee1b4428a1e8d875e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 18:15:49 +0700 Subject: [PATCH 066/281] add -y --- .devcontainer/CUDA/Dockerfile | 2 +- Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index 91aaedb..e725249 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -2,7 +2,7 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y -RUN apt-get install curl +RUN apt-get install curl -y ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" diff --git a/Dockerfile b/Dockerfile index 67ad11c..43eb1d4 100644 --- a/Dockerfile +++ 
b/Dockerfile @@ -2,7 +2,7 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y -RUN apt-get install curl +RUN apt-get install curl -y ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" From dc8be290a02c6370aadc4cb6a3787af2d80136d6 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 20:08:12 +0700 Subject: [PATCH 067/281] some minor changes --- .devcontainer/CPU/Dockerfile | 4 ++-- .devcontainer/CUDA/Dockerfile | 2 +- Dockerfile | 2 +- README.md | 15 +++++++-------- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.devcontainer/CPU/Dockerfile b/.devcontainer/CPU/Dockerfile index d7f66c9..cffacf5 100644 --- a/.devcontainer/CPU/Dockerfile +++ b/.devcontainer/CPU/Dockerfile @@ -1,6 +1,6 @@ -FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-22.04 +FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-20.04 -ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" +ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" # Optionally install the cmake for vcpkg COPY ../scripts/packages-install/reinstall-cmake.sh /tmp/ diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index e725249..c1a4c65 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -4,7 +4,7 @@ RUN apt-get update RUN apt-get upgrade -y RUN apt-get install curl -y -ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" +ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" # Optionally install the cmake for vcpkg COPY ../scripts/packages-install/reinstall-cmake.sh /tmp/ diff --git a/Dockerfile b/Dockerfile index 43eb1d4..aadef0c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ RUN apt-get update RUN apt-get upgrade -y RUN apt-get install curl -y -ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.8" +ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" # Optionally install the cmake for vcpkg COPY scripts/packages-install/reinstall-cmake.sh /tmp/ diff --git a/README.md 
b/README.md index a2c4f17..92417d6 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ We created a template struct that named `TensorArray`. That struct is a multi-di ```C++ #include "tensor_array/core/tensorbase.hh" -using tensor_array::value; +using namespace tensor_array::value; int main() { @@ -64,11 +64,12 @@ The `Tensor::get_grad()` method can get the gradient after call `Tensor::calc_gr #include #include "tensor_array/core/tensor.hh" -using tensor_array::value; +using namespace std; +using namespace tensor_array::value; int main() { - tensor_array::value::TensorArray example_tensor_array = + TensorArray example_tensor_array = {{ {{ 1, 2, 3, 4 }}, {{ 5, 6, 7, 8 }}, @@ -79,13 +80,11 @@ int main() Tensor example_tensor_1(example_tensor_array); Tensor example_tensor_2(example_tensor_array_scalar); Tensor example_tensor_sum = example_tensor_1 + example_tensor_2; - std::cout << example_tensor_sum << std::endl; + cout << example_tensor_sum << endl; example_tensor_sum.calc_grad(); - std::cout << example_tensor_1.get_grad() << std::endl; - std::cout << example_tensor_2.get_grad() << std::endl; + cout << example_tensor_1.get_grad() << endl; + cout << example_tensor_2.get_grad() << endl; return 0; } ``` - - From 4c226c909b11340c776d1b0766e280283f0f60f0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 26 Jun 2025 00:05:32 +0700 Subject: [PATCH 068/281] test --- .devcontainer/CPU/Dockerfile | 2 +- .devcontainer/CUDA/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.devcontainer/CPU/Dockerfile b/.devcontainer/CPU/Dockerfile index cffacf5..a8b7928 100644 --- a/.devcontainer/CPU/Dockerfile +++ b/.devcontainer/CPU/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-20.04 ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" # Optionally install the cmake for vcpkg -COPY ../scripts/packages-install/reinstall-cmake.sh /tmp/ +COPY 
../../scripts/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index c1a4c65..1619d6e 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -7,7 +7,7 @@ RUN apt-get install curl -y ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" # Optionally install the cmake for vcpkg -COPY ../scripts/packages-install/reinstall-cmake.sh /tmp/ +COPY ../../scripts/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ From 918b042a86280db16f0311366154405c57e69722 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 17:45:59 +0000 Subject: [PATCH 069/281] Docker --- .devcontainer/CPU/Dockerfile | 2 +- .devcontainer/CPU/devcontainer.json | 1 + .devcontainer/CUDA/Dockerfile | 2 +- .devcontainer/CUDA/devcontainer.json | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.devcontainer/CPU/Dockerfile b/.devcontainer/CPU/Dockerfile index a8b7928..3f47760 100644 --- a/.devcontainer/CPU/Dockerfile +++ b/.devcontainer/CPU/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/devcontainers/cpp:1-ubuntu-20.04 ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" # Optionally install the cmake for vcpkg -COPY ../../scripts/packages-install/reinstall-cmake.sh /tmp/ +COPY scripts/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ diff --git a/.devcontainer/CPU/devcontainer.json b/.devcontainer/CPU/devcontainer.json index 3dba291..4c1d146 100644 --- 
a/.devcontainer/CPU/devcontainer.json +++ b/.devcontainer/CPU/devcontainer.json @@ -3,6 +3,7 @@ { "name": "C++", "build": { + "context": "../..", "dockerfile": "Dockerfile" }, diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile index 1619d6e..d7df2de 100644 --- a/.devcontainer/CUDA/Dockerfile +++ b/.devcontainer/CUDA/Dockerfile @@ -7,7 +7,7 @@ RUN apt-get install curl -y ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" # Optionally install the cmake for vcpkg -COPY ../../scripts/packages-install/reinstall-cmake.sh /tmp/ +COPY scripts/packages-install/reinstall-cmake.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ diff --git a/.devcontainer/CUDA/devcontainer.json b/.devcontainer/CUDA/devcontainer.json index f9f175c..a59e314 100644 --- a/.devcontainer/CUDA/devcontainer.json +++ b/.devcontainer/CUDA/devcontainer.json @@ -3,6 +3,7 @@ { "name": "CUDA", "build": { + "context": "../..", "dockerfile": "Dockerfile" }, From dfb46d454300751287e45d209082d238d067d60d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 17:59:48 +0000 Subject: [PATCH 070/281] change CMakeList.txt --- src/tensor_array/core/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 4dc04f0..1c109a4 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -11,9 +11,9 @@ enable_language(CUDA) include(FindCUDAToolkit) -# set(CMAKE_CUDA_ARCHITECTURES 52 75 89) +set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) -# list(APPEND CUDA_NVCC_FLAGS "--default-stream per-thread") +list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") file(GLOB TensorArray_src "*.cc" "*.cu") From b810fc88b39c64c432a5d02808e69630b2651256 
Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 18:05:01 +0000 Subject: [PATCH 071/281] changes --- .devcontainer/CUDA/Dockerfile | 24 ------------------------ .devcontainer/CUDA/devcontainer.json | 2 +- Dockerfile | 10 ++-------- 3 files changed, 3 insertions(+), 33 deletions(-) delete mode 100644 .devcontainer/CUDA/Dockerfile diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile deleted file mode 100644 index d7df2de..0000000 --- a/.devcontainer/CUDA/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 - -RUN apt-get update -RUN apt-get upgrade -y -RUN apt-get install curl -y - -ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" - -# Optionally install the cmake for vcpkg -COPY scripts/packages-install/reinstall-cmake.sh /tmp/ - -RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ - chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ - fi \ - && rm -f /tmp/reinstall-cmake.sh - - - -# [Optional] Uncomment this section to install additional vcpkg ports. -# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " - -# [Optional] Uncomment this section to install additional packages. 
-# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ -# && apt-get -y install --no-install-recommends diff --git a/.devcontainer/CUDA/devcontainer.json b/.devcontainer/CUDA/devcontainer.json index a59e314..6317fac 100644 --- a/.devcontainer/CUDA/devcontainer.json +++ b/.devcontainer/CUDA/devcontainer.json @@ -4,7 +4,7 @@ "name": "CUDA", "build": { "context": "../..", - "dockerfile": "Dockerfile" + "dockerfile": "../../Dockerfile" }, "runArgs": [ diff --git a/Dockerfile b/Dockerfile index aadef0c..24fb98a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,15 +22,9 @@ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ # && apt-get -y install --no-install-recommends -WORKDIR /tensor-array -COPY src/ ./src/ -COPY CMakeLists.txt ./ -COPY Config.cmake.in ./ -WORKDIR /tensor-array - -WORKDIR /tensor-array/build +WORKDIR build RUN cmake .. RUN make install -WORKDIR /tensor-array +WORKDIR . From 61bda4cddc6caf62ea25bccc3800a50a27e004aa Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 25 Jun 2025 18:45:23 +0000 Subject: [PATCH 072/281] test 110 --- .devcontainer/CUDA/Dockerfile | 24 ++++++++++++++++++++++++ .devcontainer/CUDA/devcontainer.json | 2 +- Dockerfile | 10 ++++++++-- 3 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 .devcontainer/CUDA/Dockerfile diff --git a/.devcontainer/CUDA/Dockerfile b/.devcontainer/CUDA/Dockerfile new file mode 100644 index 0000000..d7df2de --- /dev/null +++ b/.devcontainer/CUDA/Dockerfile @@ -0,0 +1,24 @@ +FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 + +RUN apt-get update +RUN apt-get upgrade -y +RUN apt-get install curl -y + +ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" + +# Optionally install the cmake for vcpkg +COPY scripts/packages-install/reinstall-cmake.sh /tmp/ + +RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ + chmod +x /tmp/reinstall-cmake.sh 
&& /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ + fi \ + && rm -f /tmp/reinstall-cmake.sh + + + +# [Optional] Uncomment this section to install additional vcpkg ports. +# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " + +# [Optional] Uncomment this section to install additional packages. +# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ +# && apt-get -y install --no-install-recommends diff --git a/.devcontainer/CUDA/devcontainer.json b/.devcontainer/CUDA/devcontainer.json index 6317fac..a59e314 100644 --- a/.devcontainer/CUDA/devcontainer.json +++ b/.devcontainer/CUDA/devcontainer.json @@ -4,7 +4,7 @@ "name": "CUDA", "build": { "context": "../..", - "dockerfile": "../../Dockerfile" + "dockerfile": "Dockerfile" }, "runArgs": [ diff --git a/Dockerfile b/Dockerfile index 24fb98a..aa7ad9d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,9 +22,15 @@ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ # && apt-get -y install --no-install-recommends -WORKDIR build +WORKDIR /app/tensor-array +COPY src/ /src/ +COPY CMakeLists.txt / +COPY Config.cmake.in / +WORKDIR /app/tensor-array + +WORKDIR /tensor-array/build RUN cmake .. RUN make install -WORKDIR . +WORKDIR /app/tensor-array From 7608e8a73c7da138ee215bf0121e16a9f37cea42 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 26 Jun 2025 01:18:06 +0000 Subject: [PATCH 073/281] add app --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index aa7ad9d..4100674 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,7 +28,7 @@ COPY CMakeLists.txt / COPY Config.cmake.in / WORKDIR /app/tensor-array -WORKDIR /tensor-array/build +WORKDIR app/tensor-array/build RUN cmake .. 
RUN make install From 4586f425e83ef524208e8271cceb3cd1684aad59 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 26 Jun 2025 01:32:38 +0000 Subject: [PATCH 074/281] test 2 --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4100674..3bed946 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,9 +23,9 @@ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ # && apt-get -y install --no-install-recommends WORKDIR /app/tensor-array -COPY src/ /src/ -COPY CMakeLists.txt / -COPY Config.cmake.in / +COPY src/ /src +COPY CMakeLists.txt . +COPY Config.cmake.in . WORKDIR /app/tensor-array WORKDIR app/tensor-array/build From 48685f945cdc8554f0cb874e32102a05c676b5c5 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 26 Jun 2025 01:37:30 +0000 Subject: [PATCH 075/281] test 3 --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3bed946..b76ec29 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,12 +23,12 @@ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ # && apt-get -y install --no-install-recommends WORKDIR /app/tensor-array -COPY src/ /src +COPY src/ src/ COPY CMakeLists.txt . COPY Config.cmake.in . WORKDIR /app/tensor-array -WORKDIR app/tensor-array/build +WORKDIR build RUN cmake .. 
RUN make install From a237efdce53e5138c388e2335a085d6b5cc2f0b8 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 26 Jun 2025 15:46:26 +0700 Subject: [PATCH 076/281] Delete .github/workflows/docker-publish.yml --- .github/workflows/docker-publish.yml | 98 ---------------------------- 1 file changed, 98 deletions(-) delete mode 100644 .github/workflows/docker-publish.yml diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml deleted file mode 100644 index 7d6200b..0000000 --- a/.github/workflows/docker-publish.yml +++ /dev/null @@ -1,98 +0,0 @@ -name: Docker - -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. - -on: - schedule: - - cron: '32 5 * * *' - push: - branches: [ "master" ] - # Publish semver tags as releases. - tags: [ 'v*.*.*' ] - pull_request: - branches: [ "master" ] - -env: - # Use docker.io for Docker Hub if empty - REGISTRY: ghcr.io - # github.repository as / - IMAGE_NAME: ${{ github.repository }} - - -jobs: - build: - - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - # This is used to complete the identity challenge - # with sigstore/fulcio when running outside of PRs. 
- id-token: write - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - # Install the cosign tool except on PR - # https://github.com/sigstore/cosign-installer - - name: Install cosign - if: github.event_name != 'pull_request' - uses: sigstore/cosign-installer@6e04d228eb30da1757ee4e1dd75a0ec73a653e06 #v3.1.1 - with: - cosign-release: 'v2.1.1' - - # Set up BuildKit Docker container builder to be able to build - # multi-platform images and export cache - # https://github.com/docker/setup-buildx-action - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0 - - # Login against a Docker registry except on PR - # https://github.com/docker/login-action - - name: Log into registry ${{ env.REGISTRY }} - if: github.event_name != 'pull_request' - uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - # Extract metadata (tags, labels) for Docker - # https://github.com/docker/metadata-action - - name: Extract Docker metadata - id: meta - uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - - # Build and push Docker image with Buildx (don't push on PR) - # https://github.com/docker/build-push-action - - name: Build and push Docker image - id: build-and-push - uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 - with: - context: . - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max - - # Sign the resulting Docker image digest except on PRs. - # This will only write to the public Rekor transparency log when the Docker - # repository is public to avoid leaking data. 
If you would like to publish - # transparency data even for private images, pass --force to cosign below. - # https://github.com/sigstore/cosign - - name: Sign the published Docker image - if: ${{ github.event_name != 'pull_request' }} - env: - # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable - TAGS: ${{ steps.meta.outputs.tags }} - DIGEST: ${{ steps.build-and-push.outputs.digest }} - # This step uses the identity token to provision an ephemeral certificate - # against the sigstore community Fulcio instance. - run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST} From 75f8d96d6fe33110aae26c7526a330fd0609f76e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 26 Jun 2025 15:46:41 +0700 Subject: [PATCH 077/281] Create docker-publish.yml --- .github/workflows/docker-publish.yml | 98 ++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 .github/workflows/docker-publish.yml diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 0000000..235a534 --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,98 @@ +name: Docker + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +on: + schedule: + - cron: '32 2 * * *' + push: + branches: [ "master" ] + # Publish semver tags as releases. + tags: [ 'v*.*.*' ] + pull_request: + branches: [ "master" ] + +env: + # Use docker.io for Docker Hub if empty + REGISTRY: ghcr.io + # github.repository as / + IMAGE_NAME: ${{ github.repository }} + + +jobs: + build: + + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. 
+ id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Install the cosign tool except on PR + # https://github.com/sigstore/cosign-installer + - name: Install cosign + if: github.event_name != 'pull_request' + uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 #v3.5.0 + with: + cosign-release: 'v2.2.4' + + # Set up BuildKit Docker container builder to be able to build + # multi-platform images and export cache + # https://github.com/docker/setup-buildx-action + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0 + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + - name: Log into registry ${{ env.REGISTRY }} + if: github.event_name != 'pull_request' + uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract metadata (tags, labels) for Docker + # https://github.com/docker/metadata-action + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + # Build and push Docker image with Buildx (don't push on PR) + # https://github.com/docker/build-push-action + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 + with: + context: . + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + # Sign the resulting Docker image digest except on PRs. + # This will only write to the public Rekor transparency log when the Docker + # repository is public to avoid leaking data. 
If you would like to publish + # transparency data even for private images, pass --force to cosign below. + # https://github.com/sigstore/cosign + - name: Sign the published Docker image + if: ${{ github.event_name != 'pull_request' }} + env: + # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable + TAGS: ${{ steps.meta.outputs.tags }} + DIGEST: ${{ steps.build-and-push.outputs.digest }} + # This step uses the identity token to provision an ephemeral certificate + # against the sigstore community Fulcio instance. + run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST} From 377edada27451be42a31f7e8458933b2fe22ffb2 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 26 Jun 2025 16:26:09 +0700 Subject: [PATCH 078/281] Create mirror-to-gitlab.yml --- .github/workflows/mirror-to-gitlab.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/workflows/mirror-to-gitlab.yml diff --git a/.github/workflows/mirror-to-gitlab.yml b/.github/workflows/mirror-to-gitlab.yml new file mode 100644 index 0000000..ea1e60d --- /dev/null +++ b/.github/workflows/mirror-to-gitlab.yml @@ -0,0 +1,18 @@ +name: Mirroring + +on: [push, delete] + +jobs: + to_gitlab: + runs-on: ubuntu-latest + steps: # <-- must use actions/checkout before mirroring! + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: pixta-dev/repository-mirroring-action@v1 + with: + target_repo_url: + git@gitlab.com:${{ github.repository }}.git + ssh_private_key: # <-- use 'secrets' to pass credential information. 
+ ${{ secrets.GITLAB_SSH_PRIVATE_KEY }} + From 8ae5df83c5f7e1d4e369b4ecc89ef75005b6a367 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 26 Jun 2025 17:20:14 +0700 Subject: [PATCH 079/281] Delete .github/workflows/mirror-to-gitlab.yml --- .github/workflows/mirror-to-gitlab.yml | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 .github/workflows/mirror-to-gitlab.yml diff --git a/.github/workflows/mirror-to-gitlab.yml b/.github/workflows/mirror-to-gitlab.yml deleted file mode 100644 index ea1e60d..0000000 --- a/.github/workflows/mirror-to-gitlab.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Mirroring - -on: [push, delete] - -jobs: - to_gitlab: - runs-on: ubuntu-latest - steps: # <-- must use actions/checkout before mirroring! - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - uses: pixta-dev/repository-mirroring-action@v1 - with: - target_repo_url: - git@gitlab.com:${{ github.repository }}.git - ssh_private_key: # <-- use 'secrets' to pass credential information. 
- ${{ secrets.GITLAB_SSH_PRIVATE_KEY }} - From 4e0f3de349b4027c9130bcf8a252ee7464aa1aa9 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 26 Jun 2025 22:52:29 +0700 Subject: [PATCH 080/281] Update and rename docker-image.yml to docker-publish-d.yml --- .github/workflows/docker-image.yml | 18 ---------- .github/workflows/docker-publish-d.yml | 48 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 18 deletions(-) delete mode 100644 .github/workflows/docker-image.yml create mode 100644 .github/workflows/docker-publish-d.yml diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml deleted file mode 100644 index eac633f..0000000 --- a/.github/workflows/docker-image.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Docker Image CI - -on: - push: - branches: [ "master" ] - pull_request: - branches: [ "master" ] - -jobs: - - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Build the Docker image - run: docker build . 
--file Dockerfile --tag my-image-name:$(date +%s) diff --git a/.github/workflows/docker-publish-d.yml b/.github/workflows/docker-publish-d.yml new file mode 100644 index 0000000..ecb85ca --- /dev/null +++ b/.github/workflows/docker-publish-d.yml @@ -0,0 +1,48 @@ +name: Publish Docker image + +on: + release: + type: [published] + +jobs: + + push_to_registry: + name: Push Docker image + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + attestations: write + id-token: write + steps: + - name: Check out the repo + uses: actions/checkout@v4 + + - name: Docker Login + uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Docker Metadata action + id: meta + uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 + with: + image: noobwastaken/tensor-array + + - name: Build and push Docker images + id: push + uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 + with: + context: . 
+ file: ./Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + - name: Attest Build Provenance + uses: actions/attest-build-provenance@v2 + with: + subject-name: index.docker.io/noobwastaken/tensor-array + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: true From fd8b6153ac0e78a3901ec6a1af99a97f61b6ce00 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sun, 6 Jul 2025 20:02:50 +0700 Subject: [PATCH 081/281] changes --- .github/workflows/cmake-single-platform.yml | 4 ++-- .github/workflows/codeql.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index 608b760..efc4338 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -30,8 +30,8 @@ jobs: temp: ${{ runner.temp }} cuda: ${{ matrix.cuda-version }} run: | - chmod +x ./scripts/actions/install-cuda-ubuntu.sh - ./scripts/actions/install-cuda-ubuntu.sh + chmod +x ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh + ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh shell: bash - name: Configure CMake diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 47fd3b7..d4e74da 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -53,8 +53,8 @@ jobs: temp: ${{ runner.temp }} cuda: ${{ matrix.cuda-version }} run: | - chmod +x ./scripts/actions/install-cuda-ubuntu.sh - ./scripts/actions/install-cuda-ubuntu.sh + chmod +x ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh + ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh shell: bash # Initializes the CodeQL tools for scanning. 
From 10d1556c4c1bd2601412735745142621c737614f Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 7 Jul 2025 13:21:52 +0700 Subject: [PATCH 082/281] test 01 --- .github/workflows/cmake-single-platform.yml | 3 +- scripts/actions/install-cuda-rhel.sh | 127 ++++++++++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 scripts/actions/install-cuda-rhel.sh diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index efc4338..c7f43ed 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -17,11 +17,12 @@ jobs: # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. # You can convert this to a matrix build if you need cross-platform coverage. # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-latest strategy: fail-fast: false matrix: cuda-version: [ "12.9" ] + os: [ "ubuntu-22.04", "ubuntu-20.04" ] + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh new file mode 100644 index 0000000..ffa1be0 --- /dev/null +++ b/scripts/actions/install-cuda-rhel.sh @@ -0,0 +1,127 @@ +CUDA_PACKAGES_IN=( + "cuda-compiler" + "cuda-cudart" + "cuda-nvtx" + "cuda-nvrtc" + "libcurand-dev" + "libcublas-dev" + "libcufft-dev" + "cuda-cccl" +) + +function version_ge() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$(printf '%s\n' "$@" | sort -V | head -n 1)" == "$2" ] +} +# returns 0 (true) if a > b +function version_gt() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." 
&& exit 1 + [ "$1" = "$2" ] && return 1 || version_ge $1 $2 +} +# returns 0 (true) if a <= b +function version_le() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$(printf '%s\n' "$@" | sort -V | head -n 1)" == "$1" ] +} +# returns 0 (true) if a < b +function version_lt() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$1" = "$2" ] && return 1 || version_le $1 $2 +} + + +LINUX_ID=$(lsb_release -si) +LINUX_ID="${LINUX_ID,,}" + +LINUX_VERSION=$(lsb_release -sr) +LINUX_VERSION="${LINUX_VERSION//.}" + +LOCATION_TEMP=${temp} + +CUDA_VERSION_MAJOR_MINOR=${cuda} + +CUDA_MAJOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f1) +CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) +CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f3) + +CUDA_PACKAGES="" +for package in "${CUDA_PACKAGES_IN[@]}" +do : + # @todo This is not perfect. Should probably provide a separate list for diff versions + # cuda-compiler-X-Y if CUDA >= 9.1 else cuda-nvcc-X-Y + if [[ "${package}" == "cuda-nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then + package="cuda-compiler" + elif [[ "${package}" == "cuda-compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then + package="cuda-nvcc" + # CUB/Thrust are packages in cuda-thrust in 11.3, but cuda-cccl in 11.4+ + elif [[ "${package}" == "cuda-thrust" || "${package}" == "cuda-cccl" ]]; then + # CUDA cuda-thrust >= 11.4 + if version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.4" ; then + package="cuda-cccl" + # Use cuda-thrust > 11.2 + elif version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.3" ; then + package="cuda-thrust" + # Do not include this pacakge < 11.3 + else + continue + fi + fi + # Build the full package name and append to the string. 
+ CUDA_PACKAGES+=" ${package}-${CUDA_MAJOR}-${CUDA_MINOR}" +done +echo "CUDA_PACKAGES ${CUDA_PACKAGES}" + +CPU_ARCH="x86_64" +PIN_FILENAME="cuda-${LINUX_ID}${LINUX_VERSION}.pin" +PIN_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/${PIN_FILENAME}" +KERYRING_PACKAGE_FILENAME="cuda-keyring_1.1-1_all.deb" +KEYRING_PACKAGE_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/${KERYRING_PACKAGE_FILENAME}" +REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/" + +is_root=false +if (( $EUID == 0)); then + is_root=true +fi +# Find if sudo is available +has_sudo=false +if command -v sudo &> /dev/null ; then + has_sudo=true +fi +# Decide if we can proceed or not (root or sudo is required) and if so store whether sudo should be used or not. +if [ "$is_root" = false ] && [ "$has_sudo" = false ]; then + echo "Root or sudo is required. Aborting." + exit 1 +elif [ "$is_root" = false ] ; then + USE_SUDO=sudo +else + USE_SUDO= +fi + +echo "Adding CUDA Repository" +wget ${PIN_URL} +$USE_SUDO mv ${PIN_FILENAME} /etc/apt/preferences.d/cuda-repository-pin-600 +wget ${KEYRING_PACKAGE_URL} && ${USE_SUDO} dpkg -i ${KERYRING_PACKAGE_FILENAME} && rm ${KERYRING_PACKAGE_FILENAME} +$USE_SUDO dnf config-manager --add-repo ${REPO_URL} +$USE_SUDO dnf update + +$USE_SUDO dnf -y install ${CUDA_PACKAGES} + +if [[ $? -ne 0 ]]; then + echo "CUDA Installation Error." 
+ exit 1 +fi + +CUDA_PATH=/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} +echo "CUDA_PATH=${CUDA_PATH}" +export CUDA_PATH=${CUDA_PATH} +export PATH="$CUDA_PATH/bin:$PATH" +export LD_LIBRARY_PATH="$CUDA_PATH/lib:$LD_LIBRARY_PATH" +export LD_LIBRARY_PATH="$CUDA_PATH/lib64:$LD_LIBRARY_PATH" + +if [[ $GITHUB_ACTIONS ]]; then + # Set paths for subsequent steps, using ${CUDA_PATH} + echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" + echo "${CUDA_PATH}/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV +fi From 7c0b7e982aa012a8f986f2b95d7bbdae394071b1 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 7 Jul 2025 14:44:43 +0700 Subject: [PATCH 083/281] test --- .github/workflows/cmake-single-platform.yml | 15 +++++++++++++-- scripts/actions/install-cuda-rhel.sh | 11 +++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index c7f43ed..9a204aa 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -21,12 +21,13 @@ jobs: fail-fast: false matrix: cuda-version: [ "12.9" ] - os: [ "ubuntu-22.04", "ubuntu-20.04" ] + os: [ "ubuntu-22.04", "debian-11", "rhel-8", "rhel-9" ] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 - - name: Run CUDA bash shell + - name: Run CUDA bash shell Ubuntu/Debian + if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian') env: temp: ${{ runner.temp }} cuda: ${{ matrix.cuda-version }} @@ -35,6 +36,16 @@ jobs: ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh shell: bash + - name: Run CUDA bash shell RHEL + if: startsWith(matrix.os, 'rhel') + env: + temp: ${{ runner.temp }} + cuda: ${{ matrix.cuda-version }} + run: | + chmod +x 
${{github.workspace}}/scripts/actions/install-cuda-rhel.sh + ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh + shell: bash + - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index ffa1be0..1791656 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -36,6 +36,17 @@ LINUX_ID="${LINUX_ID,,}" LINUX_VERSION=$(lsb_release -sr) LINUX_VERSION="${LINUX_VERSION//.}" +LINUX_VERSION_MAJOR_MINOR=$(lsb_release -sr) +LINUX_MAJOR=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. -f1) +LINUX_MINOR=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. -f2) +LINUX_PATCH=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. -f3) + +if [[ -z "${LINUX_ID}" == "almalinux" ]]; then + echo "LINUX_ID: ${LINUX_ID} change to rhel" + LINUX_ID="rhel" + LINUX_VERSION=${LINUX_MAJOR} +fi + LOCATION_TEMP=${temp} CUDA_VERSION_MAJOR_MINOR=${cuda} From 166c70d92f285e5aebd8f86e36eba846a1a88f68 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 7 Jul 2025 14:49:46 +0700 Subject: [PATCH 084/281] test --- scripts/actions/install-cuda-rhel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 1791656..60a4c63 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -41,7 +41,7 @@ LINUX_MAJOR=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. -f1) LINUX_MINOR=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. -f2) LINUX_PATCH=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. 
-f3) -if [[ -z "${LINUX_ID}" == "almalinux" ]]; then +if [[ "${LINUX_ID}" == "almalinux" ]]; then echo "LINUX_ID: ${LINUX_ID} change to rhel" LINUX_ID="rhel" LINUX_VERSION=${LINUX_MAJOR} From 4dfe714949a31258880a702ad1a94ca9f3d00c9a Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 7 Jul 2025 15:04:34 +0700 Subject: [PATCH 085/281] test --- .github/workflows/cmake-single-platform.yml | 2 +- scripts/actions/install-cuda-rhel.sh | 13 +++---------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index 9a204aa..46141b2 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: cuda-version: [ "12.9" ] - os: [ "ubuntu-22.04", "debian-11", "rhel-8", "rhel-9" ] + os: [ "ubuntu-22.04" ] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 60a4c63..537c497 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -41,7 +41,7 @@ LINUX_MAJOR=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. -f1) LINUX_MINOR=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. -f2) LINUX_PATCH=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. 
-f3) -if [[ "${LINUX_ID}" == "almalinux" ]]; then +if [[ "${LINUX_ID}" == "almalinux" || "${LINUX_ID}" == "centos" || "${LINUX_ID}" == "oracle" ]]; then echo "LINUX_ID: ${LINUX_ID} change to rhel" LINUX_ID="rhel" LINUX_VERSION=${LINUX_MAJOR} @@ -83,11 +83,7 @@ done echo "CUDA_PACKAGES ${CUDA_PACKAGES}" CPU_ARCH="x86_64" -PIN_FILENAME="cuda-${LINUX_ID}${LINUX_VERSION}.pin" -PIN_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/${PIN_FILENAME}" -KERYRING_PACKAGE_FILENAME="cuda-keyring_1.1-1_all.deb" -KEYRING_PACKAGE_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/${KERYRING_PACKAGE_FILENAME}" -REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/" +REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/cuda-${LINUX_ID}${LINUX_VERSION}.repo" is_root=false if (( $EUID == 0)); then @@ -109,11 +105,8 @@ else fi echo "Adding CUDA Repository" -wget ${PIN_URL} -$USE_SUDO mv ${PIN_FILENAME} /etc/apt/preferences.d/cuda-repository-pin-600 -wget ${KEYRING_PACKAGE_URL} && ${USE_SUDO} dpkg -i ${KERYRING_PACKAGE_FILENAME} && rm ${KERYRING_PACKAGE_FILENAME} $USE_SUDO dnf config-manager --add-repo ${REPO_URL} -$USE_SUDO dnf update +$USE_SUDO dnf clean all $USE_SUDO dnf -y install ${CUDA_PACKAGES} From db50ba5c68debe232b9695a3155cc71540739da8 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 7 Jul 2025 15:12:08 +0700 Subject: [PATCH 086/281] test --- scripts/actions/install-cuda-rhel.sh | 37 ++-------------------------- 1 file changed, 2 insertions(+), 35 deletions(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 537c497..f53ef6f 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -1,13 +1,4 @@ -CUDA_PACKAGES_IN=( - "cuda-compiler" - 
"cuda-cudart" - "cuda-nvtx" - "cuda-nvrtc" - "libcurand-dev" - "libcublas-dev" - "libcufft-dev" - "cuda-cccl" -) +#!/bin/bash function version_ge() { [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 @@ -55,31 +46,7 @@ CUDA_MAJOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f1) CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f3) -CUDA_PACKAGES="" -for package in "${CUDA_PACKAGES_IN[@]}" -do : - # @todo This is not perfect. Should probably provide a separate list for diff versions - # cuda-compiler-X-Y if CUDA >= 9.1 else cuda-nvcc-X-Y - if [[ "${package}" == "cuda-nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then - package="cuda-compiler" - elif [[ "${package}" == "cuda-compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then - package="cuda-nvcc" - # CUB/Thrust are packages in cuda-thrust in 11.3, but cuda-cccl in 11.4+ - elif [[ "${package}" == "cuda-thrust" || "${package}" == "cuda-cccl" ]]; then - # CUDA cuda-thrust >= 11.4 - if version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.4" ; then - package="cuda-cccl" - # Use cuda-thrust > 11.2 - elif version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.3" ; then - package="cuda-thrust" - # Do not include this pacakge < 11.3 - else - continue - fi - fi - # Build the full package name and append to the string. 
- CUDA_PACKAGES+=" ${package}-${CUDA_MAJOR}-${CUDA_MINOR}" -done +CUDA_PACKAGES="cuda-toolkit-${CUDA_MAJOR}-${CUDA_MINOR}" echo "CUDA_PACKAGES ${CUDA_PACKAGES}" CPU_ARCH="x86_64" From fb29585b33b31f7e57adfcd078bd64a6f483ea99 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 7 Jul 2025 15:22:33 +0700 Subject: [PATCH 087/281] test --- scripts/actions/install-cuda-rhel.sh | 38 ++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index f53ef6f..7ba08b4 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -1,4 +1,14 @@ -#!/bin/bash +CUDA_PACKAGES_IN=( + "cuda-compiler" + "cuda-cudart" + "cuda-nvtx" + "cuda-nvrtc" + "libcurand-dev" + "libcublas-dev" + "libcufft-dev" + "cuda-cccl" +) + function version_ge() { [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 @@ -46,7 +56,31 @@ CUDA_MAJOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f1) CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f3) -CUDA_PACKAGES="cuda-toolkit-${CUDA_MAJOR}-${CUDA_MINOR}" +CUDA_PACKAGES="" +for package in "${CUDA_PACKAGES_IN[@]}" +do : + # @todo This is not perfect. 
Should probably provide a separate list for diff versions + # cuda-compiler-X-Y if CUDA >= 9.1 else cuda-nvcc-X-Y + if [[ "${package}" == "cuda-nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then + package="cuda-compiler" + elif [[ "${package}" == "cuda-compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then + package="cuda-nvcc" + # CUB/Thrust are packages in cuda-thrust in 11.3, but cuda-cccl in 11.4+ + elif [[ "${package}" == "cuda-thrust" || "${package}" == "cuda-cccl" ]]; then + # CUDA cuda-thrust >= 11.4 + if version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.4" ; then + package="cuda-cccl" + # Use cuda-thrust > 11.2 + elif version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.3" ; then + package="cuda-thrust" + # Do not include this pacakge < 11.3 + else + continue + fi + fi + # Build the full package name and append to the string. + CUDA_PACKAGES+=" ${package}-${CUDA_MAJOR}-${CUDA_MINOR}" +done echo "CUDA_PACKAGES ${CUDA_PACKAGES}" CPU_ARCH="x86_64" From 25bf9ae421302f762a5058c8d22b241e1c0189d2 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 7 Jul 2025 15:28:08 +0700 Subject: [PATCH 088/281] test --- scripts/actions/install-cuda-rhel.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 7ba08b4..5372216 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -3,9 +3,9 @@ CUDA_PACKAGES_IN=( "cuda-cudart" "cuda-nvtx" "cuda-nvrtc" - "libcurand-dev" - "libcublas-dev" - "libcufft-dev" + "libcurand" + "libcublas" + "libcufft" "cuda-cccl" ) From cf736ef3ee7d9d93a364f4411873eaf5fdd2b9a7 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 7 Jul 2025 20:42:27 +0700 Subject: [PATCH 089/281] test --- scripts/actions/install-cuda-rhel.sh | 4 ++-- scripts/actions/install-cuda-ubuntu.sh | 3 +-- 2 files changed, 3 insertions(+), 4 
deletions(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 5372216..a8f3c62 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -118,6 +118,7 @@ fi CUDA_PATH=/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} echo "CUDA_PATH=${CUDA_PATH}" +$USE_SUDO ln -s ${CUDA_PATH} /usr/local/cuda export CUDA_PATH=${CUDA_PATH} export PATH="$CUDA_PATH/bin:$PATH" export LD_LIBRARY_PATH="$CUDA_PATH/lib:$LD_LIBRARY_PATH" @@ -127,6 +128,5 @@ if [[ $GITHUB_ACTIONS ]]; then # Set paths for subsequent steps, using ${CUDA_PATH} echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV - echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV + echo "${CUDA_PATH}/lib:${CUDA_PATH}/lib64" >> $GITHUB_ENV fi diff --git a/scripts/actions/install-cuda-ubuntu.sh b/scripts/actions/install-cuda-ubuntu.sh index f45dcf4..f61f880 100644 --- a/scripts/actions/install-cuda-ubuntu.sh +++ b/scripts/actions/install-cuda-ubuntu.sh @@ -122,6 +122,5 @@ if [[ $GITHUB_ACTIONS ]]; then # Set paths for subsequent steps, using ${CUDA_PATH} echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV - echo "LD_LIBRARY_PATH=${CUDA_PATH}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV + echo "${CUDA_PATH}/lib:${CUDA_PATH}/lib64" >> $GITHUB_ENV fi From 9ee5068c39c449c6dbce960af6e0ac6e68a42a22 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 7 Jul 2025 20:51:13 +0700 Subject: [PATCH 090/281] test --- scripts/actions/install-cuda-rhel.sh | 8 ++++---- scripts/actions/install-cuda-ubuntu.sh | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 
a8f3c62..89c9c21 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -118,15 +118,15 @@ fi CUDA_PATH=/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} echo "CUDA_PATH=${CUDA_PATH}" -$USE_SUDO ln -s ${CUDA_PATH} /usr/local/cuda export CUDA_PATH=${CUDA_PATH} export PATH="$CUDA_PATH/bin:$PATH" -export LD_LIBRARY_PATH="$CUDA_PATH/lib:$LD_LIBRARY_PATH" -export LD_LIBRARY_PATH="$CUDA_PATH/lib64:$LD_LIBRARY_PATH" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib64" if [[ $GITHUB_ACTIONS ]]; then # Set paths for subsequent steps, using ${CUDA_PATH} echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "${CUDA_PATH}/lib:${CUDA_PATH}/lib64" >> $GITHUB_ENV + echo "${CUDA_PATH}/lib:$GITHUB_ENV" >> $GITHUB_ENV + echo "${CUDA_PATH}/lib64:$GITHUB_ENV" >> $GITHUB_ENV fi diff --git a/scripts/actions/install-cuda-ubuntu.sh b/scripts/actions/install-cuda-ubuntu.sh index f61f880..29d87f5 100644 --- a/scripts/actions/install-cuda-ubuntu.sh +++ b/scripts/actions/install-cuda-ubuntu.sh @@ -115,12 +115,13 @@ CUDA_PATH=/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} echo "CUDA_PATH=${CUDA_PATH}" export CUDA_PATH=${CUDA_PATH} export PATH="$CUDA_PATH/bin:$PATH" -export LD_LIBRARY_PATH="$CUDA_PATH/lib:$LD_LIBRARY_PATH" -export LD_LIBRARY_PATH="$CUDA_PATH/lib64:$LD_LIBRARY_PATH" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib64" if [[ $GITHUB_ACTIONS ]]; then # Set paths for subsequent steps, using ${CUDA_PATH} echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "${CUDA_PATH}/lib:${CUDA_PATH}/lib64" >> $GITHUB_ENV + echo "${CUDA_PATH}/lib:$GITHUB_ENV" >> $GITHUB_ENV + echo "${CUDA_PATH}/lib64:$GITHUB_ENV" >> $GITHUB_ENV fi From 5ece0c3fd873d6b421b6dd07106b6c2bb0786d82 Mon Sep 17 00:00:00 2001 From: Noob 
<63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 7 Jul 2025 21:12:11 +0700 Subject: [PATCH 091/281] test --- scripts/actions/install-cuda-rhel.sh | 7 ++++--- scripts/actions/install-cuda-ubuntu.sh | 13 +++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 89c9c21..4e0ea27 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -126,7 +126,8 @@ export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib64" if [[ $GITHUB_ACTIONS ]]; then # Set paths for subsequent steps, using ${CUDA_PATH} echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" - echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "${CUDA_PATH}/lib:$GITHUB_ENV" >> $GITHUB_ENV - echo "${CUDA_PATH}/lib64:$GITHUB_ENV" >> $GITHUB_ENV + echo "${CUDA_PATH}" >> $GITHUB_PATH + echo "PATH=$PATH:$CUDA_PATH/bin" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib64" >> $GITHUB_ENV fi diff --git a/scripts/actions/install-cuda-ubuntu.sh b/scripts/actions/install-cuda-ubuntu.sh index 29d87f5..5e63fc6 100644 --- a/scripts/actions/install-cuda-ubuntu.sh +++ b/scripts/actions/install-cuda-ubuntu.sh @@ -114,14 +114,15 @@ fi CUDA_PATH=/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} echo "CUDA_PATH=${CUDA_PATH}" export CUDA_PATH=${CUDA_PATH} -export PATH="$CUDA_PATH/bin:$PATH" -export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib" -export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib64" +export PATH="$PATH:$CUDA_PATH/bin" +export LD_LIBRARY_PATH="$CUDA_PATH/lib:$LD_LIBRARY_PATH" +export LD_LIBRARY_PATH="$CUDA_PATH/lib64:$LD_LIBRARY_PATH" if [[ $GITHUB_ACTIONS ]]; then # Set paths for subsequent steps, using ${CUDA_PATH} echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" - echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "${CUDA_PATH}/lib:$GITHUB_ENV" >> $GITHUB_ENV - 
echo "${CUDA_PATH}/lib64:$GITHUB_ENV" >> $GITHUB_ENV + echo "${CUDA_PATH}" >> $GITHUB_PATH + echo "PATH=$PATH:$CUDA_PATH/bin" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib64" >> $GITHUB_ENV fi From b4dbc1b37b9d26334ece0253b02cb18b6cea76eb Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 8 Jul 2025 19:41:17 +0700 Subject: [PATCH 092/281] test --- scripts/actions/install-cuda-rhel.sh | 7 ++++--- scripts/actions/install-cuda-ubuntu.sh | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 4e0ea27..84c97f1 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -1,12 +1,13 @@ CUDA_PACKAGES_IN=( "cuda-compiler" + "cuda-nvcc" "cuda-cudart" "cuda-nvtx" "cuda-nvrtc" - "libcurand" - "libcublas" - "libcufft" "cuda-cccl" + "libcurand-devel" + "libcublas-devel" + "libcufft-devel" ) diff --git a/scripts/actions/install-cuda-ubuntu.sh b/scripts/actions/install-cuda-ubuntu.sh index 5e63fc6..16d9a3c 100644 --- a/scripts/actions/install-cuda-ubuntu.sh +++ b/scripts/actions/install-cuda-ubuntu.sh @@ -3,10 +3,10 @@ CUDA_PACKAGES_IN=( "cuda-cudart" "cuda-nvtx" "cuda-nvrtc" + "cuda-cccl" "libcurand-dev" "libcublas-dev" "libcufft-dev" - "cuda-cccl" ) function version_ge() { From 97fbfb3cfcd68b6bb9c526fb2088f110e8f55ec1 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 8 Jul 2025 19:41:37 +0700 Subject: [PATCH 093/281] test --- scripts/actions/install-cuda-rhel.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 84c97f1..137e594 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -1,6 +1,5 @@ CUDA_PACKAGES_IN=( "cuda-compiler" - 
"cuda-nvcc" "cuda-cudart" "cuda-nvtx" "cuda-nvrtc" From dadf3f2b9e7e8b5333a8b84d6137863aa8550ac0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 9 Jul 2025 03:04:09 +0700 Subject: [PATCH 094/281] Update install-cuda-rhel.sh --- scripts/actions/install-cuda-rhel.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 137e594..c59385e 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -106,10 +106,10 @@ else fi echo "Adding CUDA Repository" -$USE_SUDO dnf config-manager --add-repo ${REPO_URL} -$USE_SUDO dnf clean all +$USE_SUDO yum-config-manager --add-repo ${REPO_URL} +$USE_SUDO yum clean all -$USE_SUDO dnf -y install ${CUDA_PACKAGES} +$USE_SUDO yum install -y ${CUDA_PACKAGES} if [[ $? -ne 0 ]]; then echo "CUDA Installation Error." From 128b49366b0e9f749ebaf9850d7225904fb1b6d5 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 9 Jul 2025 03:56:45 +0000 Subject: [PATCH 095/281] test --- src/tensor_array/core/CMakeLists.txt | 34 ++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 1c109a4..8f80650 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -9,20 +9,6 @@ install( enable_language(CUDA) -include(FindCUDAToolkit) - -set(CMAKE_CUDA_ARCHITECTURES 52 75 89) -set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) -list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - -file(GLOB TensorArray_src "*.cc" "*.cu") - -# file(MAKE_DIRECTORY "include/tensor_array/core") - -add_library(tensorarray_core SHARED ${TensorArray_src}) - -target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) - set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) set_property(TARGET 
tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) @@ -31,9 +17,29 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) +find_package(CUDAToolkit) +if (CUDAToolkit_FOUND) +set(CMAKE_CUDA_ARCHITECTURES 52 75 89) +set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) +list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) +endif() + +if (CUDAToolkit_FOUND) +file(GLOB TensorArray_src "*.cc" "*.cu") +else() +file(GLOB TensorArray_src "*.cc") +endif() + +# file(MAKE_DIRECTORY "include/tensor_array/core") + +add_library(tensorarray_core SHARED ${TensorArray_src}) + +if (CUDAToolkit_FOUND) +target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::cublas) +endif() install( TARGETS tensorarray_core From c96c8374781a70315251a6fb788fcbd6e79b0eaf Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 9 Jul 2025 03:56:58 +0000 Subject: [PATCH 096/281] test --- src/tensor_array/core/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 8f80650..d87b3f6 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -18,6 +18,7 @@ set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) find_package(CUDAToolkit) + if (CUDAToolkit_FOUND) set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) From 699bf476a3cc3f2c36520817ab3405b6a2bdce34 Mon Sep 17 00:00:00 2001 From: Noob 
<63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 9 Jul 2025 04:05:13 +0000 Subject: [PATCH 097/281] test --- scripts/actions/install-cuda-rhel.sh | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index c59385e..b03e05d 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -42,9 +42,16 @@ LINUX_MAJOR=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. -f1) LINUX_MINOR=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. -f2) LINUX_PATCH=$(echo "${LINUX_VERSION_MAJOR_MINOR}" | cut -d. -f3) +YUM_PACKAGE_MANAGER="yum" +YUM_CONFIG_MANAGER="yum-config-manager" + if [[ "${LINUX_ID}" == "almalinux" || "${LINUX_ID}" == "centos" || "${LINUX_ID}" == "oracle" ]]; then echo "LINUX_ID: ${LINUX_ID} change to rhel" LINUX_ID="rhel" + if [[ "${LINUX_MAJOR}" -ge "8" ]]; then + YUM_PACKAGE_MANAGER="dnf" + YUM_CONFIG_MANAGER="dnf config-manager" + fi LINUX_VERSION=${LINUX_MAJOR} fi @@ -106,10 +113,10 @@ else fi echo "Adding CUDA Repository" -$USE_SUDO yum-config-manager --add-repo ${REPO_URL} -$USE_SUDO yum clean all +$USE_SUDO $YUM_CONFIG_MANAGER --add-repo ${REPO_URL} +$USE_SUDO $YUM_PACKAGE_MANAGER clean all -$USE_SUDO yum install -y ${CUDA_PACKAGES} +$USE_SUDO $YUM_PACKAGE_MANAGER install -y ${CUDA_PACKAGES} if [[ $? -ne 0 ]]; then echo "CUDA Installation Error." 
From 0d26e9847f56d4c513befd2e1965a74bb3764c1e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 9 Jul 2025 04:15:14 +0000 Subject: [PATCH 098/281] test --- src/tensor_array/core/CMakeLists.txt | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index d87b3f6..3fd38e5 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -9,23 +9,12 @@ install( enable_language(CUDA) -set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) - find_package(CUDAToolkit) if (CUDAToolkit_FOUND) set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") -set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) -set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) endif() if (CUDAToolkit_FOUND) @@ -34,6 +23,20 @@ else() file(GLOB TensorArray_src "*.cc") endif() +set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) + +set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) + +if (CUDAToolkit_FOUND) +set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) +set_property(TARGET 
tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) +endif() + # file(MAKE_DIRECTORY "include/tensor_array/core") add_library(tensorarray_core SHARED ${TensorArray_src}) From b6a6405362a5045259f49dd3da6e66e9de441bfb Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 9 Jul 2025 11:18:48 +0700 Subject: [PATCH 099/281] Update CMakeLists.txt --- src/tensor_array/core/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor_array/core/CMakeLists.txt index 3fd38e5..d2e0e76 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor_array/core/CMakeLists.txt @@ -23,6 +23,10 @@ else() file(GLOB TensorArray_src "*.cc") endif() +# file(MAKE_DIRECTORY "include/tensor_array/core") + +add_library(tensorarray_core SHARED ${TensorArray_src}) + set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) @@ -37,10 +41,6 @@ set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) endif() -# file(MAKE_DIRECTORY "include/tensor_array/core") - -add_library(tensorarray_core SHARED ${TensorArray_src}) - if (CUDAToolkit_FOUND) target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::cublas) endif() From 04e4819da358b83818a3e1fc312a284fb9248c64 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 9 Jul 2025 05:17:38 +0000 Subject: [PATCH 100/281] test --- scripts/actions/install-cuda-rhel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index b03e05d..3e4aac9 100644 --- 
a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -1,6 +1,6 @@ CUDA_PACKAGES_IN=( "cuda-compiler" - "cuda-cudart" + "cuda-cudart-devel" "cuda-nvtx" "cuda-nvrtc" "cuda-cccl" From 55ce8de63f1adf1ef117704c0737c2b71664584a Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 15 Jul 2025 12:39:28 +0000 Subject: [PATCH 101/281] changes folder name --- CMakeLists.txt | 20 +++++++++---------- README.md | 15 ++++++++++++++ scripts/actions/install-cuda-rhel.sh | 2 +- scripts/actions/install-cuda-ubuntu.sh | 4 ++-- .../core/CMakeLists.txt | 6 +++--- .../core/data_type_wrapper.cc | 0 .../core/data_type_wrapper.hh | 0 .../core/devices.cc | 0 .../core/devices.hh | 0 .../core/extern_type_map.cc | 0 .../core/extern_type_map.hh | 0 .../core/initializer_wrapper.hh | 0 .../core/tensor.cc | 0 .../core/tensor.cu | 0 .../core/tensor.hh | 0 .../core/tensor_blas.cc | 0 .../core/tensor_cast.cu | 0 .../core/tensor_convolution.cc | 0 .../core/tensor_convolution.cu | 0 .../core/tensor_reduce.cu | 0 .../core/tensorarray.hh | 0 .../core/tensorbase.cc | 0 .../core/tensorbase.hh | 0 .../layers/CMakeLists.txt | 6 +++--- .../layers/attention.cc | 0 .../layers/attention.hh | 0 .../layers/convolution.cc | 0 .../layers/convolution.hh | 0 .../layers/layer_any.cc | 0 .../layers/layer_any.hh | 0 .../layers/layer_holder.hh | 0 .../layers/layer_impl.cc | 0 .../layers/layer_impl.hh | 2 +- .../layers/layer_utility.cc | 0 .../layers/layer_utility.hh | 0 .../layers/linear.cc | 0 .../layers/linear.hh | 0 .../layers/normalization.cc | 0 .../layers/normalization.hh | 0 .../layers/recurrent.cc | 0 .../layers/recurrent.hh | 0 .../layers/sequential.cc | 0 .../layers/sequential.hh | 0 .../layers/transformer.cc | 0 .../layers/transformer.hh | 0 45 files changed, 35 insertions(+), 20 deletions(-) rename src/{tensor_array => tensor-array}/core/CMakeLists.txt (89%) rename src/{tensor_array => tensor-array}/core/data_type_wrapper.cc (100%) 
rename src/{tensor_array => tensor-array}/core/data_type_wrapper.hh (100%) rename src/{tensor_array => tensor-array}/core/devices.cc (100%) rename src/{tensor_array => tensor-array}/core/devices.hh (100%) rename src/{tensor_array => tensor-array}/core/extern_type_map.cc (100%) rename src/{tensor_array => tensor-array}/core/extern_type_map.hh (100%) rename src/{tensor_array => tensor-array}/core/initializer_wrapper.hh (100%) rename src/{tensor_array => tensor-array}/core/tensor.cc (100%) rename src/{tensor_array => tensor-array}/core/tensor.cu (100%) rename src/{tensor_array => tensor-array}/core/tensor.hh (100%) rename src/{tensor_array => tensor-array}/core/tensor_blas.cc (100%) rename src/{tensor_array => tensor-array}/core/tensor_cast.cu (100%) rename src/{tensor_array => tensor-array}/core/tensor_convolution.cc (100%) rename src/{tensor_array => tensor-array}/core/tensor_convolution.cu (100%) rename src/{tensor_array => tensor-array}/core/tensor_reduce.cu (100%) rename src/{tensor_array => tensor-array}/core/tensorarray.hh (100%) rename src/{tensor_array => tensor-array}/core/tensorbase.cc (100%) rename src/{tensor_array => tensor-array}/core/tensorbase.hh (100%) rename src/{tensor_array => tensor-array}/layers/CMakeLists.txt (83%) rename src/{tensor_array => tensor-array}/layers/attention.cc (100%) rename src/{tensor_array => tensor-array}/layers/attention.hh (100%) rename src/{tensor_array => tensor-array}/layers/convolution.cc (100%) rename src/{tensor_array => tensor-array}/layers/convolution.hh (100%) rename src/{tensor_array => tensor-array}/layers/layer_any.cc (100%) rename src/{tensor_array => tensor-array}/layers/layer_any.hh (100%) rename src/{tensor_array => tensor-array}/layers/layer_holder.hh (100%) rename src/{tensor_array => tensor-array}/layers/layer_impl.cc (100%) rename src/{tensor_array => tensor-array}/layers/layer_impl.hh (98%) rename src/{tensor_array => tensor-array}/layers/layer_utility.cc (100%) rename src/{tensor_array => 
tensor-array}/layers/layer_utility.hh (100%) rename src/{tensor_array => tensor-array}/layers/linear.cc (100%) rename src/{tensor_array => tensor-array}/layers/linear.hh (100%) rename src/{tensor_array => tensor-array}/layers/normalization.cc (100%) rename src/{tensor_array => tensor-array}/layers/normalization.hh (100%) rename src/{tensor_array => tensor-array}/layers/recurrent.cc (100%) rename src/{tensor_array => tensor-array}/layers/recurrent.hh (100%) rename src/{tensor_array => tensor-array}/layers/sequential.cc (100%) rename src/{tensor_array => tensor-array}/layers/sequential.hh (100%) rename src/{tensor_array => tensor-array}/layers/transformer.cc (100%) rename src/{tensor_array => tensor-array}/layers/transformer.hh (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index e10cf74..2873cb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,10 +3,10 @@ cmake_minimum_required(VERSION 3.18) project(TensorArray) include(GNUInstallDirs) -set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) +# set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) -add_subdirectory("src/tensor_array/core") -add_subdirectory("src/tensor_array/layers") +add_subdirectory("src/tensor-array/core") +add_subdirectory("src/tensor-array/layers") set(CPACK_PACKAGE_NAME "TensorArray") set(CPACK_PACKAGE_VENDOR "TensorArray-Creators") @@ -20,7 +20,7 @@ set(CPACK_PACKAGE_INSTALL_DIRECTORY "A machine learning libraries") install( EXPORT TensorArrayTargets - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/TensorArray + DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/cmake NAMESPACE TensorArray:: FILE TensorArrayTargets.cmake ) @@ -29,22 +29,22 @@ include(CMakePackageConfigHelpers) configure_package_config_file( "Config.cmake.in" "TensorArrayConfig.cmake" - INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/TensorArray + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/cmake PATH_VARS CMAKE_INSTALL_LIBDIR CMAKE_INSTALL_INCLUDEDIR ) write_basic_package_version_file( - 
${CMAKE_CURRENT_BINARY_DIR}/TensorArrayConfigVersion.cmake - VERSION 0.1.0 + ${CMAKE_CURRENT_BINARY_DIR}/tensor-array/TensorArrayConfigVersion.cmake + VERSION 0.2.0 COMPATIBILITY SameMajorVersion ) ### Install Config and ConfigVersion files install( FILES - ${CMAKE_CURRENT_BINARY_DIR}/TensorArrayConfig.cmake - ${CMAKE_CURRENT_BINARY_DIR}/TensorArrayConfigVersion.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/TensorArray + ${CMAKE_CURRENT_BINARY_DIR}/tensor-array/TensorArrayConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/tensor-array/TensorArrayConfigVersion.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/cmake ) include(CPack) diff --git a/README.md b/README.md index 92417d6..48469ea 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,22 @@ A C++ Tensor library that can be used to work with machine learning or deep lear Build your own neural network models with this library. +## Installing `Tensor-Array` +You need to clone repository by using [Git](https://git-scm.com/) + +You need to install `Tensor-Array` with [CMake](https://cmake.org/) + +```shell +git clone https://github.com/Tensor-Array/Tensor-Array.git +cd Tensor-Array +mkdir build +cd build +cmake .. +cmake --build . +cmake --install . +cd .. 
+``` ## Why this repository named `Tensor-Array` diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 3e4aac9..b995f77 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -126,7 +126,7 @@ fi CUDA_PATH=/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} echo "CUDA_PATH=${CUDA_PATH}" export CUDA_PATH=${CUDA_PATH} -export PATH="$CUDA_PATH/bin:$PATH" +export PATH="$PATH:$CUDA_PATH/bin" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib64" diff --git a/scripts/actions/install-cuda-ubuntu.sh b/scripts/actions/install-cuda-ubuntu.sh index 16d9a3c..64e5b99 100644 --- a/scripts/actions/install-cuda-ubuntu.sh +++ b/scripts/actions/install-cuda-ubuntu.sh @@ -115,8 +115,8 @@ CUDA_PATH=/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} echo "CUDA_PATH=${CUDA_PATH}" export CUDA_PATH=${CUDA_PATH} export PATH="$PATH:$CUDA_PATH/bin" -export LD_LIBRARY_PATH="$CUDA_PATH/lib:$LD_LIBRARY_PATH" -export LD_LIBRARY_PATH="$CUDA_PATH/lib64:$LD_LIBRARY_PATH" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib64" if [[ $GITHUB_ACTIONS ]]; then # Set paths for subsequent steps, using ${CUDA_PATH} diff --git a/src/tensor_array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt similarity index 89% rename from src/tensor_array/core/CMakeLists.txt rename to src/tensor-array/core/CMakeLists.txt index d2e0e76..af3474d 100644 --- a/src/tensor_array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -4,7 +4,7 @@ file(GLOB TensorArray_inc "*.hh") install( FILES ${TensorArray_inc} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor_array/core + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) enable_language(CUDA) @@ -48,9 +48,9 @@ endif() install( TARGETS tensorarray_core EXPORT TensorArrayTargets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY 
DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array COMPONENT libraries - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array COMPONENT libraries) add_library(TensorArray::Core ALIAS tensorarray_core) diff --git a/src/tensor_array/core/data_type_wrapper.cc b/src/tensor-array/core/data_type_wrapper.cc similarity index 100% rename from src/tensor_array/core/data_type_wrapper.cc rename to src/tensor-array/core/data_type_wrapper.cc diff --git a/src/tensor_array/core/data_type_wrapper.hh b/src/tensor-array/core/data_type_wrapper.hh similarity index 100% rename from src/tensor_array/core/data_type_wrapper.hh rename to src/tensor-array/core/data_type_wrapper.hh diff --git a/src/tensor_array/core/devices.cc b/src/tensor-array/core/devices.cc similarity index 100% rename from src/tensor_array/core/devices.cc rename to src/tensor-array/core/devices.cc diff --git a/src/tensor_array/core/devices.hh b/src/tensor-array/core/devices.hh similarity index 100% rename from src/tensor_array/core/devices.hh rename to src/tensor-array/core/devices.hh diff --git a/src/tensor_array/core/extern_type_map.cc b/src/tensor-array/core/extern_type_map.cc similarity index 100% rename from src/tensor_array/core/extern_type_map.cc rename to src/tensor-array/core/extern_type_map.cc diff --git a/src/tensor_array/core/extern_type_map.hh b/src/tensor-array/core/extern_type_map.hh similarity index 100% rename from src/tensor_array/core/extern_type_map.hh rename to src/tensor-array/core/extern_type_map.hh diff --git a/src/tensor_array/core/initializer_wrapper.hh b/src/tensor-array/core/initializer_wrapper.hh similarity index 100% rename from src/tensor_array/core/initializer_wrapper.hh rename to src/tensor-array/core/initializer_wrapper.hh diff --git a/src/tensor_array/core/tensor.cc b/src/tensor-array/core/tensor.cc similarity index 100% rename from src/tensor_array/core/tensor.cc rename to src/tensor-array/core/tensor.cc diff --git 
a/src/tensor_array/core/tensor.cu b/src/tensor-array/core/tensor.cu similarity index 100% rename from src/tensor_array/core/tensor.cu rename to src/tensor-array/core/tensor.cu diff --git a/src/tensor_array/core/tensor.hh b/src/tensor-array/core/tensor.hh similarity index 100% rename from src/tensor_array/core/tensor.hh rename to src/tensor-array/core/tensor.hh diff --git a/src/tensor_array/core/tensor_blas.cc b/src/tensor-array/core/tensor_blas.cc similarity index 100% rename from src/tensor_array/core/tensor_blas.cc rename to src/tensor-array/core/tensor_blas.cc diff --git a/src/tensor_array/core/tensor_cast.cu b/src/tensor-array/core/tensor_cast.cu similarity index 100% rename from src/tensor_array/core/tensor_cast.cu rename to src/tensor-array/core/tensor_cast.cu diff --git a/src/tensor_array/core/tensor_convolution.cc b/src/tensor-array/core/tensor_convolution.cc similarity index 100% rename from src/tensor_array/core/tensor_convolution.cc rename to src/tensor-array/core/tensor_convolution.cc diff --git a/src/tensor_array/core/tensor_convolution.cu b/src/tensor-array/core/tensor_convolution.cu similarity index 100% rename from src/tensor_array/core/tensor_convolution.cu rename to src/tensor-array/core/tensor_convolution.cu diff --git a/src/tensor_array/core/tensor_reduce.cu b/src/tensor-array/core/tensor_reduce.cu similarity index 100% rename from src/tensor_array/core/tensor_reduce.cu rename to src/tensor-array/core/tensor_reduce.cu diff --git a/src/tensor_array/core/tensorarray.hh b/src/tensor-array/core/tensorarray.hh similarity index 100% rename from src/tensor_array/core/tensorarray.hh rename to src/tensor-array/core/tensorarray.hh diff --git a/src/tensor_array/core/tensorbase.cc b/src/tensor-array/core/tensorbase.cc similarity index 100% rename from src/tensor_array/core/tensorbase.cc rename to src/tensor-array/core/tensorbase.cc diff --git a/src/tensor_array/core/tensorbase.hh b/src/tensor-array/core/tensorbase.hh similarity index 100% rename from 
src/tensor_array/core/tensorbase.hh rename to src/tensor-array/core/tensorbase.hh diff --git a/src/tensor_array/layers/CMakeLists.txt b/src/tensor-array/layers/CMakeLists.txt similarity index 83% rename from src/tensor_array/layers/CMakeLists.txt rename to src/tensor-array/layers/CMakeLists.txt index c0f8fe0..c02756d 100644 --- a/src/tensor_array/layers/CMakeLists.txt +++ b/src/tensor-array/layers/CMakeLists.txt @@ -5,7 +5,7 @@ file(GLOB TensorArray_inc "*.hh") install( FILES ${TensorArray_inc} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor_array/layers + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/layers COMPONENT headers) add_library(tensorarray_layers SHARED ${TensorArray_src}) @@ -24,9 +24,9 @@ set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) install( TARGETS tensorarray_layers EXPORT TensorArrayTargets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array COMPONENT libraries - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array COMPONENT libraries) add_library(TensorArray::Layers ALIAS tensorarray_layers) diff --git a/src/tensor_array/layers/attention.cc b/src/tensor-array/layers/attention.cc similarity index 100% rename from src/tensor_array/layers/attention.cc rename to src/tensor-array/layers/attention.cc diff --git a/src/tensor_array/layers/attention.hh b/src/tensor-array/layers/attention.hh similarity index 100% rename from src/tensor_array/layers/attention.hh rename to src/tensor-array/layers/attention.hh diff --git a/src/tensor_array/layers/convolution.cc b/src/tensor-array/layers/convolution.cc similarity index 100% rename from src/tensor_array/layers/convolution.cc rename to src/tensor-array/layers/convolution.cc diff --git a/src/tensor_array/layers/convolution.hh b/src/tensor-array/layers/convolution.hh similarity index 100% rename from src/tensor_array/layers/convolution.hh rename to src/tensor-array/layers/convolution.hh 
diff --git a/src/tensor_array/layers/layer_any.cc b/src/tensor-array/layers/layer_any.cc similarity index 100% rename from src/tensor_array/layers/layer_any.cc rename to src/tensor-array/layers/layer_any.cc diff --git a/src/tensor_array/layers/layer_any.hh b/src/tensor-array/layers/layer_any.hh similarity index 100% rename from src/tensor_array/layers/layer_any.hh rename to src/tensor-array/layers/layer_any.hh diff --git a/src/tensor_array/layers/layer_holder.hh b/src/tensor-array/layers/layer_holder.hh similarity index 100% rename from src/tensor_array/layers/layer_holder.hh rename to src/tensor-array/layers/layer_holder.hh diff --git a/src/tensor_array/layers/layer_impl.cc b/src/tensor-array/layers/layer_impl.cc similarity index 100% rename from src/tensor_array/layers/layer_impl.cc rename to src/tensor-array/layers/layer_impl.cc diff --git a/src/tensor_array/layers/layer_impl.hh b/src/tensor-array/layers/layer_impl.hh similarity index 98% rename from src/tensor_array/layers/layer_impl.hh rename to src/tensor-array/layers/layer_impl.hh index 7197c38..2d03419 100644 --- a/src/tensor_array/layers/layer_impl.hh +++ b/src/tensor-array/layers/layer_impl.hh @@ -16,7 +16,7 @@ limitations under the License. 
#include #include -#include +#include #include #pragma once diff --git a/src/tensor_array/layers/layer_utility.cc b/src/tensor-array/layers/layer_utility.cc similarity index 100% rename from src/tensor_array/layers/layer_utility.cc rename to src/tensor-array/layers/layer_utility.cc diff --git a/src/tensor_array/layers/layer_utility.hh b/src/tensor-array/layers/layer_utility.hh similarity index 100% rename from src/tensor_array/layers/layer_utility.hh rename to src/tensor-array/layers/layer_utility.hh diff --git a/src/tensor_array/layers/linear.cc b/src/tensor-array/layers/linear.cc similarity index 100% rename from src/tensor_array/layers/linear.cc rename to src/tensor-array/layers/linear.cc diff --git a/src/tensor_array/layers/linear.hh b/src/tensor-array/layers/linear.hh similarity index 100% rename from src/tensor_array/layers/linear.hh rename to src/tensor-array/layers/linear.hh diff --git a/src/tensor_array/layers/normalization.cc b/src/tensor-array/layers/normalization.cc similarity index 100% rename from src/tensor_array/layers/normalization.cc rename to src/tensor-array/layers/normalization.cc diff --git a/src/tensor_array/layers/normalization.hh b/src/tensor-array/layers/normalization.hh similarity index 100% rename from src/tensor_array/layers/normalization.hh rename to src/tensor-array/layers/normalization.hh diff --git a/src/tensor_array/layers/recurrent.cc b/src/tensor-array/layers/recurrent.cc similarity index 100% rename from src/tensor_array/layers/recurrent.cc rename to src/tensor-array/layers/recurrent.cc diff --git a/src/tensor_array/layers/recurrent.hh b/src/tensor-array/layers/recurrent.hh similarity index 100% rename from src/tensor_array/layers/recurrent.hh rename to src/tensor-array/layers/recurrent.hh diff --git a/src/tensor_array/layers/sequential.cc b/src/tensor-array/layers/sequential.cc similarity index 100% rename from src/tensor_array/layers/sequential.cc rename to src/tensor-array/layers/sequential.cc diff --git 
a/src/tensor_array/layers/sequential.hh b/src/tensor-array/layers/sequential.hh similarity index 100% rename from src/tensor_array/layers/sequential.hh rename to src/tensor-array/layers/sequential.hh diff --git a/src/tensor_array/layers/transformer.cc b/src/tensor-array/layers/transformer.cc similarity index 100% rename from src/tensor_array/layers/transformer.cc rename to src/tensor-array/layers/transformer.cc diff --git a/src/tensor_array/layers/transformer.hh b/src/tensor-array/layers/transformer.hh similarity index 100% rename from src/tensor_array/layers/transformer.hh rename to src/tensor-array/layers/transformer.hh From e4ab78a16e9cd57fd784b7358d5bc59084cb2e12 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 15 Jul 2025 13:35:23 +0000 Subject: [PATCH 102/281] test --- .github/workflows/docker-publish.yml | 12 +++++++++--- CMakeLists.txt | 2 +- Dockerfile => Dockerfolder/Ubuntu.Dockerfile | 4 ++-- 3 files changed, 12 insertions(+), 6 deletions(-) rename Dockerfile => Dockerfolder/Ubuntu.Dockerfile (95%) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 235a534..777816c 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -24,8 +24,13 @@ env: jobs: build: - + strategy: + fail-fast: false + matrix: + image-os: [ "Ubuntu" ] + runs-on: ubuntu-latest + permissions: contents: read packages: write @@ -67,7 +72,7 @@ jobs: id: meta uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + images: "${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ matrix.image-os }}" # Build and push Docker image with Buildx (don't push on PR) # https://github.com/docker/build-push-action @@ -75,7 +80,8 @@ jobs: id: build-and-push uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 with: - context: . 
+ context: Dockerfolder + file: ${{ matrix.image-os }}.Dockerfile push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 2873cb5..125b398 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ install( include(CMakePackageConfigHelpers) configure_package_config_file( "Config.cmake.in" - "TensorArrayConfig.cmake" + "tensor-array/TensorArrayConfig.cmake" INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/cmake PATH_VARS CMAKE_INSTALL_LIBDIR CMAKE_INSTALL_INCLUDEDIR ) diff --git a/Dockerfile b/Dockerfolder/Ubuntu.Dockerfile similarity index 95% rename from Dockerfile rename to Dockerfolder/Ubuntu.Dockerfile index b76ec29..8317437 100644 --- a/Dockerfile +++ b/Dockerfolder/Ubuntu.Dockerfile @@ -26,11 +26,11 @@ WORKDIR /app/tensor-array COPY src/ src/ COPY CMakeLists.txt . COPY Config.cmake.in . -WORKDIR /app/tensor-array WORKDIR build RUN cmake .. -RUN make install +RUN cmake --build . +RUN cmake --install . 
WORKDIR /app/tensor-array From 57efe7fea62fe2ff0c4c48e22d208d94e3f366b5 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 15 Jul 2025 13:38:29 +0000 Subject: [PATCH 103/281] test --- .github/workflows/docker-publish.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 777816c..390a620 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -72,7 +72,8 @@ jobs: id: meta uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 with: - images: "${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ matrix.image-os }}" + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: ${{ matrix.image-os }} # Build and push Docker image with Buildx (don't push on PR) # https://github.com/docker/build-push-action From a42027c12122c57a6374ea35a9d22b448ed87586 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 15 Jul 2025 20:40:01 +0700 Subject: [PATCH 104/281] Update docker-publish.yml --- .github/workflows/docker-publish.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 390a620..2e3d127 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -81,8 +81,7 @@ jobs: id: build-and-push uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 with: - context: Dockerfolder - file: ${{ matrix.image-os }}.Dockerfile + file: Dockerfolder/${{ matrix.image-os }}.Dockerfile push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} From 5027a1b9710e9b27a736cd2ed44c0096b4342957 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 15 Jul 2025 22:05:09 +0700 Subject: 
[PATCH 105/281] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 48469ea..0ee20a1 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ cd .. We created a template struct that named `TensorArray`. That struct is a multi-dimensional array wrapper. ```C++ -#include "tensor_array/core/tensorbase.hh" +#include using namespace tensor_array::value; @@ -77,7 +77,7 @@ The `Tensor::get_grad()` method can get the gradient after call `Tensor::calc_gr ```C++ #include -#include "tensor_array/core/tensor.hh" +#include using namespace std; using namespace tensor_array::value; From 59641cd39e9f6a1b5c1b23e8a20b2e3b40486efb Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 16 Jul 2025 04:20:31 +0000 Subject: [PATCH 106/281] add more arch --- .github/workflows/cmake-single-platform.yml | 4 ++-- scripts/actions/install-cuda-rhel.sh | 23 ++++++++++++++------- scripts/actions/install-cuda-ubuntu.sh | 23 +++++++++++++++------ 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index 46141b2..25158b5 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -20,8 +20,8 @@ jobs: strategy: fail-fast: false matrix: - cuda-version: [ "12.9" ] - os: [ "ubuntu-22.04" ] + cuda-version: [ "12.9", "12.4" ] + os: [ "ubuntu-22.04", "ubuntu-22.04-arm" ] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index b995f77..39e3957 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -63,22 +63,32 @@ CUDA_MAJOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f1) CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. 
-f3) -CUDA_PACKAGES="" +CPU_ARCH=$(uname -m) +if "${CPU_ARCH}" == "aarch64" +then + CPU_ARCH="sbsa" +fi + for package in "${CUDA_PACKAGES_IN[@]}" do : # @todo This is not perfect. Should probably provide a separate list for diff versions # cuda-compiler-X-Y if CUDA >= 9.1 else cuda-nvcc-X-Y - if [[ "${package}" == "cuda-nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then + if [[ "${package}" == "cuda-nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" + then package="cuda-compiler" - elif [[ "${package}" == "cuda-compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then + elif [[ "${package}" == "cuda-compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" + then package="cuda-nvcc" # CUB/Thrust are packages in cuda-thrust in 11.3, but cuda-cccl in 11.4+ - elif [[ "${package}" == "cuda-thrust" || "${package}" == "cuda-cccl" ]]; then + elif [[ "${package}" == "cuda-thrust" || "${package}" == "cuda-cccl" ]] + then # CUDA cuda-thrust >= 11.4 - if version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.4" ; then + if version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.4" + then package="cuda-cccl" # Use cuda-thrust > 11.2 - elif version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.3" ; then + elif version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.3" + then package="cuda-thrust" # Do not include this pacakge < 11.3 else @@ -90,7 +100,6 @@ do : done echo "CUDA_PACKAGES ${CUDA_PACKAGES}" -CPU_ARCH="x86_64" REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/cuda-${LINUX_ID}${LINUX_VERSION}.repo" is_root=false diff --git a/scripts/actions/install-cuda-ubuntu.sh b/scripts/actions/install-cuda-ubuntu.sh index 64e5b99..ba8c1aa 100644 --- a/scripts/actions/install-cuda-ubuntu.sh +++ b/scripts/actions/install-cuda-ubuntu.sh @@ -44,22 +44,34 @@ CUDA_MAJOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f1) CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. 
-f3) +CPU_ARCH=$(uname -m) +if "${CPU_ARCH}" == "aarch64" +then + CPU_ARCH="sbsa" +fi + + CUDA_PACKAGES="" for package in "${CUDA_PACKAGES_IN[@]}" do : # @todo This is not perfect. Should probably provide a separate list for diff versions # cuda-compiler-X-Y if CUDA >= 9.1 else cuda-nvcc-X-Y - if [[ "${package}" == "cuda-nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then + if [[ "${package}" == "cuda-nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" + then package="cuda-compiler" - elif [[ "${package}" == "cuda-compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" ; then + elif [[ "${package}" == "cuda-compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" + then package="cuda-nvcc" # CUB/Thrust are packages in cuda-thrust in 11.3, but cuda-cccl in 11.4+ - elif [[ "${package}" == "cuda-thrust" || "${package}" == "cuda-cccl" ]]; then + elif [[ "${package}" == "cuda-thrust" || "${package}" == "cuda-cccl" ]] + then # CUDA cuda-thrust >= 11.4 - if version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.4" ; then + if version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.4" + then package="cuda-cccl" # Use cuda-thrust > 11.2 - elif version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.3" ; then + elif version_ge "$CUDA_VERSION_MAJOR_MINOR" "11.3" + then package="cuda-thrust" # Do not include this pacakge < 11.3 else @@ -71,7 +83,6 @@ do : done echo "CUDA_PACKAGES ${CUDA_PACKAGES}" -CPU_ARCH="x86_64" PIN_FILENAME="cuda-${LINUX_ID}${LINUX_VERSION}.pin" PIN_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/${PIN_FILENAME}" KERYRING_PACKAGE_FILENAME="cuda-keyring_1.1-1_all.deb" From 4b2222cbb431c2b776357a18a9dcdb95ead82dc3 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 16 Jul 2025 04:22:23 +0000 Subject: [PATCH 107/281] add more arch --- scripts/actions/install-cuda-rhel.sh | 2 +- scripts/actions/install-cuda-ubuntu.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) 
diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 39e3957..cb68a46 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -64,7 +64,7 @@ CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f3) CPU_ARCH=$(uname -m) -if "${CPU_ARCH}" == "aarch64" +if [[ "${CPU_ARCH}" == "aarch64" ]] then CPU_ARCH="sbsa" fi diff --git a/scripts/actions/install-cuda-ubuntu.sh b/scripts/actions/install-cuda-ubuntu.sh index ba8c1aa..9cf5bf1 100644 --- a/scripts/actions/install-cuda-ubuntu.sh +++ b/scripts/actions/install-cuda-ubuntu.sh @@ -45,7 +45,7 @@ CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f3) CPU_ARCH=$(uname -m) -if "${CPU_ARCH}" == "aarch64" +if [[ "${CPU_ARCH}" == "aarch64" ]] then CPU_ARCH="sbsa" fi From 796e6136aa18a0ca9caea6f520770130f7b55fe7 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 15:19:02 +0700 Subject: [PATCH 108/281] test --- ...-platform.yml => cmake-multi-platform.yml} | 14 +++++++- CMakeLists.txt | 12 +++---- scripts/actions/install-cuda-windows.sh | 35 +++++++++++++++++++ src/tensor-array/core/CMakeLists.txt | 8 +++-- src/tensor-array/layers/CMakeLists.txt | 8 +++-- 5 files changed, 64 insertions(+), 13 deletions(-) rename .github/workflows/{cmake-single-platform.yml => cmake-multi-platform.yml} (87%) create mode 100644 scripts/actions/install-cuda-windows.sh diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-multi-platform.yml similarity index 87% rename from .github/workflows/cmake-single-platform.yml rename to .github/workflows/cmake-multi-platform.yml index 25158b5..b93eb44 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -1,6 +1,6 @@ # This starter workflow is 
for a CMake project running on a single platform. There is a different starter workflow if you need cross-platform coverage. # See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-multi-platform.yml -name: CMake on a single platform +name: CMake on multi platform on: push: @@ -22,6 +22,10 @@ jobs: matrix: cuda-version: [ "12.9", "12.4" ] os: [ "ubuntu-22.04", "ubuntu-22.04-arm" ] + include: + - os: windows-latest + cuda-version: "12.4.1" + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 @@ -46,6 +50,14 @@ jobs: ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh shell: bash + - name: Run CUDA bash shell Windows + if: startsWith(matrix.os, 'windows') + env: + temp: ${{ runner.temp }} + cuda: ${{ matrix.cuda-version }} + run: ${{github.workspace}}/scripts/actions/install-cuda-windows.sh + shell: bash + - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type diff --git a/CMakeLists.txt b/CMakeLists.txt index 125b398..2cb8c4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,9 +12,9 @@ set(CPACK_PACKAGE_NAME "TensorArray") set(CPACK_PACKAGE_VENDOR "TensorArray-Creators") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY) -set(CPACK_PACKAGE_VERSION "0.1.0") +set(CPACK_PACKAGE_VERSION "0.2.0") set(CPACK_PACKAGE_VERSION_MAJOR "0") -set(CPACK_PACKAGE_VERSION_MINOR "1") +set(CPACK_PACKAGE_VERSION_MINOR "2") set(CPACK_PACKAGE_VERSION_PATCH "0") set(CPACK_PACKAGE_INSTALL_DIRECTORY "A machine learning libraries") @@ -28,13 +28,13 @@ install( include(CMakePackageConfigHelpers) configure_package_config_file( "Config.cmake.in" - "tensor-array/TensorArrayConfig.cmake" + "TensorArrayConfig.cmake" INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/cmake PATH_VARS CMAKE_INSTALL_LIBDIR CMAKE_INSTALL_INCLUDEDIR ) write_basic_package_version_file( - ${CMAKE_CURRENT_BINARY_DIR}/tensor-array/TensorArrayConfigVersion.cmake + ${CMAKE_CURRENT_BINARY_DIR}/TensorArrayConfigVersion.cmake VERSION 0.2.0 COMPATIBILITY SameMajorVersion ) @@ -42,8 +42,8 @@ write_basic_package_version_file( ### Install Config and ConfigVersion files install( FILES - ${CMAKE_CURRENT_BINARY_DIR}/tensor-array/TensorArrayConfig.cmake - ${CMAKE_CURRENT_BINARY_DIR}/tensor-array/TensorArrayConfigVersion.cmake + ${CMAKE_CURRENT_BINARY_DIR}/TensorArrayConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/TensorArrayConfigVersion.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/cmake ) diff --git a/scripts/actions/install-cuda-windows.sh b/scripts/actions/install-cuda-windows.sh new file mode 100644 index 0000000..ec0a1a0 --- /dev/null +++ b/scripts/actions/install-cuda-windows.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +set -e +set -x + +CUDA_PACKAGES_IN=( + "nvcc" + "cudart" + "nvtx" + "nvrtc" + "thrust" + "curand_dev" + "cublas_dev" + "cufft_dev" +) + +CUDA_VERSION_MAJOR_MINOR=${cuda} + 
+CUDA_MAJOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f1) +CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) +CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f3) + +CUDA_PACKAGES="" +for package in "${CUDA_PACKAGES_IN[@]}" +do : + # Build the full package name and append to the string. + CUDA_PACKAGES+=" ${package}_${CUDA_MAJOR}.${CUDA_MINOR}" +done +echo "CUDA_PACKAGES ${CUDA_PACKAGES}" + +CUDA_ROOT = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_MAJOR}.${CUDA_MINOR}" + +curl --netrc-optional -L -nv -o cuda_installer.exe "https://developer.download.nvidia.com/compute/cuda/${cuda}/network_installers/cuda_${cuda}_windows_network.exe" +./cuda_installer.exe -s ${CUDA_PACKAGES} +rm -f cuda_installer.exe diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index af3474d..b1abcac 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -48,9 +48,11 @@ endif() install( TARGETS tensorarray_core EXPORT TensorArrayTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT libraries - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT libraries) + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/core + COMPONENT Development) add_library(TensorArray::Core ALIAS tensorarray_core) diff --git a/src/tensor-array/layers/CMakeLists.txt b/src/tensor-array/layers/CMakeLists.txt index c02756d..f4562c7 100644 --- a/src/tensor-array/layers/CMakeLists.txt +++ b/src/tensor-array/layers/CMakeLists.txt @@ -24,9 +24,11 @@ set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) install( TARGETS tensorarray_layers EXPORT TensorArrayTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT libraries - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array 
- COMPONENT libraries) + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/layers + COMPONENT Development) add_library(TensorArray::Layers ALIAS tensorarray_layers) From ded98b115757de9db2bc9d69116afa166285aa6b Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 15:21:21 +0700 Subject: [PATCH 109/281] test --- .github/workflows/cmake-multi-platform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index b93eb44..6362d74 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -22,7 +22,7 @@ jobs: matrix: cuda-version: [ "12.9", "12.4" ] os: [ "ubuntu-22.04", "ubuntu-22.04-arm" ] - include: + include: - os: windows-latest cuda-version: "12.4.1" From 4dcf0d2f8852a0ae9a00de070cb4e77f8f4a2bcd Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 15:25:20 +0700 Subject: [PATCH 110/281] test --- .github/workflows/cmake-multi-platform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 6362d74..9a848a0 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -55,7 +55,7 @@ jobs: env: temp: ${{ runner.temp }} cuda: ${{ matrix.cuda-version }} - run: ${{github.workspace}}/scripts/actions/install-cuda-windows.sh + run: scripts/actions/install-cuda-windows.sh shell: bash - name: Configure CMake From 27dc3488b19984a0a6fe79d43a2e72d8ec2a5bc4 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 15:31:16 +0700 Subject: [PATCH 111/281] test --- scripts/actions/install-cuda-windows.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/scripts/actions/install-cuda-windows.sh b/scripts/actions/install-cuda-windows.sh index ec0a1a0..bc492fb 100644 --- a/scripts/actions/install-cuda-windows.sh +++ b/scripts/actions/install-cuda-windows.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#! /bin/bash set -e set -x @@ -28,7 +28,7 @@ do : done echo "CUDA_PACKAGES ${CUDA_PACKAGES}" -CUDA_ROOT = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_MAJOR}.${CUDA_MINOR}" +# CUDA_ROOT = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_MAJOR}.${CUDA_MINOR}" curl --netrc-optional -L -nv -o cuda_installer.exe "https://developer.download.nvidia.com/compute/cuda/${cuda}/network_installers/cuda_${cuda}_windows_network.exe" ./cuda_installer.exe -s ${CUDA_PACKAGES} From 8024f20e5df485d565df247f66b9b1e46a448afc Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 15:41:10 +0700 Subject: [PATCH 112/281] test --- scripts/actions/install-cuda-windows.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/actions/install-cuda-windows.sh b/scripts/actions/install-cuda-windows.sh index bc492fb..ff49298 100644 --- a/scripts/actions/install-cuda-windows.sh +++ b/scripts/actions/install-cuda-windows.sh @@ -33,3 +33,8 @@ echo "CUDA_PACKAGES ${CUDA_PACKAGES}" curl --netrc-optional -L -nv -o cuda_installer.exe "https://developer.download.nvidia.com/compute/cuda/${cuda}/network_installers/cuda_${cuda}_windows_network.exe" ./cuda_installer.exe -s ${CUDA_PACKAGES} rm -f cuda_installer.exe + +CUDA_PATH="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_MAJOR}.${CUDA_MINOR}" +echo "CUDA_PATH=${CUDA_PATH}" +export CUDA_PATH=${CUDA_PATH} + From 021840f85d25a13d6de33bf6ad228a2a9a2ab278 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 15:46:57 +0700 Subject: [PATCH 113/281] Update install-cuda-windows.sh --- scripts/actions/install-cuda-windows.sh | 6 
+----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/scripts/actions/install-cuda-windows.sh b/scripts/actions/install-cuda-windows.sh index ff49298..2f548eb 100644 --- a/scripts/actions/install-cuda-windows.sh +++ b/scripts/actions/install-cuda-windows.sh @@ -12,6 +12,7 @@ CUDA_PACKAGES_IN=( "curand_dev" "cublas_dev" "cufft_dev" + "visual_studio_integration" ) CUDA_VERSION_MAJOR_MINOR=${cuda} @@ -33,8 +34,3 @@ echo "CUDA_PACKAGES ${CUDA_PACKAGES}" curl --netrc-optional -L -nv -o cuda_installer.exe "https://developer.download.nvidia.com/compute/cuda/${cuda}/network_installers/cuda_${cuda}_windows_network.exe" ./cuda_installer.exe -s ${CUDA_PACKAGES} rm -f cuda_installer.exe - -CUDA_PATH="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_MAJOR}.${CUDA_MINOR}" -echo "CUDA_PATH=${CUDA_PATH}" -export CUDA_PATH=${CUDA_PATH} - From fd98850ffea87bd2e4c2c58e3d0aadde6f0a417d Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 16:36:22 +0700 Subject: [PATCH 114/281] test --- .github/workflows/cmake-multi-platform.yml | 9 ++--- scripts/actions/install-cuda-rhel.sh | 23 ++++++----- scripts/actions/install-cuda-ubuntu.sh | 22 ++++++---- scripts/actions/install-cuda-windows.sh | 47 +++++++++++++++++++++- 4 files changed, 76 insertions(+), 25 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 9a848a0..f01a264 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -20,11 +20,8 @@ jobs: strategy: fail-fast: false matrix: - cuda-version: [ "12.9", "12.4" ] - os: [ "ubuntu-22.04", "ubuntu-22.04-arm" ] - include: - - os: windows-latest - cuda-version: "12.4.1" + os: [ "ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] + cuda-version: [ "12.9.1", "12.4.1" ] runs-on: ${{ matrix.os }} steps: @@ -51,7 +48,7 @@ jobs: shell: bash - name: Run CUDA bash shell Windows - if: 
startsWith(matrix.os, 'windows') + if: runner.os == 'Windows' env: temp: ${{ runner.temp }} cuda: ${{ matrix.cuda-version }} diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index cb68a46..266dfb9 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -103,19 +103,23 @@ echo "CUDA_PACKAGES ${CUDA_PACKAGES}" REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/cuda-${LINUX_ID}${LINUX_VERSION}.repo" is_root=false -if (( $EUID == 0)); then +if (( $EUID == 0)) +then is_root=true fi # Find if sudo is available has_sudo=false -if command -v sudo &> /dev/null ; then +if command -v sudo &> /dev/null +then has_sudo=true fi # Decide if we can proceed or not (root or sudo is required) and if so store whether sudo should be used or not. -if [ "$is_root" = false ] && [ "$has_sudo" = false ]; then +if [ "$is_root" = false ] && [ "$has_sudo" = false ] +then echo "Root or sudo is required. Aborting." exit 1 -elif [ "$is_root" = false ] ; then +elif [ "$is_root" = false ] +then USE_SUDO=sudo else USE_SUDO= @@ -127,7 +131,8 @@ $USE_SUDO $YUM_PACKAGE_MANAGER clean all $USE_SUDO $YUM_PACKAGE_MANAGER install -y ${CUDA_PACKAGES} -if [[ $? -ne 0 ]]; then +if [[ $? -ne 0 ]] +then echo "CUDA Installation Error." 
exit 1 fi @@ -139,10 +144,10 @@ export PATH="$PATH:$CUDA_PATH/bin" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib64" -if [[ $GITHUB_ACTIONS ]]; then - # Set paths for subsequent steps, using ${CUDA_PATH} - echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" - echo "${CUDA_PATH}" >> $GITHUB_PATH +if [[ $GITHUB_ACTIONS ]] +then + echo "${CUDA_PATH}/bin" >> $GITHUB_PATH + echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "PATH=$PATH:$CUDA_PATH/bin" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib64" >> $GITHUB_ENV diff --git a/scripts/actions/install-cuda-ubuntu.sh b/scripts/actions/install-cuda-ubuntu.sh index 9cf5bf1..628ea25 100644 --- a/scripts/actions/install-cuda-ubuntu.sh +++ b/scripts/actions/install-cuda-ubuntu.sh @@ -90,19 +90,23 @@ KEYRING_PACKAGE_URL="https://developer.download.nvidia.com/compute/cuda/repos/${ REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/${LINUX_ID}${LINUX_VERSION}/${CPU_ARCH}/" is_root=false -if (( $EUID == 0)); then +if (( $EUID == 0)) +then is_root=true fi # Find if sudo is available has_sudo=false -if command -v sudo &> /dev/null ; then +if command -v sudo &> /dev/null +then has_sudo=true fi # Decide if we can proceed or not (root or sudo is required) and if so store whether sudo should be used or not. -if [ "$is_root" = false ] && [ "$has_sudo" = false ]; then +if [ "$is_root" = false ] && [ "$has_sudo" = false ] +then echo "Root or sudo is required. Aborting." exit 1 -elif [ "$is_root" = false ] ; then +elif [ "$is_root" = false ] +then USE_SUDO=sudo else USE_SUDO= @@ -117,7 +121,8 @@ $USE_SUDO apt-get update $USE_SUDO apt-get -y install ${CUDA_PACKAGES} -if [[ $? -ne 0 ]]; then +if [[ $? -ne 0 ]] +then echo "CUDA Installation Error." 
exit 1 fi @@ -129,10 +134,11 @@ export PATH="$PATH:$CUDA_PATH/bin" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib" export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_PATH/lib64" -if [[ $GITHUB_ACTIONS ]]; then - # Set paths for subsequent steps, using ${CUDA_PATH} +if [[ $GITHUB_ACTIONS ]] +then echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" - echo "${CUDA_PATH}" >> $GITHUB_PATH + echo "${CUDA_PATH}/bin" >> $GITHUB_PATH + echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV echo "PATH=$PATH:$CUDA_PATH/bin" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib64" >> $GITHUB_ENV diff --git a/scripts/actions/install-cuda-windows.sh b/scripts/actions/install-cuda-windows.sh index 2f548eb..dc64550 100644 --- a/scripts/actions/install-cuda-windows.sh +++ b/scripts/actions/install-cuda-windows.sh @@ -4,7 +4,7 @@ set -e set -x CUDA_PACKAGES_IN=( - "nvcc" + "compiler" "cudart" "nvtx" "nvrtc" @@ -15,6 +15,27 @@ CUDA_PACKAGES_IN=( "visual_studio_integration" ) +function version_ge() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$(printf '%s\n' "$@" | sort -V | head -n 1)" == "$2" ] +} +# returns 0 (true) if a > b +function version_gt() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$1" = "$2" ] && return 1 || version_ge $1 $2 +} +# returns 0 (true) if a <= b +function version_le() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$(printf '%s\n' "$@" | sort -V | head -n 1)" == "$1" ] +} +# returns 0 (true) if a < b +function version_lt() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$1" = "$2" ] && return 1 || version_le $1 $2 +} + + CUDA_VERSION_MAJOR_MINOR=${cuda} CUDA_MAJOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f1) @@ -24,6 +45,15 @@ CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. 
-f3) CUDA_PACKAGES="" for package in "${CUDA_PACKAGES_IN[@]}" do : + # @todo This is not perfect. Should probably provide a separate list for diff versions + # cuda-compiler-X-Y if CUDA >= 9.1 else cuda-nvcc-X-Y + if [[ "${package}" == "nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" + then + package="compiler" + elif [[ "${package}" == "compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" + then + package="nvcc" + fi # Build the full package name and append to the string. CUDA_PACKAGES+=" ${package}_${CUDA_MAJOR}.${CUDA_MINOR}" done @@ -31,6 +61,19 @@ echo "CUDA_PACKAGES ${CUDA_PACKAGES}" # CUDA_ROOT = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_MAJOR}.${CUDA_MINOR}" -curl --netrc-optional -L -nv -o cuda_installer.exe "https://developer.download.nvidia.com/compute/cuda/${cuda}/network_installers/cuda_${cuda}_windows_network.exe" +curl --netrc-optional -L -nv -o cuda_installer.exe \ +"https://developer.download.nvidia.com/compute/cuda/${cuda}/network_installers/cuda_${cuda}_windows_network.exe" ./cuda_installer.exe -s ${CUDA_PACKAGES} rm -f cuda_installer.exe + +CUDA_PATH="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_MAJOR}.${CUDA_MINOR}" +echo "CUDA_PATH=${CUDA_PATH}" +export CUDA_PATH=${CUDA_PATH} + +if [[ $GITHUB_ACTIONS ]] +then + echo "Adding CUDA to CUDA_PATH" + echo "${CUDA_PATH}/bin" >> $GITHUB_PATH + echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV +fi + From 37777282f0b9f78fc84873da5fb22c9c98ddf069 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 16:41:54 +0700 Subject: [PATCH 115/281] Update install-cuda-windows.sh --- scripts/actions/install-cuda-windows.sh | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/scripts/actions/install-cuda-windows.sh b/scripts/actions/install-cuda-windows.sh index dc64550..e6f37c6 100644 --- a/scripts/actions/install-cuda-windows.sh +++ b/scripts/actions/install-cuda-windows.sh @@ 
-4,7 +4,7 @@ set -e set -x CUDA_PACKAGES_IN=( - "compiler" + "nvcc" "cudart" "nvtx" "nvrtc" @@ -45,15 +45,6 @@ CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f3) CUDA_PACKAGES="" for package in "${CUDA_PACKAGES_IN[@]}" do : - # @todo This is not perfect. Should probably provide a separate list for diff versions - # cuda-compiler-X-Y if CUDA >= 9.1 else cuda-nvcc-X-Y - if [[ "${package}" == "nvcc" ]] && version_ge "$CUDA_VERSION_MAJOR_MINOR" "9.1" - then - package="compiler" - elif [[ "${package}" == "compiler" ]] && version_lt "$CUDA_VERSION_MAJOR_MINOR" "9.1" - then - package="nvcc" - fi # Build the full package name and append to the string. CUDA_PACKAGES+=" ${package}_${CUDA_MAJOR}.${CUDA_MINOR}" done From 388a38480a88734c8a8d2cd61d3dcccb8b41c504 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 16:46:41 +0700 Subject: [PATCH 116/281] Update install-cuda-windows.sh --- scripts/actions/install-cuda-windows.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/actions/install-cuda-windows.sh b/scripts/actions/install-cuda-windows.sh index e6f37c6..ecab8c4 100644 --- a/scripts/actions/install-cuda-windows.sh +++ b/scripts/actions/install-cuda-windows.sh @@ -12,7 +12,6 @@ CUDA_PACKAGES_IN=( "curand_dev" "cublas_dev" "cufft_dev" - "visual_studio_integration" ) function version_ge() { From 1b6e7c9c47fe1f2cdc3adb5748d15613b534a412 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 17:19:14 +0700 Subject: [PATCH 117/281] test --- .github/workflows/cmake-multi-platform.yml | 4 +- scripts/actions/install-cuda-windows.ps1 | 52 ++++++++++++++++ scripts/actions/install-cuda-windows.sh | 69 ---------------------- 3 files changed, 54 insertions(+), 71 deletions(-) create mode 100644 scripts/actions/install-cuda-windows.ps1 delete mode 100644 scripts/actions/install-cuda-windows.sh diff --git 
a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index f01a264..ebbf04a 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -52,8 +52,8 @@ jobs: env: temp: ${{ runner.temp }} cuda: ${{ matrix.cuda-version }} - run: scripts/actions/install-cuda-windows.sh - shell: bash + run: scripts/actions/install-cuda-windows.ps1 + shell: pwsh - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 new file mode 100644 index 0000000..47d5a5c --- /dev/null +++ b/scripts/actions/install-cuda-windows.ps1 @@ -0,0 +1,52 @@ +$CUDA_PACKAGES_IN = @( + "nvcc" + "cudart" + "nvtx" + "nvrtc" + "thrust" + "curand_dev" + "cublas_dev" + "cufft_dev" +) + +function Version-Ge($a, $b) { + return ([version]$a -ge [version]$b) +} +function Version-Gt($a, $b) { + return ([version]$a -gt [version]$b) +} +function Version-Le($a, $b) { + return ([version]$a -le [version]$b) +} +function Version-Lt($a, $b) { + return ([version]$a -lt [version]$b) +} + +# Expect $env:cuda to be set, e.g. 
"12.4.1" +$CUDA_VERSION_MAJOR_MINOR = $env:cuda + +$parts = $CUDA_VERSION_MAJOR_MINOR.Split('.') +$CUDA_MAJOR = $parts[0] +$CUDA_MINOR = $parts[1] +$CUDA_PATCH = if ($parts.Count -gt 2) { $parts[2] } else { "0" } + +$CUDA_PACKAGES = "" +foreach ($package in $CUDA_PACKAGES_IN) { + $CUDA_PACKAGES += " $package" + "_$CUDA_MAJOR.$CUDA_MINOR" +} +Write-Host "CUDA_PACKAGES $CUDA_PACKAGES" + +$cudaInstallerUrl = "https://developer.download.nvidia.com/compute/cuda/$CUDA_VERSION_MAJOR_MINOR/network_installers/cuda_${CUDA_VERSION_MAJOR_MINOR}_windows_network.exe" +Invoke-WebRequest -Uri $cudaInstallerUrl -OutFile "cuda_installer.exe" +Start-Process -FilePath ".\cuda_installer.exe" -ArgumentList "-s $CUDA_PACKAGES" -Wait +Remove-Item "cuda_installer.exe" -Force + +$CUDA_PATH = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_MAJOR.$CUDA_MINOR" +Write-Host "CUDA_PATH=$CUDA_PATH" +$env:CUDA_PATH = $CUDA_PATH + +if ($env:GITHUB_ACTIONS) { + Write-Host "Adding CUDA to CUDA_PATH" + Add-Content -Path $env:GITHUB_PATH -Value "$CUDA_PATH\bin" + Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH=$CUDA_PATH" +} diff --git a/scripts/actions/install-cuda-windows.sh b/scripts/actions/install-cuda-windows.sh deleted file mode 100644 index ecab8c4..0000000 --- a/scripts/actions/install-cuda-windows.sh +++ /dev/null @@ -1,69 +0,0 @@ -#! /bin/bash - -set -e -set -x - -CUDA_PACKAGES_IN=( - "nvcc" - "cudart" - "nvtx" - "nvrtc" - "thrust" - "curand_dev" - "cublas_dev" - "cufft_dev" -) - -function version_ge() { - [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 - [ "$(printf '%s\n' "$@" | sort -V | head -n 1)" == "$2" ] -} -# returns 0 (true) if a > b -function version_gt() { - [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 - [ "$1" = "$2" ] && return 1 || version_ge $1 $2 -} -# returns 0 (true) if a <= b -function version_le() { - [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." 
&& exit 1 - [ "$(printf '%s\n' "$@" | sort -V | head -n 1)" == "$1" ] -} -# returns 0 (true) if a < b -function version_lt() { - [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 - [ "$1" = "$2" ] && return 1 || version_le $1 $2 -} - - -CUDA_VERSION_MAJOR_MINOR=${cuda} - -CUDA_MAJOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f1) -CUDA_MINOR=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f2) -CUDA_PATCH=$(echo "${CUDA_VERSION_MAJOR_MINOR}" | cut -d. -f3) - -CUDA_PACKAGES="" -for package in "${CUDA_PACKAGES_IN[@]}" -do : - # Build the full package name and append to the string. - CUDA_PACKAGES+=" ${package}_${CUDA_MAJOR}.${CUDA_MINOR}" -done -echo "CUDA_PACKAGES ${CUDA_PACKAGES}" - -# CUDA_ROOT = "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_MAJOR}.${CUDA_MINOR}" - -curl --netrc-optional -L -nv -o cuda_installer.exe \ -"https://developer.download.nvidia.com/compute/cuda/${cuda}/network_installers/cuda_${cuda}_windows_network.exe" -./cuda_installer.exe -s ${CUDA_PACKAGES} -rm -f cuda_installer.exe - -CUDA_PATH="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${CUDA_MAJOR}.${CUDA_MINOR}" -echo "CUDA_PATH=${CUDA_PATH}" -export CUDA_PATH=${CUDA_PATH} - -if [[ $GITHUB_ACTIONS ]] -then - echo "Adding CUDA to CUDA_PATH" - echo "${CUDA_PATH}/bin" >> $GITHUB_PATH - echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV -fi - From 1ce1958cd482359fde47dea469fc7c069d006a4d Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 17:21:06 +0700 Subject: [PATCH 118/281] Update install-cuda-windows.ps1 --- scripts/actions/install-cuda-windows.ps1 | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index 47d5a5c..11666dc 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -1,5 +1,6 @@ $CUDA_PACKAGES_IN = @( "nvcc" + "visual_studio_integration" 
"cudart" "nvtx" "nvrtc" From bfe932edda6efaa0cb116e10e9954324e81cd8d8 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 18:17:46 +0700 Subject: [PATCH 119/281] Update install-cuda-windows.ps1 --- scripts/actions/install-cuda-windows.ps1 | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index 11666dc..aef434d 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -46,8 +46,10 @@ $CUDA_PATH = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_MAJOR.$C Write-Host "CUDA_PATH=$CUDA_PATH" $env:CUDA_PATH = $CUDA_PATH -if ($env:GITHUB_ACTIONS) { - Write-Host "Adding CUDA to CUDA_PATH" - Add-Content -Path $env:GITHUB_PATH -Value "$CUDA_PATH\bin" - Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH=$CUDA_PATH" +# If executing on github actions, emit the appropriate echo statements to update environment variables +if (Test-Path "env:GITHUB_ACTIONS") { + # Set paths for subsequent steps, using $env:CUDA_PATH + echo "Adding CUDA to CUDA_PATH, CUDA_PATH_X_Y and PATH" + echo "CUDA_PATH=$env:CUDA_PATH" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + echo "$env:CUDA_PATH/bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append } From 60145aa5770bdc75e39572cc3bed12ae07798f14 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 19:37:47 +0700 Subject: [PATCH 120/281] test --- .github/workflows/cmake-multi-platform.yml | 152 +++++++++++---------- scripts/actions/install-cuda-windows.ps1 | 116 ++++++++-------- 2 files changed, 142 insertions(+), 126 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index ebbf04a..9fbcfb7 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ 
b/.github/workflows/cmake-multi-platform.yml @@ -1,71 +1,81 @@ -# This starter workflow is for a CMake project running on a single platform. There is a different starter workflow if you need cross-platform coverage. -# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-multi-platform.yml -name: CMake on multi platform - -on: - push: - branches: [ "master" ] - pull_request: - branches: [ "master" ] - -env: - # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) - BUILD_TYPE: Release - -jobs: - build: - # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. - # You can convert this to a matrix build if you need cross-platform coverage. - # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - strategy: - fail-fast: false - matrix: - os: [ "ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] - cuda-version: [ "12.9.1", "12.4.1" ] - - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - - - name: Run CUDA bash shell Ubuntu/Debian - if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian') - env: - temp: ${{ runner.temp }} - cuda: ${{ matrix.cuda-version }} - run: | - chmod +x ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh - ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh - shell: bash - - - name: Run CUDA bash shell RHEL - if: startsWith(matrix.os, 'rhel') - env: - temp: ${{ runner.temp }} - cuda: ${{ matrix.cuda-version }} - run: | - chmod +x ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh - ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh - shell: bash - - - name: Run CUDA bash shell Windows - if: runner.os == 'Windows' - env: - temp: ${{ runner.temp }} - cuda: ${{ matrix.cuda-version }} - run: scripts/actions/install-cuda-windows.ps1 - shell: pwsh - - - name: Configure CMake - # Configure CMake in a 'build' 
subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. - # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - - - name: Build - # Build your program with the given configuration - run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - - - name: Test - working-directory: ${{github.workspace}}/build - # Execute tests defined by the CMake configuration. - # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail - run: ctest -C ${{env.BUILD_TYPE}} +# This starter workflow is for a CMake project running on a single platform. There is a different starter workflow if you need cross-platform coverage. +# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-multi-platform.yml +name: CMake on multi platform + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +env: + # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) + BUILD_TYPE: Release + +jobs: + build: + # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. + # You can convert this to a matrix build if you need cross-platform coverage. 
+ # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix + strategy: + fail-fast: false + matrix: + os: [ "ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] + cuda-version: [ "12.9.1", "12.4.1" ] + exclude: + - os: "windows-latest" + cuda-version: "12.9.1" # CUDA 12.9.1 is not available for ARM64 + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + + - name: Run CUDA bash shell Ubuntu/Debian + if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian') + env: + temp: ${{ runner.temp }} + cuda: ${{ matrix.cuda-version }} + run: | + chmod +x ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh + ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh + shell: bash + + - name: Run CUDA bash shell RHEL + if: startsWith(matrix.os, 'rhel') + env: + temp: ${{ runner.temp }} + cuda: ${{ matrix.cuda-version }} + run: | + chmod +x ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh + ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh + shell: bash + + - name: Run CUDA bash shell Windows + if: runner.os == 'Windows' + env: + temp: ${{ runner.temp }} + cuda: ${{ matrix.cuda-version }} + run: scripts/actions/install-cuda-windows.ps1 + shell: pwsh + + - name: Configure CMake + if: runner.os != 'Windows' + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. + # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + + - name: Configure CMake Windows + if: runner.os == 'Windows' + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
+ # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -G "Visual Studio 17 2022" -A x64 + + - name: Build + # Build your program with the given configuration + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Test + working-directory: ${{github.workspace}}/build + # Execute tests defined by the CMake configuration. + # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail + run: ctest -C ${{env.BUILD_TYPE}} diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index aef434d..ea2371f 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -1,55 +1,61 @@ -$CUDA_PACKAGES_IN = @( - "nvcc" - "visual_studio_integration" - "cudart" - "nvtx" - "nvrtc" - "thrust" - "curand_dev" - "cublas_dev" - "cufft_dev" -) - -function Version-Ge($a, $b) { - return ([version]$a -ge [version]$b) -} -function Version-Gt($a, $b) { - return ([version]$a -gt [version]$b) -} -function Version-Le($a, $b) { - return ([version]$a -le [version]$b) -} -function Version-Lt($a, $b) { - return ([version]$a -lt [version]$b) -} - -# Expect $env:cuda to be set, e.g. 
"12.4.1" -$CUDA_VERSION_MAJOR_MINOR = $env:cuda - -$parts = $CUDA_VERSION_MAJOR_MINOR.Split('.') -$CUDA_MAJOR = $parts[0] -$CUDA_MINOR = $parts[1] -$CUDA_PATCH = if ($parts.Count -gt 2) { $parts[2] } else { "0" } - -$CUDA_PACKAGES = "" -foreach ($package in $CUDA_PACKAGES_IN) { - $CUDA_PACKAGES += " $package" + "_$CUDA_MAJOR.$CUDA_MINOR" -} -Write-Host "CUDA_PACKAGES $CUDA_PACKAGES" - -$cudaInstallerUrl = "https://developer.download.nvidia.com/compute/cuda/$CUDA_VERSION_MAJOR_MINOR/network_installers/cuda_${CUDA_VERSION_MAJOR_MINOR}_windows_network.exe" -Invoke-WebRequest -Uri $cudaInstallerUrl -OutFile "cuda_installer.exe" -Start-Process -FilePath ".\cuda_installer.exe" -ArgumentList "-s $CUDA_PACKAGES" -Wait -Remove-Item "cuda_installer.exe" -Force - -$CUDA_PATH = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_MAJOR.$CUDA_MINOR" -Write-Host "CUDA_PATH=$CUDA_PATH" -$env:CUDA_PATH = $CUDA_PATH - -# If executing on github actions, emit the appropriate echo statements to update environment variables -if (Test-Path "env:GITHUB_ACTIONS") { - # Set paths for subsequent steps, using $env:CUDA_PATH - echo "Adding CUDA to CUDA_PATH, CUDA_PATH_X_Y and PATH" - echo "CUDA_PATH=$env:CUDA_PATH" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - echo "$env:CUDA_PATH/bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append -} +$CUDA_PACKAGES_IN = @( + "nvcc" + "visual_studio_integration" + "cudart" + "nvtx" + "nvrtc" + "thrust" + "curand_dev" + "cublas_dev" + "cufft_dev" +) + +function Version-Ge($a, $b) { + return ([version]$a -ge [version]$b) +} +function Version-Gt($a, $b) { + return ([version]$a -gt [version]$b) +} +function Version-Le($a, $b) { + return ([version]$a -le [version]$b) +} +function Version-Lt($a, $b) { + return ([version]$a -lt [version]$b) +} + +# Expect $env:cuda to be set, e.g. 
"12.4.1" +$CUDA_VERSION_MAJOR_MINOR = $env:cuda + +$parts = $CUDA_VERSION_MAJOR_MINOR.Split('.') +$CUDA_MAJOR = $parts[0] +$CUDA_MINOR = $parts[1] +$CUDA_PATCH = if ($parts.Count -gt 2) { $parts[2] } else { "0" } + +$CUDA_PACKAGES = "" +foreach ($package in $CUDA_PACKAGES_IN) { + $pkg = $package + if ($pkg -eq "nvcc" -and Version-Ge $CUDA_VERSION_MAJOR_MINOR "9.1") { + $pkg = "compiler" + } + if ($pkg -eq "compiler" -and Version-Lt $CUDA_VERSION_MAJOR_MINOR "9.1") { + $pkg = "nvcc" + } + $CUDA_PACKAGES += " ${pkg}_$CUDA_MAJOR.$CUDA_MINOR" +} +Write-Host "CUDA_PACKAGES $CUDA_PACKAGES" + +$cudaInstallerUrl = "https://developer.download.nvidia.com/compute/cuda/$CUDA_VERSION_MAJOR_MINOR/network_installers/cuda_${CUDA_VERSION_MAJOR_MINOR}_windows_network.exe" +Invoke-WebRequest -Uri $cudaInstallerUrl -OutFile "cuda_installer.exe" +Start-Process -FilePath ".\cuda_installer.exe" -ArgumentList "-s $CUDA_PACKAGES" -Wait +Remove-Item "cuda_installer.exe" -Force + +$CUDA_PATH = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$CUDA_MAJOR.$CUDA_MINOR" +Write-Host "CUDA_PATH=$CUDA_PATH" +$env:CUDA_PATH = $CUDA_PATH + +# If executing on github actions, emit the appropriate echo statements to update environment variables +if (Test-Path "env:GITHUB_ACTIONS") { + Write-Host "Adding CUDA to CUDA_PATH" + Add-Content -Path $env:GITHUB_PATH -Value "$CUDA_PATH\bin" + Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH=$CUDA_PATH" +} From 7ba568222e4ea51c431f6c1cc6049521fab589ac Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 19:39:24 +0700 Subject: [PATCH 121/281] Update cmake-multi-platform.yml --- .github/workflows/cmake-multi-platform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 9fbcfb7..4424ab8 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ 
b/.github/workflows/cmake-multi-platform.yml @@ -22,7 +22,7 @@ jobs: matrix: os: [ "ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] cuda-version: [ "12.9.1", "12.4.1" ] - exclude: + exclude: - os: "windows-latest" cuda-version: "12.9.1" # CUDA 12.9.1 is not available for ARM64 From 37aa1ab4be0fc0a9c81322f285612b60a61c5207 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 19:43:29 +0700 Subject: [PATCH 122/281] Update install-cuda-windows.ps1 --- scripts/actions/install-cuda-windows.ps1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index ea2371f..ad3ff75 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -34,10 +34,10 @@ $CUDA_PATCH = if ($parts.Count -gt 2) { $parts[2] } else { "0" } $CUDA_PACKAGES = "" foreach ($package in $CUDA_PACKAGES_IN) { $pkg = $package - if ($pkg -eq "nvcc" -and Version-Ge $CUDA_VERSION_MAJOR_MINOR "9.1") { + if (($pkg -eq "nvcc") -and (Version-Ge $CUDA_VERSION_MAJOR_MINOR "9.1")) { $pkg = "compiler" } - if ($pkg -eq "compiler" -and Version-Lt $CUDA_VERSION_MAJOR_MINOR "9.1") { + if (($pkg -eq "compiler") -and (Version-Lt $CUDA_VERSION_MAJOR_MINOR "9.1")) { $pkg = "nvcc" } $CUDA_PACKAGES += " ${pkg}_$CUDA_MAJOR.$CUDA_MINOR" From 61c322a93f0341c5ad8d86cd8b3750a27a340817 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 19:44:53 +0700 Subject: [PATCH 123/281] Update install-cuda-windows.ps1 --- scripts/actions/install-cuda-windows.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index ad3ff75..1a2a655 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -54,7 +54,7 @@ Write-Host 
"CUDA_PATH=$CUDA_PATH" $env:CUDA_PATH = $CUDA_PATH # If executing on github actions, emit the appropriate echo statements to update environment variables -if (Test-Path "env:GITHUB_ACTIONS") { +if ($env:GITHUB_ACTIONS) { Write-Host "Adding CUDA to CUDA_PATH" Add-Content -Path $env:GITHUB_PATH -Value "$CUDA_PATH\bin" Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH=$CUDA_PATH" From 508d409ce4499fbc4d35a328bbfdd866ee8f421e Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 19:46:44 +0700 Subject: [PATCH 124/281] Update install-cuda-windows.ps1 --- scripts/actions/install-cuda-windows.ps1 | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index 1a2a655..fd36cf1 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -34,10 +34,10 @@ $CUDA_PATCH = if ($parts.Count -gt 2) { $parts[2] } else { "0" } $CUDA_PACKAGES = "" foreach ($package in $CUDA_PACKAGES_IN) { $pkg = $package - if (($pkg -eq "nvcc") -and (Version-Ge $CUDA_VERSION_MAJOR_MINOR "9.1")) { + if ($pkg -eq "nvcc" -and Version-Ge $CUDA_VERSION_MAJOR_MINOR "9.1") { $pkg = "compiler" } - if (($pkg -eq "compiler") -and (Version-Lt $CUDA_VERSION_MAJOR_MINOR "9.1")) { + if ($pkg -eq "compiler" -and Version-Lt $CUDA_VERSION_MAJOR_MINOR "9.1") { $pkg = "nvcc" } $CUDA_PACKAGES += " ${pkg}_$CUDA_MAJOR.$CUDA_MINOR" @@ -54,8 +54,9 @@ Write-Host "CUDA_PATH=$CUDA_PATH" $env:CUDA_PATH = $CUDA_PATH # If executing on github actions, emit the appropriate echo statements to update environment variables -if ($env:GITHUB_ACTIONS) { - Write-Host "Adding CUDA to CUDA_PATH" - Add-Content -Path $env:GITHUB_PATH -Value "$CUDA_PATH\bin" - Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH=$CUDA_PATH" +if (Test-Path "env:GITHUB_ACTIONS") { + # Set paths for subsequent steps, using $env:CUDA_PATH + echo 
"Adding CUDA to CUDA_PATH, CUDA_PATH_X_Y and PATH" + echo "CUDA_PATH=$env:CUDA_PATH" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + echo "$env:CUDA_PATH/bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append } From 4c05a6388c034d779ebf819dd5697c9f7127de7f Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 19:48:02 +0700 Subject: [PATCH 125/281] Update install-cuda-windows.ps1 --- scripts/actions/install-cuda-windows.ps1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index fd36cf1..bc1e840 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -34,10 +34,10 @@ $CUDA_PATCH = if ($parts.Count -gt 2) { $parts[2] } else { "0" } $CUDA_PACKAGES = "" foreach ($package in $CUDA_PACKAGES_IN) { $pkg = $package - if ($pkg -eq "nvcc" -and Version-Ge $CUDA_VERSION_MAJOR_MINOR "9.1") { + if (($pkg -eq "nvcc") -and (Version-Ge $CUDA_VERSION_MAJOR_MINOR "9.1")) { $pkg = "compiler" } - if ($pkg -eq "compiler" -and Version-Lt $CUDA_VERSION_MAJOR_MINOR "9.1") { + if (($pkg -eq "compiler") -and (Version-Lt $CUDA_VERSION_MAJOR_MINOR "9.1")) { $pkg = "nvcc" } $CUDA_PACKAGES += " ${pkg}_$CUDA_MAJOR.$CUDA_MINOR" From 4e8a48460775c0bdcc05dee7245736a9b1db48c2 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 19:50:33 +0700 Subject: [PATCH 126/281] Update cmake-multi-platform.yml --- .github/workflows/cmake-multi-platform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 4424ab8..2b648d0 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -56,7 +56,7 @@ jobs: temp: ${{ runner.temp }} cuda: ${{ 
matrix.cuda-version }} run: scripts/actions/install-cuda-windows.ps1 - shell: pwsh + shell: powershell - name: Configure CMake if: runner.os != 'Windows' From 5c8d11dfbe4b66216f27a49d0d86e19595cc5782 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 20:03:36 +0700 Subject: [PATCH 127/281] test --- .github/workflows/cmake-multi-platform.yml | 162 ++++++++++----------- scripts/actions/install-cuda-windows.ps1 | 6 +- 2 files changed, 84 insertions(+), 84 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 2b648d0..013c638 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -1,81 +1,81 @@ -# This starter workflow is for a CMake project running on a single platform. There is a different starter workflow if you need cross-platform coverage. -# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-multi-platform.yml -name: CMake on multi platform - -on: - push: - branches: [ "master" ] - pull_request: - branches: [ "master" ] - -env: - # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) - BUILD_TYPE: Release - -jobs: - build: - # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. - # You can convert this to a matrix build if you need cross-platform coverage. 
- # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - strategy: - fail-fast: false - matrix: - os: [ "ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] - cuda-version: [ "12.9.1", "12.4.1" ] - exclude: - - os: "windows-latest" - cuda-version: "12.9.1" # CUDA 12.9.1 is not available for ARM64 - - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v3 - - - name: Run CUDA bash shell Ubuntu/Debian - if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian') - env: - temp: ${{ runner.temp }} - cuda: ${{ matrix.cuda-version }} - run: | - chmod +x ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh - ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh - shell: bash - - - name: Run CUDA bash shell RHEL - if: startsWith(matrix.os, 'rhel') - env: - temp: ${{ runner.temp }} - cuda: ${{ matrix.cuda-version }} - run: | - chmod +x ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh - ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh - shell: bash - - - name: Run CUDA bash shell Windows - if: runner.os == 'Windows' - env: - temp: ${{ runner.temp }} - cuda: ${{ matrix.cuda-version }} - run: scripts/actions/install-cuda-windows.ps1 - shell: powershell - - - name: Configure CMake - if: runner.os != 'Windows' - # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. - # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - - - name: Configure CMake Windows - if: runner.os == 'Windows' - # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
- # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -G "Visual Studio 17 2022" -A x64 - - - name: Build - # Build your program with the given configuration - run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - - - name: Test - working-directory: ${{github.workspace}}/build - # Execute tests defined by the CMake configuration. - # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail - run: ctest -C ${{env.BUILD_TYPE}} +# This starter workflow is for a CMake project running on a single platform. There is a different starter workflow if you need cross-platform coverage. +# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-multi-platform.yml +name: CMake on multi platform + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +env: + # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) + BUILD_TYPE: Release + +jobs: + build: + # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. + # You can convert this to a matrix build if you need cross-platform coverage. 
+ # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix + strategy: + fail-fast: false + matrix: + os: [ "ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] + cuda-version: [ "12.9.1", "12.4.1" ] + exclude: + - os: "windows-latest" + cuda-version: "12.9.1" # CUDA 12.9.1 is not available for ARM64 + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + + - name: Run CUDA bash shell Ubuntu/Debian + if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian') + env: + temp: ${{ runner.temp }} + cuda: ${{ matrix.cuda-version }} + run: | + chmod +x ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh + ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh + shell: bash + + - name: Run CUDA bash shell RHEL + if: startsWith(matrix.os, 'rhel') + env: + temp: ${{ runner.temp }} + cuda: ${{ matrix.cuda-version }} + run: | + chmod +x ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh + ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh + shell: bash + + - name: Run CUDA bash shell Windows + if: runner.os == 'Windows' + env: + temp: ${{ runner.temp }} + cuda: ${{ matrix.cuda-version }} + run: scripts/actions/install-cuda-windows.ps1 + shell: pwsh + + - name: Configure CMake + if: runner.os != 'Windows' + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. + # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + + - name: Configure CMake Windows + if: runner.os == 'Windows' + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
+ # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + + - name: Build + # Build your program with the given configuration + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Test + working-directory: ${{github.workspace}}/build + # Execute tests defined by the CMake configuration. + # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail + run: ctest -C ${{env.BUILD_TYPE}} diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index bc1e840..59bcd68 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -56,7 +56,7 @@ $env:CUDA_PATH = $CUDA_PATH # If executing on github actions, emit the appropriate echo statements to update environment variables if (Test-Path "env:GITHUB_ACTIONS") { # Set paths for subsequent steps, using $env:CUDA_PATH - echo "Adding CUDA to CUDA_PATH, CUDA_PATH_X_Y and PATH" - echo "CUDA_PATH=$env:CUDA_PATH" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - echo "$env:CUDA_PATH/bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + Write-Host "Adding CUDA to CUDA_PATH, and PATH" + Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH=$env:CUDA_PATH" + Add-Content -Path $env:GITHUB_PATH -Value "$env:CUDA_PATH\bin" } From 8278e398425c897e689d88474cefdf2a845a135e Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 20:10:47 +0700 Subject: [PATCH 128/281] test --- .github/workflows/cmake-multi-platform.yml | 2 +- scripts/actions/install-cuda-windows.ps1 | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 013c638..efbfc59 100644 --- 
a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -68,7 +68,7 @@ jobs: if: runner.os == 'Windows' # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -G "Visual Studio 17 2022" -A x64 - name: Build # Build your program with the given configuration diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index 59bcd68..e07c079 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -33,14 +33,7 @@ $CUDA_PATCH = if ($parts.Count -gt 2) { $parts[2] } else { "0" } $CUDA_PACKAGES = "" foreach ($package in $CUDA_PACKAGES_IN) { - $pkg = $package - if (($pkg -eq "nvcc") -and (Version-Ge $CUDA_VERSION_MAJOR_MINOR "9.1")) { - $pkg = "compiler" - } - if (($pkg -eq "compiler") -and (Version-Lt $CUDA_VERSION_MAJOR_MINOR "9.1")) { - $pkg = "nvcc" - } - $CUDA_PACKAGES += " ${pkg}_$CUDA_MAJOR.$CUDA_MINOR" + $CUDA_PACKAGES += " ${package}_$CUDA_MAJOR.$CUDA_MINOR" } Write-Host "CUDA_PACKAGES $CUDA_PACKAGES" From e814dfae23c466220bd367001ca3740c3208f3ba Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 21:01:36 +0700 Subject: [PATCH 129/281] Update install-cuda-windows.ps1 --- scripts/actions/install-cuda-windows.ps1 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index e07c079..3789ea7 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -33,7 +33,7 @@ $CUDA_PATCH = if 
($parts.Count -gt 2) { $parts[2] } else { "0" } $CUDA_PACKAGES = "" foreach ($package in $CUDA_PACKAGES_IN) { - $CUDA_PACKAGES += " ${package}_$CUDA_MAJOR.$CUDA_MINOR" + $CUDA_PACKAGES += " ${package}_${CUDA_MAJOR}.${CUDA_MINOR}" } Write-Host "CUDA_PACKAGES $CUDA_PACKAGES" @@ -51,5 +51,6 @@ if (Test-Path "env:GITHUB_ACTIONS") { # Set paths for subsequent steps, using $env:CUDA_PATH Write-Host "Adding CUDA to CUDA_PATH, and PATH" Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH=$env:CUDA_PATH" + Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH_${CUDA_MAJOR}_${CUDA_MINOR}=$env:CUDA_PATH" Add-Content -Path $env:GITHUB_PATH -Value "$env:CUDA_PATH\bin" } From 7ba8733210a24ddbf1a1612f0bda303c8ff1740c Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 21:08:14 +0700 Subject: [PATCH 130/281] Update install-cuda-windows.ps1 --- scripts/actions/install-cuda-windows.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/actions/install-cuda-windows.ps1 b/scripts/actions/install-cuda-windows.ps1 index 3789ea7..c47930b 100644 --- a/scripts/actions/install-cuda-windows.ps1 +++ b/scripts/actions/install-cuda-windows.ps1 @@ -51,6 +51,6 @@ if (Test-Path "env:GITHUB_ACTIONS") { # Set paths for subsequent steps, using $env:CUDA_PATH Write-Host "Adding CUDA to CUDA_PATH, and PATH" Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH=$env:CUDA_PATH" - Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH_${CUDA_MAJOR}_${CUDA_MINOR}=$env:CUDA_PATH" + Add-Content -Path $env:GITHUB_ENV -Value "CUDA_PATH_V${CUDA_MAJOR}_${CUDA_MINOR}=$env:CUDA_PATH" Add-Content -Path $env:GITHUB_PATH -Value "$env:CUDA_PATH\bin" } From 9db0757fcf831dea795679827d962d97e9fd4885 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 21:31:05 +0700 Subject: [PATCH 131/281] Update cmake-multi-platform.yml --- .github/workflows/cmake-multi-platform.yml 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index efbfc59..6185625 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -68,7 +68,7 @@ jobs: if: runner.os == 'Windows' # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -G "Visual Studio 17 2022" -A x64 + run: cmake -B ${{github.workspace}}/build -DCMAKE_CONFIGURATION_TYPES="Debug;Release" -G "Visual Studio 17 2022" -A x64 - name: Build # Build your program with the given configuration From f7041bfce40e699584d63f92012a40eb1fbdfdd4 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 22:33:24 +0700 Subject: [PATCH 132/281] Create msvc.yml --- .github/workflows/msvc.yml | 74 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 .github/workflows/msvc.yml diff --git a/.github/workflows/msvc.yml b/.github/workflows/msvc.yml new file mode 100644 index 0000000..58ca683 --- /dev/null +++ b/.github/workflows/msvc.yml @@ -0,0 +1,74 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. +# +# Find more information at: +# https://github.com/microsoft/msvc-code-analysis-action + +name: Microsoft C++ Code Analysis + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + schedule: + - cron: '37 5 * * 4' + +env: + # Path to the CMake build directory. 
+ build: '${{ github.workspace }}/build' + +permissions: + contents: read + +jobs: + analyze: + permissions: + contents: read # for actions/checkout to fetch code + security-events: write # for github/codeql-action/upload-sarif to upload SARIF results + actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status + name: Analyze + runs-on: windows-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Run CUDA bash shell Windows + env: + temp: ${{ runner.temp }} + cuda: ${{ matrix.cuda-version }} + run: scripts/actions/install-cuda-windows.ps1 + shell: pwsh + + + - name: Configure CMake + run: cmake -B ${{ env.build }} + + # Build is not required unless generated source files are used + # - name: Build CMake + # run: cmake --build ${{ env.build }} + + - name: Initialize MSVC Code Analysis + uses: microsoft/msvc-code-analysis-action@04825f6d9e00f87422d6bf04e1a38b1f3ed60d99 + # Provide a unique ID to access the sarif output path + id: run-analysis + with: + cmakeBuildDirectory: ${{ env.build }} + # Ruleset file that will determine what checks will be run + ruleset: NativeRecommendedRules.ruleset + + # Upload SARIF file to GitHub Code Scanning Alerts + - name: Upload SARIF to GitHub + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: ${{ steps.run-analysis.outputs.sarif }} + + # Upload SARIF file as an Artifact to download and view + # - name: Upload SARIF as an Artifact + # uses: actions/upload-artifact@v4 + # with: + # name: sarif-file + # path: ${{ steps.run-analysis.outputs.sarif }} From 63348afe817774186510958a062587b60cf65efa Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 22:35:11 +0700 Subject: [PATCH 133/281] Update msvc.yml --- .github/workflows/msvc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/msvc.yml b/.github/workflows/msvc.yml index 
58ca683..9482618 100644 --- a/.github/workflows/msvc.yml +++ b/.github/workflows/msvc.yml @@ -39,7 +39,7 @@ jobs: - name: Run CUDA bash shell Windows env: temp: ${{ runner.temp }} - cuda: ${{ matrix.cuda-version }} + cuda: "12.4" run: scripts/actions/install-cuda-windows.ps1 shell: pwsh From bf2c3d2bb9867664273c4f691ba33fef595f7071 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 22:36:33 +0700 Subject: [PATCH 134/281] Update msvc.yml --- .github/workflows/msvc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/msvc.yml b/.github/workflows/msvc.yml index 9482618..d4f9072 100644 --- a/.github/workflows/msvc.yml +++ b/.github/workflows/msvc.yml @@ -39,7 +39,7 @@ jobs: - name: Run CUDA bash shell Windows env: temp: ${{ runner.temp }} - cuda: "12.4" + cuda: "12.4.1" run: scripts/actions/install-cuda-windows.ps1 shell: pwsh From 6f75c41769984654c46eef4fc00fccbb62403dc7 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 17 Jul 2025 22:39:42 +0700 Subject: [PATCH 135/281] Update cmake-multi-platform.yml --- .github/workflows/cmake-multi-platform.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 6185625..4612ea4 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -59,17 +59,10 @@ jobs: shell: pwsh - name: Configure CMake - if: runner.os != 'Windows' # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - - name: Configure CMake Windows - if: runner.os == 'Windows' - # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. - # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_CONFIGURATION_TYPES="Debug;Release" -G "Visual Studio 17 2022" -A x64 - - name: Build # Build your program with the given configuration run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} From 376d04e6d2e68b25691feb6c98333c4886facbe3 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 18 Jul 2025 00:13:02 +0700 Subject: [PATCH 136/281] test --- src/tensor-array/core/CMakeLists.txt | 9 +++++++++ src/tensor-array/layers/CMakeLists.txt | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index b1abcac..ce8931a 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -23,6 +23,15 @@ else() file(GLOB TensorArray_src "*.cc") endif() +# Ensure native path separators for sources (for Windows) +if(WIN32) + foreach(src_file ${TensorArray_src}) + file(TO_CMAKE_PATH "${src_file}" cmake_src_file) + list(APPEND TensorArray_cmake_src "${cmake_src_file}") + endforeach() + set(TensorArray_src ${TensorArray_cmake_src}) +endif() + # file(MAKE_DIRECTORY "include/tensor_array/core") add_library(tensorarray_core SHARED ${TensorArray_src}) diff --git a/src/tensor-array/layers/CMakeLists.txt b/src/tensor-array/layers/CMakeLists.txt index f4562c7..f94f3c8 100644 --- a/src/tensor-array/layers/CMakeLists.txt +++ b/src/tensor-array/layers/CMakeLists.txt @@ -8,6 +8,15 
@@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/layers COMPONENT headers) +# Ensure native path separators for sources (for Windows) +if(WIN32) + foreach(src_file ${TensorArray_src}) + file(TO_CMAKE_PATH "${src_file}" cmake_src_file) + list(APPEND TensorArray_cmake_src "${cmake_src_file}") + endforeach() + set(TensorArray_src ${TensorArray_cmake_src}) +endif() + add_library(tensorarray_layers SHARED ${TensorArray_src}) target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) From b9c189876b15be9c00931e64ca9e520a58f6ace8 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 18 Jul 2025 00:47:11 +0700 Subject: [PATCH 137/281] test --- src/tensor-array/core/CMakeLists.txt | 9 --------- src/tensor-array/layers/CMakeLists.txt | 9 --------- 2 files changed, 18 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index ce8931a..b1abcac 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -23,15 +23,6 @@ else() file(GLOB TensorArray_src "*.cc") endif() -# Ensure native path separators for sources (for Windows) -if(WIN32) - foreach(src_file ${TensorArray_src}) - file(TO_CMAKE_PATH "${src_file}" cmake_src_file) - list(APPEND TensorArray_cmake_src "${cmake_src_file}") - endforeach() - set(TensorArray_src ${TensorArray_cmake_src}) -endif() - # file(MAKE_DIRECTORY "include/tensor_array/core") add_library(tensorarray_core SHARED ${TensorArray_src}) diff --git a/src/tensor-array/layers/CMakeLists.txt b/src/tensor-array/layers/CMakeLists.txt index f94f3c8..f4562c7 100644 --- a/src/tensor-array/layers/CMakeLists.txt +++ b/src/tensor-array/layers/CMakeLists.txt @@ -8,15 +8,6 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/layers COMPONENT headers) -# Ensure native path separators for sources (for Windows) -if(WIN32) - foreach(src_file ${TensorArray_src}) - file(TO_CMAKE_PATH "${src_file}" 
cmake_src_file) - list(APPEND TensorArray_cmake_src "${cmake_src_file}") - endforeach() - set(TensorArray_src ${TensorArray_cmake_src}) -endif() - add_library(tensorarray_layers SHARED ${TensorArray_src}) target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) From 67ef61eca3c1691c556c9699c016d8f245494e2d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 18 Jul 2025 11:08:22 +0700 Subject: [PATCH 138/281] Update cmake-multi-platform.yml --- .github/workflows/cmake-multi-platform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 4612ea4..e66de0a 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -65,7 +65,7 @@ jobs: - name: Build # Build your program with the given configuration - run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --clean-first - name: Test working-directory: ${{github.workspace}}/build From 79f0098dd4752e45d0c890b29248129de102ce46 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 18 Jul 2025 12:14:50 +0700 Subject: [PATCH 139/281] test --- .gitignore | 3 + src/tensor-array/core/initializer_wrapper.hh | 90 +++++++++++++------- 2 files changed, 64 insertions(+), 29 deletions(-) diff --git a/.gitignore b/.gitignore index bc78b51..f4f2205 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ lib/ bin/ include/ build-temp/ +.vs/ +out/ +CMakeSettings.json diff --git a/src/tensor-array/core/initializer_wrapper.hh b/src/tensor-array/core/initializer_wrapper.hh index 7e8e116..8245f0b 100644 --- a/src/tensor-array/core/initializer_wrapper.hh +++ b/src/tensor-array/core/initializer_wrapper.hh @@ -20,45 +20,77 @@ namespace tensor_array { namespace wrapper { +#ifdef 
_MSC_VER template - class initializer_wrapper - { - public: - typedef _E value_type; - typedef const _E& reference; - typedef const _E& const_reference; - typedef size_t size_type; - typedef const _E* iterator; - typedef const _E* const_iterator; + class initializer_wrapper: public std::initializer_list<_E> + { + public: + typedef _E value_type; + typedef const _E& reference; + typedef const _E& const_reference; + typedef size_t size_type; + typedef const _E* iterator; + typedef const _E* const_iterator; + public: + constexpr initializer_wrapper(const_iterator __a, size_type __l) + : std::initializer_list<_E>(__a, __a + __l) { } + + constexpr initializer_wrapper(const_iterator __begin, const_iterator __end) + : std::initializer_list<_E>(__begin, __end) { } + + constexpr initializer_wrapper() noexcept: std::initializer_list<_E>() { } + }; +#else + template + class initializer_wrapper + { + public: + typedef _E value_type; + typedef const _E& reference; + typedef const _E& const_reference; + typedef size_t size_type; + typedef const _E* iterator; + typedef const _E* const_iterator; - private: + private: #ifdef __GNUC__ - iterator _M_array; - size_type _M_len; + iterator _M_array; + size_type _M_len; #endif - public: - constexpr initializer_wrapper(const_iterator __a, size_type __l) - : _M_array(__a), _M_len(__l) { } + public: + constexpr initializer_wrapper(const_iterator __a, size_type __l) +#ifdef __GNUC__ + : _M_array(__a), _M_len(__l) { } +#endif - constexpr initializer_wrapper(const_iterator __begin, const_iterator __end) - : _M_array(__begin), _M_len(__end - __begin) { } + constexpr initializer_wrapper(const_iterator __begin, const_iterator __end) + : _M_array(__begin), _M_len(__end - __begin) + { } - constexpr initializer_wrapper() noexcept: _M_array(0), _M_len(0) { } + constexpr initializer_wrapper() noexcept: _M_array(0), _M_len(0) { } - // Number of elements. - constexpr size_type - size() const noexcept { return _M_len; } + // Number of elements. 
+ constexpr size_type + size() const noexcept + { + return _M_len; + } - // First element. - constexpr const_iterator - begin() const noexcept { return _M_array; } + // First element. + constexpr const_iterator + begin() const noexcept { + return _M_array; + } - // One past the last element. - constexpr const_iterator - end() const noexcept { return begin() + size(); } + // One past the last element. + constexpr const_iterator + end() const noexcept { + return begin() + size(); + } - constexpr operator std::initializer_list<_E>() const { return reinterpret_cast&>(*this); } - }; + constexpr operator std::initializer_list<_E>() const { return reinterpret_cast&>(*this); } + }; +#endif // !_MSC_VER } } \ No newline at end of file From fdd4738d47b282fa028dede2897726fc5e90dc80 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 18 Jul 2025 05:56:48 +0000 Subject: [PATCH 140/281] test --- src/tensor-array/core/CMakeLists.txt | 12 ++++++------ src/tensor-array/layers/CMakeLists.txt | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index b1abcac..df4673e 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -17,23 +17,23 @@ set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") endif() +file(GLOB TensorArray_src_cc "*.cc") + if (CUDAToolkit_FOUND) -file(GLOB TensorArray_src "*.cc" "*.cu") -else() -file(GLOB TensorArray_src "*.cc") +file(GLOB TensorArray_src_cu "*.cu") endif() # file(MAKE_DIRECTORY "include/tensor_array/core") -add_library(tensorarray_core SHARED ${TensorArray_src}) +add_library(tensorarray_core SHARED ${TensorArray_src_cc} ${TensorArray_src_cu}) set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY 
C_EXTENSIONS OFF) set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) +set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) if (CUDAToolkit_FOUND) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) diff --git a/src/tensor-array/layers/CMakeLists.txt b/src/tensor-array/layers/CMakeLists.txt index f4562c7..f013017 100644 --- a/src/tensor-array/layers/CMakeLists.txt +++ b/src/tensor-array/layers/CMakeLists.txt @@ -18,8 +18,8 @@ set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) +set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers PROPERTY CXX_EXTENSIONS OFF) install( TARGETS tensorarray_layers From a71b6544691448ca003073aa8beda1fbb6d2de41 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 18 Jul 2025 06:00:14 +0000 Subject: [PATCH 141/281] test --- .github/workflows/cmake-multi-platform.yml | 2 +- src/tensor-array/core/tensor.cc | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index e66de0a..4612ea4 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -65,7 +65,7 @@ jobs: - name: Build # Build your program with the given configuration - run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --clean-first + run: cmake --build ${{github.workspace}}/build 
--config ${{env.BUILD_TYPE}} - name: Test working-directory: ${{github.workspace}}/build diff --git a/src/tensor-array/core/tensor.cc b/src/tensor-array/core/tensor.cc index c3e9326..21fd0ac 100644 --- a/src/tensor-array/core/tensor.cc +++ b/src/tensor-array/core/tensor.cc @@ -904,6 +904,7 @@ out_stream << static_cast(tensor_out); std::pair broadcast_t = tensor_broadcasting(a, temp_b, 0, 2); return batchedmatmul(broadcast_t.first, broadcast_t.second, true, nullptr); } + throw std::exception(); } Tensor condition(const Tensor& value_bool, const Tensor& value_true, const Tensor& value_false) From 1500859250a65c28a5c87c25f9c0914aa97c2d70 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 18 Jul 2025 14:19:31 +0700 Subject: [PATCH 142/281] Update CMakeLists.txt --- src/tensor-array/core/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index df4673e..97064dc 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -11,10 +11,11 @@ enable_language(CUDA) find_package(CUDAToolkit) + if (CUDAToolkit_FOUND) -set(CMAKE_CUDA_ARCHITECTURES 52 75 89) +# set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) -list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") +# list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") endif() file(GLOB TensorArray_src_cc "*.cc") From 07871689068dda47effef9f150796d01434d3ff4 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 18 Jul 2025 16:09:33 +0700 Subject: [PATCH 143/281] support Windows x64 --- CMakeLists.txt | 4 + src/tensor-array/core/devices.hh | 16 ++-- src/tensor-array/core/tensor.cc | 10 +++ src/tensor-array/core/tensor.hh | 101 ++++++++++++----------- src/tensor-array/core/tensorbase.hh | 14 ++-- src/tensor-array/layers/attention.hh | 4 +- 
src/tensor-array/layers/convolution.hh | 8 +- src/tensor-array/layers/layer_impl.hh | 12 +-- src/tensor-array/layers/layer_utility.hh | 14 ++-- src/tensor-array/layers/linear.hh | 2 +- src/tensor-array/layers/normalization.cc | 2 +- src/tensor-array/layers/normalization.hh | 2 +- src/tensor-array/layers/recurrent.hh | 4 +- src/tensor-array/layers/sequential.hh | 4 +- src/tensor-array/layers/transformer.hh | 4 +- 15 files changed, 110 insertions(+), 91 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cb8c4c..1290953 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,10 @@ project(TensorArray) include(GNUInstallDirs) # set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) +if(MSVC) + add_compile_definitions(TENSOR_ARRAY_EXPORTS) +endif() + add_subdirectory("src/tensor-array/core") add_subdirectory("src/tensor-array/layers") diff --git a/src/tensor-array/core/devices.hh b/src/tensor-array/core/devices.hh index 50eb102..e3aeb41 100644 --- a/src/tensor-array/core/devices.hh +++ b/src/tensor-array/core/devices.hh @@ -16,14 +16,14 @@ limitations under the License. 
#pragma once -#ifdef __WIN32__ -#ifdef CUDA_ML_EXPORTS -#define CUDA_ML_API __declspec(dllexport) +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) #else -#define CUDA_ML_API __declspec(dllimport) +#define TENSOR_ARRAY_API __declspec(dllimport) #endif #else -#define CUDA_ML_API +#define TENSOR_ARRAY_API #endif namespace tensor_array @@ -44,7 +44,7 @@ namespace tensor_array constexpr Device DEVICE_CPU_0{ CPU,0 }; - CUDA_ML_API Device& local_device(); + TENSOR_ARRAY_API Device& local_device(); void device_memcpy(void*, Device, const void*, Device, size_t); @@ -54,7 +54,7 @@ namespace tensor_array void device_memset(void*, Device, int, size_t, void*); - CUDA_ML_API void device_CUDA_get_info(); + TENSOR_ARRAY_API void device_CUDA_get_info(); } } @@ -66,4 +66,4 @@ void operator delete(void*, tensor_array::devices::Device); void operator delete(void*, tensor_array::devices::Device, void*); -#undef CUDA_ML_API \ No newline at end of file +#undef TENSOR_ARRAY_API \ No newline at end of file diff --git a/src/tensor-array/core/tensor.cc b/src/tensor-array/core/tensor.cc index 21fd0ac..0310877 100644 --- a/src/tensor-array/core/tensor.cc +++ b/src/tensor-array/core/tensor.cc @@ -47,6 +47,16 @@ namespace tensor_array { bool use_grad = true; + bool is_use_grad() + { + return use_grad; + } + + void set_use_grad(bool use) + { + use_grad = use; + } + class Tensor::TensorContent { private: diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index 553f311..5654887 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -20,14 +20,18 @@ limitations under the License. 
#pragma once -#ifdef __WIN32__ -#ifdef CUDA_ML_EXPORTS -#define CUDA_ML_API __declspec(dllexport) +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#define TENSOR_ARRAY_EXPORT_API __declspec(dllexport) +#define TENSOR_ARRAY_IMPORT_API #else -#define CUDA_ML_API __declspec(dllimport) +#define TENSOR_ARRAY_API __declspec(dllimport) +#define TENSOR_ARRAY_EXPORT_API +#define TENSOR_ARRAY_IMPORT_API __declspec(dllimport) #endif #else -#define CUDA_ML_API +#define TENSOR_ARRAY_API #endif #define USING_DATA_TYPE_FLOAT (float)(double) @@ -48,7 +52,8 @@ namespace tensor_array { namespace value { - extern CUDA_ML_API bool use_grad; + bool TENSOR_ARRAY_API is_use_grad(); + void TENSOR_ARRAY_API set_use_grad(bool use_grad); #ifdef TENSOR_CONTENT void* create_mem_101(std::size_t s, const void* dat); @@ -103,7 +108,7 @@ namespace tensor_array * \brief Dynamic derivative tensor. * \brief This class use to calculate the tensor. */ - class CUDA_ML_API Tensor + class TENSOR_ARRAY_API Tensor { public: /** @@ -130,7 +135,7 @@ namespace tensor_array /** * \brief This class can iterate copy child tensor by index and derivate to parent tensor, */ - class CUDA_ML_API Iterator + class TENSOR_ARRAY_API Iterator { public: using iterator_category = std::forward_iterator_tag; @@ -144,8 +149,8 @@ namespace tensor_array Iterator& operator--(); Iterator operator++(int); Iterator operator--(int); - friend bool CUDA_ML_API operator==(const Iterator&, const Iterator&); - friend bool CUDA_ML_API operator!=(const Iterator&, const Iterator&); + friend bool TENSOR_ARRAY_API operator==(const Iterator&, const Iterator&); + friend bool TENSOR_ARRAY_API operator!=(const Iterator&, const Iterator&); private: unsigned long long index; reference_left ref; @@ -191,9 +196,9 @@ namespace tensor_array Tensor transpose(unsigned char, unsigned char) const; std::pair max(unsigned char = 0) const; std::pair min(unsigned char = 0) const; - friend std::pair 
tensor_broadcasting(const Tensor&, const Tensor&, unsigned char, unsigned char); + friend TENSOR_ARRAY_EXPORT_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char, unsigned char); #ifdef TENSOR_CONTENT - friend CUDA_ML_API Tensor add_dim(const std::vector&); + friend TENSOR_ARRAY_API Tensor add_dim(const std::vector&); #endif bool has_tensor() const; template @@ -225,10 +230,10 @@ namespace tensor_array Tensor& operator/=(const Tensor&); - friend CUDA_ML_API Tensor operator>(const Tensor&, const Tensor&); - friend CUDA_ML_API Tensor operator<(const Tensor&, const Tensor&); - friend CUDA_ML_API Tensor operator&&(const Tensor&, const Tensor&); - friend CUDA_ML_API Tensor operator||(const Tensor&, const Tensor&); + friend TENSOR_ARRAY_API Tensor operator>(const Tensor&, const Tensor&); + friend TENSOR_ARRAY_API Tensor operator<(const Tensor&, const Tensor&); + friend TENSOR_ARRAY_API Tensor operator&&(const Tensor&, const Tensor&); + friend TENSOR_ARRAY_API Tensor operator||(const Tensor&, const Tensor&); Tensor operator!(); Tensor exp() const; Tensor sin() const; @@ -244,7 +249,7 @@ namespace tensor_array Tensor log() const; #ifdef TENSOR_CONTENT - friend Tensor tensor_rand(const std::initializer_list&, unsigned int); + friend TENSOR_ARRAY_EXPORT_API Tensor tensor_rand(const std::initializer_list&, unsigned int); friend Tensor add(const Tensor&, const Tensor&, bool); @@ -267,7 +272,7 @@ namespace tensor_array Tensor tensor_cast(const std::type_info&, bool) const; #endif - friend CUDA_ML_API std::ostream& operator<<(std::ostream&, const Tensor&); + friend TENSOR_ARRAY_API std::ostream& operator<<(std::ostream&, const Tensor&); private: #ifdef TENSOR_CONTENT @@ -297,7 +302,7 @@ namespace tensor_array std::shared_ptr tensor_data; }; - class CUDA_ML_API WeakTensor + class TENSOR_ARRAY_API WeakTensor { public: WeakTensor(const Tensor&); @@ -307,13 +312,13 @@ namespace tensor_array std::weak_ptr tensor_data; }; - CUDA_ML_API dimension 
operator+(const dimension&, const dimension&); + TENSOR_ARRAY_API dimension operator+(const dimension&, const dimension&); - CUDA_ML_API dimension operator-(const dimension&, const dimension&); + TENSOR_ARRAY_API dimension operator-(const dimension&, const dimension&); - CUDA_ML_API dimension operator*(const dimension&, const dimension&); + TENSOR_ARRAY_API dimension operator*(const dimension&, const dimension&); - CUDA_ML_API dimension operator/(const dimension&, const dimension&); + TENSOR_ARRAY_API dimension operator/(const dimension&, const dimension&); /** * \brief Plus 2 n-d tensors. @@ -321,9 +326,9 @@ namespace tensor_array * \return * Tensor */ - CUDA_ML_API Tensor operator+(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor operator+(const Tensor&, const Tensor&); - CUDA_ML_API Tensor operator-(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor operator-(const Tensor&, const Tensor&); /** * \brief Multiply 2 n-d tensors. @@ -331,19 +336,19 @@ namespace tensor_array * \return * Tensor */ - CUDA_ML_API Tensor operator*(const Tensor&, const Tensor&); - - CUDA_ML_API Tensor operator/(const Tensor&, const Tensor&); - CUDA_ML_API Tensor operator!=(const Tensor&, const Tensor&); - CUDA_ML_API Tensor operator==(const Tensor&, const Tensor&); - CUDA_ML_API Tensor operator>=(const Tensor&, const Tensor&); - CUDA_ML_API Tensor operator<=(const Tensor&, const Tensor&); - CUDA_ML_API Tensor tensor_file_load(const char*); - CUDA_ML_API Tensor power(const Tensor&, const Tensor&); - CUDA_ML_API Tensor add(const Tensor&, const Tensor&); - CUDA_ML_API Tensor multiply(const Tensor&, const Tensor&); - CUDA_ML_API Tensor divide(const Tensor&, const Tensor&); - CUDA_ML_API Tensor dot(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor operator*(const Tensor&, const Tensor&); + + TENSOR_ARRAY_API Tensor operator/(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor operator!=(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor operator==(const Tensor&, 
const Tensor&); + TENSOR_ARRAY_API Tensor operator>=(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor operator<=(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor tensor_file_load(const char*); + TENSOR_ARRAY_API Tensor power(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor add(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor multiply(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor divide(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor dot(const Tensor&, const Tensor&); /** * \brief Matrix multiplication 2 matrices. * \param a Matrix/Tensor that has size (batch*)m*k. @@ -351,8 +356,8 @@ namespace tensor_array * \return Tensor - Matrix that has size (batch*)m*n. * \exception a.col != b.row */ - CUDA_ML_API Tensor matmul(const Tensor&, const Tensor&); - CUDA_ML_API Tensor condition(const Tensor&, const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor matmul(const Tensor&, const Tensor&); + TENSOR_ARRAY_API Tensor condition(const Tensor&, const Tensor&, const Tensor&); /** * \brief Convolution * \brief Only suport 1D, 2D, 3D convolution @@ -363,17 +368,17 @@ namespace tensor_array * \return * Tensor (N, K, ...) 
*/ - CUDA_ML_API Tensor convolution(const Tensor&, const Tensor&, const dimension& = value::dimension(), const dimension& = value::dimension()); - CUDA_ML_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char = 0, unsigned char = 0); - CUDA_ML_API Tensor tensor_rand(const std::initializer_list&, unsigned int = std::rand()); -#define ADD_CODE(TYPE) CUDA_ML_API Tensor values(const std::initializer_list&, TYPE); + TENSOR_ARRAY_API Tensor convolution(const Tensor&, const Tensor&, const dimension& = value::dimension(), const dimension& = value::dimension()); + TENSOR_ARRAY_IMPORT_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char = 0, unsigned char = 0); + TENSOR_ARRAY_IMPORT_API Tensor tensor_rand(const std::initializer_list&, unsigned int = std::rand()); +#define ADD_CODE(TYPE) TENSOR_ARRAY_API Tensor values(const std::initializer_list&, TYPE); LOOP(USING_DATA_TYPE); #undef ADD_CODE #ifndef TENSOR_CONTENT - CUDA_ML_API Tensor add_dim(const std::vector&); + TENSOR_ARRAY_API Tensor add_dim(const std::vector&); #endif - CUDA_ML_API const std::type_info& comparison_type(const std::type_info&, const std::type_info&); - CUDA_ML_API Tensor tensor_rand(const std::vector&, unsigned int = std::rand()); + TENSOR_ARRAY_API const std::type_info& comparison_type(const std::type_info&, const std::type_info&); + TENSOR_ARRAY_API Tensor tensor_rand(const std::vector&, unsigned int = std::rand()); #ifdef TENSOR_CONTENT class Derivation @@ -451,4 +456,4 @@ struct std::equal_to #undef USING_DATA_TYPE_SINT #undef USING_DATA_TYPE_UINT -#undef CUDA_ML_API \ No newline at end of file +#undef TENSOR_ARRAY_API \ No newline at end of file diff --git a/src/tensor-array/core/tensorbase.hh b/src/tensor-array/core/tensorbase.hh index 0c0e863..0474afa 100644 --- a/src/tensor-array/core/tensorbase.hh +++ b/src/tensor-array/core/tensorbase.hh @@ -23,14 +23,14 @@ limitations under the License. 
#include "initializer_wrapper.hh" #pragma once -#ifdef __WIN32__ -#ifdef CUDA_ML_EXPORTS -#define CUDA_ML_API __declspec(dllexport) +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) #else -#define CUDA_ML_API __declspec(dllimport) +#define TENSOR_ARRAY_API __declspec(dllimport) #endif #else -#define CUDA_ML_API +#define TENSOR_ARRAY_API #endif namespace tensor_array @@ -40,7 +40,7 @@ namespace tensor_array /** * \brief This class look like std::any but it tensor. */ - class CUDA_ML_API TensorBase + class TENSOR_ARRAY_API TensorBase { private: struct TensorStorage @@ -213,4 +213,4 @@ namespace tensor_array } } -#undef CUDA_ML_API \ No newline at end of file +#undef TENSOR_ARRAY_API \ No newline at end of file diff --git a/src/tensor-array/layers/attention.hh b/src/tensor-array/layers/attention.hh index 8cd7480..0c3b4c3 100644 --- a/src/tensor-array/layers/attention.hh +++ b/src/tensor-array/layers/attention.hh @@ -23,9 +23,9 @@ namespace tensor_array { namespace layers { - value::Tensor CUDA_ML_API scaled_dot_product_attention(const value::Tensor&, const value::Tensor&, const value::Tensor&, const value::Tensor& = value::Tensor()); + value::Tensor TENSOR_ARRAY_API scaled_dot_product_attention(const value::Tensor&, const value::Tensor&, const value::Tensor&, const value::Tensor& = value::Tensor()); - class CUDA_ML_API MultiHeadAttentionImpl final : + class TENSOR_ARRAY_API MultiHeadAttentionImpl final : public LayerImpl { private: diff --git a/src/tensor-array/layers/convolution.hh b/src/tensor-array/layers/convolution.hh index 644f1a7..6da4845 100644 --- a/src/tensor-array/layers/convolution.hh +++ b/src/tensor-array/layers/convolution.hh @@ -22,7 +22,7 @@ namespace tensor_array { namespace layers { - class CUDA_ML_API ConvolutionLayerImpl : + class TENSOR_ARRAY_API ConvolutionLayerImpl : public TensorCalculateLayerImpl { protected: @@ -39,7 +39,7 @@ namespace tensor_array value::Tensor calculate(const value::Tensor&) 
override final; }; - class CUDA_ML_API Conv1D_Impl final : + class TENSOR_ARRAY_API Conv1D_Impl final : public ConvolutionLayerImpl { public: @@ -49,7 +49,7 @@ namespace tensor_array using Conv1D = LayerHolder; - class CUDA_ML_API Conv2D_Impl final : + class TENSOR_ARRAY_API Conv2D_Impl final : public ConvolutionLayerImpl { public: @@ -59,7 +59,7 @@ namespace tensor_array using Conv2D = LayerHolder; - class CUDA_ML_API Conv3D_Impl final : + class TENSOR_ARRAY_API Conv3D_Impl final : public ConvolutionLayerImpl { public: diff --git a/src/tensor-array/layers/layer_impl.hh b/src/tensor-array/layers/layer_impl.hh index 2d03419..f361bc2 100644 --- a/src/tensor-array/layers/layer_impl.hh +++ b/src/tensor-array/layers/layer_impl.hh @@ -20,21 +20,21 @@ limitations under the License. #include #pragma once -#ifdef __WIN32__ -#ifdef CUDA_ML_EXPORTS -#define CUDA_ML_API __declspec(dllexport) +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) #else -#define CUDA_ML_API __declspec(dllimport) +#define TENSOR_ARRAY_API __declspec(dllimport) #endif #else -#define CUDA_ML_API +#define TENSOR_ARRAY_API #endif namespace tensor_array { namespace layers { - class CUDA_ML_API LayerImpl + class TENSOR_ARRAY_API LayerImpl { private: bool is_running = false; diff --git a/src/tensor-array/layers/layer_utility.hh b/src/tensor-array/layers/layer_utility.hh index a0966d9..f8bcbb0 100644 --- a/src/tensor-array/layers/layer_utility.hh +++ b/src/tensor-array/layers/layer_utility.hh @@ -24,7 +24,7 @@ namespace tensor_array { typedef value::Tensor(*LayerFunction)(const value::Tensor&); - class CUDA_ML_API ActivationImpl final : + class TENSOR_ARRAY_API ActivationImpl final : public TensorCalculateLayerImpl { public: @@ -34,7 +34,7 @@ namespace tensor_array const LayerFunction func; }; - class CUDA_ML_API ReShapeImpl final : + class TENSOR_ARRAY_API ReShapeImpl final : public TensorCalculateLayerImpl { public: @@ -44,11 +44,11 @@ namespace tensor_array const 
std::vector shape; }; - value::Tensor CUDA_ML_API NoActivation(const value::Tensor&); - value::Tensor CUDA_ML_API ReLU(const value::Tensor&); - value::Tensor CUDA_ML_API tanh(const value::Tensor&); - value::Tensor CUDA_ML_API Sigmoid(const value::Tensor&); - value::Tensor CUDA_ML_API SoftMax(const value::Tensor&, unsigned char dim); + value::Tensor TENSOR_ARRAY_API NoActivation(const value::Tensor&); + value::Tensor TENSOR_ARRAY_API ReLU(const value::Tensor&); + value::Tensor TENSOR_ARRAY_API tanh(const value::Tensor&); + value::Tensor TENSOR_ARRAY_API Sigmoid(const value::Tensor&); + value::Tensor TENSOR_ARRAY_API SoftMax(const value::Tensor&, unsigned char dim); using Activation = LayerHolder; using ReShape = LayerHolder; diff --git a/src/tensor-array/layers/linear.hh b/src/tensor-array/layers/linear.hh index 6d87fdd..e72f07e 100644 --- a/src/tensor-array/layers/linear.hh +++ b/src/tensor-array/layers/linear.hh @@ -21,7 +21,7 @@ namespace tensor_array { namespace layers { - class CUDA_ML_API LinearImpl final : + class TENSOR_ARRAY_API LinearImpl final : public TensorCalculateLayerImpl { private: diff --git a/src/tensor-array/layers/normalization.cc b/src/tensor-array/layers/normalization.cc index 0ad3870..f67a4fa 100644 --- a/src/tensor-array/layers/normalization.cc +++ b/src/tensor-array/layers/normalization.cc @@ -40,7 +40,7 @@ namespace tensor_array value::Tensor NormalizationImpl::calculate(const value::Tensor& input) { value::Tensor normal; - if (tensor_array::value::use_grad) + if (tensor_array::value::is_use_grad()) { value::Tensor temp_mean = input.mean(this->dims_mean); value::Tensor temp_variance = input.variance(this->dims_mean); diff --git a/src/tensor-array/layers/normalization.hh b/src/tensor-array/layers/normalization.hh index 1afa8b6..b93501f 100644 --- a/src/tensor-array/layers/normalization.hh +++ b/src/tensor-array/layers/normalization.hh @@ -21,7 +21,7 @@ namespace tensor_array { namespace layers { - class CUDA_ML_API NormalizationImpl final : 
+ class TENSOR_ARRAY_API NormalizationImpl final : public TensorCalculateLayerImpl { private: diff --git a/src/tensor-array/layers/recurrent.hh b/src/tensor-array/layers/recurrent.hh index 1041e92..df8b2f1 100644 --- a/src/tensor-array/layers/recurrent.hh +++ b/src/tensor-array/layers/recurrent.hh @@ -22,7 +22,7 @@ namespace tensor_array { namespace layers { - class CUDA_ML_API RecurrentImpl : + class TENSOR_ARRAY_API RecurrentImpl : public TensorCalculateLayerImpl { private: @@ -41,7 +41,7 @@ namespace tensor_array }; using Recurrent = LayerHolder; - class CUDA_ML_API LSTM_Impl : + class TENSOR_ARRAY_API LSTM_Impl : public TensorCalculateLayerImpl { private: diff --git a/src/tensor-array/layers/sequential.hh b/src/tensor-array/layers/sequential.hh index db7854a..abf8608 100644 --- a/src/tensor-array/layers/sequential.hh +++ b/src/tensor-array/layers/sequential.hh @@ -23,7 +23,7 @@ namespace tensor_array { using LayerInSequential = LayerHolder; - class CUDA_ML_API SequentialImpl final : + class TENSOR_ARRAY_API SequentialImpl final : public TensorCalculateLayerImpl { private: @@ -37,7 +37,7 @@ namespace tensor_array value::Tensor calculate(const value::Tensor&) override; }; - class CUDA_ML_API Sequential : public LayerHolder + class TENSOR_ARRAY_API Sequential : public LayerHolder { public: Sequential() = default; diff --git a/src/tensor-array/layers/transformer.hh b/src/tensor-array/layers/transformer.hh index c4bac31..8352fc6 100644 --- a/src/tensor-array/layers/transformer.hh +++ b/src/tensor-array/layers/transformer.hh @@ -21,7 +21,7 @@ namespace tensor_array { namespace layers { - class CUDA_ML_API TransformerEncoderImpl final : + class TENSOR_ARRAY_API TransformerEncoderImpl final : public TensorCalculateLayerImpl { private: @@ -37,7 +37,7 @@ namespace tensor_array using TransformerEncoder = LayerHolder; - class CUDA_ML_API TransformerDecoderImpl final : + class TENSOR_ARRAY_API TransformerDecoderImpl final : public LayerImpl { private: From 
88c78f34f277044af9a4e2909b9773fcb01e9c45 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 18 Jul 2025 16:13:44 +0700 Subject: [PATCH 144/281] Update tensor.hh --- src/tensor-array/core/tensor.hh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index 5654887..b3e6f1e 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -32,6 +32,8 @@ limitations under the License. #endif #else #define TENSOR_ARRAY_API +#define TENSOR_ARRAY_EXPORT_API +#define TENSOR_ARRAY_IMPORT_API #endif #define USING_DATA_TYPE_FLOAT (float)(double) From f11a6ba44d6d3ef1271e051a2d4a010827422ea4 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 19 Jul 2025 21:39:35 +0700 Subject: [PATCH 145/281] Temporary add interpreter Not linking with CMake right now. --- .vscode/settings.json | 5 +- src/tensor-array/interp/CMakeLists.txt | 32 ++++ src/tensor-array/interp/glob_stack.cc | 23 +++ src/tensor-array/interp/glob_stack.h | 17 ++ src/tensor-array/interp/main.c | 101 ++++++++++ src/tensor-array/interp/open_file.c | 57 ++++++ src/tensor-array/interp/open_file.h | 8 + src/tensor-array/interp/option.c | 16 ++ src/tensor-array/interp/option.h | 2 + src/tensor-array/interp/parser.c | 183 ++++++++++++++++++ src/tensor-array/interp/parser.h | 1 + src/tensor-array/interp/token.c | 245 +++++++++++++++++++++++++ src/tensor-array/interp/token.h | 21 +++ src/tensor-array/interp/vm.c | 193 +++++++++++++++++++ src/tensor-array/interp/vm.h | 8 + src/tensor-array/interp/vmop.cc | 234 +++++++++++++++++++++++ src/tensor-array/interp/vmop.h | 37 ++++ 17 files changed, 1182 insertions(+), 1 deletion(-) create mode 100644 src/tensor-array/interp/CMakeLists.txt create mode 100644 src/tensor-array/interp/glob_stack.cc create mode 100644 src/tensor-array/interp/glob_stack.h create mode 100644 
cmake_minimum_required(VERSION 3.18)

# Build and install rules for the TensorArray command-line interpreter
# (`tensorarray_interp`), which is linked against TensorArray::Core.

# Sources and headers are collected by glob. CONFIGURE_DEPENDS makes the
# build system re-evaluate the glob on every build, so a newly added file is
# picked up without a manual re-configure.
file(GLOB TensorArray_src CONFIGURE_DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/*.cc"
    "${CMAKE_CURRENT_SOURCE_DIR}/*.c")
file(GLOB TensorArray_inc CONFIGURE_DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/*.hh"
    "${CMAKE_CURRENT_SOURCE_DIR}/*.h")

install(
    FILES ${TensorArray_inc}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/interp
    COMPONENT headers)

# add_executable() has no SHARED keyword: the original
# `add_executable(tensorarray_interp SHARED ...)` made CMake look for a source
# file literally named "SHARED" and fail at configure time.
add_executable(tensorarray_interp ${TensorArray_src})

target_include_directories(tensorarray_interp
    PRIVATE
        "${PROJECT_SOURCE_DIR}/src")

# An executable has no consumers, so its dependencies are PRIVATE.
target_link_libraries(tensorarray_interp
    PRIVATE
        TensorArray::Core)

# The interpreter mixes C (.c) and C++ (.cc) translation units; pin both
# language standards and disable compiler-specific extensions.
set_target_properties(tensorarray_interp PROPERTIES
    C_STANDARD 11
    C_STANDARD_REQUIRED ON
    C_EXTENSIONS OFF
    CXX_STANDARD 17
    CXX_STANDARD_REQUIRED ON
    CXX_EXTENSIONS OFF)

# Only a RUNTIME artifact exists for an executable; the LIBRARY/ARCHIVE
# destinations previously listed here never applied to this target.
install(
    TARGETS tensorarray_interp
    EXPORT TensorArrayTargets
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
            COMPONENT Runtime)
/*
 * Program entry point.
 *
 * Forwards the command line, minus the program name in argv[0], to
 * initialize(), then hands control to program().
 */
int main(int argc, char *argv[])
{
    /* Drop argv[0] so initialize() only sees the user-supplied arguments. */
    int user_argc = argc - 1;
    char **user_argv = argv + 1;

    initialize(user_argc, user_argv);
    program();

    return 0;
}
// Future implementations may include command-line argument parsing, initialization of the TensorArray library,
// and other necessary setup for the interp functionality.
diff --git a/src/tensor-array/interp/open_file.c b/src/tensor-array/interp/open_file.c new file mode 100644 index 0000000..51446e3 --- /dev/null +++ b/src/tensor-array/interp/open_file.c @@ -0,0 +1,57 @@ + +#include +#include +#include + +char *src, *text = NULL; +size_t poolsize = 1024; // Default pool size + +void interp_malloc() +{ + src = malloc(poolsize); + if (src == NULL) + { + fprintf(stderr, "Error: Could not allocate memory for interpreter\n"); + exit(1); + } + text = malloc(poolsize); + if (text == NULL) + { + fprintf(stderr, "Error: Could not allocate memory for interpreter text\n"); + free(src); + exit(1); + } +} + +void interp_memreset() +{ + memset(text, 0, poolsize); + memset(src, 0, poolsize); +} + +void interp_free() +{ + free(text); + free(src); +} + +void read_file(const char* filename) +{ + FILE* fptr = fopen(filename, "r"); + if (fptr == NULL) + { + fprintf(stderr, "Error: Could not open file %s\n", filename); + exit(1); + } + + int i; + interp_malloc(); + i = fread(src, poolsize, 1, fptr); + if (i < 0) + { + fprintf(stderr, "Error: Could not read file %s\n", filename); + fclose(fptr); + exit(1); + } + return 0; // Return 0 on success +} diff --git a/src/tensor-array/interp/open_file.h b/src/tensor-array/interp/open_file.h new file mode 100644 index 0000000..38981e6 --- /dev/null +++ b/src/tensor-array/interp/open_file.h @@ -0,0 +1,8 @@ +extern char *src, *text; +extern size_t poolsize; + +void interp_malloc(); +void interp_memreset(); +void interp_free(); +void read_file(const char*); + diff --git a/src/tensor-array/interp/option.c b/src/tensor-array/interp/option.c new file mode 100644 index 0000000..e7fd5f9 --- /dev/null +++ b/src/tensor-array/interp/option.c @@ -0,0 +1,16 @@ +#include + +void help() +{ + printf("Usage: tensor-array [options]\n"); + printf("Options:\n"); + printf(" -h, --help Show this help message\n"); + printf(" -v, --version Show version information\n"); + printf(" --poolsize SIZE Set the pool size (default: 1024)\n"); + 
printf(" -f, --file FILE Open the specified file\n"); +} + +void version() +{ + printf("Tensor Array Interpreter Version 0.1.0\n"); +} \ No newline at end of file diff --git a/src/tensor-array/interp/option.h b/src/tensor-array/interp/option.h new file mode 100644 index 0000000..b246448 --- /dev/null +++ b/src/tensor-array/interp/option.h @@ -0,0 +1,2 @@ +void help(); +void version(); diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c new file mode 100644 index 0000000..21da098 --- /dev/null +++ b/src/tensor-array/interp/parser.c @@ -0,0 +1,183 @@ +#include +#include +#include +#include +#include +#include +#include "open_file.h" +#include "parser.h" +#include "token.h" +#include "vm.h" + +void emit(int size, ...) +{ + va_list args; + va_start(args, size); + + // Process the variable arguments as needed + for (int i = 0; i < size; ++i) { + ++text; + *text = va_arg(args, long); + } + + va_end(args); +} + +void match(long tk) +{ + if (tkn == tk) { + token_next(); // Move to the next token + } else { + if (tk < 0x80) { + fprintf(stderr, "Error: Expected token %ld but found %ld\n", tk, tkn); + } else { + char* tn = tknname[tk - 0x80]; + fprintf(stderr, "Error: Expected token %s but found %s\n", tn, tkn); + } + exit(1); + } +} + +void program() +{ + while (1) + { + // This is a placeholder for the main program loop + // You would typically call emit or other functions here based on your program logic + // Add your program logic here + interp_malloc(); + char *isrc = src; + char *itext = text; + interp_memreset(); + printf(">>> "); + read(0, src, poolsize-1); // Read input from stdin + token_next(); + statement(); + emit(1, EXIT); // Emit a token with value 0 to indicate end of processing + eval(); + puts(""); + free(itext); + free(isrc); + } +} + +void expression(int level) +{ + void* temp = NULL; // Temporary variable to hold intermediate values + int isArrRef = 0; // Flag to check if we are dealing with an array reference + // This 
function would handle parsing and evaluating expressions + // For now, it is a placeholder + // You can implement your logic here + switch (tkn) + { + case TOKEN_NUM: + /* code */ + emit(1, IMM); + match(TOKEN_NUM); + break; + case TOKEN_ID: + /* code */ + emit(1, GET); + match(TOKEN_ID); + break; + case '"': + { + match('"'); // Match the opening quote + } + break; + case '[': + if (temp == NULL) + { + *text = PUSH; // Push the current value onto the stack + match('['); // Match the opening bracket + expression(TOKEN_ASSIGN); // Parse the expression inside the brackets + emit(1, GETELEM); // Emit get element instruction + match(']'); // Match the closing bracket + } + break; + default: + break; + } + + while (tkn >= level) + { + switch (tkn) + { + case TOKEN_ASSIGN: + if (*text != GET && *text != GETELEM) + { + fprintf(stderr, "Error: Assignment without a variable\n"); + exit(1); + } + *text = PUSH; // Push the current value onto the stack + match(TOKEN_ASSIGN); + expression(TOKEN_ASSIGN); // Parse the right-hand side expression + if (isArrRef) emit(1, SETELEM); // Emit set element instruction if it's an array reference + else emit(1, SET); // Emit set instruction + break; + case TOKEN_ADD: + emit(1, PUSH); + match(TOKEN_ADD); + expression(TOKEN_MUL); // Parse the right-hand side expression + emit(1, ADD); // Emit add instruction + break; + case TOKEN_SUB: + emit(1, PUSH); + match(TOKEN_SUB); + expression(TOKEN_MUL); // Parse the right-hand side expression + emit(1, SUB); // Emit subtract instruction + break; + default: + fprintf(stderr, "Error: Unrecognized token in expression\n"); + exit(1); + } + } + +} + +void statement() +{ + // This function would handle parsing and executing statements + // For now, it is a placeholder + // You can implement your logic here + switch (tkn) + { + case TOKEN_IF: + { + match(TOKEN_IF); + match('('); + expression(TOKEN_ASSIGN); // Parse the condition expression + match(')'); + emit(1, JZ); // Emit jump if zero instruction + long 
*b = ++text; // Placeholder for jump address + statement(); // Parse the statement inside the if block + if (tkn == TOKEN_ELSE) + { + match(TOKEN_ELSE); + emit(1, JMP); // Emit jump instruction + *b = text + 2; // Set the jump address to the next instruction + statement(); // Parse the else block + } + *b = text + 1; // Set the jump address to the next instruction + } + break; + case TOKEN_WHILE: + { + long *a = NULL; // Placeholder for jump address + long *b = text+1; // Placeholder for jump address + match(TOKEN_WHILE); + match('('); + expression(TOKEN_ASSIGN); // Parse the condition expression + match(')'); + emit(1, JZ); // Emit jump if zero instruction + a=++text; // Set the jump address to the start of the while block + statement(); // Parse the statement inside the while block + emit(1, JMP); // Emit jump instruction to loop back + emit(1, b); // Emit the address to jump back to + *a = text + 1; // Set the jump address to the next instruction + } + break; + default: + break; + } +} diff --git a/src/tensor-array/interp/parser.h b/src/tensor-array/interp/parser.h new file mode 100644 index 0000000..5e48ba6 --- /dev/null +++ b/src/tensor-array/interp/parser.h @@ -0,0 +1 @@ +void program(); diff --git a/src/tensor-array/interp/token.c b/src/tensor-array/interp/token.c new file mode 100644 index 0000000..ec2212b --- /dev/null +++ b/src/tensor-array/interp/token.c @@ -0,0 +1,245 @@ +#include +#include +#include +#include "glob_stack.h" +#include "open_file.h" +#include "token.h" + +long tkn = 0; +long token_val = 0; // Variable to hold the value of the current token + +void token_next() +{ + glob_data_t token_item; + while ((tkn = *src++) != '\0') + { + switch (tkn) + { + case ' ': + case '\t': + /* code */ + break; + case '#': + /* code */ + while (*src != '\n' && *src != '\0') src++; + break; + case '"': + case '\'': + { + char* last_pos = src - 1; + while (*src != tkn && *src != '\0') + { + src++; + } + token_val = last_pos; // Store the start of the string 
literal + } + return; // Exit after processing the string literal + case '/': + switch (src[0]) + { + case '/': + /* code */ + while (*src != '\n' && *src != '\0') src++; + break; + case '*': + /* code */ + src++; + while (*src != '\0' && !(src[0] == '*' && src[1] == '/')) src++; + if (*src == '\0') { + fprintf(stderr, "Error: Unmatched comment block\n"); + exit(1); + } + src += 2; // Skip past the closing */ + break; + case '=': + src++; + tkn = TOKEN_DIV; // Store the token value + return; // Exit after processing the division operator + default: + tkn = TOKEN_DIV; // Store the token value + return; // Exit after processing the division operator + } + + case '*': + if (*src == '=') + { + src++; + tkn = TOKEN_MUL; // Store the token value + return; // Exit after processing the token + } + else + { + tkn = TOKEN_MUL; // Store the token value + return; // Exit after processing the token + } + case '+': + if (*src == '=') + { + src++; + tkn = TOKEN_ADD; // Store the token value + return; // Exit after processing the token + } + else + { + tkn = TOKEN_ADD; // Store the token value + return; // Exit after processing the token + } + case '-': + if (*src == '=') + { + src++; + tkn = TOKEN_SUB; // Store the token value + return; // Exit after processing the token + } + else + { + tkn = TOKEN_SUB; // Store the token value + return; // Exit after processing the token + } + case '=': + if (*src == '=') + { + src++; + tkn = TOKEN_EQ; // Store the token value + return; // Exit after processing the token + } + else + { + tkn = TOKEN_ASSIGN; // Store the token value + return; // Exit after processing the token + } + case '!': + if (*src == '=') + { + src++; + tkn = TOKEN_NE; // Store the token value + return; // Exit after processing the token + } + else + { + tkn = TOKEN_NOT; // Store the token value + return; // Exit after processing the token + } + case '<': + if (*src == '=') + { + src++; + tkn = TOKEN_LE; // Store the token value + return; // Exit after processing the token 
+ } + else if (*src == '<') + { + src++; + tkn = TOKEN_SHL; // Store the token value + return; // Exit after processing the token + } + else + { + tkn = TOKEN_LT; // Store the token value + return; // Exit after processing the token + } + case '>': + if (*src == '=') + { + src++; + tkn = TOKEN_GE; // Store the token value + return; // Exit after processing the token + } + else if (*src == '>') + { + src++; + tkn = TOKEN_SHR; // Store the token value + return; // Exit after processing the token + } + else + { + tkn = TOKEN_GT; // Store the token value + return; // Exit after processing the token + } + case '&': + if (*src == '&') + { + src++; + tkn = TOKEN_AND; // Store the token value + return; // Exit after processing the token + } + else + { + fprintf(stderr, "Error: Unrecognized token '&'\n"); + exit(1); + } + case '|': + if (*src == '|') + { + src++; + tkn = TOKEN_LOR; // Store the token value + return; // Exit after processing the token + } + else + { + fprintf(stderr, "Error: Unrecognized token '|'\n"); + exit(1); + } + case '@': + src++; + tkn = TOKEN_MATMUL; // Store the token value + return; // Exit after processing the token + default: + if (tkn >= '0' && tkn <= '9') + { + if (tkn == '0' && (*src == 'x' || *src == 'X')) + { + src++; + while ((*src >= '0' && *src <= '9') || (*src >= 'a' && *src <= 'f') || (*src >= 'A' && *src <= 'F')) + { + token_val = (token_val << 4) + (*src >= '0' && *src <= '9' ? *src - '0' : (*src >= 'a' && *src <= 'f' ? 
/*
 * Token kinds produced by token_next().
 *
 * Values start at 0x80 so they never collide with single-character tokens,
 * which are represented by their own character code.
 */
typedef enum
{
    TOKEN_NUM = 0x80, TOKEN_SYS, TOKEN_GLO, TOKEN_LOC, TOKEN_ID,
    TOKEN_FUNC, TOKEN_ELSE, TOKEN_ENUM, TOKEN_IF, TOKEN_RETURN, TOKEN_SIZEOF,
    TOKEN_WHILE, TOKEN_ASSIGN, TOKEN_COND, TOKEN_LOR, TOKEN_LAN,
    TOKEN_OR, TOKEN_XOR, TOKEN_AND, TOKEN_SHL, TOKEN_SHR,
    TOKEN_EQ, TOKEN_NE, TOKEN_LT, TOKEN_GT, TOKEN_LE, TOKEN_GE,
    TOKEN_ADD, TOKEN_SUB, TOKEN_MUL, TOKEN_DIV, TOKEN_MATMUL, TOKEN_POS, TOKEN_NEG, TOKEN_NOT
} TOKEN_TYPE;

/*
 * Printable token names, indexed by (token - TOKEN_NUM).
 *
 * The table must stay in the same order as TOKEN_TYPE. The "shl" and "shr"
 * entries were missing, which shifted every name from "eq" onward by two
 * and left TOKEN_NOT outside the table.
 *
 * `static` gives each including source file its own copy; a non-static
 * definition in a header is a multiple-definition link error as soon as two
 * files include it.
 *
 * NOTE(review): "brak" has no matching enumerator; it is kept as the final
 * entry in case a bracket token is planned.
 */
static char* tknname[] = {
    "num", "sys", "glo", "loc", "id",
    "func", "else", "enum", "if", "return", "sizeof",
    "while", "assign", "cond", "lor", "lan",
    "or", "xor", "and", "shl", "shr",
    "eq", "ne", "lt", "gt", "le", "ge",
    "add", "sub", "mul", "div", "matmul", "pos", "neg", "not", "brak"
};

/* Advance the tokenizer; the kind of the token read is stored in `tkn`. */
void token_next(void);

/*
 * Current token. Defined in token.c; this is a declaration only. The
 * original `extern long tkn = 0;` was a second definition.
 */
extern long tkn;
/*
 * Opcodes understood by the interpreter's virtual machine.
 *
 * The numeric values are implicit (LEA == 0, increasing by one), so the
 * order of the enumerators must not be changed.
 */
typedef enum
{
    LEA, IMM, JMP, CALL, JZ, JNZ, ENT, ADJ, LEV, RET, LI, LC, SI, SC, SET, GET, PUSH, GETELEM, SETELEM, ADDELEM,
    OR, XOR, AND, EQ, NE, LT, GT, LE, GE, ADD, SUB, MUL, DIV, MATMUL, POS, NEG, NOT, SHL, SHR,
    OPEN, READ, CLOSE, PRTF, MALC, MSET, MCMP, EXIT
} VM_INSTRUCTION;

/* Operand most recently loaded by the IMM instruction; defined in vm.c. */
extern void* any_value;

/*
 * Run the emitted program until a RET or EXIT instruction is reached.
 * Defined in vm.c. Declared here because parser.c calls eval() and
 * previously did so without any prototype in scope.
 */
void eval(void);
+tensor_array::value::Tensor ag; + +void op_add() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag += tensor_stack.top(); + tensor_stack.pop(); +} + +void op_sub() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag -= tensor_stack.top(); + tensor_stack.pop(); +} + +void op_mul() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag *= tensor_stack.top(); + tensor_stack.pop(); +} + +void op_div() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag /= tensor_stack.top(); + tensor_stack.pop(); +} + +void op_matmul() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag = tensor_array::value::matmul(ag, tensor_stack.top()); + tensor_stack.pop(); +} + +void op_pos() +{ + ag = +ag; +} + +void op_neg() +{ + ag = -ag; +} + +void op_and() +{ + ag = ag && tensor_stack.top(); + tensor_stack.pop(); +} + +void op_or() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag = ag || tensor_stack.top(); + tensor_stack.pop(); +} + +void op_not() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag = !ag; +} + +void op_eq() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag = ag == tensor_stack.top(); + tensor_stack.pop(); +} + +void op_ne() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag = ag != tensor_stack.top(); + tensor_stack.pop(); +} + +void op_lt() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag = ag < tensor_stack.top(); + tensor_stack.pop(); +} + +void op_gt() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag = ag > tensor_stack.top(); + tensor_stack.pop(); +} + +void op_le() +{ + 
if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag = ag <= tensor_stack.top(); + tensor_stack.pop(); +} + +void op_ge() +{ + if (tensor_stack.empty()) + { + throw std::runtime_error("Tensor stack is empty"); + } + ag = ag >= tensor_stack.top(); + tensor_stack.pop(); +} + +void op_shl() +{ + // ag = ag << bg; +} + +void op_shr() +{ + // ag = ag >> bg; +} + +void op_open() +{ + // Implementation for opening a file or resource +} + +void op_read() +{ + // Implementation for reading from a file or resource +} + +void op_close() +{ + // Implementation for closing a file or resource +} + +void op_prtf() +{ + // Implementation for printing formatted output +} + +void op_malc() +{ + // Implementation for memory allocation +} + +void op_mset() +{ + // Implementation for setting memory +} + +void op_mcmp() +{ + // Implementation for memory comparison +} + +void op_exit() +{ + // Implementation for exiting the program + std::cout<< ag << std::endl; +} + +void op_push() +{ + tensor_stack.push(ag); +} + +void op_get() +{ + if (!tensor_stack.empty()) + { + ag = tensor_stack.top(); + tensor_stack.pop(); + } + else + { + throw std::runtime_error("Tensor stack is empty"); + } +} + +void op_set() +{ + if (!tensor_stack.empty()) + { + tensor_array::value::Tensor bg = tensor_stack.top(); + tensor_stack.pop(); + ag = bg; // Set the top of the stack to ag + } + else + { + throw std::runtime_error("Tensor stack is empty"); + } +} diff --git a/src/tensor-array/interp/vmop.h b/src/tensor-array/interp/vmop.h new file mode 100644 index 0000000..044c88d --- /dev/null +++ b/src/tensor-array/interp/vmop.h @@ -0,0 +1,37 @@ +#ifdef __cplusplus +extern "C" +{ +#endif + void op_add(); + void op_sub(); + void op_mul(); + void op_div(); + void op_matmul(); + void op_pos(); + void op_neg(); + void op_and(); + void op_or(); + void op_not(); + void op_eq(); + void op_ne(); + void op_lt(); + void op_gt(); + void op_le(); + void op_ge(); + void op_shl(); + void 
op_shr(); + void op_open(); + void op_read(); + void op_close(); + void op_prtf(); + void op_malc(); + void op_mset(); + void op_mcmp(); + void op_exit(); + void op_push(); + void op_get(); + void op_set(); +#ifdef __cplusplus +} +#endif + From 204fb88209aa93fa8e7a844bb03c1dc769e054d4 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 19 Jul 2025 21:56:12 +0700 Subject: [PATCH 146/281] add license for files in interp --- src/tensor-array/interp/glob_stack.cc | 16 ++++++++++++++++ src/tensor-array/interp/glob_stack.h | 16 ++++++++++++++++ src/tensor-array/interp/main.c | 17 ++++++++++++++++- src/tensor-array/interp/open_file.c | 15 +++++++++++++++ src/tensor-array/interp/open_file.h | 17 ++++++++++++++++- src/tensor-array/interp/option.c | 16 ++++++++++++++++ src/tensor-array/interp/option.h | 16 ++++++++++++++++ src/tensor-array/interp/parser.c | 16 ++++++++++++++++ src/tensor-array/interp/parser.h | 16 ++++++++++++++++ src/tensor-array/interp/token.c | 16 ++++++++++++++++ src/tensor-array/interp/token.h | 16 ++++++++++++++++ src/tensor-array/interp/vm.c | 16 ++++++++++++++++ src/tensor-array/interp/vm.h | 16 ++++++++++++++++ src/tensor-array/interp/vmop.cc | 16 ++++++++++++++++ src/tensor-array/interp/vmop.h | 16 ++++++++++++++++ 15 files changed, 239 insertions(+), 2 deletions(-) diff --git a/src/tensor-array/interp/glob_stack.cc b/src/tensor-array/interp/glob_stack.cc index 1147045..f71619e 100644 --- a/src/tensor-array/interp/glob_stack.cc +++ b/src/tensor-array/interp/glob_stack.cc @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + #include #include #include diff --git a/src/tensor-array/interp/glob_stack.h b/src/tensor-array/interp/glob_stack.h index 7d45ed4..2795e7e 100644 --- a/src/tensor-array/interp/glob_stack.h +++ b/src/tensor-array/interp/glob_stack.h @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + #ifdef __cplusplus extern "C" { diff --git a/src/tensor-array/interp/main.c b/src/tensor-array/interp/main.c index 31b6354..6b9d40e 100644 --- a/src/tensor-array/interp/main.c +++ b/src/tensor-array/interp/main.c @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + #include #include #include @@ -5,7 +21,6 @@ #include "open_file.h" #include "parser.h" - void initialize(int argc, char *argv[]) { int i, fd; diff --git a/src/tensor-array/interp/open_file.c b/src/tensor-array/interp/open_file.c index 51446e3..1eb4317 100644 --- a/src/tensor-array/interp/open_file.c +++ b/src/tensor-array/interp/open_file.c @@ -1,3 +1,18 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ #include #include diff --git a/src/tensor-array/interp/open_file.h b/src/tensor-array/interp/open_file.h index 38981e6..10cc7d9 100644 --- a/src/tensor-array/interp/open_file.h +++ b/src/tensor-array/interp/open_file.h @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + extern char *src, *text; extern size_t poolsize; @@ -5,4 +21,3 @@ void interp_malloc(); void interp_memreset(); void interp_free(); void read_file(const char*); - diff --git a/src/tensor-array/interp/option.c b/src/tensor-array/interp/option.c index e7fd5f9..ef83c45 100644 --- a/src/tensor-array/interp/option.c +++ b/src/tensor-array/interp/option.c @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + #include void help() diff --git a/src/tensor-array/interp/option.h b/src/tensor-array/interp/option.h index b246448..168b613 100644 --- a/src/tensor-array/interp/option.h +++ b/src/tensor-array/interp/option.h @@ -1,2 +1,18 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + void help(); void version(); diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c index 21da098..a0dbc19 100644 --- a/src/tensor-array/interp/parser.c +++ b/src/tensor-array/interp/parser.c @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + #include #include #include diff --git a/src/tensor-array/interp/parser.h b/src/tensor-array/interp/parser.h index 5e48ba6..021ba4a 100644 --- a/src/tensor-array/interp/parser.h +++ b/src/tensor-array/interp/parser.h @@ -1 +1,17 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + void program(); diff --git a/src/tensor-array/interp/token.c b/src/tensor-array/interp/token.c index ec2212b..f1fd805 100644 --- a/src/tensor-array/interp/token.c +++ b/src/tensor-array/interp/token.c @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + #include #include #include diff --git a/src/tensor-array/interp/token.h b/src/tensor-array/interp/token.h index 295726d..2f9974b 100644 --- a/src/tensor-array/interp/token.h +++ b/src/tensor-array/interp/token.h @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + typedef enum { TOKEN_NUM = 0x80, TOKEN_SYS, TOKEN_GLO, TOKEN_LOC, TOKEN_ID, diff --git a/src/tensor-array/interp/vm.c b/src/tensor-array/interp/vm.c index e3d7a33..06c49ad 100644 --- a/src/tensor-array/interp/vm.c +++ b/src/tensor-array/interp/vm.c @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + #include #include #include "vmop.h" diff --git a/src/tensor-array/interp/vm.h b/src/tensor-array/interp/vm.h index 86a3220..cb4bc9d 100644 --- a/src/tensor-array/interp/vm.h +++ b/src/tensor-array/interp/vm.h @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + typedef enum { LEA, IMM, JMP, CALL, JZ, JNZ, ENT, ADJ, LEV, RET, LI, LC, SI, SC, SET, GET, PUSH, GETELEM, SETELEM, ADDELEM, diff --git a/src/tensor-array/interp/vmop.cc b/src/tensor-array/interp/vmop.cc index 1ac9b04..9cb05ca 100644 --- a/src/tensor-array/interp/vmop.cc +++ b/src/tensor-array/interp/vmop.cc @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + #include #include #include diff --git a/src/tensor-array/interp/vmop.h b/src/tensor-array/interp/vmop.h index 044c88d..ab5dec5 100644 --- a/src/tensor-array/interp/vmop.h +++ b/src/tensor-array/interp/vmop.h @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + #ifdef __cplusplus extern "C" { From e6a06b117f1eeb23c05df207594d4042d9c53ed5 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sun, 20 Jul 2025 22:15:56 +0700 Subject: [PATCH 147/281] test exe --- .vscode/c_cpp_properties.json | 3 +- .vscode/settings.json | 7 +- CMakeLists.txt | 1 + src/tensor-array/interp/CMakeLists.txt | 28 ++-- src/tensor-array/interp/main.c | 7 +- src/tensor-array/interp/open_file.c | 6 +- src/tensor-array/interp/open_file.h | 5 +- src/tensor-array/interp/parser.c | 152 ++++++++++++++---- .../interp/{glob_stack.cc => sym_map.cc} | 24 ++- .../interp/{glob_stack.h => sym_map.h} | 10 +- src/tensor-array/interp/token.c | 71 ++++++-- src/tensor-array/interp/token.h | 17 +- src/tensor-array/interp/vm.c | 26 +-- src/tensor-array/interp/vm.h | 18 ++- src/tensor-array/interp/vmop.cc | 59 ++++++- src/tensor-array/interp/vmop.h | 4 + 16 files changed, 326 insertions(+), 112 deletions(-) rename src/tensor-array/interp/{glob_stack.cc => sym_map.cc} (64%) rename src/tensor-array/interp/{glob_stack.h => sym_map.h} (80%) diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index d224842..84bab62 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -6,7 +6,8 @@ "${workspaceFolder}/src" ], "defines": [], - "compilerPath": "/usr/bin/g++" + "compilerPath": "/usr/bin/g++", + "configurationProvider": "ms-vscode.cmake-tools" }, { "name": "Windows", diff --git a/.vscode/settings.json b/.vscode/settings.json index d6e1596..bbbdf48 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -62,7 +62,10 @@ "typeindex": "cpp", "cassert": "cpp", "stack": "cpp", - "token.h": "c", - "glob_stack.h": "c" + "*.h": "c", + "map": "cpp", + "optional": "cpp", + "fstream": "cpp", + "set": "cpp" } } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 1290953..9f14d1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,7 @@ 
endif() add_subdirectory("src/tensor-array/core") add_subdirectory("src/tensor-array/layers") +add_subdirectory("src/tensor-array/interp") set(CPACK_PACKAGE_NAME "TensorArray") set(CPACK_PACKAGE_VENDOR "TensorArray-Creators") diff --git a/src/tensor-array/interp/CMakeLists.txt b/src/tensor-array/interp/CMakeLists.txt index cada9e8..79dd349 100644 --- a/src/tensor-array/interp/CMakeLists.txt +++ b/src/tensor-array/interp/CMakeLists.txt @@ -8,21 +8,21 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/interp COMPONENT headers) -add_executable(tensorarray_interp SHARED ${TensorArray_src}) +add_executable(tensorarray_interpreter ${TensorArray_src}) -target_include_directories(tensorarray_interp PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_interp TensorArray::Core) +target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_link_libraries(tensorarray_interpreter TensorArray::Core) -set_property(TARGET tensorarray_interp PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_interp PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_interp PROPERTY C_EXTENSIONS OFF) +set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_interpreter PROPERTY C_EXTENSIONS OFF) -set_property(TARGET tensorarray_interp PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_interp PROPERTY CXX_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_interp PROPERTY CXX_EXTENSIONS OFF) +set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_interpreter PROPERTY CXX_EXTENSIONS OFF) install( - TARGETS tensorarray_interp + TARGETS tensorarray_interpreter EXPORT TensorArrayTargets RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT Runtime @@ -30,3 +30,11 @@ 
install( COMPONENT Runtime ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/interp COMPONENT Development) +#[[ +add_custom_command( + OUTPUT test.tmp + DEPENDS tensorarray_interpreter + POST_BUILD + COMMAND tensorarray_interpreter) +]] +add_executable(TensorArray::Interpreter ALIAS tensorarray_interpreter) diff --git a/src/tensor-array/interp/main.c b/src/tensor-array/interp/main.c index 6b9d40e..7aa7285 100644 --- a/src/tensor-array/interp/main.c +++ b/src/tensor-array/interp/main.c @@ -46,7 +46,7 @@ void initialize(int argc, char *argv[]) exit(1); return; } - open_file(argv[1]); + read_file(argv[1]); argc--; argv++; return; @@ -87,13 +87,13 @@ void initialize(int argc, char *argv[]) exit(1); return; } - open_file(argv[1], poolsize); + read_file(argv[1]); argc--; argv++; } return; default: - open_file(argv[0], poolsize); + read_file(argv[0]); return; } break; @@ -108,6 +108,7 @@ void initialize(int argc, char *argv[]) int main(int argc, char *argv[]) { + printf("Hello\n"); initialize(argc-1, argv+1); program(); return 0; diff --git a/src/tensor-array/interp/open_file.c b/src/tensor-array/interp/open_file.c index 1eb4317..7966f6c 100644 --- a/src/tensor-array/interp/open_file.c +++ b/src/tensor-array/interp/open_file.c @@ -17,8 +17,10 @@ limitations under the License. 
#include #include #include +#include "open_file.h" -char *src, *text = NULL; +char *src = NULL; +VM_INSTRUCTION *text = NULL; size_t poolsize = 1024; // Default pool size void interp_malloc() @@ -29,7 +31,7 @@ void interp_malloc() fprintf(stderr, "Error: Could not allocate memory for interpreter\n"); exit(1); } - text = malloc(poolsize); + text = malloc(poolsize*sizeof(VM_INSTRUCTION)); if (text == NULL) { fprintf(stderr, "Error: Could not allocate memory for interpreter text\n"); diff --git a/src/tensor-array/interp/open_file.h b/src/tensor-array/interp/open_file.h index 10cc7d9..fbc4a21 100644 --- a/src/tensor-array/interp/open_file.h +++ b/src/tensor-array/interp/open_file.h @@ -14,7 +14,10 @@ See the License for the specific language governing permissions and limitations under the License. */ -extern char *src, *text; +#include "vm.h" + +extern char *src; +extern VM_INSTRUCTION *text; extern size_t poolsize; void interp_malloc(); diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c index a0dbc19..10aad85 100644 --- a/src/tensor-array/interp/parser.c +++ b/src/tensor-array/interp/parser.c @@ -20,10 +20,10 @@ limitations under the License. #include #include #include -#include "open_file.h" +#include "sym_map.h" #include "parser.h" #include "token.h" -#include "vm.h" +#include "open_file.h" void emit(int size, ...) { @@ -33,7 +33,7 @@ void emit(int size, ...) 
// Process the variable arguments as needed for (int i = 0; i < size; ++i) { ++text; - *text = va_arg(args, long); + *text = va_arg(args, VM_INSTRUCTION); } va_end(args); @@ -54,32 +54,9 @@ void match(long tk) } } -void program() -{ - while (1) - { - // This is a placeholder for the main program loop - // You would typically call emit or other functions here based on your program logic - // Add your program logic here - interp_malloc(); - char *isrc = src; - char *itext = text; - interp_memreset(); - printf(">>> "); - read(0, src, poolsize-1); // Read input from stdin - token_next(); - statement(); - emit(1, EXIT); // Emit a token with value 0 to indicate end of processing - eval(); - puts(""); - free(itext); - free(isrc); - } -} - void expression(int level) { - void* temp = NULL; // Temporary variable to hold intermediate values + sym_data* temp = NULL; // Temporary variable to hold intermediate values int isArrRef = 0; // Flag to check if we are dealing with an array reference // This function would handle parsing and evaluating expressions // For now, it is a placeholder @@ -88,23 +65,34 @@ void expression(int level) { case TOKEN_NUM: /* code */ - emit(1, IMM); + emit(3, IMM, TYPE_INT, tkn_val); match(TOKEN_NUM); break; case TOKEN_ID: /* code */ - emit(1, GET); + temp = sym_cur; match(TOKEN_ID); + if (!temp->data) + { + temp->data = new_Tensor(); + } + if (0); + else + { + emit(3, IMM, TYPE_PTR, temp); + emit(1, GET); + } break; case '"': { + emit(3, IMM, TYPE_STRING, tkn_val); match('"'); // Match the opening quote } break; case '[': if (temp == NULL) { - *text = PUSH; // Push the current value onto the stack + *text = PTR_PUSH; // Push the current value onto the stack match('['); // Match the opening bracket expression(TOKEN_ASSIGN); // Parse the expression inside the brackets emit(1, GETELEM); // Emit get element instruction @@ -125,12 +113,60 @@ void expression(int level) fprintf(stderr, "Error: Assignment without a variable\n"); exit(1); } - *text = PUSH; 
// Push the current value onto the stack + *text = PTR_PUSH; // Push the current value onto the stack match(TOKEN_ASSIGN); expression(TOKEN_ASSIGN); // Parse the right-hand side expression if (isArrRef) emit(1, SETELEM); // Emit set element instruction if it's an array reference else emit(1, SET); // Emit set instruction break; + case TOKEN_EQ: + emit(1, PUSH); + match(TOKEN_EQ); + expression(TOKEN_SHL); // Parse the right-hand side expression + emit(1, EQ); // Emit equality instruction + break; + case TOKEN_NE: + emit(1, PUSH); + match(TOKEN_NE); + expression(TOKEN_SHL); // Parse the right-hand side expression + emit(1, NE); // Emit not equal instruction + break; + case TOKEN_LT: + emit(1, PUSH); + match(TOKEN_LT); + expression(TOKEN_SHL); // Parse the right-hand side expression + emit(1, LT); // Emit less than instruction + break; + case TOKEN_GT: + emit(1, PUSH); + match(TOKEN_GT); + expression(TOKEN_SHL); // Parse the right-hand side expression + emit(1, GT); // Emit greater than instruction + break; + case TOKEN_LE: + emit(1, PUSH); + match(TOKEN_LE); + expression(TOKEN_SHL); // Parse the right-hand side expression + emit(1, LE); // Emit less than or equal instruction + break; + case TOKEN_GE: + emit(1, PUSH); + match(TOKEN_GE); + expression(TOKEN_SHL); // Parse the right-hand side expression + emit(1, GE); // Emit greater than or equal instruction + break; + case TOKEN_SHL: + emit(1, PUSH); + match(TOKEN_SHL); + expression(TOKEN_ADD); // Parse the right-hand side expression + emit(1, SHL); // Emit shift left instruction + break; + case TOKEN_SHR: + emit(1, PUSH); + match(TOKEN_SHR); + expression(TOKEN_ADD); // Parse the right-hand side expression + emit(1, SHR); // Emit shift right instruction + break; case TOKEN_ADD: emit(1, PUSH); match(TOKEN_ADD); @@ -143,6 +179,24 @@ void expression(int level) expression(TOKEN_MUL); // Parse the right-hand side expression emit(1, SUB); // Emit subtract instruction break; + case TOKEN_MUL: + emit(1, PUSH); + 
match(TOKEN_MUL); + expression(TOKEN_MATMUL); // Parse the right-hand side expression + emit(1, MUL); // Emit multiply instruction + break; + case TOKEN_DIV: + emit(1, PUSH); + match(TOKEN_DIV); + expression(TOKEN_MATMUL); // Parse the right-hand side expression + emit(1, DIV); // Emit divide instruction + break; + case TOKEN_MATMUL: + emit(1, PUSH); + match(TOKEN_MATMUL); + expression(TOKEN_INC); // Parse the right-hand side expression + emit(1, MATMUL); // Emit matrix multiply instruction + break; default: fprintf(stderr, "Error: Unrecognized token in expression\n"); exit(1); @@ -165,13 +219,14 @@ void statement() expression(TOKEN_ASSIGN); // Parse the condition expression match(')'); emit(1, JZ); // Emit jump if zero instruction - long *b = ++text; // Placeholder for jump address + VM_INSTRUCTION *b = ++text; // Placeholder for jump address statement(); // Parse the statement inside the if block if (tkn == TOKEN_ELSE) { match(TOKEN_ELSE); emit(1, JMP); // Emit jump instruction *b = text + 2; // Set the jump address to the next instruction + b = ++text; statement(); // Parse the else block } *b = text + 1; // Set the jump address to the next instruction @@ -179,8 +234,8 @@ void statement() break; case TOKEN_WHILE: { - long *a = NULL; // Placeholder for jump address - long *b = text+1; // Placeholder for jump address + VM_INSTRUCTION *a = NULL; // Placeholder for jump address + VM_INSTRUCTION *b = text+1; // Placeholder for jump address match(TOKEN_WHILE); match('('); expression(TOKEN_ASSIGN); // Parse the condition expression @@ -194,6 +249,35 @@ void statement() } break; default: + expression(TOKEN_ASSIGN); + if (tkn = ';') + match(';'); break; } } + +void program() +{ + while (1) + { + // This is a placeholder for the main program loop + // You would typically call emit or other functions here based on your program logic + // Add your program logic here + interp_malloc(); + orig = text + 1; + char *isrc = src; + VM_INSTRUCTION *itext = text; + 
interp_memreset(); + printf(">>> "); + fflush(stdout); + read(0, src, poolsize-1); // Read input from stdin + token_next(); + statement(); + emit(1, EXIT); // Emit a token with value 0 to indicate end of processing + eval(); + printf("eval \n"); + puts(""); + free(itext); + free(isrc); + } +} diff --git a/src/tensor-array/interp/glob_stack.cc b/src/tensor-array/interp/sym_map.cc similarity index 64% rename from src/tensor-array/interp/glob_stack.cc rename to src/tensor-array/interp/sym_map.cc index f71619e..12d3d4e 100644 --- a/src/tensor-array/interp/glob_stack.cc +++ b/src/tensor-array/interp/sym_map.cc @@ -17,23 +17,33 @@ limitations under the License. #include #include #include -#include "glob_stack.h" +#include "sym_map.h" -std::map data_map; +sym_data* sym_cur = NULL; +std::map sym_map; -void glob_data_set(char* name, glob_data_t item) +void sym_data_set(char* name, sym_data dat) { - data_map[name] = item; + sym_map[name] = dat; } -glob_data_t glob_data_get(char* name) +sym_data* sym_data_get(char* name) { - return data_map[name]; + return &sym_map[name]; } int glob_data_find(char* name) { - return data_map.find(name) != data_map.end(); + return sym_map.find(name) != sym_map.end(); +} + +void* new_Tensor() +{ + return new tensor_array::value::Tensor; +} +void delete_Tensor(void* t) +{ + delete t; } diff --git a/src/tensor-array/interp/glob_stack.h b/src/tensor-array/interp/sym_map.h similarity index 80% rename from src/tensor-array/interp/glob_stack.h rename to src/tensor-array/interp/sym_map.h index 2795e7e..a887a48 100644 --- a/src/tensor-array/interp/glob_stack.h +++ b/src/tensor-array/interp/sym_map.h @@ -24,10 +24,12 @@ extern "C" long hash; long cls; void* data; // Pointer to additional data if needed - } glob_data_t; - void glob_data_set(char*, glob_data_t); - glob_data_t glob_stack_get(char*); - int glob_stack_find(char*); + } sym_data; + void sym_data_set(char* name, sym_data dat); + sym_data* sym_data_get(char*); + int glob_data_find(char* name); + 
extern sym_data* sym_cur; + void* new_Tensor(); #ifdef __cplusplus } #endif diff --git a/src/tensor-array/interp/token.c b/src/tensor-array/interp/token.c index f1fd805..e62ab5c 100644 --- a/src/tensor-array/interp/token.c +++ b/src/tensor-array/interp/token.c @@ -17,16 +17,24 @@ limitations under the License. #include #include #include -#include "glob_stack.h" +#include +#include "sym_map.h" #include "open_file.h" #include "token.h" long tkn = 0; -long token_val = 0; // Variable to hold the value of the current token +long tkn_val = 0; // Variable to hold the value of the current token +char* tknname[] = { + "num", "sys", "glo", "loc", "id", + "func", "else", "enum", "if", "return", "sizeof", + "while", "assign", "cond", "lor", "lan", + "or", "xor", "and", + "eq", "ne", "lt", "gt", "le", "ge", + "add", "sub", "mul", "div", "matmul", "pos", "neg", "not", "brak" +}; void token_next() { - glob_data_t token_item; while ((tkn = *src++) != '\0') { switch (tkn) @@ -42,12 +50,40 @@ void token_next() case '"': case '\'': { - char* last_pos = src - 1; - while (*src != tkn && *src != '\0') + char* last_pos = src; + while (*src != tkn && *src != '\0') src++; + char *st1 = malloc(src-last_pos+2); + memset(st1, 0, src-last_pos+2); + src = last_pos; + for (unsigned int i = 0; *src != tkn && *src != '\0'; i++) { - src++; + tkn_val=*src++; + if (tkn_val == '\\') + { + switch (*++src) + { + case '\\': + tkn_val='\\'; + break; + case 'r': + tkn_val='\r'; + break; + case 'n': + tkn_val='\n'; + break; + default: + if (*src == tkn) tkn_val=tkn; + else + { + fprintf(stderr, "invalid character string."); + exit(1); + } + break; + } + } + st1[i] = tkn_val; } - token_val = last_pos; // Store the start of the string literal + tkn_val = st1; // Store the start of the string literal } return; // Exit after processing the string literal case '/': @@ -203,22 +239,22 @@ void token_next() default: if (tkn >= '0' && tkn <= '9') { + tkn_val = tkn - '0'; if (tkn == '0' && (*src == 'x' || *src == 
'X')) { src++; while ((*src >= '0' && *src <= '9') || (*src >= 'a' && *src <= 'f') || (*src >= 'A' && *src <= 'F')) { - token_val = (token_val << 4) + (*src >= '0' && *src <= '9' ? *src - '0' : (*src >= 'a' && *src <= 'f' ? *src - 'a' + 10 : *src - 'A' + 10)); + tkn_val = (tkn_val << 4) + (*src >= '0' && *src <= '9' ? *src - '0' : (*src >= 'a' && *src <= 'f' ? *src - 'a' + 10 : *src - 'A' + 10)); src++; } /* code to handle hexadecimal number */ } else { - src--; while (*src >= '0' && *src <= '9') { - token_val = (token_val * 10) + (*src - '0'); + tkn_val = (tkn_val * 10) + (*src - '0'); src++; } /* code to handle decimal number */ @@ -235,19 +271,26 @@ void token_next() hash = (hash * 0x40) + *src; src++; } - if (glob_stack_find(last_pos)) + int char_len = src-last_pos; + char *name = malloc(char_len+1); + memcpy(name, last_pos, char_len); + name[char_len] = '\0'; + if (glob_data_find(name)) { /* code to handle existing identifier */ - tkn = glob_stack_get(last_pos).tkn; // Set the token type from the existing identifier + tkn = sym_data_get(name)->tkn; // Set the token type from the existing identifier + free(name); return; // Exit after processing the existing identifier } /* code to handle identifiers */ - glob_data_t item; + sym_data item; item.hash = hash; item.data = NULL; // Initialize data pointer if needed tkn = item.tkn = TOKEN_ID; // Set the token type - glob_stack_set(last_pos, item); + sym_data_set(name,item); + sym_cur = sym_data_get(name); + free(name); return; // Exit after processing the identifier } else diff --git a/src/tensor-array/interp/token.h b/src/tensor-array/interp/token.h index 2f9974b..3f61b80 100644 --- a/src/tensor-array/interp/token.h +++ b/src/tensor-array/interp/token.h @@ -20,18 +20,11 @@ typedef enum TOKEN_FUNC, TOKEN_ELSE, TOKEN_ENUM, TOKEN_IF, TOKEN_RETURN, TOKEN_SIZEOF, TOKEN_WHILE, TOKEN_ASSIGN, TOKEN_COND, TOKEN_LOR, TOKEN_LAN, TOKEN_OR, TOKEN_XOR, TOKEN_AND, TOKEN_SHL, TOKEN_SHR, - TOKEN_EQ, TOKEN_NE, TOKEN_LT, TOKEN_GT, 
TOKEN_LE, TOKEN_GE, - TOKEN_ADD, TOKEN_SUB, TOKEN_MUL, TOKEN_DIV, TOKEN_MATMUL, TOKEN_POS, TOKEN_NEG, TOKEN_NOT + TOKEN_EQ, TOKEN_NE, TOKEN_LT, TOKEN_GT, TOKEN_LE, TOKEN_GE, TOKEN_POS, TOKEN_NEG, + TOKEN_ADD, TOKEN_SUB, TOKEN_MUL, TOKEN_DIV, TOKEN_MATMUL, TOKEN_INC, TOKEN_NOT } TOKEN_TYPE; -char* tknname[] = { - "num", "sys", "glo", "loc", "id", - "func", "else", "enum", "if", "return", "sizeof", - "while", "assign", "cond", "lor", "lan", - "or", "xor", "and", - "eq", "ne", "lt", "gt", "le", "ge", - "add", "sub", "mul", "div", "matmul", "pos", "neg", "not", "brak" -}; - void token_next(); -extern long tkn = 0; +extern long tkn; +extern long tkn_val; +extern char *tknname[]; diff --git a/src/tensor-array/interp/vm.c b/src/tensor-array/interp/vm.c index 06c49ad..91518de 100644 --- a/src/tensor-array/interp/vm.c +++ b/src/tensor-array/interp/vm.c @@ -19,19 +19,18 @@ limitations under the License. #include "vmop.h" #include "vm.h" -VM_INSTRUCTION* orig; -void** pc; -void* any_value; +VM_INSTRUCTION* orig = NULL; +VM_INSTRUCTION* pc = NULL; void eval() { - VM_INSTRUCTION op; + printf("vmstart\n"); + VM_INSTRUCTION_V2 op; pc = orig; while (1) { - - /* code */ - op = *pc++; + op = *((VM_INSTRUCTION_V2*)pc++); + printf("vmopassign %ld %ld %ld \n", orig, pc, op); switch (op) { case LEA: @@ -39,22 +38,24 @@ void eval() break; case IMM: // Immediate value + any_type = *pc++; any_value = *pc++; + op_imm(); break; case JMP: // Jump to address - pc = *pc; + pc = (VM_INSTRUCTION*) *pc; break; case CALL: // Function call break; case JZ: // Jump if zero - pc = (any_value) ? pc + 1 : *pc; + pc = (any_value) ? (VM_INSTRUCTION*)*pc : pc + 1; break; case JNZ: - pc = (any_value) ? *pc : pc + 1; // Jump if not zero + pc = (any_value) ? 
pc + 1 : (VM_INSTRUCTION*)*pc; break; case ENT: // Enter function @@ -92,6 +93,10 @@ void eval() // Push value onto stack op_push(); break; + case PTR_PUSH: + // Push value onto stack + op_ptr_push(); + break; case GETELEM: // Get element from array break; @@ -205,5 +210,4 @@ void eval() exit(1); } } - } diff --git a/src/tensor-array/interp/vm.h b/src/tensor-array/interp/vm.h index cb4bc9d..e760c68 100644 --- a/src/tensor-array/interp/vm.h +++ b/src/tensor-array/interp/vm.h @@ -16,9 +16,21 @@ limitations under the License. typedef enum { - LEA, IMM, JMP, CALL, JZ, JNZ, ENT, ADJ, LEV, RET, LI, LC, SI, SC, SET, GET, PUSH, GETELEM, SETELEM, ADDELEM, + LEA, IMM, JMP, CALL, JZ, JNZ, ENT, ADJ, LEV, RET, LI, LC, SI, SC, SET, GET, PUSH, PTR_PUSH, GETELEM, SETELEM, ADDELEM, OR, XOR, AND, EQ, NE, LT, GT, LE, GE, ADD, SUB, MUL, DIV, MATMUL, POS, NEG, NOT, SHL, SHR, OPEN, READ, CLOSE, PRTF, MALC, MSET, MCMP, EXIT -} VM_INSTRUCTION; +} VM_INSTRUCTION_V2; -extern void* any_value; +typedef long VM_INSTRUCTION; + +typedef enum +{ + TYPE_STRING, + TYPE_INT, + TYPE_PTR +} VM_TYPE; + +void eval(); + +extern long any_value; +extern VM_INSTRUCTION* orig; diff --git a/src/tensor-array/interp/vmop.cc b/src/tensor-array/interp/vmop.cc index 9cb05ca..6077820 100644 --- a/src/tensor-array/interp/vmop.cc +++ b/src/tensor-array/interp/vmop.cc @@ -17,9 +17,45 @@ limitations under the License. 
#include #include #include +#include +#include "sym_map.h" +#include "vmop.h" std::stack tensor_stack; +std::stack ptr_stack; tensor_array::value::Tensor ag; +void* aptr; +long any_value; +long any_type; + +void new_int() +{ + tensor_array::value::TensorArray tmp2 = {any_value}; + tensor_array::value::Tensor tmp1(tmp2); + ag = tmp1; +} + +void new_ptr() +{ + aptr = reinterpret_cast(any_value); +} + +void new_string() +{ + char* str = reinterpret_cast(any_value); + unsigned int s_len = std::strlen(str); + tensor_array::value::TensorBase tmp1(typeid(char),{s_len}, str); + ag = tmp1; + std::free(str); +} + +void op_imm() +{ + if (any_type = 0) new_string(); + else if (any_type = 1) new_int(); + else if (any_type = 2) new_ptr(); + else; +} void op_add() { @@ -214,7 +250,7 @@ void op_mcmp() void op_exit() { // Implementation for exiting the program - std::cout<< ag << std::endl; + // std::cout << ag << std::endl; } void op_push() @@ -222,12 +258,19 @@ void op_push() tensor_stack.push(ag); } +void op_ptr_push() +{ + std::cout << "test" << std::endl; + ptr_stack.push(*reinterpret_cast(aptr)); + std::cout << "test" << std::endl; +} + void op_get() { - if (!tensor_stack.empty()) + if (!ptr_stack.empty()) { - ag = tensor_stack.top(); - tensor_stack.pop(); + sym_data& temp = ptr_stack.top(); + ag = *reinterpret_cast(temp.data); } else { @@ -237,11 +280,11 @@ void op_get() void op_set() { - if (!tensor_stack.empty()) + if (!ptr_stack.empty()) { - tensor_array::value::Tensor bg = tensor_stack.top(); - tensor_stack.pop(); - ag = bg; // Set the top of the stack to ag + sym_data& temp = ptr_stack.top(); + delete temp.data; // Set the top of the stack to ag + temp.data = new tensor_array::value::Tensor(ag); } else { diff --git a/src/tensor-array/interp/vmop.h b/src/tensor-array/interp/vmop.h index ab5dec5..eba64a7 100644 --- a/src/tensor-array/interp/vmop.h +++ b/src/tensor-array/interp/vmop.h @@ -18,6 +18,9 @@ limitations under the License. 
extern "C" { #endif + extern long any_value; + extern long any_type; + void op_imm(); void op_add(); void op_sub(); void op_mul(); @@ -45,6 +48,7 @@ extern "C" void op_mcmp(); void op_exit(); void op_push(); + void op_ptr_push(); void op_get(); void op_set(); #ifdef __cplusplus From 86c3fb982979402d4ccd6dc2d2c74d33e9892c4e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sun, 20 Jul 2025 23:03:24 +0700 Subject: [PATCH 148/281] test --- src/tensor-array/interp/parser.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c index 10aad85..2e3aa73 100644 --- a/src/tensor-array/interp/parser.c +++ b/src/tensor-array/interp/parser.c @@ -32,8 +32,7 @@ void emit(int size, ...) // Process the variable arguments as needed for (int i = 0; i < size; ++i) { - ++text; - *text = va_arg(args, VM_INSTRUCTION); + *text++ = va_arg(args, VM_INSTRUCTION); } va_end(args); @@ -250,7 +249,7 @@ void statement() break; default: expression(TOKEN_ASSIGN); - if (tkn = ';') + if (tkn == ';') match(';'); break; } @@ -264,7 +263,7 @@ void program() // You would typically call emit or other functions here based on your program logic // Add your program logic here interp_malloc(); - orig = text + 1; + orig = text; char *isrc = src; VM_INSTRUCTION *itext = text; interp_memreset(); From 98e69d6ba3d52a7cbac5704079b36c0ca22f0e59 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 21 Jul 2025 13:07:20 +0700 Subject: [PATCH 149/281] test --- src/tensor-array/interp/parser.c | 5 +++-- src/tensor-array/interp/vmop.cc | 20 ++++++-------------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c index 2e3aa73..23037cd 100644 --- a/src/tensor-array/interp/parser.c +++ b/src/tensor-array/interp/parser.c @@ -32,7 +32,8 @@ void emit(int size, ...) 
// Process the variable arguments as needed for (int i = 0; i < size; ++i) { - *text++ = va_arg(args, VM_INSTRUCTION); + ++text; + *text = va_arg(args, VM_INSTRUCTION); } va_end(args); @@ -263,7 +264,7 @@ void program() // You would typically call emit or other functions here based on your program logic // Add your program logic here interp_malloc(); - orig = text; + orig = text+1; char *isrc = src; VM_INSTRUCTION *itext = text; interp_memreset(); diff --git a/src/tensor-array/interp/vmop.cc b/src/tensor-array/interp/vmop.cc index 6077820..5e23d98 100644 --- a/src/tensor-array/interp/vmop.cc +++ b/src/tensor-array/interp/vmop.cc @@ -51,9 +51,9 @@ void new_string() void op_imm() { - if (any_type = 0) new_string(); - else if (any_type = 1) new_int(); - else if (any_type = 2) new_ptr(); + if (any_type == 0) new_string(); + else if (any_type == 1) new_int(); + else if (any_type == 2) new_ptr(); else; } @@ -260,22 +260,13 @@ void op_push() void op_ptr_push() { - std::cout << "test" << std::endl; ptr_stack.push(*reinterpret_cast(aptr)); - std::cout << "test" << std::endl; } void op_get() { - if (!ptr_stack.empty()) - { - sym_data& temp = ptr_stack.top(); - ag = *reinterpret_cast(temp.data); - } - else - { - throw std::runtime_error("Tensor stack is empty"); - } + sym_data& temp = *reinterpret_cast(aptr); + ag = *reinterpret_cast(temp.data); } void op_set() @@ -285,6 +276,7 @@ void op_set() sym_data& temp = ptr_stack.top(); delete temp.data; // Set the top of the stack to ag temp.data = new tensor_array::value::Tensor(ag); + ptr_stack.pop(); } else { From 37e55a0f4b77eb616c40c1603106738f7a0ee696 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 21 Jul 2025 19:37:40 +0700 Subject: [PATCH 150/281] test --- Dockerfolder/Ubuntu.Dockerfile | 8 +++----- src/tensor-array/interp/parser.c | 11 +++++------ src/tensor-array/interp/sym_map.h | 2 ++ src/tensor-array/interp/token.c | 3 +-- src/tensor-array/interp/vmop.cc | 12 
++++++++---- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/Dockerfolder/Ubuntu.Dockerfile b/Dockerfolder/Ubuntu.Dockerfile index 8317437..bfca7c1 100644 --- a/Dockerfolder/Ubuntu.Dockerfile +++ b/Dockerfolder/Ubuntu.Dockerfile @@ -22,12 +22,10 @@ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ # && apt-get -y install --no-install-recommends -WORKDIR /app/tensor-array -COPY src/ src/ -COPY CMakeLists.txt . -COPY Config.cmake.in . +WORKDIR /app +COPY ./ /tensor-array -WORKDIR build +WORKDIR tensor-array/build RUN cmake .. RUN cmake --build . diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c index 23037cd..56582fb 100644 --- a/src/tensor-array/interp/parser.c +++ b/src/tensor-array/interp/parser.c @@ -18,9 +18,6 @@ limitations under the License. #include #include #include -#include -#include -#include "sym_map.h" #include "parser.h" #include "token.h" #include "open_file.h" @@ -70,16 +67,18 @@ void expression(int level) break; case TOKEN_ID: /* code */ - temp = sym_cur; + //temp = sym_cur; match(TOKEN_ID); + /* if (!temp->data) { temp->data = new_Tensor(); } + */ if (0); else { - emit(3, IMM, TYPE_PTR, temp); + emit(3, IMM, TYPE_PTR, tkn_val); emit(1, GET); } break; @@ -270,7 +269,7 @@ void program() interp_memreset(); printf(">>> "); fflush(stdout); - read(0, src, poolsize-1); // Read input from stdin + fgets(src, poolsize-1, stdin); // Read input from stdin token_next(); statement(); emit(1, EXIT); // Emit a token with value 0 to indicate end of processing diff --git a/src/tensor-array/interp/sym_map.h b/src/tensor-array/interp/sym_map.h index a887a48..02ad50f 100644 --- a/src/tensor-array/interp/sym_map.h +++ b/src/tensor-array/interp/sym_map.h @@ -32,4 +32,6 @@ extern "C" void* new_Tensor(); #ifdef __cplusplus } + +extern std::map sym_map; #endif diff --git a/src/tensor-array/interp/token.c b/src/tensor-array/interp/token.c index 
e62ab5c..59e66c9 100644 --- a/src/tensor-array/interp/token.c +++ b/src/tensor-array/interp/token.c @@ -275,11 +275,11 @@ void token_next() char *name = malloc(char_len+1); memcpy(name, last_pos, char_len); name[char_len] = '\0'; + tkn_val = name; if (glob_data_find(name)) { /* code to handle existing identifier */ tkn = sym_data_get(name)->tkn; // Set the token type from the existing identifier - free(name); return; // Exit after processing the existing identifier } /* code to handle identifiers */ @@ -290,7 +290,6 @@ void token_next() tkn = item.tkn = TOKEN_ID; // Set the token type sym_data_set(name,item); sym_cur = sym_data_get(name); - free(name); return; // Exit after processing the identifier } else diff --git a/src/tensor-array/interp/vmop.cc b/src/tensor-array/interp/vmop.cc index 5e23d98..7d983ad 100644 --- a/src/tensor-array/interp/vmop.cc +++ b/src/tensor-array/interp/vmop.cc @@ -22,7 +22,7 @@ limitations under the License. #include "vmop.h" std::stack tensor_stack; -std::stack ptr_stack; +std::stack ptr_stack; tensor_array::value::Tensor ag; void* aptr; long any_value; @@ -260,12 +260,15 @@ void op_push() void op_ptr_push() { - ptr_stack.push(*reinterpret_cast(aptr)); + ptr_stack.push(reinterpret_cast(aptr)); + std::free(aptr); } void op_get() { - sym_data& temp = *reinterpret_cast(aptr); + char *var_name = reinterpret_cast(aptr); + sym_data& temp = sym_map[var_name]; + std::free(aptr); ag = *reinterpret_cast(temp.data); } @@ -273,7 +276,8 @@ void op_set() { if (!ptr_stack.empty()) { - sym_data& temp = ptr_stack.top(); + std::string& var_name = ptr_stack.top(); + sym_data& temp = sym_map[var_name]; delete temp.data; // Set the top of the stack to ag temp.data = new tensor_array::value::Tensor(ag); ptr_stack.pop(); From a3c29996e1d143e8846675c3671f63968409607e Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 21 Jul 2025 19:43:38 +0700 Subject: [PATCH 151/281] Update parser.c --- 
src/tensor-array/interp/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c index 56582fb..c5fc854 100644 --- a/src/tensor-array/interp/parser.c +++ b/src/tensor-array/interp/parser.c @@ -53,7 +53,7 @@ void match(long tk) void expression(int level) { - sym_data* temp = NULL; // Temporary variable to hold intermediate values + void* temp = NULL; // Temporary variable to hold intermediate values int isArrRef = 0; // Flag to check if we are dealing with an array reference // This function would handle parsing and evaluating expressions // For now, it is a placeholder From 36dba7eca08c31bca9abe7a9055c664e3e65ab89 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 21 Jul 2025 19:47:45 +0700 Subject: [PATCH 152/281] Update vmop.cc --- src/tensor-array/interp/vmop.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/interp/vmop.cc b/src/tensor-array/interp/vmop.cc index 7d983ad..d117e55 100644 --- a/src/tensor-array/interp/vmop.cc +++ b/src/tensor-array/interp/vmop.cc @@ -266,7 +266,7 @@ void op_ptr_push() void op_get() { - char *var_name = reinterpret_cast(aptr); + char *var_name = reinterpret_cast(aptr); sym_data& temp = sym_map[var_name]; std::free(aptr); ag = *reinterpret_cast(temp.data); From 812487a9bc3feb4e03df1bf4419a7db3f74b1214 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 21 Jul 2025 19:55:20 +0700 Subject: [PATCH 153/281] test --- src/tensor-array/interp/sym_map.cc | 1 - src/tensor-array/interp/sym_map.h | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tensor-array/interp/sym_map.cc b/src/tensor-array/interp/sym_map.cc index 12d3d4e..ebab7ee 100644 --- a/src/tensor-array/interp/sym_map.cc +++ b/src/tensor-array/interp/sym_map.cc @@ -14,7 +14,6 @@ See the License for the specific language 
governing permissions and limitations under the License. */ -#include #include #include #include "sym_map.h" diff --git a/src/tensor-array/interp/sym_map.h b/src/tensor-array/interp/sym_map.h index 02ad50f..03c5692 100644 --- a/src/tensor-array/interp/sym_map.h +++ b/src/tensor-array/interp/sym_map.h @@ -33,5 +33,8 @@ extern "C" #ifdef __cplusplus } +#include +#include + extern std::map sym_map; #endif From b1704e391529e686819987c8db250c08c36d5970 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 21 Jul 2025 20:04:51 +0700 Subject: [PATCH 154/281] Update Ubuntu.Dockerfile --- Dockerfolder/Ubuntu.Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfolder/Ubuntu.Dockerfile b/Dockerfolder/Ubuntu.Dockerfile index bfca7c1..f3b7908 100644 --- a/Dockerfolder/Ubuntu.Dockerfile +++ b/Dockerfolder/Ubuntu.Dockerfile @@ -22,8 +22,8 @@ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ # && apt-get -y install --no-install-recommends -WORKDIR /app -COPY ./ /tensor-array +WORKDIR /main-project +COPY ./ tensor-array/ WORKDIR tensor-array/build From 6ac86282b6532e4fd279d14a34c6deae8bf23bcd Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 11:53:56 +0700 Subject: [PATCH 155/281] add tests --- CMakeLists.txt | 2 ++ tests/_ | 0 tests/tensor-array/core/CMakeLists.txt | 19 ++++++++++++ tests/tensor-array/core/gradient.cc | 41 +++++++++++++++++++++++++ tests/tensor-array/core/print_output.cc | 36 ++++++++++++++++++++++ tests/tensor-array/core/tensor_array.cc | 32 +++++++++++++++++++ tests/tensor-array/layers/_ | 0 7 files changed, 130 insertions(+) create mode 100644 tests/_ create mode 100644 tests/tensor-array/core/CMakeLists.txt create mode 100644 tests/tensor-array/core/gradient.cc create mode 100644 
tests/tensor-array/core/print_output.cc create mode 100644 tests/tensor-array/core/tensor_array.cc create mode 100644 tests/tensor-array/layers/_ diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f14d1a..69a8171 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,8 @@ add_subdirectory("src/tensor-array/core") add_subdirectory("src/tensor-array/layers") add_subdirectory("src/tensor-array/interp") +add_subdirectory("tests/tensor-array/core") + set(CPACK_PACKAGE_NAME "TensorArray") set(CPACK_PACKAGE_VENDOR "TensorArray-Creators") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY) diff --git a/tests/_ b/tests/_ new file mode 100644 index 0000000..e69de29 diff --git a/tests/tensor-array/core/CMakeLists.txt b/tests/tensor-array/core/CMakeLists.txt new file mode 100644 index 0000000..1e71d2f --- /dev/null +++ b/tests/tensor-array/core/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 3.18) + +file(GLOB TensorArray_tests_src "*.cc") + +enable_testing() + +create_test_sourcelist( + TensorArray_tests + test_driver.cc + ${TensorArray_tests_src}) + +add_executable(tensorarray_tests ${TensorArray_tests}) +target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_link_libraries(tensorarray_interpreter TensorArray::Core) + +foreach(test ${TensorArray_tests_src}) + get_filename_component(TName ${test} NAME_WE) + add_test(NAME ${TName} COMMAND tensorarray_tests ${TName}) +endforeach() diff --git a/tests/tensor-array/core/gradient.cc b/tests/tensor-array/core/gradient.cc new file mode 100644 index 0000000..eefc3b0 --- /dev/null +++ b/tests/tensor-array/core/gradient.cc @@ -0,0 +1,41 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include +#include + +using namespace std; +using namespace tensor_array::value; + +int gradient(int argc, char const *argv[]) +{ + TensorArray example_tensor_array = + {{ + {{ 1, 2, 3, 4 }}, + {{ 5, 6, 7, 8 }}, + {{ 9, 10, 11, 12 }}, + {{ 13, 14, 15, 16 }}, + }}; + TensorArray example_tensor_array_scalar = {100}; + Tensor example_tensor_1(example_tensor_array); + Tensor example_tensor_2(example_tensor_array_scalar); + Tensor example_tensor_sum = example_tensor_1 + example_tensor_2; + cout << example_tensor_sum << endl; + example_tensor_sum.calc_grad(); + cout << example_tensor_1.get_grad() << endl; + cout << example_tensor_2.get_grad() << endl; + return 0; +} diff --git a/tests/tensor-array/core/print_output.cc b/tests/tensor-array/core/print_output.cc new file mode 100644 index 0000000..d68e011 --- /dev/null +++ b/tests/tensor-array/core/print_output.cc @@ -0,0 +1,36 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include +#include + +using namespace std; +using namespace tensor_array::value; + +int print_output(int argc, char const *argv[]) +{ + /* code */ + TensorArray example_tensor_array = + {{ + {{ 1, 2, 3, 4 }}, + {{ 5, 6, 7, 8 }}, + {{ 9, 10, 11, 12 }}, + {{ 13, 14, 15, 16 }}, + }}; + Tensor example_tensor_1(example_tensor_array); + cout << example_tensor_1 << endl; + return 0; +} diff --git a/tests/tensor-array/core/tensor_array.cc b/tests/tensor-array/core/tensor_array.cc new file mode 100644 index 0000000..869fe75 --- /dev/null +++ b/tests/tensor-array/core/tensor_array.cc @@ -0,0 +1,32 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include +#include + +using namespace tensor_array::value; + +int tensor_array(int argc, char const *argv[]) +{ + TensorArray example_tensor_array = + {{ + {{ 1, 2, 3, 4 }}, + {{ 5, 6, 7, 8 }}, + {{ 9, 10, 11, 12 }}, + {{ 13, 14, 15, 16 }}, + }}; + return 0; +} diff --git a/tests/tensor-array/layers/_ b/tests/tensor-array/layers/_ new file mode 100644 index 0000000..e69de29 From e215883e57f4a97be0d4e48c8fa3654b44c56a6e Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 11:58:33 +0700 Subject: [PATCH 156/281] Update CMakeLists.txt --- tests/tensor-array/core/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tensor-array/core/CMakeLists.txt b/tests/tensor-array/core/CMakeLists.txt index 1e71d2f..3f9b93f 100644 --- a/tests/tensor-array/core/CMakeLists.txt +++ b/tests/tensor-array/core/CMakeLists.txt @@ -10,8 +10,8 @@ create_test_sourcelist( ${TensorArray_tests_src}) add_executable(tensorarray_tests ${TensorArray_tests}) -target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_interpreter TensorArray::Core) +target_include_directories(tensorarray_tests PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_link_libraries(tensorarray_tests TensorArray::Core) foreach(test ${TensorArray_tests_src}) get_filename_component(TName ${test} NAME_WE) From dbd4d60eb6257fd4a3ce39c860a6ee4a0439b3d4 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 12:08:51 +0700 Subject: [PATCH 157/281] test --- .../core/{tensor_array.cc => tensor_array_test.cc} | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) rename tests/tensor-array/core/{tensor_array.cc => tensor_array_test.cc} (91%) diff --git a/tests/tensor-array/core/tensor_array.cc b/tests/tensor-array/core/tensor_array_test.cc similarity index 91% rename from 
tests/tensor-array/core/tensor_array.cc rename to tests/tensor-array/core/tensor_array_test.cc index 869fe75..7ccabe9 100644 --- a/tests/tensor-array/core/tensor_array.cc +++ b/tests/tensor-array/core/tensor_array_test.cc @@ -14,12 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include #include using namespace tensor_array::value; -int tensor_array(int argc, char const *argv[]) +int tensor_array_test(int argc, char const *argv[]) { TensorArray example_tensor_array = {{ @@ -28,5 +27,5 @@ int tensor_array(int argc, char const *argv[]) {{ 9, 10, 11, 12 }}, {{ 13, 14, 15, 16 }}, }}; - return 0; + return 0; } From 3b5c9d9ef2d9ad82b1c9273ae823695dd52dfe08 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 13:35:25 +0700 Subject: [PATCH 158/281] Update CMakeLists.txt --- tests/tensor-array/core/CMakeLists.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/tensor-array/core/CMakeLists.txt b/tests/tensor-array/core/CMakeLists.txt index 3f9b93f..ae7d3b2 100644 --- a/tests/tensor-array/core/CMakeLists.txt +++ b/tests/tensor-array/core/CMakeLists.txt @@ -1,19 +1,23 @@ cmake_minimum_required(VERSION 3.18) -file(GLOB TensorArray_tests_src "*.cc") +set( + TensorArray_tests_src + "tensor_array_test.cc" + "print_output.cc" + "gradient.cc") enable_testing() create_test_sourcelist( TensorArray_tests - test_driver.cc + "test_driver.cc" ${TensorArray_tests_src}) -add_executable(tensorarray_tests ${TensorArray_tests}) -target_include_directories(tensorarray_tests PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_tests TensorArray::Core) +add_executable(tensorarray_core_tests ${TensorArray_tests}) +target_include_directories(tensorarray_core_tests PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_link_libraries(tensorarray_core_tests TensorArray::Core) foreach(test ${TensorArray_tests_src}) 
get_filename_component(TName ${test} NAME_WE) - add_test(NAME ${TName} COMMAND tensorarray_tests ${TName}) + add_test(NAME ${TName} COMMAND tensorarray_core_tests ${TName}) endforeach() From 51765bdb6c8f7d5bcbe537952b5439496ef15ae9 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 13:40:46 +0700 Subject: [PATCH 159/281] tests --- tests/tensor-array/core/gradient.cc | 2 +- tests/tensor-array/core/print_output.cc | 2 +- tests/tensor-array/core/tensor_array_test.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tensor-array/core/gradient.cc b/tests/tensor-array/core/gradient.cc index eefc3b0..fbb5768 100644 --- a/tests/tensor-array/core/gradient.cc +++ b/tests/tensor-array/core/gradient.cc @@ -20,7 +20,7 @@ limitations under the License. using namespace std; using namespace tensor_array::value; -int gradient(int argc, char const *argv[]) +int gradient(int argc, char *argv[]) { TensorArray example_tensor_array = {{ diff --git a/tests/tensor-array/core/print_output.cc b/tests/tensor-array/core/print_output.cc index d68e011..ad1c9be 100644 --- a/tests/tensor-array/core/print_output.cc +++ b/tests/tensor-array/core/print_output.cc @@ -20,7 +20,7 @@ limitations under the License. using namespace std; using namespace tensor_array::value; -int print_output(int argc, char const *argv[]) +int print_output(int argc, char *argv[]) { /* code */ TensorArray example_tensor_array = diff --git a/tests/tensor-array/core/tensor_array_test.cc b/tests/tensor-array/core/tensor_array_test.cc index 7ccabe9..039ed01 100644 --- a/tests/tensor-array/core/tensor_array_test.cc +++ b/tests/tensor-array/core/tensor_array_test.cc @@ -18,7 +18,7 @@ limitations under the License. 
using namespace tensor_array::value; -int tensor_array_test(int argc, char const *argv[]) +int tensor_array_test(int argc, char *argv[]) { TensorArray example_tensor_array = {{ From 0a1bc29456c8d1fdfa985c6354bf875ef9bd8cfd Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 14:15:15 +0700 Subject: [PATCH 160/281] Update CMakeLists.txt --- CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 69a8171..ef70274 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,10 @@ add_subdirectory("src/tensor-array/core") add_subdirectory("src/tensor-array/layers") add_subdirectory("src/tensor-array/interp") -add_subdirectory("tests/tensor-array/core") +include(CTest) +if(BUILD_TESTING) + add_subdirectory("tests/tensor-array/core") +endif() set(CPACK_PACKAGE_NAME "TensorArray") set(CPACK_PACKAGE_VENDOR "TensorArray-Creators") From ab061521fefb6ae29041cfdd0d67f0ba155ceedc Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 14:41:11 +0700 Subject: [PATCH 161/281] more tests --- src/tensor-array/core/tensor.cc | 12 +++--- src/tensor-array/core/tensor.hh | 1 + .../core/tensor_matmul_transpose.cc | 33 ++++++++++++++++ tests/tensor-array/core/tensor_operators.cc | 39 +++++++++++++++++++ 4 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 tests/tensor-array/core/tensor_matmul_transpose.cc create mode 100644 tests/tensor-array/core/tensor_operators.cc diff --git a/src/tensor-array/core/tensor.cc b/src/tensor-array/core/tensor.cc index 0310877..531ac7b 100644 --- a/src/tensor-array/core/tensor.cc +++ b/src/tensor-array/core/tensor.cc @@ -14,18 +14,18 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include -#include -#include -#include -#include -#include "data_type_wrapper.hh" #ifndef TENSOR_CONTENT #define TENSOR_CONTENT #include "tensor.hh" #undef TENSOR_CONTENT #endif // !TENSOR_CONTENT + +#include +#include #include +#include +#include +#include "data_type_wrapper.hh" #define USING_DATA_TYPE_FLOAT (float)(double) #define USING_DATA_TYPE_SINT (int8_t)(int16_t)(int32_t)(int64_t) diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index b3e6f1e..d2b491f 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -16,6 +16,7 @@ limitations under the License. #include #include +#include #include "tensorbase.hh" #pragma once diff --git a/tests/tensor-array/core/tensor_matmul_transpose.cc b/tests/tensor-array/core/tensor_matmul_transpose.cc new file mode 100644 index 0000000..eac71fb --- /dev/null +++ b/tests/tensor-array/core/tensor_matmul_transpose.cc @@ -0,0 +1,33 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include + +using namespace tensor_array::value; + +int tensor_matmul_transpose(int argc, char *argv[]) +{ + TensorArray example_tensor_array = + {{ + {{ 1, 2, 3 }}, + {{ 4, 5, 6 }} + }}; + TensorArray example_tensor_array_scalar = {100}; + Tensor example_tensor_1(example_tensor_array_1); + Tensor example_tensor_2 = example_tensor_1.transpose(0, 1); + Tensor example_tensor_add = matmul(example_tensor_1, example_tensor_2); + return 0; +} diff --git a/tests/tensor-array/core/tensor_operators.cc b/tests/tensor-array/core/tensor_operators.cc new file mode 100644 index 0000000..9a840d4 --- /dev/null +++ b/tests/tensor-array/core/tensor_operators.cc @@ -0,0 +1,39 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include + +using namespace tensor_array::value; + +int tensor_operators(int argc, char *argv[]) +{ + TensorArray example_tensor_array = + {{ + {{ 1, 2, 3, 4 }}, + {{ 5, 6, 7, 8 }}, + {{ 9, 10, 11, 12 }}, + {{ 13, 14, 15, 16 }}, + }}; + TensorArray example_tensor_array_scalar = {100}; + Tensor example_tensor_1(example_tensor_array); + Tensor example_tensor_2(example_tensor_array_scalar); + Tensor example_tensor_add = example_tensor_1 + example_tensor_2; + Tensor example_tensor_sub = example_tensor_1 - example_tensor_2; + Tensor example_tensor_mul = example_tensor_1 * example_tensor_2; + Tensor example_tensor_div = example_tensor_1 + example_tensor_2; + return 0; +} + From d10889d429b7869126d881da233bcf9bcd308e4a Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 14:45:23 +0700 Subject: [PATCH 162/281] Update CMakeLists.txt --- tests/tensor-array/core/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/tensor-array/core/CMakeLists.txt b/tests/tensor-array/core/CMakeLists.txt index ae7d3b2..d91a7bf 100644 --- a/tests/tensor-array/core/CMakeLists.txt +++ b/tests/tensor-array/core/CMakeLists.txt @@ -4,6 +4,8 @@ set( TensorArray_tests_src "tensor_array_test.cc" "print_output.cc" + "tensor_operators.cc" + "tensor_matmul_transpose.cc" "gradient.cc") enable_testing() From 4964a395389ed8d200061367bd5970593196e7e2 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 14:48:10 +0700 Subject: [PATCH 163/281] Update tensor_matmul_transpose.cc --- tests/tensor-array/core/tensor_matmul_transpose.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tensor-array/core/tensor_matmul_transpose.cc b/tests/tensor-array/core/tensor_matmul_transpose.cc index eac71fb..f101971 100644 --- a/tests/tensor-array/core/tensor_matmul_transpose.cc +++ b/tests/tensor-array/core/tensor_matmul_transpose.cc @@ -26,7 
+26,7 @@ int tensor_matmul_transpose(int argc, char *argv[]) {{ 4, 5, 6 }} }}; TensorArray example_tensor_array_scalar = {100}; - Tensor example_tensor_1(example_tensor_array_1); + Tensor example_tensor_1(example_tensor_array); Tensor example_tensor_2 = example_tensor_1.transpose(0, 1); Tensor example_tensor_add = matmul(example_tensor_1, example_tensor_2); return 0; From 4e237cb75d9a52200fe215628269a9797a665e81 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 14:55:20 +0700 Subject: [PATCH 164/281] Update CMakeLists.txt --- tests/tensor-array/core/CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/tensor-array/core/CMakeLists.txt b/tests/tensor-array/core/CMakeLists.txt index d91a7bf..c28b0d4 100644 --- a/tests/tensor-array/core/CMakeLists.txt +++ b/tests/tensor-array/core/CMakeLists.txt @@ -4,9 +4,10 @@ set( TensorArray_tests_src "tensor_array_test.cc" "print_output.cc" - "tensor_operators.cc" - "tensor_matmul_transpose.cc" - "gradient.cc") + # "tensor_operators.cc" + # "tensor_matmul_transpose.cc" + # "gradient.cc" + ) enable_testing() From 43f564ebca8f33d384b6b9dde74c74724f3f94b8 Mon Sep 17 00:00:00 2001 From: BigNoobWasTaken <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 22:20:00 +0700 Subject: [PATCH 165/281] test --- src/tensor-array/interp/open_file.c | 8 ++++- src/tensor-array/interp/parser.c | 50 +++++++++++++++++++++++++---- src/tensor-array/interp/sym_map.cc | 12 +------ src/tensor-array/interp/sym_map.h | 6 ++-- src/tensor-array/interp/token.c | 12 ++++++- src/tensor-array/interp/vm.h | 7 ---- src/tensor-array/interp/vm_type.h | 15 +++++++++ src/tensor-array/interp/vmop.cc | 49 ++++++++++++++++++++++------ 8 files changed, 119 insertions(+), 40 deletions(-) create mode 100644 src/tensor-array/interp/vm_type.h diff --git a/src/tensor-array/interp/open_file.c b/src/tensor-array/interp/open_file.c index 
7966f6c..c242684 100644 --- a/src/tensor-array/interp/open_file.c +++ b/src/tensor-array/interp/open_file.c @@ -17,6 +17,7 @@ limitations under the License. #include #include #include +#include "vm.h" #include "open_file.h" char *src = NULL; @@ -63,6 +64,7 @@ void read_file(const char* filename) int i; interp_malloc(); + interp_memreset(); i = fread(src, poolsize, 1, fptr); if (i < 0) { @@ -70,5 +72,9 @@ void read_file(const char* filename) fclose(fptr); exit(1); } - return 0; // Return 0 on success + orig = text; + text = text - 1; + src[i] = '\0'; + fclose(fptr); + } diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c index c5fc854..18bc2aa 100644 --- a/src/tensor-array/interp/parser.c +++ b/src/tensor-array/interp/parser.c @@ -21,6 +21,7 @@ limitations under the License. #include "parser.h" #include "token.h" #include "open_file.h" +#include "vm_type.h" void emit(int size, ...) { @@ -53,7 +54,7 @@ void match(long tk) void expression(int level) { - void* temp = NULL; // Temporary variable to hold intermediate values + sym_data* temp = NULL; // Temporary variable to hold intermediate values int isArrRef = 0; // Flag to check if we are dealing with an array reference // This function would handle parsing and evaluating expressions // For now, it is a placeholder @@ -67,15 +68,19 @@ void expression(int level) break; case TOKEN_ID: /* code */ - //temp = sym_cur; + temp = sym_cur; match(TOKEN_ID); - /* - if (!temp->data) + if (temp->type) { - temp->data = new_Tensor(); + if (token == '(') + { + /* code */ + match('('); + match(')'); + emit(2, CALL, temp->data) + } + } - */ - if (0); else { emit(3, IMM, TYPE_PTR, tkn_val); @@ -247,6 +252,37 @@ void statement() *a = text + 1; // Set the jump address to the next instruction } break; + case TOKEN_FUNC: + match(TOKEN_FUNC); + if (tkn != TOKEN_ID) + { + fprintf(stderr, "Error: function name\n"); + exit(1); + } + cur->type = TYPE_FUNC; + cur->data = malloc(1024*8); + VM_INSTRUCTION *save = text; 
+ text = cur->data + match(TOKEN_ID); + match('('); + match(')'); + statement(); + if (*text != RET) emit(1, RET); + text = save; + break; + case: TOKEN_RETURN: + match(TOKEN_RETURN); + expression(TOKEN_ASSIGN); + emit(1, RET); + break; + case '{': + match('{'); + while (tkn != '}') + statement(); + match('}'); + break; + case '\0': + return; default: expression(TOKEN_ASSIGN); if (tkn == ';') diff --git a/src/tensor-array/interp/sym_map.cc b/src/tensor-array/interp/sym_map.cc index ebab7ee..9794848 100644 --- a/src/tensor-array/interp/sym_map.cc +++ b/src/tensor-array/interp/sym_map.cc @@ -20,7 +20,7 @@ limitations under the License. sym_data* sym_cur = NULL; -std::map sym_map; +scope sym_map; void sym_data_set(char* name, sym_data dat) { @@ -36,13 +36,3 @@ int glob_data_find(char* name) { return sym_map.find(name) != sym_map.end(); } - -void* new_Tensor() -{ - return new tensor_array::value::Tensor; -} -void delete_Tensor(void* t) -{ - delete t; -} - diff --git a/src/tensor-array/interp/sym_map.h b/src/tensor-array/interp/sym_map.h index 03c5692..d5acb1e 100644 --- a/src/tensor-array/interp/sym_map.h +++ b/src/tensor-array/interp/sym_map.h @@ -23,18 +23,18 @@ extern "C" long tkn; long hash; long cls; + long type; void* data; // Pointer to additional data if needed } sym_data; void sym_data_set(char* name, sym_data dat); sym_data* sym_data_get(char*); int glob_data_find(char* name); extern sym_data* sym_cur; - void* new_Tensor(); #ifdef __cplusplus } #include #include - -extern std::map sym_map; +typedef std::map scope; +extern scope sym_map; #endif diff --git a/src/tensor-array/interp/token.c b/src/tensor-array/interp/token.c index 59e66c9..2625de9 100644 --- a/src/tensor-array/interp/token.c +++ b/src/tensor-array/interp/token.c @@ -236,6 +236,16 @@ void token_next() src++; tkn = TOKEN_MATMUL; // Store the token value return; // Exit after processing the token + case '[': + case ']': + case '(': + case ')': + case '{': + case '}': + case ',': + case ';': + case 
':': + return; default: if (tkn >= '0' && tkn <= '9') { @@ -294,7 +304,7 @@ void token_next() } else { - /* code to handle other tokens */ + printf("invalid symbol %c", tkn) } break; } diff --git a/src/tensor-array/interp/vm.h b/src/tensor-array/interp/vm.h index e760c68..2cdd10a 100644 --- a/src/tensor-array/interp/vm.h +++ b/src/tensor-array/interp/vm.h @@ -23,13 +23,6 @@ typedef enum typedef long VM_INSTRUCTION; -typedef enum -{ - TYPE_STRING, - TYPE_INT, - TYPE_PTR -} VM_TYPE; - void eval(); extern long any_value; diff --git a/src/tensor-array/interp/vm_type.h b/src/tensor-array/interp/vm_type.h new file mode 100644 index 0000000..2f26bda --- /dev/null +++ b/src/tensor-array/interp/vm_type.h @@ -0,0 +1,15 @@ +#ifdef __cplusplus +extern "C" +{ +#endif + typedef enum + { + TYPE_NONE, + TYPE_STRING, + TYPE_INT, + TYPE_PTR, + TYPE_FUNC + } VM_TYPE; +#ifdef __cplusplus +} +#endif diff --git a/src/tensor-array/interp/vmop.cc b/src/tensor-array/interp/vmop.cc index d117e55..7f6a23a 100644 --- a/src/tensor-array/interp/vmop.cc +++ b/src/tensor-array/interp/vmop.cc @@ -20,13 +20,18 @@ limitations under the License. 
#include #include "sym_map.h" #include "vmop.h" +#include "vm_type.h" + +typedef long VM_INSTRUCTION; +extern VM_INSTRUCTION* pc; std::stack tensor_stack; -std::stack ptr_stack; +std::stack var_stack; +std::stack> call_stack; tensor_array::value::Tensor ag; void* aptr; long any_value; -long any_type; +VM_TYPE any_type; void new_int() { @@ -51,10 +56,21 @@ void new_string() void op_imm() { - if (any_type == 0) new_string(); - else if (any_type == 1) new_int(); - else if (any_type == 2) new_ptr(); - else; + switch (any_type) + { + case TYPE_INT: + /* code */ + new_int(); + break; + case TYPE_STRING: + new_string(); + break; + case TYPE_PTR: + new_ptr(); + break; + default: + break; + } } void op_add() @@ -212,6 +228,19 @@ void op_shr() // ag = ag >> bg; } +void op_call() +{ + VM_INSTRUCTION pc1 = (VM_INSTRUCTION)*pc++; + call_stack.push({std::move(pc), std::move(sym_map)}); + pc = pc1; +} + +void op_ret() +{ + [pc, sym_map] = std::move(call_stack.top()); + call_stack.pop(); +} + void op_open() { // Implementation for opening a file or resource @@ -260,7 +289,7 @@ void op_push() void op_ptr_push() { - ptr_stack.push(reinterpret_cast(aptr)); + var_stack.push(reinterpret_cast(aptr)); std::free(aptr); } @@ -274,13 +303,13 @@ void op_get() void op_set() { - if (!ptr_stack.empty()) + if (!var_stack.empty()) { - std::string& var_name = ptr_stack.top(); + std::string& var_name = var_stack.top(); sym_data& temp = sym_map[var_name]; delete temp.data; // Set the top of the stack to ag temp.data = new tensor_array::value::Tensor(ag); - ptr_stack.pop(); + var_stack.pop(); } else { From 236b9fee2fc55ee14e71b41471d91f28194d2ecd Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 22 Jul 2025 17:14:18 +0000 Subject: [PATCH 166/281] fix some error --- src/tensor-array/interp/open_file.c | 1 - src/tensor-array/interp/parser.c | 13 +++++++------ src/tensor-array/interp/token.c | 2 +- src/tensor-array/interp/vmop.cc | 9 +++++---- 4 files 
changed, 13 insertions(+), 12 deletions(-) diff --git a/src/tensor-array/interp/open_file.c b/src/tensor-array/interp/open_file.c index c242684..63a34f6 100644 --- a/src/tensor-array/interp/open_file.c +++ b/src/tensor-array/interp/open_file.c @@ -17,7 +17,6 @@ limitations under the License. #include #include #include -#include "vm.h" #include "open_file.h" char *src = NULL; diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c index 18bc2aa..57ef27f 100644 --- a/src/tensor-array/interp/parser.c +++ b/src/tensor-array/interp/parser.c @@ -21,6 +21,7 @@ limitations under the License. #include "parser.h" #include "token.h" #include "open_file.h" +#include "sym_map.h" #include "vm_type.h" void emit(int size, ...) @@ -72,12 +73,12 @@ void expression(int level) match(TOKEN_ID); if (temp->type) { - if (token == '(') + if (tkn == '(') { /* code */ match('('); match(')'); - emit(2, CALL, temp->data) + emit(2, CALL, temp->data); } } @@ -259,10 +260,10 @@ void statement() fprintf(stderr, "Error: function name\n"); exit(1); } - cur->type = TYPE_FUNC; - cur->data = malloc(1024*8); + sym_cur->type = TYPE_FUNC; + sym_cur->data = malloc(1024*8); VM_INSTRUCTION *save = text; - text = cur->data + text = sym_cur->data; match(TOKEN_ID); match('('); match(')'); @@ -270,7 +271,7 @@ void statement() if (*text != RET) emit(1, RET); text = save; break; - case: TOKEN_RETURN: + case TOKEN_RETURN: match(TOKEN_RETURN); expression(TOKEN_ASSIGN); emit(1, RET); diff --git a/src/tensor-array/interp/token.c b/src/tensor-array/interp/token.c index 2625de9..6850ecb 100644 --- a/src/tensor-array/interp/token.c +++ b/src/tensor-array/interp/token.c @@ -304,7 +304,7 @@ void token_next() } else { - printf("invalid symbol %c", tkn) + printf("invalid symbol %c", tkn); } break; } diff --git a/src/tensor-array/interp/vmop.cc b/src/tensor-array/interp/vmop.cc index 7f6a23a..ae68654 100644 --- a/src/tensor-array/interp/vmop.cc +++ b/src/tensor-array/interp/vmop.cc @@ -27,11 +27,11 @@ 
extern VM_INSTRUCTION* pc; std::stack tensor_stack; std::stack var_stack; -std::stack> call_stack; +std::stack> call_stack; tensor_array::value::Tensor ag; void* aptr; long any_value; -VM_TYPE any_type; +long any_type; void new_int() { @@ -230,14 +230,15 @@ void op_shr() void op_call() { - VM_INSTRUCTION pc1 = (VM_INSTRUCTION)*pc++; + VM_INSTRUCTION* pc1 = (VM_INSTRUCTION*)*pc++; call_stack.push({std::move(pc), std::move(sym_map)}); pc = pc1; } void op_ret() { - [pc, sym_map] = std::move(call_stack.top()); + pc = call_stack.top().first; + sym_map = std::move(call_stack.top().second); call_stack.pop(); } From 21058dac26b8290fd1c35d3ed6cba5419e8dacf6 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 01:54:22 +0700 Subject: [PATCH 167/281] Update vm_type.h --- src/tensor-array/interp/vm_type.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/tensor-array/interp/vm_type.h b/src/tensor-array/interp/vm_type.h index 2f26bda..740016a 100644 --- a/src/tensor-array/interp/vm_type.h +++ b/src/tensor-array/interp/vm_type.h @@ -1,3 +1,19 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + #ifdef __cplusplus extern "C" { From 1991b95afbe37d7c37667664654612f418085ada Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 04:02:21 +0000 Subject: [PATCH 168/281] test --- .devcontainer/ROCm/Dockerfile | 24 +++++++++++++ .devcontainer/ROCm/devcontainer.json | 43 ++++++++++++++++++++++ src/tensor-array/core/CMakeLists.txt | 53 ++++++++++++++++++---------- 3 files changed, 101 insertions(+), 19 deletions(-) create mode 100644 .devcontainer/ROCm/Dockerfile create mode 100644 .devcontainer/ROCm/devcontainer.json diff --git a/.devcontainer/ROCm/Dockerfile b/.devcontainer/ROCm/Dockerfile new file mode 100644 index 0000000..ee8c283 --- /dev/null +++ b/.devcontainer/ROCm/Dockerfile @@ -0,0 +1,24 @@ +FROM rocm/dev-ubuntu-20.04:latest + +RUN apt-get update +RUN apt-get upgrade -y +RUN apt-get install curl -y + +ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" + +# Optionally install the cmake for vcpkg +COPY scripts/packages-install/reinstall-cmake.sh /tmp/ + +RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ + chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ + fi \ + && rm -f /tmp/reinstall-cmake.sh + + + +# [Optional] Uncomment this section to install additional vcpkg ports. +# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " + +# [Optional] Uncomment this section to install additional packages. +# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ +# && apt-get -y install --no-install-recommends diff --git a/.devcontainer/ROCm/devcontainer.json b/.devcontainer/ROCm/devcontainer.json new file mode 100644 index 0000000..302e224 --- /dev/null +++ b/.devcontainer/ROCm/devcontainer.json @@ -0,0 +1,43 @@ +// For format details, see https://aka.ms/devcontainer.json. 
For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/cpp +{ + "name": "ROCm", + "build": { + "context": "../..", + "dockerfile": "Dockerfile" + }, + + "runArgs": [ + "--gpus", + "all" + ], + + "customizations": { + "vscode": { + "extensions": [ + "ms-vscode.cpptools-extension-pack", + "ms-vscode.cmake-tools" + ] + } + }, + + "hostRequirements": { + "gpu": "optional" + }, + + "features": { + "ghcr.io/devcontainers/features/git:1": {} + } + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + + // Use 'postCreateCommand' to run commands after the container is created. + // "postCreateCommand": "gcc -v", + + // Configure tool-specific properties. + // "customizations": {}, + + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "root" +} diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 97064dc..7bd2fd4 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,21 +7,28 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -enable_language(CUDA) - -find_package(CUDAToolkit) - +file(GLOB TensorArray_src_cc "*.cc") -if (CUDAToolkit_FOUND) -# set(CMAKE_CUDA_ARCHITECTURES 52 75 89) -set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) -# list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") +find_package(hip) + +if (hip_FOUND) + enable_language(HIP) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") +else() + find_package(CUDAToolkit) + + if (CUDAToolkit_FOUND) + enable_language(CUDA) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + endif() endif() -file(GLOB TensorArray_src_cc "*.cc") - -if 
(CUDAToolkit_FOUND) -file(GLOB TensorArray_src_cu "*.cu") +if (hip_FOUND OR CUDAToolkit_FOUND) + file(GLOB TensorArray_src_cu "*.cu") endif() # file(MAKE_DIRECTORY "include/tensor_array/core") @@ -36,14 +43,22 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) -if (CUDAToolkit_FOUND) -set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) -set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) -endif() +if(hip_FOUND) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) + + foreach(TensorArray_src_hip ${TensorArray_src_cu}) + set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) + endforeach() + + target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) +elif (CUDAToolkit_FOUND) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) -if (CUDAToolkit_FOUND) -target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::cublas) + target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::cublas CUDA::curand) endif() install( From a2378bf4ed22c8ccd2d20dfd1b2885e6260f1b01 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 04:23:51 +0000 Subject: [PATCH 169/281] test --- src/tensor-array/core/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 7bd2fd4..6740088 100644 
--- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -9,18 +9,20 @@ install( file(GLOB TensorArray_src_cc "*.cc") +enable_language(HIP) + find_package(hip) if (hip_FOUND) - enable_language(HIP) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") else() + enable_language(CUDA) + find_package(CUDAToolkit) if (CUDAToolkit_FOUND) - enable_language(CUDA) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") From 038a7cc0c13df47199b558fcde0afb1704ef1e46 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 04:32:19 +0000 Subject: [PATCH 170/281] test --- src/tensor-array/core/CMakeLists.txt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 6740088..7813593 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -9,20 +9,18 @@ install( file(GLOB TensorArray_src_cc "*.cc") -enable_language(HIP) - find_package(hip) if (hip_FOUND) + enable_language(HIP) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") else() - enable_language(CUDA) - find_package(CUDAToolkit) if (CUDAToolkit_FOUND) + enable_language(CUDA) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") @@ -60,7 +58,7 @@ elif (CUDAToolkit_FOUND) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::cublas CUDA::curand) + 
target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) endif() install( From 0759d5e8ab2c6c6f0fc0eb7277f9e723dd67657e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 04:47:21 +0000 Subject: [PATCH 171/281] test --- src/tensor-array/core/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 7813593..e592ee4 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -24,6 +24,7 @@ else() # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + include_directories(${CUDAToolkit_INCLUDE_DIRS}) endif() endif() @@ -58,7 +59,7 @@ elif (CUDAToolkit_FOUND) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - target_link_libraries(tensorarray_core PRIVATE CUDA::cublas) + target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::curand CUDA::cublas) endif() install( From 9aa690fc206d60059b2728bbb53838e28468059d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 04:51:58 +0000 Subject: [PATCH 172/281] test --- src/tensor-array/core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index e592ee4..90e1457 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -59,7 +59,7 @@ elif (CUDAToolkit_FOUND) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::curand CUDA::cublas) + 
target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::curand CUDA::cublas CUDA::cublasLt) endif() install( From 0a82137fc78f76954fb451a640b998fee691b486 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 04:52:10 +0000 Subject: [PATCH 173/281] test --- src/tensor-array/core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 90e1457..37ac6e9 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -59,7 +59,7 @@ elif (CUDAToolkit_FOUND) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::curand CUDA::cublas CUDA::cublasLt) + target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::curand CUDA::cublas CUDA::cublasLt) endif() install( From e90991fcbbabaa409c8359c714e3c6fc06524c59 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 04:58:46 +0000 Subject: [PATCH 174/281] test --- src/tensor-array/core/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 37ac6e9..9f15d35 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -59,7 +59,12 @@ elif (CUDAToolkit_FOUND) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::curand CUDA::cublas CUDA::cublasLt) + target_link_libraries( + tensorarray_core PRIVATE + CUDA::cudart CUDA::cudart_static + 
CUDA::curand CUDA::curand_static + CUDA::cublas CUDA::cublas_static + ) endif() install( From 976c1d774995a756a523b483b94b144a0a240f1c Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 05:13:46 +0000 Subject: [PATCH 175/281] tests --- scripts/actions/install-cuda-rhel.sh | 4 +--- scripts/actions/install-cuda-ubuntu.sh | 4 +--- src/tensor-array/core/CMakeLists.txt | 1 - 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/scripts/actions/install-cuda-rhel.sh b/scripts/actions/install-cuda-rhel.sh index 266dfb9..65e8b1b 100644 --- a/scripts/actions/install-cuda-rhel.sh +++ b/scripts/actions/install-cuda-rhel.sh @@ -148,7 +148,5 @@ if [[ $GITHUB_ACTIONS ]] then echo "${CUDA_PATH}/bin" >> $GITHUB_PATH echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV - echo "PATH=$PATH:$CUDA_PATH/bin" >> $GITHUB_ENV - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib" >> $GITHUB_ENV - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib64" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> $GITHUB_ENV fi diff --git a/scripts/actions/install-cuda-ubuntu.sh b/scripts/actions/install-cuda-ubuntu.sh index 628ea25..34a507f 100644 --- a/scripts/actions/install-cuda-ubuntu.sh +++ b/scripts/actions/install-cuda-ubuntu.sh @@ -139,7 +139,5 @@ then echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" echo "${CUDA_PATH}/bin" >> $GITHUB_PATH echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV - echo "PATH=$PATH:$CUDA_PATH/bin" >> $GITHUB_ENV - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib" >> $GITHUB_ENV - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${CUDA_PATH}/lib64" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> $GITHUB_ENV fi diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 9f15d35..19e5320 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -24,7 +24,6 @@ else() # set(CMAKE_CUDA_ARCHITECTURES 52 
75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - include_directories(${CUDAToolkit_INCLUDE_DIRS}) endif() endif() From 892c40ce3ca9c723191065b4b6b0b1ccd603b052 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 06:40:15 +0000 Subject: [PATCH 176/281] test --- src/tensor-array/core/CMakeLists.txt | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 19e5320..a01928b 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -9,8 +9,17 @@ install( file(GLOB TensorArray_src_cc "*.cc") -find_package(hip) + enable_language(CUDA) + find_package(CUDAToolkit) + + if (CUDAToolkit_FOUND) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + endif() +find_package(hip) +#[[ if (hip_FOUND) enable_language(HIP) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) @@ -26,7 +35,7 @@ else() # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") endif() endif() - +]] if (hip_FOUND OR CUDAToolkit_FOUND) file(GLOB TensorArray_src_cu "*.cu") endif() @@ -60,9 +69,9 @@ elif (CUDAToolkit_FOUND) target_link_libraries( tensorarray_core PRIVATE - CUDA::cudart CUDA::cudart_static - CUDA::curand CUDA::curand_static - CUDA::cublas CUDA::cublas_static + CUDA::cudart_static + CUDA::curand_static + CUDA::cublas_static ) endif() From fb37a0e33c402a5cf5f987c5acb68a285eb4ed18 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 06:53:12 +0000 Subject: [PATCH 177/281] test --- src/tensor-array/core/CMakeLists.txt | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 
a01928b..f72a01f 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,19 +7,8 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -file(GLOB TensorArray_src_cc "*.cc") - - enable_language(CUDA) - find_package(CUDAToolkit) - - if (CUDAToolkit_FOUND) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - endif() - find_package(hip) -#[[ + if (hip_FOUND) enable_language(HIP) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) @@ -35,7 +24,9 @@ else() # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") endif() endif() -]] + +file(GLOB TensorArray_src_cc "*.cc") + if (hip_FOUND OR CUDAToolkit_FOUND) file(GLOB TensorArray_src_cu "*.cu") endif() From dfe411c36cd7e01eb18b9762fed649dfc9c9bd15 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 06:58:00 +0000 Subject: [PATCH 178/281] test --- src/tensor-array/core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index f72a01f..ff8c0de 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,7 +7,7 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -find_package(hip) +# find_package(hip) if (hip_FOUND) enable_language(HIP) From 82144bb25aabcde3ca1487bb27298daca7493dc4 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 07:08:17 +0000 Subject: [PATCH 179/281] test --- src/tensor-array/core/CMakeLists.txt | 58 +++++++++------------------- 1 file changed, 19 insertions(+), 39 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index ff8c0de..97064dc 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ 
b/src/tensor-array/core/CMakeLists.txt @@ -7,28 +7,21 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -# find_package(hip) - -if (hip_FOUND) - enable_language(HIP) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") -else() - find_package(CUDAToolkit) - - if (CUDAToolkit_FOUND) - enable_language(CUDA) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - endif() +enable_language(CUDA) + +find_package(CUDAToolkit) + + +if (CUDAToolkit_FOUND) +# set(CMAKE_CUDA_ARCHITECTURES 52 75 89) +set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) +# list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") endif() file(GLOB TensorArray_src_cc "*.cc") -if (hip_FOUND OR CUDAToolkit_FOUND) - file(GLOB TensorArray_src_cu "*.cu") +if (CUDAToolkit_FOUND) +file(GLOB TensorArray_src_cu "*.cu") endif() # file(MAKE_DIRECTORY "include/tensor_array/core") @@ -43,27 +36,14 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) -if(hip_FOUND) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) - - foreach(TensorArray_src_hip ${TensorArray_src_cu}) - set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) - endforeach() - - target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) -elif (CUDAToolkit_FOUND) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - - 
target_link_libraries( - tensorarray_core PRIVATE - CUDA::cudart_static - CUDA::curand_static - CUDA::cublas_static - ) +if (CUDAToolkit_FOUND) +set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) +set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) +endif() + +if (CUDAToolkit_FOUND) +target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::cublas) endif() install( From b31ba7d6bb1d6b8778b0f3bf9c8b229e9d714eac Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 07:15:54 +0000 Subject: [PATCH 180/281] test --- src/tensor-array/core/CMakeLists.txt | 52 ++++++++++++++++++---------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 97064dc..b1aeb04 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,21 +7,28 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -enable_language(CUDA) - -find_package(CUDAToolkit) - - -if (CUDAToolkit_FOUND) -# set(CMAKE_CUDA_ARCHITECTURES 52 75 89) -set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) -# list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") +find_package(hip) + +if (hip_FOUND) + enable_language(HIP) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") +else() + find_package(CUDAToolkit) + + if (CUDAToolkit_FOUND) + enable_language(CUDA) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + endif() endif() file(GLOB TensorArray_src_cc "*.cc") -if (CUDAToolkit_FOUND) -file(GLOB TensorArray_src_cu "*.cu") +if (hip_FOUND OR CUDAToolkit_FOUND) + file(GLOB TensorArray_src_cu 
"*.cu") endif() # file(MAKE_DIRECTORY "include/tensor_array/core") @@ -36,14 +43,23 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) -if (CUDAToolkit_FOUND) -set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) -set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) -endif() +if(hip_FOUND) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) + + foreach(TensorArray_src_hip ${TensorArray_src_cu}) + set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) + endforeach() + + target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) +elif(CUDAToolkit_FOUND) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) -if (CUDAToolkit_FOUND) -target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::cublas) + target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::cublas CUDA::curand) +else() endif() install( From b20504aabdc37412e8d81d29334205f017896b07 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 07:19:28 +0000 Subject: [PATCH 181/281] test --- src/tensor-array/core/CMakeLists.txt | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index b1aeb04..cef167d 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,6 +7,7 @@ 
install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) +find_package(CUDAToolkit) find_package(hip) if (hip_FOUND) @@ -14,15 +15,12 @@ if (hip_FOUND) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") +elif(CUDAToolkit_FOUND) + enable_language(CUDA) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") else() - find_package(CUDAToolkit) - - if (CUDAToolkit_FOUND) - enable_language(CUDA) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - endif() endif() file(GLOB TensorArray_src_cc "*.cc") From 33aeb6f0e4dd24b1214eedcd004516cfcceebd52 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 08:01:21 +0000 Subject: [PATCH 182/281] test --- .../cmake/gpu-compiler/add-cuda-nvcc.cmake | 28 ++++++++++ scripts/cmake/gpu-compiler/add-rocm-hip.cmake | 29 ++++++++++ src/tensor-array/core/CMakeLists.txt | 53 +++---------------- 3 files changed, 64 insertions(+), 46 deletions(-) create mode 100644 scripts/cmake/gpu-compiler/add-cuda-nvcc.cmake create mode 100644 scripts/cmake/gpu-compiler/add-rocm-hip.cmake diff --git a/scripts/cmake/gpu-compiler/add-cuda-nvcc.cmake b/scripts/cmake/gpu-compiler/add-cuda-nvcc.cmake new file mode 100644 index 0000000..2fe64bc --- /dev/null +++ b/scripts/cmake/gpu-compiler/add-cuda-nvcc.cmake @@ -0,0 +1,28 @@ +function(add_for_CUDA TA_TARGET) + enable_language(CUDA) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + file(GLOB TensorArray_src_cc "*.cc") + file(GLOB TensorArray_src_cu "*.cu") + add_library(${TA_TARGET} SHARED ${TensorArray_src_cc} ${TensorArray_src_cu}) + + 
set_property(TARGET ${TA_TARGET} PROPERTY C_STANDARD 11) + set_property(TARGET ${TA_TARGET} PROPERTY C_STANDARD_REQUIRED ON) + set_property(TARGET ${TA_TARGET} PROPERTY C_EXTENSIONS OFF) + + set_property(TARGET ${TA_TARGET} PROPERTY CXX_STANDARD 17) + set_property(TARGET ${TA_TARGET} PROPERTY CXX_STANDARD_REQUIRED ON) + set_property(TARGET ${TA_TARGET} PROPERTY CXX_EXTENSIONS OFF) + + set_property(TARGET ${TA_TARGET} PROPERTY CUDA_STANDARD 17) + set_property(TARGET ${TA_TARGET} PROPERTY CUDA_STANDARD_REQUIRED ON) + set_property(TARGET ${TA_TARGET} PROPERTY CUDA_EXTENSIONS OFF) + + target_link_libraries( + ${TA_TARGET} PRIVATE + CUDA::cudart CUDA::cudart_static + CUDA::curand CUDA::curand_static + CUDA::cublas CUDA::cublas_static + ) +endfunction() diff --git a/scripts/cmake/gpu-compiler/add-rocm-hip.cmake b/scripts/cmake/gpu-compiler/add-rocm-hip.cmake new file mode 100644 index 0000000..7af9e6b --- /dev/null +++ b/scripts/cmake/gpu-compiler/add-rocm-hip.cmake @@ -0,0 +1,29 @@ +function(add_for_ROCm TA_TARGET) + enable_language(HIP) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + + file(GLOB TensorArray_src_cc "*.cc") + file(GLOB TensorArray_src_cu "*.cu") + + add_library(${TA_TARGET} SHARED ${TensorArray_src_cc} ${TensorArray_src_cu}) + + set_property(TARGET ${TA_TARGET} PROPERTY C_STANDARD 11) + set_property(TARGET ${TA_TARGET} PROPERTY C_STANDARD_REQUIRED ON) + set_property(TARGET ${TA_TARGET} PROPERTY C_EXTENSIONS OFF) + + set_property(TARGET ${TA_TARGET} PROPERTY CXX_STANDARD 17) + set_property(TARGET ${TA_TARGET} PROPERTY CXX_STANDARD_REQUIRED ON) + set_property(TARGET ${TA_TARGET} PROPERTY CXX_EXTENSIONS OFF) + + set_property(TARGET ${TA_TARGET} PROPERTY HIP_STANDARD 17) + set_property(TARGET ${TA_TARGET} PROPERTY HIP_STANDARD_REQUIRED ON) + set_property(TARGET ${TA_TARGET} PROPERTY HIP_EXTENSIONS OFF) + + foreach(TensorArray_src_hip 
${TensorArray_src_cu}) + set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) + endforeach() + + target_link_libraries(${TA_TARGET} PRIVATE hip::host hip::device) +endfunction() diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index cef167d..ecc1ec2 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,57 +7,18 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -find_package(CUDAToolkit) find_package(hip) +find_package(CUDAToolkit) if (hip_FOUND) - enable_language(HIP) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") -elif(CUDAToolkit_FOUND) - enable_language(CUDA) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") -else() -endif() - -file(GLOB TensorArray_src_cc "*.cc") - -if (hip_FOUND OR CUDAToolkit_FOUND) - file(GLOB TensorArray_src_cu "*.cu") -endif() - -# file(MAKE_DIRECTORY "include/tensor_array/core") - -add_library(tensorarray_core SHARED ${TensorArray_src_cc} ${TensorArray_src_cu}) - -set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) - -if(hip_FOUND) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) - - foreach(TensorArray_src_hip ${TensorArray_src_cu}) - set_source_files_properties(${TensorArray_src_hip} 
PROPERTIES LANGUAGE HIP) - endforeach() - - target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) + include("${PROJECT_SOURCE_DIR}/cmake/gpu-compiler/add-rocm-hip.cmake") + add_for_ROCm(tensorarray_core) elif(CUDAToolkit_FOUND) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - - target_link_libraries(tensorarray_core PRIVATE CUDA::cudart CUDA::cudart_static CUDA::cublas CUDA::curand) + include("${PROJECT_SOURCE_DIR}/cmake/gpu-compiler/add-cuda-nvcc.cmake") + add_for_CUDA(tensorarray_core) else() + file(GLOB TensorArray_src_cc "*.cc") + add_library(tensorarray_core SHARED ${TensorArray_src_cc}) endif() install( From ae836c65acbca0c0e51eadbafd059f22dee228b2 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 08:20:34 +0000 Subject: [PATCH 183/281] test --- .../cmake/gpu-compiler/add-cuda-nvcc.cmake | 28 -------- scripts/cmake/gpu-compiler/add-rocm-hip.cmake | 29 -------- src/tensor-array/core/CMakeLists.txt | 70 +++++++++++++++---- 3 files changed, 58 insertions(+), 69 deletions(-) delete mode 100644 scripts/cmake/gpu-compiler/add-cuda-nvcc.cmake delete mode 100644 scripts/cmake/gpu-compiler/add-rocm-hip.cmake diff --git a/scripts/cmake/gpu-compiler/add-cuda-nvcc.cmake b/scripts/cmake/gpu-compiler/add-cuda-nvcc.cmake deleted file mode 100644 index 2fe64bc..0000000 --- a/scripts/cmake/gpu-compiler/add-cuda-nvcc.cmake +++ /dev/null @@ -1,28 +0,0 @@ -function(add_for_CUDA TA_TARGET) - enable_language(CUDA) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - file(GLOB TensorArray_src_cc "*.cc") - file(GLOB TensorArray_src_cu "*.cu") - add_library(${TA_TARGET} SHARED ${TensorArray_src_cc} ${TensorArray_src_cu}) - - set_property(TARGET 
${TA_TARGET} PROPERTY C_STANDARD 11) - set_property(TARGET ${TA_TARGET} PROPERTY C_STANDARD_REQUIRED ON) - set_property(TARGET ${TA_TARGET} PROPERTY C_EXTENSIONS OFF) - - set_property(TARGET ${TA_TARGET} PROPERTY CXX_STANDARD 17) - set_property(TARGET ${TA_TARGET} PROPERTY CXX_STANDARD_REQUIRED ON) - set_property(TARGET ${TA_TARGET} PROPERTY CXX_EXTENSIONS OFF) - - set_property(TARGET ${TA_TARGET} PROPERTY CUDA_STANDARD 17) - set_property(TARGET ${TA_TARGET} PROPERTY CUDA_STANDARD_REQUIRED ON) - set_property(TARGET ${TA_TARGET} PROPERTY CUDA_EXTENSIONS OFF) - - target_link_libraries( - ${TA_TARGET} PRIVATE - CUDA::cudart CUDA::cudart_static - CUDA::curand CUDA::curand_static - CUDA::cublas CUDA::cublas_static - ) -endfunction() diff --git a/scripts/cmake/gpu-compiler/add-rocm-hip.cmake b/scripts/cmake/gpu-compiler/add-rocm-hip.cmake deleted file mode 100644 index 7af9e6b..0000000 --- a/scripts/cmake/gpu-compiler/add-rocm-hip.cmake +++ /dev/null @@ -1,29 +0,0 @@ -function(add_for_ROCm TA_TARGET) - enable_language(HIP) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - - file(GLOB TensorArray_src_cc "*.cc") - file(GLOB TensorArray_src_cu "*.cu") - - add_library(${TA_TARGET} SHARED ${TensorArray_src_cc} ${TensorArray_src_cu}) - - set_property(TARGET ${TA_TARGET} PROPERTY C_STANDARD 11) - set_property(TARGET ${TA_TARGET} PROPERTY C_STANDARD_REQUIRED ON) - set_property(TARGET ${TA_TARGET} PROPERTY C_EXTENSIONS OFF) - - set_property(TARGET ${TA_TARGET} PROPERTY CXX_STANDARD 17) - set_property(TARGET ${TA_TARGET} PROPERTY CXX_STANDARD_REQUIRED ON) - set_property(TARGET ${TA_TARGET} PROPERTY CXX_EXTENSIONS OFF) - - set_property(TARGET ${TA_TARGET} PROPERTY HIP_STANDARD 17) - set_property(TARGET ${TA_TARGET} PROPERTY HIP_STANDARD_REQUIRED ON) - set_property(TARGET ${TA_TARGET} PROPERTY HIP_EXTENSIONS OFF) - - foreach(TensorArray_src_hip ${TensorArray_src_cu}) - 
set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) - endforeach() - - target_link_libraries(${TA_TARGET} PRIVATE hip::host hip::device) -endfunction() diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index ecc1ec2..9584571 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,18 +7,64 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -find_package(hip) -find_package(CUDAToolkit) - -if (hip_FOUND) - include("${PROJECT_SOURCE_DIR}/cmake/gpu-compiler/add-rocm-hip.cmake") - add_for_ROCm(tensorarray_core) -elif(CUDAToolkit_FOUND) - include("${PROJECT_SOURCE_DIR}/cmake/gpu-compiler/add-cuda-nvcc.cmake") - add_for_CUDA(tensorarray_core) -else() - file(GLOB TensorArray_src_cc "*.cc") - add_library(tensorarray_core SHARED ${TensorArray_src_cc}) +if (CMAKE_HIP_COMPILER_ROCM_ROOT) + enable_language(HIP) + find_package(hip) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") +endif() + +if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) + find_package(CUDAToolkit) + + if (CUDAToolkit_FOUND) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + endif() +endif() + +file(GLOB TensorArray_src_cc "*.cc") + +if (hip_FOUND OR CUDAToolkit_FOUND) + file(GLOB TensorArray_src_cu "*.cu") +endif() + +# file(MAKE_DIRECTORY "include/tensor_array/core") + +add_library(tensorarray_core SHARED ${TensorArray_src_cc} ${TensorArray_src_cu}) + +set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) + +set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_core PROPERTY 
CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) + +if(hip_FOUND) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) + + foreach(TensorArray_src_hip ${TensorArray_src_cu}) + set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) + endforeach() + + target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) +elif (CUDAToolkit_FOUND) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) + + target_link_libraries( + tensorarray_core PRIVATE + CUDA::cudart_static + CUDA::curand_static + CUDA::cublas_static + ) endif() install( From 36d72e6b1307340e38e3b0ba10ff9a0a2528dd8a Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 09:03:24 +0000 Subject: [PATCH 184/281] test --- .github/workflows/cmake-multi-platform.yml | 2 +- src/tensor-array/core/CMakeLists.txt | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 4612ea4..c7b66e7 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -61,7 +61,7 @@ jobs: - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DUSE_CUDA=ON -DUSE_ROCM_HIP=OFF - name: Build # Build your program with the given configuration diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 9584571..1a8ac07 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,7 +7,7 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -if (CMAKE_HIP_COMPILER_ROCM_ROOT) +if(USE_ROCM_HIP) enable_language(HIP) find_package(hip) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) @@ -15,14 +15,15 @@ if (CMAKE_HIP_COMPILER_ROCM_ROOT) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") endif() -if(CMAKE_CUDA_COMPILER) +if(USE_CUDA) enable_language(CUDA) find_package(CUDAToolkit) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") if (CUDAToolkit_FOUND) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + endif() endif() @@ -54,7 +55,9 @@ if(hip_FOUND) endforeach() target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) -elif (CUDAToolkit_FOUND) +endif() + +if (CUDAToolkit_FOUND) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) From f2ee1970d5b25dc2b44a238ffdde0a2d55f18860 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 09:20:55 +0000 Subject: [PATCH 185/281] changes readme --- README.md | 6 +++++- 1 file changed, 5 
insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0ee20a1..7746168 100644 --- a/README.md +++ b/README.md @@ -15,17 +15,21 @@ You need to clone repository by using [Git](https://git-scm.com/) You need to install `Tensor-Array` with [CMake](https://cmake.org/) +If you use NVIDIA GPUs and NVIDIA CUDA you can install `Tensor-Array` by using: + ```shell git clone https://github.com/Tensor-Array/Tensor-Array.git cd Tensor-Array mkdir build cd build -cmake .. +cmake .. -DUSE_CUDA=ON cmake --build . cmake --install . cd .. ``` +If you use AMD GPUs and AMD ROCm hip, then replace `-DUSE_CUDA=ON` to `-DUSE_ROCM_HIP=ON`. + ## Why this repository named `Tensor-Array` We created a template struct that named `TensorArray`. That struct is a multi-dimensional array wrapper. From 07e1b0e3122f04aac37f4575bcec0d5b84343e2a Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 23 Jul 2025 16:31:24 +0700 Subject: [PATCH 186/281] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7746168..84d2700 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ You need to clone repository by using [Git](https://git-scm.com/) You need to install `Tensor-Array` with [CMake](https://cmake.org/) -If you use NVIDIA GPUs and NVIDIA CUDA you can install `Tensor-Array` by using: +If you use [NVIDIA](https://www.nvidia.com/) GPUs and [NVIDIA CUDA](https://developer.nvidia.com/cuda-toolkit) you can install `Tensor-Array` by using: ```shell git clone https://github.com/Tensor-Array/Tensor-Array.git @@ -28,7 +28,7 @@ cmake --install . cd .. ``` -If you use AMD GPUs and AMD ROCm hip, then replace `-DUSE_CUDA=ON` to `-DUSE_ROCM_HIP=ON`. +If you use [AMD](https://www.amd.com/) GPUs and AMD ROCm HIP, then replace `-DUSE_CUDA=ON` to `-DUSE_ROCM_HIP=ON`. 
## Why this repository named `Tensor-Array` From 6c6298318e30e63a0866a87862186475a06578d5 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 24 Jul 2025 20:56:35 +0700 Subject: [PATCH 187/281] Update codeql.yml From 4309dc1f61a66a0cecf0164fbbec50ab0db21c6d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 24 Jul 2025 14:42:35 +0000 Subject: [PATCH 188/281] test --- .github/workflows/cmake-multi-platform.yml | 2 +- README.md | 6 +----- src/tensor-array/core/CMakeLists.txt | 25 ++++++++-------------- 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index c7b66e7..4612ea4 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -61,7 +61,7 @@ jobs: - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DUSE_CUDA=ON -DUSE_ROCM_HIP=OFF + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - name: Build # Build your program with the given configuration diff --git a/README.md b/README.md index 84d2700..0ee20a1 100644 --- a/README.md +++ b/README.md @@ -15,21 +15,17 @@ You need to clone repository by using [Git](https://git-scm.com/) You need to install `Tensor-Array` with [CMake](https://cmake.org/) -If you use [NVIDIA](https://www.nvidia.com/) GPUs and [NVIDIA CUDA](https://developer.nvidia.com/cuda-toolkit) you can install `Tensor-Array` by using: - ```shell git clone https://github.com/Tensor-Array/Tensor-Array.git cd Tensor-Array mkdir build cd build -cmake .. -DUSE_CUDA=ON +cmake .. cmake --build . 
cmake --install . cd .. ``` -If you use [AMD](https://www.amd.com/) GPUs and AMD ROCm HIP, then replace `-DUSE_CUDA=ON` to `-DUSE_ROCM_HIP=ON`. - ## Why this repository named `Tensor-Array` We created a template struct that named `TensorArray`. That struct is a multi-dimensional array wrapper. diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 1a8ac07..9c06b0b 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,7 +7,10 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -if(USE_ROCM_HIP) +include(CheckLanguage) + +check_language(HIP) +if(CMAKE_HIP_COMPILER) enable_language(HIP) find_package(hip) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) @@ -15,21 +18,18 @@ if(USE_ROCM_HIP) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") endif() -if(USE_CUDA) +check_language(CUDA) +if(CMAKE_CUDA_COMPILER) enable_language(CUDA) - find_package(CUDAToolkit) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - if (CUDAToolkit_FOUND) - - endif() endif() file(GLOB TensorArray_src_cc "*.cc") -if (hip_FOUND OR CUDAToolkit_FOUND) +if (USE_CUDA OR USE_ROCM_HIP) file(GLOB TensorArray_src_cu "*.cu") endif() @@ -45,7 +45,7 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) -if(hip_FOUND) +if(CMAKE_HIP_COMPILER) set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) @@ -57,17 +57,10 @@ if(hip_FOUND) target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) endif() -if (CUDAToolkit_FOUND) +if (CMAKE_CUDA_COMPILER) set_property(TARGET tensorarray_core PROPERTY 
CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - - target_link_libraries( - tensorarray_core PRIVATE - CUDA::cudart_static - CUDA::curand_static - CUDA::cublas_static - ) endif() install( From 1611b6a9d362c059c4e16c6255e7e3e9c3ae1f77 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 24 Jul 2025 14:47:08 +0000 Subject: [PATCH 189/281] test 1 --- src/tensor-array/core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 9c06b0b..dd3e1d4 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -29,7 +29,7 @@ endif() file(GLOB TensorArray_src_cc "*.cc") -if (USE_CUDA OR USE_ROCM_HIP) +if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) file(GLOB TensorArray_src_cu "*.cu") endif() From 8c3728a730d365669774d992318686d1aa35c540 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 24 Jul 2025 16:17:11 +0000 Subject: [PATCH 190/281] test --- src/tensor-array/core/CMakeLists.txt | 37 ++++++++++++++-------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index dd3e1d4..1d9923e 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,29 +7,27 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -include(CheckLanguage) +file(GLOB TensorArray_src_cc "*.cc") + +find_package(hip) -check_language(HIP) -if(CMAKE_HIP_COMPILER) +if (hip_FOUND) enable_language(HIP) - find_package(hip) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") -endif() - -check_language(CUDA) 
-if(CMAKE_CUDA_COMPILER) - enable_language(CUDA) +else() + find_package(CUDAToolkit) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + if (CUDAToolkit_FOUND) + enable_language(CUDA) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + endif() endif() -file(GLOB TensorArray_src_cc "*.cc") - -if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) +if (hip_FOUND OR CUDAToolkit_FOUND) file(GLOB TensorArray_src_cu "*.cu") endif() @@ -45,7 +43,7 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) -if(CMAKE_HIP_COMPILER) +if(hip_FOUND) set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) @@ -55,12 +53,13 @@ if(CMAKE_HIP_COMPILER) endforeach() target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) -endif() - -if (CMAKE_CUDA_COMPILER) +elif (CUDAToolkit_FOUND) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) + + target_include_directories(tensorarray_core PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) + target_link_directories(tensorarray_core PRIVATE ${CUDAToolkit_LIBRARY_DIR}) endif() install( From 807ed84224dbaa237426cc76cf7681f90b1b59db Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 24 Jul 2025 17:03:07 +0000 Subject: [PATCH 191/281] test --- src/tensor-array/core/CMakeLists.txt | 38 +++++++++++++++------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git 
a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 1d9923e..ff9b602 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,27 +7,30 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -file(GLOB TensorArray_src_cc "*.cc") - -find_package(hip) +include(CheckLanguage) -if (hip_FOUND) +check_language(HIP) +if(CMAKE_HIP_COMPILER) enable_language(HIP) + find_package(hip REQUIRED) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") -else() - find_package(CUDAToolkit) +endif() + +check_language(CUDA) +if(CMAKE_CUDA_COMPILER AND (NOT CMAKE_HIP_COMPILER)) + enable_language(CUDA) - if (CUDAToolkit_FOUND) - enable_language(CUDA) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - endif() + find_package(CUDAToolkit REQUIRED) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") endif() -if (hip_FOUND OR CUDAToolkit_FOUND) +file(GLOB TensorArray_src_cc "*.cc") + +if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) file(GLOB TensorArray_src_cu "*.cu") endif() @@ -43,7 +46,7 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) -if(hip_FOUND) +if(CMAKE_HIP_COMPILER) set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) @@ -53,13 +56,12 @@ if(hip_FOUND) endforeach() target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) -elif (CUDAToolkit_FOUND) +endif() + +if (CMAKE_CUDA_COMPILER) 
set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - - target_include_directories(tensorarray_core PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) - target_link_directories(tensorarray_core PRIVATE ${CUDAToolkit_LIBRARY_DIR}) endif() install( From f0973ed62693fd5c5ddc4e0f00a8b5c53664f0c0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 24 Jul 2025 17:19:57 +0000 Subject: [PATCH 192/281] test --- src/tensor-array/core/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index ff9b602..f6998fa 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -11,6 +11,9 @@ include(CheckLanguage) check_language(HIP) if(CMAKE_HIP_COMPILER) + unset(CMAKE_HIP_COMPILER CACHE) + unset(CMAKE_HIP_HOST_COMPILER CACHE) + unset(CMAKE_HIP_PLATFORM CACHE) enable_language(HIP) find_package(hip REQUIRED) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) @@ -20,6 +23,8 @@ endif() check_language(CUDA) if(CMAKE_CUDA_COMPILER AND (NOT CMAKE_HIP_COMPILER)) + unset(CMAKE_CUDA_COMPILER CACHE) + unset(CMAKE_CUDA_HOST_COMPILER CACHE) enable_language(CUDA) find_package(CUDAToolkit REQUIRED) From df609db4cf4c153816e60a43aaee213d1817c20d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 01:45:07 +0000 Subject: [PATCH 193/281] test --- src/tensor-array/core/CMakeLists.txt | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index f6998fa..3dacb3c 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -7,13 +7,12 @@ install( DESTINATION 
${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core COMPONENT headers) -include(CheckLanguage) - -check_language(HIP) -if(CMAKE_HIP_COMPILER) - unset(CMAKE_HIP_COMPILER CACHE) - unset(CMAKE_HIP_HOST_COMPILER CACHE) - unset(CMAKE_HIP_PLATFORM CACHE) +execute_process( + COMMAND which hipcc + OUTPUT_VARIABLE WHICH_HIPCC_OUT + RESULT_VARIABLE WHICH_HIPCC_RET +) +if(WHICH_HIPCC_RET EQUAL 0) enable_language(HIP) find_package(hip REQUIRED) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) @@ -21,10 +20,13 @@ if(CMAKE_HIP_COMPILER) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") endif() -check_language(CUDA) -if(CMAKE_CUDA_COMPILER AND (NOT CMAKE_HIP_COMPILER)) - unset(CMAKE_CUDA_COMPILER CACHE) - unset(CMAKE_CUDA_HOST_COMPILER CACHE) +execute_process( + COMMAND which nvcc + OUTPUT_VARIABLE WHICH_NVCC_OUT + RESULT_VARIABLE WHICH_NVCC_RET +) + +if(WHICH_NVCC_RET EQUAL 0 AND NOT WHICH_HIPCC_RET EQUAL 0) enable_language(CUDA) find_package(CUDAToolkit REQUIRED) @@ -35,7 +37,7 @@ endif() file(GLOB TensorArray_src_cc "*.cc") -if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) +if (WHICH_NVCC_RET EQUAL 0 OR WHICH_HIPCC_RET EQUAL 0) file(GLOB TensorArray_src_cu "*.cu") endif() @@ -51,7 +53,7 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) -if(CMAKE_HIP_COMPILER) +if(WHICH_HIPCC_RET EQUAL 0) set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) @@ -61,9 +63,7 @@ if(CMAKE_HIP_COMPILER) endforeach() target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) -endif() - -if (CMAKE_CUDA_COMPILER) +elif(WHICH_NVCC_RET EQUAL 0) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET 
tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) From 25fd61d64bf18ded414b3aaead6810bd8d5b6722 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 02:00:06 +0000 Subject: [PATCH 194/281] test --- src/tensor-array/core/CMakeLists.txt | 32 ++++++++++++---------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt index 3dacb3c..6e056f2 100644 --- a/src/tensor-array/core/CMakeLists.txt +++ b/src/tensor-array/core/CMakeLists.txt @@ -5,14 +5,13 @@ file(GLOB TensorArray_inc "*.hh") install( FILES ${TensorArray_inc} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core - COMPONENT headers) - -execute_process( - COMMAND which hipcc - OUTPUT_VARIABLE WHICH_HIPCC_OUT - RESULT_VARIABLE WHICH_HIPCC_RET + COMPONENT headers ) -if(WHICH_HIPCC_RET EQUAL 0) + +include(CheckLanguage) + +check_language(HIP) +if(CMAKE_HIP_COMPILER) enable_language(HIP) find_package(hip REQUIRED) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) @@ -20,16 +19,11 @@ if(WHICH_HIPCC_RET EQUAL 0) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") endif() -execute_process( - COMMAND which nvcc - OUTPUT_VARIABLE WHICH_NVCC_OUT - RESULT_VARIABLE WHICH_NVCC_RET -) - -if(WHICH_NVCC_RET EQUAL 0 AND NOT WHICH_HIPCC_RET EQUAL 0) +check_language(CUDA) +if(CMAKE_CUDA_COMPILER EQUAL 0 AND NOT CMAKE_HIP_COMPILER) enable_language(CUDA) - find_package(CUDAToolkit REQUIRED) + # find_package(CUDAToolkit REQUIRED) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") @@ -37,7 +31,7 @@ endif() file(GLOB TensorArray_src_cc "*.cc") -if (WHICH_NVCC_RET EQUAL 0 OR WHICH_HIPCC_RET EQUAL 0) +if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) file(GLOB TensorArray_src_cu "*.cu") endif() @@ -53,7 +47,7 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET 
tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) -if(WHICH_HIPCC_RET EQUAL 0) +if(CMAKE_HIP_COMPILER) set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) @@ -63,7 +57,9 @@ if(WHICH_HIPCC_RET EQUAL 0) endforeach() target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) -elif(WHICH_NVCC_RET EQUAL 0) +endif() + +if(CMAKE_CUDA_COMPILER) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) From b376b816fa25dd140160f1fdc60d4d508d726fd8 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 03:08:57 +0000 Subject: [PATCH 195/281] test --- CMakeLists.txt | 162 ++++++++++++++++++++++++- src/tensor-array/core/CMakeLists.txt | 78 ------------ src/tensor-array/interp/CMakeLists.txt | 40 ------ src/tensor-array/layers/CMakeLists.txt | 34 ------ 4 files changed, 159 insertions(+), 155 deletions(-) delete mode 100644 src/tensor-array/core/CMakeLists.txt delete mode 100644 src/tensor-array/interp/CMakeLists.txt delete mode 100644 src/tensor-array/layers/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index ef70274..fbfacf0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,9 +9,165 @@ if(MSVC) add_compile_definitions(TENSOR_ARRAY_EXPORTS) endif() -add_subdirectory("src/tensor-array/core") -add_subdirectory("src/tensor-array/layers") -add_subdirectory("src/tensor-array/interp") +file( + GLOB_RECURSE TensorArray_inc + "${PROJECT_SOURCE_DIR}/src/*.h" + "${PROJECT_SOURCE_DIR}/src/*.hh" +) + +install( + FILES ${TensorArray_inc} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT headers +) + +block() + include(CheckLanguage) + + 
check_language(HIP) + check_language(CUDA) + file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cc") + + if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) + file(GLOB TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cu") + endif() + + if(CMAKE_HIP_COMPILER) + block(PROPAGATE tensorarray_core TensorArray_Core_cc TensorArray_Core_cu) + enable_language(HIP) + find_package(hip REQUIRED) + + add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) + + foreach(TensorArray_src_hip ${TensorArray_src_cu}) + set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) + endforeach() + + target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) + endblock() + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + elif(CMAKE_CUDA_COMPILER) + block(PROPAGATE tensorarray_core TensorArray_Core_cc TensorArray_Core_cu) + enable_language(CUDA) + + add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) + set_property(TARGET tensorarray_core PROPERTY CMAKE_CUDA_SEPARABLE_COMPILATION ON) + endblock() + + # find_package(CUDAToolkit REQUIRED) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + else() + block(PROPAGATE tensorarray_core TensorArray_Core_cc TensorArray_Core_cu) + add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + 
endblock() + endif() + + + # file(MAKE_DIRECTORY "include/tensor_array/core") + + set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) + set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) + + set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) + + install( + TARGETS tensorarray_core + EXPORT TensorArrayTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/core + COMPONENT Development) + + add_library(TensorArray::Core ALIAS tensorarray_core) + +endblock() + +block() + file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/tensor-array/layers/*.cc") + + add_library(tensorarray_layers SHARED ${TensorArray_Layers_src}) + + target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) + target_link_libraries(tensorarray_layers TensorArray::Core) + + set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) + set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) + + set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) + set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_layers PROPERTY CXX_EXTENSIONS OFF) + + install( + TARGETS tensorarray_layers + EXPORT TensorArrayTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/layers + COMPONENT Development) + + add_library(TensorArray::Layers ALIAS tensorarray_layers) +endblock() + +block() + 
file( + GLOB TensorArray_Interpreter_src + "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.c" + "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.cc" + ) + add_executable(tensorarray_interpreter ${TensorArray_Interpreter_src}) + + target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) + target_link_libraries(tensorarray_interpreter TensorArray::Core) + + set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD 11) + set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_interpreter PROPERTY C_EXTENSIONS OFF) + + set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD 17) + set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_interpreter PROPERTY CXX_EXTENSIONS OFF) + + install( + TARGETS tensorarray_interpreter + EXPORT TensorArrayTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/interp + COMPONENT Development) + #[[ + add_custom_command( + OUTPUT test.tmp + DEPENDS tensorarray_interpreter + POST_BUILD + COMMAND tensorarray_interpreter) + ]] + add_executable(TensorArray::Interpreter ALIAS tensorarray_interpreter) + +endblock() + +# add_subdirectory("src/tensor-array/core") +# add_subdirectory("src/tensor-array/layers") +# add_subdirectory("src/tensor-array/interp") include(CTest) if(BUILD_TESTING) diff --git a/src/tensor-array/core/CMakeLists.txt b/src/tensor-array/core/CMakeLists.txt deleted file mode 100644 index 6e056f2..0000000 --- a/src/tensor-array/core/CMakeLists.txt +++ /dev/null @@ -1,78 +0,0 @@ -cmake_minimum_required(VERSION 3.18) - -file(GLOB TensorArray_inc "*.hh") - -install( - FILES ${TensorArray_inc} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/core - COMPONENT headers -) - -include(CheckLanguage) - -check_language(HIP) 
-if(CMAKE_HIP_COMPILER) - enable_language(HIP) - find_package(hip REQUIRED) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") -endif() - -check_language(CUDA) -if(CMAKE_CUDA_COMPILER EQUAL 0 AND NOT CMAKE_HIP_COMPILER) - enable_language(CUDA) - - # find_package(CUDAToolkit REQUIRED) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") -endif() - -file(GLOB TensorArray_src_cc "*.cc") - -if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) - file(GLOB TensorArray_src_cu "*.cu") -endif() - -# file(MAKE_DIRECTORY "include/tensor_array/core") - -add_library(tensorarray_core SHARED ${TensorArray_src_cc} ${TensorArray_src_cu}) - -set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) - -if(CMAKE_HIP_COMPILER) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) - - foreach(TensorArray_src_hip ${TensorArray_src_cu}) - set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) - endforeach() - - target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) -endif() - -if(CMAKE_CUDA_COMPILER) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) -endif() - -install( - TARGETS tensorarray_core - EXPORT 
TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/core - COMPONENT Development) - -add_library(TensorArray::Core ALIAS tensorarray_core) diff --git a/src/tensor-array/interp/CMakeLists.txt b/src/tensor-array/interp/CMakeLists.txt deleted file mode 100644 index 79dd349..0000000 --- a/src/tensor-array/interp/CMakeLists.txt +++ /dev/null @@ -1,40 +0,0 @@ -cmake_minimum_required(VERSION 3.18) - -file(GLOB TensorArray_src "*.cc" "*.c") -file(GLOB TensorArray_inc "*.hh" "*.h") - -install( - FILES ${TensorArray_inc} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/interp - COMPONENT headers) - -add_executable(tensorarray_interpreter ${TensorArray_src}) - -target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_interpreter TensorArray::Core) - -set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_interpreter PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_interpreter PROPERTY CXX_EXTENSIONS OFF) - -install( - TARGETS tensorarray_interpreter - EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/interp - COMPONENT Development) -#[[ -add_custom_command( - OUTPUT test.tmp - DEPENDS tensorarray_interpreter - POST_BUILD - COMMAND tensorarray_interpreter) -]] -add_executable(TensorArray::Interpreter ALIAS tensorarray_interpreter) diff --git a/src/tensor-array/layers/CMakeLists.txt 
b/src/tensor-array/layers/CMakeLists.txt deleted file mode 100644 index f013017..0000000 --- a/src/tensor-array/layers/CMakeLists.txt +++ /dev/null @@ -1,34 +0,0 @@ -cmake_minimum_required(VERSION 3.18) - -file(GLOB TensorArray_src "*.cc") -file(GLOB TensorArray_inc "*.hh") - -install( - FILES ${TensorArray_inc} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensor-array/layers - COMPONENT headers) - -add_library(tensorarray_layers SHARED ${TensorArray_src}) - -target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_layers TensorArray::Core) - -set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_layers PROPERTY CXX_EXTENSIONS OFF) - -install( - TARGETS tensorarray_layers - EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/layers - COMPONENT Development) - -add_library(TensorArray::Layers ALIAS tensorarray_layers) From 51ee1fd750ffaf5dd595654572d253164dda9c6a Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 03:25:11 +0000 Subject: [PATCH 196/281] test --- CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fbfacf0..e06470c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ install( COMPONENT headers ) -block() +block(SCOPE_FOR POLICIES) include(CheckLanguage) check_language(HIP) @@ -33,7 +33,7 @@ block() endif() if(CMAKE_HIP_COMPILER) - block(PROPAGATE 
tensorarray_core TensorArray_Core_cc TensorArray_Core_cu) + block(PROPAGATE tensorarray_core) enable_language(HIP) find_package(hip REQUIRED) @@ -52,7 +52,7 @@ block() # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") elif(CMAKE_CUDA_COMPILER) - block(PROPAGATE tensorarray_core TensorArray_Core_cc TensorArray_Core_cu) + block(PROPAGATE tensorarray_core) enable_language(CUDA) add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) @@ -60,6 +60,7 @@ block() set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) set_property(TARGET tensorarray_core PROPERTY CMAKE_CUDA_SEPARABLE_COMPILATION ON) + target_include_directories(tensorarray_core PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) endblock() # find_package(CUDAToolkit REQUIRED) @@ -97,7 +98,7 @@ block() endblock() -block() +block(SCOPE_FOR POLICIES) file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/tensor-array/layers/*.cc") add_library(tensorarray_layers SHARED ${TensorArray_Layers_src}) @@ -126,7 +127,7 @@ block() add_library(TensorArray::Layers ALIAS tensorarray_layers) endblock() -block() +block(SCOPE_FOR POLICIES) file( GLOB TensorArray_Interpreter_src "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.c" From fe64f0b62b99a6c36a55372a4d807734ad41d15a Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 04:03:50 +0000 Subject: [PATCH 197/281] test --- CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e06470c..d87302c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,7 +54,11 @@ block(SCOPE_FOR POLICIES) elif(CMAKE_CUDA_COMPILER) block(PROPAGATE tensorarray_core) enable_language(CUDA) - + if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) + message(NOTICE "cuda include path: 
${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") + else() + message(FATAL_ERROR "No cuda include path") + endif() add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) From 362c4e600233d9010ee1233bf33c20046691c270 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 06:29:52 +0000 Subject: [PATCH 198/281] test --- CMakeLists.txt | 155 ++--------------------------------- cmake/ta_core_config.cmake | 75 +++++++++++++++++ cmake/ta_interp_config.cmake | 40 +++++++++ cmake/ta_layers_config.cmake | 28 +++++++ 4 files changed, 148 insertions(+), 150 deletions(-) create mode 100644 cmake/ta_core_config.cmake create mode 100644 cmake/ta_interp_config.cmake create mode 100644 cmake/ta_layers_config.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index d87302c..dee4c05 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ -cmake_minimum_required(VERSION 3.18) +cmake_minimum_required(VERSION 3.8) -project(TensorArray) +project(TensorArray CXX) include(GNUInstallDirs) # set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) @@ -21,154 +21,9 @@ install( COMPONENT headers ) -block(SCOPE_FOR POLICIES) - include(CheckLanguage) - - check_language(HIP) - check_language(CUDA) - file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cc") - - if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) - file(GLOB TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cu") - endif() - - if(CMAKE_HIP_COMPILER) - block(PROPAGATE tensorarray_core) - enable_language(HIP) - find_package(hip REQUIRED) - - add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) - set_property(TARGET 
tensorarray_core PROPERTY HIP_EXTENSIONS OFF) - - foreach(TensorArray_src_hip ${TensorArray_src_cu}) - set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) - endforeach() - - target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) - endblock() - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - elif(CMAKE_CUDA_COMPILER) - block(PROPAGATE tensorarray_core) - enable_language(CUDA) - if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - message(NOTICE "cuda include path: ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}") - else() - message(FATAL_ERROR "No cuda include path") - endif() - add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - set_property(TARGET tensorarray_core PROPERTY CMAKE_CUDA_SEPARABLE_COMPILATION ON) - target_include_directories(tensorarray_core PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - endblock() - - # find_package(CUDAToolkit REQUIRED) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - else() - block(PROPAGATE tensorarray_core TensorArray_Core_cc TensorArray_Core_cu) - add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) - endblock() - endif() - - - # file(MAKE_DIRECTORY "include/tensor_array/core") - - set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) - set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) - - set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) - 
set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) - - install( - TARGETS tensorarray_core - EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/core - COMPONENT Development) - - add_library(TensorArray::Core ALIAS tensorarray_core) - -endblock() - -block(SCOPE_FOR POLICIES) - file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/tensor-array/layers/*.cc") - - add_library(tensorarray_layers SHARED ${TensorArray_Layers_src}) - - target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) - target_link_libraries(tensorarray_layers TensorArray::Core) - - set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) - set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) - - set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) - set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_layers PROPERTY CXX_EXTENSIONS OFF) - - install( - TARGETS tensorarray_layers - EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/layers - COMPONENT Development) - - add_library(TensorArray::Layers ALIAS tensorarray_layers) -endblock() - -block(SCOPE_FOR POLICIES) - file( - GLOB TensorArray_Interpreter_src - "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.c" - "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.cc" - ) - add_executable(tensorarray_interpreter ${TensorArray_Interpreter_src}) - - target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) - target_link_libraries(tensorarray_interpreter TensorArray::Core) - - 
set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD 11) - set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_interpreter PROPERTY C_EXTENSIONS OFF) - - set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD 17) - set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_interpreter PROPERTY CXX_EXTENSIONS OFF) - - install( - TARGETS tensorarray_interpreter - EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/interp - COMPONENT Development) - #[[ - add_custom_command( - OUTPUT test.tmp - DEPENDS tensorarray_interpreter - POST_BUILD - COMMAND tensorarray_interpreter) - ]] - add_executable(TensorArray::Interpreter ALIAS tensorarray_interpreter) - -endblock() +include(cmake/ta_core_config.cmake) +include(cmake/ta_layers_config.cmake) +include(cmake/ta_interp_config.cmake) # add_subdirectory("src/tensor-array/core") # add_subdirectory("src/tensor-array/layers") diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake new file mode 100644 index 0000000..fc8b216 --- /dev/null +++ b/cmake/ta_core_config.cmake @@ -0,0 +1,75 @@ +block(SCOPE_FOR POLICIES) + include(CheckLanguage) + + check_language(HIP) + check_language(CUDA) + file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cc") + + if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) + file(GLOB TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cu") + endif() + + if(CMAKE_HIP_COMPILER) + block(PROPAGATE tensorarray_core) + enable_language(HIP) + find_package(hip REQUIRED) + + add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) + set_property(TARGET tensorarray_core 
PROPERTY HIP_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) + + foreach(TensorArray_src_hip ${TensorArray_src_cu}) + set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) + endforeach() + + target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) + endblock() + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + elif(CMAKE_CUDA_COMPILER) + block(PROPAGATE tensorarray_core) + enable_language(CUDA) + add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) + set_property(TARGET tensorarray_core PROPERTY CMAKE_CUDA_SEPARABLE_COMPILATION ON) + set_source_files_properties(data_type_wrapper.cc PROPERTIES LANGUAGE CUDA) + endblock() + + # find_package(CUDAToolkit REQUIRED) + # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) + # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") + else() + block(PROPAGATE tensorarray_core TensorArray_Core_cc TensorArray_Core_cu) + add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + endblock() + endif() + + + # file(MAKE_DIRECTORY "include/tensor_array/core") + + set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) + set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) + + set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) + + install( + TARGETS tensorarray_core + EXPORT TensorArrayTargets + RUNTIME DESTINATION 
${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/core + COMPONENT Development) + + add_library(TensorArray::Core ALIAS tensorarray_core) + +endblock() diff --git a/cmake/ta_interp_config.cmake b/cmake/ta_interp_config.cmake new file mode 100644 index 0000000..f58b753 --- /dev/null +++ b/cmake/ta_interp_config.cmake @@ -0,0 +1,40 @@ +block(SCOPE_FOR POLICIES) + enable_language(C) + + file( + GLOB TensorArray_Interpreter_src + "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.c" + "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.cc" + ) + add_executable(tensorarray_interpreter ${TensorArray_Interpreter_src}) + + target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) + target_link_libraries(tensorarray_interpreter TensorArray::Core) + + set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD 11) + set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_interpreter PROPERTY C_EXTENSIONS OFF) + + set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD 17) + set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_interpreter PROPERTY CXX_EXTENSIONS OFF) + + install( + TARGETS tensorarray_interpreter + EXPORT TensorArrayTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/interp + COMPONENT Development) + #[[ + add_custom_command( + OUTPUT test.tmp + DEPENDS tensorarray_interpreter + POST_BUILD + COMMAND tensorarray_interpreter) + ]] + add_executable(TensorArray::Interpreter ALIAS tensorarray_interpreter) + +endblock() diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake new file mode 100644 index 
0000000..090cd87 --- /dev/null +++ b/cmake/ta_layers_config.cmake @@ -0,0 +1,28 @@ +block(SCOPE_FOR POLICIES) + file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/tensor-array/layers/*.cc") + + add_library(tensorarray_layers SHARED ${TensorArray_Layers_src}) + + target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) + target_link_libraries(tensorarray_layers TensorArray::Core) + + set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) + set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) + + set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) + set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_layers PROPERTY CXX_EXTENSIONS OFF) + + install( + TARGETS tensorarray_layers + EXPORT TensorArrayTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/layers + COMPONENT Development) + + add_library(TensorArray::Layers ALIAS tensorarray_layers) +endblock() From 7dbc02c4caa6a79c01692dc2ac543cc761ec95aa Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 08:18:56 +0000 Subject: [PATCH 199/281] test --- cmake/ta_core_config.cmake | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index fc8b216..c17419d 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -1,15 +1,13 @@ block(SCOPE_FOR POLICIES) include(CheckLanguage) - check_language(HIP) - check_language(CUDA) file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cc") - if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) + if ((DEFINED $ENV{CUDA_PATH}) OR (DEFINED $ENV{ROCM_BRANCH})) file(GLOB 
TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cu") endif() - if(CMAKE_HIP_COMPILER) + if(DEFINED $ENV{ROCM_BRANCH}) block(PROPAGATE tensorarray_core) enable_language(HIP) find_package(hip REQUIRED) @@ -28,7 +26,7 @@ block(SCOPE_FOR POLICIES) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - elif(CMAKE_CUDA_COMPILER) + elif(DEFINED $ENV{CUDA_PATH}) block(PROPAGATE tensorarray_core) enable_language(CUDA) add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) From 6071eda6de8120a953f4f9c93d7c4ed598af1ad9 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 08:27:46 +0000 Subject: [PATCH 200/281] add more test --- .github/workflows/cmake-multi-platform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 4612ea4..752d425 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -20,7 +20,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ "ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] + os: [ "ubuntu-24.04", "ubuntu-24.04-arm", "ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] cuda-version: [ "12.9.1", "12.4.1" ] exclude: - os: "windows-latest" From 1f35322678d66a9e3ffc5fe4698ed397518e2d40 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 08:30:46 +0000 Subject: [PATCH 201/281] test --- .github/workflows/cmake-multi-platform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 752d425..4612ea4 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -20,7 +20,7 @@ jobs: 
strategy: fail-fast: false matrix: - os: [ "ubuntu-24.04", "ubuntu-24.04-arm", "ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] + os: [ "ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] cuda-version: [ "12.9.1", "12.4.1" ] exclude: - os: "windows-latest" From 8932fa2d9d89f049614987d7411bd5e8f9bb4a79 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 10:26:46 +0000 Subject: [PATCH 202/281] test --- cmake/ta_core_config.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index c17419d..856819c 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -3,11 +3,11 @@ block(SCOPE_FOR POLICIES) file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cc") - if ((DEFINED $ENV{CUDA_PATH}) OR (DEFINED $ENV{ROCM_BRANCH})) + if ((DEFINED ENV{CUDA_PATH}) OR (DEFINED ENV{ROCM_BRANCH})) file(GLOB TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cu") endif() - if(DEFINED $ENV{ROCM_BRANCH}) + if(DEFINED ENV{ROCM_BRANCH}) block(PROPAGATE tensorarray_core) enable_language(HIP) find_package(hip REQUIRED) @@ -26,7 +26,7 @@ block(SCOPE_FOR POLICIES) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - elif(DEFINED $ENV{CUDA_PATH}) + elif(DEFINED ENV{CUDA_PATH}) block(PROPAGATE tensorarray_core) enable_language(CUDA) add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) From 1f3cd42e7a206a3fe7d4ea3cc3e85b7d74f484f7 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 15:21:06 +0000 Subject: [PATCH 203/281] test --- CMakeLists.txt | 2 +- cmake/ta_core_config.cmake | 10 +++++++--- cmake/ta_interp_config.cmake | 2 -- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index dee4c05..1710832 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.8) -project(TensorArray CXX) +project(TensorArray C CXX) include(GNUInstallDirs) # set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 856819c..341202f 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -1,13 +1,17 @@ block(SCOPE_FOR POLICIES) include(CheckLanguage) + include(CheckLanguage) + check_language(HIP) + check_language(CUDA) + file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cc") - if ((DEFINED ENV{CUDA_PATH}) OR (DEFINED ENV{ROCM_BRANCH})) + if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) file(GLOB TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cu") endif() - if(DEFINED ENV{ROCM_BRANCH}) + if(CMAKE_HIP_COMPILER) block(PROPAGATE tensorarray_core) enable_language(HIP) find_package(hip REQUIRED) @@ -26,7 +30,7 @@ block(SCOPE_FOR POLICIES) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - elif(DEFINED ENV{CUDA_PATH}) + elseif(CMAKE_CUDA_COMPILER) block(PROPAGATE tensorarray_core) enable_language(CUDA) add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) diff --git a/cmake/ta_interp_config.cmake b/cmake/ta_interp_config.cmake index f58b753..4a3697a 100644 --- a/cmake/ta_interp_config.cmake +++ b/cmake/ta_interp_config.cmake @@ -1,6 +1,4 @@ block(SCOPE_FOR POLICIES) - enable_language(C) - file( GLOB TensorArray_Interpreter_src "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.c" From 6f8845878541ceb7cdbc1dd667aed98642235441 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 15:37:15 +0000 Subject: [PATCH 204/281] set cmake policy 0104 --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff 
--git a/CMakeLists.txt b/CMakeLists.txt index 1710832..44f8e10 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required(VERSION 3.8) +cmake_policy(SET CMP0104 NEW) + project(TensorArray C CXX) include(GNUInstallDirs) From 7a2e801d77b6b60d6e5108687acc4f92bc2fb9fe Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 22:46:40 +0700 Subject: [PATCH 205/281] Update ta_core_config.cmake --- cmake/ta_core_config.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 341202f..7726224 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -38,7 +38,6 @@ block(SCOPE_FOR POLICIES) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) set_property(TARGET tensorarray_core PROPERTY CMAKE_CUDA_SEPARABLE_COMPILATION ON) - set_source_files_properties(data_type_wrapper.cc PROPERTIES LANGUAGE CUDA) endblock() # find_package(CUDAToolkit REQUIRED) From 913daf5120c462c42d2cdbb79f1abeebb1e185f1 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 22:55:10 +0700 Subject: [PATCH 206/281] Update CMakeLists.txt --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 44f8e10..8844966 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.8) +cmake_minimum_required(VERSION 3.18) cmake_policy(SET CMP0104 NEW) From dbf73de706b3f4b922048f1bede1b8b819d7d708 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 22:56:00 +0700 Subject: [PATCH 207/281] Update CMakeLists.txt --- CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8844966..95e9d71 100644 --- a/CMakeLists.txt 
+++ b/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.18) -cmake_policy(SET CMP0104 NEW) - project(TensorArray C CXX) include(GNUInstallDirs) From 7e5895d793bfd8cd3af0dce48020b8b536278283 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:30:47 +0000 Subject: [PATCH 208/281] test --- cmake/ta_core_config.cmake | 102 ++++++++++++++++------------------- cmake/ta_interp_config.cmake | 52 +++++++++--------- cmake/ta_layers_config.cmake | 43 ++++++++------- 3 files changed, 93 insertions(+), 104 deletions(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 7726224..6c51ca4 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -1,76 +1,68 @@ -block(SCOPE_FOR POLICIES) - include(CheckLanguage) +include(CheckLanguage) - include(CheckLanguage) - check_language(HIP) - check_language(CUDA) +include(CheckLanguage) +check_language(HIP) +check_language(CUDA) - file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cc") +file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cc") - if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) - file(GLOB TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cu") - endif() +if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) + file(GLOB TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cu") +endif() - if(CMAKE_HIP_COMPILER) - block(PROPAGATE tensorarray_core) - enable_language(HIP) - find_package(hip REQUIRED) +if(CMAKE_HIP_COMPILER) + enable_language(HIP) + find_package(hip REQUIRED) - add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) + add_library(tensorarray_core SHARED ${TensorArray_Core_cc} 
${TensorArray_Core_cu}) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) - foreach(TensorArray_src_hip ${TensorArray_src_cu}) - set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) - endforeach() + foreach(TensorArray_src_hip ${TensorArray_src_cu}) + set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) + endforeach() - target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) - endblock() + target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - elseif(CMAKE_CUDA_COMPILER) - block(PROPAGATE tensorarray_core) - enable_language(CUDA) - add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - set_property(TARGET tensorarray_core PROPERTY CMAKE_CUDA_SEPARABLE_COMPILATION ON) - endblock() +elseif(CMAKE_CUDA_COMPILER) + enable_language(CUDA) + add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) + set_property(TARGET tensorarray_core PROPERTY CMAKE_CUDA_SEPARABLE_COMPILATION ON) # find_package(CUDAToolkit REQUIRED) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") - else() - block(PROPAGATE tensorarray_core TensorArray_Core_cc 
TensorArray_Core_cu) - add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) - endblock() - endif() +else() + add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) +endif() - # file(MAKE_DIRECTORY "include/tensor_array/core") +# file(MAKE_DIRECTORY "include/tensor_array/core") - set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) - set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) - set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) +set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) - install( - TARGETS tensorarray_core - EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/core - COMPONENT Development) +install( + TARGETS tensorarray_core + EXPORT TensorArrayTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/core + COMPONENT Development +) - add_library(TensorArray::Core ALIAS tensorarray_core) - -endblock() +add_library(TensorArray::Core ALIAS tensorarray_core) diff --git a/cmake/ta_interp_config.cmake b/cmake/ta_interp_config.cmake index 4a3697a..bcc62c8 100644 --- 
a/cmake/ta_interp_config.cmake +++ b/cmake/ta_interp_config.cmake @@ -1,31 +1,31 @@ -block(SCOPE_FOR POLICIES) - file( - GLOB TensorArray_Interpreter_src - "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.c" - "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.cc" - ) - add_executable(tensorarray_interpreter ${TensorArray_Interpreter_src}) +file( + GLOB TensorArray_Interpreter_src + "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.c" + "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.cc" +) +add_executable(tensorarray_interpreter ${TensorArray_Interpreter_src}) - target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) - target_link_libraries(tensorarray_interpreter TensorArray::Core) +target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_link_libraries(tensorarray_interpreter TensorArray::Core) - set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD 11) - set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_interpreter PROPERTY C_EXTENSIONS OFF) +set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_interpreter PROPERTY C_EXTENSIONS OFF) - set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD 17) - set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_interpreter PROPERTY CXX_EXTENSIONS OFF) +set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_interpreter PROPERTY CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_interpreter PROPERTY CXX_EXTENSIONS OFF) - install( - TARGETS tensorarray_interpreter - EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT Runtime - ARCHIVE 
DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/interp - COMPONENT Development) +install( + TARGETS tensorarray_interpreter + EXPORT TensorArrayTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/interp + COMPONENT Development +) #[[ add_custom_command( OUTPUT test.tmp @@ -33,6 +33,4 @@ block(SCOPE_FOR POLICIES) POST_BUILD COMMAND tensorarray_interpreter) ]] - add_executable(TensorArray::Interpreter ALIAS tensorarray_interpreter) - -endblock() +add_executable(TensorArray::Interpreter ALIAS tensorarray_interpreter) diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake index 090cd87..f663374 100644 --- a/cmake/ta_layers_config.cmake +++ b/cmake/ta_layers_config.cmake @@ -1,28 +1,27 @@ -block(SCOPE_FOR POLICIES) - file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/tensor-array/layers/*.cc") +file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/tensor-array/layers/*.cc") - add_library(tensorarray_layers SHARED ${TensorArray_Layers_src}) +add_library(tensorarray_layers SHARED ${TensorArray_Layers_src}) - target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) - target_link_libraries(tensorarray_layers TensorArray::Core) +target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_link_libraries(tensorarray_layers TensorArray::Core) - set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) - set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) +set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) - set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) - set_property(TARGET 
tensorarray_layers PROPERTY CXX_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_layers PROPERTY CXX_EXTENSIONS OFF) +set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers PROPERTY CXX_EXTENSIONS OFF) - install( - TARGETS tensorarray_layers - EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array - COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/layers - COMPONENT Development) +install( + TARGETS tensorarray_layers + EXPORT TensorArrayTargets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT Runtime + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + COMPONENT Runtime + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/layers + COMPONENT Development +) - add_library(TensorArray::Layers ALIAS tensorarray_layers) -endblock() +add_library(TensorArray::Layers ALIAS tensorarray_layers) From 5f938cf94a8b068197861cddedabef7149800e19 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:39:17 +0000 Subject: [PATCH 209/281] test --- cmake/ta_core_config.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 6c51ca4..3187de5 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -34,6 +34,7 @@ elseif(CMAKE_CUDA_COMPILER) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) set_property(TARGET tensorarray_core PROPERTY CMAKE_CUDA_SEPARABLE_COMPILATION ON) + target_include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) # find_package(CUDAToolkit REQUIRED) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) From a41db58a421aaff9b3e5cbb4a374f2a3d8350131 Mon Sep 17 00:00:00 2001 
From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 16:42:00 +0000 Subject: [PATCH 210/281] test --- cmake/ta_core_config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 3187de5..1fc78f9 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -34,7 +34,7 @@ elseif(CMAKE_CUDA_COMPILER) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) set_property(TARGET tensorarray_core PROPERTY CMAKE_CUDA_SEPARABLE_COMPILATION ON) - target_include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + target_include_directories(tensorarray_core PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) # find_package(CUDAToolkit REQUIRED) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) From a39381175b4972a4b10cd496c964873fcd7a88ca Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 25 Jul 2025 23:46:34 +0700 Subject: [PATCH 211/281] Update ta_core_config.cmake --- cmake/ta_core_config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 1fc78f9..06ecd5b 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -33,7 +33,7 @@ elseif(CMAKE_CUDA_COMPILER) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - set_property(TARGET tensorarray_core PROPERTY CMAKE_CUDA_SEPARABLE_COMPILATION ON) + set_property(TARGET tensorarray_core PROPERTY CUDA_SEPARABLE_COMPILATION ON) target_include_directories(tensorarray_core PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) # find_package(CUDAToolkit REQUIRED) From b6452b22f9abaafcbd6758d39874a40e272b025c Mon Sep 17 00:00:00 2001 From: Noob 
<63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 26 Jul 2025 00:54:38 +0000 Subject: [PATCH 212/281] test --- cmake/ta_core_config.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 06ecd5b..66be496 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -29,12 +29,14 @@ if(CMAKE_HIP_COMPILER) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") elseif(CMAKE_CUDA_COMPILER) enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) set_property(TARGET tensorarray_core PROPERTY CUDA_SEPARABLE_COMPILATION ON) - target_include_directories(tensorarray_core PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + target_include_directories(tensorarray_core PRIVATE $<$:${CUDAToolkit_INCLUDE_DIRS}>) + target_link_libraries(tensorarray_core PRIVATE $<$:CUDA::cublas>) # find_package(CUDAToolkit REQUIRED) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) From ab42202c4fd95672c226335d516bdb363225f0e9 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 29 Jul 2025 07:43:25 +0000 Subject: [PATCH 213/281] test --- .github/workflows/cmake-multi-platform.yml | 36 ++++--- scripts/actions/install-rocm-ubuntu.sh | 115 +++++++++++++++++++++ 2 files changed, 134 insertions(+), 17 deletions(-) create mode 100644 scripts/actions/install-rocm-ubuntu.sh diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 4612ea4..2d77b5a 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -20,34 +20,35 @@ jobs: strategy: fail-fast: false matrix: - os: [ 
"ubuntu-22.04", "ubuntu-22.04-arm", "windows-latest" ] - cuda-version: [ "12.9.1", "12.4.1" ] + os: [ "ubuntu-22.04", "ubuntu-22.04-arm" ] + gpu-compiler: [ "cuda", "rocm" ] exclude: + - os: "ubuntu-22.04-arm" + gpu-compiler: "rocm" + include: + - gpu-compiler: "cuda" + cuda-version: "12.9" + - gpu-compiler: "cuda" + cuda-version: "12.4" + - gpu-compiler: "rocm" + rocm-version: "6.4.2" - os: "windows-latest" - cuda-version: "12.9.1" # CUDA 12.9.1 is not available for ARM64 + gpu-compiler: "cuda" + cuda-version: "12.4.1" runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 - name: Run CUDA bash shell Ubuntu/Debian - if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian') + if: (startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian')) env: temp: ${{ runner.temp }} cuda: ${{ matrix.cuda-version }} + rocm: ${{ matrix.rocm-version }} run: | - chmod +x ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh - ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh - shell: bash - - - name: Run CUDA bash shell RHEL - if: startsWith(matrix.os, 'rhel') - env: - temp: ${{ runner.temp }} - cuda: ${{ matrix.cuda-version }} - run: | - chmod +x ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh - ${{github.workspace}}/scripts/actions/install-cuda-rhel.sh + chmod +x ${{github.workspace}}/scripts/actions/install-${{ matrix.gpu-compiler }}-ubuntu.sh + ${{github.workspace}}/scripts/actions/install-${{ matrix.gpu-compiler }}-ubuntu.sh shell: bash - name: Run CUDA bash shell Windows @@ -55,7 +56,8 @@ jobs: env: temp: ${{ runner.temp }} cuda: ${{ matrix.cuda-version }} - run: scripts/actions/install-cuda-windows.ps1 + rocm: ${{ matrix.rocm-version }} + run: scripts/actions/install-${{ matrix.gpu-compiler }}-windows.ps1 shell: pwsh - name: Configure CMake diff --git a/scripts/actions/install-rocm-ubuntu.sh b/scripts/actions/install-rocm-ubuntu.sh new file mode 100644 index 0000000..ddd8252 --- /dev/null +++ 
b/scripts/actions/install-rocm-ubuntu.sh @@ -0,0 +1,115 @@ +ROCM_PACKAGES_IN=( + rocm-hip-runtime-devel +) + +function version_ge() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$(printf '%s\n' "$@" | sort -V | head -n 1)" == "$2" ] +} +# returns 0 (true) if a > b +function version_gt() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$1" = "$2" ] && return 1 || version_ge $1 $2 +} +# returns 0 (true) if a <= b +function version_le() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$(printf '%s\n' "$@" | sort -V | head -n 1)" == "$1" ] +} +# returns 0 (true) if a < b +function version_lt() { + [ "$#" != "2" ] && echo "${FUNCNAME[0]} requires exactly 2 arguments." && exit 1 + [ "$1" = "$2" ] && return 1 || version_le $1 $2 +} + + +LINUX_ID=$(lsb_release -si) +LINUX_ID="${LINUX_ID,,}" + +LINUX_VERSION=$(lsb_release -sr) +LINUX_VERSION="${LINUX_VERSION//.}" + +LOCATION_TEMP=${temp} + +CUDA_VERSION_MAJOR_MINOR=${rocm} + +CPU_ARCH=$(uname -m) +if [[ "${CPU_ARCH}" == "aarch64" ]] +then + CPU_ARCH="sbsa" +fi + + +ROCM_PACKAGES="" +for package in "${ROCM_PACKAGES_IN[@]}" +do : + # Build the full package name and append to the string. + ROCM_PACKAGES+=" ${package}-${CUDA_MAJOR}-${CUDA_MINOR}" +done +echo "ROCM_PACKAGES ${ROCM_PACKAGES}" + +GPG_FILENAME="rocm.gpg.key" +GPG_URL="https://repo.radeon.com/rocm/${GPG_FILENAME}" +REPO_URL="https://repo.radeon.com/rocm/apt/${rocm}/" + +is_root=false +if (( $EUID == 0)) +then + is_root=true +fi +# Find if sudo is available +has_sudo=false +if command -v sudo &> /dev/null +then + has_sudo=true +fi +# Decide if we can proceed or not (root or sudo is required) and if so store whether sudo should be used or not. +if [ "$is_root" = false ] && [ "$has_sudo" = false ] +then + echo "Root or sudo is required. Aborting." 
+ exit 1 +elif [ "$is_root" = false ] +then + USE_SUDO=sudo +else + USE_SUDO= +fi + +if [ -e /etc/apt/keyrings ] +then + $USE_SUDO mkdir --parents --mode=0755 /etc/apt/keyrings +fi + +ROCM_GPG_KEYRING=/etc/apt/keyrings/rocm.gpg + +echo "Adding CUDA Repository" +wget ${GPG_URL} -O - | \ + gpg --dearmor | $USE_SUDO tee ${ROCM_GPG_KEYRING} > /dev/null +echo "deb [arch=amd64 signed-by=${ROCM_GPG_KEYRING}] ${REPO_URL} jammy main" \ + | $USE_SUDO tee /etc/apt/sources.list.d/rocm.list +echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \ + | $USE_SUDO tee /etc/apt/preferences.d/rocm-pin-600 +$USE_SUDO apt-get update + +$USE_SUDO apt-get -y install ${ROCM_PACKAGES} + +if [[ $? -ne 0 ]] +then + echo "CUDA Installation Error." + exit 1 +fi + +ROCM_PATH=/opt/rocm-${rocm} +echo "ROCM_PATH=${ROCM_PATH}" +export ROCM_PATH=${ROCM_PATH} +export PATH="$PATH:$ROCM_PATH/bin" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$ROCM_PATH/lib" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$ROCM_PATH/lib64" + +if [[ $GITHUB_ACTIONS ]] +then + echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" + echo "${ROCM_PATH}/bin" >> $GITHUB_PATH + echo "ROCM_PATH=${ROCM_PATH}" >> $GITHUB_ENV + echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> $GITHUB_ENV +fi From c14e70b6a0002df976f1cc04060a0ff4e93a0272 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 29 Jul 2025 07:51:17 +0000 Subject: [PATCH 214/281] test --- .github/workflows/cmake-multi-platform.yml | 1 - scripts/actions/install-rocm-ubuntu.sh | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 2d77b5a..222a455 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -21,7 +21,6 @@ jobs: fail-fast: false matrix: os: [ "ubuntu-22.04", "ubuntu-22.04-arm" ] - gpu-compiler: [ "cuda", "rocm" ] exclude: - os: 
"ubuntu-22.04-arm" gpu-compiler: "rocm" diff --git a/scripts/actions/install-rocm-ubuntu.sh b/scripts/actions/install-rocm-ubuntu.sh index ddd8252..0629561 100644 --- a/scripts/actions/install-rocm-ubuntu.sh +++ b/scripts/actions/install-rocm-ubuntu.sh @@ -31,7 +31,7 @@ LINUX_VERSION="${LINUX_VERSION//.}" LOCATION_TEMP=${temp} -CUDA_VERSION_MAJOR_MINOR=${rocm} +ROCM_VERSION_MAJOR_MINOR=${rocm} CPU_ARCH=$(uname -m) if [[ "${CPU_ARCH}" == "aarch64" ]] @@ -44,7 +44,7 @@ ROCM_PACKAGES="" for package in "${ROCM_PACKAGES_IN[@]}" do : # Build the full package name and append to the string. - ROCM_PACKAGES+=" ${package}-${CUDA_MAJOR}-${CUDA_MINOR}" + ROCM_PACKAGES+=" ${package}" done echo "ROCM_PACKAGES ${ROCM_PACKAGES}" @@ -82,7 +82,7 @@ fi ROCM_GPG_KEYRING=/etc/apt/keyrings/rocm.gpg -echo "Adding CUDA Repository" +echo "Adding ROCm Repository" wget ${GPG_URL} -O - | \ gpg --dearmor | $USE_SUDO tee ${ROCM_GPG_KEYRING} > /dev/null echo "deb [arch=amd64 signed-by=${ROCM_GPG_KEYRING}] ${REPO_URL} jammy main" \ @@ -95,7 +95,7 @@ $USE_SUDO apt-get -y install ${ROCM_PACKAGES} if [[ $? -ne 0 ]] then - echo "CUDA Installation Error." + echo "ROCm Installation Error." 
exit 1 fi @@ -108,7 +108,7 @@ export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$ROCM_PATH/lib64" if [[ $GITHUB_ACTIONS ]] then - echo "Adding CUDA to CUDA_PATH, PATH and LD_LIBRARY_PATH" + echo "Adding ROCM to ROCM_PATH, PATH and LD_LIBRARY_PATH" echo "${ROCM_PATH}/bin" >> $GITHUB_PATH echo "ROCM_PATH=${ROCM_PATH}" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> $GITHUB_ENV From 39771eb13c59c07a42bbb900a068c69ffed2cc7e Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 29 Jul 2025 08:05:21 +0000 Subject: [PATCH 215/281] test --- .github/workflows/cmake-multi-platform.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 222a455..2d77b5a 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -21,6 +21,7 @@ jobs: fail-fast: false matrix: os: [ "ubuntu-22.04", "ubuntu-22.04-arm" ] + gpu-compiler: [ "cuda", "rocm" ] exclude: - os: "ubuntu-22.04-arm" gpu-compiler: "rocm" From d92dcafdb695add67fa94461f4d6801a5a1ae80d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 29 Jul 2025 08:19:49 +0000 Subject: [PATCH 216/281] test --- scripts/actions/install-rocm-ubuntu.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/actions/install-rocm-ubuntu.sh b/scripts/actions/install-rocm-ubuntu.sh index 0629561..610dcb9 100644 --- a/scripts/actions/install-rocm-ubuntu.sh +++ b/scripts/actions/install-rocm-ubuntu.sh @@ -75,20 +75,24 @@ else USE_SUDO= fi -if [ -e /etc/apt/keyrings ] +KEYRINGS_DIR=/etc/apt/keyrings + +if [ ! 
-e $KEYRINGS_DIR ] then - $USE_SUDO mkdir --parents --mode=0755 /etc/apt/keyrings + echo "Create directory: ${KEYRINGS_DIR}" + $USE_SUDO mkdir --parents --mode=0755 ${KEYRINGS_DIR} fi -ROCM_GPG_KEYRING=/etc/apt/keyrings/rocm.gpg +ROCM_GPG_KEYRING=${KEYRINGS_DIR}/rocm.gpg -echo "Adding ROCm Repository" +echo "Adding ROCm Repository:" wget ${GPG_URL} -O - | \ gpg --dearmor | $USE_SUDO tee ${ROCM_GPG_KEYRING} > /dev/null echo "deb [arch=amd64 signed-by=${ROCM_GPG_KEYRING}] ${REPO_URL} jammy main" \ | $USE_SUDO tee /etc/apt/sources.list.d/rocm.list echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \ | $USE_SUDO tee /etc/apt/preferences.d/rocm-pin-600 +echo "Adding ROCm Repository completed." $USE_SUDO apt-get update $USE_SUDO apt-get -y install ${ROCM_PACKAGES} From cf839b72fa6612b4c87870de54a4f0a9735bd177 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 29 Jul 2025 08:41:12 +0000 Subject: [PATCH 217/281] test --- .github/workflows/cmake-multi-platform.yml | 6 ++---- scripts/actions/install-rocm-ubuntu.sh | 5 ++++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 2d77b5a..6bdd52b 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -26,8 +26,6 @@ jobs: - os: "ubuntu-22.04-arm" gpu-compiler: "rocm" include: - - gpu-compiler: "cuda" - cuda-version: "12.9" - gpu-compiler: "cuda" cuda-version: "12.4" - gpu-compiler: "rocm" @@ -40,7 +38,7 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Run CUDA bash shell Ubuntu/Debian + - name: Run ${{ matrix.gpu-compiler }} bash shell Ubuntu/Debian if: (startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian')) env: temp: ${{ runner.temp }} @@ -51,7 +49,7 @@ jobs: ${{github.workspace}}/scripts/actions/install-${{ matrix.gpu-compiler }}-ubuntu.sh shell: bash - - name: Run CUDA bash shell Windows 
+ - name: Run ${{ matrix.gpu-compiler }} bash shell Windows if: runner.os == 'Windows' env: temp: ${{ runner.temp }} diff --git a/scripts/actions/install-rocm-ubuntu.sh b/scripts/actions/install-rocm-ubuntu.sh index 610dcb9..11e4426 100644 --- a/scripts/actions/install-rocm-ubuntu.sh +++ b/scripts/actions/install-rocm-ubuntu.sh @@ -29,6 +29,9 @@ LINUX_ID="${LINUX_ID,,}" LINUX_VERSION=$(lsb_release -sr) LINUX_VERSION="${LINUX_VERSION//.}" +LINUX_CODENAME=$(lsb_release -cs) +LINUX_CODENAME="${LINUX_CODENAME,,}" + LOCATION_TEMP=${temp} ROCM_VERSION_MAJOR_MINOR=${rocm} @@ -88,7 +91,7 @@ ROCM_GPG_KEYRING=${KEYRINGS_DIR}/rocm.gpg echo "Adding ROCm Repository:" wget ${GPG_URL} -O - | \ gpg --dearmor | $USE_SUDO tee ${ROCM_GPG_KEYRING} > /dev/null -echo "deb [arch=amd64 signed-by=${ROCM_GPG_KEYRING}] ${REPO_URL} jammy main" \ +echo "deb [arch=amd64 signed-by=${ROCM_GPG_KEYRING}] ${REPO_URL} ${LINUX_CODENAME} main" \ | $USE_SUDO tee /etc/apt/sources.list.d/rocm.list echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \ | $USE_SUDO tee /etc/apt/preferences.d/rocm-pin-600 From 36f65e1a9aefd11eec2735b3d99d720f5fa671ff Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 29 Jul 2025 09:12:12 +0000 Subject: [PATCH 218/281] test --- scripts/actions/install-rocm-ubuntu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/actions/install-rocm-ubuntu.sh b/scripts/actions/install-rocm-ubuntu.sh index 11e4426..3204253 100644 --- a/scripts/actions/install-rocm-ubuntu.sh +++ b/scripts/actions/install-rocm-ubuntu.sh @@ -90,7 +90,7 @@ ROCM_GPG_KEYRING=${KEYRINGS_DIR}/rocm.gpg echo "Adding ROCm Repository:" wget ${GPG_URL} -O - | \ - gpg --dearmor | $USE_SUDO tee ${ROCM_GPG_KEYRING} > /dev/null + gpg --dearmor | $USE_SUDO tee ${ROCM_GPG_KEYRING} echo "deb [arch=amd64 signed-by=${ROCM_GPG_KEYRING}] ${REPO_URL} ${LINUX_CODENAME} main" \ | $USE_SUDO tee /etc/apt/sources.list.d/rocm.list echo -e 'Package: 
*\nPin: release o=repo.radeon.com\nPin-Priority: 600' \ From e7b38035ff701865fbacacafe3dfdc92f39afbd0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 29 Jul 2025 09:12:51 +0000 Subject: [PATCH 219/281] test --- scripts/actions/install-rocm-ubuntu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/actions/install-rocm-ubuntu.sh b/scripts/actions/install-rocm-ubuntu.sh index 3204253..11e4426 100644 --- a/scripts/actions/install-rocm-ubuntu.sh +++ b/scripts/actions/install-rocm-ubuntu.sh @@ -90,7 +90,7 @@ ROCM_GPG_KEYRING=${KEYRINGS_DIR}/rocm.gpg echo "Adding ROCm Repository:" wget ${GPG_URL} -O - | \ - gpg --dearmor | $USE_SUDO tee ${ROCM_GPG_KEYRING} + gpg --dearmor | $USE_SUDO tee ${ROCM_GPG_KEYRING} > /dev/null echo "deb [arch=amd64 signed-by=${ROCM_GPG_KEYRING}] ${REPO_URL} ${LINUX_CODENAME} main" \ | $USE_SUDO tee /etc/apt/sources.list.d/rocm.list echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \ From ffc2bb110cb40dbec02e8a2a0c9b2e5b94754eb0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 29 Jul 2025 23:35:57 +0700 Subject: [PATCH 220/281] Update install-rocm-ubuntu.sh --- scripts/actions/install-rocm-ubuntu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/actions/install-rocm-ubuntu.sh b/scripts/actions/install-rocm-ubuntu.sh index 11e4426..7039c8d 100644 --- a/scripts/actions/install-rocm-ubuntu.sh +++ b/scripts/actions/install-rocm-ubuntu.sh @@ -1,5 +1,5 @@ ROCM_PACKAGES_IN=( - rocm-hip-runtime-devel + rocm-hip-runtime-dev ) function version_ge() { From fa9dd2bc6e41a217764083778cdecadc12ae3c92 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 30 Jul 2025 00:15:48 +0700 Subject: [PATCH 221/281] Update ta_core_config.cmake --- cmake/ta_core_config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 66be496..ce01b10 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -23,7 +23,7 @@ if(CMAKE_HIP_COMPILER) set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) endforeach() - target_link_libraries(tensorarray_core PRIVATE hip::host hip::device) + target_link_libraries(tensorarray_core PRIVATE hip::host) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") From e88c07b6d1d7f128a9600a254e11c0a3a95521f9 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 30 Jul 2025 12:54:20 +0000 Subject: [PATCH 222/281] test --- .github/workflows/cmake-multi-platform.yml | 37 ++++++++++++---------- cmake/ta_core_config.cmake | 24 ++------------ 2 files changed, 23 insertions(+), 38 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 6bdd52b..cb55785 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -21,41 +21,46 @@ jobs: fail-fast: false matrix: os: [ "ubuntu-22.04", "ubuntu-22.04-arm" ] - gpu-compiler: [ "cuda", "rocm" ] - exclude: - - os: "ubuntu-22.04-arm" - gpu-compiler: "rocm" + cuda-version: [ "12.9", "12.4", "11.8" ] + rocm-enabled: [ false ] include: - - gpu-compiler: "cuda" - cuda-version: "12.4" - - gpu-compiler: "rocm" + - os: "ubuntu-22.04" + cuda-version: "11.8" + rocm-enabled: true rocm-version: "6.4.2" - os: "windows-latest" - gpu-compiler: "cuda" cuda-version: "12.4.1" runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 + + - name: Run ROCm bash shell Ubuntu/Debian + if: (startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian')) && ${{ matrix.rocm-enabled }} + env: + temp: ${{ runner.temp }} + rocm: ${{ matrix.rocm-version }} + run: | + chmod +x 
${{github.workspace}}/scripts/actions/install-rocm-ubuntu.sh + ${{github.workspace}}/scripts/actions/install-rocm-ubuntu.sh + shell: bash - - name: Run ${{ matrix.gpu-compiler }} bash shell Ubuntu/Debian - if: (startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian')) + - name: Run CUDA bash shell Ubuntu/Debian + if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian') env: temp: ${{ runner.temp }} cuda: ${{ matrix.cuda-version }} - rocm: ${{ matrix.rocm-version }} run: | - chmod +x ${{github.workspace}}/scripts/actions/install-${{ matrix.gpu-compiler }}-ubuntu.sh - ${{github.workspace}}/scripts/actions/install-${{ matrix.gpu-compiler }}-ubuntu.sh + chmod +x ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh + ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh shell: bash - - name: Run ${{ matrix.gpu-compiler }} bash shell Windows + - name: Run CUDA bash shell Windows if: runner.os == 'Windows' env: temp: ${{ runner.temp }} cuda: ${{ matrix.cuda-version }} - rocm: ${{ matrix.rocm-version }} - run: scripts/actions/install-${{ matrix.gpu-compiler }}-windows.ps1 + run: scripts/actions/install-cuda-windows.ps1 shell: pwsh - name: Configure CMake diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index ce01b10..454a95b 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -1,33 +1,13 @@ include(CheckLanguage) - -include(CheckLanguage) -check_language(HIP) check_language(CUDA) file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cc") -if (CMAKE_CUDA_COMPILER OR CMAKE_HIP_COMPILER) +if (CMAKE_CUDA_COMPILER) file(GLOB TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cu") endif() -if(CMAKE_HIP_COMPILER) - enable_language(HIP) - find_package(hip REQUIRED) - - add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) - set_property(TARGET tensorarray_core PROPERTY HIP_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY 
HIP_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY HIP_EXTENSIONS OFF) - - foreach(TensorArray_src_hip ${TensorArray_src_cu}) - set_source_files_properties(${TensorArray_src_hip} PROPERTIES LANGUAGE HIP) - endforeach() - - target_link_libraries(tensorarray_core PRIVATE hip::host) - # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) - # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") -elseif(CMAKE_CUDA_COMPILER) +if(CMAKE_CUDA_COMPILER) enable_language(CUDA) find_package(CUDAToolkit REQUIRED) add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) From bde5d5739c21b77b9995b20dcbf9eceb4f5075c6 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 30 Jul 2025 12:56:34 +0000 Subject: [PATCH 223/281] test --- .github/workflows/cmake-multi-platform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index cb55785..0d9a38d 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -36,7 +36,7 @@ jobs: - uses: actions/checkout@v3 - name: Run ROCm bash shell Ubuntu/Debian - if: (startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian')) && ${{ matrix.rocm-enabled }} + if: (startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian')) && matrix.rocm-enabled env: temp: ${{ runner.temp }} rocm: ${{ matrix.rocm-version }} From 8d9008a4af5b1ccc7a64caecf029472430b6af63 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 31 Jul 2025 04:37:45 +0000 Subject: [PATCH 224/281] test --- src/tensor-array/core/data_type_wrapper.cc | 12 +- src/tensor-array/core/tensor.cc | 28 +- src/tensor-array/core/tensor.hh | 8 +- src/tensor-array/core/tensor_cast.cu | 28 +- src/tensor-array/core/tensor_convolution.cu | 24 +- .../core/{tensor.cu => 
tensor_math_func.cu} | 378 +---------- src/tensor-array/core/tensor_math_op.cu | 601 ++++++++++++++++++ src/tensor-array/core/tensor_reduce.cu | 12 +- src/tensor-array/core/tensorbase.cc | 8 +- 9 files changed, 657 insertions(+), 442 deletions(-) rename src/tensor-array/core/{tensor.cu => tensor_math_func.cu} (62%) create mode 100644 src/tensor-array/core/tensor_math_op.cu diff --git a/src/tensor-array/core/data_type_wrapper.cc b/src/tensor-array/core/data_type_wrapper.cc index 95d7cbe..478f0c6 100644 --- a/src/tensor-array/core/data_type_wrapper.cc +++ b/src/tensor-array/core/data_type_wrapper.cc @@ -32,12 +32,12 @@ limitations under the License. typedef __nv_bfloat16 bfloat16; -#define USING_DATA_TYPE_NVIDIA_FLOAT_8 (__nv_fp8_e5m2)(__nv_fp8_e4m3) -#define USING_DATA_TYPE_NVIDIA_FLOAT (half)(bfloat16) -#define USING_DATA_TYPE_FLOAT (float)(double) -#define USING_DATA_TYPE_SINT (int8_t)(int16_t)(int32_t)(int64_t) -#define USING_DATA_TYPE_UINT (uint8_t)(uint16_t)(uint32_t)(uint64_t) -#define USING_DATA_TYPE USING_DATA_TYPE_SINT USING_DATA_TYPE_UINT USING_DATA_TYPE_FLOAT USING_DATA_TYPE_NVIDIA_FLOAT +#define USING_DATA_TYPE_NVIDIA_FLOAT_8() (__nv_fp8_e5m2)(__nv_fp8_e4m3) +#define USING_DATA_TYPE_NVIDIA_FLOAT() (half)(bfloat16) +#define USING_DATA_TYPE_FLOAT() (float)(double) +#define USING_DATA_TYPE_SINT() (int8_t)(int16_t)(int32_t)(int64_t) +#define USING_DATA_TYPE_UINT() (uint8_t)(uint16_t)(uint32_t)(uint64_t) +#define USING_DATA_TYPE USING_DATA_TYPE_SINT() USING_DATA_TYPE_UINT() USING_DATA_TYPE_FLOAT() USING_DATA_TYPE_NVIDIA_FLOAT() namespace tensor_array { diff --git a/src/tensor-array/core/tensor.cc b/src/tensor-array/core/tensor.cc index 531ac7b..7d10a75 100644 --- a/src/tensor-array/core/tensor.cc +++ b/src/tensor-array/core/tensor.cc @@ -27,10 +27,10 @@ limitations under the License. 
#include #include "data_type_wrapper.hh" -#define USING_DATA_TYPE_FLOAT (float)(double) -#define USING_DATA_TYPE_SINT (int8_t)(int16_t)(int32_t)(int64_t) -#define USING_DATA_TYPE_UINT (uint8_t)(uint16_t)(uint32_t)(uint64_t) -#define USING_DATA_TYPE USING_DATA_TYPE_SINT USING_DATA_TYPE_UINT USING_DATA_TYPE_FLOAT +#define USING_DATA_TYPE_FLOAT() (float)(double) +#define USING_DATA_TYPE_SINT() (int8_t)(int16_t)(int32_t)(int64_t) +#define USING_DATA_TYPE_UINT() (uint8_t)(uint16_t)(uint32_t)(uint64_t) +#define USING_DATA_TYPE USING_DATA_TYPE_SINT() USING_DATA_TYPE_UINT() USING_DATA_TYPE_FLOAT() #define LOOP(seq) END(A seq) #define BODY(x) ADD_CODE(x) @@ -744,26 +744,6 @@ temp_check_data_type = TEMP(temp.first) < TEMP(temp_tensor); return divide(a, b); } - Tensor operator!=(const Tensor& a, const Tensor& b) - { - return ab; - } - - Tensor operator==(const Tensor& a, const Tensor& b) - { - return !(a != b); - } - - Tensor operator>=(const Tensor& a, const Tensor& b) - { - return !(a < b); - } - - Tensor operator<=(const Tensor& a, const Tensor& b) - { - return !(a > b); - } - Tensor Tensor::exp() const { return this->exp(true); diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index d2b491f..a05ed91 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -37,10 +37,10 @@ limitations under the License. 
#define TENSOR_ARRAY_IMPORT_API #endif -#define USING_DATA_TYPE_FLOAT (float)(double) -#define USING_DATA_TYPE_SINT (int8_t)(int16_t)(int32_t)(int64_t) -#define USING_DATA_TYPE_UINT (uint8_t)(uint16_t)(uint32_t)(uint64_t) -#define USING_DATA_TYPE USING_DATA_TYPE_SINT USING_DATA_TYPE_UINT USING_DATA_TYPE_FLOAT +#define USING_DATA_TYPE_FLOAT() (float)(double) +#define USING_DATA_TYPE_SINT() (int8_t)(int16_t)(int32_t)(int64_t) +#define USING_DATA_TYPE_UINT() (uint8_t)(uint16_t)(uint32_t)(uint64_t) +#define USING_DATA_TYPE USING_DATA_TYPE_SINT() USING_DATA_TYPE_UINT() USING_DATA_TYPE_FLOAT() #define LOOP(seq) END(A seq) #define BODY(x) ADD_CODE(x) diff --git a/src/tensor-array/core/tensor_cast.cu b/src/tensor-array/core/tensor_cast.cu index da01ab5..e301cf0 100644 --- a/src/tensor-array/core/tensor_cast.cu +++ b/src/tensor-array/core/tensor_cast.cu @@ -35,24 +35,24 @@ limitations under the License. #define END(...) END_(__VA_ARGS__) #define END_(...) __VA_ARGS__##_END -#define USING_DATA_TYPE_NVIDIA_FLOAT_8 (__nv_fp8_e4m3)(__nv_fp8_e5m2) -#define USING_DATA_TYPE_NVIDIA_FLOAT (__half)(__nv_bfloat16) -#define USING_DATA_TYPE_FLOAT (float)(double) -#define USING_DATA_TYPE_SINT (int16_t)(int32_t)(int64_t) -#define USING_DATA_TYPE_UINT (uint16_t)(uint32_t)(uint64_t) +#define USING_DATA_TYPE_NVIDIA_FLOAT_8() (__nv_fp8_e4m3)(__nv_fp8_e5m2) +#define USING_DATA_TYPE_NVIDIA_FLOAT() (__half)(__nv_bfloat16) +#define USING_DATA_TYPE_FLOAT() (float)(double) +#define USING_DATA_TYPE_SINT() (int16_t)(int32_t)(int64_t) +#define USING_DATA_TYPE_UINT() (uint16_t)(uint32_t)(uint64_t) -#define USING_DATA_TYPE_CAST_FROM \ +#define USING_DATA_TYPE_CAST_FROM() \ (__nv_fp8_e4m3) \ -USING_DATA_TYPE_SINT \ -USING_DATA_TYPE_UINT \ -USING_DATA_TYPE_FLOAT \ -USING_DATA_TYPE_NVIDIA_FLOAT +USING_DATA_TYPE_SINT() \ +USING_DATA_TYPE_UINT() \ +USING_DATA_TYPE_FLOAT() \ +USING_DATA_TYPE_NVIDIA_FLOAT() -#define USING_DATA_TYPE_CAST_TO \ +#define USING_DATA_TYPE_CAST_TO() \ (bool) \ (int8_t) \ (uint8_t) \ 
-USING_DATA_TYPE_CAST_FROM +USING_DATA_TYPE_CAST_FROM() namespace tensor_array { @@ -88,7 +88,7 @@ namespace tensor_array #define ADD_CODE(TYPE) \ if(this->get_buffer().type() == typeid(TYPE)) \ type_casting<<>>(out_ptr, static_cast(base_of_this.data()), total_size); - LOOP(USING_DATA_TYPE_CAST_FROM); + LOOP(USING_DATA_TYPE_CAST_FROM()); #undef ADD_CODE cuda_status = cudaDeviceSynchronize(); cuda_status = cudaGetLastError(); @@ -111,7 +111,7 @@ type_casting<<>>(out_ptr, static_cast(base_of_ #define ADD_CODE(TYPE) \ if(dtype == typeid(TYPE)) \ return this->cast(is_derive); - LOOP(USING_DATA_TYPE_CAST_TO); + LOOP(USING_DATA_TYPE_CAST_TO()); #undef ADD_CODE throw std::exception(); } diff --git a/src/tensor-array/core/tensor_convolution.cu b/src/tensor-array/core/tensor_convolution.cu index 38e1489..6b3217d 100644 --- a/src/tensor-array/core/tensor_convolution.cu +++ b/src/tensor-array/core/tensor_convolution.cu @@ -28,29 +28,29 @@ limitations under the License. #endif #if __CUDA_ARCH__ >= 800 -#define USE_BF16 (__nv_bfloat16) +#define USE_BF16() (__nv_bfloat16) #else -#define USE_BF16 +#define USE_BF16() #endif #if __CUDA_ARCH__ >= 700 -#define USE_FP16 (__half) +#define USE_FP16() (__half) #else -#define USE_FP16 +#define USE_FP16() #endif #if __CUDA_ARCH__ >= 600 -#define USE_FP64 (double) +#define USE_FP64() (double) #else -#define USE_FP64 +#define USE_FP64() #endif -#define USING_DATA_TYPE_NVIDIA_FLOAT_8 (__nv_fp8_e5m2)(__nv_fp8_e4m3) -#define USING_DATA_TYPE_NVIDIA_FLOAT USE_FP16 USE_BF16 -#define USING_DATA_TYPE_FLOAT (float)USE_FP64 -#define USING_DATA_TYPE_SINT (int32_t) -#define USING_DATA_TYPE_UINT (uint32_t)(unsigned long long int) -#define USING_DATA_TYPE USING_DATA_TYPE_SINT USING_DATA_TYPE_UINT USING_DATA_TYPE_FLOAT USING_DATA_TYPE_NVIDIA_FLOAT +#define USING_DATA_TYPE_NVIDIA_FLOAT_8() (__nv_fp8_e5m2)(__nv_fp8_e4m3) +#define USING_DATA_TYPE_NVIDIA_FLOAT() USE_FP16() USE_BF16() +#define USING_DATA_TYPE_FLOAT() (float) USE_FP64() +#define 
USING_DATA_TYPE_SINT() (int32_t) +#define USING_DATA_TYPE_UINT() (uint32_t)(unsigned long long int) +#define USING_DATA_TYPE USING_DATA_TYPE_SINT() USING_DATA_TYPE_UINT() USING_DATA_TYPE_FLOAT() USING_DATA_TYPE_NVIDIA_FLOAT() #define LOOP(seq) END(A seq) #define BODY(x) ADD_CODE(x) diff --git a/src/tensor-array/core/tensor.cu b/src/tensor-array/core/tensor_math_func.cu similarity index 62% rename from src/tensor-array/core/tensor.cu rename to src/tensor-array/core/tensor_math_func.cu index 5587200..a46ce81 100644 --- a/src/tensor-array/core/tensor.cu +++ b/src/tensor-array/core/tensor_math_func.cu @@ -31,12 +31,12 @@ limitations under the License. #undef TENSOR_CONTENT #endif // !TENSOR_CONTENT -#define USING_DATA_TYPE_NVIDIA_FLOAT_8 (__nv_fp8_e5m2)(__nv_fp8_e4m3) -#define USING_DATA_TYPE_NVIDIA_FLOAT (__half)(__nv_bfloat16) -#define USING_DATA_TYPE_FLOAT (float)(double) -#define USING_DATA_TYPE_SINT (int8_t)(int16_t)(int32_t)(int64_t) -#define USING_DATA_TYPE_UINT (uint8_t)(uint16_t)(uint32_t)(uint64_t) -#define USING_DATA_TYPE USING_DATA_TYPE_SINT USING_DATA_TYPE_UINT USING_DATA_TYPE_FLOAT USING_DATA_TYPE_NVIDIA_FLOAT +#define USING_DATA_TYPE_NVIDIA_FLOAT_8() (__nv_fp8_e5m2)(__nv_fp8_e4m3) +#define USING_DATA_TYPE_NVIDIA_FLOAT() (__half)(__nv_bfloat16) +#define USING_DATA_TYPE_FLOAT() (float)(double) +#define USING_DATA_TYPE_SINT() (int8_t)(int16_t)(int32_t)(int64_t) +#define USING_DATA_TYPE_UINT() (uint8_t)(uint16_t)(uint32_t)(uint64_t) +#define USING_DATA_TYPE USING_DATA_TYPE_SINT() USING_DATA_TYPE_UINT() USING_DATA_TYPE_FLOAT() USING_DATA_TYPE_NVIDIA_FLOAT() #define LOOP(seq) END(A seq) #define BODY(x) ADD_CODE(x) @@ -70,30 +70,6 @@ namespace tensor_array value_arr[thread_x] = curand_uniform(&thisState); } - template - __global__ void sum_2_arr(T c[], const T a[], const T b[], unsigned int c_size) - { - unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_x < c_size) - c[thread_x] = a[thread_x] + b[thread_x]; - } - - template - 
__global__ void mul_2_arr(T c[], const T a[], const T b[], unsigned int c_size) - { - unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_x < c_size) - c[thread_x] = a[thread_x] * b[thread_x]; - } - - template - __global__ void div_2_arr(T c[], const T a[], const T b[], unsigned int c_size) - { - unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_x < c_size) - c[thread_x] = a[thread_x] / b[thread_x]; - } - __global__ void exp_arr(float value_out[], const float value_in[], unsigned int c_size) { unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; @@ -206,14 +182,6 @@ namespace tensor_array value_out[thread_x] = tanh(value_in[thread_x]); } - template - __global__ void sigmoid_arr(T value_out[], const T value_in[], unsigned int c_size) - { - unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_x < c_size) - value_out[thread_x] = T(1) / (T(1) + T(exp(double(-value_in[thread_x])))); - } - template __global__ void pow_arr(T value_out[], const T a[], const T b[], unsigned int c_size) { @@ -225,45 +193,6 @@ namespace tensor_array value_out[thread_x] = powf(a[thread_x], b[thread_x]); } - template - __global__ void arr_more_than(bool out_value[], const T in1_value[], const T in2_value[], unsigned int c_size) - { - unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_x < c_size) - out_value[thread_x] = in1_value[thread_x] > in2_value[thread_x]; - } - - template - __global__ void arr_less_than(bool out_value[], const T in1_value[], const T in2_value[], unsigned int c_size) - { - unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_x < c_size) - out_value[thread_x] = in1_value[thread_x] < in2_value[thread_x]; - } - - __global__ void arr_logical_and(bool out_value[], const bool in1_value[], const bool in2_value[], unsigned int c_size) - { - unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_x < c_size) - out_value[thread_x] = 
in1_value[thread_x] && in2_value[thread_x]; - } - - - __global__ void arr_logical_or(bool out_value[], const bool in1_value[], const bool in2_value[], unsigned int c_size) - { - unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_x < c_size) - out_value[thread_x] = in1_value[thread_x] || in2_value[thread_x]; - } - - - __global__ void arr_logical_not(bool out_value[], const bool in1_value[], unsigned int c_size) - { - unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; - if (thread_x < c_size) - out_value[thread_x] = !in1_value[thread_x]; - } - template __global__ void array_condition(T out_value[], unsigned int c_size, const bool bool_value[], const T true_value[], const T false_value[]) { @@ -388,190 +317,6 @@ return values0(list_dim, value); \ return other_buf; } - Tensor operator>(const Tensor& a, const Tensor& b) - { - assert(equal_dim_size(a.get_buffer(), b.get_buffer())); - cudaError cuda_status; - bool* c_ptr; - Device this_cuda{ CUDA }; - cuda_status = cudaGetDevice(&this_cuda.index); - cudaDeviceProp cu_dev_prop; - cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - TensorBase base_a = a.get_buffer().change_device(this_cuda); - TensorBase base_b = b.get_buffer().change_device(this_cuda); - std::size_t c_size = std::max - ( - base_a.data_size() / get_sizeof_type(base_a.type()), - base_b.data_size() / get_sizeof_type(base_b.type()) - ); - cuda_status = cudaMalloc(&c_ptr, c_size * sizeof(bool)); - dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); - dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); -#define ADD_CODE(TYPE) \ -if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ -arr_more_than<<>>(c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); - LOOP(USING_DATA_TYPE); -#undef ADD_CODE - cuda_status = cudaDeviceSynchronize(); - cuda_status = cudaGetLastError(); - if (cuda_status != cudaSuccess) - { - std::printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); - } - TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); - cuda_status = cudaFree(c_ptr); - return other_buf; - } - - Tensor operator<(const Tensor& a, const Tensor& b) - { - assert(equal_dim_size(a.get_buffer(), b.get_buffer())); - cudaError cuda_status; - bool* c_ptr; - Device this_cuda{CUDA}; - cuda_status = cudaGetDevice(&this_cuda.index); - cudaDeviceProp cu_dev_prop; - cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - TensorBase base_a = a.get_buffer().change_device(this_cuda); - TensorBase base_b = b.get_buffer().change_device(this_cuda); - std::size_t c_size = std::max - ( - a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), - b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) - ); - cuda_status = cudaMalloc(&c_ptr, c_size * sizeof(bool)); - dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); - dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); -#define ADD_CODE(TYPE) \ -if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ -arr_less_than<<>>(c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); - LOOP(USING_DATA_TYPE); -#undef ADD_CODE - cuda_status = cudaDeviceSynchronize(); - TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); - cuda_status = cudaFree(c_ptr); - return other_buf; - } - - Tensor operator&&(const Tensor& a, const Tensor& b) - { - assert( - equal_dim_size(a.get_buffer(), b.get_buffer()) - && a.get_buffer().type() == typeid(bool) - && b.get_buffer().type() == typeid(bool) - ); - cudaError cuda_status; - bool* c_ptr; - devices::Device this_cuda{ devices::CUDA }; - cuda_status = cudaGetDevice(&this_cuda.index); - cudaDeviceProp cu_dev_prop; - cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - TensorBase base_a = a.get_buffer().change_device(this_cuda); - TensorBase base_b = b.get_buffer().change_device(this_cuda); - std::size_t c_size = std::max - ( - a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), - b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) - ); - cuda_status = cudaMalloc(&c_ptr, c_size); - dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); - dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); - arr_logical_and << > > (c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); - cuda_status = cudaDeviceSynchronize(); - TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); - cuda_status = cudaFree(c_ptr); - return other_buf; - } - - Tensor operator||(const Tensor& a, const Tensor& b) - { - assert( - equal_dim_size(a.get_buffer(), b.get_buffer()) - && a.get_buffer().type() == typeid(bool) - && b.get_buffer().type() == typeid(bool) - ); - cudaError cuda_status; - bool* c_ptr; - devices::Device this_cuda{ devices::CUDA }; - cuda_status = cudaGetDevice(&this_cuda.index); - cudaDeviceProp cu_dev_prop; - cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - TensorBase base_a = a.get_buffer().change_device(this_cuda); - TensorBase base_b = b.get_buffer().change_device(this_cuda); - std::size_t c_size = std::max - ( - a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), - b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) - ); - cuda_status = cudaMalloc(&c_ptr, c_size); - dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); - dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); - arr_logical_or << > > (c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); - cuda_status = cudaDeviceSynchronize(); - TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); - cuda_status = cudaFree(c_ptr); - return other_buf; - } - - Tensor Tensor::operator!() - { - assert(this->get_buffer().type() == typeid(bool)); - cudaError cuda_status; - bool* out_ptr; - devices::Device this_cuda{ devices::CUDA }; - cuda_status = cudaGetDevice(&this_cuda.index); - cudaDeviceProp cu_dev_prop; - cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - TensorBase base_of_this = this->get_buffer().change_device(this_cuda); - cuda_status = cudaMalloc(&out_ptr, this->get_buffer().data_size()); - dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); - dim3 grid_dim(this->get_buffer().data_size() / block_dim.x + 1U); - arr_logical_not << < grid_dim, block_dim >> > (out_ptr, static_cast(base_of_this.data()), this->get_buffer().data_size()); - cuda_status = cudaDeviceSynchronize(); - TensorBase other_buf(typeid(bool), this->get_buffer().shape(), out_ptr, this_cuda); - cuda_status = cudaFree(out_ptr); - return other_buf; - } - - Tensor multiply(const Tensor& a, const Tensor& b, bool is_derive, const DataBuffer&) - { - assert(equal_dim_size(a.get_buffer(), b.get_buffer())); - std::vector> temp; - if (is_derive) - { - temp.push_back(std::make_pair(a, Derivation(b.clone(), multiply))); - temp.push_back(std::make_pair(b, Derivation(a.clone(), multiply))); - } - cudaError cuda_status; - TensorBase other_buf; - void* c_ptr; - devices::Device this_cuda{ devices::CUDA }; - cuda_status = cudaGetDevice(&this_cuda.index); - cudaDeviceProp cu_dev_prop; - cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - TensorBase base_a = a.get_buffer().change_device(this_cuda); - TensorBase base_b = b.get_buffer().change_device(this_cuda); - std::size_t c_size = std::max - ( - a.get_buffer().data_size() / 
get_sizeof_type(a.get_buffer().type()), - b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) - ); - cuda_status = cudaMalloc(&c_ptr, std::max(a.get_buffer().data_size(), b.get_buffer().data_size())); - dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); - dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 1U : 0U)); -#define ADD_CODE(TYPE) \ -if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ -{ \ -mul_2_arr<<>>(static_cast(c_ptr), static_cast(base_a.data()), static_cast(base_b.data()), c_size); \ -cuda_status = cudaDeviceSynchronize(); \ -other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ -} - LOOP(USING_DATA_TYPE); -#undef ADD_CODE - cuda_status = cudaFree(c_ptr); - return Tensor(std::move(other_buf), std::move(temp)); - } - Tensor condition(const Tensor& bool_value, const Tensor& true_value, const Tensor& false_value, bool is_derive) { assert( @@ -615,84 +360,6 @@ other_buf = TensorBase(typeid(TYPE), bool_value.get_buffer().shape(), ptr_out, t return Tensor(std::move(other_buf), std::move(temp)); } - Tensor add(const Tensor& a, const Tensor& b, bool is_derive) - { - assert(equal_dim_size(a.get_buffer(), b.get_buffer())); - std::vector> temp; - if (is_derive) - { - temp.push_back(std::make_pair(a, Derivation(values(a.get_buffer().shape(), 1).tensor_cast(a.get_buffer().type(), false), multiply))); - temp.push_back(std::make_pair(b, Derivation(values(b.get_buffer().shape(), 1).tensor_cast(b.get_buffer().type(), false), multiply))); - } - cudaError cuda_status; - TensorBase other_buf; - void* c_ptr; - devices::Device this_cuda{ devices::CUDA }; - cuda_status = cudaGetDevice(&this_cuda.index); - cudaDeviceProp cu_dev_prop; - cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - TensorBase base_a = a.get_buffer().change_device(this_cuda); - TensorBase base_b = b.get_buffer().change_device(this_cuda); - std::size_t c_size = std::max - ( - a.get_buffer().data_size() / 
get_sizeof_type(a.get_buffer().type()), - b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) - ); - cuda_status = cudaMalloc(&c_ptr, std::max(a.get_buffer().data_size(), b.get_buffer().data_size())); - dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); - dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 1U : 0U)); -#define ADD_CODE(TYPE) \ -if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ -{ \ -sum_2_arr<<>>(static_cast(c_ptr), static_cast(base_a.data()), static_cast(base_b.data()), c_size); \ -cuda_status = cudaDeviceSynchronize(); \ -other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ -} - LOOP(USING_DATA_TYPE); -#undef ADD_CODE - cuda_status = cudaFree(c_ptr); - return Tensor(std::move(other_buf), std::move(temp)); - } - - Tensor divide(const Tensor& a, const Tensor& b, bool is_derive) - { - assert(equal_dim_size(a.get_buffer(), b.get_buffer())); - std::vector> temp; - if (is_derive) - { - temp.push_back(std::make_pair(a, Derivation(divide(values(b.get_buffer().shape(), 1).tensor_cast(b.get_buffer().type(), false), b, false), multiply))); - temp.push_back(std::make_pair(b, Derivation(divide(a, power(b, values(b.get_buffer().shape(), 2).tensor_cast(b.get_buffer().type(), false), false), false), multiply))); - } - cudaError cuda_status; - TensorBase other_buf; - void* c_ptr; - devices::Device this_cuda{ devices::CUDA }; - cuda_status = cudaGetDevice(&this_cuda.index); - cudaDeviceProp cu_dev_prop; - cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - TensorBase base_a = a.get_buffer().change_device(this_cuda); - TensorBase base_b = b.get_buffer().change_device(this_cuda); - std::size_t c_size = std::max - ( - a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), - b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) - ); - cuda_status = cudaMalloc(&c_ptr, std::max(a.get_buffer().data_size(), b.get_buffer().data_size())); - dim3 
block_dim(cu_dev_prop.maxThreadsDim[0]); - dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 1U : 0U)); -#define ADD_CODE(TYPE) \ -if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ -{ \ -div_2_arr<<>>(static_cast(c_ptr), static_cast(base_a.data()), static_cast(base_b.data()), c_size); \ -cuda_status = cudaDeviceSynchronize(); \ -other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ -} - LOOP(USING_DATA_TYPE); -#undef ADD_CODE - cuda_status = cudaFree(c_ptr); - return Tensor(std::move(other_buf), std::move(temp)); - } - Tensor power(const Tensor& a, const Tensor& b, bool is_derive) { assert(equal_dim_size(a.get_buffer(), b.get_buffer())); @@ -956,39 +623,6 @@ tanh_arr<<>>(static_cast(out_ptr), static_cast> temp; - if (is_derive) - { - Tensor temp_ones = values(this->get_buffer().shape(), 1.f).tensor_cast(this->get_buffer().type(), false); - Tensor temp_sigmoid = this->sigmoid(false); - temp.push_back(std::make_pair(*this, Derivation(multiply(temp_sigmoid, add(temp_ones, -temp_sigmoid, false), false, DataBuffer()), multiply))); - } - cudaError cuda_status; - void* out_ptr; - devices::Device this_cuda{ devices::CUDA }; - cuda_status = cudaGetDevice(&this_cuda.index); - cudaDeviceProp cu_dev_prop; - cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); - TensorBase base_of_this = this->get_buffer().change_device(this_cuda); - cuda_status = cudaMalloc(&out_ptr, this->get_buffer().data_size()); - dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); - std::size_t out_size = this->get_buffer().data_size() / get_sizeof_type(this->get_buffer().type()); - dim3 grid_dim(out_size / block_dim.x + ((out_size % block_dim.x) ? 
1U : 0U)); -#define ADD_CODE(TYPE) \ -if(this->get_buffer().type() == typeid(TYPE)) \ -sigmoid_arr<<>>(static_cast(out_ptr), static_cast(base_of_this.data()), out_size); - LOOP(USING_DATA_TYPE); -#undef ADD_CODE - cuda_status = cudaDeviceSynchronize(); - TensorBase other_buf(this->get_buffer().type(), this->get_buffer().shape(), out_ptr, this_cuda); - cuda_status = cudaFree(out_ptr); - return Tensor(std::move(other_buf), std::move(temp)); - } - - } } diff --git a/src/tensor-array/core/tensor_math_op.cu b/src/tensor-array/core/tensor_math_op.cu new file mode 100644 index 0000000..b3dbac0 --- /dev/null +++ b/src/tensor-array/core/tensor_math_op.cu @@ -0,0 +1,601 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef TENSOR_CONTENT +#define TENSOR_CONTENT +#include "tensor.hh" +#undef TENSOR_CONTENT +#endif // !TENSOR_CONTENT + +#define USING_DATA_TYPE_NVIDIA_FLOAT_8() (__nv_fp8_e5m2)(__nv_fp8_e4m3) +#define USING_DATA_TYPE_NVIDIA_FLOAT() (__half)(__nv_bfloat16) +#define USING_DATA_TYPE_FLOAT() (float)(double) +#define USING_DATA_TYPE_SINT() (int8_t)(int16_t)(int32_t)(int64_t) +#define USING_DATA_TYPE_UINT() (uint8_t)(uint16_t)(uint32_t)(uint64_t) +#if CUDART_VERSION >= 12020 +#define USING_DATA_TYPE \ +USING_DATA_TYPE_SINT() \ +USING_DATA_TYPE_UINT() \ +USING_DATA_TYPE_FLOAT() \ +USING_DATA_TYPE_NVIDIA_FLOAT() +#else +#define USING_DATA_TYPE \ +USING_DATA_TYPE_SINT() \ +USING_DATA_TYPE_UINT() \ +USING_DATA_TYPE_FLOAT() +#endif + +#define LOOP(seq) END(A seq) +#define BODY(x) ADD_CODE(x) +#define A(x) BODY(x) B +#define B(x) BODY(x) A +#define A_END +#define B_END +#define END(...) END_(__VA_ARGS__) +#define END_(...) 
__VA_ARGS__##_END + +namespace tensor_array +{ + namespace value + { + using namespace devices; + + template + __global__ void sum_2_arr(T c[], const T a[], const T b[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + c[thread_x] = a[thread_x] + b[thread_x]; + } + + template + __global__ void mul_2_arr(T c[], const T a[], const T b[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + c[thread_x] = a[thread_x] * b[thread_x]; + } + + template + __global__ void div_2_arr(T c[], const T a[], const T b[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + c[thread_x] = a[thread_x] / b[thread_x]; + } + + template + __global__ void arr_greater_than(bool out_value[], const T in1_value[], const T in2_value[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + out_value[thread_x] = in1_value[thread_x] > in2_value[thread_x]; + } + + template + __global__ void arr_greater_equal(bool out_value[], const T in1_value[], const T in2_value[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + out_value[thread_x] = in1_value[thread_x] >= in2_value[thread_x]; + } + + template + __global__ void arr_less_than(bool out_value[], const T in1_value[], const T in2_value[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + out_value[thread_x] = in1_value[thread_x] < in2_value[thread_x]; + } + + template + __global__ void arr_less_equal(bool out_value[], const T in1_value[], const T in2_value[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + out_value[thread_x] = in1_value[thread_x] <= in2_value[thread_x]; + } + + template + 
__global__ void arr_equal_equal(bool out_value[], const T in1_value[], const T in2_value[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + out_value[thread_x] = in1_value[thread_x] == in2_value[thread_x]; + } + + template + __global__ void arr_not_equal(bool out_value[], const T in1_value[], const T in2_value[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + out_value[thread_x] = in1_value[thread_x] != in2_value[thread_x]; + } + + __global__ void arr_logical_and(bool out_value[], const bool in1_value[], const bool in2_value[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + out_value[thread_x] = in1_value[thread_x] && in2_value[thread_x]; + } + + + __global__ void arr_logical_or(bool out_value[], const bool in1_value[], const bool in2_value[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + out_value[thread_x] = in1_value[thread_x] || in2_value[thread_x]; + } + + + __global__ void arr_logical_not(bool out_value[], const bool in1_value[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + out_value[thread_x] = !in1_value[thread_x]; + } + + template + __global__ void sigmoid_arr(T value_out[], const T value_in[], unsigned int c_size) + { + unsigned int thread_x = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_x < c_size) + value_out[thread_x] = T(1) / (T(1) + T(exp(double(-value_in[thread_x])))); + } + + bool equal_dim_size(const TensorBase& a, const TensorBase& b); + + Tensor operator>(const Tensor& a, const Tensor& b) + { + assert(equal_dim_size(a.get_buffer(), b.get_buffer())); + cudaError cuda_status; + bool* c_ptr; + Device this_cuda{ CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; 
+ cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + base_a.data_size() / get_sizeof_type(base_a.type()), + base_b.data_size() / get_sizeof_type(base_b.type()) + ); + cuda_status = cudaMalloc(&c_ptr, c_size * sizeof(bool)); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 1U : 0U)); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ +arr_greater_than<<>>(c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaDeviceSynchronize(); + cuda_status = cudaGetLastError(); + if (cuda_status != cudaSuccess) + { + std::printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); + } + TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); + cuda_status = cudaFree(c_ptr); + return other_buf; + } + + Tensor operator>=(const Tensor& a, const Tensor& b) + { + assert(equal_dim_size(a.get_buffer(), b.get_buffer())); + cudaError cuda_status; + bool* c_ptr; + Device this_cuda{ CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + base_a.data_size() / get_sizeof_type(base_a.type()), + base_b.data_size() / get_sizeof_type(base_b.type()) + ); + cuda_status = cudaMalloc(&c_ptr, c_size * sizeof(bool)); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ +arr_greater_equal<<>>(c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaDeviceSynchronize(); + cuda_status = cudaGetLastError(); + if (cuda_status != cudaSuccess) + { + std::printf("CUDA error: %s\n", cudaGetErrorString(cuda_status)); + } + TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); + cuda_status = cudaFree(c_ptr); + return other_buf; + } + + Tensor operator<(const Tensor& a, const Tensor& b) + { + assert(equal_dim_size(a.get_buffer(), b.get_buffer())); + cudaError cuda_status; + bool* c_ptr; + Device this_cuda{CUDA}; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), + b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) + ); + cuda_status = cudaMalloc(&c_ptr, c_size * sizeof(bool)); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ +arr_less_than<<>>(c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaDeviceSynchronize(); + TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); + cuda_status = cudaFree(c_ptr); + return other_buf; + } + + Tensor operator<=(const Tensor& a, const Tensor& b) + { + assert(equal_dim_size(a.get_buffer(), b.get_buffer())); + cudaError cuda_status; + bool* c_ptr; + Device this_cuda{CUDA}; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), + b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) + ); + cuda_status = cudaMalloc(&c_ptr, c_size * sizeof(bool)); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ +arr_less_equal<<>>(c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaDeviceSynchronize(); + TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); + cuda_status = cudaFree(c_ptr); + return other_buf; + } + + Tensor operator==(const Tensor& a, const Tensor& b) + { + assert(equal_dim_size(a.get_buffer(), b.get_buffer())); + cudaError cuda_status; + bool* c_ptr; + Device this_cuda{CUDA}; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), + b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) + ); + cuda_status = cudaMalloc(&c_ptr, c_size * sizeof(bool)); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ +arr_equal_equal<<>>(c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaDeviceSynchronize(); + TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); + cuda_status = cudaFree(c_ptr); + return other_buf; + } + + Tensor operator!=(const Tensor& a, const Tensor& b) + { + assert(equal_dim_size(a.get_buffer(), b.get_buffer())); + cudaError cuda_status; + bool* c_ptr; + Device this_cuda{CUDA}; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), + b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) + ); + cuda_status = cudaMalloc(&c_ptr, c_size * sizeof(bool)); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ +arr_not_equal<<>>(c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaDeviceSynchronize(); + TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); + cuda_status = cudaFree(c_ptr); + return other_buf; + } + + Tensor operator&&(const Tensor& a, const Tensor& b) + { + assert( + equal_dim_size(a.get_buffer(), b.get_buffer()) + && a.get_buffer().type() == typeid(bool) + && b.get_buffer().type() == typeid(bool) + ); + cudaError cuda_status; + bool* c_ptr; + devices::Device this_cuda{ devices::CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), + b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) + ); + cuda_status = cudaMalloc(&c_ptr, c_size); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); + arr_logical_and << > > (c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); + cuda_status = cudaDeviceSynchronize(); + TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); + cuda_status = cudaFree(c_ptr); + return other_buf; + } + + Tensor operator||(const Tensor& a, const Tensor& b) + { + assert( + equal_dim_size(a.get_buffer(), b.get_buffer()) + && a.get_buffer().type() == typeid(bool) + && b.get_buffer().type() == typeid(bool) + ); + cudaError cuda_status; + bool* c_ptr; + devices::Device this_cuda{ devices::CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), + b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) + ); + cuda_status = cudaMalloc(&c_ptr, c_size); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); + arr_logical_or << > > (c_ptr, static_cast(base_a.data()), static_cast(base_b.data()), c_size); + cuda_status = cudaDeviceSynchronize(); + TensorBase other_buf(typeid(bool), a.get_buffer().shape(), c_ptr, this_cuda); + cuda_status = cudaFree(c_ptr); + return other_buf; + } + + Tensor Tensor::operator!() + { + assert(this->get_buffer().type() == typeid(bool)); + cudaError cuda_status; + bool* out_ptr; + devices::Device this_cuda{ devices::CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_of_this = this->get_buffer().change_device(this_cuda); + cuda_status = cudaMalloc(&out_ptr, this->get_buffer().data_size()); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(this->get_buffer().data_size() / block_dim.x + 1U); + arr_logical_not << < grid_dim, block_dim >> > (out_ptr, static_cast(base_of_this.data()), this->get_buffer().data_size()); + cuda_status = cudaDeviceSynchronize(); + TensorBase other_buf(typeid(bool), this->get_buffer().shape(), out_ptr, this_cuda); + cuda_status = cudaFree(out_ptr); + return other_buf; + } + + Tensor multiply(const Tensor& a, const Tensor& b, bool is_derive, const DataBuffer&) + { + assert(equal_dim_size(a.get_buffer(), b.get_buffer())); + std::vector> temp; + if (is_derive) + { + temp.push_back(std::make_pair(a, Derivation(b.clone(), multiply))); + temp.push_back(std::make_pair(b, Derivation(a.clone(), multiply))); + } + cudaError cuda_status; + TensorBase other_buf; + void* c_ptr; + devices::Device this_cuda{ devices::CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + a.get_buffer().data_size() / 
get_sizeof_type(a.get_buffer().type()), + b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) + ); + cuda_status = cudaMalloc(&c_ptr, std::max(a.get_buffer().data_size(), b.get_buffer().data_size())); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 1U : 0U)); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ +{ \ +mul_2_arr<<>>(static_cast(c_ptr), static_cast(base_a.data()), static_cast(base_b.data()), c_size); \ +cuda_status = cudaDeviceSynchronize(); \ +other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ +} + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaFree(c_ptr); + return Tensor(std::move(other_buf), std::move(temp)); + } + + Tensor add(const Tensor& a, const Tensor& b, bool is_derive) + { + assert(equal_dim_size(a.get_buffer(), b.get_buffer())); + std::vector> temp; + if (is_derive) + { + temp.push_back(std::make_pair(a, Derivation(values(a.get_buffer().shape(), 1).tensor_cast(a.get_buffer().type(), false), multiply))); + temp.push_back(std::make_pair(b, Derivation(values(b.get_buffer().shape(), 1).tensor_cast(b.get_buffer().type(), false), multiply))); + } + cudaError cuda_status; + TensorBase other_buf; + void* c_ptr; + devices::Device this_cuda{ devices::CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), + b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) + ); + cuda_status = cudaMalloc(&c_ptr, std::max(a.get_buffer().data_size(), b.get_buffer().data_size())); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 
grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 1U : 0U)); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ +{ \ +sum_2_arr<<>>(static_cast(c_ptr), static_cast(base_a.data()), static_cast(base_b.data()), c_size); \ +cuda_status = cudaDeviceSynchronize(); \ +other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ +} + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaFree(c_ptr); + return Tensor(std::move(other_buf), std::move(temp)); + } + + Tensor divide(const Tensor& a, const Tensor& b, bool is_derive) + { + assert(equal_dim_size(a.get_buffer(), b.get_buffer())); + std::vector> temp; + if (is_derive) + { + temp.push_back(std::make_pair(a, Derivation(divide(values(b.get_buffer().shape(), 1).tensor_cast(b.get_buffer().type(), false), b, false), multiply))); + temp.push_back(std::make_pair(b, Derivation(divide(a, power(b, values(b.get_buffer().shape(), 2).tensor_cast(b.get_buffer().type(), false), false), false), multiply))); + } + cudaError cuda_status; + TensorBase other_buf; + void* c_ptr; + devices::Device this_cuda{ devices::CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_a = a.get_buffer().change_device(this_cuda); + TensorBase base_b = b.get_buffer().change_device(this_cuda); + std::size_t c_size = std::max + ( + a.get_buffer().data_size() / get_sizeof_type(a.get_buffer().type()), + b.get_buffer().data_size() / get_sizeof_type(b.get_buffer().type()) + ); + cuda_status = cudaMalloc(&c_ptr, std::max(a.get_buffer().data_size(), b.get_buffer().data_size())); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + dim3 grid_dim(c_size / block_dim.x + (c_size % block_dim.x ? 
1U : 0U)); +#define ADD_CODE(TYPE) \ +if(a.get_buffer().type() == typeid(TYPE) && b.get_buffer().type() == typeid(TYPE)) \ +{ \ +div_2_arr<<>>(static_cast(c_ptr), static_cast(base_a.data()), static_cast(base_b.data()), c_size); \ +cuda_status = cudaDeviceSynchronize(); \ +other_buf = TensorBase(typeid(TYPE), a.get_buffer().shape(), c_ptr, this_cuda); \ +} + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaFree(c_ptr); + return Tensor(std::move(other_buf), std::move(temp)); + } + + Tensor Tensor::sigmoid(bool is_derive) const + { + std::vector> temp; + if (is_derive) + { + Tensor temp_ones = values(this->get_buffer().shape(), 1.f).tensor_cast(this->get_buffer().type(), false); + Tensor temp_sigmoid = this->sigmoid(false); + temp.push_back(std::make_pair(*this, Derivation(multiply(temp_sigmoid, add(temp_ones, -temp_sigmoid, false), false, DataBuffer()), multiply))); + } + cudaError cuda_status; + void* out_ptr; + devices::Device this_cuda{ devices::CUDA }; + cuda_status = cudaGetDevice(&this_cuda.index); + cudaDeviceProp cu_dev_prop; + cuda_status = cudaGetDeviceProperties(&cu_dev_prop, this_cuda.index); + TensorBase base_of_this = this->get_buffer().change_device(this_cuda); + cuda_status = cudaMalloc(&out_ptr, this->get_buffer().data_size()); + dim3 block_dim(cu_dev_prop.maxThreadsDim[0]); + std::size_t out_size = this->get_buffer().data_size() / get_sizeof_type(this->get_buffer().type()); + dim3 grid_dim(out_size / block_dim.x + ((out_size % block_dim.x) ? 
1U : 0U)); +#define ADD_CODE(TYPE) \ +if(this->get_buffer().type() == typeid(TYPE)) \ +sigmoid_arr<<>>(static_cast(out_ptr), static_cast(base_of_this.data()), out_size); + LOOP(USING_DATA_TYPE); +#undef ADD_CODE + cuda_status = cudaDeviceSynchronize(); + TensorBase other_buf(this->get_buffer().type(), this->get_buffer().shape(), out_ptr, this_cuda); + cuda_status = cudaFree(out_ptr); + return Tensor(std::move(other_buf), std::move(temp)); + } + } +} + +#undef LOOP +#undef BODY +#undef A +#undef B +#undef A_END +#undef B_END +#undef END +#undef END_ + +#undef USING_DATA_TYPE diff --git a/src/tensor-array/core/tensor_reduce.cu b/src/tensor-array/core/tensor_reduce.cu index 4a35a51..5b6ddfd 100644 --- a/src/tensor-array/core/tensor_reduce.cu +++ b/src/tensor-array/core/tensor_reduce.cu @@ -29,12 +29,12 @@ limitations under the License. #undef TENSOR_CONTENT #endif -#define USING_DATA_TYPE_NVIDIA_FLOAT_8 (__nv_fp8_e5m2)(__nv_fp8_e4m3) -#define USING_DATA_TYPE_NVIDIA_FLOAT (__half)(__nv_bfloat16) -#define USING_DATA_TYPE_FLOAT (float)(double) -#define USING_DATA_TYPE_SINT (int8_t)(int16_t)(int32_t)(int64_t) -#define USING_DATA_TYPE_UINT (uint8_t)(uint16_t)(uint32_t)(uint64_t) -#define USING_DATA_TYPE USING_DATA_TYPE_SINT USING_DATA_TYPE_UINT USING_DATA_TYPE_FLOAT +#define USING_DATA_TYPE_NVIDIA_FLOAT_8() (__nv_fp8_e5m2)(__nv_fp8_e4m3) +#define USING_DATA_TYPE_NVIDIA_FLOAT() (__half)(__nv_bfloat16) +#define USING_DATA_TYPE_FLOAT() (float)(double) +#define USING_DATA_TYPE_SINT() (int8_t)(int16_t)(int32_t)(int64_t) +#define USING_DATA_TYPE_UINT() (uint8_t)(uint16_t)(uint32_t)(uint64_t) +#define USING_DATA_TYPE USING_DATA_TYPE_SINT() USING_DATA_TYPE_UINT() USING_DATA_TYPE_FLOAT() #define LOOP(seq) END(A seq) #define BODY(x) ADD_CODE(x) diff --git a/src/tensor-array/core/tensorbase.cc b/src/tensor-array/core/tensorbase.cc index 55c5398..d3cc7a7 100644 --- a/src/tensor-array/core/tensorbase.cc +++ b/src/tensor-array/core/tensorbase.cc @@ -23,10 +23,10 @@ limitations under the 
License. #include #include -#define USING_DATA_TYPE_FLOAT (float)(double) -#define USING_DATA_TYPE_SINT (int8_t)(int16_t)(int32_t)(int64_t) -#define USING_DATA_TYPE_UINT (uint8_t)(uint16_t)(uint32_t)(uint64_t) -#define USING_DATA_TYPE USING_DATA_TYPE_SINT USING_DATA_TYPE_UINT USING_DATA_TYPE_FLOAT +#define USING_DATA_TYPE_FLOAT() (float)(double) +#define USING_DATA_TYPE_SINT() (int8_t)(int16_t)(int32_t)(int64_t) +#define USING_DATA_TYPE_UINT() (uint8_t)(uint16_t)(uint32_t)(uint64_t) +#define USING_DATA_TYPE USING_DATA_TYPE_SINT() USING_DATA_TYPE_UINT() USING_DATA_TYPE_FLOAT() #define LOOP(seq) END(A seq) #define BODY(x) ADD_CODE(x) From 4079d4667a7e2becdd83413dfd5bae8e808aa2f0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Thu, 31 Jul 2025 05:09:27 +0000 Subject: [PATCH 225/281] test --- src/tensor-array/core/tensor_cast.cu | 17 +++++++++++++- src/tensor-array/core/tensor_math_func.cu | 1 + src/tensor-array/core/tensor_math_op.cu | 1 + src/tensor-array/core/tensor_math_op.hh | 27 +++++++++++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 src/tensor-array/core/tensor_math_op.hh diff --git a/src/tensor-array/core/tensor_cast.cu b/src/tensor-array/core/tensor_cast.cu index e301cf0..7749258 100644 --- a/src/tensor-array/core/tensor_cast.cu +++ b/src/tensor-array/core/tensor_cast.cu @@ -41,18 +41,33 @@ limitations under the License. 
#define USING_DATA_TYPE_SINT() (int16_t)(int32_t)(int64_t) #define USING_DATA_TYPE_UINT() (uint16_t)(uint32_t)(uint64_t) +#if CUDART_VERSION >= 12020 #define USING_DATA_TYPE_CAST_FROM() \ -(__nv_fp8_e4m3) \ USING_DATA_TYPE_SINT() \ USING_DATA_TYPE_UINT() \ USING_DATA_TYPE_FLOAT() \ USING_DATA_TYPE_NVIDIA_FLOAT() +#else +#define USING_DATA_TYPE_CAST_FROM() \ +USING_DATA_TYPE_SINT() \ +USING_DATA_TYPE_UINT() \ +USING_DATA_TYPE_FLOAT() +#endif +#if CUDART_VERSION >= 12020 +#define USING_DATA_TYPE_CAST_TO() \ +(bool) \ +(int8_t) \ +(uint8_t) \ +USING_DATA_TYPE_NVIDIA_FLOAT_8() \ +USING_DATA_TYPE_CAST_FROM() +#else #define USING_DATA_TYPE_CAST_TO() \ (bool) \ (int8_t) \ (uint8_t) \ USING_DATA_TYPE_CAST_FROM() +#endif namespace tensor_array { diff --git a/src/tensor-array/core/tensor_math_func.cu b/src/tensor-array/core/tensor_math_func.cu index a46ce81..5ccef02 100644 --- a/src/tensor-array/core/tensor_math_func.cu +++ b/src/tensor-array/core/tensor_math_func.cu @@ -28,6 +28,7 @@ limitations under the License. #ifndef TENSOR_CONTENT #define TENSOR_CONTENT #include "tensor.hh" +#include "tensor_math_op.hh" #undef TENSOR_CONTENT #endif // !TENSOR_CONTENT diff --git a/src/tensor-array/core/tensor_math_op.cu b/src/tensor-array/core/tensor_math_op.cu index b3dbac0..2514095 100644 --- a/src/tensor-array/core/tensor_math_op.cu +++ b/src/tensor-array/core/tensor_math_op.cu @@ -28,6 +28,7 @@ limitations under the License. #ifndef TENSOR_CONTENT #define TENSOR_CONTENT #include "tensor.hh" +#include "tensor_math_op.hh" #undef TENSOR_CONTENT #endif // !TENSOR_CONTENT diff --git a/src/tensor-array/core/tensor_math_op.hh b/src/tensor-array/core/tensor_math_op.hh new file mode 100644 index 0000000..1d3c33e --- /dev/null +++ b/src/tensor-array/core/tensor_math_op.hh @@ -0,0 +1,27 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifdef TENSOR_CONTENT +namespace tensor_array +{ + namespace value + { + Tensor multiply(const Tensor&, const Tensor&, bool, const DataBuffer&); + Tensor add(const Tensor&, const Tensor&, bool); + Tensor divide(const Tensor&, const Tensor&, bool); + } +} +#endif From 072599b7377ec971ba516eafd8d29c70ec8b977a Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 10:32:11 +0700 Subject: [PATCH 226/281] Create deploy_webpages.yml --- .github/workflows/deploy_webpages.yml | 89 +++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 .github/workflows/deploy_webpages.yml diff --git a/.github/workflows/deploy_webpages.yml b/.github/workflows/deploy_webpages.yml new file mode 100644 index 0000000..3f13865 --- /dev/null +++ b/.github/workflows/deploy_webpages.yml @@ -0,0 +1,89 @@ +# This starter workflow is for a CMake project running on a single platform. There is a different starter workflow if you need cross-platform coverage. +# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-multi-platform.yml +name: CMake on a single platform (deploy web pages) + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + workflow_dispatch: + +env: + # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 
+ BUILD_TYPE: Release + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. + # You can convert this to a matrix build if you need cross-platform coverage. + # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Setup Pages + uses: actions/configure-pages@v5 + + - name: Run CUDA bash shell Ubuntu/Debian + if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian') + env: + temp: ${{ runner.temp }} + cuda: "12.9" + run: | + chmod +x ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh + ${{github.workspace}}/scripts/actions/install-cuda-ubuntu.sh + shell: bash + + - name: Install Doxygen + run: | + sudo apt-get update + sudo apt-get install doxygen + shell: bash + + - name: Configure CMake + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. + # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + + - name: Build + # Build your program with the given configuration + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Test + working-directory: ${{github.workspace}}/build + # Execute tests defined by the CMake configuration. 
+ # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail + run: ctest -C ${{env.BUILD_TYPE}} + + - name: Upload GitHub Pages artifact + uses: actions/upload-pages-artifact@v3 + with: + # Path of the directory containing the static assets. + path: "build/src/" # default is _site/ + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 + From ddfc111f2c8cbb5c5403dd921f061b7c08b6958d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 03:35:37 +0000 Subject: [PATCH 227/281] test --- CMakeLists.txt | 1 + cmake/ta_add_doxygen.cmake | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 cmake/ta_add_doxygen.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 95e9d71..bcbde5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ install( include(cmake/ta_core_config.cmake) include(cmake/ta_layers_config.cmake) include(cmake/ta_interp_config.cmake) +include(cmake/ta_add_doxygen.cmake) # add_subdirectory("src/tensor-array/core") # add_subdirectory("src/tensor-array/layers") diff --git a/cmake/ta_add_doxygen.cmake b/cmake/ta_add_doxygen.cmake new file mode 100644 index 0000000..c9449c5 --- /dev/null +++ b/cmake/ta_add_doxygen.cmake @@ -0,0 +1,10 @@ +find_package(Doxygen) + +if(Doxygen_FOUND) + set(DOXYGEN_GENERATE_HTML YES) + set(DOXYGEN_FILE_PATTERNS *.c *.cc *.h *.hh *.cu) + set(DOXYGEN_EXTENSION_MAPPING "*.cu=c++") + set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "${PROJECT_SOURCE_DIR}/README.md") + + doxyen_add_docs(tensorarray_docs ${PROJECT_SOURCE_DIR}/src ALL) +endif() From 1c1bc0d7c37083cef875408a0394e417ca3672e1 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 10:39:39 +0700 Subject: [PATCH 228/281] Update ta_add_doxygen.cmake --- 
cmake/ta_add_doxygen.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ta_add_doxygen.cmake b/cmake/ta_add_doxygen.cmake index c9449c5..aaa96b4 100644 --- a/cmake/ta_add_doxygen.cmake +++ b/cmake/ta_add_doxygen.cmake @@ -6,5 +6,5 @@ if(Doxygen_FOUND) set(DOXYGEN_EXTENSION_MAPPING "*.cu=c++") set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "${PROJECT_SOURCE_DIR}/README.md") - doxyen_add_docs(tensorarray_docs ${PROJECT_SOURCE_DIR}/src ALL) + doxygen_add_docs(tensorarray_docs ${PROJECT_SOURCE_DIR}/src ALL) endif() From 5084d40953a0fd181b7fe246e79c6452010f11bb Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 10:42:19 +0700 Subject: [PATCH 229/281] Update deploy_webpages.yml --- .github/workflows/deploy_webpages.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/deploy_webpages.yml b/.github/workflows/deploy_webpages.yml index 3f13865..80e223e 100644 --- a/.github/workflows/deploy_webpages.yml +++ b/.github/workflows/deploy_webpages.yml @@ -39,7 +39,6 @@ jobs: uses: actions/configure-pages@v5 - name: Run CUDA bash shell Ubuntu/Debian - if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'debian') env: temp: ${{ runner.temp }} cuda: "12.9" From 27d66bb1c3ad711216dac465732f7fee8187560f Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 10:53:15 +0700 Subject: [PATCH 230/281] Update CMakeLists.txt --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bcbde5b..d5678c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,6 @@ install( include(cmake/ta_core_config.cmake) include(cmake/ta_layers_config.cmake) include(cmake/ta_interp_config.cmake) -include(cmake/ta_add_doxygen.cmake) # add_subdirectory("src/tensor-array/core") # add_subdirectory("src/tensor-array/layers") @@ -35,6 +34,8 @@ if(BUILD_TESTING) 
add_subdirectory("tests/tensor-array/core") endif() +include(cmake/ta_add_doxygen.cmake) + set(CPACK_PACKAGE_NAME "TensorArray") set(CPACK_PACKAGE_VENDOR "TensorArray-Creators") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY) From af702f2cced9676955ee05a4e8f7db2cd12a8267 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 11:06:23 +0700 Subject: [PATCH 231/281] Update deploy_webpages.yml --- .github/workflows/deploy_webpages.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_webpages.yml b/.github/workflows/deploy_webpages.yml index 80e223e..461c37b 100644 --- a/.github/workflows/deploy_webpages.yml +++ b/.github/workflows/deploy_webpages.yml @@ -56,7 +56,7 @@ jobs: - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBUILD_TESTING=OFF - name: Build # Build your program with the given configuration From 6cfe9aa959dfd165b6a76482282e5a30e27b6653 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 07:58:41 +0000 Subject: [PATCH 232/281] test --- .github/workflows/{deploy_webpages.yml => deploy-webpages.yml} | 2 +- cmake/ta_add_doxygen.cmake | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) rename .github/workflows/{deploy_webpages.yml => deploy-webpages.yml} (98%) diff --git a/.github/workflows/deploy_webpages.yml b/.github/workflows/deploy-webpages.yml similarity index 98% rename from .github/workflows/deploy_webpages.yml rename to .github/workflows/deploy-webpages.yml index 461c37b..e58ed5c 100644 --- 
a/.github/workflows/deploy_webpages.yml +++ b/.github/workflows/deploy-webpages.yml @@ -72,7 +72,7 @@ jobs: uses: actions/upload-pages-artifact@v3 with: # Path of the directory containing the static assets. - path: "build/src/" # default is _site/ + path: "build/docs/" # default is _site/ deploy: environment: diff --git a/cmake/ta_add_doxygen.cmake b/cmake/ta_add_doxygen.cmake index aaa96b4..3b5c9cb 100644 --- a/cmake/ta_add_doxygen.cmake +++ b/cmake/ta_add_doxygen.cmake @@ -2,9 +2,10 @@ find_package(Doxygen) if(Doxygen_FOUND) set(DOXYGEN_GENERATE_HTML YES) - set(DOXYGEN_FILE_PATTERNS *.c *.cc *.h *.hh *.cu) + set(DOXYGEN_FILE_PATTERNS *.c *.cc *.h *.hh *.cu *.md) set(DOXYGEN_EXTENSION_MAPPING "*.cu=c++") set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "${PROJECT_SOURCE_DIR}/README.md") + set(DOXYGEN_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/docs) doxygen_add_docs(tensorarray_docs ${PROJECT_SOURCE_DIR}/src ALL) endif() From 981a89955028603cff64620a9c7e6f9e85357dc4 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 08:14:45 +0000 Subject: [PATCH 233/281] add website --- .github/workflows/deploy-webpages.yml | 2 +- cmake/ta_add_doxygen.cmake | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/deploy-webpages.yml b/.github/workflows/deploy-webpages.yml index e58ed5c..e9716f0 100644 --- a/.github/workflows/deploy-webpages.yml +++ b/.github/workflows/deploy-webpages.yml @@ -72,7 +72,7 @@ jobs: uses: actions/upload-pages-artifact@v3 with: # Path of the directory containing the static assets. 
- path: "build/docs/" # default is _site/ + path: "build/html/" # default is _site/ deploy: environment: diff --git a/cmake/ta_add_doxygen.cmake b/cmake/ta_add_doxygen.cmake index 3b5c9cb..56b3377 100644 --- a/cmake/ta_add_doxygen.cmake +++ b/cmake/ta_add_doxygen.cmake @@ -5,7 +5,6 @@ if(Doxygen_FOUND) set(DOXYGEN_FILE_PATTERNS *.c *.cc *.h *.hh *.cu *.md) set(DOXYGEN_EXTENSION_MAPPING "*.cu=c++") set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "${PROJECT_SOURCE_DIR}/README.md") - set(DOXYGEN_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/docs) doxygen_add_docs(tensorarray_docs ${PROJECT_SOURCE_DIR}/src ALL) endif() From ba60e9f16aadd44760bcd483ec3d609df1b6c547 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 08:44:37 +0000 Subject: [PATCH 234/281] changed lvalue assigment --- cmake/ta_add_doxygen.cmake | 3 ++- src/tensor-array/core/tensor.cc | 8 ++++---- src/tensor-array/core/tensor.hh | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/cmake/ta_add_doxygen.cmake b/cmake/ta_add_doxygen.cmake index 56b3377..94a1bf6 100644 --- a/cmake/ta_add_doxygen.cmake +++ b/cmake/ta_add_doxygen.cmake @@ -5,6 +5,7 @@ if(Doxygen_FOUND) set(DOXYGEN_FILE_PATTERNS *.c *.cc *.h *.hh *.cu *.md) set(DOXYGEN_EXTENSION_MAPPING "*.cu=c++") set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "${PROJECT_SOURCE_DIR}/README.md") + set(DOXYGEN_EXCLUDE_PATTERNS ".*/*" "build/*" "cmake/*" "scripts/*" "CMakeLists.txt" "*.cmake" "*.cmake.*") - doxygen_add_docs(tensorarray_docs ${PROJECT_SOURCE_DIR}/src ALL) + doxygen_add_docs(tensorarray_docs ${PROJECT_SOURCE_DIR} ALL) endif() diff --git a/src/tensor-array/core/tensor.cc b/src/tensor-array/core/tensor.cc index 7d10a75..ccc1e4b 100644 --- a/src/tensor-array/core/tensor.cc +++ b/src/tensor-array/core/tensor.cc @@ -709,22 +709,22 @@ temp_check_data_type = TEMP(temp.first) < TEMP(temp_tensor); return multiply(*this, values(this->get_buffer().shape(), -1.f)); } - Tensor& Tensor::operator+=(const 
Tensor& other) + Tensor Tensor::operator+=(const Tensor& other) { return this->operator=((*this) + other); } - Tensor& Tensor::operator-=(const Tensor& other) + Tensor Tensor::operator-=(const Tensor& other) { return this->operator=((*this) - other); } - Tensor& Tensor::operator*=(const Tensor& other) + Tensor Tensor::operator*=(const Tensor& other) { return this->operator=((*this) * other); } - Tensor& Tensor::operator/=(const Tensor& other) + Tensor Tensor::operator/=(const Tensor& other) { return this->operator=((*this) / other); } diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index a05ed91..3a5a164 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -225,13 +225,13 @@ namespace tensor_array Tensor operator-() const; - Tensor& operator+=(const Tensor&); + Tensor operator+=(const Tensor&); - Tensor& operator-=(const Tensor&); + Tensor operator-=(const Tensor&); - Tensor& operator*=(const Tensor&); + Tensor operator*=(const Tensor&); - Tensor& operator/=(const Tensor&); + Tensor operator/=(const Tensor&); friend TENSOR_ARRAY_API Tensor operator>(const Tensor&, const Tensor&); friend TENSOR_ARRAY_API Tensor operator<(const Tensor&, const Tensor&); From 2757e181d62699fe1c41b2830f7cf6ded3fd95ce Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 08:52:42 +0000 Subject: [PATCH 235/281] test --- src/tensor-array/core/tensor.cc | 8 ++++---- src/tensor-array/core/tensor.hh | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tensor-array/core/tensor.cc b/src/tensor-array/core/tensor.cc index ccc1e4b..7d10a75 100644 --- a/src/tensor-array/core/tensor.cc +++ b/src/tensor-array/core/tensor.cc @@ -709,22 +709,22 @@ temp_check_data_type = TEMP(temp.first) < TEMP(temp_tensor); return multiply(*this, values(this->get_buffer().shape(), -1.f)); } - Tensor Tensor::operator+=(const Tensor& other) + Tensor& Tensor::operator+=(const 
Tensor& other) { return this->operator=((*this) + other); } - Tensor Tensor::operator-=(const Tensor& other) + Tensor& Tensor::operator-=(const Tensor& other) { return this->operator=((*this) - other); } - Tensor Tensor::operator*=(const Tensor& other) + Tensor& Tensor::operator*=(const Tensor& other) { return this->operator=((*this) * other); } - Tensor Tensor::operator/=(const Tensor& other) + Tensor& Tensor::operator/=(const Tensor& other) { return this->operator=((*this) / other); } diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index 3a5a164..a05ed91 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -225,13 +225,13 @@ namespace tensor_array Tensor operator-() const; - Tensor operator+=(const Tensor&); + Tensor& operator+=(const Tensor&); - Tensor operator-=(const Tensor&); + Tensor& operator-=(const Tensor&); - Tensor operator*=(const Tensor&); + Tensor& operator*=(const Tensor&); - Tensor operator/=(const Tensor&); + Tensor& operator/=(const Tensor&); friend TENSOR_ARRAY_API Tensor operator>(const Tensor&, const Tensor&); friend TENSOR_ARRAY_API Tensor operator<(const Tensor&, const Tensor&); From 080ee507448959b9f106615f0747148a1b3b115b Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 10:50:55 +0000 Subject: [PATCH 236/281] tests --- .github/workflows/deploy-webpages.yml | 2 +- CMakeLists.txt | 2 +- .../CMakeLists.txt => cmake/ta_core_tests.cmake | 13 ++++++------- 3 files changed, 8 insertions(+), 9 deletions(-) rename tests/tensor-array/core/CMakeLists.txt => cmake/ta_core_tests.cmake (63%) diff --git a/.github/workflows/deploy-webpages.yml b/.github/workflows/deploy-webpages.yml index e9716f0..941a059 100644 --- a/.github/workflows/deploy-webpages.yml +++ b/.github/workflows/deploy-webpages.yml @@ -56,7 +56,7 @@ jobs: - name: Configure CMake # Configure CMake in a 'build' subdirectory. 
`CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DBUILD_TESTING=OFF + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} - name: Build # Build your program with the given configuration diff --git a/CMakeLists.txt b/CMakeLists.txt index d5678c9..ad9e155 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,7 @@ include(cmake/ta_interp_config.cmake) include(CTest) if(BUILD_TESTING) - add_subdirectory("tests/tensor-array/core") +include(cmake/ta_core_tests.cmake) endif() include(cmake/ta_add_doxygen.cmake) diff --git a/tests/tensor-array/core/CMakeLists.txt b/cmake/ta_core_tests.cmake similarity index 63% rename from tests/tensor-array/core/CMakeLists.txt rename to cmake/ta_core_tests.cmake index c28b0d4..ddd4f19 100644 --- a/tests/tensor-array/core/CMakeLists.txt +++ b/cmake/ta_core_tests.cmake @@ -1,19 +1,18 @@ -cmake_minimum_required(VERSION 3.18) set( TensorArray_tests_src - "tensor_array_test.cc" - "print_output.cc" - # "tensor_operators.cc" - # "tensor_matmul_transpose.cc" - # "gradient.cc" + "tests/tensor-array/core/tensor_array_test.cc" + "tests/tensor-array/core/print_output.cc" + # "tests/tensor-array/core/tensor_operators.cc" + # "tests/tensor-array/core/tensor_matmul_transpose.cc" + # "tests/tensor-array/core/gradient.cc" ) enable_testing() create_test_sourcelist( TensorArray_tests - "test_driver.cc" + "tests/tensor-array/core/test_driver.cc" ${TensorArray_tests_src}) add_executable(tensorarray_core_tests ${TensorArray_tests}) From ccee3a863185a829721383c118e714b39cf7b678 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 11:00:16 +0000 Subject: [PATCH 237/281] tests --- SECURITY.md | 5 +++-- tests/tensor-array/core/gradient.cc | 
2 +- tests/tensor-array/core/print_output.cc | 2 +- tests/tensor-array/core/tensor_array_test.cc | 2 +- tests/tensor-array/core/tensor_matmul_transpose.cc | 2 +- tests/tensor-array/core/tensor_operators.cc | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index c26c3f5..1ae7a71 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,9 +2,10 @@ ## Supported Versions -| Version | Supported | +| Version | Supported | | ------- | ------------------ | -| 2024 | :white_check_mark: | +| 2025 | :white_check_mark: | +| 2024 | :x: | | 2023 | :x: | ## Reporting a Vulnerability diff --git a/tests/tensor-array/core/gradient.cc b/tests/tensor-array/core/gradient.cc index fbb5768..a04cdd9 100644 --- a/tests/tensor-array/core/gradient.cc +++ b/tests/tensor-array/core/gradient.cc @@ -20,7 +20,7 @@ limitations under the License. using namespace std; using namespace tensor_array::value; -int gradient(int argc, char *argv[]) +int tests_tensor_array_core_gradient(int argc, char *argv[]) { TensorArray example_tensor_array = {{ diff --git a/tests/tensor-array/core/print_output.cc b/tests/tensor-array/core/print_output.cc index ad1c9be..51722a5 100644 --- a/tests/tensor-array/core/print_output.cc +++ b/tests/tensor-array/core/print_output.cc @@ -20,7 +20,7 @@ limitations under the License. using namespace std; using namespace tensor_array::value; -int print_output(int argc, char *argv[]) +int tests_tensor_array_core_print_output(int argc, char *argv[]) { /* code */ TensorArray example_tensor_array = diff --git a/tests/tensor-array/core/tensor_array_test.cc b/tests/tensor-array/core/tensor_array_test.cc index 039ed01..e5d33c0 100644 --- a/tests/tensor-array/core/tensor_array_test.cc +++ b/tests/tensor-array/core/tensor_array_test.cc @@ -18,7 +18,7 @@ limitations under the License. 
using namespace tensor_array::value; -int tensor_array_test(int argc, char *argv[]) +int tests_tensor_array_core_tensor_array_test(int argc, char *argv[]) { TensorArray example_tensor_array = {{ diff --git a/tests/tensor-array/core/tensor_matmul_transpose.cc b/tests/tensor-array/core/tensor_matmul_transpose.cc index f101971..1b5e70a 100644 --- a/tests/tensor-array/core/tensor_matmul_transpose.cc +++ b/tests/tensor-array/core/tensor_matmul_transpose.cc @@ -18,7 +18,7 @@ limitations under the License. using namespace tensor_array::value; -int tensor_matmul_transpose(int argc, char *argv[]) +int tests_tensor_array_core_tensor_matmul_transpose(int argc, char *argv[]) { TensorArray example_tensor_array = {{ diff --git a/tests/tensor-array/core/tensor_operators.cc b/tests/tensor-array/core/tensor_operators.cc index 9a840d4..a92b1de 100644 --- a/tests/tensor-array/core/tensor_operators.cc +++ b/tests/tensor-array/core/tensor_operators.cc @@ -18,7 +18,7 @@ limitations under the License. using namespace tensor_array::value; -int tensor_operators(int argc, char *argv[]) +int tests_tensor_array_core_tensor_operators(int argc, char *argv[]) { TensorArray example_tensor_array = {{ From 1cdeaf4ed95657388789c447272311f66c0d181b Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 11:34:12 +0000 Subject: [PATCH 238/281] test 1 --- cmake/ta_core_tests.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ta_core_tests.cmake b/cmake/ta_core_tests.cmake index ddd4f19..c07d206 100644 --- a/cmake/ta_core_tests.cmake +++ b/cmake/ta_core_tests.cmake @@ -21,5 +21,5 @@ target_link_libraries(tensorarray_core_tests TensorArray::Core) foreach(test ${TensorArray_tests_src}) get_filename_component(TName ${test} NAME_WE) - add_test(NAME ${TName} COMMAND tensorarray_core_tests ${TName}) + add_test(NAME ${TName} COMMAND tensorarray_core_tests "tests_tensor_array_core_${TName}") endforeach() From 
256c5a71b99157061051414b1092ac37a75bac50 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 11:39:47 +0000 Subject: [PATCH 239/281] test 2 --- cmake/ta_core_tests.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/ta_core_tests.cmake b/cmake/ta_core_tests.cmake index c07d206..03647b5 100644 --- a/cmake/ta_core_tests.cmake +++ b/cmake/ta_core_tests.cmake @@ -20,6 +20,6 @@ target_include_directories(tensorarray_core_tests PRIVATE ${PROJECT_SOURCE_DIR}/ target_link_libraries(tensorarray_core_tests TensorArray::Core) foreach(test ${TensorArray_tests_src}) - get_filename_component(TName ${test} NAME_WE) - add_test(NAME ${TName} COMMAND tensorarray_core_tests "tests_tensor_array_core_${TName}") + get_filename_component(TName "tests_tensor_array_core_${test}" NAME_WE) + add_test(NAME ${TName} COMMAND tensorarray_core_tests ${TName}) endforeach() From 4578d23f7ee3eb1282b1d675a5719ddbbeec493b Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 11:47:27 +0000 Subject: [PATCH 240/281] tests --- CMakeLists.txt | 2 +- cmake/ta_core_tests.cmake | 14 +++++++------- tests/tensor-array/core/CMakeLists.txt | 2 ++ 3 files changed, 10 insertions(+), 8 deletions(-) create mode 100644 tests/tensor-array/core/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index ad9e155..d5678c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,7 @@ include(cmake/ta_interp_config.cmake) include(CTest) if(BUILD_TESTING) -include(cmake/ta_core_tests.cmake) + add_subdirectory("tests/tensor-array/core") endif() include(cmake/ta_add_doxygen.cmake) diff --git a/cmake/ta_core_tests.cmake b/cmake/ta_core_tests.cmake index 03647b5..c5c2914 100644 --- a/cmake/ta_core_tests.cmake +++ b/cmake/ta_core_tests.cmake @@ -1,18 +1,18 @@ set( TensorArray_tests_src - "tests/tensor-array/core/tensor_array_test.cc" - "tests/tensor-array/core/print_output.cc" 
- # "tests/tensor-array/core/tensor_operators.cc" - # "tests/tensor-array/core/tensor_matmul_transpose.cc" - # "tests/tensor-array/core/gradient.cc" + "tensor_array_test.cc" + "print_output.cc" + # "tensor_operators.cc" + # "tensor_matmul_transpose.cc" + # "gradient.cc" ) enable_testing() create_test_sourcelist( TensorArray_tests - "tests/tensor-array/core/test_driver.cc" + "test_driver.cc" ${TensorArray_tests_src}) add_executable(tensorarray_core_tests ${TensorArray_tests}) @@ -20,6 +20,6 @@ target_include_directories(tensorarray_core_tests PRIVATE ${PROJECT_SOURCE_DIR}/ target_link_libraries(tensorarray_core_tests TensorArray::Core) foreach(test ${TensorArray_tests_src}) - get_filename_component(TName "tests_tensor_array_core_${test}" NAME_WE) + get_filename_component(TName ${test} NAME_WE) add_test(NAME ${TName} COMMAND tensorarray_core_tests ${TName}) endforeach() diff --git a/tests/tensor-array/core/CMakeLists.txt b/tests/tensor-array/core/CMakeLists.txt new file mode 100644 index 0000000..25473a1 --- /dev/null +++ b/tests/tensor-array/core/CMakeLists.txt @@ -0,0 +1,2 @@ +project(TensorArray_tests) +include(cmake/ta_core_tests.cmake) From e7eec1b2fe907d0a64aef7826fe0992455e89598 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 11:52:08 +0000 Subject: [PATCH 241/281] test 3 --- tests/tensor-array/core/CMakeLists.txt | 3 +++ {cmake => tests/tensor-array/core/cmake}/ta_core_tests.cmake | 0 2 files changed, 3 insertions(+) rename {cmake => tests/tensor-array/core/cmake}/ta_core_tests.cmake (100%) diff --git a/tests/tensor-array/core/CMakeLists.txt b/tests/tensor-array/core/CMakeLists.txt index 25473a1..f271fbe 100644 --- a/tests/tensor-array/core/CMakeLists.txt +++ b/tests/tensor-array/core/CMakeLists.txt @@ -1,2 +1,5 @@ +cmake_minimum_required(VERSION 3.18) + project(TensorArray_tests) + include(cmake/ta_core_tests.cmake) diff --git a/cmake/ta_core_tests.cmake 
b/tests/tensor-array/core/cmake/ta_core_tests.cmake similarity index 100% rename from cmake/ta_core_tests.cmake rename to tests/tensor-array/core/cmake/ta_core_tests.cmake From 93d8a27a8d53215bb5703958e9d8f2fc67102225 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 11:56:22 +0000 Subject: [PATCH 242/281] test 5 --- tests/tensor-array/core/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tensor-array/core/CMakeLists.txt b/tests/tensor-array/core/CMakeLists.txt index f271fbe..26cc477 100644 --- a/tests/tensor-array/core/CMakeLists.txt +++ b/tests/tensor-array/core/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.18) -project(TensorArray_tests) +# project(TensorArray_tests) include(cmake/ta_core_tests.cmake) From 3a0e37a9e6bc469dcacf7976572f51d11880d2f3 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 11:59:03 +0000 Subject: [PATCH 243/281] tests --- tests/tensor-array/core/gradient.cc | 2 +- tests/tensor-array/core/print_output.cc | 2 +- tests/tensor-array/core/tensor_array_test.cc | 2 +- tests/tensor-array/core/tensor_matmul_transpose.cc | 2 +- tests/tensor-array/core/tensor_operators.cc | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/tensor-array/core/gradient.cc b/tests/tensor-array/core/gradient.cc index a04cdd9..fbb5768 100644 --- a/tests/tensor-array/core/gradient.cc +++ b/tests/tensor-array/core/gradient.cc @@ -20,7 +20,7 @@ limitations under the License. 
using namespace std; using namespace tensor_array::value; -int tests_tensor_array_core_gradient(int argc, char *argv[]) +int gradient(int argc, char *argv[]) { TensorArray example_tensor_array = {{ diff --git a/tests/tensor-array/core/print_output.cc b/tests/tensor-array/core/print_output.cc index 51722a5..ad1c9be 100644 --- a/tests/tensor-array/core/print_output.cc +++ b/tests/tensor-array/core/print_output.cc @@ -20,7 +20,7 @@ limitations under the License. using namespace std; using namespace tensor_array::value; -int tests_tensor_array_core_print_output(int argc, char *argv[]) +int print_output(int argc, char *argv[]) { /* code */ TensorArray example_tensor_array = diff --git a/tests/tensor-array/core/tensor_array_test.cc b/tests/tensor-array/core/tensor_array_test.cc index e5d33c0..039ed01 100644 --- a/tests/tensor-array/core/tensor_array_test.cc +++ b/tests/tensor-array/core/tensor_array_test.cc @@ -18,7 +18,7 @@ limitations under the License. using namespace tensor_array::value; -int tests_tensor_array_core_tensor_array_test(int argc, char *argv[]) +int tensor_array_test(int argc, char *argv[]) { TensorArray example_tensor_array = {{ diff --git a/tests/tensor-array/core/tensor_matmul_transpose.cc b/tests/tensor-array/core/tensor_matmul_transpose.cc index 1b5e70a..f101971 100644 --- a/tests/tensor-array/core/tensor_matmul_transpose.cc +++ b/tests/tensor-array/core/tensor_matmul_transpose.cc @@ -18,7 +18,7 @@ limitations under the License. using namespace tensor_array::value; -int tests_tensor_array_core_tensor_matmul_transpose(int argc, char *argv[]) +int tensor_matmul_transpose(int argc, char *argv[]) { TensorArray example_tensor_array = {{ diff --git a/tests/tensor-array/core/tensor_operators.cc b/tests/tensor-array/core/tensor_operators.cc index a92b1de..9a840d4 100644 --- a/tests/tensor-array/core/tensor_operators.cc +++ b/tests/tensor-array/core/tensor_operators.cc @@ -18,7 +18,7 @@ limitations under the License. 
using namespace tensor_array::value; -int tests_tensor_array_core_tensor_operators(int argc, char *argv[]) +int tensor_operators(int argc, char *argv[]) { TensorArray example_tensor_array = {{ From 4390da1b535832f6344d467d6f4bd9b6a61d60b3 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:24:15 +0000 Subject: [PATCH 244/281] static --- src/tensor-array/core/tensorbase.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/core/tensorbase.hh b/src/tensor-array/core/tensorbase.hh index 0474afa..c98881b 100644 --- a/src/tensor-array/core/tensorbase.hh +++ b/src/tensor-array/core/tensorbase.hh @@ -85,7 +85,7 @@ namespace tensor_array return std::make_unique>(this->arr_data); } - inline std::initializer_list dim_sizes() const override + inline static std::initializer_list dim_sizes() const override { return wrapper::initializer_wrapper(dim_size_array.data(), dim_size_array.data() + sizeof...(sz) + 1ULL); } From 613042c29145917b3a0cc820497c8bb1c5b75584 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:37:26 +0000 Subject: [PATCH 245/281] non-static and runner ubuntu 22.04 --- src/tensor-array/core/tensorbase.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/core/tensorbase.hh b/src/tensor-array/core/tensorbase.hh index c98881b..0474afa 100644 --- a/src/tensor-array/core/tensorbase.hh +++ b/src/tensor-array/core/tensorbase.hh @@ -85,7 +85,7 @@ namespace tensor_array return std::make_unique>(this->arr_data); } - inline static std::initializer_list dim_sizes() const override + inline std::initializer_list dim_sizes() const override { return wrapper::initializer_wrapper(dim_size_array.data(), dim_size_array.data() + sizeof...(sz) + 1ULL); } From f612face772bada27f88360b28564cabc0c809b5 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 
Aug 2025 12:37:33 +0000 Subject: [PATCH 246/281] test --- .github/workflows/codeql.yml | 2 +- .github/workflows/deploy-webpages.yml | 2 +- .github/workflows/docker-publish-d.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index d4e74da..601cc8d 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -27,7 +27,7 @@ jobs: # - https://gh.io/supported-runners-and-hardware-resources # - https://gh.io/using-larger-runners # Consider using larger runners for possible analysis time improvements. - runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-22.04' }} timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} permissions: actions: read diff --git a/.github/workflows/deploy-webpages.yml b/.github/workflows/deploy-webpages.yml index 941a059..3edbb5e 100644 --- a/.github/workflows/deploy-webpages.yml +++ b/.github/workflows/deploy-webpages.yml @@ -30,7 +30,7 @@ jobs: # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. # You can convert this to a matrix build if you need cross-platform coverage. 
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/docker-publish-d.yml b/.github/workflows/docker-publish-d.yml index ecb85ca..7971e42 100644 --- a/.github/workflows/docker-publish-d.yml +++ b/.github/workflows/docker-publish-d.yml @@ -8,7 +8,7 @@ jobs: push_to_registry: name: Push Docker image - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 permissions: packages: write contents: read From e7953ee88d0c2c9d525c645efb5e07b257d30db1 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 12:38:45 +0000 Subject: [PATCH 247/281] runner ubuntu 22.04 --- .github/workflows/docker-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 2e3d127..f9173c5 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -29,7 +29,7 @@ jobs: matrix: image-os: [ "Ubuntu" ] - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 permissions: contents: read From 70e0e85f75d0cfd84de85948ee096e8607a0e3a6 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 2 Aug 2025 00:42:15 +0700 Subject: [PATCH 248/281] Update Ubuntu.Dockerfile --- Dockerfolder/Ubuntu.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfolder/Ubuntu.Dockerfile b/Dockerfolder/Ubuntu.Dockerfile index f3b7908..2dba642 100644 --- a/Dockerfolder/Ubuntu.Dockerfile +++ b/Dockerfolder/Ubuntu.Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 +FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu22.04 RUN apt-get update RUN apt-get upgrade -y From efc235ab02bb12bd453a6de9467e84fabfb891ad Mon Sep 17 00:00:00 2001 From: Noob 
<63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 1 Aug 2025 18:40:07 +0000 Subject: [PATCH 249/281] remove "#undef TENSOR_ARRAY_API" --- src/tensor-array/core/extern_type_map.hh | 18 +++++++++++++++++- src/tensor-array/core/tensor.hh | 18 ------------------ src/tensor-array/core/tensorbase.hh | 12 ------------ src/tensor-array/layers/layer_impl.hh | 10 ---------- 4 files changed, 17 insertions(+), 41 deletions(-) diff --git a/src/tensor-array/core/extern_type_map.hh b/src/tensor-array/core/extern_type_map.hh index 9146a3e..eab15cc 100644 --- a/src/tensor-array/core/extern_type_map.hh +++ b/src/tensor-array/core/extern_type_map.hh @@ -17,10 +17,26 @@ limitations under the License. #include #include +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#define TENSOR_ARRAY_EXPORT_API __declspec(dllexport) +#define TENSOR_ARRAY_IMPORT_API +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#define TENSOR_ARRAY_EXPORT_API +#define TENSOR_ARRAY_IMPORT_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#define TENSOR_ARRAY_EXPORT_API +#define TENSOR_ARRAY_IMPORT_API +#endif + namespace tensor_array { namespace value { - extern std::unordered_map dynamic_type_size; + extern std::unordered_map TENSOR_ARRAY_API dynamic_type_size; } } diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index a05ed91..bc1601a 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -21,22 +21,6 @@ limitations under the License. 
#pragma once -#ifdef _WIN32 -#ifdef TENSOR_ARRAY_EXPORTS -#define TENSOR_ARRAY_API __declspec(dllexport) -#define TENSOR_ARRAY_EXPORT_API __declspec(dllexport) -#define TENSOR_ARRAY_IMPORT_API -#else -#define TENSOR_ARRAY_API __declspec(dllimport) -#define TENSOR_ARRAY_EXPORT_API -#define TENSOR_ARRAY_IMPORT_API __declspec(dllimport) -#endif -#else -#define TENSOR_ARRAY_API -#define TENSOR_ARRAY_EXPORT_API -#define TENSOR_ARRAY_IMPORT_API -#endif - #define USING_DATA_TYPE_FLOAT() (float)(double) #define USING_DATA_TYPE_SINT() (int8_t)(int16_t)(int32_t)(int64_t) #define USING_DATA_TYPE_UINT() (uint8_t)(uint16_t)(uint32_t)(uint64_t) @@ -458,5 +442,3 @@ struct std::equal_to #undef USING_DATA_TYPE_FLOAT #undef USING_DATA_TYPE_SINT #undef USING_DATA_TYPE_UINT - -#undef TENSOR_ARRAY_API \ No newline at end of file diff --git a/src/tensor-array/core/tensorbase.hh b/src/tensor-array/core/tensorbase.hh index 0474afa..d8106ea 100644 --- a/src/tensor-array/core/tensorbase.hh +++ b/src/tensor-array/core/tensorbase.hh @@ -23,16 +23,6 @@ limitations under the License. #include "initializer_wrapper.hh" #pragma once -#ifdef _WIN32 -#ifdef TENSOR_ARRAY_EXPORTS -#define TENSOR_ARRAY_API __declspec(dllexport) -#else -#define TENSOR_ARRAY_API __declspec(dllimport) -#endif -#else -#define TENSOR_ARRAY_API -#endif - namespace tensor_array { namespace value @@ -212,5 +202,3 @@ namespace tensor_array std::size_t get_sizeof_type(const std::type_info&); } } - -#undef TENSOR_ARRAY_API \ No newline at end of file diff --git a/src/tensor-array/layers/layer_impl.hh b/src/tensor-array/layers/layer_impl.hh index f361bc2..b168e3c 100644 --- a/src/tensor-array/layers/layer_impl.hh +++ b/src/tensor-array/layers/layer_impl.hh @@ -20,16 +20,6 @@ limitations under the License. 
#include #pragma once -#ifdef _WIN32 -#ifdef TENSOR_ARRAY_EXPORTS -#define TENSOR_ARRAY_API __declspec(dllexport) -#else -#define TENSOR_ARRAY_API __declspec(dllimport) -#endif -#else -#define TENSOR_ARRAY_API -#endif - namespace tensor_array { namespace layers From b7357c6cd83ef6573d1da83e2fd09e8be360a9b9 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 2 Aug 2025 10:01:33 +0700 Subject: [PATCH 250/281] Update vmop.cc --- src/tensor-array/interp/vmop.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/interp/vmop.cc b/src/tensor-array/interp/vmop.cc index ae68654..08799a6 100644 --- a/src/tensor-array/interp/vmop.cc +++ b/src/tensor-array/interp/vmop.cc @@ -22,7 +22,7 @@ limitations under the License. #include "vmop.h" #include "vm_type.h" -typedef long VM_INSTRUCTION; +typedef void* VM_INSTRUCTION; extern VM_INSTRUCTION* pc; std::stack tensor_stack; From 99452927edfbf5ca1b41b93927276e5ad06afd19 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 2 Aug 2025 03:36:53 +0000 Subject: [PATCH 251/281] tests --- src/tensor-array/core/extern_type_map.hh | 5 ++++- src/tensor-array/core/tensor.hh | 3 +-- src/tensor-array/layers/normalization.cc | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/tensor-array/core/extern_type_map.hh b/src/tensor-array/core/extern_type_map.hh index eab15cc..1f4eb68 100644 --- a/src/tensor-array/core/extern_type_map.hh +++ b/src/tensor-array/core/extern_type_map.hh @@ -37,6 +37,9 @@ namespace tensor_array { namespace value { - extern std::unordered_map TENSOR_ARRAY_API dynamic_type_size; + /** + * Map of data types. 
+ */ + extern TENSOR_ARRAY_API std::unordered_map dynamic_type_size; } } diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index bc1601a..6971f17 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -39,8 +39,7 @@ namespace tensor_array { namespace value { - bool TENSOR_ARRAY_API is_use_grad(); - void TENSOR_ARRAY_API set_use_grad(bool use_grad); + extern TENSOR_ARRAY_API bool use_grad; #ifdef TENSOR_CONTENT void* create_mem_101(std::size_t s, const void* dat); diff --git a/src/tensor-array/layers/normalization.cc b/src/tensor-array/layers/normalization.cc index f67a4fa..0ad3870 100644 --- a/src/tensor-array/layers/normalization.cc +++ b/src/tensor-array/layers/normalization.cc @@ -40,7 +40,7 @@ namespace tensor_array value::Tensor NormalizationImpl::calculate(const value::Tensor& input) { value::Tensor normal; - if (tensor_array::value::is_use_grad()) + if (tensor_array::value::use_grad) { value::Tensor temp_mean = input.mean(this->dims_mean); value::Tensor temp_variance = input.variance(this->dims_mean); From 67fa3140559732b0da3c4613a2b110ed8a58df09 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 2 Aug 2025 03:58:09 +0000 Subject: [PATCH 252/281] change exports preprocessor --- CMakeLists.txt | 4 ---- cmake/ta_core_config.cmake | 3 +++ cmake/ta_layers_config.cmake | 4 ++++ src/tensor-array/core/extern_type_map.hh | 2 +- src/tensor-array/layers/layer_impl.hh | 18 ++++++++++++++++++ 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d5678c9..4ccf9fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,10 +5,6 @@ project(TensorArray C CXX) include(GNUInstallDirs) # set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) -if(MSVC) - add_compile_definitions(TENSOR_ARRAY_EXPORTS) -endif() - file( GLOB_RECURSE TensorArray_inc "${PROJECT_SOURCE_DIR}/src/*.h" diff --git a/cmake/ta_core_config.cmake 
b/cmake/ta_core_config.cmake index 454a95b..f514b72 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -17,6 +17,9 @@ if(CMAKE_CUDA_COMPILER) set_property(TARGET tensorarray_core PROPERTY CUDA_SEPARABLE_COMPILATION ON) target_include_directories(tensorarray_core PRIVATE $<$:${CUDAToolkit_INCLUDE_DIRS}>) target_link_libraries(tensorarray_core PRIVATE $<$:CUDA::cublas>) + if(MSVC) + target_compile_definitions(tensorarray_core PRIVATE TENSOR_ARRAY_CORE_EXPORTS) + endif() # find_package(CUDAToolkit REQUIRED) # set(CMAKE_CUDA_ARCHITECTURES 52 75 89) diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake index f663374..d43b446 100644 --- a/cmake/ta_layers_config.cmake +++ b/cmake/ta_layers_config.cmake @@ -13,6 +13,10 @@ set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_layers PROPERTY CXX_EXTENSIONS OFF) +if(MSVC) + target_compile_definitions(tensorarray_layers PRIVATE TENSOR_ARRAY_LAYERS_EXPORTS) +endif() + install( TARGETS tensorarray_layers EXPORT TensorArrayTargets diff --git a/src/tensor-array/core/extern_type_map.hh b/src/tensor-array/core/extern_type_map.hh index 1f4eb68..b495e29 100644 --- a/src/tensor-array/core/extern_type_map.hh +++ b/src/tensor-array/core/extern_type_map.hh @@ -18,7 +18,7 @@ limitations under the License. #include #ifdef _WIN32 -#ifdef TENSOR_ARRAY_EXPORTS +#ifdef TENSOR_ARRAY_CORE_EXPORTS #define TENSOR_ARRAY_API __declspec(dllexport) #define TENSOR_ARRAY_EXPORT_API __declspec(dllexport) #define TENSOR_ARRAY_IMPORT_API diff --git a/src/tensor-array/layers/layer_impl.hh b/src/tensor-array/layers/layer_impl.hh index b168e3c..98ee173 100644 --- a/src/tensor-array/layers/layer_impl.hh +++ b/src/tensor-array/layers/layer_impl.hh @@ -18,6 +18,24 @@ limitations under the License. 
#include #include #include + +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_LAYERS_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#define TENSOR_ARRAY_EXPORT_API __declspec(dllexport) +#define TENSOR_ARRAY_IMPORT_API +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#define TENSOR_ARRAY_EXPORT_API +#define TENSOR_ARRAY_IMPORT_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#define TENSOR_ARRAY_EXPORT_API +#define TENSOR_ARRAY_IMPORT_API +#endif + + #pragma once namespace tensor_array From e520ed878145034145759f5eba666cd2047fc9e7 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 2 Aug 2025 11:50:59 +0700 Subject: [PATCH 253/281] Update tensor.hh --- src/tensor-array/core/tensor.hh | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index 6971f17..1ceb078 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -174,17 +174,10 @@ namespace tensor_array Tensor reshape(const std::vector&) const; Tensor tensor_cast(const std::type_info&) const; Tensor conv_padding(const dimension&) const; -#ifdef TENSOR_CONTENT - friend Tensor derive_transpose(const Tensor&, const Tensor&, bool, const DataBuffer&); - - friend Tensor derive_reshape_cast(const Tensor&, const Tensor&, bool, const DataBuffer&); -#endif Tensor transpose(unsigned char, unsigned char) const; std::pair max(unsigned char = 0) const; std::pair min(unsigned char = 0) const; - friend TENSOR_ARRAY_EXPORT_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char, unsigned char); #ifdef TENSOR_CONTENT - friend TENSOR_ARRAY_API Tensor add_dim(const std::vector&); #endif bool has_tensor() const; template @@ -235,7 +228,9 @@ namespace tensor_array Tensor log() const; #ifdef TENSOR_CONTENT - friend TENSOR_ARRAY_EXPORT_API Tensor tensor_rand(const std::initializer_list&, unsigned int); + friend 
Tensor derive_transpose(const Tensor&, const Tensor&, bool, const DataBuffer&); + + friend Tensor derive_reshape_cast(const Tensor&, const Tensor&, bool, const DataBuffer&); friend Tensor add(const Tensor&, const Tensor&, bool); @@ -258,6 +253,13 @@ namespace tensor_array Tensor tensor_cast(const std::type_info&, bool) const; #endif + + friend TENSOR_ARRAY_API Tensor add_dim(const std::vector&); + + friend TENSOR_ARRAY_API Tensor tensor_rand(const std::initializer_list&, unsigned int = std::rand()); + + friend TENSOR_ARRAY_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char = 0, unsigned char = 0); + friend TENSOR_ARRAY_API std::ostream& operator<<(std::ostream&, const Tensor&); private: @@ -355,14 +357,9 @@ namespace tensor_array * Tensor (N, K, ...) */ TENSOR_ARRAY_API Tensor convolution(const Tensor&, const Tensor&, const dimension& = value::dimension(), const dimension& = value::dimension()); - TENSOR_ARRAY_IMPORT_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char = 0, unsigned char = 0); - TENSOR_ARRAY_IMPORT_API Tensor tensor_rand(const std::initializer_list&, unsigned int = std::rand()); #define ADD_CODE(TYPE) TENSOR_ARRAY_API Tensor values(const std::initializer_list&, TYPE); LOOP(USING_DATA_TYPE); #undef ADD_CODE -#ifndef TENSOR_CONTENT - TENSOR_ARRAY_API Tensor add_dim(const std::vector&); -#endif TENSOR_ARRAY_API const std::type_info& comparison_type(const std::type_info&, const std::type_info&); TENSOR_ARRAY_API Tensor tensor_rand(const std::vector&, unsigned int = std::rand()); From d7a975357b0d819801401c093ad6fdff4e108fe1 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 2 Aug 2025 11:59:58 +0700 Subject: [PATCH 254/281] Update tensor.hh --- src/tensor-array/core/tensor.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index 1ceb078..2862e71 100644 --- 
a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -256,9 +256,9 @@ namespace tensor_array friend TENSOR_ARRAY_API Tensor add_dim(const std::vector&); - friend TENSOR_ARRAY_API Tensor tensor_rand(const std::initializer_list&, unsigned int = std::rand()); + friend TENSOR_ARRAY_API Tensor tensor_rand(const std::initializer_list&, unsigned int/* = std::rand() */); - friend TENSOR_ARRAY_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char = 0, unsigned char = 0); + friend TENSOR_ARRAY_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char /*= 0*/, unsigned char /*= 0*/); friend TENSOR_ARRAY_API std::ostream& operator<<(std::ostream&, const Tensor&); From 41abe6954744be6b608d06d1270cc0cb862389cc Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 2 Aug 2025 12:12:16 +0700 Subject: [PATCH 255/281] Update tensor.hh --- src/tensor-array/core/tensor.hh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index 2862e71..4b4354d 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -90,6 +90,12 @@ namespace tensor_array dilation; }; + class Tensor; + + TENSOR_ARRAY_API Tensor tensor_rand(const std::initializer_list&, unsigned int = std::rand()); + + TENSOR_ARRAY_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char 0, unsigned char = 0); + /** * \brief Dynamic derivative tensor. * \brief This class use to calculate the tensor. 
@@ -256,9 +262,9 @@ namespace tensor_array friend TENSOR_ARRAY_API Tensor add_dim(const std::vector&); - friend TENSOR_ARRAY_API Tensor tensor_rand(const std::initializer_list&, unsigned int/* = std::rand() */); + friend Tensor tensor_rand(const std::initializer_list&, unsigned int/* = std::rand() */); - friend TENSOR_ARRAY_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char /*= 0*/, unsigned char /*= 0*/); + friend std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char /*= 0*/, unsigned char /*= 0*/); friend TENSOR_ARRAY_API std::ostream& operator<<(std::ostream&, const Tensor&); From 48fd24f9753fd55e36298e74c95559b116e1f729 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 2 Aug 2025 12:16:16 +0700 Subject: [PATCH 256/281] Update tensor.hh --- src/tensor-array/core/tensor.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index 4b4354d..b24a75a 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -94,7 +94,7 @@ namespace tensor_array TENSOR_ARRAY_API Tensor tensor_rand(const std::initializer_list&, unsigned int = std::rand()); - TENSOR_ARRAY_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char 0, unsigned char = 0); + TENSOR_ARRAY_API std::pair tensor_broadcasting(const Tensor&, const Tensor&, unsigned char = 0, unsigned char = 0); /** * \brief Dynamic derivative tensor. 
From 16f9e6f3394df4641b28a908cd1ab4f44d7fcfdc Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Sat, 2 Aug 2025 06:05:09 +0000 Subject: [PATCH 257/281] changes vm reg data types --- src/tensor-array/interp/parser.c | 6 +++--- src/tensor-array/interp/sym_map.h | 8 ++++---- src/tensor-array/interp/token.c | 4 ++-- src/tensor-array/interp/token.h | 4 ++-- src/tensor-array/interp/vm.h | 4 ++-- src/tensor-array/interp/vmop.cc | 6 +++--- src/tensor-array/interp/vmop.h | 4 ++-- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c index 57ef27f..9ab76a9 100644 --- a/src/tensor-array/interp/parser.c +++ b/src/tensor-array/interp/parser.c @@ -24,13 +24,13 @@ limitations under the License. #include "sym_map.h" #include "vm_type.h" -void emit(int size, ...) +void emit(unsigned size, ...) { va_list args; va_start(args, size); // Process the variable arguments as needed - for (int i = 0; i < size; ++i) { + for (unsigned i = 0; i < size; ++i) { ++text; *text = va_arg(args, VM_INSTRUCTION); } @@ -38,7 +38,7 @@ void emit(int size, ...) va_end(args); } -void match(long tk) +void match(int tk) { if (tkn == tk) { token_next(); // Move to the next token diff --git a/src/tensor-array/interp/sym_map.h b/src/tensor-array/interp/sym_map.h index d5acb1e..b60eeb0 100644 --- a/src/tensor-array/interp/sym_map.h +++ b/src/tensor-array/interp/sym_map.h @@ -20,10 +20,10 @@ extern "C" #endif typedef struct { - long tkn; - long hash; - long cls; - long type; + int tkn; + int hash; + int cls; + int type; void* data; // Pointer to additional data if needed } sym_data; void sym_data_set(char* name, sym_data dat); diff --git a/src/tensor-array/interp/token.c b/src/tensor-array/interp/token.c index 6850ecb..888150d 100644 --- a/src/tensor-array/interp/token.c +++ b/src/tensor-array/interp/token.c @@ -22,8 +22,8 @@ limitations under the License. 
#include "open_file.h" #include "token.h" -long tkn = 0; -long tkn_val = 0; // Variable to hold the value of the current token +int tkn = 0; +int tkn_val = 0; // Variable to hold the value of the current token char* tknname[] = { "num", "sys", "glo", "loc", "id", diff --git a/src/tensor-array/interp/token.h b/src/tensor-array/interp/token.h index 3f61b80..835eaf5 100644 --- a/src/tensor-array/interp/token.h +++ b/src/tensor-array/interp/token.h @@ -25,6 +25,6 @@ typedef enum } TOKEN_TYPE; void token_next(); -extern long tkn; -extern long tkn_val; +extern int tkn; +extern int tkn_val; extern char *tknname[]; diff --git a/src/tensor-array/interp/vm.h b/src/tensor-array/interp/vm.h index 2cdd10a..9fee312 100644 --- a/src/tensor-array/interp/vm.h +++ b/src/tensor-array/interp/vm.h @@ -21,9 +21,9 @@ typedef enum OPEN, READ, CLOSE, PRTF, MALC, MSET, MCMP, EXIT } VM_INSTRUCTION_V2; -typedef long VM_INSTRUCTION; +typedef size_t VM_INSTRUCTION; void eval(); -extern long any_value; +extern size_t any_value; extern VM_INSTRUCTION* orig; diff --git a/src/tensor-array/interp/vmop.cc b/src/tensor-array/interp/vmop.cc index 08799a6..6fa2a8e 100644 --- a/src/tensor-array/interp/vmop.cc +++ b/src/tensor-array/interp/vmop.cc @@ -22,7 +22,7 @@ limitations under the License. #include "vmop.h" #include "vm_type.h" -typedef void* VM_INSTRUCTION; +typedef size_t VM_INSTRUCTION; extern VM_INSTRUCTION* pc; std::stack tensor_stack; @@ -30,8 +30,8 @@ std::stack var_stack; std::stack> call_stack; tensor_array::value::Tensor ag; void* aptr; -long any_value; -long any_type; +size_t any_value; +size_t any_type; void new_int() { diff --git a/src/tensor-array/interp/vmop.h b/src/tensor-array/interp/vmop.h index eba64a7..a8a98e0 100644 --- a/src/tensor-array/interp/vmop.h +++ b/src/tensor-array/interp/vmop.h @@ -18,8 +18,8 @@ limitations under the License. 
extern "C" { #endif - extern long any_value; - extern long any_type; + extern size_t any_value; + extern size_t any_type; void op_imm(); void op_add(); void op_sub(); From 3bfc0a94e08982711b18271b97b458ba700fbdc0 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 4 Aug 2025 03:13:37 +0000 Subject: [PATCH 258/281] test --- src/tensor-array/core/extern_type_map.hh | 8 ++------ src/tensor-array/core/tensor.hh | 12 +++++++++++ src/tensor-array/core/tensorbase.hh | 14 ++++++++++++- src/tensor-array/interp/parser.c | 1 + src/tensor-array/interp/vm.c | 1 + src/tensor-array/interp/vm.h | 3 --- src/tensor-array/interp/vm_instruction.h | 26 ++++++++++++++++++++++++ src/tensor-array/interp/vmop.cc | 4 +--- src/tensor-array/layers/attention.hh | 14 ++++++++++++- src/tensor-array/layers/convolution.hh | 11 ++++++++++ src/tensor-array/layers/layer_impl.hh | 11 +++------- src/tensor-array/layers/layer_utility.hh | 12 +++++++++++ src/tensor-array/layers/linear.hh | 12 ++++++++++- src/tensor-array/layers/normalization.hh | 11 ++++++++++ src/tensor-array/layers/recurrent.hh | 11 ++++++++++ src/tensor-array/layers/sequential.hh | 11 ++++++++++ src/tensor-array/layers/transformer.hh | 11 ++++++++++ 17 files changed, 150 insertions(+), 23 deletions(-) create mode 100644 src/tensor-array/interp/vm_instruction.h diff --git a/src/tensor-array/core/extern_type_map.hh b/src/tensor-array/core/extern_type_map.hh index b495e29..9e33356 100644 --- a/src/tensor-array/core/extern_type_map.hh +++ b/src/tensor-array/core/extern_type_map.hh @@ -20,17 +20,11 @@ limitations under the License. 
#ifdef _WIN32 #ifdef TENSOR_ARRAY_CORE_EXPORTS #define TENSOR_ARRAY_API __declspec(dllexport) -#define TENSOR_ARRAY_EXPORT_API __declspec(dllexport) -#define TENSOR_ARRAY_IMPORT_API #else #define TENSOR_ARRAY_API __declspec(dllimport) -#define TENSOR_ARRAY_EXPORT_API -#define TENSOR_ARRAY_IMPORT_API __declspec(dllimport) #endif #else #define TENSOR_ARRAY_API -#define TENSOR_ARRAY_EXPORT_API -#define TENSOR_ARRAY_IMPORT_API #endif namespace tensor_array @@ -43,3 +37,5 @@ namespace tensor_array extern TENSOR_ARRAY_API std::unordered_map dynamic_type_size; } } + +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/core/tensor.hh b/src/tensor-array/core/tensor.hh index b24a75a..af276d7 100644 --- a/src/tensor-array/core/tensor.hh +++ b/src/tensor-array/core/tensor.hh @@ -35,6 +35,16 @@ limitations under the License. #define END(...) END_(__VA_ARGS__) #define END_(...) __VA_ARGS__##_END +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_CORE_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#endif + namespace tensor_array { namespace value @@ -444,3 +454,5 @@ struct std::equal_to #undef USING_DATA_TYPE_FLOAT #undef USING_DATA_TYPE_SINT #undef USING_DATA_TYPE_UINT + +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/core/tensorbase.hh b/src/tensor-array/core/tensorbase.hh index d8106ea..a32e733 100644 --- a/src/tensor-array/core/tensorbase.hh +++ b/src/tensor-array/core/tensorbase.hh @@ -23,6 +23,16 @@ limitations under the License. 
#include "initializer_wrapper.hh" #pragma once +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_CORE_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#endif + namespace tensor_array { namespace value @@ -200,5 +210,7 @@ namespace tensor_array }; std::size_t get_sizeof_type(const std::type_info&); + } } -} + +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/interp/parser.c b/src/tensor-array/interp/parser.c index 9ab76a9..0bd9d0c 100644 --- a/src/tensor-array/interp/parser.c +++ b/src/tensor-array/interp/parser.c @@ -21,6 +21,7 @@ limitations under the License. #include "parser.h" #include "token.h" #include "open_file.h" +#include "vm_instruction.h" #include "sym_map.h" #include "vm_type.h" diff --git a/src/tensor-array/interp/vm.c b/src/tensor-array/interp/vm.c index 91518de..edd4a72 100644 --- a/src/tensor-array/interp/vm.c +++ b/src/tensor-array/interp/vm.c @@ -16,6 +16,7 @@ limitations under the License. #include #include +#include "vm_instruction.h" #include "vmop.h" #include "vm.h" diff --git a/src/tensor-array/interp/vm.h b/src/tensor-array/interp/vm.h index 9fee312..6c1633e 100644 --- a/src/tensor-array/interp/vm.h +++ b/src/tensor-array/interp/vm.h @@ -21,9 +21,6 @@ typedef enum OPEN, READ, CLOSE, PRTF, MALC, MSET, MCMP, EXIT } VM_INSTRUCTION_V2; -typedef size_t VM_INSTRUCTION; - void eval(); extern size_t any_value; -extern VM_INSTRUCTION* orig; diff --git a/src/tensor-array/interp/vm_instruction.h b/src/tensor-array/interp/vm_instruction.h new file mode 100644 index 0000000..29677f5 --- /dev/null +++ b/src/tensor-array/interp/vm_instruction.h @@ -0,0 +1,26 @@ +/* +Copyright 2024 TensorArray-Creators + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifdef __cplusplus +extern "C" +{ +#endif + typedef size_t VM_INSTRUCTION; + extern VM_INSTRUCTION* orig; + extern VM_INSTRUCTION* pc; +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/src/tensor-array/interp/vmop.cc b/src/tensor-array/interp/vmop.cc index 6fa2a8e..022e672 100644 --- a/src/tensor-array/interp/vmop.cc +++ b/src/tensor-array/interp/vmop.cc @@ -19,12 +19,10 @@ limitations under the License. #include #include #include "sym_map.h" +#include "vm_instruction.h" #include "vmop.h" #include "vm_type.h" -typedef size_t VM_INSTRUCTION; -extern VM_INSTRUCTION* pc; - std::stack tensor_stack; std::stack var_stack; std::stack> call_stack; diff --git a/src/tensor-array/layers/attention.hh b/src/tensor-array/layers/attention.hh index 0c3b4c3..dbb4f3c 100644 --- a/src/tensor-array/layers/attention.hh +++ b/src/tensor-array/layers/attention.hh @@ -19,6 +19,16 @@ limitations under the License. 
#include "normalization.hh" #pragma once +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_LAYERS_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#endif + namespace tensor_array { namespace layers @@ -39,4 +49,6 @@ namespace tensor_array using MultiHeadAttention = LayerHolder; } -} \ No newline at end of file +} + +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/layers/convolution.hh b/src/tensor-array/layers/convolution.hh index 6da4845..e217a86 100644 --- a/src/tensor-array/layers/convolution.hh +++ b/src/tensor-array/layers/convolution.hh @@ -18,6 +18,16 @@ limitations under the License. #include "layer_utility.hh" #pragma once +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_LAYERS_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#endif + namespace tensor_array { namespace layers @@ -71,3 +81,4 @@ namespace tensor_array } } +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/layers/layer_impl.hh b/src/tensor-array/layers/layer_impl.hh index 98ee173..7852524 100644 --- a/src/tensor-array/layers/layer_impl.hh +++ b/src/tensor-array/layers/layer_impl.hh @@ -22,20 +22,13 @@ limitations under the License. 
#ifdef _WIN32 #ifdef TENSOR_ARRAY_LAYERS_EXPORTS #define TENSOR_ARRAY_API __declspec(dllexport) -#define TENSOR_ARRAY_EXPORT_API __declspec(dllexport) -#define TENSOR_ARRAY_IMPORT_API #else #define TENSOR_ARRAY_API __declspec(dllimport) -#define TENSOR_ARRAY_EXPORT_API -#define TENSOR_ARRAY_IMPORT_API __declspec(dllimport) #endif #else #define TENSOR_ARRAY_API -#define TENSOR_ARRAY_EXPORT_API -#define TENSOR_ARRAY_IMPORT_API #endif - #pragma once namespace tensor_array @@ -75,4 +68,6 @@ namespace tensor_array public CalculateStruct {}; } -} \ No newline at end of file +} + +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/layers/layer_utility.hh b/src/tensor-array/layers/layer_utility.hh index f8bcbb0..5b901fd 100644 --- a/src/tensor-array/layers/layer_utility.hh +++ b/src/tensor-array/layers/layer_utility.hh @@ -18,6 +18,16 @@ limitations under the License. #pragma once +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_LAYERS_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#endif + namespace tensor_array { namespace layers @@ -54,3 +64,5 @@ namespace tensor_array using ReShape = LayerHolder; } } + +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/layers/linear.hh b/src/tensor-array/layers/linear.hh index e72f07e..45cbc4e 100644 --- a/src/tensor-array/layers/linear.hh +++ b/src/tensor-array/layers/linear.hh @@ -17,6 +17,16 @@ limitations under the License. 
#include "layer_holder.hh" #pragma once +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_LAYERS_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#endif + namespace tensor_array { namespace layers @@ -43,4 +53,4 @@ namespace tensor_array } } - +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/layers/normalization.hh b/src/tensor-array/layers/normalization.hh index b93501f..6d8a8f7 100644 --- a/src/tensor-array/layers/normalization.hh +++ b/src/tensor-array/layers/normalization.hh @@ -17,6 +17,16 @@ limitations under the License. #include "layer_holder.hh" #pragma once +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_LAYERS_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#endif + namespace tensor_array { namespace layers @@ -39,3 +49,4 @@ namespace tensor_array } } +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/layers/recurrent.hh b/src/tensor-array/layers/recurrent.hh index df8b2f1..721cd2a 100644 --- a/src/tensor-array/layers/recurrent.hh +++ b/src/tensor-array/layers/recurrent.hh @@ -18,6 +18,16 @@ limitations under the License. #include "layer_utility.hh" #pragma once +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_LAYERS_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#endif + namespace tensor_array { namespace layers @@ -60,3 +70,4 @@ namespace tensor_array } } +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/layers/sequential.hh b/src/tensor-array/layers/sequential.hh index abf8608..cbe95e0 100644 --- a/src/tensor-array/layers/sequential.hh +++ b/src/tensor-array/layers/sequential.hh @@ -17,6 +17,16 @@ limitations under the License. 
#include "layer_holder.hh" #pragma once +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_LAYERS_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#endif + namespace tensor_array { namespace layers @@ -46,3 +56,4 @@ namespace tensor_array } } +#undef TENSOR_ARRAY_API diff --git a/src/tensor-array/layers/transformer.hh b/src/tensor-array/layers/transformer.hh index 8352fc6..a2c8257 100644 --- a/src/tensor-array/layers/transformer.hh +++ b/src/tensor-array/layers/transformer.hh @@ -17,6 +17,16 @@ limitations under the License. #include "attention.hh" #pragma once +#ifdef _WIN32 +#ifdef TENSOR_ARRAY_LAYERS_EXPORTS +#define TENSOR_ARRAY_API __declspec(dllexport) +#else +#define TENSOR_ARRAY_API __declspec(dllimport) +#endif +#else +#define TENSOR_ARRAY_API +#endif + namespace tensor_array { namespace layers @@ -70,3 +80,4 @@ namespace tensor_array } } +#undef TENSOR_ARRAY_API From 3754db86f4eecf95e86f9238bb62a86bd733bf93 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 4 Aug 2025 03:16:55 +0000 Subject: [PATCH 259/281] test --- src/tensor-array/interp/open_file.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tensor-array/interp/open_file.h b/src/tensor-array/interp/open_file.h index fbc4a21..7d37999 100644 --- a/src/tensor-array/interp/open_file.h +++ b/src/tensor-array/interp/open_file.h @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "vm_instruction.h" #include "vm.h" extern char *src; From 5ca0fa59c881548a580b8eb39920e7d999d10744 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Mon, 4 Aug 2025 03:41:45 +0000 Subject: [PATCH 260/281] test array --- src/tensor-array/core/tensorbase.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tensor-array/core/tensorbase.hh b/src/tensor-array/core/tensorbase.hh index a32e733..e05ff84 100644 --- a/src/tensor-array/core/tensorbase.hh +++ b/src/tensor-array/core/tensorbase.hh @@ -64,7 +64,7 @@ namespace tensor_array class TensorArrayStorage final : public TensorStorage { private: - static constexpr const std::array dim_size_array{ sz0, sz... }; + static constexpr const unsigned int dim_size_array[sizeof...(sz) + 1ULL] = { sz0, sz... }; const TensorArray arr_data; public: constexpr TensorArrayStorage(const TensorArray& arr_data) : @@ -87,7 +87,7 @@ namespace tensor_array inline std::initializer_list dim_sizes() const override { - return wrapper::initializer_wrapper(dim_size_array.data(), dim_size_array.data() + sizeof...(sz) + 1ULL); + return wrapper::initializer_wrapper(dim_size_array, dim_size_array + sizeof...(sz) + 1ULL); } inline const void* data() const override From 150f1a85d122ada947154ac41abb6eeeb53901ef Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 5 Aug 2025 04:32:16 +0000 Subject: [PATCH 261/281] update docker files --- .github/workflows/docker-publish-d.yml | 8 +++-- .github/workflows/docker-publish.yml | 7 ++-- ...le => CUDA-12.9.1-Ubuntu-20.04.Dockerfile} | 5 +-- Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile | 35 +++++++++++++++++++ 4 files changed, 48 insertions(+), 7 deletions(-) rename Dockerfolder/{Ubuntu.Dockerfile => CUDA-12.9.1-Ubuntu-20.04.Dockerfile} (92%) create mode 100644 Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile diff --git a/.github/workflows/docker-publish-d.yml b/.github/workflows/docker-publish-d.yml 
index 7971e42..0197115 100644 --- a/.github/workflows/docker-publish-d.yml +++ b/.github/workflows/docker-publish-d.yml @@ -9,6 +9,9 @@ jobs: push_to_registry: name: Push Docker image runs-on: ubuntu-22.04 + strategy: + matrix: + image-tag: [ "CUDA-12.9.1-Ubuntu-2004", "CUDA-12.9.1-devel-ubi8" ] permissions: packages: write contents: read @@ -28,7 +31,8 @@ jobs: id: meta uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 with: - image: noobwastaken/tensor-array + image: ${{ secrets.DOCKER_USERNAME }}/tensor-array + tags: ${{ matrix.image-tag }} - name: Build and push Docker images id: push @@ -43,6 +47,6 @@ jobs: - name: Attest Build Provenance uses: actions/attest-build-provenance@v2 with: - subject-name: index.docker.io/noobwastaken/tensor-array + subject-name: index.docker.io/${{ secrets.DOCKER_USERNAME }}/tensor-array subject-digest: ${{ steps.push.outputs.digest }} push-to-registry: true diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index f9173c5..8f89c6c 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -27,7 +27,7 @@ jobs: strategy: fail-fast: false matrix: - image-os: [ "Ubuntu" ] + image-tag: [ "CUDA-12.9.1-Ubuntu-20.04", "CUDA-12.9.1-devel-ubi8" ] runs-on: ubuntu-22.04 @@ -73,7 +73,8 @@ jobs: uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - tags: ${{ matrix.image-os }} + tags: | + type=raw,value=${{ matrix.image-tag }} # Build and push Docker image with Buildx (don't push on PR) # https://github.com/docker/build-push-action @@ -81,7 +82,7 @@ jobs: id: build-and-push uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 with: - file: Dockerfolder/${{ matrix.image-os }}.Dockerfile + file: Dockerfolder/${{ matrix.image-tag }}.Dockerfile push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ 
steps.meta.outputs.labels }} diff --git a/Dockerfolder/Ubuntu.Dockerfile b/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile similarity index 92% rename from Dockerfolder/Ubuntu.Dockerfile rename to Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile index 2dba642..d9c1972 100644 --- a/Dockerfolder/Ubuntu.Dockerfile +++ b/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile @@ -1,4 +1,4 @@ -FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu22.04 +FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubuntu20.04 RUN apt-get update RUN apt-get upgrade -y @@ -30,5 +30,6 @@ WORKDIR tensor-array/build RUN cmake .. RUN cmake --build . RUN cmake --install . +RUN ctest -WORKDIR /app/tensor-array +WORKDIR .. diff --git a/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile b/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile new file mode 100644 index 0000000..f077d78 --- /dev/null +++ b/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile @@ -0,0 +1,35 @@ +FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubi8 + +RUN apt-get update +RUN apt-get upgrade -y +RUN apt-get install curl -y + +ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" + +# Optionally install the cmake for vcpkg +COPY scripts/packages-install/reinstall-cmake.sh /tmp/ + +RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ + chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ + fi \ + && rm -f /tmp/reinstall-cmake.sh + + +# [Optional] Uncomment this section to install additional vcpkg ports. +# RUN su vscode -c "${VCPKG_ROOT}/vcpkg install " + +# [Optional] Uncomment this section to install additional packages. +# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ +# && apt-get -y install --no-install-recommends + +WORKDIR /main-project +COPY ./ tensor-array/ + +WORKDIR tensor-array/build + +RUN cmake .. +RUN cmake --build . +RUN cmake --install . +RUN ctest + +WORKDIR .. 
From 52d27ca6607f2addde452e1c3e9d9b0b22589fc9 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 5 Aug 2025 04:33:51 +0000 Subject: [PATCH 262/281] docker files --- .github/workflows/docker-publish-d.yml | 2 +- .github/workflows/docker-publish.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-publish-d.yml b/.github/workflows/docker-publish-d.yml index 0197115..2a6ead5 100644 --- a/.github/workflows/docker-publish-d.yml +++ b/.github/workflows/docker-publish-d.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - image-tag: [ "CUDA-12.9.1-Ubuntu-2004", "CUDA-12.9.1-devel-ubi8" ] + image-tag: [ "CUDA-12.9.1-Ubuntu-2004", "CUDA-12.9.1-ubi8" ] permissions: packages: write contents: read diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 8f89c6c..05715a9 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -27,7 +27,7 @@ jobs: strategy: fail-fast: false matrix: - image-tag: [ "CUDA-12.9.1-Ubuntu-20.04", "CUDA-12.9.1-devel-ubi8" ] + image-tag: [ "CUDA-12.9.1-Ubuntu-20.04", "CUDA-12.9.1-ubi8" ] runs-on: ubuntu-22.04 From ff31154d74185ddcf9acf9209a1ffa1ee76b02ab Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 5 Aug 2025 04:47:40 +0000 Subject: [PATCH 263/281] changes docker --- .../CUDA-12.9.1-Ubuntu-20.04.Dockerfile | 6 +- Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile | 12 ++-- .../packages-install/reinstall-cmake-rhel.sh | 59 +++++++++++++++++++ ...all-cmake.sh => reinstall-cmake-ubuntu.sh} | 0 4 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 scripts/packages-install/reinstall-cmake-rhel.sh rename scripts/packages-install/{reinstall-cmake.sh => reinstall-cmake-ubuntu.sh} (100%) diff --git a/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile b/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile index 
d9c1972..38875b0 100644 --- a/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile +++ b/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile @@ -7,12 +7,12 @@ RUN apt-get install curl -y ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" # Optionally install the cmake for vcpkg -COPY scripts/packages-install/reinstall-cmake.sh /tmp/ +COPY scripts/packages-install/reinstall-cmake-ubuntu.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ - chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ + chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake-ubuntu.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ fi \ - && rm -f /tmp/reinstall-cmake.sh + && rm -f /tmp/reinstall-cmake-ubuntu.sh # [Optional] Uncomment this section to install additional vcpkg ports. diff --git a/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile b/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile index f077d78..1d8ffca 100644 --- a/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile +++ b/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile @@ -1,18 +1,18 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubi8 -RUN apt-get update -RUN apt-get upgrade -y -RUN apt-get install curl -y +RUN dnf update +RUN dnf upgrade -y +RUN dnf install curl -y ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" # Optionally install the cmake for vcpkg -COPY scripts/packages-install/reinstall-cmake.sh /tmp/ +COPY scripts/packages-install/reinstall-cmake-rhel.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ - chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ + chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake-rhel.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ fi \ - && rm -f /tmp/reinstall-cmake.sh + && rm -f /tmp/reinstall-cmake-rhel.sh # [Optional] Uncomment this section to install additional vcpkg ports. 
diff --git a/scripts/packages-install/reinstall-cmake-rhel.sh b/scripts/packages-install/reinstall-cmake-rhel.sh new file mode 100644 index 0000000..5ea9d23 --- /dev/null +++ b/scripts/packages-install/reinstall-cmake-rhel.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +#------------------------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information. +#------------------------------------------------------------------------------------------------------------- +# +set -e + +CMAKE_VERSION=${1:-"none"} + +if [ "${CMAKE_VERSION}" = "none" ]; then + echo "No CMake version specified, skipping CMake reinstallation" + exit 0 +fi + +# Cleanup temporary directory and associated files when exiting the script. +cleanup() { + EXIT_CODE=$? + set +e + if [[ -n "${TMP_DIR}" ]]; then + echo "Executing cleanup of tmp files" + rm -Rf "${TMP_DIR}" + fi + exit $EXIT_CODE +} +trap cleanup EXIT + + +echo "Installing CMake..." +dnf -y autoremove cmake +mkdir -p /opt/cmake + +architecture=$(dpkg --print-architecture) +case "${architecture}" in + arm64) + ARCH=aarch64 ;; + amd64) + ARCH=x86_64 ;; + *) + echo "Unsupported architecture ${architecture}." 
+ exit 1 + ;; +esac + +CMAKE_BINARY_NAME="cmake-${CMAKE_VERSION}-linux-${ARCH}.sh" +CMAKE_CHECKSUM_NAME="cmake-${CMAKE_VERSION}-SHA-256.txt" +TMP_DIR=$(mktemp -d -t cmake-XXXXXXXXXX) + +echo "${TMP_DIR}" +cd "${TMP_DIR}" + +curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_BINARY_NAME}" -O +curl -sSL "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_CHECKSUM_NAME}" -O + +sha256sum -c --ignore-missing "${CMAKE_CHECKSUM_NAME}" +sh "${TMP_DIR}/${CMAKE_BINARY_NAME}" --prefix=/opt/cmake --skip-license + +ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake +ln -s /opt/cmake/bin/ctest /usr/local/bin/ctest diff --git a/scripts/packages-install/reinstall-cmake.sh b/scripts/packages-install/reinstall-cmake-ubuntu.sh similarity index 100% rename from scripts/packages-install/reinstall-cmake.sh rename to scripts/packages-install/reinstall-cmake-ubuntu.sh From adc4dfa8db6a55bc1efb7e2cc5f3171666f5cda5 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 5 Aug 2025 04:51:06 +0000 Subject: [PATCH 264/281] test --- Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile | 2 +- Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile b/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile index 38875b0..c7377c5 100644 --- a/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile +++ b/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile @@ -10,7 +10,7 @@ ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" COPY scripts/packages-install/reinstall-cmake-ubuntu.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ - chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake-ubuntu.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ + chmod +x /tmp/reinstall-cmake-ubuntu.sh && /tmp/reinstall-cmake-ubuntu.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ fi \ && rm -f 
/tmp/reinstall-cmake-ubuntu.sh diff --git a/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile b/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile index 1d8ffca..d8de5c0 100644 --- a/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile +++ b/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile @@ -1,6 +1,6 @@ FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubi8 -RUN dnf update +RUN dnf update -y RUN dnf upgrade -y RUN dnf install curl -y @@ -10,7 +10,7 @@ ARG REINSTALL_CMAKE_VERSION_FROM_SOURCE="3.27.9" COPY scripts/packages-install/reinstall-cmake-rhel.sh /tmp/ RUN if [ "${REINSTALL_CMAKE_VERSION_FROM_SOURCE}" != "none" ]; then \ - chmod +x /tmp/reinstall-cmake.sh && /tmp/reinstall-cmake-rhel.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ + chmod +x /tmp/reinstall-cmake-rhel.sh && /tmp/reinstall-cmake-rhel.sh ${REINSTALL_CMAKE_VERSION_FROM_SOURCE}; \ fi \ && rm -f /tmp/reinstall-cmake-rhel.sh From 9fe651c86e885b7578cf046ec452c74cb381eb2c Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 5 Aug 2025 04:55:56 +0000 Subject: [PATCH 265/281] test --- scripts/packages-install/reinstall-cmake-rhel.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/packages-install/reinstall-cmake-rhel.sh b/scripts/packages-install/reinstall-cmake-rhel.sh index 5ea9d23..1ce240e 100644 --- a/scripts/packages-install/reinstall-cmake-rhel.sh +++ b/scripts/packages-install/reinstall-cmake-rhel.sh @@ -27,7 +27,8 @@ trap cleanup EXIT echo "Installing CMake..." 
-dnf -y autoremove cmake +dnf -y remove cmake +dnf -y autoremove mkdir -p /opt/cmake architecture=$(dpkg --print-architecture) From 336594a99f71889f49e85c8cc0dd59edeaf0261d Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 5 Aug 2025 05:07:42 +0000 Subject: [PATCH 266/281] test 1 --- scripts/packages-install/reinstall-cmake-rhel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/packages-install/reinstall-cmake-rhel.sh b/scripts/packages-install/reinstall-cmake-rhel.sh index 1ce240e..93b7f44 100644 --- a/scripts/packages-install/reinstall-cmake-rhel.sh +++ b/scripts/packages-install/reinstall-cmake-rhel.sh @@ -31,7 +31,7 @@ dnf -y remove cmake dnf -y autoremove mkdir -p /opt/cmake -architecture=$(dpkg --print-architecture) +architecture=$(arch) case "${architecture}" in arm64) ARCH=aarch64 ;; From 53a51ffa23fc5369fb8d7a6091ebbc6b546268e6 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 5 Aug 2025 05:12:12 +0000 Subject: [PATCH 267/281] test 2 --- scripts/packages-install/reinstall-cmake-rhel.sh | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/scripts/packages-install/reinstall-cmake-rhel.sh b/scripts/packages-install/reinstall-cmake-rhel.sh index 93b7f44..0b7ee15 100644 --- a/scripts/packages-install/reinstall-cmake-rhel.sh +++ b/scripts/packages-install/reinstall-cmake-rhel.sh @@ -32,16 +32,7 @@ dnf -y autoremove mkdir -p /opt/cmake architecture=$(arch) -case "${architecture}" in - arm64) - ARCH=aarch64 ;; - amd64) - ARCH=x86_64 ;; - *) - echo "Unsupported architecture ${architecture}." 
- exit 1 - ;; -esac +ARCH=${architecture} CMAKE_BINARY_NAME="cmake-${CMAKE_VERSION}-linux-${ARCH}.sh" CMAKE_CHECKSUM_NAME="cmake-${CMAKE_VERSION}-SHA-256.txt" From 41cfeaad914e44a173055a528e4faa350341eca1 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 5 Aug 2025 05:39:12 +0000 Subject: [PATCH 268/281] trying to fix error --- src/tensor-array/core/tensorbase.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tensor-array/core/tensorbase.hh b/src/tensor-array/core/tensorbase.hh index e05ff84..e7c42d5 100644 --- a/src/tensor-array/core/tensorbase.hh +++ b/src/tensor-array/core/tensorbase.hh @@ -64,7 +64,7 @@ namespace tensor_array class TensorArrayStorage final : public TensorStorage { private: - static constexpr const unsigned int dim_size_array[sizeof...(sz) + 1ULL] = { sz0, sz... }; + static constexpr inline const unsigned int dim_size_array[sizeof...(sz) + 1ULL] = { sz0, sz... }; const TensorArray arr_data; public: constexpr TensorArrayStorage(const TensorArray& arr_data) : From cd86f15f5fd5b8b1b435d836e0c5637a3a9c9cc8 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Tue, 5 Aug 2025 05:43:33 +0000 Subject: [PATCH 269/281] run docker files --- .github/workflows/cmake-multi-platform.yml | 3 ++- Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile | 2 +- Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index 0d9a38d..7347fe3 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -73,7 +73,8 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Test - working-directory: ${{github.workspace}}/build + working-directory: ${{github.workspace}}/ + if: runner.os != 'Windows' # Execute tests defined by the CMake 
configuration. # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail run: ctest -C ${{env.BUILD_TYPE}} diff --git a/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile b/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile index c7377c5..e8226e3 100644 --- a/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile +++ b/Dockerfolder/CUDA-12.9.1-Ubuntu-20.04.Dockerfile @@ -30,6 +30,6 @@ WORKDIR tensor-array/build RUN cmake .. RUN cmake --build . RUN cmake --install . -RUN ctest +# RUN ctest WORKDIR .. diff --git a/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile b/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile index d8de5c0..5bbd1ec 100644 --- a/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile +++ b/Dockerfolder/CUDA-12.9.1-ubi8.Dockerfile @@ -30,6 +30,6 @@ WORKDIR tensor-array/build RUN cmake .. RUN cmake --build . RUN cmake --install . -RUN ctest +# RUN ctest WORKDIR .. From 9358f8eb430f4e7c42f2ae05c72d4be5642c993c Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Wed, 6 Aug 2025 09:19:07 +0000 Subject: [PATCH 270/281] fixed install include dirs --- CMakeLists.txt | 12 ------------ cmake/ta_core_config.cmake | 24 +++++++++++++++++++----- cmake/ta_interp_config.cmake | 24 +++++++++++++++++++----- cmake/ta_layers_config.cmake | 22 ++++++++++++++++++---- 4 files changed, 56 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ccf9fc..c698055 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,18 +5,6 @@ project(TensorArray C CXX) include(GNUInstallDirs) # set(CMAKE_INSTALL_PREFIX ${PROJECT_SOURCE_DIR}) -file( - GLOB_RECURSE TensorArray_inc - "${PROJECT_SOURCE_DIR}/src/*.h" - "${PROJECT_SOURCE_DIR}/src/*.hh" -) - -install( - FILES ${TensorArray_inc} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} - COMPONENT headers -) - include(cmake/ta_core_config.cmake) include(cmake/ta_layers_config.cmake) include(cmake/ta_interp_config.cmake) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 
f514b72..a80c922 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -1,10 +1,24 @@ +set(TensorArray_Core_Dir tensor-array/core) + +file( + GLOB TensorArray_Core_inc + "${PROJECT_SOURCE_DIR}/src/${TensorArray_Core_Dir}/*.h" + "${PROJECT_SOURCE_DIR}/src/${TensorArray_Core_Dir}/*.hh" +) + +install( + FILES ${TensorArray_Core_inc} + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${TensorArray_Core_Dir}" + COMPONENT headers +) + include(CheckLanguage) check_language(CUDA) -file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cc") +file(GLOB TensorArray_Core_cc "${PROJECT_SOURCE_DIR}/src/${TensorArray_Core_Dir}/*.cc") if (CMAKE_CUDA_COMPILER) - file(GLOB TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/tensor-array/core/*.cu") + file(GLOB TensorArray_Core_cu "${PROJECT_SOURCE_DIR}/src/${TensorArray_Core_Dir}/*.cu") endif() if(CMAKE_CUDA_COMPILER) @@ -43,11 +57,11 @@ set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) install( TARGETS tensorarray_core EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/core + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/${TensorArray_Core_Dir}" COMPONENT Development ) diff --git a/cmake/ta_interp_config.cmake b/cmake/ta_interp_config.cmake index bcc62c8..96f7bbf 100644 --- a/cmake/ta_interp_config.cmake +++ b/cmake/ta_interp_config.cmake @@ -1,7 +1,21 @@ +set(TensorArray_Interpreter_Dir tensor-array/interp) + +file( + GLOB TensorArray_Interpreter_inc + "${PROJECT_SOURCE_DIR}/src/${TensorArray_Interpreter_Dir}/*.h" + "${PROJECT_SOURCE_DIR}/src/${TensorArray_Interpreter_Dir}/*.hh" +) + +install( + FILES ${TensorArray_Interpreter_inc} + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${TensorArray_Interpreter_Dir}" + 
COMPONENT headers +) + file( GLOB TensorArray_Interpreter_src - "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.c" - "${PROJECT_SOURCE_DIR}/src/tensor-array/interp/*.cc" + "${PROJECT_SOURCE_DIR}/src/${TensorArray_Interpreter_Dir}/*.c" + "${PROJECT_SOURCE_DIR}/src/${TensorArray_Interpreter_Dir}/*.cc" ) add_executable(tensorarray_interpreter ${TensorArray_Interpreter_src}) @@ -19,11 +33,11 @@ set_property(TARGET tensorarray_interpreter PROPERTY CXX_EXTENSIONS OFF) install( TARGETS tensorarray_interpreter EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT Runtime - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/interp + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/${TensorArray_Interpreter_Dir}" COMPONENT Development ) #[[ diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake index d43b446..d8aa4b7 100644 --- a/cmake/ta_layers_config.cmake +++ b/cmake/ta_layers_config.cmake @@ -1,4 +1,18 @@ -file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/tensor-array/layers/*.cc") +set(TensorArray_Layers_Dir tensor-array/layers) + +file( + GLOB TensorArray_Layers_inc + "${PROJECT_SOURCE_DIR}/src/${TensorArray_Layers_Dir}/*.h" + "${PROJECT_SOURCE_DIR}/src/${TensorArray_Layers_Dir}/*.hh" +) + +install( + FILES ${TensorArray_Layers_inc} + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${TensorArray_Layers_Dir}" + COMPONENT headers +) + +file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/${TensorArray_Layers_Dir}/*.cc") add_library(tensorarray_layers SHARED ${TensorArray_Layers_src}) @@ -20,11 +34,11 @@ endif() install( TARGETS tensorarray_layers EXPORT TensorArrayTargets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT Runtime - LIBRARY DESTINATION 
${CMAKE_INSTALL_LIBDIR}/tensor-array + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Runtime - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/tensor-array/layers + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/${TensorArray_Layers_Dir}" COMPONENT Development ) From 9f5a22543db61f703f29f7865e6a3a504b67e1a6 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 04:22:59 +0000 Subject: [PATCH 271/281] add static libs --- cmake/ta_core_config.cmake | 36 +++++++++++++++++-- cmake/ta_interp_config.cmake | 4 +-- cmake/ta_layers_config.cmake | 29 +++++++++++++-- .../core/cmake/ta_core_tests.cmake | 2 +- 4 files changed, 64 insertions(+), 7 deletions(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index a80c922..c7c7778 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -25,12 +25,22 @@ if(CMAKE_CUDA_COMPILER) enable_language(CUDA) find_package(CUDAToolkit REQUIRED) add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + add_library(tensorarray_core_static STATIC ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) set_property(TARGET tensorarray_core PROPERTY CUDA_SEPARABLE_COMPILATION ON) target_include_directories(tensorarray_core PRIVATE $<$:${CUDAToolkit_INCLUDE_DIRS}>) target_link_libraries(tensorarray_core PRIVATE $<$:CUDA::cublas>) + + set_property(TARGET tensorarray_core_static PROPERTY CUDA_STANDARD 17) + set_property(TARGET tensorarray_core_static PROPERTY CUDA_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core_static PROPERTY CUDA_EXTENSIONS OFF) + set_property(TARGET tensorarray_core_static PROPERTY CUDA_SEPARABLE_COMPILATION ON) + target_include_directories(tensorarray_core_static PRIVATE 
$<$:${CUDAToolkit_INCLUDE_DIRS}>) + target_link_libraries(tensorarray_core_static PRIVATE $<$:CUDA::cublas>) + if(MSVC) target_compile_definitions(tensorarray_core PRIVATE TENSOR_ARRAY_CORE_EXPORTS) endif() @@ -40,7 +50,8 @@ if(CMAKE_CUDA_COMPILER) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") else() - add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + add_library(tensorarray_core SHARED ${TensorArray_Core_cc}) + add_library(tensorarray_core_static STATIC ${TensorArray_Core_cc}) endif() @@ -54,6 +65,15 @@ set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) + +set_property(TARGET tensorarray_core_static PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_core_static PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core_static PROPERTY C_EXTENSIONS OFF) + +set_property(TARGET tensorarray_core_static PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_core_static PROPERTY CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core_static PROPERTY CXX_EXTENSIONS OFF) + install( TARGETS tensorarray_core EXPORT TensorArrayTargets @@ -65,4 +85,16 @@ install( COMPONENT Development ) -add_library(TensorArray::Core ALIAS tensorarray_core) +install( + TARGETS tensorarray_core_static + EXPORT TensorArrayTargets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + COMPONENT Runtime + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" + COMPONENT Runtime + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/${TensorArray_Core_Dir}" + COMPONENT Development +) + +add_library(TensorArray::core ALIAS tensorarray_core) +add_library(TensorArray::core_static ALIAS tensorarray_core_static) diff --git a/cmake/ta_interp_config.cmake b/cmake/ta_interp_config.cmake index 96f7bbf..2415172 100644 --- 
a/cmake/ta_interp_config.cmake +++ b/cmake/ta_interp_config.cmake @@ -20,7 +20,7 @@ file( add_executable(tensorarray_interpreter ${TensorArray_Interpreter_src}) target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_interpreter TensorArray::Core) +target_link_libraries(tensorarray_interpreter TensorArray::core) set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD 11) set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD_REQUIRED ON) @@ -47,4 +47,4 @@ install( POST_BUILD COMMAND tensorarray_interpreter) ]] -add_executable(TensorArray::Interpreter ALIAS tensorarray_interpreter) +add_executable(TensorArray::interpreter ALIAS tensorarray_interpreter) diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake index d8aa4b7..0568a19 100644 --- a/cmake/ta_layers_config.cmake +++ b/cmake/ta_layers_config.cmake @@ -15,9 +15,13 @@ install( file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/${TensorArray_Layers_Dir}/*.cc") add_library(tensorarray_layers SHARED ${TensorArray_Layers_src}) +add_library(tensorarray_layers_static STATIC ${TensorArray_Layers_src}) target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_layers TensorArray::Core) +target_include_directories(tensorarray_layers_static PRIVATE ${PROJECT_SOURCE_DIR}/src) + +target_link_libraries(tensorarray_layers TensorArray::core) +target_link_libraries(tensorarray_layers_static TensorArray::core_static) set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) @@ -27,6 +31,15 @@ set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_layers PROPERTY CXX_EXTENSIONS OFF) + +set_property(TARGET tensorarray_layers_static PROPERTY C_STANDARD 11) +set_property(TARGET 
tensorarray_layers_static PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers_static PROPERTY C_EXTENSIONS OFF) + +set_property(TARGET tensorarray_layers_static PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_layers_static PROPERTY CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers_static PROPERTY CXX_EXTENSIONS OFF) + if(MSVC) target_compile_definitions(tensorarray_layers PRIVATE TENSOR_ARRAY_LAYERS_EXPORTS) endif() @@ -42,4 +55,16 @@ install( COMPONENT Development ) -add_library(TensorArray::Layers ALIAS tensorarray_layers) +install( + TARGETS tensorarray_layers_static + EXPORT TensorArrayTargets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + COMPONENT Runtime + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" + COMPONENT Runtime + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/${TensorArray_Layers_Dir}" + COMPONENT Development +) + +add_library(TensorArray::layers ALIAS tensorarray_layers) +add_library(TensorArray::layers_static ALIAS tensorarray_layers_static) diff --git a/tests/tensor-array/core/cmake/ta_core_tests.cmake b/tests/tensor-array/core/cmake/ta_core_tests.cmake index c5c2914..781e00d 100644 --- a/tests/tensor-array/core/cmake/ta_core_tests.cmake +++ b/tests/tensor-array/core/cmake/ta_core_tests.cmake @@ -17,7 +17,7 @@ create_test_sourcelist( add_executable(tensorarray_core_tests ${TensorArray_tests}) target_include_directories(tensorarray_core_tests PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_core_tests TensorArray::Core) +target_link_libraries(tensorarray_core_tests TensorArray::core) foreach(test ${TensorArray_tests_src}) get_filename_component(TName ${test} NAME_WE) From d50d5e3cd98a762ff2531a0d4c0f773479ef792b Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 04:42:39 +0000 Subject: [PATCH 272/281] add obj to cmake --- cmake/ta_core_config.cmake | 53 +++++++++++++----------------------- 
cmake/ta_layers_config.cmake | 38 ++++++++++---------------- 2 files changed, 34 insertions(+), 57 deletions(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index c7c7778..0a3990b 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -24,25 +24,15 @@ endif() if(CMAKE_CUDA_COMPILER) enable_language(CUDA) find_package(CUDAToolkit REQUIRED) - add_library(tensorarray_core SHARED ${TensorArray_Core_cc} ${TensorArray_Core_cu}) - add_library(tensorarray_core_static STATIC ${TensorArray_Core_cc} ${TensorArray_Core_cu}) - - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD 17) - set_property(TARGET tensorarray_core PROPERTY CUDA_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core PROPERTY CUDA_EXTENSIONS OFF) - set_property(TARGET tensorarray_core PROPERTY CUDA_SEPARABLE_COMPILATION ON) - target_include_directories(tensorarray_core PRIVATE $<$:${CUDAToolkit_INCLUDE_DIRS}>) - target_link_libraries(tensorarray_core PRIVATE $<$:CUDA::cublas>) - - set_property(TARGET tensorarray_core_static PROPERTY CUDA_STANDARD 17) - set_property(TARGET tensorarray_core_static PROPERTY CUDA_STANDARD_REQUIRED ON) - set_property(TARGET tensorarray_core_static PROPERTY CUDA_EXTENSIONS OFF) - set_property(TARGET tensorarray_core_static PROPERTY CUDA_SEPARABLE_COMPILATION ON) - target_include_directories(tensorarray_core_static PRIVATE $<$:${CUDAToolkit_INCLUDE_DIRS}>) - target_link_libraries(tensorarray_core_static PRIVATE $<$:CUDA::cublas>) - + add_library(tensorarray_core_object OBJECT ${TensorArray_Core_cc} ${TensorArray_Core_cu}) + set_property(TARGET tensorarray_core_object PROPERTY CUDA_STANDARD 17) + set_property(TARGET tensorarray_core_object PROPERTY CUDA_STANDARD_REQUIRED ON) + set_property(TARGET tensorarray_core_object PROPERTY CUDA_EXTENSIONS OFF) + set_property(TARGET tensorarray_core_object PROPERTY CUDA_SEPARABLE_COMPILATION ON) + target_include_directories(tensorarray_core_object PRIVATE 
$<$:${CUDAToolkit_INCLUDE_DIRS}>) + target_link_libraries(tensorarray_core_object PRIVATE $<$:CUDA::cublas>) if(MSVC) - target_compile_definitions(tensorarray_core PRIVATE TENSOR_ARRAY_CORE_EXPORTS) + target_compile_definitions(tensorarray_core_object PRIVATE TENSOR_ARRAY_CORE_EXPORTS) endif() # find_package(CUDAToolkit REQUIRED) @@ -50,29 +40,23 @@ if(CMAKE_CUDA_COMPILER) # set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # list(APPEND CMAKE_CUDA_FLAGS "--default-stream per-thread") else() - add_library(tensorarray_core SHARED ${TensorArray_Core_cc}) - add_library(tensorarray_core_static STATIC ${TensorArray_Core_cc}) + add_library(tensorarray_core_object OBJECT ${TensorArray_Core_cc}) endif() # file(MAKE_DIRECTORY "include/tensor_array/core") -set_property(TARGET tensorarray_core PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_core PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_core PROPERTY CXX_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core PROPERTY CXX_EXTENSIONS OFF) - +set_property(TARGET tensorarray_core_object PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_core_object PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core_object PROPERTY C_EXTENSIONS OFF) -set_property(TARGET tensorarray_core_static PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_core_static PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_core_static PROPERTY C_EXTENSIONS OFF) +set_property(TARGET tensorarray_core_object PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_core_object PROPERTY CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_core_object PROPERTY CXX_EXTENSIONS OFF) -set_property(TARGET tensorarray_core_static PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_core_static PROPERTY CXX_STANDARD_REQUIRED ON) -set_property(TARGET 
tensorarray_core_static PROPERTY CXX_EXTENSIONS OFF) +# shared and static libraries built from the same object files +add_library(tensorarray_core SHARED $) +add_library(tensorarray_core_static STATIC $) install( TARGETS tensorarray_core @@ -98,3 +82,4 @@ install( add_library(TensorArray::core ALIAS tensorarray_core) add_library(TensorArray::core_static ALIAS tensorarray_core_static) +add_library(TensorArray::core_object ALIAS tensorarray_core_object) diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake index 0568a19..cec4a90 100644 --- a/cmake/ta_layers_config.cmake +++ b/cmake/ta_layers_config.cmake @@ -14,36 +14,27 @@ install( file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/${TensorArray_Layers_Dir}/*.cc") -add_library(tensorarray_layers SHARED ${TensorArray_Layers_src}) -add_library(tensorarray_layers_static STATIC ${TensorArray_Layers_src}) +add_library(tensorarray_layers_object OBJECT ${TensorArray_Layers_src}) -target_include_directories(tensorarray_layers PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_include_directories(tensorarray_layers_static PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(tensorarray_layers_object PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_link_libraries(tensorarray_layers_object TensorArray::core_object) -target_link_libraries(tensorarray_layers TensorArray::core) -target_link_libraries(tensorarray_layers_static TensorArray::core_static) +set_property(TARGET tensorarray_layers_object PROPERTY C_STANDARD 11) +set_property(TARGET tensorarray_layers_object PROPERTY C_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers_object PROPERTY C_EXTENSIONS OFF) -set_property(TARGET tensorarray_layers PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_layers PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_layers PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_layers PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_layers PROPERTY 
CXX_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_layers PROPERTY CXX_EXTENSIONS OFF) - - -set_property(TARGET tensorarray_layers_static PROPERTY C_STANDARD 11) -set_property(TARGET tensorarray_layers_static PROPERTY C_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_layers_static PROPERTY C_EXTENSIONS OFF) - -set_property(TARGET tensorarray_layers_static PROPERTY CXX_STANDARD 17) -set_property(TARGET tensorarray_layers_static PROPERTY CXX_STANDARD_REQUIRED ON) -set_property(TARGET tensorarray_layers_static PROPERTY CXX_EXTENSIONS OFF) +set_property(TARGET tensorarray_layers_object PROPERTY CXX_STANDARD 17) +set_property(TARGET tensorarray_layers_object PROPERTY CXX_STANDARD_REQUIRED ON) +set_property(TARGET tensorarray_layers_object PROPERTY CXX_EXTENSIONS OFF) if(MSVC) - target_compile_definitions(tensorarray_layers PRIVATE TENSOR_ARRAY_LAYERS_EXPORTS) + target_compile_definitions(tensorarray_layers_object PRIVATE TENSOR_ARRAY_LAYERS_EXPORTS) endif() +# shared and static libraries built from the same object files +add_library(tensorarray_layers SHARED $) +add_library(tensorarray_layers_static STATIC $) + install( TARGETS tensorarray_layers EXPORT TensorArrayTargets @@ -68,3 +59,4 @@ install( add_library(TensorArray::layers ALIAS tensorarray_layers) add_library(TensorArray::layers_static ALIAS tensorarray_layers_static) +add_library(TensorArray::layers_object ALIAS tensorarray_layers_object) From b41741a3e4b761d9f4e0b2175cc1d49a2ba3b045 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 04:50:22 +0000 Subject: [PATCH 273/281] set properties: "POSITION_INDEPENDENT_CODE" to 1 in cmake files --- cmake/ta_core_config.cmake | 4 +++- cmake/ta_layers_config.cmake | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 0a3990b..dd45acd 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -43,7 +43,6 @@ 
else() add_library(tensorarray_core_object OBJECT ${TensorArray_Core_cc}) endif() - # file(MAKE_DIRECTORY "include/tensor_array/core") set_property(TARGET tensorarray_core_object PROPERTY C_STANDARD 11) @@ -54,6 +53,9 @@ set_property(TARGET tensorarray_core_object PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_core_object PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core_object PROPERTY CXX_EXTENSIONS OFF) +# shared libraries need PIC +set_property(TARGET tensorarray_core_object PROPERTY POSITION_INDEPENDENT_CODE 1) + # shared and static libraries built from the same object files add_library(tensorarray_core SHARED $) add_library(tensorarray_core_static STATIC $) diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake index cec4a90..d36e333 100644 --- a/cmake/ta_layers_config.cmake +++ b/cmake/ta_layers_config.cmake @@ -27,6 +27,9 @@ set_property(TARGET tensorarray_layers_object PROPERTY CXX_STANDARD 17) set_property(TARGET tensorarray_layers_object PROPERTY CXX_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_layers_object PROPERTY CXX_EXTENSIONS OFF) +# shared libraries need PIC +set_property(TARGET tensorarray_layers_object PROPERTY POSITION_INDEPENDENT_CODE 1) + if(MSVC) target_compile_definitions(tensorarray_layers_object PRIVATE TENSOR_ARRAY_LAYERS_EXPORTS) endif() From f5560abf72a83e88aa97f143d77cc81962954379 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 05:20:47 +0000 Subject: [PATCH 274/281] test --- cmake/ta_core_config.cmake | 4 ++-- cmake/ta_interp_config.cmake | 2 +- cmake/ta_layers_config.cmake | 2 +- tests/tensor-array/core/cmake/ta_core_tests.cmake | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index dd45acd..2648396 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -29,8 +29,8 @@ if(CMAKE_CUDA_COMPILER) set_property(TARGET 
tensorarray_core_object PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core_object PROPERTY CUDA_EXTENSIONS OFF) set_property(TARGET tensorarray_core_object PROPERTY CUDA_SEPARABLE_COMPILATION ON) - target_include_directories(tensorarray_core_object PRIVATE $<$:${CUDAToolkit_INCLUDE_DIRS}>) - target_link_libraries(tensorarray_core_object PRIVATE $<$:CUDA::cublas>) + target_include_directories(tensorarray_core_object PUBLIC $<$:${CUDAToolkit_INCLUDE_DIRS}>) + target_link_libraries(tensorarray_core_object LINK_PUBLIC $<$:CUDA::cublas>) if(MSVC) target_compile_definitions(tensorarray_core_object PRIVATE TENSOR_ARRAY_CORE_EXPORTS) endif() diff --git a/cmake/ta_interp_config.cmake b/cmake/ta_interp_config.cmake index 2415172..88b5741 100644 --- a/cmake/ta_interp_config.cmake +++ b/cmake/ta_interp_config.cmake @@ -20,7 +20,7 @@ file( add_executable(tensorarray_interpreter ${TensorArray_Interpreter_src}) target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_interpreter TensorArray::core) +target_link_libraries(tensorarray_interpreter LINK_PUBLIC TensorArray::core) set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD 11) set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD_REQUIRED ON) diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake index d36e333..f571871 100644 --- a/cmake/ta_layers_config.cmake +++ b/cmake/ta_layers_config.cmake @@ -17,7 +17,7 @@ file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/${TensorArray_Layers add_library(tensorarray_layers_object OBJECT ${TensorArray_Layers_src}) target_include_directories(tensorarray_layers_object PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_layers_object TensorArray::core_object) +target_link_libraries(tensorarray_layers_object PUBLIC TensorArray::core_object) set_property(TARGET tensorarray_layers_object PROPERTY C_STANDARD 11) set_property(TARGET 
tensorarray_layers_object PROPERTY C_STANDARD_REQUIRED ON) diff --git a/tests/tensor-array/core/cmake/ta_core_tests.cmake b/tests/tensor-array/core/cmake/ta_core_tests.cmake index 781e00d..71e583e 100644 --- a/tests/tensor-array/core/cmake/ta_core_tests.cmake +++ b/tests/tensor-array/core/cmake/ta_core_tests.cmake @@ -17,7 +17,7 @@ create_test_sourcelist( add_executable(tensorarray_core_tests ${TensorArray_tests}) target_include_directories(tensorarray_core_tests PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_core_tests TensorArray::core) +target_link_libraries(tensorarray_core_tests LINK_PUBLIC TensorArray::core) foreach(test ${TensorArray_tests_src}) get_filename_component(TName ${test} NAME_WE) From 807024e1ccf23839262fe2b610a5a79363700f8f Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 05:30:55 +0000 Subject: [PATCH 275/281] test 1 --- cmake/ta_core_config.cmake | 8 ++++++-- cmake/ta_interp_config.cmake | 2 +- cmake/ta_layers_config.cmake | 4 +++- tests/tensor-array/core/cmake/ta_core_tests.cmake | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 2648396..fd72f5e 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -29,8 +29,7 @@ if(CMAKE_CUDA_COMPILER) set_property(TARGET tensorarray_core_object PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core_object PROPERTY CUDA_EXTENSIONS OFF) set_property(TARGET tensorarray_core_object PROPERTY CUDA_SEPARABLE_COMPILATION ON) - target_include_directories(tensorarray_core_object PUBLIC $<$:${CUDAToolkit_INCLUDE_DIRS}>) - target_link_libraries(tensorarray_core_object LINK_PUBLIC $<$:CUDA::cublas>) + target_include_directories(tensorarray_core_object PRIVATE $<$:${CUDAToolkit_INCLUDE_DIRS}>) if(MSVC) target_compile_definitions(tensorarray_core_object PRIVATE TENSOR_ARRAY_CORE_EXPORTS) endif() @@ -60,6 +59,11 @@ 
set_property(TARGET tensorarray_core_object PROPERTY POSITION_INDEPENDENT_CODE 1 add_library(tensorarray_core SHARED $) add_library(tensorarray_core_static STATIC $) +if(CUDAToolkit_FOUND) + target_link_libraries(tensorarray_core PRIVATE $<$:CUDA::cublas>) + target_link_libraries(tensorarray_core_static PRIVATE $<$:CUDA::cublas>) +endif() + install( TARGETS tensorarray_core EXPORT TensorArrayTargets diff --git a/cmake/ta_interp_config.cmake b/cmake/ta_interp_config.cmake index 88b5741..85b4bfd 100644 --- a/cmake/ta_interp_config.cmake +++ b/cmake/ta_interp_config.cmake @@ -20,7 +20,7 @@ file( add_executable(tensorarray_interpreter ${TensorArray_Interpreter_src}) target_include_directories(tensorarray_interpreter PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_interpreter LINK_PUBLIC TensorArray::core) +target_link_libraries(tensorarray_interpreter PUBLIC TensorArray::core) set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD 11) set_property(TARGET tensorarray_interpreter PROPERTY C_STANDARD_REQUIRED ON) diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake index f571871..8e5e7ff 100644 --- a/cmake/ta_layers_config.cmake +++ b/cmake/ta_layers_config.cmake @@ -17,7 +17,6 @@ file(GLOB TensorArray_Layers_src "${PROJECT_SOURCE_DIR}/src/${TensorArray_Layers add_library(tensorarray_layers_object OBJECT ${TensorArray_Layers_src}) target_include_directories(tensorarray_layers_object PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_layers_object PUBLIC TensorArray::core_object) set_property(TARGET tensorarray_layers_object PROPERTY C_STANDARD 11) set_property(TARGET tensorarray_layers_object PROPERTY C_STANDARD_REQUIRED ON) @@ -38,6 +37,9 @@ endif() add_library(tensorarray_layers SHARED $) add_library(tensorarray_layers_static STATIC $) +target_link_libraries(tensorarray_layers PUBLIC TensorArray::core) +target_link_libraries(tensorarray_layers_static PUBLIC TensorArray::core_static) + install( TARGETS 
tensorarray_layers EXPORT TensorArrayTargets diff --git a/tests/tensor-array/core/cmake/ta_core_tests.cmake b/tests/tensor-array/core/cmake/ta_core_tests.cmake index 71e583e..da6401d 100644 --- a/tests/tensor-array/core/cmake/ta_core_tests.cmake +++ b/tests/tensor-array/core/cmake/ta_core_tests.cmake @@ -17,7 +17,7 @@ create_test_sourcelist( add_executable(tensorarray_core_tests ${TensorArray_tests}) target_include_directories(tensorarray_core_tests PRIVATE ${PROJECT_SOURCE_DIR}/src) -target_link_libraries(tensorarray_core_tests LINK_PUBLIC TensorArray::core) +target_link_libraries(tensorarray_core_tests PUBLIC TensorArray::core) foreach(test ${TensorArray_tests_src}) get_filename_component(TName ${test} NAME_WE) From 4775f7b418ddbdde7fd9756cae2fa1f7220ac1a4 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 05:48:50 +0000 Subject: [PATCH 276/281] move "CUDA_SEPARABLE_COMPILATION" --- cmake/ta_core_config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index fd72f5e..96727c2 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -28,7 +28,6 @@ if(CMAKE_CUDA_COMPILER) set_property(TARGET tensorarray_core_object PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core_object PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core_object PROPERTY CUDA_EXTENSIONS OFF) - set_property(TARGET tensorarray_core_object PROPERTY CUDA_SEPARABLE_COMPILATION ON) target_include_directories(tensorarray_core_object PRIVATE $<$:${CUDAToolkit_INCLUDE_DIRS}>) if(MSVC) target_compile_definitions(tensorarray_core_object PRIVATE TENSOR_ARRAY_CORE_EXPORTS) @@ -60,6 +59,7 @@ add_library(tensorarray_core SHARED $) add_library(tensorarray_core_static STATIC $) if(CUDAToolkit_FOUND) + set_property(TARGET tensorarray_core PROPERTY CUDA_SEPARABLE_COMPILATION ON) target_link_libraries(tensorarray_core PRIVATE 
$<$:CUDA::cublas>) target_link_libraries(tensorarray_core_static PRIVATE $<$:CUDA::cublas>) endif() From f97f3f386e7919b4fba8b8fb47d139fc0101c253 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 05:57:15 +0000 Subject: [PATCH 277/281] try to link lib --- cmake/ta_core_config.cmake | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 96727c2..d584600 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -28,6 +28,7 @@ if(CMAKE_CUDA_COMPILER) set_property(TARGET tensorarray_core_object PROPERTY CUDA_STANDARD 17) set_property(TARGET tensorarray_core_object PROPERTY CUDA_STANDARD_REQUIRED ON) set_property(TARGET tensorarray_core_object PROPERTY CUDA_EXTENSIONS OFF) + set_property(TARGET tensorarray_core_object PROPERTY CUDA_SEPARABLE_COMPILATION ON) target_include_directories(tensorarray_core_object PRIVATE $<$:${CUDAToolkit_INCLUDE_DIRS}>) if(MSVC) target_compile_definitions(tensorarray_core_object PRIVATE TENSOR_ARRAY_CORE_EXPORTS) @@ -59,9 +60,16 @@ add_library(tensorarray_core SHARED $) add_library(tensorarray_core_static STATIC $) if(CUDAToolkit_FOUND) - set_property(TARGET tensorarray_core PROPERTY CUDA_SEPARABLE_COMPILATION ON) - target_link_libraries(tensorarray_core PRIVATE $<$:CUDA::cublas>) - target_link_libraries(tensorarray_core_static PRIVATE $<$:CUDA::cublas>) + target_link_libraries( + tensorarray_core + PRIVATE $<$:CUDA::cudart> + PRIVATE $<$:CUDA::cublas> + ) + target_link_libraries( + tensorarray_core_static + PRIVATE $<$:CUDA::cudart_static> + PRIVATE $<$:CUDA::cublas_static> + ) endif() install( From bb5146a040ca8bf57319d87c38fea33394cf15a4 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 06:27:03 +0000 Subject: [PATCH 278/281] test --- cmake/ta_core_config.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) 
diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index d584600..a2e9e19 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -60,14 +60,13 @@ add_library(tensorarray_core SHARED $) add_library(tensorarray_core_static STATIC $) if(CUDAToolkit_FOUND) + set_property(TARGET tensorarray_core PROPERTY CUDA_SEPARABLE_COMPILATION ON) target_link_libraries( tensorarray_core - PRIVATE $<$:CUDA::cudart> PRIVATE $<$:CUDA::cublas> ) target_link_libraries( tensorarray_core_static - PRIVATE $<$:CUDA::cudart_static> PRIVATE $<$:CUDA::cublas_static> ) endif() From 54e72eba45669fcd994ce9ea2102edcb6cea7402 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 06:32:18 +0000 Subject: [PATCH 279/281] update static lib path --- cmake/ta_core_config.cmake | 4 ++-- cmake/ta_interp_config.cmake | 2 +- cmake/ta_layers_config.cmake | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index a2e9e19..2a1415b 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -78,7 +78,7 @@ install( COMPONENT Runtime LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Runtime - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/${TensorArray_Core_Dir}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Development ) @@ -89,7 +89,7 @@ install( COMPONENT Runtime LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Runtime - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/${TensorArray_Core_Dir}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Development ) diff --git a/cmake/ta_interp_config.cmake b/cmake/ta_interp_config.cmake index 85b4bfd..9f43e0d 100644 --- a/cmake/ta_interp_config.cmake +++ b/cmake/ta_interp_config.cmake @@ -37,7 +37,7 @@ install( COMPONENT Runtime LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Runtime - 
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/${TensorArray_Interpreter_Dir}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Development ) #[[ diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake index 8e5e7ff..bad535b 100644 --- a/cmake/ta_layers_config.cmake +++ b/cmake/ta_layers_config.cmake @@ -47,7 +47,7 @@ install( COMPONENT Runtime LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Runtime - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/${TensorArray_Layers_Dir}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Development ) @@ -58,7 +58,7 @@ install( COMPONENT Runtime LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Runtime - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/${TensorArray_Layers_Dir}" + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/tensor-array" COMPONENT Development ) From a0b43416be1230ed862513c62659180cb7768a38 Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 06:41:37 +0000 Subject: [PATCH 280/281] test 2 --- cmake/ta_core_config.cmake | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cmake/ta_core_config.cmake b/cmake/ta_core_config.cmake index 2a1415b..e1a2c2a 100644 --- a/cmake/ta_core_config.cmake +++ b/cmake/ta_core_config.cmake @@ -65,10 +65,6 @@ if(CUDAToolkit_FOUND) tensorarray_core PRIVATE $<$:CUDA::cublas> ) - target_link_libraries( - tensorarray_core_static - PRIVATE $<$:CUDA::cublas_static> - ) endif() install( From bdae3242982320104ca720599b4f6e0d1b24addf Mon Sep 17 00:00:00 2001 From: Noob <63889503+BigNoobWasTaken@users.noreply.github.com> Date: Fri, 8 Aug 2025 06:42:29 +0000 Subject: [PATCH 281/281] test 3 --- cmake/ta_layers_config.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/ta_layers_config.cmake b/cmake/ta_layers_config.cmake index bad535b..d0961a9 100644 --- a/cmake/ta_layers_config.cmake +++ b/cmake/ta_layers_config.cmake @@ -38,7 +38,6 @@ 
add_library(tensorarray_layers SHARED $) target_link_libraries(tensorarray_layers PUBLIC TensorArray::core) -target_link_libraries(tensorarray_layers_static PUBLIC TensorArray::core_static) install( TARGETS tensorarray_layers