diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..c698441f73 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +# https://docs.gitlab.com/user/project/merge_requests/changes#collapse-generated-files +# https://github.com/github-linguist/linguist/blob/-/docs/overrides.md#generated-code +# https://git-scm.com/docs/gitattributes#_defining_macro_attributes +[attr]generated gitlab-generated linguist-generated + +/internal/collector/generated/*.json generated diff --git a/.github/actions/awk-matcher.json b/.github/actions/awk-matcher.json deleted file mode 100644 index 852a723577..0000000000 --- a/.github/actions/awk-matcher.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "problemMatcher": [ - { - "owner": "awk", - "pattern": [ - { - "regexp": "^([^:]+):([^ ]+) (([^:]+):.*)$", - "file": 1, "line": 2, "message": 3, "severity": 4 - } - ] - } - ] -} diff --git a/.github/actions/k3d/action.yaml b/.github/actions/k3d/action.yaml index 395d5f1116..b6e6ed5c2b 100644 --- a/.github/actions/k3d/action.yaml +++ b/.github/actions/k3d/action.yaml @@ -16,7 +16,7 @@ inputs: description: > Each line is the name of an image to fetch onto all Kubernetes nodes prefetch-timeout: - default: 90s + default: 3m required: true description: > Amount of time to wait for images to be fetched diff --git a/.github/actions/trivy/action.yaml b/.github/actions/trivy/action.yaml index d5d51e0441..bcc67421cb 100644 --- a/.github/actions/trivy/action.yaml +++ b/.github/actions/trivy/action.yaml @@ -1,39 +1,68 @@ +# Copyright 2024 - 2025 Crunchy Data Solutions, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# schema documentation: https://docs.github.com/actions/sharing-automations/creating-actions/metadata-syntax-for-github-actions +# yaml-language-server: $schema=https://json.schemastore.org/github-action.json + name: Trivy description: Scan this project using Trivy # The Trivy team maintains an action, but it has trouble caching its vulnerability data: # https://github.com/aquasecurity/trivy-action/issues/389 # +# 1. It caches vulnerability data once per calendar day, despite Trivy wanting +# to download more frequently than that. +# 2. When it fails to download the data, it fails the workflow *and* caches +# the incomplete data. +# 3. When (1) and (2) coincide, every following run that day *must* update the data, +# producing more opportunities for (2) and more failed workflows. +# # The action below uses any recent cache matching `cache-prefix` and calculates a cache key -# derived from the data Trivy downloads. +# derived from the data Trivy downloads. An older database is better than no scans at all. +# When a run successfully updates the data, that data is cached and available to other runs. inputs: cache: default: restore,success,use description: >- What Trivy data to cache; one or more of restore, save, success, or use. + The value "use" instructs Trivy to read and write to its cache. + The value "restore" loads the Trivy cache from GitHub. + The value "success" saves the Trivy cache to GitHub when Trivy succeeds. + The value "save" saves the Trivy cache to GitHub regardless of Trivy. database: default: update description: >- How Trivy should handle its data; one of update or skip. + The value "skip" fetches no Trivy data at all. setup: - default: v0.57.1,cache + default: v0.65.0,cache description: >- How to install Trivy; one or more of version, none, or cache. + The value "none" does not install Trivy at all. 
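  # A hedged sketch of how callers are expected to combine these inputs; the
  # job shapes mirror .github/workflows/trivy.yaml later in this change, and
  # the step layout here is illustrative:
  #
  #   - uses: ./.github/actions/trivy   # one "cache" job warms the data
  #     with: { cache: restore,success,use, database: update }
  #   - uses: ./.github/actions/trivy   # later scan jobs reuse it
  #     with: { cache: restore,use, database: skip }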
cache-directory: default: ${{ github.workspace }}/.cache/trivy + description: >- + Directory where Trivy should store its data cache-prefix: default: cache-trivy + description: >- + Name (key) where Trivy data should be stored in the GitHub cache scan-target: default: . + description: >- + What Trivy should scan scan-type: - default: filesystem + default: repository + description: >- + How Trivy should interpret scan-target; one of filesystem, image, repository, or sbom. runs: using: composite @@ -50,11 +79,12 @@ runs: "setup=\(split("[,\\s]+"; "") - [""])", "version=\(split("[,\\s]+"; "") | max_by(split("[v.]"; "") | map(tonumber?)))" ' - ) | tee --append $GITHUB_OUTPUT + ) | tee --append "${GITHUB_OUTPUT}" # Install Trivy as requested. + # NOTE: `setup-trivy` can download a "latest" version but cannot cache it. - if: ${{ ! contains(fromJSON(steps.parsed.outputs.setup), 'none') }} - uses: aquasecurity/setup-trivy@v0.2.2 + uses: aquasecurity/setup-trivy@v0.2.4 with: cache: ${{ contains(fromJSON(steps.parsed.outputs.setup), 'cache') }} version: ${{ steps.parsed.outputs.version }} @@ -75,12 +105,13 @@ runs: TRIVY_SKIP_CHECK_UPDATE: ${{ inputs.database == 'skip' }} TRIVY_SKIP_DB_UPDATE: ${{ inputs.database == 'skip' }} TRIVY_SKIP_JAVA_DB_UPDATE: ${{ inputs.database == 'skip' }} + TRIVY_SKIP_VEX_REPO_UPDATE: ${{ inputs.database == 'skip' }} run: | # Run Trivy trivy '${{ inputs.scan-type }}' '${{ inputs.scan-target }}' || result=$? checksum=$([[ -z "${TRIVY_CACHE_DIR}" ]] || cat "${TRIVY_CACHE_DIR}/"*/metadata.json | sha256sum) - echo 'cache-key=${{ inputs.cache-prefix }}-'"${checksum%% *}" >> $GITHUB_OUTPUT + echo 'cache-key=${{ inputs.cache-prefix }}-'"${checksum%% *}" >> "${GITHUB_OUTPUT}" exit "${result-0}" diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 639a059edc..8a16fc8d6f 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,16 +1,63 @@ -# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file -# https://docs.github.com/code-security/dependabot/dependabot-version-updates/customizing-dependency-updates +# Copyright 2024 - 2025 Crunchy Data Solutions, Inc. # -# See: https://www.github.com/dependabot/dependabot-core/issues/4605 ---- +# SPDX-License-Identifier: Apache-2.0 +# +# documentation: https://docs.github.com/code-security/dependabot/dependabot-version-updates +# schema documentation: https://docs.github.com/code-security/dependabot/working-with-dependabot/dependabot-options-reference # yaml-language-server: $schema=https://json.schemastore.org/dependabot-2.0.json +# +# Dependabot allows only one schedule per package-ecosystem, directory, and target-branch. +# Configurations that lack a "target-branch" field also affect security updates. 
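# For example, the "gomod" configuration below sets no target-branch, so its
# weekly schedule and its "go-vulnerabilities" group also shape the
# security-update pull requests Dependabot opens for this branch.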
+# +# There is a hack to have *two* schedules: https://github.com/dependabot/dependabot-core/issues/1778#issuecomment-1988140219 +--- version: 2 + updates: - package-ecosystem: github-actions - directory: / + directories: + # "/" is a special case that includes ".github/workflows/*" + - '/' + - '.github/actions/*' + registries: '*' schedule: interval: weekly day: tuesday + labels: + - dependencies groups: - all-github-actions: + # Group security updates into one pull request + action-vulnerabilities: + applies-to: security-updates + patterns: ['*'] + + # Group version updates into one pull request + github-actions: + applies-to: version-updates + patterns: ['*'] + + - package-ecosystem: gomod + directory: '/' + registries: '*' + schedule: + interval: weekly + day: wednesday + labels: + - dependencies + groups: + # Group security updates into one pull request + go-vulnerabilities: + applies-to: security-updates + patterns: ['*'] + + # Group Kubernetes and OpenTelemetry version updates into separate pull requests + kubernetes: + patterns: ['k8s.io/*', 'sigs.k8s.io/*'] + opentelemetry: + patterns: ['go.opentelemetry.io/*'] + go-dependencies: patterns: ['*'] + exclude-patterns: + - 'k8s.io/*' + - 'sigs.k8s.io/*' + - 'go.opentelemetry.io/*' diff --git a/.github/workflows/codeql-analysis.yaml b/.github/workflows/codeql-analysis.yaml index 78079bd4bc..16a6d85e12 100644 --- a/.github/workflows/codeql-analysis.yaml +++ b/.github/workflows/codeql-analysis.yaml @@ -3,17 +3,14 @@ name: CodeQL on: pull_request: + branches: + - REL_5_8 push: branches: - - main + - REL_5_8 schedule: - cron: '10 18 * * 2' -env: - # Use the Go toolchain installed by setup-go - # https://github.com/actions/setup-go/issues/457 - GOTOOLCHAIN: local - jobs: analyze: if: ${{ github.repository == 'CrunchyData/postgres-operator' }} @@ -24,17 +21,17 @@ jobs: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 with: { go-version: stable } - name: Initialize CodeQL - uses: github/codeql-action/init@v3 + uses: github/codeql-action/init@v4 with: { languages: go } - name: Autobuild # This action calls `make` which runs our "help" target. - uses: github/codeql-action/autobuild@v3 + uses: github/codeql-action/autobuild@v4 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/govulncheck.yaml b/.github/workflows/govulncheck.yaml index df81b90e53..6721104401 100644 --- a/.github/workflows/govulncheck.yaml +++ b/.github/workflows/govulncheck.yaml @@ -3,13 +3,17 @@ name: govulncheck on: pull_request: + branches: + - REL_5_8 push: branches: - - main + - REL_5_8 env: # Use the Go toolchain installed by setup-go # https://github.com/actions/setup-go/issues/457 + # + # TODO(govulncheck): Remove when "golang/govulncheck-action" uses "actions/setup-go" v6 or newer GOTOOLCHAIN: local jobs: @@ -20,7 +24,7 @@ jobs: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 # Install Go and produce a SARIF report. This fails only when the tool is # unable to scan. @@ -35,7 +39,7 @@ jobs: # succeed or fail according to branch protection rules. 
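      # (Findings surface as code scanning alerts via the SARIF upload below;
      # this job itself fails only when govulncheck is unable to scan.)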
# - https://docs.github.com/en/code-security/code-scanning - name: Upload results to GitHub - uses: github/codeql-action/upload-sarif@v3 + uses: github/codeql-action/upload-sarif@v4 with: sarif_file: 'govulncheck-results.sarif' diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index fa84193d09..230e2a7fae 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -2,11 +2,8 @@ name: Linters on: pull_request: - -env: - # Use the Go toolchain installed by setup-go - # https://github.com/actions/setup-go/issues/457 - GOTOOLCHAIN: local + branches: + - REL_5_8 jobs: golangci-lint: @@ -15,11 +12,11 @@ jobs: contents: read checks: write steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 with: { go-version: stable } - - uses: golangci/golangci-lint-action@v6 + - uses: golangci/golangci-lint-action@v9 with: version: latest args: --timeout=5m @@ -28,12 +25,8 @@ jobs: # exits zero to ensure it does not fail the pull request check. - name: Count non-blocking issues run: | - golangci-lint run --config .golangci.next.yaml \ - --issues-exit-code 0 \ - --max-issues-per-linter 0 \ - --max-same-issues 0 \ - --out-format json | - jq --sort-keys 'reduce .Issues[] as $i ({}; .[$i.FromLinter] += 1)' | - awk >> "${GITHUB_STEP_SUMMARY}" ' - NR == 1 { print "```json" } { print } END { if (NR > 0) print "```" } - ' || true + golangci-lint run --config .golangci.next.yaml --show-stats >> "${GITHUB_STEP_SUMMARY}" \ + --max-issues-per-linter=0 \ + --max-same-issues=0 \ + --uniq-by-line=0 \ + --output.text.path=/dev/null ||: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f4a8ba0e39..3e0629dd5f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -2,27 +2,25 @@ name: Tests on: pull_request: + branches: + - REL_5_8 push: branches: - - main - -env: - # Use the Go toolchain installed by setup-go - # https://github.com/actions/setup-go/issues/457 - GOTOOLCHAIN: local + - REL_5_8 jobs: go-test: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 with: { go-version: stable } - - run: make check - - run: make check-generate - name: Ensure go.mod is tidy - run: go mod tidy && git diff --exit-code -- go.mod + run: go mod tidy --diff + - name: Ensure generated files are committed + run: make check-generate + - run: make check kubernetes-api: runs-on: ubuntu-24.04 @@ -32,9 +30,10 @@ jobs: matrix: kubernetes: ['default'] steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 with: { go-version: stable } + - run: go mod download - run: ENVTEST_K8S_VERSION="${KUBERNETES#default}" make check-envtest env: @@ -43,7 +42,7 @@ jobs: # Upload coverage to GitHub - run: gzip envtest.coverage - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v5 with: name: "~coverage~kubernetes-api=${{ matrix.kubernetes }}" path: envtest.coverage.gz @@ -56,10 +55,10 @@ jobs: strategy: fail-fast: false matrix: - kubernetes: [v1.31, v1.28] + kubernetes: [v1.30, v1.34] steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 with: { go-version: stable } - name: Start k3s @@ -67,9 +66,9 @@ jobs: with: k3s-channel: "${{ matrix.kubernetes }}" prefetch-images: | - registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi8-2.54.1-1 
- registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi8-1.23-4 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.8-0 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.11-2547 - run: make createnamespaces check-envtest-existing env: @@ -78,42 +77,37 @@ jobs: # Upload coverage to GitHub - run: gzip envtest-existing.coverage - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v5 with: name: "~coverage~kubernetes-k3d=${{ matrix.kubernetes }}" path: envtest-existing.coverage.gz retention-days: 1 - kuttl-k3d: + e2e-k3d-kuttl: runs-on: ubuntu-24.04 needs: [go-test] strategy: fail-fast: false matrix: - kubernetes: [v1.32, v1.31, v1.30, v1.29, v1.28] + kubernetes: [v1.30, v1.34] steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 with: { go-version: stable } - name: Start k3s uses: ./.github/actions/k3d with: k3s-channel: "${{ matrix.kubernetes }}" + prefetch-timeout: 5m prefetch-images: | - registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-4.30-35 - registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi8-2.54.1-1 - registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi8-1.23-4 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:latest - registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:latest - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.8-0 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.3-0 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.4-0 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.4-0 - registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.4-3.4-0 - - run: go mod download - - name: Build executable - run: PGO_VERSION='${{ github.sha }}' make build-postgres-operator + registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.7-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.11-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.18.1-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-18.1-2547 + registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.8-2547 - name: Get pgMonitor files. run: make get-pgmonitor @@ -121,6 +115,10 @@ jobs: PGMONITOR_DIR: "${{ github.workspace }}/hack/tools/pgmonitor" QUERIES_CONFIG_DIR: "${{ github.workspace }}/hack/tools/queries" + - run: go mod download + - name: Build executable + run: PGO_VERSION='${{ github.sha }}' make build-postgres-operator + # Start a Docker container with the working directory mounted. 
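      # The RELATED_IMAGE_* variables below supply the operator's default image
      # for each component it manages, and PGO_FEATURE_GATES enables the gates
      # the kuttl suite exercises (tablespaces plus OpenTelemetry logs/metrics).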
- name: Start PGO run: | @@ -129,28 +127,19 @@ jobs: hack/create-kubeconfig.sh postgres-operator pgo docker run --detach --network host --read-only \ --volume "$(pwd):/mnt" --workdir '/mnt' --env 'PATH=/mnt/bin' \ - --env 'CHECK_FOR_UPGRADES=false' \ --env 'QUERIES_CONFIG_DIR=/mnt/hack/tools/queries' \ --env 'KUBECONFIG=hack/.kube/postgres-operator/pgo' \ - --env 'RELATED_IMAGE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-4.30-35' \ - --env 'RELATED_IMAGE_PGBACKREST=registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi8-2.54.1-1' \ - --env 'RELATED_IMAGE_PGBOUNCER=registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi8-1.23-4' \ - --env 'RELATED_IMAGE_PGEXPORTER=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:latest' \ - --env 'RELATED_IMAGE_PGUPGRADE=registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:latest' \ - --env 'RELATED_IMAGE_POSTGRES_16=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.8-0' \ - --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.3=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.3-0' \ - --env 'RELATED_IMAGE_POSTGRES_16_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.4-0' \ - --env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.4-0' \ - --env 'RELATED_IMAGE_POSTGRES_17_GIS_3.4=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.4-3.4-0' \ - --env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-8.14-2' \ - --env 'RELATED_IMAGE_COLLECTOR=ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.119.0' \ - --env 'PGO_FEATURE_GATES=TablespaceVolumes=true' \ + --env 'RELATED_IMAGE_PGBACKREST=registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2547' \ + --env 'RELATED_IMAGE_PGBOUNCER=registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2547' \ + --env 'RELATED_IMAGE_PGEXPORTER=registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.18.1-2547' \ + --env 'RELATED_IMAGE_PGUPGRADE=registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-18.1-2547' \ + --env 'RELATED_IMAGE_POSTGRES_16=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.11-2547' \ + --env 'RELATED_IMAGE_POSTGRES_17=registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.7-2547' \ + --env 'RELATED_IMAGE_STANDALONE_PGADMIN=registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.8-2547' \ + --env 'RELATED_IMAGE_COLLECTOR=registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.5-0' \ + --env 'PGO_FEATURE_GATES=TablespaceVolumes=true,OpenTelemetryLogs=true,OpenTelemetryMetrics=true' \ --name 'postgres-operator' ubuntu \ postgres-operator - - name: Install kuttl - run: | - curl -Lo /usr/local/bin/kubectl-kuttl https://github.com/kudobuilder/kuttl/releases/download/v0.13.0/kubectl-kuttl_0.13.0_linux_x86_64 - chmod +x /usr/local/bin/kubectl-kuttl - run: make generate-kuttl env: @@ -158,14 +147,13 @@ jobs: KUTTL_PG_UPGRADE_TO_VERSION: '17' KUTTL_PG_VERSION: '16' KUTTL_POSTGIS_VERSION: '3.4' - KUTTL_PSQL_IMAGE: 'registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.6-2' + KUTTL_PSQL_IMAGE: 'registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.11-2547' - run: | make check-kuttl 
&& exit failed=$? echo '::group::PGO logs'; docker logs 'postgres-operator'; echo '::endgroup::' exit $failed - env: - KUTTL_TEST: kubectl-kuttl test + - name: Stop PGO run: docker stop 'postgres-operator' || true @@ -175,11 +163,12 @@ jobs: needs: - kubernetes-api - kubernetes-k3d + - e2e-k3d-kuttl steps: - - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 with: { go-version: stable } - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v6 with: { path: download } # Combine the coverage profiles by taking the mode line from any one file @@ -203,7 +192,7 @@ jobs: # Upload coverage to GitHub - run: gzip total-coverage.html - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v5 with: name: coverage-report=html path: total-coverage.html.gz diff --git a/.github/workflows/trivy.yaml b/.github/workflows/trivy.yaml index de07b96c08..e3fd63b2ee 100644 --- a/.github/workflows/trivy.yaml +++ b/.github/workflows/trivy.yaml @@ -3,14 +3,16 @@ name: Trivy on: pull_request: + branches: + - REL_5_8 push: branches: - - main + - REL_5_8 env: - # Use the Go toolchain installed by setup-go - # https://github.com/actions/setup-go/issues/457 - GOTOOLCHAIN: local + # Use the committed Trivy configuration files. + TRIVY_IGNOREFILE: .trivyignore.yaml + TRIVY_SECRET_CONFIG: trivy-secret.yaml jobs: cache: @@ -21,7 +23,7 @@ jobs: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Download Trivy uses: ./.github/actions/trivy env: @@ -29,6 +31,9 @@ jobs: TRIVY_DOWNLOAD_DB_ONLY: true TRIVY_NO_PROGRESS: true TRIVY_SCANNERS: license,secret,vuln + with: + cache: restore,success,use + database: update licenses: # Run this job after the cache job regardless of its success or failure. @@ -38,14 +43,14 @@ jobs: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 # Trivy needs a populated Go module cache to detect Go module licenses. - - uses: actions/setup-go@v5 + - uses: actions/setup-go@v6 with: { go-version: stable } - run: go mod download - # Report success only when detected licenses are listed in [/trivy.yaml]. + # Report success only when detected licenses are listed in [.trivyignore.yaml]. - name: Scan licenses uses: ./.github/actions/trivy env: @@ -56,6 +61,26 @@ jobs: cache: restore,use database: skip + secrets: + # Run this job after the cache job regardless of its success or failure. + needs: [cache] + if: >- + ${{ !cancelled() }} + + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v5 + + # Report success only when detected secrets are listed in [.trivyignore.yaml]. + - name: Scan secrets + uses: ./.github/actions/trivy + env: + TRIVY_EXIT_CODE: 1 + TRIVY_SCANNERS: secret + with: + cache: restore,use + database: skip + vulnerabilities: # Run this job after the cache job regardless of its success or failure. needs: [cache] @@ -66,7 +91,7 @@ jobs: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 # Print any detected secrets or vulnerabilities to the workflow log for # human consumption. This step fails only when Trivy is unable to scan. @@ -97,6 +122,6 @@ jobs: # succeed or fail according to branch protection rules. 
# - https://docs.github.com/en/code-security/code-scanning - name: Upload results to GitHub - uses: github/codeql-action/upload-sarif@v3 + uses: github/codeql-action/upload-sarif@v4 with: sarif_file: 'trivy-results.sarif' diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000..ce00e578ef --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,218 @@ +# Copyright Crunchy Data Solutions, Inc. All rights reserved. +# +# schema-documentation: https://docs.gitlab.com/ci/yaml +# yaml-language-server: $schema=https://gitlab.com/gitlab-org/gitlab/-/raw/master/app/assets/javascripts/editor/schema/ci.json + +spec: + inputs: + + # https://go.dev/doc/install/source#environment + architectures: + type: array + default: ['amd64','arm64'] + description: > + The CPU architectures on which to run tests + + # https://docs.gitlab.com/ci/yaml#artifactsexpire_in + retention: + type: string + default: 2d # Enough time to find and address MR failures the following day + description: > + How long to keep reports +--- + +# https://docs.gitlab.com/ci/yaml/workflow +workflow: + rules: + - if: >- + ($CI_PIPELINE_SOURCE == "merge_request_event") || + ($CI_PIPELINE_SOURCE == "schedule") || + ($CI_PIPELINE_SOURCE == "web") + +include: + - component: ${CI_SERVER_FQDN}/containers/gitlab/check-directory-secrets@main + inputs: + job-name: must-not-commit-secrets + job-stage: build + trivy-ignore: .trivyignore.yaml + +variables: + # https://docs.gitlab.com/runner/configuration/feature-flags + # Show the duration of individual script items in the job log. + FF_SCRIPT_SECTIONS: 'true' + +# This uses a specific minor version of golangci-lint to ensure new code conforms +# to the rules we set when this release branch was cut. We do not want new rules +# suggesting sweeping changes to our release branches. +# +# NOTE(2025-04): Some versions of golangci-lint eat memory until they are killed by Linux. +# > Ops Team: +# > this container was hanging around even after the ci job died +# > `golangci-lint run` was using ~240GB of RAM and caused the system to swap +# +# | | go1.21.13 | go1.22.12 | go1.23.8 | go1.24.2 | +# | golangci-lint@v1.54.2 | typecheck | typecheck | panic | typecheck | +# | golangci-lint@v1.55.2 | typecheck | typecheck | panic | typecheck | +# | golangci-lint@v1.56.2 | killed | killed | panic | typecheck | +# | golangci-lint@v1.57.2 | killed | killed | panic | typecheck | +# | golangci-lint@v1.58.2 | killed | killed | panic | typecheck | +# | golangci-lint@v1.59.1 | killed | killed | panic | typecheck | +# | golangci-lint@v1.60.3 | go1.22.1 | go1.23.0 | pass | typecheck | +# | golangci-lint@v1.61.0 | go1.22.1 | go1.23.0 | pass | typecheck | +# | golangci-lint@v1.62.2 | go1.22.1 | go1.23.0 | pass | recvcheck | +# | golangci-lint@v1.63.4 | go1.22.1 | go1.23.0 | pass | pass | +# | golangci-lint@v1.64.8 | go1.23.0 | go1.23.0 | pass | pass | +golang-lint: + stage: build + needs: [] + tags: ['image=container'] + image: '${CI_REGISTRY}/containers/gitlab/go-toolset-ubi9' + script: + # Help Git understand the file permissions here. + # > fatal: detected dubious ownership in repository + - git config --global --add safe.directory "$(pwd)" + + # Download golangci-lint and log its version. + - |- + TOOL='github.com/golangci/golangci-lint/cmd/golangci-lint@v1.64' + go run "${TOOL}" version + + # Produce a report for the GitLab UI. This only fails when the tool crashes. 
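    # (--issues-exit-code 0 keeps this invocation's exit status clean; the
    # second run below, without that flag, is what actually fails the job.)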
+ - >- + go run "${TOOL}" run + --concurrency 2 + --timeout 5m + --issues-exit-code 0 + --max-issues-per-linter 0 + --max-same-issues 0 + --out-format junit-xml-extended > golangci-lint.junit.xml + + # Fail the job if there are any issues found and print a handful to the log. + - >- + go run "${TOOL}" run + --concurrency 2 + --timeout 5m + --verbose + + # Send the report to GitLab. + artifacts: + expire_in: '$[[ inputs.retention ]]' + reports: + junit: golangci-lint.junit.xml + +must-commit-generated: + stage: build + needs: [] + tags: ['image=container'] + image: '${CI_REGISTRY}/containers/gitlab/go-toolset-ubi9' + script: + # Help Git understand the file permissions here. + # > fatal: detected dubious ownership in repository + - git config --global --add safe.directory "$(pwd)" + - make check-generate + +# This uses the latest version of Go we have internally. +go-test: + stage: test + needs: + - job: must-commit-generated + tags: ['image=container','cpu=${TARGET_ARCHITECTURE}'] + image: '${CI_REGISTRY}/containers/gitlab/go-toolset-ubi9' + parallel: + matrix: + - TARGET_ARCHITECTURE: $[[ inputs.architectures ]] + script: + # Help Git understand the file permissions here. + # > fatal: detected dubious ownership in repository + - git config --global --add safe.directory "$(pwd)" + + # Tidy the file and fail if it changed. + - go mod tidy && git diff --exit-code -- go.mod + - go mod download + + # Run the fast/unit tests first. Failure here fails the job. + - >- + make check + GO_TEST='go run gotest.tools/gotestsum@latest --' + GOTESTSUM_JUNITFILE="make-check-${TARGET_ARCHITECTURE}.junit.xml" + + # Run the entire test suite using a local Kubernetes API. + - >- + make check-envtest + ENVTEST_K8S_VERSION='1.32' + GO_TEST='go run gotest.tools/gotestsum@latest --' + GOTESTSUM_JUNITFILE="make-check-envtest-${TARGET_ARCHITECTURE}.junit.xml" + + # Send the reports to GitLab. + artifacts: + expire_in: '$[[ inputs.retention ]]' + reports: + junit: '*.junit.xml' + +# https://go.dev/blog/govulncheck +govulncheck: + stage: test + needs: [] + rules: + # Run this job during scheduled pipelines and merge requests that change dependencies. + - changes: ['go.mod'] + + tags: ['image=container','cpu=${TARGET_ARCHITECTURE}'] + image: '${CI_REGISTRY}/containers/gitlab/go-toolset-ubi9' + parallel: + matrix: + - TARGET_ARCHITECTURE: $[[ inputs.architectures ]] + script: + # Download govulncheck and log its version. + - |- + TOOL='golang.org/x/vuln/cmd/govulncheck@latest' + go run "${TOOL}" --version + + # Print any detected vulnerabilities to the log. + # This fails the job when it detects a vulnerability in called code. + - go run "${TOOL}" --format text --show verbose ./... + +# https://trivy.dev/latest/ecosystem/cicd +trivy: + stage: test + needs: [] + rules: + # Run this job during scheduled pipelines and merge requests that change dependencies. + - changes: ['go.mod'] + + tags: ['image=container'] + image: '${CI_REGISTRY}/containers/gitlab/go-toolset-ubi9' + script: + # Help Git understand the file permissions here. + # > fatal: detected dubious ownership in repository + - git config --global --add safe.directory "$(pwd)" + + # Download Trivy and log its version. 
+ - |- + VERSION=$(go list -m -f '{{.Version}}' github.com/aquasecurity/trivy@latest) + git clone --config 'advice.detachedHead=no' --depth 1 --branch "${VERSION}" --sparse \ + 'https://github.com/aquasecurity/trivy.git' \ + '.gitlab-remotes/aquasecurity-trivy' + ( + cd '.gitlab-remotes/aquasecurity-trivy' + git sparse-checkout set 'contrib' + bash 'contrib/install.sh' -b "${HOME}/bin" "${VERSION}" + ) + + # Generate a report and fail when there are issues with dependencies. + # Trivy needs a populated Go module cache to detect Go module licenses. + - go mod download + - >- + trivy repository . --exit-code 1 --skip-dirs .gitlab-remotes + --scanners license,vuln + --ignore-unfixed + --no-progress + --format template + --template '@.gitlab-remotes/aquasecurity-trivy/contrib/junit.tpl' + --output 'trivy.junit.xml' + + # Send the report to GitLab. + artifacts: + expire_in: '$[[ inputs.retention ]]' + reports: + junit: 'trivy.junit.xml' diff --git a/.golangci.next.yaml b/.golangci.next.yaml index 6b76d7b1d2..2aa389e841 100644 --- a/.golangci.next.yaml +++ b/.golangci.next.yaml @@ -4,39 +4,95 @@ # Rules that should be enforced immediately belong in [.golangci.yaml]. # # Both files are used by [.github/workflows/lint.yaml]. +version: "2" +# https://golangci-lint.run/usage/linters linters: - disable-all: true - enable: - - contextcheck - - err113 - - gocritic - - godot - - godox - - gofumpt - - gosec # exclude-use-default - - nilnil + default: all + disable: + - asasalint + - asciicheck + - bidichk + - bodyclose + - copyloopvar + - depguard + - dupword + - durationcheck + - errchkjson + - errname + - errorlint + - exhaustive + - exptostd + - fatcontext + - forbidigo + - ginkgolinter + - gocheckcompilerdirectives + - gochecksumtype + - goheader + - gomoddirectives + - gomodguard + - goprintffuncname + - gosmopolitan + - grouper + - iface + - importas + - interfacebloat + - intrange + - loggercheck + - makezero + - mirror + - misspell + - musttag + - nilerr + - nilnesserr + - noctx - nolintlint - - predeclared - - revive - - staticcheck # exclude-use-default - - tenv - - thelper - - tparallel + - nosprintfhostport + - prealloc + - promlinter + - protogetter + - reassign + - recvcheck + - rowserrcheck + - sloglint + - spancheck + - sqlclosecheck + - tagalign + - testifylint + - unconvert + - unparam + - usestdlibvars + - usetesting - wastedassign + - wsl + - zerologlint -issues: - exclude-rules: - # We call external linters when they are installed: Flake8, ShellCheck, etc. - - linters: [gosec] - path: '_test[.]go$' - text: 'G204: Subprocess launched with variable' + settings: + thelper: + # https://github.com/kulti/thelper/issues/27 + tb: { begin: true, first: true } + test: { begin: true, first: true, name: true } + + exclusions: + warn-unused: true + # Ignore built-in exclusions + presets: [] + rules: + # We call external linters when they are installed: Flake8, ShellCheck, etc. 
+ - linters: [gosec] + path: '_test[.]go$' + text: 'G204: Subprocess launched with variable' - # https://github.com/golangci/golangci-lint/issues/2239 - exclude-use-default: false +# https://golangci-lint.run/usage/formatters +formatters: + enable: + - gofumpt + +issues: + # Fix only when requested + fix: false -linters-settings: - thelper: - # https://github.com/kulti/thelper/issues/27 - tb: { begin: true, first: true } - test: { begin: true, first: true, name: true } + # Show all issues at once + max-issues-per-linter: 0 + max-same-issues: 0 + uniq-by-line: false diff --git a/.golangci.yaml b/.golangci.yaml index da19e26976..55a54549f6 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -1,121 +1,201 @@ # https://golangci-lint.run/usage/configuration/ +version: "2" +# https://golangci-lint.run/usage/linters linters: - disable: - - contextcheck - - gci - - gofumpt + default: standard enable: + - asasalint + - asciicheck + - bidichk + - bodyclose + - copyloopvar - depguard + - dupword + - durationcheck + - errchkjson + - errname + - errorlint + - exhaustive + - exptostd + - fatcontext + - forbidigo + - ginkgolinter + - gocheckcompilerdirectives + - gochecksumtype - goheader + - gomoddirectives - gomodguard - - gosimple + - goprintffuncname + - gosec + - gosmopolitan + - grouper + - iface - importas + - interfacebloat + - intrange + - loggercheck + - makezero + - mirror - misspell + - musttag + - nilerr + - nilnesserr + - noctx + - nolintlint + - nosprintfhostport + - prealloc + - promlinter + - protogetter + - reassign + - recvcheck + - rowserrcheck + - sloglint + - spancheck + - sqlclosecheck + - tagalign + - testifylint - unconvert - presets: - - bugs - - format - - unused - -linters-settings: - depguard: + - unparam + - usestdlibvars + - usetesting + - wastedassign + - zerologlint + + settings: + dupword: + ignore: + # We might see duplicate instances of 'fi' if we end two bash 'if' statements + - fi + + depguard: + rules: + everything: + files: ['$all'] + list-mode: lax + allow: + - go.opentelemetry.io/otel/semconv/v1.27.0 + deny: + - pkg: go.opentelemetry.io/otel/semconv + desc: Use "go.opentelemetry.io/otel/semconv/v1.27.0" instead. + - pkg: io/ioutil + desc: Use the "io" and "os" packages instead. See https://go.dev/doc/go1.16#ioutil + - pkg: math/rand$ + desc: Use the "math/rand/v2" package instead. See https://go.dev/doc/go1.22#math_rand_v2 + not-tests: + files: ['!$test','!**/internal/testing/**'] + list-mode: lax + deny: + - pkg: net/http/httptest + desc: Should be used only in tests. + - pkg: testing/* + desc: The "testing" packages should be used only in tests. + - pkg: github.com/crunchydata/postgres-operator/internal/crd/* + desc: The "internal/crd" packages should be used only in tests. + - pkg: github.com/crunchydata/postgres-operator/internal/testing/* + desc: The "internal/testing" packages should be used only in tests. + - pkg: k8s.io/client-go/discovery + desc: Use the "internal/kubernetes" package instead. + tests: + files: ['$test'] + list-mode: lax + deny: + - pkg: github.com/pkg/errors + desc: Use the "errors" package unless you are interacting with stack traces. + + errchkjson: + check-error-free-encoding: true + + goheader: + template: |- + Copyright {{ DATES }} Crunchy Data Solutions, Inc. 
+ + SPDX-License-Identifier: Apache-2.0 + values: + regexp: + DATES: ((201[7-9]|202[0-4]) - 2025|2025) + + gomodguard: + blocked: + modules: + - go.yaml.in/yaml/v2: { recommendations: [sigs.k8s.io/yaml] } + - go.yaml.in/yaml/v3: { recommendations: [sigs.k8s.io/yaml] } + - gopkg.in/yaml.v2: { recommendations: [sigs.k8s.io/yaml] } + - gopkg.in/yaml.v3: { recommendations: [sigs.k8s.io/yaml] } + - gotest.tools: { recommendations: [gotest.tools/v3] } + - k8s.io/kubernetes: + reason: k8s.io/kubernetes is for building kubelet, kubeadm, etc. + + importas: + no-unaliased: true + alias: + - pkg: k8s.io/api/(\w+)/(v[\w\w]+) + alias: $1$2 + - pkg: k8s.io/apimachinery/pkg/apis/(\w+)/(v[\w\d]+) + alias: $1$2 + - pkg: k8s.io/apimachinery/pkg/api/errors + alias: apierrors + + spancheck: + checks: [end, record-error] + extra-start-span-signatures: + - github.com/crunchydata/postgres-operator/internal/tracing.Start:opentelemetry + ignore-check-signatures: + - tracing.Escape + + exclusions: + warn-unused: true + presets: + - common-false-positives + - legacy + - std-error-handling rules: - everything: - list-mode: lax - allow: - - go.opentelemetry.io/otel/semconv/v1.27.0 - deny: - - pkg: go.opentelemetry.io/otel/semconv - desc: Use "go.opentelemetry.io/otel/semconv/v1.27.0" instead. - - - pkg: io/ioutil - desc: > - Use the "io" and "os" packages instead. - See https://go.dev/doc/go1.16#ioutil - - not-tests: - files: ['!$test'] - deny: - - pkg: net/http/httptest - desc: Should be used only in tests. - - - pkg: testing/* - desc: The "testing" packages should be used only in tests. - - - pkg: github.com/crunchydata/postgres-operator/internal/testing/* - desc: The "internal/testing" packages should be used only in tests. - - - pkg: k8s.io/client-go/discovery - desc: Use the "internal/kubernetes" package instead. - - tests: - files: ['$test'] - deny: - - pkg: github.com/pkg/errors - desc: Use the "errors" package unless you are interacting with stack traces. - - errchkjson: - check-error-free-encoding: true - - exhaustive: - default-signifies-exhaustive: true - - goheader: - template: |- - Copyright {{ DATES }} Crunchy Data Solutions, Inc. - - SPDX-License-Identifier: Apache-2.0 - values: - regexp: - DATES: '((201[7-9]|202[0-4]) - 2025|2025)' - - goimports: - local-prefixes: github.com/crunchydata/postgres-operator - - gomodguard: - blocked: - modules: - - gopkg.in/yaml.v2: { recommendations: [sigs.k8s.io/yaml] } - - gopkg.in/yaml.v3: { recommendations: [sigs.k8s.io/yaml] } - - gotest.tools: { recommendations: [gotest.tools/v3] } - - k8s.io/kubernetes: - reason: > - k8s.io/kubernetes is for managing dependencies of the Kubernetes - project, i.e. building kubelet and kubeadm. - - gosec: - excludes: - # Flags for potentially-unsafe casting of ints, similar problem to globally-disabled G103 - - G115 - - importas: - alias: - - pkg: k8s.io/api/(\w+)/(v[\w\w]+) - alias: $1$2 - - pkg: k8s.io/apimachinery/pkg/apis/(\w+)/(v[\w\d]+) - alias: $1$2 - - pkg: k8s.io/apimachinery/pkg/api/errors - alias: apierrors - no-unaliased: true - - spancheck: - checks: [end, record-error] - extra-start-span-signatures: - - 'github.com/crunchydata/postgres-operator/internal/tracing.Start:opentelemetry' - ignore-check-signatures: - - 'tracing.Escape' + # It is fine for tests to use "math/rand" packages. + - linters: [gosec] + path: '(.+)_test[.]go' + text: weak random number generator + + # This internal package is the one place we want to do API discovery. 
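      # (This exclusion pairs with the depguard rule above that denies
      # k8s.io/client-go/discovery everywhere else.)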
+ - linters: [depguard] + path: internal/kubernetes/discovery.go + text: k8s.io/client-go/discovery + + # Postgres HBA rules often include "all all all" + - linters: [dupword] + path: /(hba|postgres)[^/]+$ + text: words \(all\) found + + # These value types have unmarshal methods. + # https://github.com/raeperd/recvcheck/issues/7 + - linters: [recvcheck] + path: internal/pki/pki.go + text: methods of "(Certificate|PrivateKey)" + + - linters: [staticcheck] + text: corev1.(Endpoints|EndpointSubset) is deprecated + + - linters: [staticcheck] + path: internal/controller/ + text: >- + deprecated: Use `RequeueAfter` instead + +# https://golangci-lint.run/usage/formatters +formatters: + enable: + - gci + - gofmt + settings: + gci: + sections: + - standard + - default + - localmodule issues: - exclude-generated: strict - exclude-rules: - # This internal package is the one place we want to do API discovery. - - linters: [depguard] - path: internal/kubernetes/discovery.go - text: k8s.io/client-go/discovery - - # These value types have unmarshal methods. - # https://github.com/raeperd/recvcheck/issues/7 - - linters: [recvcheck] - path: internal/pki/pki.go - text: 'methods of "(Certificate|PrivateKey)"' + # Fix only when requested + fix: false + + # Show all issues at once + max-issues-per-linter: 0 + max-same-issues: 0 + uniq-by-line: false diff --git a/.trivyignore.yaml b/.trivyignore.yaml new file mode 100644 index 0000000000..b275e406fa --- /dev/null +++ b/.trivyignore.yaml @@ -0,0 +1,12 @@ +# Copyright Crunchy Data Solutions, Inc. All rights reserved. +# +# https://trivy.dev/latest/docs/configuration/filtering/#trivyignoreyaml + +secrets: + - id: jwt-token + paths: + - internal/testing/token_* + + - id: private-key + paths: + - internal/pki/*_test.go diff --git a/Makefile b/Makefile index 5b291d7f66..ad32ad2f86 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ PGO_IMAGE_URL ?= https://www.crunchydata.com/products/crunchy-postgresql-for-kub PGO_IMAGE_PREFIX ?= localhost PGMONITOR_DIR ?= hack/tools/pgmonitor -PGMONITOR_VERSION ?= v5.1.1 +PGMONITOR_VERSION ?= v5.2.1 QUERIES_CONFIG_DIR ?= hack/tools/queries # Buildah's "build" used to be "bud". Use the alias to be compatible for a while. 
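# The "?=" variables in the next hunk are per-invocation defaults; the
# .gitlab-ci.yml added in this change overrides them, for example:
#   make check GO_TEST='go run gotest.tools/gotestsum@latest --'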
@@ -15,8 +15,9 @@ BUILDAH_BUILD ?= buildah bud GO ?= go GO_BUILD = $(GO) build GO_TEST ?= $(GO) test -KUTTL ?= kubectl-kuttl +KUTTL ?= $(GO) run github.com/kudobuilder/kuttl/cmd/kubectl-kuttl@latest KUTTL_TEST ?= $(KUTTL) test +ENVTEST_K8S_VERSION ?= 1.34 ##@ General @@ -119,17 +120,15 @@ undeploy: ## Undeploy the PostgreSQL Operator .PHONY: deploy-dev deploy-dev: ## Deploy the PostgreSQL Operator locally -deploy-dev: PGO_FEATURE_GATES ?= "AllAlpha=true" deploy-dev: get-pgmonitor deploy-dev: build-postgres-operator deploy-dev: createnamespaces kubectl apply --server-side -k ./config/dev hack/create-kubeconfig.sh postgres-operator pgo env \ - QUERIES_CONFIG_DIR="${QUERIES_CONFIG_DIR}" \ - CRUNCHY_DEBUG=true \ - PGO_FEATURE_GATES="${PGO_FEATURE_GATES}" \ - CHECK_FOR_UPGRADES='$(if $(CHECK_FOR_UPGRADES),$(CHECK_FOR_UPGRADES),false)' \ + QUERIES_CONFIG_DIR='$(QUERIES_CONFIG_DIR)' \ + CRUNCHY_DEBUG="$${CRUNCHY_DEBUG:-true}" \ + PGO_FEATURE_GATES="$${PGO_FEATURE_GATES:-AllAlpha=true,AppendCustomQueries=false}" \ KUBECONFIG=hack/.kube/postgres-operator/pgo \ PGO_NAMESPACE='postgres-operator' \ PGO_INSTALLER='deploy-dev' \ @@ -225,11 +224,11 @@ check-kuttl: ## example command: make check-kuttl KUTTL_TEST=' --config testing/kuttl/kuttl-test.yaml .PHONY: generate-kuttl -generate-kuttl: export KUTTL_PG_UPGRADE_FROM_VERSION ?= 15 -generate-kuttl: export KUTTL_PG_UPGRADE_TO_VERSION ?= 16 +generate-kuttl: export KUTTL_PG_UPGRADE_FROM_VERSION ?= 16 +generate-kuttl: export KUTTL_PG_UPGRADE_TO_VERSION ?= 17 generate-kuttl: export KUTTL_PG_VERSION ?= 16 generate-kuttl: export KUTTL_POSTGIS_VERSION ?= 3.4 -generate-kuttl: export KUTTL_PSQL_IMAGE ?= registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.8-0 +generate-kuttl: export KUTTL_PSQL_IMAGE ?= registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.7-2547 generate-kuttl: export KUTTL_TEST_DELETE_NAMESPACE ?= kuttl-test-delete-namespace generate-kuttl: ## Generate kuttl tests [ ! -d testing/kuttl/e2e-generated ] || rm -r testing/kuttl/e2e-generated @@ -250,6 +249,12 @@ generate-kuttl: ## Generate kuttl tests mkdir -p "$${target%/*}"; render < "$${source}" > "$${target}"; \ shift; \ done' - testing/kuttl/e2e/*/*.yaml testing/kuttl/e2e/*/*/*.yaml + if [ "$$KUTTL_PG_VERSION" -ge "18" ]; then \ + [ ! -d testing/kuttl/e2e-generated/exporter-custom-queries ] || rm -rf testing/kuttl/e2e-generated/exporter-custom-queries; \ + [ ! -d testing/kuttl/e2e-generated/exporter-no-tls ] || rm -rf testing/kuttl/e2e-generated/exporter-no-tls; \ + [ ! -d testing/kuttl/e2e-generated/exporter-tls ] || rm -rf testing/kuttl/e2e-generated/exporter-tls; \ + [ ! 
-d testing/kuttl/e2e-generated/exporter-password-change ] || rm -rf testing/kuttl/e2e-generated/exporter-password-change; \ + fi ##@ Generate diff --git a/cmd/postgres-operator/main.go b/cmd/postgres-operator/main.go index 8545e9e241..48ef5de308 100644 --- a/cmd/postgres-operator/main.go +++ b/cmd/postgres-operator/main.go @@ -34,9 +34,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/kubernetes" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" - "github.com/crunchydata/postgres-operator/internal/registration" "github.com/crunchydata/postgres-operator/internal/tracing" - "github.com/crunchydata/postgres-operator/internal/upgradecheck" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -223,13 +221,8 @@ func main() { assertNoError(err) assertNoError(mgr.Add(k8s)) - registrar, err := registration.NewRunner(os.Getenv("RSA_KEY"), os.Getenv("TOKEN_PATH"), stopRunning) - assertNoError(err) - assertNoError(mgr.Add(registrar)) - token, _ := registrar.CheckToken() - // add all PostgreSQL Operator controllers to the runtime manager - addControllersToManager(mgr, log, registrar) + addControllersToManager(mgr, log) if features.Enabled(feature.BridgeIdentifiers) { constructor := func() *bridge.Client { @@ -241,22 +234,6 @@ func main() { assertNoError(bridge.ManagedInstallationReconciler(mgr, constructor)) } - // Enable upgrade checking - upgradeCheckingDisabled := strings.EqualFold(os.Getenv("CHECK_FOR_UPGRADES"), "false") - if !upgradeCheckingDisabled { - log.Info("upgrade checking enabled") - // get the URL for the check for upgrades endpoint if set in the env - assertNoError( - upgradecheck.ManagedScheduler( - mgr, - os.Getenv("CHECK_FOR_UPGRADES_URL"), - versionString, - token, - )) - } else { - log.Info("upgrade checking disabled") - } - // Enable health probes assertNoError(mgr.AddHealthzCheck("health", healthz.Ping)) assertNoError(mgr.AddReadyzCheck("check", healthz.Ping)) @@ -288,12 +265,11 @@ func main() { // addControllersToManager adds all PostgreSQL Operator controllers to the provided controller // runtime manager. 
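// With registration removed, each reconciler below is constructed with just a
// Client, an Owner name, and an event Recorder before SetupWithManager is called.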
-func addControllersToManager(mgr runtime.Manager, log logging.Logger, reg registration.Registration) { +func addControllersToManager(mgr runtime.Manager, log logging.Logger) { pgReconciler := &postgrescluster.Reconciler{ - Client: mgr.GetClient(), - Owner: postgrescluster.ControllerName, - Recorder: mgr.GetEventRecorderFor(postgrescluster.ControllerName), - Registration: reg, + Client: mgr.GetClient(), + Owner: postgrescluster.ControllerName, + Recorder: mgr.GetEventRecorderFor(postgrescluster.ControllerName), } if err := pgReconciler.SetupWithManager(mgr); err != nil { @@ -302,10 +278,9 @@ func addControllersToManager(mgr runtime.Manager, log logging.Logger, reg regist } upgradeReconciler := &pgupgrade.PGUpgradeReconciler{ - Client: mgr.GetClient(), - Owner: "pgupgrade-controller", - Recorder: mgr.GetEventRecorderFor("pgupgrade-controller"), - Registration: reg, + Client: mgr.GetClient(), + Owner: "pgupgrade-controller", + Recorder: mgr.GetEventRecorderFor("pgupgrade-controller"), } if err := upgradeReconciler.SetupWithManager(mgr); err != nil { diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml index 4871e399fd..1ac65c6360 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml @@ -1320,7 +1320,7 @@ spec: type: array gunicorn: description: |- - Settings for the gunicorn server. + Settings for the Gunicorn server. More info: https://docs.gunicorn.org/en/latest/settings.html type: object x-kubernetes-preserve-unknown-fields: true @@ -1353,12 +1353,61 @@ spec: - name type: object x-kubernetes-map-type: atomic + oauthConfigurations: + description: |- + Secrets for the `OAUTH2_CONFIG` setting. If there are `OAUTH2_CONFIG` values + in the settings field, they will be combined with the values loaded here. + More info: https://www.pgadmin.org/docs/pgadmin4/latest/oauth2.html + items: + properties: + name: + description: The OAUTH2_NAME of this configuration. + maxLength: 20 + minLength: 1 + pattern: ^[A-Za-z0-9]+$ + type: string + secret: + description: A Secret containing the settings of one OAuth2 + provider as a JSON object. + properties: + key: + description: Name of the data field within the Secret. + maxLength: 253 + minLength: 1 + pattern: ^[-._a-zA-Z0-9]+$ + type: string + x-kubernetes-validations: + - message: cannot be "." or start with ".." + rule: self != "." && !self.startsWith("..") + name: + description: Name of the Secret. + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?([.][a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + required: + - key + - name + type: object + x-kubernetes-map-type: atomic + required: + - name + - secret + type: object + x-kubernetes-map-type: atomic + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map settings: description: |- Settings for the pgAdmin server process. Keys should be uppercase and values must be constants. More info: https://www.pgadmin.org/docs/pgadmin4/latest/config_py.html type: object + x-kubernetes-map-type: granular x-kubernetes-preserve-unknown-fields: true type: object dataVolumeClaimSpec: @@ -1643,6 +1692,135 @@ spec: x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map + environmentVariables: + description: |- + EnvironmentVariables allows the user to add environment variables to the + collector container. 
+ items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must + be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. + Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or + its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in + the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of + the exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's + namespace + properties: + key: + description: The key of the secret to select + from. Must be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: Cannot overwrite environment variables set by + operator + rule: self.name != 'K8S_POD_NAMESPACE' && self.name != + 'K8S_POD_NAME' && self.name != 'PGPASSWORD' + minItems: 1 + type: array + x-kubernetes-list-type: atomic exporters: description: |- Exporters allows users to configure OpenTelemetry exporters that exist @@ -2071,6 +2249,13 @@ spec: type: string x-kubernetes-validations: - rule: duration("0") <= self && self <= duration("60m") + databases: + description: |- + The databases to target with added custom queries. + Default behavior is to target `postgres`. + items: + type: string + type: array name: description: |- The name of this batch of queries, which will be used in naming the OTel @@ -2116,6 +2301,19 @@ spec: type: string type: array type: object + exporters: + description: The names of exporters that should send metrics. + items: + type: string + minItems: 1 + type: array + x-kubernetes-list-type: set + perDBMetricTargets: + description: User defined databases to target for default + per-db metrics + items: + type: string + type: array type: object resources: description: Resources holds the resource requirements for the @@ -2507,6 +2705,10 @@ spec: description: MajorVersion represents the major version of the running pgAdmin. type: integer + minorVersion: + description: MinorVersion represents the minor version of the running + pgAdmin. + type: string observedGeneration: description: observedGeneration represents the .metadata.generation on which the status was based. diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml index 53d72671bc..5b3baae35b 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgupgrades.yaml @@ -964,7 +964,7 @@ spec: fromPostgresVersion: description: The major version of PostgreSQL before the upgrade. format: int32 - maximum: 17 + maximum: 18 minimum: 11 type: integer image: @@ -1094,7 +1094,7 @@ spec: toPostgresVersion: description: The major version of PostgreSQL to be upgraded to. format: int32 - maximum: 17 + maximum: 18 minimum: 11 type: integer tolerations: diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index a116a6b8b3..eb71aba33b 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -40,9 +40,19 @@ spec: description: PostgresClusterSpec defines the desired state of PostgresCluster properties: authentication: + description: Authentication settings for the PostgreSQL server properties: rules: - description: 'More info: https://www.postgresql.org/docs/current/auth-pg-hba-conf.html' + description: |- + Postgres compares every new connection to these rules in the order they are + defined. The first rule that matches determines if and how the connection + must then authenticate. Connections that match no rules are disconnected. 
+ + When this is omitted or empty, Postgres accepts encrypted connections to any + database from users that have a password. To refuse all network connections, + set this to one rule that matches "host" connections to the "reject" method. + + More info: https://www.postgresql.org/docs/current/auth-pg-hba-conf.html items: properties: connection: @@ -79,6 +89,7 @@ spec: description: |- The authentication method to use when a connection matches this rule. The special value "reject" refuses connections that match this rule. + More info: https://www.postgresql.org/docs/current/auth-methods.html maxLength: 20 minLength: 1 @@ -93,6 +104,8 @@ spec: - type: integer - type: string x-kubernetes-int-or-string: true + description: Additional settings for this rule or its authentication + method. maxProperties: 20 type: object x-kubernetes-map-type: atomic @@ -110,11 +123,25 @@ spec: x-kubernetes-map-type: atomic x-kubernetes-validations: - message: '"hba" cannot be combined with other fields' - rule: 'has(self.hba) ? !has(self.connection) && !has(self.databases) - && !has(self.method) && !has(self.options) && !has(self.users) - : true' + rule: '[has(self.hba), has(self.connection) || has(self.databases) + || has(self.method) || has(self.options) || has(self.users)].exists_one(b,b)' - message: '"connection" and "method" are required' - rule: 'has(self.hba) ? true : has(self.connection) && has(self.method)' + rule: has(self.hba) || (has(self.connection) && has(self.method)) + - message: the "ldap" method requires an "ldapbasedn", "ldapprefix", + or "ldapsuffix" option + rule: has(self.hba) || self.method != "ldap" || (has(self.options) + && ["ldapbasedn","ldapprefix","ldapsuffix"].exists(k, k + in self.options)) + - message: cannot use "ldapbasedn", "ldapbinddn", "ldapbindpasswd", + "ldapsearchattribute", or "ldapsearchfilter" options with + "ldapprefix" or "ldapsuffix" options + rule: has(self.hba) || self.method != "ldap" || !has(self.options) + || [["ldapprefix","ldapsuffix"], ["ldapbasedn","ldapbinddn","ldapbindpasswd","ldapsearchattribute","ldapsearchfilter"]].exists_one(a, + a.exists(k, k in self.options)) + - message: the "radius" method requires "radiusservers" and + "radiussecrets" options + rule: has(self.hba) || self.method != "radius" || (has(self.options) + && ["radiusservers","radiussecrets"].all(k, k in self.options)) maxItems: 10 type: array x-kubernetes-list-type: atomic @@ -4447,6 +4474,7 @@ spec: type: object type: object config: + description: General configuration of the PostgreSQL server properties: files: description: Files to mount under "/etc/postgres". @@ -4794,7 +4822,8 @@ spec: - message: change port using .spec.port instead rule: '!has(self.port)' - message: TLS is always enabled - rule: '!has(self.ssl) && !self.exists(k, k.startsWith("ssl_"))' + rule: '!has(self.ssl) && !self.exists(k, k.startsWith("ssl_") + && !(k == ''ssl_groups'' || k == ''ssl_ecdh_curve''))' - message: domain socket paths cannot be changed rule: '!self.exists(k, k.startsWith("unix_socket_"))' - message: wal_level must be "replica" or higher @@ -6647,8 +6676,7 @@ spec: - stanza type: object x-kubernetes-validations: - - fieldPath: .repo - message: Only S3, GCS or Azure repos can be used as a pgBackRest + - message: Only S3, GCS or Azure repos can be used as a pgBackRest data source. rule: '!has(self.repo.volume)' postgresCluster: @@ -11041,6 +11069,218 @@ spec: - whenUnsatisfiable type: object type: array + volumes: + properties: + temp: + description: |- + An ephemeral volume for temporary files. 
+ More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being + referenced + type: string + name: + description: Name is the name of resource being + referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. 
+ type: string + kind: + description: Kind is the type of resource being + referenced + type: string + name: + description: Name is the name of resource being + referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over volumes + to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list of label + selector requirements. The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the + selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". 
The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName; + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim, but it is not allowed to reset this field to the empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference to + the PersistentVolume backing this claim. + type: string + type: object + x-kubernetes-map-type: atomic + x-kubernetes-validations: + - message: missing accessModes + rule: 0 < size(self.accessModes) + - message: missing storage request + rule: has(self.resources.requests.storage) + type: object walVolumeClaimSpec: description: |- Defines a separate PersistentVolumeClaim for PostgreSQL's write-ahead log. @@ -11295,6 +11535,135 @@ spec: x-kubernetes-list-map-keys: - name x-kubernetes-list-type: map + environmentVariables: + description: |- + EnvironmentVariables allows the user to add environment variables to the + collector container. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must + be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment variable's value. + Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty.
Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap or + its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in + the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of + the exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret in the pod's + namespace + properties: + key: + description: The key of the secret to select + from. Must be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its + key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: Cannot overwrite environment variables set by + operator + rule: self.name != 'K8S_POD_NAMESPACE' && self.name != + 'K8S_POD_NAME' && self.name != 'PGPASSWORD' + minItems: 1 + type: array + x-kubernetes-list-type: atomic exporters: description: |- Exporters allows users to configure OpenTelemetry exporters that exist @@ -11723,6 +12092,13 @@ spec: type: string x-kubernetes-validations: - rule: duration("0") <= self && self <= duration("60m") + databases: + description: |- + The databases to target with added custom queries. + Default behavior is to target `postgres`. + items: + type: string + type: array name: description: |- The name of this batch of queries, which will be used in naming the OTel @@ -11768,6 +12144,19 @@ spec: type: string type: array type: object + exporters: + description: The names of exporters that should send metrics. 
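+                        # Each name should correspond to an exporter configured for the
+                        # collector; the list is a set, so duplicate names are rejected.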
+ items: + type: string + minItems: 1 + type: array + x-kubernetes-list-type: set + perDBMetricTargets: + description: User defined databases to target for default + per-db metrics + items: + type: string + type: array type: object resources: description: Resources holds the resource requirements for the @@ -12435,7 +12824,7 @@ spec: postgresVersion: description: The major version of PostgreSQL installed in the PostgreSQL image - maximum: 17 + maximum: 18 minimum: 11 type: integer proxy: @@ -17859,6 +18248,10 @@ spec: - instances - postgresVersion type: object + x-kubernetes-validations: + - message: The ssl_groups parameter is only available in pg18 and greater + rule: '!has(self.config) || !has(self.config.parameters) || !has(self.config.parameters.ssl_groups) + || self.postgresVersion > 17' status: description: PostgresClusterStatus defines the observed state of PostgresCluster properties: @@ -18213,11 +18606,6 @@ spec: type: integer type: object type: object - registrationRequired: - properties: - pgoVersion: - type: string - type: object startupInstance: description: |- The instance that should be started first when bootstrapping and/or starting a @@ -18226,8 +18614,6 @@ spec: startupInstanceSet: description: The instance set associated with the startupInstance type: string - tokenRequired: - type: string userInterface: description: Current state of the PostgreSQL user interface. properties: diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 7e5c21a7b4..fc6133d899 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -23,29 +23,27 @@ spec: - name: CRUNCHY_DEBUG value: "true" - name: RELATED_IMAGE_POSTGRES_16 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-16.8-0" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-16.11-2547" - name: RELATED_IMAGE_POSTGRES_16_GIS_3.3 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.3-0" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.11-3.3-2547" - name: RELATED_IMAGE_POSTGRES_16_GIS_3.4 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-16.8-3.4-0" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-16.11-3.4-2547" - name: RELATED_IMAGE_POSTGRES_17 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi8-17.4-0" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres:ubi9-17.7-2547" - name: RELATED_IMAGE_POSTGRES_17_GIS_3.4 - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi8-17.4-3.4-0" - - name: RELATED_IMAGE_PGADMIN - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-4.30-35" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-gis:ubi9-17.7-3.4-2547" - name: RELATED_IMAGE_PGBACKREST - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi8-2.54.1-1" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbackrest:ubi9-2.56.0-2547" - name: RELATED_IMAGE_PGBOUNCER - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi8-1.23-4" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgbouncer:ubi9-1.24-2547" - name: RELATED_IMAGE_PGEXPORTER - value: "registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:latest" + value: 
"registry.developers.crunchydata.com/crunchydata/crunchy-postgres-exporter:ubi9-0.18.1-2547" - name: RELATED_IMAGE_PGUPGRADE - value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:latest" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-upgrade:ubi9-17.7-2547" - name: RELATED_IMAGE_STANDALONE_PGADMIN - value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi8-8.14-2" + value: "registry.developers.crunchydata.com/crunchydata/crunchy-pgadmin4:ubi9-9.8-2547" - name: RELATED_IMAGE_COLLECTOR - value: "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.119.0" + value: "registry.developers.crunchydata.com/crunchydata/postgres-operator:ubi9-5.8.5-0" securityContext: allowPrivilegeEscalation: false capabilities: { drop: [ALL] } diff --git a/go.mod b/go.mod index 0db97ac83d..8ac52ebaf2 100644 --- a/go.mod +++ b/go.mod @@ -1,13 +1,11 @@ module github.com/crunchydata/postgres-operator // If this is changing when you don't want it to, see hack/go-get.sh -go 1.23.0 +go 1.24.0 require ( github.com/go-logr/logr v1.4.2 - github.com/golang-jwt/jwt/v5 v5.2.1 github.com/google/go-cmp v0.6.0 - github.com/google/uuid v1.6.0 github.com/kubernetes-csi/external-snapshotter/client/v8 v8.0.0 github.com/onsi/ginkgo/v2 v2.22.0 github.com/onsi/gomega v1.36.1 @@ -21,8 +19,8 @@ require ( go.opentelemetry.io/otel v1.32.0 go.opentelemetry.io/otel/sdk v1.32.0 go.opentelemetry.io/otel/trace v1.32.0 - golang.org/x/crypto v0.35.0 - golang.org/x/tools v0.28.0 + golang.org/x/crypto v0.45.0 + golang.org/x/tools v0.38.0 gotest.tools/v3 v3.5.1 k8s.io/api v0.31.0 k8s.io/apimachinery v0.31.0 @@ -59,6 +57,7 @@ require ( github.com/google/gnostic-models v0.6.8 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect + github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect github.com/imdario/mergo v0.3.16 // indirect @@ -103,13 +102,13 @@ require ( go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 // indirect - golang.org/x/mod v0.22.0 // indirect - golang.org/x/net v0.33.0 // indirect - golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sync v0.11.0 // indirect - golang.org/x/sys v0.30.0 // indirect - golang.org/x/term v0.29.0 // indirect - golang.org/x/text v0.22.0 // indirect + golang.org/x/mod v0.29.0 // indirect + golang.org/x/net v0.47.0 // indirect + golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/sync v0.18.0 // indirect + golang.org/x/sys v0.38.0 // indirect + golang.org/x/term v0.37.0 // indirect + golang.org/x/text v0.31.0 // indirect golang.org/x/time v0.5.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect diff --git a/go.sum b/go.sum index 0fa2adc5a3..abaef7095e 100644 --- a/go.sum +++ b/go.sum @@ -46,8 +46,6 @@ github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1v github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= -github.com/golang-jwt/jwt/v5 
v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= @@ -210,48 +208,48 @@ go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs= -golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ= +golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8 h1:LoYXNGAShUG3m/ehNk4iFctuhGX/+R1ZpfJ4/ia80JM= golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= -golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= +golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= -golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= -golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= -golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= -golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= +golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys 
v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= -golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= -golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= +golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= -golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= +golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8= -golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw= +golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= +golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/bridge/client.go b/internal/bridge/client.go index 9ec13ec2bb..3e3c4c3b4c 100644 --- a/internal/bridge/client.go +++ b/internal/bridge/client.go @@ -280,7 +280,7 @@ func (c *Client) doWithBackoff( request.Header = headers.Clone() //nolint:bodyclose // This response is returned to the caller. - response, err = c.Client.Do(request) + response, err = c.Do(request) } // An error indicates there was no response from the server, and the @@ -327,7 +327,7 @@ func (c *Client) doWithRetry( // Retry the request when the server responds with "Too many requests". 
// - https://docs.crunchybridge.com/api-concepts/getting-started/#status-codes // - https://docs.crunchybridge.com/api-concepts/getting-started/#rate-limiting - for err == nil && response.StatusCode == 429 { + for err == nil && response.StatusCode == http.StatusTooManyRequests { seconds, _ := strconv.Atoi(response.Header.Get("Retry-After")) // Only retry when the response indicates how long to wait. @@ -378,11 +378,11 @@ func (c *Client) CreateAuthObject(ctx context.Context, authn AuthObject) (AuthOb } // 401, Unauthorized - case response.StatusCode == 401: + case response.StatusCode == http.StatusUnauthorized: err = fmt.Errorf("%w: %s", errAuthentication, body) default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -409,7 +409,7 @@ func (c *Client) CreateInstallation(ctx context.Context) (Installation, error) { } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -445,7 +445,7 @@ func (c *Client) ListClusters(ctx context.Context, apiKey, teamId string) ([]*Cl } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -486,7 +486,7 @@ func (c *Client) CreateCluster( } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -524,14 +524,14 @@ func (c *Client) DeleteCluster(ctx context.Context, apiKey, id string) (*Cluster // --https://docs.crunchybridge.com/api-concepts/idempotency#delete-semantics // But also, if we can't find it... // Maybe if no ID we return already deleted? - case response.StatusCode == 410: + case response.StatusCode == http.StatusGone: fallthrough - case response.StatusCode == 404: + case response.StatusCode == http.StatusNotFound: deletedAlready = true err = nil default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -565,7 +565,7 @@ func (c *Client) GetCluster(ctx context.Context, apiKey, id string) (*ClusterApi } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -599,7 +599,7 @@ func (c *Client) GetClusterStatus(ctx context.Context, apiKey, id string) (*Clus } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -633,7 +633,7 @@ func (c *Client) GetClusterUpgrade(ctx context.Context, apiKey, id string) (*Clu } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -674,7 +674,7 @@ func (c *Client) UpgradeCluster( } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -709,7 +709,7 @@ func (c *Client) UpgradeClusterHA(ctx context.Context, apiKey, id, action string } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. 
err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -747,7 +747,7 @@ func (c *Client) UpdateCluster( } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -777,7 +777,7 @@ func (c *Client) GetClusterRole(ctx context.Context, apiKey, clusterId, roleName } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } @@ -807,7 +807,7 @@ func (c *Client) ListClusterRoles(ctx context.Context, apiKey, id string) ([]*Cl } default: - //nolint:goerr113 // This is intentionally dynamic. + //nolint:err113 // This is intentionally dynamic. err = fmt.Errorf("%v: %s", response.Status, body) } } diff --git a/internal/bridge/client_test.go b/internal/bridge/client_test.go index 6b464c05b3..f1aa1c8ddd 100644 --- a/internal/bridge/client_test.go +++ b/internal/bridge/client_test.go @@ -31,8 +31,8 @@ func TestClientBackoff(t *testing.T) { client := NewClient("", "") var total time.Duration - for i := 1; i <= 50 && client.Backoff.Steps > 0; i++ { - step := client.Backoff.Step() + for i := 1; i <= 50 && client.Steps > 0; i++ { + step := client.Step() total += step t.Logf("%02d:%20v%20v", i, step, total) @@ -68,7 +68,7 @@ func TestClientDoWithBackoff(t *testing.T) { // Client with one attempt, i.e. no backoff. client := NewClient(server.URL, "xyz") - client.Backoff.Steps = 1 + client.Steps = 1 assert.Equal(t, client.BaseURL.String(), server.URL) ctx := context.Background() @@ -113,8 +113,8 @@ func TestClientDoWithBackoff(t *testing.T) { // Client with brief backoff. client := NewClient(server.URL, "") - client.Backoff.Duration = time.Millisecond - client.Backoff.Steps = 5 + client.Duration = time.Millisecond + client.Steps = 5 assert.Equal(t, client.BaseURL.String(), server.URL) ctx := context.Background() @@ -170,8 +170,8 @@ func TestClientDoWithBackoff(t *testing.T) { // Client with brief backoff. client := NewClient(server.URL, "") - client.Backoff.Duration = time.Millisecond - client.Backoff.Steps = 5 + client.Duration = time.Millisecond + client.Steps = 5 assert.Equal(t, client.BaseURL.String(), server.URL) ctx := context.Background() @@ -190,8 +190,8 @@ func TestClientDoWithBackoff(t *testing.T) { // Client with lots of brief backoff. client := NewClient(server.URL, "") - client.Backoff.Duration = time.Millisecond - client.Backoff.Steps = 100 + client.Duration = time.Millisecond + client.Steps = 100 assert.Equal(t, client.BaseURL.String(), server.URL) ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) diff --git a/internal/bridge/crunchybridgecluster/apply.go b/internal/bridge/crunchybridgecluster/apply.go index baffd16516..6edd870790 100644 --- a/internal/bridge/crunchybridgecluster/apply.go +++ b/internal/bridge/crunchybridgecluster/apply.go @@ -22,7 +22,7 @@ func (r *CrunchyBridgeClusterReconciler) patch( patch client.Patch, options ...client.PatchOption, ) error { options = append([]client.PatchOption{r.Owner}, options...) - return r.Client.Patch(ctx, object, patch, options...) + return r.Patch(ctx, object, patch, options...) 
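+	// Patch is promoted from the client.Client embedded in the reconciler,
+	// so the explicit r.Client selector is redundant; the same applies to the
+	// Get, List, Delete, and Scheme calls elsewhere in this package.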
} // apply sends an apply patch to object's endpoint in the Kubernetes API and diff --git a/internal/bridge/crunchybridgecluster/crunchybridgecluster_controller.go b/internal/bridge/crunchybridgecluster/crunchybridgecluster_controller.go index 2e81e7f113..ec9973ade1 100644 --- a/internal/bridge/crunchybridgecluster/crunchybridgecluster_controller.go +++ b/internal/bridge/crunchybridgecluster/crunchybridgecluster_controller.go @@ -91,7 +91,7 @@ func (r *CrunchyBridgeClusterReconciler) SetupWithManager( func (r *CrunchyBridgeClusterReconciler) setControllerReference( owner *v1beta1.CrunchyBridgeCluster, controlled client.Object, ) error { - return controllerutil.SetControllerReference(owner, controlled, r.Client.Scheme()) + return controllerutil.SetControllerReference(owner, controlled, r.Scheme()) } //+kubebuilder:rbac:groups="postgres-operator.crunchydata.com",resources="crunchybridgeclusters",verbs={get,patch,update} @@ -684,7 +684,7 @@ func (r *CrunchyBridgeClusterReconciler) GetSecretKeys( }} err := errors.WithStack( - r.Client.Get(ctx, client.ObjectKeyFromObject(existing), existing)) + r.Get(ctx, client.ObjectKeyFromObject(existing), existing)) if err == nil { if existing.Data["key"] != nil && existing.Data["team"] != nil { @@ -707,7 +707,7 @@ func (r *CrunchyBridgeClusterReconciler) deleteControlled( version := object.GetResourceVersion() exactly := client.Preconditions{UID: &uid, ResourceVersion: &version} - return r.Client.Delete(ctx, object, exactly) + return r.Delete(ctx, object, exactly) } return nil diff --git a/internal/bridge/crunchybridgecluster/delete.go b/internal/bridge/crunchybridgecluster/delete.go index b0a957a0ec..ae44c8036b 100644 --- a/internal/bridge/crunchybridgecluster/delete.go +++ b/internal/bridge/crunchybridgecluster/delete.go @@ -28,7 +28,7 @@ func (r *CrunchyBridgeClusterReconciler) handleDelete( log := ctrl.LoggerFrom(ctx) // If the CrunchyBridgeCluster isn't being deleted, add the finalizer - if crunchybridgecluster.ObjectMeta.DeletionTimestamp.IsZero() { + if crunchybridgecluster.DeletionTimestamp.IsZero() { if !controllerutil.ContainsFinalizer(crunchybridgecluster, finalizer) { controllerutil.AddFinalizer(crunchybridgecluster, finalizer) if err := r.Update(ctx, crunchybridgecluster); err != nil { diff --git a/internal/bridge/crunchybridgecluster/delete_test.go b/internal/bridge/crunchybridgecluster/delete_test.go index c04daaa131..c86746ef1b 100644 --- a/internal/bridge/crunchybridgecluster/delete_test.go +++ b/internal/bridge/crunchybridgecluster/delete_test.go @@ -65,7 +65,7 @@ func TestHandleDeleteCluster(t *testing.T) { // Get cluster from kubernetes and assert that the deletion timestamp was added assert.NilError(t, tClient.Get(ctx, client.ObjectKeyFromObject(cluster), cluster)) - assert.Check(t, !cluster.ObjectMeta.DeletionTimestamp.IsZero()) + assert.Check(t, !cluster.DeletionTimestamp.IsZero()) // Note: We must run handleDelete multiple times because we don't want to remove the // finalizer until we're sure that the cluster has been deleted from Bridge, so we @@ -107,7 +107,7 @@ func TestHandleDeleteCluster(t *testing.T) { // Get cluster from kubernetes and assert that the deletion timestamp was added assert.NilError(t, tClient.Get(ctx, client.ObjectKeyFromObject(cluster), cluster)) - assert.Check(t, !cluster.ObjectMeta.DeletionTimestamp.IsZero()) + assert.Check(t, !cluster.DeletionTimestamp.IsZero()) // Run handleDelete again to attempt to delete from Bridge, but provide bad api key cluster.Status.ID = "2345" diff --git 
a/internal/bridge/crunchybridgecluster/mock_bridge_api.go b/internal/bridge/crunchybridgecluster/mock_bridge_api.go index f0841dee44..f0439531d1 100644 --- a/internal/bridge/crunchybridgecluster/mock_bridge_api.go +++ b/internal/bridge/crunchybridgecluster/mock_bridge_api.go @@ -13,7 +13,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/bridge" "github.com/crunchydata/postgres-operator/internal/initialize" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) diff --git a/internal/bridge/crunchybridgecluster/postgres.go b/internal/bridge/crunchybridgecluster/postgres.go index a1431ca93f..80096de91b 100644 --- a/internal/bridge/crunchybridgecluster/postgres.go +++ b/internal/bridge/crunchybridgecluster/postgres.go @@ -11,7 +11,6 @@ import ( "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -93,7 +92,7 @@ func (r *CrunchyBridgeClusterReconciler) reconcilePostgresRoleSecrets( // Make sure that this cluster's role secret names are not being used by any other // secrets in the namespace allSecretsInNamespace := &corev1.SecretList{} - err := errors.WithStack(r.Client.List(ctx, allSecretsInNamespace, client.InNamespace(cluster.Namespace))) + err := errors.WithStack(r.List(ctx, allSecretsInNamespace, client.InNamespace(cluster.Namespace))) if err != nil { return nil, nil, err } @@ -116,7 +115,7 @@ func (r *CrunchyBridgeClusterReconciler) reconcilePostgresRoleSecrets( selector, err := naming.AsSelector(naming.CrunchyBridgeClusterPostgresRoles(cluster.Name)) if err == nil { err = errors.WithStack( - r.Client.List(ctx, secrets, + r.List(ctx, secrets, client.InNamespace(cluster.Namespace), client.MatchingLabelsSelector{Selector: selector}, )) diff --git a/internal/bridge/crunchybridgecluster/postgres_test.go b/internal/bridge/crunchybridgecluster/postgres_test.go index e9454bd4ee..6fae4fe26a 100644 --- a/internal/bridge/crunchybridgecluster/postgres_test.go +++ b/internal/bridge/crunchybridgecluster/postgres_test.go @@ -8,12 +8,11 @@ import ( "context" "testing" - "sigs.k8s.io/controller-runtime/pkg/client" - "gotest.tools/v3/assert" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/crunchydata/postgres-operator/internal/bridge" "github.com/crunchydata/postgres-operator/internal/testing/require" diff --git a/internal/bridge/installation_test.go b/internal/bridge/installation_test.go index 766233b8bb..f7a86e2d3a 100644 --- a/internal/bridge/installation_test.go +++ b/internal/bridge/installation_test.go @@ -99,7 +99,7 @@ func TestInstallationReconcile(t *testing.T) { reconciler.NewClient = func() *Client { c := NewClient(server.URL, "") - c.Backoff.Steps = 1 + c.Steps = 1 assert.Equal(t, c.BaseURL.String(), server.URL) return c } @@ -155,7 +155,7 @@ func TestInstallationReconcile(t *testing.T) { reconciler.NewClient = func() *Client { c := NewClient(server.URL, "") - c.Backoff.Steps = 1 + c.Steps = 1 assert.Equal(t, c.BaseURL.String(), server.URL) return c } @@ -289,7 +289,7 @@ func TestInstallationReconcile(t *testing.T) { reconciler.NewClient = func() *Client { c := NewClient(server.URL, "") - c.Backoff.Steps = 1 + c.Steps = 1 assert.Equal(t, c.BaseURL.String(), server.URL) return c } @@ -343,7 +343,7 @@ func TestInstallationReconcile(t *testing.T) { reconciler.NewClient = func() 
*Client { c := NewClient(server.URL, "") - c.Backoff.Steps = 1 + c.Steps = 1 assert.Equal(t, c.BaseURL.String(), server.URL) return c } @@ -426,7 +426,7 @@ func TestInstallationReconcile(t *testing.T) { reconciler.NewClient = func() *Client { c := NewClient(server.URL, "") - c.Backoff.Steps = 1 + c.Steps = 1 assert.Equal(t, c.BaseURL.String(), server.URL) return c } diff --git a/internal/collector/eq_pg16_fast_metrics.yaml b/internal/collector/eq_pg16_fast_metrics.yaml new file mode 100644 index 0000000000..855dc8a3d3 --- /dev/null +++ b/internal/collector/eq_pg16_fast_metrics.yaml @@ -0,0 +1,51 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml + +# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. +# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values +# Those columns are retained_bytes, database, and conflicting and we avoid NULL by using COALESCE. + - sql: > + SELECT + s.slot_name + , s.active::int + , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes + , COALESCE(s.database, '') + , s.slot_type + , COALESCE(s.conflicting::int, 0) + , 0 AS failover + , 0 AS synced + FROM pg_catalog.pg_replication_slots s; + metrics: + - metric_name: ccp_replication_slots_active + value_column: active + description: Active state of slot. 1 = true. 0 = false. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_retained_bytes + value_column: retained_bytes + description: The amount of WAL (in bytes) being retained for this slot + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_conflicting + value_column: conflicting + description: True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_failover + value_column: failover + description: True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_synced + value_column: synced + description: True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true. 
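+      # The "conflicting" column was added to pg_replication_slots in Postgres 16,
+      # while "failover" and "synced" arrive in Postgres 17, so this PG 16 query
+      # reports a constant 0 for failover and synced.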
+ attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/generated/.gitattributes b/internal/collector/generated/.gitattributes deleted file mode 100644 index 49e9f142dd..0000000000 --- a/internal/collector/generated/.gitattributes +++ /dev/null @@ -1,2 +0,0 @@ -# https://docs.github.com/en/repositories/working-with-files/managing-files/customizing-how-changed-files-appear-on-github -/*.json linguist-generated=true diff --git a/internal/collector/generated/eq_pg16_fast_metrics.json b/internal/collector/generated/eq_pg16_fast_metrics.json new file mode 100644 index 0000000000..a695d811d9 --- /dev/null +++ b/internal/collector/generated/eq_pg16_fast_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. 
The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , COALESCE(s.conflicting::int, 0)\n , 0 AS failover\n , 0 AS synced\nFROM pg_catalog.pg_replication_slots s;\n"}] diff --git a/internal/collector/generated/gte_pg16_metrics.json b/internal/collector/generated/gte_pg16_metrics.json deleted file mode 100644 index 3b27be7bc0..0000000000 --- a/internal/collector/generated/gte_pg16_metrics.json +++ /dev/null @@ -1 +0,0 @@ -[{"metrics":[{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of live rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows 
inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"}],"sql":"SELECT\n current_database() as dbname\n , p.schemaname\n , p.relname\n , p.seq_scan\n , p.seq_tup_read\n , COALESCE(p.idx_scan, 0) AS idx_scan\n , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch\n , p.n_tup_ins\n , p.n_tup_upd\n , p.n_tup_del\n , p.n_tup_hot_upd\n , p.n_tup_newpage_upd\n , p.n_live_tup\n , p.n_dead_tup\n , p.vacuum_count\n , p.autovacuum_count\n , p.analyze_count\n , p.autoanalyze_count\n FROM pg_catalog.pg_stat_user_tables p;\n"}] diff --git a/internal/collector/generated/gte_pg17_fast_metrics.json b/internal/collector/generated/gte_pg17_fast_metrics.json new file mode 100644 index 0000000000..9553e8c756 --- /dev/null +++ b/internal/collector/generated/gte_pg17_fast_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"data_type":"sum","description":"Number of write operations by background writers","metric_name":"ccp_stat_io_bgwriter_writes","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls by background writers","metric_name":"ccp_stat_io_bgwriter_fsyncs","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.writes\n , s.fsyncs\nFROM pg_catalog.pg_stat_io s WHERE backend_type = 'background writer';\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_requested","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Number of buffers written during checkpoints and 
restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.num_timed\n , c.num_requested\n , c.write_time\n , c.sync_time\n , c.buffers_written\nFROM pg_catalog.pg_stat_checkpointer c;\n"},{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , COALESCE(s.conflicting::int, 0)\n , COALESCE(s.failover::int, 0)\n , COALESCE(s.synced::int, 0)\nFROM pg_catalog.pg_replication_slots s;\n"}] diff --git a/internal/collector/generated/gte_pg17_metrics.json b/internal/collector/generated/gte_pg17_metrics.json deleted file mode 100644 index 563abf01b3..0000000000 --- a/internal/collector/generated/gte_pg17_metrics.json +++ /dev/null @@ -1 +0,0 @@ -[{"metrics":[{"data_type":"sum","description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_bgwriter_buffers_checkpoint","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT c.buffers_written FROM pg_catalog.pg_stat_checkpointer c;\n"},{"metrics":[{"data_type":"sum","description":"Number of write operations, each of the size specified in op_bytes.","metric_name":"ccp_stat_bgwriter_buffers_backend","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls. 
These are only tracked in context normal.","metric_name":"ccp_stat_bgwriter_buffers_backend_fsync","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.writes\n , s.fsyncs\nFROM pg_catalog.pg_stat_io s WHERE backend_type = 'background writer';\n"},{"metrics":[{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_req","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.num_timed\n , c.num_requested\n , c.write_time\n , c.sync_time\n , c.buffers_written\nFROM pg_catalog.pg_stat_checkpointer c;\n"}] diff --git a/internal/collector/generated/lt_pg16_fast_metrics.json b/internal/collector/generated/lt_pg16_fast_metrics.json new file mode 100644 index 0000000000..dcd1d5fe77 --- /dev/null +++ b/internal/collector/generated/lt_pg16_fast_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["database","slot_name","slot_type"],"description":"Active state of slot. 1 = true. 0 = false.","metric_name":"ccp_replication_slots_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"attribute_columns":["database","slot_name","slot_type"],"description":"The amount of WAL (in bytes) being retained for this slot","metric_name":"ccp_replication_slots_retained_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"retained_bytes"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots.","metric_name":"ccp_replication_slots_conflicting","static_attributes":{"server":"localhost:5432"},"value_column":"conflicting"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots.","metric_name":"ccp_replication_slots_failover","static_attributes":{"server":"localhost:5432"},"value_column":"failover"},{"attribute_columns":["database","slot_name","slot_type"],"description":"True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. 
The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true.","metric_name":"ccp_replication_slots_synced","static_attributes":{"server":"localhost:5432"},"value_column":"synced"}],"sql":"SELECT\n s.slot_name\n , s.active::int\n , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes\n , COALESCE(s.database, '')\n , s.slot_type\n , 0 AS conflicting\n , 0 AS failover\n , 0 AS synced\nFROM pg_catalog.pg_replication_slots s;\n"}] diff --git a/internal/collector/generated/lt_pg16_metrics.json b/internal/collector/generated/lt_pg16_metrics.json deleted file mode 100644 index 98bb0cc213..0000000000 --- a/internal/collector/generated/lt_pg16_metrics.json +++ /dev/null @@ -1 +0,0 @@ -[{"metrics":[{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","relname","schemaname"],"description":"Estimated number of live rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows 
inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of rows updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","relname","schemaname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"}],"sql":"SELECT\n current_database() as dbname\n , p.schemaname\n , p.relname\n , p.seq_scan\n , p.seq_tup_read\n , COALESCE(p.idx_scan, 0) AS idx_scan\n , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch\n , p.n_tup_ins\n , p.n_tup_upd\n , p.n_tup_del\n , p.n_tup_hot_upd\n , 0::bigint AS n_tup_newpage_upd\n , p.n_live_tup\n , p.n_dead_tup\n , p.vacuum_count\n , p.autovacuum_count\n , p.analyze_count\n , p.autoanalyze_count\nFROM pg_catalog.pg_stat_user_tables p;\n"}] diff --git a/internal/collector/generated/lt_pg17_fast_metrics.json b/internal/collector/generated/lt_pg17_fast_metrics.json new file mode 100644 index 0000000000..55b6ca78fc --- /dev/null +++ b/internal/collector/generated/lt_pg17_fast_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"data_type":"sum","description":"Number of write operations by background writers","metric_name":"ccp_stat_io_bgwriter_writes","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls by background writers","metric_name":"ccp_stat_io_bgwriter_fsyncs","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.buffers_backend AS writes\n , s.buffers_backend_fsync AS fsyncs\nFROM pg_catalog.pg_stat_bgwriter s;\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_checkpointer_num_requested","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_checkpointer_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Number of buffers written during checkpoints and 
restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.checkpoints_timed AS num_timed\n , c.checkpoints_req AS num_requested\n , c.checkpoint_write_time AS write_time\n , c.checkpoint_sync_time AS sync_time\n , c.buffers_checkpoint AS buffers_written\nFROM pg_catalog.pg_stat_bgwriter c;\n"}] diff --git a/internal/collector/generated/lt_pg17_metrics.json b/internal/collector/generated/lt_pg17_metrics.json deleted file mode 100644 index d6266ffacb..0000000000 --- a/internal/collector/generated/lt_pg17_metrics.json +++ /dev/null @@ -1 +0,0 @@ -[{"metrics":[{"data_type":"sum","description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_bgwriter_buffers_checkpoint","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT c.buffers_checkpoint AS buffers_written FROM pg_catalog.pg_stat_bgwriter c;\n"},{"metrics":[{"data_type":"sum","description":"Number of write operations, each of the size specified in op_bytes.","metric_name":"ccp_stat_bgwriter_buffers_backend","static_attributes":{"server":"localhost:5432"},"value_column":"writes"},{"data_type":"sum","description":"Number of fsync calls. These are only tracked in context normal.","metric_name":"ccp_stat_bgwriter_buffers_backend_fsync","static_attributes":{"server":"localhost:5432"},"value_column":"fsyncs"}],"sql":"SELECT\n s.buffers_backend AS writes\n , s.buffers_backend_fsync AS fsyncs\nFROM pg_catalog.pg_stat_bgwriter s;\n"},{"metrics":[{"description":"Number of scheduled checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_timed","static_attributes":{"server":"localhost:5432"},"value_column":"num_timed"},{"description":"Number of requested checkpoints that have been performed","metric_name":"ccp_stat_bgwriter_checkpoints_req","static_attributes":{"server":"localhost:5432"},"value_column":"num_requested"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_write_time","static_attributes":{"server":"localhost:5432"},"value_column":"write_time","value_type":"double"},{"description":"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds","metric_name":"ccp_stat_bgwriter_checkpoint_sync_time","static_attributes":{"server":"localhost:5432"},"value_column":"sync_time"},{"description":"Number of buffers written during checkpoints and restartpoints","metric_name":"ccp_stat_checkpointer_buffers_written","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_written"}],"sql":"SELECT\n c.checkpoints_timed AS num_timed\n , c.checkpoints_req AS num_requested\n , c.checkpoint_write_time AS write_time\n , c.checkpoint_sync_time AS sync_time\n , c.buffers_checkpoint AS buffers_written\nFROM pg_catalog.pg_stat_bgwriter c;\n"}] diff --git a/internal/collector/generated/pgbackrest_logs_transforms.json b/internal/collector/generated/pgbackrest_logs_transforms.json index adf3b09af9..3f8cf5137a 100644 --- a/internal/collector/generated/pgbackrest_logs_transforms.json +++ b/internal/collector/generated/pgbackrest_logs_transforms.json @@ -1 +1 @@ -[{"context":"log","statements":["set(instrumentation_scope.name, \"pgbackrest\")","set(instrumentation_scope.schema_url, 
\"https://opentelemetry.io/schemas/1.29.0\")","merge_maps(cache, ExtractPatterns(body, \"^(?\u003ctimestamp\u003e\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}) (?\u003cprocess_id\u003eP\\\\d{2,3})\\\\s*(?\u003cerror_severity\u003e\\\\S*): (?\u003cmessage\u003e(?s).*)$\"), \"insert\") where Len(body) \u003e 0","set(severity_text, cache[\"error_severity\"]) where IsString(cache[\"error_severity\"])","set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == \"TRACE\"","set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == \"DEBUG\"","set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == \"DETAIL\"","set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == \"INFO\"","set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == \"WARN\"","set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == \"ERROR\"","set(time, Time(cache[\"timestamp\"], \"%Y-%m-%d %H:%M:%S.%L\")) where IsString(cache[\"timestamp\"])","set(attributes[\"process.pid\"], cache[\"process_id\"])","set(attributes[\"log.record.original\"], body)","set(body, cache[\"message\"])"]}] +[{"statements":["set(instrumentation_scope.name, \"pgbackrest\")","set(instrumentation_scope.schema_url, \"https://opentelemetry.io/schemas/1.29.0\")","merge_maps(log.cache, ExtractPatterns(log.body, \"^(?\u003ctimestamp\u003e\\\\d{4}-\\\\d{2}-\\\\d{2} \\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}) (?\u003cprocess_id\u003eP\\\\d{2,3})\\\\s*(?\u003cerror_severity\u003e\\\\S*): (?\u003cmessage\u003e(?s).*)$\"), \"insert\") where Len(log.body) \u003e 0","set(log.severity_text, log.cache[\"error_severity\"]) where IsString(log.cache[\"error_severity\"])","set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == \"TRACE\"","set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == \"DEBUG\"","set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == \"DETAIL\"","set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == \"INFO\"","set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == \"WARN\"","set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == \"ERROR\"","set(log.time, Time(log.cache[\"timestamp\"], \"%Y-%m-%d %H:%M:%S.%L\")) where IsString(log.cache[\"timestamp\"])","set(log.attributes[\"process.pid\"], log.cache[\"process_id\"])","set(log.attributes[\"log.record.original\"], log.body)","set(log.body, log.cache[\"message\"])"]}] diff --git a/internal/collector/generated/pgbackrest_metrics.json b/internal/collector/generated/pgbackrest_metrics.json deleted file mode 100644 index 63114afc03..0000000000 --- a/internal/collector/generated/pgbackrest_metrics.json +++ /dev/null @@ -1 +0,0 @@ -[{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. 
Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["backup_type","repo"],"description":"Total size of this backup in the pgbackrest repository, including all required previous backups and WAL","metric_name":"ccp_backrest_last_info_repo_total_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_total_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"}] diff --git a/internal/collector/generated/pgbouncer_metrics_queries.json b/internal/collector/generated/pgbouncer_metrics_queries.json index 0248051d94..21ebb140bc 100644 --- a/internal/collector/generated/pgbouncer_metrics_queries.json +++ b/internal/collector/generated/pgbouncer_metrics_queries.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"Current waiting time in seconds","metric_name":"ccp_pgbouncer_clients_wait_seconds","value_column":"wait"}],"sql":"SHOW CLIENTS"},{"metrics":[{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Maximum number of server connections","metric_name":"ccp_pgbouncer_databases_pool_size","value_column":"pool_size"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Minimum number of server 
connections","metric_name":"ccp_pgbouncer_databases_min_pool_size","value_column":"min_pool_size"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Maximum number of additional connections for this database","metric_name":"ccp_pgbouncer_databases_reserve_pool","value_column":"reserve_pool"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Maximum number of allowed connections for this database, as set by max_db_connections, either globally or per database","metric_name":"ccp_pgbouncer_databases_max_connections","value_column":"max_connections"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"Current number of connections for this database","metric_name":"ccp_pgbouncer_databases_current_connections","value_column":"current_connections"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"1 if this database is currently paused, else 0","metric_name":"ccp_pgbouncer_databases_paused","value_column":"paused"},{"attribute_columns":["name","port","database","force_user","pool_mode"],"description":"1 if this database is currently disabled, else 0","metric_name":"ccp_pgbouncer_databases_disabled","value_column":"disabled"}],"sql":"SHOW DATABASES"},{"metrics":[{"attribute_columns":["list"],"description":"Count of items registered with pgBouncer","metric_name":"ccp_pgbouncer_lists_item_count","value_column":"items"}],"sql":"SHOW LISTS"},{"metrics":[{"attribute_columns":["database","user"],"description":"Client connections that are either linked to server connections or are idle with no queries waiting to be processed","metric_name":"ccp_pgbouncer_pools_client_active","value_column":"cl_active"},{"attribute_columns":["database","user"],"description":"Client connections that have sent queries but have not yet got a server connection","metric_name":"ccp_pgbouncer_pools_client_waiting","value_column":"cl_waiting"},{"attribute_columns":["database","user"],"description":"Server connections that are linked to a client","metric_name":"ccp_pgbouncer_pools_server_active","value_column":"sv_active"},{"attribute_columns":["database","user"],"description":"Server connections that are unused and immediately usable for client queries","metric_name":"ccp_pgbouncer_pools_server_idle","value_column":"sv_idle"},{"attribute_columns":["database","user"],"description":"Server connections that have been idle for more than server_check_delay, so they need server_check_query to run on them before they can be used again","metric_name":"ccp_pgbouncer_pools_server_used","value_column":"sv_used"}],"sql":"SHOW POOLS"},{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"1 if the connection will be closed as soon as possible, because a configuration file reload or DNS update changed the connection information or RECONNECT was issued","metric_name":"ccp_pgbouncer_servers_close_needed","value_column":"close_needed"}],"sql":"SHOW SERVERS"}] +[{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"Current waiting time in seconds","metric_name":"ccp_pgbouncer_clients_wait_seconds","value_column":"wait"}],"sql":"SHOW CLIENTS;"},{"metrics":[{"attribute_columns":["name","port","database"],"description":"Maximum number of server connections","metric_name":"ccp_pgbouncer_databases_pool_size","value_column":"pool_size"},{"attribute_columns":["name","port","database"],"description":"Minimum number 
of server connections","metric_name":"ccp_pgbouncer_databases_min_pool_size","value_column":"min_pool_size"},{"attribute_columns":["name","port","database"],"description":"Maximum number of additional connections for this database","metric_name":"ccp_pgbouncer_databases_reserve_pool_size","value_column":"reserve_pool_size"},{"attribute_columns":["name","port","database"],"description":"Maximum number of allowed connections for this database, as set by max_db_connections, either globally or per database","metric_name":"ccp_pgbouncer_databases_max_connections","value_column":"max_connections"},{"attribute_columns":["name","port","database"],"description":"Current number of connections for this database","metric_name":"ccp_pgbouncer_databases_current_connections","value_column":"current_connections"},{"attribute_columns":["name","port","database"],"description":"1 if this database is currently paused, else 0","metric_name":"ccp_pgbouncer_databases_paused","value_column":"paused"},{"attribute_columns":["name","port","database"],"description":"1 if this database is currently disabled, else 0","metric_name":"ccp_pgbouncer_databases_disabled","value_column":"disabled"}],"sql":"SHOW DATABASES;"},{"metrics":[{"attribute_columns":["list"],"description":"Count of items registered with pgBouncer","metric_name":"ccp_pgbouncer_lists_item_count","value_column":"items"}],"sql":"SHOW LISTS;"},{"metrics":[{"attribute_columns":["database","user"],"description":"Client connections that are either linked to server connections or are idle with no queries waiting to be processed","metric_name":"ccp_pgbouncer_pools_client_active","value_column":"cl_active"},{"attribute_columns":["database","user"],"description":"Client connections that have sent queries but have not yet got a server connection","metric_name":"ccp_pgbouncer_pools_client_waiting","value_column":"cl_waiting"},{"attribute_columns":["database","user"],"description":"Server connections that are linked to a client","metric_name":"ccp_pgbouncer_pools_server_active","value_column":"sv_active"},{"attribute_columns":["database","user"],"description":"Server connections that are unused and immediately usable for client queries","metric_name":"ccp_pgbouncer_pools_server_idle","value_column":"sv_idle"},{"attribute_columns":["database","user"],"description":"Server connections that have been idle for more than server_check_delay, so they need server_check_query to run on them before they can be used again","metric_name":"ccp_pgbouncer_pools_server_used","value_column":"sv_used"}],"sql":"SHOW POOLS;"},{"metrics":[{"attribute_columns":["database","user","state","application_name","link"],"description":"1 if the connection will be closed as soon as possible, because a configuration file reload or DNS update changed the connection information or RECONNECT was issued","metric_name":"ccp_pgbouncer_servers_close_needed","value_column":"close_needed"}],"sql":"SHOW SERVERS;"}] diff --git a/internal/collector/generated/postgres_5m_metrics.json b/internal/collector/generated/postgres_5m_metrics.json index a9a3500a02..f8f73cdde5 100644 --- a/internal/collector/generated/postgres_5m_metrics.json +++ b/internal/collector/generated/postgres_5m_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE 
datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled due to conflicts with recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been 
rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"}] +[{"metrics":[{"attribute_columns":["dbname"],"description":"Database size in bytes","metric_name":"ccp_database_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes"}],"sql":"SELECT datname as dbname , pg_catalog.pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false;\n"},{"metrics":[{"description":"Count of sequences that have reached greater than or equal to 75% of their max available numbers.\nFunction monitor.sequence_status() can provide more details if run directly on system.\n","metric_name":"ccp_sequence_exhaustion_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM (\n SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots\n , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used\n FROM pg_catalog.pg_sequences s\n) x WHERE (ROUND(used/slots*100)::int) \u003e 75;\n"},{"metrics":[{"description":"Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf).\n0 = valid config. 1 = settings changed. \nSettings history is available for review in the table `monitor.pg_hba_checksum`.\nTo reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid(). 
Note this will clear the history table.\n","metric_name":"ccp_pg_hba_checksum_status","static_attributes":{"server":"localhost:5432"},"value_column":"status"}],"sql":"SELECT monitor.pg_hba_checksum() AS status;"}] diff --git a/internal/collector/generated/postgres_5m_per_db_metrics.json b/internal/collector/generated/postgres_5m_per_db_metrics.json new file mode 100644 index 0000000000..0478569504 --- /dev/null +++ b/internal/collector/generated/postgres_5m_per_db_metrics.json @@ -0,0 +1 @@ +[{"metrics":[{"attribute_columns":["dbname","schemaname","relname"],"description":"Table size in bytes including indexes","metric_name":"ccp_table_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT current_database() as dbname , n.nspname as schemaname , c.relname , pg_catalog.pg_total_relation_size(c.oid) as bytes FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid WHERE NOT pg_is_other_temp_schema(n.oid) AND relkind IN ('r', 'm', 'f');\n"},{"metrics":[{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of sequential scans initiated on this table","metric_name":"ccp_stat_user_tables_seq_scan","static_attributes":{"server":"localhost:5432"},"value_column":"seq_scan"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of live rows fetched by sequential scans","metric_name":"ccp_stat_user_tables_seq_tup_read","static_attributes":{"server":"localhost:5432"},"value_column":"seq_tup_read"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of index scans initiated on this table","metric_name":"ccp_stat_user_tables_idx_scan","static_attributes":{"server":"localhost:5432"},"value_column":"idx_scan"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of live rows fetched by index scans","metric_name":"ccp_stat_user_tables_idx_tup_fetch","static_attributes":{"server":"localhost:5432"},"value_column":"idx_tup_fetch"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of rows inserted","metric_name":"ccp_stat_user_tables_n_tup_ins","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_ins"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of rows updated","metric_name":"ccp_stat_user_tables_n_tup_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_upd"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of rows deleted","metric_name":"ccp_stat_user_tables_n_tup_del","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_del"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of rows HOT updated (i.e., with no separate index update required)","metric_name":"ccp_stat_user_tables_n_tup_hot_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_hot_upd"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of rows updated where the successor version goes onto a new heap page, leaving behind an original version with a t_ctid field that points to a different heap page. 
These are always non-HOT updates.","metric_name":"ccp_stat_user_tables_n_tup_newpage_upd","static_attributes":{"server":"localhost:5432"},"value_column":"n_tup_newpage_upd"},{"attribute_columns":["dbname","schemaname","relname"],"description":"Estimated number of live rows","metric_name":"ccp_stat_user_tables_n_live_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_live_tup"},{"attribute_columns":["dbname","schemaname","relname"],"description":"Estimated number of dead rows","metric_name":"ccp_stat_user_tables_n_dead_tup","static_attributes":{"server":"localhost:5432"},"value_column":"n_dead_tup"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of times this table has been manually vacuumed (not counting VACUUM FULL)","metric_name":"ccp_stat_user_tables_vacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"vacuum_count"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of times this table has been vacuumed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autovacuum_count","static_attributes":{"server":"localhost:5432"},"value_column":"autovacuum_count"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of times this table has been manually analyzed","metric_name":"ccp_stat_user_tables_analyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"analyze_count"},{"attribute_columns":["dbname","schemaname","relname"],"data_type":"sum","description":"Number of times this table has been analyzed by the autovacuum daemon","metric_name":"ccp_stat_user_tables_autoanalyze_count","static_attributes":{"server":"localhost:5432"},"value_column":"autoanalyze_count"}],"sql":"SELECT current_database() as dbname , p.schemaname , p.relname , p.seq_scan , p.seq_tup_read , COALESCE(p.idx_scan, 0) AS idx_scan , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch , p.n_tup_ins , p.n_tup_upd , p.n_tup_del , p.n_tup_hot_upd , CASE\n WHEN current_setting('server_version_num')::int \u003e= 160000 \n THEN p.n_tup_newpage_upd\n ELSE 0::bigint\n END AS n_tup_newpage_upd\n, p.n_live_tup , p.n_dead_tup , p.vacuum_count , p.autovacuum_count , p.analyze_count , p.autoanalyze_count FROM pg_catalog.pg_stat_user_tables p;\n"}] diff --git a/internal/collector/generated/postgres_5s_metrics.json b/internal/collector/generated/postgres_5s_metrics.json index 484c99dfa0..dda612ae59 100644 --- a/internal/collector/generated/postgres_5s_metrics.json +++ b/internal/collector/generated/postgres_5s_metrics.json @@ -1 +1 @@ -[{"metrics":[{"attribute_columns":["application_name","datname","state","usename"],"description":"number of connections in this state","metric_name":"ccp_pg_stat_activity_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT\n pg_database.datname,\n tmp.state,\n COALESCE(tmp2.usename, '') as usename,\n COALESCE(tmp2.application_name, '') as application_name,\n COALESCE(count,0) as count,\n COALESCE(max_tx_duration,0) as max_tx_duration\nFROM\n (\n VALUES ('active'),\n ('idle'),\n ('idle in transaction'),\n ('idle in transaction (aborted)'),\n ('fastpath function call'),\n ('disabled')\n ) AS tmp(state) CROSS JOIN pg_database\nLEFT JOIN (\n SELECT\n datname,\n state,\n usename,\n application_name,\n count(*) AS count,\n MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration\n FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2\n ON 
tmp.state = tmp2.state AND pg_database.datname = tmp2.datname;\n"},{"metrics":[{"description":"Seconds since the last successful archive operation","metric_name":"ccp_archive_command_status_seconds_since_last_archive","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_archive","value_type":"double"}],"sql":"SELECT COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive FROM pg_catalog.pg_stat_archiver;\n"},{"metrics":[{"description":"Number of WAL files that have been successfully archived","metric_name":"ccp_archive_command_status_archived_count","static_attributes":{"server":"localhost:5432"},"value_column":"archived_count"}],"sql":"SELECT archived_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Number of failed attempts for archiving WAL files","metric_name":"ccp_archive_command_status_failed_count","static_attributes":{"server":"localhost:5432"},"value_column":"failed_count"}],"sql":"SELECT failed_count FROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Seconds since the last recorded failure of the archive_command","metric_name":"ccp_archive_command_status_seconds_since_last_fail","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_fail"}],"sql":"SELECT CASE\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) \u003c 0 THEN 0\n ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))\n END AS seconds_since_last_fail\nFROM pg_catalog.pg_stat_archiver\n"},{"metrics":[{"description":"Total non-idle connections","metric_name":"ccp_connection_stats_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"description":"Total idle connections","metric_name":"ccp_connection_stats_idle","static_attributes":{"server":"localhost:5432"},"value_column":"idle"},{"description":"Total idle in transaction connections","metric_name":"ccp_connection_stats_idle_in_txn","static_attributes":{"server":"localhost:5432"},"value_column":"idle_in_txn"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_blocked_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_blocked_query_time","value_type":"double"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_connections","static_attributes":{"server":"localhost:5432"},"value_column":"max_connections"},{"description":"Length of time in seconds of the longest idle in transaction session","metric_name":"ccp_connection_stats_max_idle_in_txn_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_idle_in_txn_time","value_type":"double"},{"description":"Length of time in seconds of the longest running query","metric_name":"ccp_connection_stats_max_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_query_time","value_type":"double"},{"description":"Total idle and non-idle connections","metric_name":"ccp_connection_stats_total","static_attributes":{"server":"localhost:5432"},"value_column":"total"}],"sql":"SELECT ((total - idle) - idle_in_txn) as active\n , total\n , idle\n , idle_in_txn\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - state_change))),0) FROM pg_catalog.pg_stat_activity WHERE state = 'idle in transaction') AS max_idle_in_txn_time\n , (SELECT COALESCE(EXTRACT(epoch FROM 
(MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND state \u003c\u003e 'idle' ) AS max_query_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND wait_event_type = 'Lock' ) AS max_blocked_query_time\n , max_connections\n FROM (\n SELECT COUNT(*) as total\n , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle\n , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x\n JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true);\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Total number of checksum failures on this database","metric_name":"ccp_data_checksum_failure_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"},{"attribute_columns":["dbname"],"description":"Time interval in seconds since the last checksum failure was encountered","metric_name":"ccp_data_checksum_failure_time_since_last_failure_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"time_since_last_failure_seconds","value_type":"double"}],"sql":"SELECT datname AS dbname , checksum_failures AS count , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds FROM pg_catalog.pg_stat_database WHERE pg_stat_database.datname IS NOT NULL;\n"},{"metrics":[{"attribute_columns":["dbname","mode"],"description":"Number of locks per mode type","metric_name":"ccp_locks_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT pg_database.datname as dbname , tmp.mode , COALESCE(count,0) as count FROM (\n VALUES ('accesssharelock'),\n ('rowsharelock'),\n ('rowexclusivelock'),\n ('shareupdateexclusivelock'),\n ('sharelock'),\n ('sharerowexclusivelock'),\n ('exclusivelock'),\n ('accessexclusivelock')\n) AS tmp(mode) CROSS JOIN pg_catalog.pg_database LEFT JOIN\n (SELECT database, lower(mode) AS mode,count(*) AS count\n FROM pg_catalog.pg_locks WHERE database IS NOT NULL\n GROUP BY database, lower(mode)\n) AS tmp2 ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database;\n"},{"metrics":[{"description":"CPU limit value in milli cores","metric_name":"ccp_nodemx_cpu_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"CPU request value in milli cores","metric_name":"ccp_nodemx_cpu_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"}],"sql":"SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request , monitor.kdapi_scalar_bigint('cpu_limit') AS limit\n"},{"metrics":[{"description":"CPU usage in nanoseconds","metric_name":"ccp_nodemx_cpuacct_usage","static_attributes":{"server":"localhost:5432"},"value_column":"usage","value_type":"double"},{"description":"CPU usage snapshot timestamp","metric_name":"ccp_nodemx_cpuacct_usage_ts","static_attributes":{"server":"localhost:5432"},"value_column":"usage_ts","value_type":"double"}],"sql":"SELECT CASE WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('cpuacct.usage')\n ELSE (SELECT val FROM monitor.cgroup_setof_kv('cpu.stat') where key = 'usage_usec') * 1000\n END AS usage,\n extract(epoch from clock_timestamp()) AS usage_ts;\n"},{"metrics":[{"description":"The total available run-time within a period (in 
microseconds)","metric_name":"ccp_nodemx_cpucfs_period_us","static_attributes":{"server":"localhost:5432"},"value_column":"period_us"},{"description":"The length of a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_quota_us","static_attributes":{"server":"localhost:5432"},"value_column":"quota_us","value_type":"double"}],"sql":"SELECT\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n monitor.cgroup_scalar_bigint('cpu.cfs_period_us')\n ELSE\n (monitor.cgroup_array_bigint('cpu.max'))[2]\n END AS period_us,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0)\n ELSE\n GREATEST((monitor.cgroup_array_bigint('cpu.max'))[1], 0)\n END AS quota_us;\n"},{"metrics":[{"description":"Number of periods that any thread was runnable","metric_name":"ccp_nodemx_cpustat_nr_periods","static_attributes":{"server":"localhost:5432"},"value_column":"nr_periods","value_type":"double"},{"description":"Number of runnable periods in which the application used its entire quota and was throttled","metric_name":"ccp_nodemx_cpustat_nr_throttled","static_attributes":{"server":"localhost:5432"},"value_column":"nr_throttled"},{"description":"CPU stat snapshot timestamp","metric_name":"ccp_nodemx_cpustat_snap_ts","static_attributes":{"server":"localhost:5432"},"value_column":"snap_ts","value_type":"double"},{"description":"Sum total amount of time individual threads within the monitor.cgroup were throttled","metric_name":"ccp_nodemx_cpustat_throttled_time","static_attributes":{"server":"localhost:5432"},"value_column":"throttled_time","value_type":"double"}],"sql":"WITH d(key, val) AS (select key, val from monitor.cgroup_setof_kv('cpu.stat')) SELECT\n (SELECT val FROM d WHERE key='nr_periods') AS nr_periods,\n (SELECT val FROM d WHERE key='nr_throttled') AS nr_throttled,\n (SELECT val FROM d WHERE key='throttled_usec') AS throttled_time,\n extract(epoch from clock_timestamp()) as snap_ts;\n"},{"metrics":[{"attribute_columns":["fs_type","mount_point"],"description":"Available size in bytes","metric_name":"ccp_nodemx_data_disk_available_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"available_bytes","value_type":"double"},{"attribute_columns":["fs_type","mount_point"],"description":"Available file nodes","metric_name":"ccp_nodemx_data_disk_free_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"free_file_nodes"},{"attribute_columns":["fs_type","mount_point"],"description":"Size in bytes","metric_name":"ccp_nodemx_data_disk_total_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_bytes"},{"attribute_columns":["fs_type","mount_point"],"description":"Total file nodes","metric_name":"ccp_nodemx_data_disk_total_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"total_file_nodes"}],"sql":"SELECT mount_point,fs_type,total_bytes,available_bytes,total_file_nodes,free_file_nodes\n FROM monitor.proc_mountinfo() m\n JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%'\n"},{"metrics":[{"attribute_columns":["mount_point"],"description":"Total sectors read","metric_name":"ccp_nodemx_disk_activity_sectors_read","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_read"},{"attribute_columns":["mount_point"],"description":"Total sectors 
written","metric_name":"ccp_nodemx_disk_activity_sectors_written","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_written"}],"sql":"SELECT mount_point,sectors_read,sectors_written\n FROM monitor.proc_mountinfo() m\n JOIN monitor.proc_diskstats() d USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"description":"Total bytes of anonymous and swap cache memory on active LRU list","metric_name":"ccp_nodemx_mem_active_anon","static_attributes":{"server":"localhost:5432"},"value_column":"active_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on active LRU list","metric_name":"ccp_nodemx_mem_active_file","static_attributes":{"server":"localhost:5432"},"value_column":"active_file","value_type":"double"},{"description":"Total bytes of page cache memory","metric_name":"ccp_nodemx_mem_cache","static_attributes":{"server":"localhost:5432"},"value_column":"cache","value_type":"double"},{"description":"Total bytes that are waiting to get written back to the disk","metric_name":"ccp_nodemx_mem_dirty","static_attributes":{"server":"localhost:5432"},"value_column":"dirty"},{"description":"Total bytes of anonymous and swap cache memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_anon","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_file","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_file","value_type":"double"},{"description":"Unknown metric from ccp_nodemx_mem","metric_name":"ccp_nodemx_mem_kmem_usage_in_byte","static_attributes":{"server":"localhost:5432"},"value_column":"kmem_usage_in_byte"},{"description":"Memory limit value in bytes","metric_name":"ccp_nodemx_mem_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"Total bytes of mapped file (includes tmpfs/shmem)","metric_name":"ccp_nodemx_mem_mapped_file","static_attributes":{"server":"localhost:5432"},"value_column":"mapped_file"},{"description":"Memory request value in bytes","metric_name":"ccp_nodemx_mem_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"},{"description":"Total bytes of anonymous and swap cache memory","metric_name":"ccp_nodemx_mem_rss","static_attributes":{"server":"localhost:5432"},"value_column":"rss","value_type":"double"},{"description":"Total bytes of shared memory","metric_name":"ccp_nodemx_mem_shmem","static_attributes":{"server":"localhost:5432"},"value_column":"shmem","value_type":"double"},{"description":"Total usage in bytes","metric_name":"ccp_nodemx_mem_usage_in_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"usage_in_bytes"}],"sql":"WITH d(key, val) as (SELECT key, val FROM monitor.cgroup_setof_kv('memory.stat')) SELECT\n monitor.kdapi_scalar_bigint('mem_request') AS request,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.limit_in_bytes') = 9223372036854771712 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.limit_in_bytes') END)\n ELSE\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.max') = 9223372036854775807 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.max') END)\n END AS limit,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='cache')\n ELSE 0\n END as cache,\n CASE\n WHEN 
monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='rss')\n ELSE 0\n END as RSS,\n (SELECT val FROM d WHERE key='shmem') as shmem,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='mapped_file')\n ELSE 0\n END as mapped_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='dirty')\n ELSE (SELECT val FROM d WHERE key='file_dirty')\n END as dirty,\n (SELECT val FROM d WHERE key='active_anon') as active_anon,\n (SELECT val FROM d WHERE key='inactive_anon') as inactive_anon,\n (SELECT val FROM d WHERE key='active_file') as active_file,\n (SELECT val FROM d WHERE key='inactive_file') as inactive_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.usage_in_bytes')\n ELSE monitor.cgroup_scalar_bigint('memory.current')\n END as usage_in_bytes,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes')\n ELSE 0\n END as kmem_usage_in_byte;\n"},{"metrics":[{"attribute_columns":["interface"],"description":"Number of bytes received","metric_name":"ccp_nodemx_network_rx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"rx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets received","metric_name":"ccp_nodemx_network_rx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"rx_packets"},{"attribute_columns":["interface"],"description":"Number of bytes transmitted","metric_name":"ccp_nodemx_network_tx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"tx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets transmitted","metric_name":"ccp_nodemx_network_tx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"tx_packets"}],"sql":"SELECT interface\n ,tx_bytes\n ,tx_packets\n ,rx_bytes\n ,rx_packets from monitor.proc_network_stats()\n"},{"metrics":[{"description":"Total number of database processes","metric_name":"ccp_nodemx_process_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT monitor.cgroup_process_count() as count;\n"},{"metrics":[{"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_reset_time","static_attributes":{"server":"localhost:5432"},"value_column":"time"}],"sql":"SELECT monitor.pg_stat_statements_reset_info(-1) as time;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Average query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"top_mean_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max(monitor.mean_exec_time) AS top_mean_exec_time_ms\nFROM monitor GROUP BY 1,2,3,4 ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","role"],"description":"Total number of queries run per 
user/database","metric_name":"ccp_pg_stat_statements_total_calls_count","static_attributes":{"server":"localhost:5432"},"value_column":"calls_count","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"mean_exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total rows returned from all queries per user/database","metric_name":"ccp_pg_stat_statements_total_row_count","static_attributes":{"server":"localhost:5432"},"value_column":"row_count","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.calls\n , s.total_exec_time\n , s.mean_exec_time\n , s.rows\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , sum(calls) AS calls_count\n , sum(total_exec_time) AS exec_time_ms\n , avg(mean_exec_time) AS mean_exec_time_ms\n , sum(rows) AS row_count\nFROM monitor GROUP BY 1,2;\n"},{"metrics":[{"description":"The current version of PostgreSQL that this exporter is running on as a 6 digit integer (######).","metric_name":"ccp_postgresql_version_current","static_attributes":{"server":"localhost:5432"},"value_column":"current"}],"sql":"SELECT current_setting('server_version_num')::int AS current;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_postmaster_uptime_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"seconds","value_type":"double"}],"sql":"SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS seconds;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_replication_lag_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT * FROM get_replication_lag();\n"},{"metrics":[{"description":"Return value of 1 means database is in recovery. Otherwise 2 it is a primary","metric_name":"ccp_is_in_recovery_status","static_attributes":{"server":"localhost:5432"},"value_column":"status","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last WAL file was received and replayed on replica.\nAlways increases, possibly causing false positives if the primary stops writing.\nMonitors for replicas that stop receiving WAL all together.\n","metric_name":"ccp_replication_lag_received_time","static_attributes":{"server":"localhost:5432"},"value_column":"received_time","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last transaction was replayed on replica.\nReturns zero if last WAL received equals last WAL replayed. Avoids\nfalse positives when primary stops writing. 
Monitors for replicas that\ncannot keep up with primary WAL generation.\n","metric_name":"ccp_replication_lag_replay_time","static_attributes":{"server":"localhost:5432"},"value_column":"replay_time","value_type":"double"}],"sql":"SELECT\n COALESCE(\n CASE\n WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS replay_time,\n COALESCE(\n CASE\n WHEN pg_is_in_recovery() = false THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS received_time,\n CASE\n WHEN pg_is_in_recovery() = true THEN 'replica'\n ELSE 'primary'\n END AS role,\n CASE\n WHEN pg_is_in_recovery() = true THEN 1\n ELSE 2\n END AS status;\n"},{"metrics":[{"description":"Number of settings from pg_settings catalog in a pending_restart state","metric_name":"ccp_settings_pending_restart_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true;\n"},{"metrics":[{"description":"Number of buffers allocated","metric_name":"ccp_stat_bgwriter_buffers_alloc","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_alloc"},{"data_type":"sum","description":"Number of buffers written by the background writer","metric_name":"ccp_stat_bgwriter_buffers_clean","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_clean"},{"description":"Number of times the background writer stopped a cleaning scan because it had written too many buffers","metric_name":"ccp_stat_bgwriter_maxwritten_clean","static_attributes":{"server":"localhost:5432"},"value_column":"maxwritten_clean"}],"sql":"SELECT\n buffers_clean\n , maxwritten_clean\n , buffers_alloc\nFROM pg_catalog.pg_stat_bgwriter;\n"},{"metrics":[{"description":"Oldest current transaction ID in cluster","metric_name":"ccp_transaction_wraparound_oldest_current_xid","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_current_xid"},{"description":"Percentage towards emergency autovacuum process starting","metric_name":"ccp_transaction_wraparound_percent_towards_emergency_autovac","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_emergency_autovac"},{"description":"Percentage towards transaction ID wraparound","metric_name":"ccp_transaction_wraparound_percent_towards_wraparound","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_wraparound"}],"sql":"WITH max_age AS (\n SELECT 2000000000 as max_old_xid\n , setting AS autovacuum_freeze_max_age\n FROM pg_catalog.pg_settings\n WHERE name = 'autovacuum_freeze_max_age')\n, per_database_stats AS (\n SELECT datname\n , m.max_old_xid::int\n , m.autovacuum_freeze_max_age::int\n , age(d.datfrozenxid) AS oldest_current_xid\n FROM pg_catalog.pg_database d\n JOIN max_age m ON (true)\n WHERE d.datallowconn)\nSELECT max(oldest_current_xid) AS oldest_current_xid , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac FROM per_database_stats;\n"},{"metrics":[{"description":"Current size in bytes of the WAL directory","metric_name":"ccp_wal_activity_total_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_size_bytes"}],"sql":"SELECT last_5_min_size_bytes,\n (SELECT 
COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes\n FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification \u003e CURRENT_TIMESTAMP - '5 minutes'::interval) x;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_top_max_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"max_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total time spent in the statement in milliseconds","metric_name":"ccp_pg_stat_statements_top_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"total_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , total_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total amount of WAL generated by the statement in bytes","metric_name":"ccp_pg_stat_statements_top_wal_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL full page images generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_fpi","static_attributes":{"server":"localhost:5432"},"value_column":"fpi","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL records generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_records","static_attributes":{"server":"localhost:5432"},"value_column":"records","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , query\n , queryid\n , records\n , fpi\n , bytes\nFROM monitor ORDER BY bytes DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. 
Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["backup_type","repo"],"description":"Total size of this backup in the pgbackrest repository, including all required previous backups and WAL","metric_name":"ccp_backrest_last_info_repo_total_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_total_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"}] +[{"metrics":[{"attribute_columns":["application_name","datname","state","usename"],"description":"number of connections in this state","metric_name":"ccp_pg_stat_activity_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT\n pg_database.datname,\n tmp.state,\n COALESCE(tmp2.usename, '') as usename,\n COALESCE(tmp2.application_name, '') as application_name,\n COALESCE(count,0) as count,\n COALESCE(max_tx_duration,0) as max_tx_duration\nFROM\n (\n VALUES ('active'),\n ('idle'),\n ('idle in transaction'),\n ('idle in transaction (aborted)'),\n ('fastpath function call'),\n ('disabled')\n ) AS tmp(state) CROSS JOIN pg_database\nLEFT JOIN (\n SELECT\n datname,\n state,\n usename,\n application_name,\n count(*) AS count,\n MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration\n FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2\n ON tmp.state = tmp2.state AND 
pg_database.datname = tmp2.datname;\n"},{"metrics":[{"description":"Seconds since the last successful archive operation","metric_name":"ccp_archive_command_status_seconds_since_last_archive","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_archive","value_type":"double"},{"description":"Number of WAL files that have been successfully archived","metric_name":"ccp_archive_command_status_archived_count","static_attributes":{"server":"localhost:5432"},"value_column":"archived_count"},{"description":"Number of failed attempts for archiving WAL files","metric_name":"ccp_archive_command_status_failed_count","static_attributes":{"server":"localhost:5432"},"value_column":"failed_count"},{"description":"Seconds since the last recorded failure of the archive_command","metric_name":"ccp_archive_command_status_seconds_since_last_fail","static_attributes":{"server":"localhost:5432"},"value_column":"seconds_since_last_fail"}],"sql":"SELECT\n COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive,\n archived_count,\n failed_count,\n CASE\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0\n WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) \u003c 0 THEN 0\n ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))\n END AS seconds_since_last_fail\nFROM pg_catalog.pg_stat_archiver;\n"},{"metrics":[{"description":"Total non-idle connections","metric_name":"ccp_connection_stats_active","static_attributes":{"server":"localhost:5432"},"value_column":"active"},{"description":"Total idle connections","metric_name":"ccp_connection_stats_idle","static_attributes":{"server":"localhost:5432"},"value_column":"idle"},{"description":"Total idle in transaction connections","metric_name":"ccp_connection_stats_idle_in_txn","static_attributes":{"server":"localhost:5432"},"value_column":"idle_in_txn"},{"description":"Length of time in seconds of the longest running query blocked waiting on a lock","metric_name":"ccp_connection_stats_max_blocked_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_blocked_query_time","value_type":"double"},{"description":"Value of max_connections for the monitored database","metric_name":"ccp_connection_stats_max_connections","static_attributes":{"server":"localhost:5432"},"value_column":"max_connections"},{"description":"Length of time in seconds of the longest idle in transaction session","metric_name":"ccp_connection_stats_max_idle_in_txn_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_idle_in_txn_time","value_type":"double"},{"description":"Length of time in seconds of the longest running query","metric_name":"ccp_connection_stats_max_query_time","static_attributes":{"server":"localhost:5432"},"value_column":"max_query_time","value_type":"double"},{"description":"Total idle and non-idle connections","metric_name":"ccp_connection_stats_total","static_attributes":{"server":"localhost:5432"},"value_column":"total"}],"sql":"SELECT ((total - idle) - idle_in_txn) as active\n , total\n , idle\n , idle_in_txn\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - state_change))),0) FROM pg_catalog.pg_stat_activity WHERE state = 'idle in transaction') AS max_idle_in_txn_time\n , (SELECT COALESCE(EXTRACT(epoch FROM (MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND state \u003c\u003e 'idle' ) AS max_query_time\n , (SELECT COALESCE(EXTRACT(epoch FROM 
(MAX(clock_timestamp() - query_start))),0) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'client backend' AND wait_event_type = 'Lock' ) AS max_blocked_query_time\n , max_connections\n FROM (\n SELECT COUNT(*) as total\n , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle\n , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x\n JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true);\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Total number of checksum failures on this database","metric_name":"ccp_data_checksum_failure_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"},{"attribute_columns":["dbname"],"description":"Time interval in seconds since the last checksum failure was encountered","metric_name":"ccp_data_checksum_failure_time_since_last_failure_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"time_since_last_failure_seconds","value_type":"double"}],"sql":"SELECT datname AS dbname , checksum_failures AS count , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds FROM pg_catalog.pg_stat_database WHERE pg_stat_database.datname IS NOT NULL;\n"},{"metrics":[{"attribute_columns":["dbname","mode"],"description":"Number of locks per mode type","metric_name":"ccp_locks_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT pg_database.datname as dbname , tmp.mode , COALESCE(count,0) as count FROM (\n VALUES ('accesssharelock'),\n ('rowsharelock'),\n ('rowexclusivelock'),\n ('shareupdateexclusivelock'),\n ('sharelock'),\n ('sharerowexclusivelock'),\n ('exclusivelock'),\n ('accessexclusivelock')\n) AS tmp(mode) CROSS JOIN pg_catalog.pg_database LEFT JOIN\n (SELECT database, lower(mode) AS mode,count(*) AS count\n FROM pg_catalog.pg_locks WHERE database IS NOT NULL\n GROUP BY database, lower(mode)\n) AS tmp2 ON tmp.mode=tmp2.mode and pg_database.oid = tmp2.database;\n"},{"metrics":[{"description":"CPU limit value in milli cores","metric_name":"ccp_nodemx_cpu_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"CPU request value in milli cores","metric_name":"ccp_nodemx_cpu_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"}],"sql":"SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request , monitor.kdapi_scalar_bigint('cpu_limit') AS limit;\n"},{"metrics":[{"description":"CPU usage in nanoseconds","metric_name":"ccp_nodemx_cpuacct_usage","static_attributes":{"server":"localhost:5432"},"value_column":"usage","value_type":"double"},{"description":"CPU usage snapshot timestamp","metric_name":"ccp_nodemx_cpuacct_usage_ts","static_attributes":{"server":"localhost:5432"},"value_column":"usage_ts","value_type":"double"}],"sql":"SELECT CASE WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('cpuacct.usage')\n ELSE (SELECT val FROM monitor.cgroup_setof_kv('cpu.stat') where key = 'usage_usec') * 1000\n END AS usage,\n extract(epoch from clock_timestamp()) AS usage_ts;\n"},{"metrics":[{"description":"The length of a period (in microseconds)","metric_name":"ccp_nodemx_cpucfs_period_us","static_attributes":{"server":"localhost:5432"},"value_column":"period_us"},{"description":"The total available run-time within a period (in 
microseconds)","metric_name":"ccp_nodemx_cpucfs_quota_us","static_attributes":{"server":"localhost:5432"},"value_column":"quota_us","value_type":"double"}],"sql":"SELECT\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n monitor.cgroup_scalar_bigint('cpu.cfs_period_us')\n ELSE\n (monitor.cgroup_array_bigint('cpu.max'))[2]\n END AS period_us,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n GREATEST(monitor.cgroup_scalar_bigint('cpu.cfs_quota_us'), 0)\n ELSE\n GREATEST((monitor.cgroup_array_bigint('cpu.max'))[1], 0)\n END AS quota_us;\n"},{"metrics":[{"description":"Number of periods that any thread was runnable","metric_name":"ccp_nodemx_cpustat_nr_periods","static_attributes":{"server":"localhost:5432"},"value_column":"nr_periods","value_type":"double"},{"description":"Number of runnable periods in which the application used its entire quota and was throttled","metric_name":"ccp_nodemx_cpustat_nr_throttled","static_attributes":{"server":"localhost:5432"},"value_column":"nr_throttled"},{"description":"CPU stat snapshot timestamp","metric_name":"ccp_nodemx_cpustat_snap_ts","static_attributes":{"server":"localhost:5432"},"value_column":"snap_ts","value_type":"double"},{"description":"Sum total amount of time individual threads within the monitor.cgroup were throttled","metric_name":"ccp_nodemx_cpustat_throttled_time","static_attributes":{"server":"localhost:5432"},"value_column":"throttled_time","value_type":"double"}],"sql":"WITH d(key, val) AS (select key, val from monitor.cgroup_setof_kv('cpu.stat')) SELECT\n (SELECT val FROM d WHERE key='nr_periods') AS nr_periods,\n (SELECT val FROM d WHERE key='nr_throttled') AS nr_throttled,\n (SELECT val FROM d WHERE key='throttled_usec') AS throttled_time,\n extract(epoch from clock_timestamp()) as snap_ts;\n"},{"metrics":[{"attribute_columns":["fs_type","mount_point"],"description":"Available size in bytes","metric_name":"ccp_nodemx_data_disk_available_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"available_bytes","value_type":"double"},{"attribute_columns":["fs_type","mount_point"],"description":"Available file nodes","metric_name":"ccp_nodemx_data_disk_free_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"free_file_nodes"},{"attribute_columns":["fs_type","mount_point"],"description":"Size in bytes","metric_name":"ccp_nodemx_data_disk_total_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_bytes"},{"attribute_columns":["fs_type","mount_point"],"description":"Total file nodes","metric_name":"ccp_nodemx_data_disk_total_file_nodes","static_attributes":{"server":"localhost:5432"},"value_column":"total_file_nodes"}],"sql":"SELECT mount_point,fs_type,total_bytes,available_bytes,total_file_nodes,free_file_nodes\n FROM monitor.proc_mountinfo() m\n JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"attribute_columns":["mount_point"],"description":"Total sectors read","metric_name":"ccp_nodemx_disk_activity_sectors_read","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_read"},{"attribute_columns":["mount_point"],"description":"Total sectors written","metric_name":"ccp_nodemx_disk_activity_sectors_written","static_attributes":{"server":"localhost:5432"},"value_column":"sectors_written"}],"sql":"SELECT mount_point,sectors_read,sectors_written\n FROM monitor.proc_mountinfo() m\n JOIN monitor.proc_diskstats() d USING 
(major_number, minor_number)\n WHERE m.mount_point IN ('/pgdata', '/pgwal') OR\n m.mount_point like '/tablespaces/%';\n"},{"metrics":[{"description":"Total bytes of anonymous and swap cache memory on active LRU list","metric_name":"ccp_nodemx_mem_active_anon","static_attributes":{"server":"localhost:5432"},"value_column":"active_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on active LRU list","metric_name":"ccp_nodemx_mem_active_file","static_attributes":{"server":"localhost:5432"},"value_column":"active_file","value_type":"double"},{"description":"Total bytes of page cache memory","metric_name":"ccp_nodemx_mem_cache","static_attributes":{"server":"localhost:5432"},"value_column":"cache","value_type":"double"},{"description":"Total bytes that are waiting to get written back to the disk","metric_name":"ccp_nodemx_mem_dirty","static_attributes":{"server":"localhost:5432"},"value_column":"dirty"},{"description":"Total bytes of anonymous and swap cache memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_anon","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_anon","value_type":"double"},{"description":"Total bytes of file-backed memory on inactive LRU list","metric_name":"ccp_nodemx_mem_inactive_file","static_attributes":{"server":"localhost:5432"},"value_column":"inactive_file","value_type":"double"},{"description":"Unknown metric from ccp_nodemx_mem","metric_name":"ccp_nodemx_mem_kmem_usage_in_byte","static_attributes":{"server":"localhost:5432"},"value_column":"kmem_usage_in_byte"},{"description":"Memory limit value in bytes","metric_name":"ccp_nodemx_mem_limit","static_attributes":{"server":"localhost:5432"},"value_column":"limit"},{"description":"Total bytes of mapped file (includes tmpfs/shmem)","metric_name":"ccp_nodemx_mem_mapped_file","static_attributes":{"server":"localhost:5432"},"value_column":"mapped_file"},{"description":"Memory request value in bytes","metric_name":"ccp_nodemx_mem_request","static_attributes":{"server":"localhost:5432"},"value_column":"request"},{"description":"Total bytes of anonymous and swap cache memory","metric_name":"ccp_nodemx_mem_rss","static_attributes":{"server":"localhost:5432"},"value_column":"rss","value_type":"double"},{"description":"Total bytes of shared memory","metric_name":"ccp_nodemx_mem_shmem","static_attributes":{"server":"localhost:5432"},"value_column":"shmem","value_type":"double"},{"description":"Total usage in bytes","metric_name":"ccp_nodemx_mem_usage_in_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"usage_in_bytes"}],"sql":"WITH d(key, val) as (SELECT key, val FROM monitor.cgroup_setof_kv('memory.stat')) SELECT\n monitor.kdapi_scalar_bigint('mem_request') AS request,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy' THEN\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.limit_in_bytes') = 9223372036854771712 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.limit_in_bytes') END)\n ELSE\n (CASE WHEN monitor.cgroup_scalar_bigint('memory.max') = 9223372036854775807 THEN 0 ELSE monitor.cgroup_scalar_bigint('memory.max') END)\n END AS limit,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='cache')\n ELSE 0\n END as cache,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='rss')\n ELSE 0\n END as RSS,\n (SELECT val FROM d WHERE key='shmem') as shmem,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='mapped_file')\n ELSE 0\n END as mapped_file,\n 
CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN (SELECT val FROM d WHERE key='dirty')\n ELSE (SELECT val FROM d WHERE key='file_dirty')\n END as dirty,\n (SELECT val FROM d WHERE key='active_anon') as active_anon,\n (SELECT val FROM d WHERE key='inactive_anon') as inactive_anon,\n (SELECT val FROM d WHERE key='active_file') as active_file,\n (SELECT val FROM d WHERE key='inactive_file') as inactive_file,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.usage_in_bytes')\n ELSE monitor.cgroup_scalar_bigint('memory.current')\n END as usage_in_bytes,\n CASE\n WHEN monitor.cgroup_mode() = 'legacy'\n THEN monitor.cgroup_scalar_bigint('memory.kmem.usage_in_bytes')\n ELSE 0\n END as kmem_usage_in_byte;\n"},{"metrics":[{"attribute_columns":["interface"],"description":"Number of bytes received","metric_name":"ccp_nodemx_network_rx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"rx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets received","metric_name":"ccp_nodemx_network_rx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"rx_packets"},{"attribute_columns":["interface"],"description":"Number of bytes transmitted","metric_name":"ccp_nodemx_network_tx_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"tx_bytes"},{"attribute_columns":["interface"],"description":"Number of packets transmitted","metric_name":"ccp_nodemx_network_tx_packets","static_attributes":{"server":"localhost:5432"},"value_column":"tx_packets"}],"sql":"SELECT interface\n ,tx_bytes\n ,tx_packets\n ,rx_bytes\n ,rx_packets from monitor.proc_network_stats();\n"},{"metrics":[{"description":"Total number of database processes","metric_name":"ccp_nodemx_process_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT monitor.cgroup_process_count() as count;\n"},{"metrics":[{"description":"Epoch time when stats were reset","metric_name":"ccp_pg_stat_statements_reset_time","static_attributes":{"server":"localhost:5432"},"value_column":"time"}],"sql":"SELECT monitor.pg_stat_statements_reset_info(-1) as time;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Average query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"top_mean_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max(monitor.mean_exec_time) AS top_mean_exec_time_ms\nFROM monitor GROUP BY 1,2,3,4 ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","role"],"description":"Total number of queries run per user/database","metric_name":"ccp_pg_stat_statements_total_calls_count","static_attributes":{"server":"localhost:5432"},"value_column":"calls_count","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per 
user/database","metric_name":"ccp_pg_stat_statements_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total runtime of all queries per user/database","metric_name":"ccp_pg_stat_statements_total_mean_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"mean_exec_time_ms","value_type":"double"},{"attribute_columns":["dbname","role"],"description":"Total rows returned from all queries per user/database","metric_name":"ccp_pg_stat_statements_total_row_count","static_attributes":{"server":"localhost:5432"},"value_column":"row_count","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.calls\n , s.total_exec_time\n , s.mean_exec_time\n , s.rows\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , sum(calls) AS calls_count\n , sum(total_exec_time) AS exec_time_ms\n , avg(mean_exec_time) AS mean_exec_time_ms\n , sum(rows) AS row_count\nFROM monitor GROUP BY 1,2;\n"},{"metrics":[{"description":"The current version of PostgreSQL that this exporter is running on as a 6 digit integer (######).","metric_name":"ccp_postgresql_version_current","static_attributes":{"server":"localhost:5432"},"value_column":"current"}],"sql":"SELECT current_setting('server_version_num')::int AS current;\n"},{"metrics":[{"description":"Time interval in seconds since PostgreSQL database was last restarted.","metric_name":"ccp_postmaster_uptime_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"seconds","value_type":"double"}],"sql":"SELECT extract(epoch from (clock_timestamp() - pg_postmaster_start_time() )) AS seconds;\n"},{"metrics":[{"attribute_columns":["replica"],"description":"Replication lag in bytes.","metric_name":"ccp_replication_lag_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"}],"sql":"SELECT * FROM get_replication_lag();\n"},{"metrics":[{"description":"Return value of 1 means database is in recovery. Otherwise 2 it is a primary","metric_name":"ccp_is_in_recovery_status","static_attributes":{"server":"localhost:5432"},"value_column":"status","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last WAL file was received and replayed on replica.\nAlways increases, possibly causing false positives if the primary stops writing.\nMonitors for replicas that stop receiving WAL all together.\n","metric_name":"ccp_replication_lag_received_time","static_attributes":{"server":"localhost:5432"},"value_column":"received_time","value_type":"double"},{"attribute_columns":["role"],"description":"Length of time since the last transaction was replayed on replica.\nReturns zero if last WAL received equals last WAL replayed. Avoids\nfalse positives when primary stops writing. 
Monitors for replicas that\ncannot keep up with primary WAL generation.\n","metric_name":"ccp_replication_lag_replay_time","static_attributes":{"server":"localhost:5432"},"value_column":"replay_time","value_type":"double"}],"sql":"SELECT\n COALESCE(\n CASE\n WHEN (pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) OR (pg_is_in_recovery() = false) THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS replay_time,\n COALESCE(\n CASE\n WHEN pg_is_in_recovery() = false THEN 0\n ELSE EXTRACT (EPOCH FROM clock_timestamp() - pg_last_xact_replay_timestamp())::INTEGER\n END,\n 0\n ) AS received_time,\n CASE\n WHEN pg_is_in_recovery() = true THEN 'replica'\n ELSE 'primary'\n END AS role,\n CASE\n WHEN pg_is_in_recovery() = true THEN 1\n ELSE 2\n END AS status;\n"},{"metrics":[{"description":"Number of settings from pg_settings catalog in a pending_restart state","metric_name":"ccp_settings_pending_restart_count","static_attributes":{"server":"localhost:5432"},"value_column":"count"}],"sql":"SELECT count(*) AS count FROM pg_catalog.pg_settings WHERE pending_restart = true;\n"},{"metrics":[{"description":"Number of buffers allocated","metric_name":"ccp_stat_bgwriter_buffers_alloc","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_alloc"},{"data_type":"sum","description":"Number of buffers written by the background writer","metric_name":"ccp_stat_bgwriter_buffers_clean","static_attributes":{"server":"localhost:5432"},"value_column":"buffers_clean"},{"description":"Number of times the background writer stopped a cleaning scan because it had written too many buffers","metric_name":"ccp_stat_bgwriter_maxwritten_clean","static_attributes":{"server":"localhost:5432"},"value_column":"maxwritten_clean"}],"sql":"SELECT\n buffers_clean\n , maxwritten_clean\n , buffers_alloc\nFROM pg_catalog.pg_stat_bgwriter;\n"},{"metrics":[{"description":"Oldest current transaction ID in cluster","metric_name":"ccp_transaction_wraparound_oldest_current_xid","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_current_xid"},{"description":"Percentage towards emergency autovacuum process starting","metric_name":"ccp_transaction_wraparound_percent_towards_emergency_autovac","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_emergency_autovac"},{"description":"Percentage towards transaction ID wraparound","metric_name":"ccp_transaction_wraparound_percent_towards_wraparound","static_attributes":{"server":"localhost:5432"},"value_column":"percent_towards_wraparound"}],"sql":"WITH max_age AS (\n SELECT 2000000000 as max_old_xid\n , setting AS autovacuum_freeze_max_age\n FROM pg_catalog.pg_settings\n WHERE name = 'autovacuum_freeze_max_age')\n, per_database_stats AS (\n SELECT datname\n , m.max_old_xid::int\n , m.autovacuum_freeze_max_age::int\n , age(d.datfrozenxid) AS oldest_current_xid\n FROM pg_catalog.pg_database d\n JOIN max_age m ON (true)\n WHERE d.datallowconn)\nSELECT max(oldest_current_xid) AS oldest_current_xid , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound , max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_emergency_autovac FROM per_database_stats;\n"},{"metrics":[{"description":"Current size in bytes of the WAL directory","metric_name":"ccp_wal_activity_total_size_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"total_size_bytes"}],"sql":"SELECT last_5_min_size_bytes,\n (SELECT 
COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes\n FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification \u003e CURRENT_TIMESTAMP - '5 minutes'::interval) x;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Maximum query runtime in milliseconds","metric_name":"ccp_pg_stat_statements_top_max_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"max_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , max_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total time spent in the statement in milliseconds","metric_name":"ccp_pg_stat_statements_top_total_exec_time_ms","static_attributes":{"server":"localhost:5432"},"value_column":"total_exec_time_ms","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time_ms\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , queryid\n , query\n , total_exec_time_ms\n , records\nFROM monitor ORDER BY 5 DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["dbname","query","queryid","role"],"description":"Total amount of WAL generated by the statement in bytes","metric_name":"ccp_pg_stat_statements_top_wal_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"bytes","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL full page images generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_fpi","static_attributes":{"server":"localhost:5432"},"value_column":"fpi","value_type":"double"},{"attribute_columns":["dbname","query","queryid","role"],"description":"Total number of WAL records generated by the statement","metric_name":"ccp_pg_stat_statements_top_wal_records","static_attributes":{"server":"localhost:5432"},"value_column":"records","value_type":"double"}],"sql":"WITH monitor AS (\n SELECT\n pg_get_userbyid(s.userid) AS role\n , d.datname AS dbname\n , s.queryid AS queryid\n , btrim(replace(left(s.query, 40), '\\n', '')) AS query\n , s.calls\n , s.total_exec_time AS total_exec_time\n , s.max_exec_time AS max_exec_time\n , s.mean_exec_time AS mean_exec_time\n , s.rows\n , s.wal_records AS records\n , s.wal_fpi AS fpi\n , s.wal_bytes AS bytes\n FROM public.pg_stat_statements s\n JOIN pg_catalog.pg_database d ON d.oid = s.dbid\n) SELECT role\n , dbname\n , query\n , queryid\n , records\n , fpi\n , bytes\nFROM monitor ORDER BY bytes DESC LIMIT 20;\n"},{"metrics":[{"attribute_columns":["repo"],"description":"Seconds since the last completed full or differential backup. 
Differential is always based off last full.","metric_name":"ccp_backrest_last_diff_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_diff_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full backup","metric_name":"ccp_backrest_last_full_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_full_backup"},{"attribute_columns":["repo"],"description":"Seconds since the last completed full, differential or incremental backup.\nIncremental is always based off last full or differential.\n","metric_name":"ccp_backrest_last_incr_backup_time_since_completion_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_incr_backup"},{"attribute_columns":["backup_type","repo"],"description":"pgBackRest version number when this backup was performed","metric_name":"ccp_backrest_last_info_backrest_repo_version","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backrest_repo_version"},{"attribute_columns":["backup_type","repo"],"description":"An error has been encountered in the backup. Check logs for more information.","metric_name":"ccp_backrest_last_info_backup_error","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"last_info_backup_error"},{"attribute_columns":["backup_type","repo"],"description":"Total runtime in seconds of this backup","metric_name":"ccp_backrest_last_info_backup_runtime_seconds","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"backup_runtime_seconds"},{"attribute_columns":["backup_type","repo"],"description":"Actual size of only this individual backup in the pgbackrest repository","metric_name":"ccp_backrest_last_info_repo_backup_size_bytes","static_attributes":{"server":"localhost:5432","stanza":"db"},"value_column":"repo_backup_size_bytes"},{"attribute_columns":["repo"],"description":"Seconds since the oldest completed full backup","metric_name":"ccp_backrest_oldest_full_backup_time_seconds","static_attributes":{"server":"localhost:5432"},"value_column":"oldest_full_backup"}],"sql":"SELECT * FROM get_pgbackrest_info();\n"},{"metrics":[{"attribute_columns":["dbname"],"description":"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary","metric_name":"ccp_stat_database_blks_hit","static_attributes":{"server":"localhost:5432"},"value_column":"blks_hit"},{"attribute_columns":["dbname"],"description":"Number of disk blocks read in this database","metric_name":"ccp_stat_database_blks_read","static_attributes":{"server":"localhost:5432"},"value_column":"blks_read"},{"attribute_columns":["dbname"],"description":"Number of queries canceled due to conflicts with recovery in this database","metric_name":"ccp_stat_database_conflicts","static_attributes":{"server":"localhost:5432"},"value_column":"conflicts"},{"attribute_columns":["dbname"],"description":"Number of deadlocks detected in this database","metric_name":"ccp_stat_database_deadlocks","static_attributes":{"server":"localhost:5432"},"value_column":"deadlocks"},{"attribute_columns":["dbname"],"description":"Total amount of data written to temporary files by queries in this database","metric_name":"ccp_stat_database_temp_bytes","static_attributes":{"server":"localhost:5432"},"value_column":"temp_bytes"},{"attribute_columns":["dbname"],"description":"Number of temporary files created by queries in this 
database","metric_name":"ccp_stat_database_temp_files","static_attributes":{"server":"localhost:5432"},"value_column":"temp_files"},{"attribute_columns":["dbname"],"description":"Number of rows deleted by queries in this database","metric_name":"ccp_stat_database_tup_deleted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_deleted"},{"attribute_columns":["dbname"],"description":"Number of rows fetched by queries in this database","metric_name":"ccp_stat_database_tup_fetched","static_attributes":{"server":"localhost:5432"},"value_column":"tup_fetched"},{"attribute_columns":["dbname"],"description":"Number of rows inserted by queries in this database","metric_name":"ccp_stat_database_tup_inserted","static_attributes":{"server":"localhost:5432"},"value_column":"tup_inserted"},{"attribute_columns":["dbname"],"description":"Number of rows returned by queries in this database","metric_name":"ccp_stat_database_tup_returned","static_attributes":{"server":"localhost:5432"},"value_column":"tup_returned"},{"attribute_columns":["dbname"],"description":"Number of rows updated by queries in this database","metric_name":"ccp_stat_database_tup_updated","static_attributes":{"server":"localhost:5432"},"value_column":"tup_updated"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been committed","metric_name":"ccp_stat_database_xact_commit","static_attributes":{"server":"localhost:5432"},"value_column":"xact_commit"},{"attribute_columns":["dbname"],"description":"Number of transactions in this database that have been rolled back","metric_name":"ccp_stat_database_xact_rollback","static_attributes":{"server":"localhost:5432"},"value_column":"xact_rollback"}],"sql":"SELECT s.datname AS dbname , s.xact_commit , s.xact_rollback , s.blks_read , s.blks_hit , s.tup_returned , s.tup_fetched , s.tup_inserted , s.tup_updated , s.tup_deleted , s.conflicts , s.temp_files , s.temp_bytes , s.deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d ON d.datname = s.datname WHERE d.datistemplate = false;\n"}] diff --git a/internal/collector/generated/postgres_logs_transforms.json b/internal/collector/generated/postgres_logs_transforms.json index d3a2dbe47f..066c067399 100644 --- a/internal/collector/generated/postgres_logs_transforms.json +++ b/internal/collector/generated/postgres_logs_transforms.json @@ -1 +1 @@ -[{"conditions":["body[\"format\"] == \"csv\""],"context":"log","statements":["set(cache, ParseCSV(body[\"original\"], body[\"headers\"], delimiter=\",\", mode=\"strict\"))","merge_maps(cache, ExtractPatterns(cache[\"connection_from\"], \"(?:^[[]local[]]:(?\u003cremote_port\u003e.+)|:(?\u003cremote_port\u003e[^:]+))$\"), \"insert\") where Len(cache[\"connection_from\"]) \u003e 0","set(cache[\"remote_host\"], Substring(cache[\"connection_from\"], 0, Len(cache[\"connection_from\"]) - Len(cache[\"remote_port\"]) - 1)) where Len(cache[\"connection_from\"]) \u003e 0 and IsString(cache[\"remote_port\"])","set(cache[\"remote_host\"], cache[\"connection_from\"]) where Len(cache[\"connection_from\"]) \u003e 0 and not IsString(cache[\"remote_host\"])","merge_maps(cache, ExtractPatterns(cache[\"location\"], \"^(?:(?\u003cfunc_name\u003e[^,]+), )?(?\u003cfile_name\u003e[^:]+):(?\u003cfile_line_num\u003e\\\\d+)$\"), \"insert\") where Len(cache[\"location\"]) \u003e 0","set(cache[\"cursor_position\"], Double(cache[\"cursor_position\"])) where IsMatch(cache[\"cursor_position\"], \"^[0-9.]+$\")","set(cache[\"file_line_num\"], 
Double(cache[\"file_line_num\"])) where IsMatch(cache[\"file_line_num\"], \"^[0-9.]+$\")","set(cache[\"internal_position\"], Double(cache[\"internal_position\"])) where IsMatch(cache[\"internal_position\"], \"^[0-9.]+$\")","set(cache[\"leader_pid\"], Double(cache[\"leader_pid\"])) where IsMatch(cache[\"leader_pid\"], \"^[0-9.]+$\")","set(cache[\"line_num\"], Double(cache[\"line_num\"])) where IsMatch(cache[\"line_num\"], \"^[0-9.]+$\")","set(cache[\"pid\"], Double(cache[\"pid\"])) where IsMatch(cache[\"pid\"], \"^[0-9.]+$\")","set(cache[\"query_id\"], Double(cache[\"query_id\"])) where IsMatch(cache[\"query_id\"], \"^[0-9.]+$\")","set(cache[\"remote_port\"], Double(cache[\"remote_port\"])) where IsMatch(cache[\"remote_port\"], \"^[0-9.]+$\")","set(body[\"parsed\"], cache)"]},{"context":"log","statements":["set(instrumentation_scope.name, \"postgres\")","set(instrumentation_scope.version, resource.attributes[\"db.version\"])","set(cache, body[\"parsed\"]) where body[\"format\"] == \"csv\"","set(cache, ParseJSON(body[\"original\"])) where body[\"format\"] == \"json\"","set(severity_text, cache[\"error_severity\"])","set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == \"DEBUG5\"","set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == \"DEBUG4\"","set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == \"DEBUG3\"","set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == \"DEBUG2\"","set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == \"DEBUG1\"","set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == \"INFO\" or severity_text == \"LOG\"","set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == \"NOTICE\"","set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == \"WARNING\"","set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == \"ERROR\"","set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == \"FATAL\"","set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == \"PANIC\"","set(time, Time(cache[\"timestamp\"], \"%F %T.%L %Z\"))","set(instrumentation_scope.schema_url, \"https://opentelemetry.io/schemas/1.29.0\")","set(resource.attributes[\"db.system\"], \"postgresql\")","set(attributes[\"log.record.original\"], body[\"original\"])","set(body, cache)","set(attributes[\"client.address\"], body[\"remote_host\"]) where IsString(body[\"remote_host\"])","set(attributes[\"client.port\"], Int(body[\"remote_port\"])) where IsDouble(body[\"remote_port\"])","set(attributes[\"code.filepath\"], body[\"file_name\"]) where IsString(body[\"file_name\"])","set(attributes[\"code.function\"], body[\"func_name\"]) where IsString(body[\"func_name\"])","set(attributes[\"code.lineno\"], Int(body[\"file_line_num\"])) where IsDouble(body[\"file_line_num\"])","set(attributes[\"db.namespace\"], body[\"dbname\"]) where IsString(body[\"dbname\"])","set(attributes[\"db.response.status_code\"], body[\"state_code\"]) where IsString(body[\"state_code\"])","set(attributes[\"process.creation.time\"], Concat([ Substring(body[\"session_start\"], 0, 10), \"T\", Substring(body[\"session_start\"], 11, 8), \"Z\"], \"\")) where IsMatch(body[\"session_start\"], \"^[^ ]{10} [^ ]{8} UTC$\")","set(attributes[\"process.pid\"], Int(body[\"pid\"])) where IsDouble(body[\"pid\"])","set(attributes[\"process.title\"], body[\"ps\"]) where IsString(body[\"ps\"])","set(attributes[\"user.name\"], body[\"user\"]) where IsString(body[\"user\"])"]},{"conditions":["Len(body[\"message\"]) \u003e 7 and 
Substring(body[\"message\"], 0, 7) == \"AUDIT: \""],"context":"log","statements":["set(body[\"pgaudit\"], ParseCSV(Substring(body[\"message\"], 7, Len(body[\"message\"]) - 7), \"audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter\", delimiter=\",\", mode=\"strict\"))","set(instrumentation_scope.name, \"pgaudit\") where Len(body[\"pgaudit\"]) \u003e 0"]}] +[{"conditions":["log.body[\"format\"] == \"csv\""],"statements":["set(log.cache, ParseCSV(log.body[\"original\"], log.body[\"headers\"], delimiter=\",\", mode=\"strict\"))","merge_maps(log.cache, ExtractPatterns(log.cache[\"connection_from\"], \"(?:^[[]local[]]:(?\u003cremote_port\u003e.+)|:(?\u003cremote_port\u003e[^:]+))$\"), \"insert\") where Len(log.cache[\"connection_from\"]) \u003e 0","set(log.cache[\"remote_host\"], Substring(log.cache[\"connection_from\"], 0, Len(log.cache[\"connection_from\"]) - Len(log.cache[\"remote_port\"]) - 1)) where Len(log.cache[\"connection_from\"]) \u003e 0 and IsString(log.cache[\"remote_port\"])","set(log.cache[\"remote_host\"], log.cache[\"connection_from\"]) where Len(log.cache[\"connection_from\"]) \u003e 0 and not IsString(log.cache[\"remote_host\"])","merge_maps(log.cache, ExtractPatterns(log.cache[\"location\"], \"^(?:(?\u003cfunc_name\u003e[^,]+), )?(?\u003cfile_name\u003e[^:]+):(?\u003cfile_line_num\u003e\\\\d+)$\"), \"insert\") where Len(log.cache[\"location\"]) \u003e 0","set(log.cache[\"cursor_position\"], Double(log.cache[\"cursor_position\"])) where IsMatch(log.cache[\"cursor_position\"], \"^[0-9.]+$\")","set(log.cache[\"file_line_num\"], Double(log.cache[\"file_line_num\"])) where IsMatch(log.cache[\"file_line_num\"], \"^[0-9.]+$\")","set(log.cache[\"internal_position\"], Double(log.cache[\"internal_position\"])) where IsMatch(log.cache[\"internal_position\"], \"^[0-9.]+$\")","set(log.cache[\"leader_pid\"], Double(log.cache[\"leader_pid\"])) where IsMatch(log.cache[\"leader_pid\"], \"^[0-9.]+$\")","set(log.cache[\"line_num\"], Double(log.cache[\"line_num\"])) where IsMatch(log.cache[\"line_num\"], \"^[0-9.]+$\")","set(log.cache[\"pid\"], Double(log.cache[\"pid\"])) where IsMatch(log.cache[\"pid\"], \"^[0-9.]+$\")","set(log.cache[\"query_id\"], Double(log.cache[\"query_id\"])) where IsMatch(log.cache[\"query_id\"], \"^[0-9.]+$\")","set(log.cache[\"remote_port\"], Double(log.cache[\"remote_port\"])) where IsMatch(log.cache[\"remote_port\"], \"^[0-9.]+$\")","set(log.body[\"parsed\"], log.cache)"]},{"statements":["set(instrumentation_scope.name, \"postgres\")","set(instrumentation_scope.version, resource.attributes[\"db.version\"])","set(log.cache, log.body[\"parsed\"]) where log.body[\"format\"] == \"csv\"","set(log.cache, ParseJSON(log.body[\"original\"])) where log.body[\"format\"] == \"json\"","set(log.severity_text, log.cache[\"error_severity\"])","set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == \"DEBUG5\"","set(log.severity_number, SEVERITY_NUMBER_TRACE2) where log.severity_text == \"DEBUG4\"","set(log.severity_number, SEVERITY_NUMBER_TRACE3) where log.severity_text == \"DEBUG3\"","set(log.severity_number, SEVERITY_NUMBER_TRACE4) where log.severity_text == \"DEBUG2\"","set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == \"DEBUG1\"","set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == \"INFO\" or log.severity_text == \"LOG\"","set(log.severity_number, SEVERITY_NUMBER_INFO2) where log.severity_text == \"NOTICE\"","set(log.severity_number, SEVERITY_NUMBER_WARN) where 
log.severity_text == \"WARNING\"","set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == \"ERROR\"","set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == \"FATAL\"","set(log.severity_number, SEVERITY_NUMBER_FATAL2) where log.severity_text == \"PANIC\"","set(log.time, Time(log.cache[\"timestamp\"], \"%F %T.%L %Z\")) where IsString(log.cache[\"timestamp\"])","set(instrumentation_scope.schema_url, \"https://opentelemetry.io/schemas/1.29.0\")","set(resource.attributes[\"db.system\"], \"postgresql\")","set(log.attributes[\"log.record.original\"], log.body[\"original\"])","set(log.body, log.cache)","set(log.attributes[\"client.address\"], log.body[\"remote_host\"]) where IsString(log.body[\"remote_host\"])","set(log.attributes[\"client.port\"], Int(log.body[\"remote_port\"])) where IsDouble(log.body[\"remote_port\"])","set(log.attributes[\"code.filepath\"], log.body[\"file_name\"]) where IsString(log.body[\"file_name\"])","set(log.attributes[\"code.function\"], log.body[\"func_name\"]) where IsString(log.body[\"func_name\"])","set(log.attributes[\"code.lineno\"], Int(log.body[\"file_line_num\"])) where IsDouble(log.body[\"file_line_num\"])","set(log.attributes[\"db.namespace\"], log.body[\"dbname\"]) where IsString(log.body[\"dbname\"])","set(log.attributes[\"db.response.status_code\"], log.body[\"state_code\"]) where IsString(log.body[\"state_code\"])","set(log.attributes[\"process.creation.time\"], Concat([ Substring(log.body[\"session_start\"], 0, 10), \"T\", Substring(log.body[\"session_start\"], 11, 8), \"Z\"], \"\")) where IsMatch(log.body[\"session_start\"], \"^[^ ]{10} [^ ]{8} UTC$\")","set(log.attributes[\"process.pid\"], Int(log.body[\"pid\"])) where IsDouble(log.body[\"pid\"])","set(log.attributes[\"process.title\"], log.body[\"ps\"]) where IsString(log.body[\"ps\"])","set(log.attributes[\"user.name\"], log.body[\"user\"]) where IsString(log.body[\"user\"])"]},{"conditions":["Len(log.body[\"message\"]) \u003e 7 and Substring(log.body[\"message\"], 0, 7) == \"AUDIT: \""],"statements":["set(log.body[\"pgaudit\"], ParseCSV(Substring(log.body[\"message\"], 7, Len(log.body[\"message\"]) - 7), \"audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter\", delimiter=\",\", mode=\"strict\"))","set(instrumentation_scope.name, \"pgaudit\") where Len(log.body[\"pgaudit\"]) \u003e 0"]}] diff --git a/internal/collector/gte_pg16_metrics.yaml b/internal/collector/gte_pg16_metrics.yaml deleted file mode 100644 index 319aad62dc..0000000000 --- a/internal/collector/gte_pg16_metrics.yaml +++ /dev/null @@ -1,127 +0,0 @@ -# This list of queries configures an OTel SQL Query Receiver to read pgMonitor -# metrics from Postgres. -# -# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries -# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml - -# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. -# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values -# Those columns are idx_scan and idx_tup_fetch and we avoid NULL by using COALESCE. 
- - sql: > - SELECT - current_database() as dbname - , p.schemaname - , p.relname - , p.seq_scan - , p.seq_tup_read - , COALESCE(p.idx_scan, 0) AS idx_scan - , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch - , p.n_tup_ins - , p.n_tup_upd - , p.n_tup_del - , p.n_tup_hot_upd - , p.n_tup_newpage_upd - , p.n_live_tup - , p.n_dead_tup - , p.vacuum_count - , p.autovacuum_count - , p.analyze_count - , p.autoanalyze_count - FROM pg_catalog.pg_stat_user_tables p; - metrics: - - metric_name: ccp_stat_user_tables_analyze_count - data_type: sum - value_column: analyze_count - description: Number of times this table has been manually analyzed - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_autoanalyze_count - data_type: sum - value_column: autoanalyze_count - description: Number of times this table has been analyzed by the autovacuum daemon - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_autovacuum_count - data_type: sum - value_column: autovacuum_count - description: Number of times this table has been vacuumed by the autovacuum daemon - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_idx_scan - data_type: sum - value_column: idx_scan - description: Number of index scans initiated on this table - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_idx_tup_fetch - data_type: sum - value_column: idx_tup_fetch - description: Number of live rows fetched by index scans - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_dead_tup - value_column: n_dead_tup - description: Estimated number of dead rows - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_live_tup - value_column: n_live_tup - description: Estimated number of live rows - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_del - data_type: sum - value_column: n_tup_del - description: Number of rows deleted - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_hot_upd - data_type: sum - value_column: n_tup_hot_upd - description: Number of rows HOT updated (i.e., with no separate index update required) - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_ins - data_type: sum - value_column: n_tup_ins - description: Number of rows inserted - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_upd - data_type: sum - value_column: n_tup_upd - description: Number of rows updated - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_seq_scan - data_type: sum - value_column: seq_scan - description: Number of sequential scans initiated on this table - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: 
"localhost:5432" - - metric_name: ccp_stat_user_tables_seq_tup_read - data_type: sum - value_column: seq_tup_read - description: Number of live rows fetched by sequential scans - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_vacuum_count - data_type: sum - value_column: vacuum_count - description: Number of times this table has been manually vacuumed (not counting VACUUM FULL) - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" diff --git a/internal/collector/gte_pg17_fast_metrics.yaml b/internal/collector/gte_pg17_fast_metrics.yaml new file mode 100644 index 0000000000..a590b48272 --- /dev/null +++ b/internal/collector/gte_pg17_fast_metrics.yaml @@ -0,0 +1,107 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml + + - sql: > + SELECT + s.writes + , s.fsyncs + FROM pg_catalog.pg_stat_io s + WHERE backend_type = 'background writer'; + metrics: + - metric_name: ccp_stat_io_bgwriter_writes + value_column: writes + data_type: sum + description: Number of write operations by background writers + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_io_bgwriter_fsyncs + value_column: fsyncs + data_type: sum + description: Number of fsync calls by background writers + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT + c.num_timed + , c.num_requested + , c.write_time + , c.sync_time + , c.buffers_written + FROM pg_catalog.pg_stat_checkpointer c; + metrics: + - metric_name: ccp_stat_checkpointer_num_timed + value_column: num_timed + description: Number of scheduled checkpoints that have been performed + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_checkpointer_num_requested + value_column: num_requested + description: Number of requested checkpoints that have been performed + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_checkpointer_write_time + value_column: write_time + value_type: double + description: Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_checkpointer_sync_time + value_column: sync_time + description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_checkpointer_buffers_written + description: Number of buffers written during checkpoints and restartpoints + value_column: buffers_written + static_attributes: + server: "localhost:5432" + +# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. +# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values +# Those columns are retained_bytes, database, conflicting, failover, and synced and we avoid NULL by using COALESCE. 
+ - sql: > + SELECT + s.slot_name + , s.active::int + , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes + , COALESCE(s.database, '') + , s.slot_type + , COALESCE(s.conflicting::int, 0) + , COALESCE(s.failover::int, 0) + , COALESCE(s.synced::int, 0) + FROM pg_catalog.pg_replication_slots s; + metrics: + - metric_name: ccp_replication_slots_active + value_column: active + description: Active state of slot. 1 = true. 0 = false. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_retained_bytes + value_column: retained_bytes + description: The amount of WAL (in bytes) being retained for this slot + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_conflicting + value_column: conflicting + description: True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_failover + value_column: failover + description: True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_synced + value_column: synced + description: True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/gte_pg17_metrics.yaml b/internal/collector/gte_pg17_metrics.yaml deleted file mode 100644 index de8f6786f5..0000000000 --- a/internal/collector/gte_pg17_metrics.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# This list of queries configures an OTel SQL Query Receiver to read pgMonitor -# metrics from Postgres. -# -# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries -# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml - - - sql: > - SELECT c.buffers_written - FROM pg_catalog.pg_stat_checkpointer c; - metrics: - - metric_name: ccp_stat_bgwriter_buffers_checkpoint - value_column: buffers_written - data_type: sum - description: Number of buffers written during checkpoints and restartpoints - static_attributes: - server: "localhost:5432" - - - sql: > - SELECT - s.writes - , s.fsyncs - FROM pg_catalog.pg_stat_io s - WHERE backend_type = 'background writer'; - metrics: - - metric_name: ccp_stat_bgwriter_buffers_backend - value_column: writes - data_type: sum - description: Number of write operations, each of the size specified in op_bytes. 
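Returning to the retained_bytes expression in the replication-slot query above: pg_wal_lsn_diff subtracts two WAL positions, and a pg_lsn is a 64-bit offset printed as two hexadecimal halves. The same arithmetic in Go, with helper names of our own choosing and made-up LSN values:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseLSN converts a pg_lsn string such as "16/B374D848" into the 64-bit
// WAL position it represents: high<<32 | low.
func parseLSN(s string) (uint64, error) {
	high, low, ok := strings.Cut(s, "/")
	if !ok {
		return 0, fmt.Errorf("not a pg_lsn: %q", s)
	}
	h, err := strconv.ParseUint(high, 16, 32)
	if err != nil {
		return 0, err
	}
	l, err := strconv.ParseUint(low, 16, 32)
	if err != nil {
		return 0, err
	}
	return h<<32 | l, nil
}

func main() {
	current, _ := parseLSN("16/B374D848") // insert (or replay) position
	restart, _ := parseLSN("16/B0000000") // the slot's restart_lsn

	// Equivalent to pg_wal_lsn_diff(current, restart): bytes of WAL the
	// slot is holding back from recycling.
	fmt.Println(int64(current) - int64(restart))
}
```

The CASE in the query picks the replay position on a standby and the insert position on a primary, so the metric stays meaningful on both.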
- static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_buffers_backend_fsync - value_column: fsyncs - data_type: sum - description: Number of fsync calls. These are only tracked in context normal. - static_attributes: - server: "localhost:5432" - - - sql: > - SELECT - c.num_timed - , c.num_requested - , c.write_time - , c.sync_time - , c.buffers_written - FROM pg_catalog.pg_stat_checkpointer c; - metrics: - - metric_name: ccp_stat_bgwriter_checkpoint_sync_time - value_column: sync_time - description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoint_write_time - value_column: write_time - value_type: double - description: Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoints_req - value_column: num_requested - description: Number of requested checkpoints that have been performed - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoints_timed - value_column: num_timed - description: Number of scheduled checkpoints that have been performed - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_checkpointer_buffers_written - description: Number of buffers written during checkpoints and restartpoints - value_column: buffers_written - static_attributes: - server: "localhost:5432" diff --git a/internal/collector/helpers_test.go b/internal/collector/helpers_test.go index 7f1e277e9b..1f174ebcda 100644 --- a/internal/collector/helpers_test.go +++ b/internal/collector/helpers_test.go @@ -23,6 +23,9 @@ func testInstrumentationSpec() *v1beta1.InstrumentationSpec { Logs: &v1beta1.InstrumentationLogsSpec{ Exporters: []string{"googlecloud"}, }, + Metrics: &v1beta1.InstrumentationMetricsSpec{ + Exporters: []string{"googlecloud"}, + }, } return spec.DeepCopy() diff --git a/internal/collector/instance.go b/internal/collector/instance.go index 9c83f11f3a..9cb1708042 100644 --- a/internal/collector/instance.go +++ b/internal/collector/instance.go @@ -50,12 +50,15 @@ func AddToPod( includeLogrotate bool, thisPodServesMetrics bool, ) { - if spec == nil || - !(feature.Enabled(ctx, feature.OpenTelemetryLogs) || - feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + if !OpenTelemetryLogsOrMetricsEnabled(ctx, spec) { return } + // We only want to include log rotation if this type of pod requires it + // (indicated by the includeLogrotate boolean) AND if logging is enabled + // for this PostgresCluster/PGAdmin + includeLogrotate = includeLogrotate && OpenTelemetryLogsEnabled(ctx, spec) + // Create volume and volume mount for otel collector config configVolumeMount := corev1.VolumeMount{ Name: "collector-config", @@ -113,6 +116,11 @@ func AddToPod( VolumeMounts: append(volumeMounts, configVolumeMount), } + // Add any user-specified environment variables to the collector container + if spec.Config != nil && spec.Config.EnvironmentVariables != nil { + container.Env = append(container.Env, spec.Config.EnvironmentVariables...)
+ } + // If metrics feature is enabled and this Pod serves metrics, add the // Prometheus port to this container if feature.Enabled(ctx, feature.OpenTelemetryMetrics) && thisPodServesMetrics { @@ -177,8 +185,7 @@ func startCommand(logDirectories []string, includeLogrotate bool) []string { if len(logDirectories) != 0 { for _, logDir := range logDirectories { mkdirScript = mkdirScript + ` -` + shell.MakeDirectories(0o775, logDir, - path.Join(logDir, "receiver")) +` + shell.MakeDirectories(logDir, path.Join(logDir, "receiver")) } } diff --git a/internal/collector/lt_pg16_fast_metrics.yaml b/internal/collector/lt_pg16_fast_metrics.yaml new file mode 100644 index 0000000000..8144abc144 --- /dev/null +++ b/internal/collector/lt_pg16_fast_metrics.yaml @@ -0,0 +1,51 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. +# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml + +# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. +# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values +# Those columns are retained_bytes and database and we avoid NULL by using COALESCE. + - sql: > + SELECT + s.slot_name + , s.active::int + , COALESCE(pg_wal_lsn_diff(CASE WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn() ELSE pg_current_wal_insert_lsn() END, s.restart_lsn), 0) AS retained_bytes + , COALESCE(s.database, '') + , s.slot_type + , 0 AS conflicting + , 0 AS failover + , 0 AS synced + FROM pg_catalog.pg_replication_slots s; + metrics: + - metric_name: ccp_replication_slots_active + value_column: active + description: Active state of slot. 1 = true. 0 = false. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_retained_bytes + value_column: retained_bytes + description: The amount of WAL (in bytes) being retained for this slot + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_conflicting + value_column: conflicting + description: True if this logical slot conflicted with recovery (and so is now invalidated). When this column is true, check invalidation_reason column for the conflict reason. Always NULL for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_failover + value_column: failover + description: True if this is a logical slot enabled to be synced to the standbys so that logical replication can be resumed from the new primary after failover. Always false for physical slots. + attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_replication_slots_synced + value_column: synced + description: True if this is a logical slot that was synced from a primary server. On a hot standby, the slots with the synced column marked as true can neither be used for logical decoding nor dropped manually. The value of this column has no meaning on the primary server; the column value on the primary is default false for all slots but may (if leftover from a promoted standby) also be true. 
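Tying back to the instance.go hunk above: any environment variables the user places in the instrumentation spec are appended after the operator-managed ones. A compressed sketch with the upstream Kubernetes types; the config struct and proxy value are stand-ins, not the operator's actual types:

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// config stands in for the instrumentation spec's Config field: a
// user-facing knob holding extra environment variables for the collector.
type config struct {
	EnvironmentVariables []corev1.EnvVar
}

func main() {
	container := corev1.Container{
		Name: "collector",
		Env: []corev1.EnvVar{
			{Name: "K8S_POD_NAME", ValueFrom: &corev1.EnvVarSource{
				FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.name"},
			}},
		},
	}

	spec := &config{EnvironmentVariables: []corev1.EnvVar{
		{Name: "HTTPS_PROXY", Value: "http://proxy.example.com:3128"},
	}}

	// Mirrors the guarded append in AddToPod: operator-managed variables
	// come first, user-specified ones follow.
	if spec != nil && spec.EnvironmentVariables != nil {
		container.Env = append(container.Env, spec.EnvironmentVariables...)
	}

	for _, env := range container.Env {
		fmt.Println(env.Name)
	}
}
```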
+ attribute_columns: ["database", "slot_name", "slot_type"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/lt_pg16_metrics.yaml b/internal/collector/lt_pg16_metrics.yaml deleted file mode 100644 index ca9fe8a0c8..0000000000 --- a/internal/collector/lt_pg16_metrics.yaml +++ /dev/null @@ -1,135 +0,0 @@ -# This list of queries configures an OTel SQL Query Receiver to read pgMonitor -# metrics from Postgres. -# -# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries -# https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml - -# NOTE: Some of the columns below can return NULL values, for which sqlqueryreceiver will warn. -# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/sqlqueryreceiver#null-values -# Those columns are idx_scan and idx_tup_fetch and we avoid NULL by using COALESCE. - - sql: > - SELECT - current_database() as dbname - , p.schemaname - , p.relname - , p.seq_scan - , p.seq_tup_read - , COALESCE(p.idx_scan, 0) AS idx_scan - , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch - , p.n_tup_ins - , p.n_tup_upd - , p.n_tup_del - , p.n_tup_hot_upd - , 0::bigint AS n_tup_newpage_upd - , p.n_live_tup - , p.n_dead_tup - , p.vacuum_count - , p.autovacuum_count - , p.analyze_count - , p.autoanalyze_count - FROM pg_catalog.pg_stat_user_tables p; - metrics: - - metric_name: ccp_stat_user_tables_analyze_count - data_type: sum - value_column: analyze_count - description: Number of times this table has been manually analyzed - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_autoanalyze_count - data_type: sum - value_column: autoanalyze_count - description: Number of times this table has been analyzed by the autovacuum daemon - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_autovacuum_count - data_type: sum - value_column: autovacuum_count - description: Number of times this table has been vacuumed by the autovacuum daemon - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_idx_scan - data_type: sum - value_column: idx_scan - description: Number of index scans initiated on this table - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_idx_tup_fetch - data_type: sum - value_column: idx_tup_fetch - description: Number of live rows fetched by index scans - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_dead_tup - value_column: n_dead_tup - description: Estimated number of dead rows - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". - # The issue doesn't occur with gte_pg16. 
- - metric_name: ccp_stat_user_tables_n_live_tup - value_column: n_live_tup - description: Estimated number of live rows - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_del - data_type: sum - value_column: n_tup_del - description: Number of rows deleted - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_hot_upd - data_type: sum - value_column: n_tup_hot_upd - description: Number of rows HOT updated (i.e., with no separate index update required) - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". - # The issue doesn't occur with gte_pg16. - - metric_name: ccp_stat_user_tables_n_tup_ins - data_type: sum - value_column: n_tup_ins - description: Number of rows inserted - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_n_tup_upd - data_type: sum - value_column: n_tup_upd - description: Number of rows updated - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". - # The issue doesn't occur with gte_pg16. - - metric_name: ccp_stat_user_tables_seq_scan - data_type: sum - value_column: seq_scan - description: Number of sequential scans initiated on this table - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - # FIXME: This metric returns 0, when the query returns 1 for relname="pgbackrest_info",schemaname="pg_temp_33". - # The issue doesn't occur with gte_pg16. 
- - metric_name: ccp_stat_user_tables_seq_tup_read - data_type: sum - value_column: seq_tup_read - description: Number of live rows fetched by sequential scans - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_user_tables_vacuum_count - data_type: sum - value_column: vacuum_count - description: Number of times this table has been manually vacuumed (not counting VACUUM FULL) - attribute_columns: ["dbname", "relname", "schemaname"] - static_attributes: - server: "localhost:5432" diff --git a/internal/collector/lt_pg17_metrics.yaml b/internal/collector/lt_pg17_fast_metrics.yaml similarity index 70% rename from internal/collector/lt_pg17_metrics.yaml rename to internal/collector/lt_pg17_fast_metrics.yaml index 330ff7d798..576ea8e4a6 100644 --- a/internal/collector/lt_pg17_metrics.yaml +++ b/internal/collector/lt_pg17_fast_metrics.yaml @@ -4,33 +4,22 @@ # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries # https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml - - sql: > - SELECT c.buffers_checkpoint AS buffers_written - FROM pg_catalog.pg_stat_bgwriter c; - metrics: - - metric_name: ccp_stat_bgwriter_buffers_checkpoint - value_column: buffers_written - data_type: sum - description: Number of buffers written during checkpoints and restartpoints - static_attributes: - server: "localhost:5432" - - sql: > SELECT s.buffers_backend AS writes , s.buffers_backend_fsync AS fsyncs FROM pg_catalog.pg_stat_bgwriter s; metrics: - - metric_name: ccp_stat_bgwriter_buffers_backend + - metric_name: ccp_stat_io_bgwriter_writes value_column: writes data_type: sum - description: Number of write operations, each of the size specified in op_bytes. + description: Number of write operations by background writers static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_buffers_backend_fsync + - metric_name: ccp_stat_io_bgwriter_fsyncs value_column: fsyncs data_type: sum - description: Number of fsync calls. These are only tracked in context normal. 
+ description: Number of fsync calls by background writers static_attributes: server: "localhost:5432" @@ -43,23 +32,23 @@ , c.buffers_checkpoint AS buffers_written FROM pg_catalog.pg_stat_bgwriter c; metrics: - - metric_name: ccp_stat_bgwriter_checkpoints_timed + - metric_name: ccp_stat_checkpointer_num_timed value_column: num_timed description: Number of scheduled checkpoints that have been performed static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoints_req + - metric_name: ccp_stat_checkpointer_num_requested value_column: num_requested description: Number of requested checkpoints that have been performed static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoint_write_time + - metric_name: ccp_stat_checkpointer_write_time value_column: write_time value_type: double description: Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds static_attributes: server: "localhost:5432" - - metric_name: ccp_stat_bgwriter_checkpoint_sync_time + - metric_name: ccp_stat_checkpointer_sync_time value_column: sync_time description: Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds static_attributes: diff --git a/internal/collector/naming.go b/internal/collector/naming.go index c8db6d6f21..c12ed89ebc 100644 --- a/internal/collector/naming.go +++ b/internal/collector/naming.go @@ -9,12 +9,13 @@ const DebugExporter = "debug" const LogsBatchProcessor = "batch/logs" const OneSecondBatchProcessor = "batch/1s" const SubSecondBatchProcessor = "batch/200ms" -const Prometheus = "prometheus" +const Prometheus = "prometheus/cpk-monitoring" const PrometheusPort = 9187 const PGBouncerMetrics = "metrics/pgbouncer" const PostgresMetrics = "metrics/postgres" const PatroniMetrics = "metrics/patroni" const ResourceDetectionProcessor = "resourcedetection" +const MonitoringUser = "ccp_monitoring" const SqlQuery = "sqlquery" diff --git a/internal/collector/patroni.go b/internal/collector/patroni.go index 532d103db7..e3b56718d8 100644 --- a/internal/collector/patroni.go +++ b/internal/collector/patroni.go @@ -9,7 +9,6 @@ import ( "slices" "strconv" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -23,7 +22,7 @@ func EnablePatroniLogging(ctx context.Context, spec = inCluster.Spec.Instrumentation.Logs } - if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if OpenTelemetryLogsEnabled(ctx, inCluster) { directory := naming.PatroniPGDataLogPath // Keep track of what log records and files have been processed. @@ -40,7 +39,14 @@ func EnablePatroniLogging(ctx context.Context, // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme outConfig.Receivers["filelog/patroni_jsonlog"] = map[string]any{ // Read the JSON files and keep track of what has been processed. - "include": []string{directory + "/*.log"}, + // When patroni rotates its log files, it renames the old .log file + // to .log.1. We want the collector to ingest logs from both files + // as it is possible that patroni will continue to write a log + // record or two to the old file while rotation is occurring. The + // collector knows not to create duplicate logs. 
+ "include": []string{ + directory + "/*.log", directory + "/*.log.1", + }, "storage": "file_storage/patroni_logs", "operators": []map[string]any{ @@ -59,22 +65,22 @@ func EnablePatroniLogging(ctx context.Context, {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerDatabase}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "patroni"}, }, } // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme outConfig.Processors["transform/patroni_logs"] = map[string]any{ "log_statements": []map[string]any{{ - "context": "log", "statements": []string{ `set(instrumentation_scope.name, "patroni")`, // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsejson - `set(cache, ParseJSON(body["original"]))`, + `set(log.cache, ParseJSON(log.body["original"]))`, // The log severity is in the "levelname" field. // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext - `set(severity_text, cache["levelname"])`, + `set(log.severity_text, log.cache["levelname"])`, // Map Patroni (python) "logging levels" to OpenTelemetry severity levels. // @@ -82,11 +88,11 @@ func EnablePatroniLogging(ctx context.Context, // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber // https://github.com/open-telemetry/opentelemetry-python/blob/v1.29.0/opentelemetry-api/src/opentelemetry/_logs/severity/__init__.py // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums - `set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG"`, - `set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"`, - `set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"`, - `set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"`, - `set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL"`, + `set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "DEBUG"`, + `set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "INFO"`, + `set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARNING"`, + `set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR"`, + `set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == "CRITICAL"`, // Parse the "asctime" field into the record timestamp. // The format is neither RFC 3339 nor ISO 8601: @@ -96,14 +102,14 @@ func EnablePatroniLogging(ctx context.Context, // // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/stanza/docs/types/timestamp.md // https://docs.python.org/3.6/library/logging.html#logging.LogRecord - `set(time, Time(cache["asctime"], "%F %T,%L"))`, + `set(log.time, Time(log.cache["asctime"], "%F %T,%L")) where IsString(log.cache["asctime"])`, // Keep the unparsed log record in a standard attribute, and replace // the log record body with the message field. 
// // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md - `set(attributes["log.record.original"], body["original"])`, - `set(body, cache["message"])`, + `set(log.attributes["log.record.original"], log.body["original"])`, + `set(log.body, log.cache["message"])`, }, }}, } @@ -134,7 +140,7 @@ func EnablePatroniMetrics(ctx context.Context, inCluster *v1beta1.PostgresCluster, outConfig *Config, ) { - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if OpenTelemetryMetricsEnabled(ctx, inCluster) { // Add Prometheus exporter outConfig.Exporters[Prometheus] = map[string]any{ "endpoint": "0.0.0.0:" + strconv.Itoa(PrometheusPort), @@ -163,6 +169,14 @@ func EnablePatroniMetrics(ctx context.Context, }, } + // If there are exporters to be added to the metrics pipelines defined + // in the spec, add them to the pipeline. + exporters := []ComponentID{Prometheus} + if inCluster.Spec.Instrumentation.Metrics != nil && + inCluster.Spec.Instrumentation.Metrics.Exporters != nil { + exporters = append(exporters, inCluster.Spec.Instrumentation.Metrics.Exporters...) + } + // Add Metrics Pipeline outConfig.Pipelines[PatroniMetrics] = Pipeline{ Receivers: []ComponentID{Prometheus}, @@ -170,7 +184,7 @@ func EnablePatroniMetrics(ctx context.Context, SubSecondBatchProcessor, CompactingProcessor, }, - Exporters: []ComponentID{Prometheus}, + Exporters: exporters, } } } diff --git a/internal/collector/patroni_test.go b/internal/collector/patroni_test.go index e2d3a84e58..3a37b14697 100644 --- a/internal/collector/patroni_test.go +++ b/internal/collector/patroni_test.go @@ -11,11 +11,12 @@ import ( "gotest.tools/v3/assert" "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) func TestEnablePatroniLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -23,8 +24,12 @@ func TestEnablePatroniLogging(t *testing.T) { ctx := feature.NewContext(context.Background(), gate) config := NewConfig(nil) + cluster := new(v1beta1.PostgresCluster) + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) - EnablePatroniLogging(ctx, new(v1beta1.PostgresCluster), config) + EnablePatroniLogging(ctx, cluster, config) result, err := config.ToYAML() assert.NilError(t, err) @@ -58,29 +63,37 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: patroni resourcedetection: detectors: [] override: false timeout: 30s transform/patroni_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "patroni") - - set(cache, ParseJSON(body["original"])) - - set(severity_text, cache["levelname"]) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" - - set(time, Time(cache["asctime"], "%F %T,%L")) - - set(attributes["log.record.original"], body["original"]) - - set(body, 
cache["message"]) + - set(log.cache, ParseJSON(log.body["original"])) + - set(log.severity_text, log.cache["levelname"]) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "CRITICAL" + - set(log.time, Time(log.cache["asctime"], "%F %T,%L")) where IsString(log.cache["asctime"]) + - set(log.attributes["log.record.original"], log.body["original"]) + - set(log.body, log.cache["message"]) receivers: filelog/patroni_jsonlog: include: - /pgdata/patroni/log/*.log + - /pgdata/patroni/log/*.log.1 operators: - from: body to: body.original @@ -153,29 +166,37 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: patroni resourcedetection: detectors: [] override: false timeout: 30s transform/patroni_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "patroni") - - set(cache, ParseJSON(body["original"])) - - set(severity_text, cache["levelname"]) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" - - set(time, Time(cache["asctime"], "%F %T,%L")) - - set(attributes["log.record.original"], body["original"]) - - set(body, cache["message"]) + - set(log.cache, ParseJSON(log.body["original"])) + - set(log.severity_text, log.cache["levelname"]) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "CRITICAL" + - set(log.time, Time(log.cache["asctime"], "%F %T,%L")) where IsString(log.cache["asctime"]) + - set(log.attributes["log.record.original"], log.body["original"]) + - set(log.body, log.cache["message"]) receivers: filelog/patroni_jsonlog: include: - /pgdata/patroni/log/*.log + - /pgdata/patroni/log/*.log.1 operators: - from: body to: body.original @@ -199,3 +220,137 @@ service: `) }) } + +func TestEnablePatroniMetrics(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(nil) + cluster := new(v1beta1.PostgresCluster) + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) + + EnablePatroniMetrics(ctx, cluster, config) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + prometheus/cpk-monitoring: + config: + scrape_configs: + - job_name: patroni + scheme: https + scrape_interval: 10s + static_configs: + - targets: + - 0.0.0.0:8008 + tls_config: + insecure_skip_verify: true +service: + extensions: [] + pipelines: + metrics/patroni: + exporters: + - prometheus/cpk-monitoring + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - prometheus/cpk-monitoring +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.Instrumentation = testInstrumentationSpec() + config := NewConfig(cluster.Spec.Instrumentation) + + EnablePatroniMetrics(ctx, cluster, config) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + prometheus/cpk-monitoring: + config: + scrape_configs: + - job_name: patroni + scheme: https + scrape_interval: 10s + static_configs: + - targets: + - 0.0.0.0:8008 + tls_config: + insecure_skip_verify: true +service: + extensions: [] + pipelines: + metrics/patroni: + exporters: + - prometheus/cpk-monitoring + - googlecloud + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - prometheus/cpk-monitoring +`) + + }) +} diff --git a/internal/collector/pgadmin.go b/internal/collector/pgadmin.go index e22ed621f0..244fc57546 100644 --- a/internal/collector/pgadmin.go +++ b/internal/collector/pgadmin.go @@ -10,7 +10,6 @@ import ( corev1 "k8s.io/api/core/v1" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -18,9 +17,10 @@ import ( func EnablePgAdminLogging(ctx context.Context, spec *v1beta1.InstrumentationSpec, configmap *corev1.ConfigMap, ) error { - if !feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if !OpenTelemetryLogsEnabled(ctx, spec) { return nil } + otelConfig := NewConfig(spec) otelConfig.Extensions["file_storage/pgadmin_data_logs"] = map[string]any{ @@ -29,6 +29,11 @@ func EnablePgAdminLogging(ctx context.Context, spec *v1beta1.InstrumentationSpec "fsync": true, } + // PgAdmin/gunicorn logs are rotated by python -- python tries to emit a log + // and if the file needs to rotate, it rotates first and then emits the log. + // The collector therefore only needs to watch the single active log for + // each component. 
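The comment above draws a useful contrast: pgAdmin and gunicorn rotate in-process, so only the live file is watched, while Patroni and pgBackRest rotate by rename, so their receivers also watch *.log.1. A small sketch of what the two globs pick up, using throwaway files:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	dir, _ := os.MkdirTemp("", "logs")
	defer os.RemoveAll(dir)

	// Simulate rename-style rotation: older content survives under
	// .log.1 (and beyond) while a fresh .log is created.
	for _, name := range []string{"patroni.log", "patroni.log.1", "patroni.log.2"} {
		_ = os.WriteFile(filepath.Join(dir, name), nil, 0o644)
	}

	// The filelog receiver's include list: the current file plus the most
	// recent rotation, in case a record straddles the rename.
	for _, pattern := range []string{"*.log", "*.log.1"} {
		matches, _ := filepath.Glob(filepath.Join(dir, pattern))
		fmt.Println(pattern, "->", matches)
	}
}
```

Note that *.log.2 is deliberately not watched; by the time a file reaches that age, any straggling writes have long since stopped.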
+ // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme otelConfig.Receivers["filelog/pgadmin"] = map[string]any{ "include": []string{"/var/lib/pgadmin/logs/pgadmin.log"}, "storage": "file_storage/pgadmin_data_logs", @@ -49,40 +54,40 @@ func EnablePgAdminLogging(ctx context.Context, spec *v1beta1.InstrumentationSpec {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerPGAdmin}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "pgadmin"}, }, } otelConfig.Processors["transform/pgadmin_log"] = map[string]any{ "log_statements": []map[string]any{ { - "context": "log", "statements": []string{ // Keep the unparsed log record in a standard attribute, and replace // the log record body with the message field. // // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md - `set(attributes["log.record.original"], body)`, - `set(cache, ParseJSON(body))`, - `merge_maps(attributes, ExtractPatterns(cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), "insert")`, - `set(body, cache["message"])`, + `set(log.attributes["log.record.original"], log.body)`, + `set(log.cache, ParseJSON(log.body))`, + `merge_maps(log.attributes, ExtractPatterns(log.cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), "insert")`, + `set(log.body, log.cache["message"])`, // Set instrumentation scope to the "name" from each log record. - `set(instrumentation_scope.name, cache["name"])`, + `set(instrumentation_scope.name, log.cache["name"])`, // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext - `set(severity_text, cache["level"])`, - `set(time_unix_nano, Int(cache["time"]*1000000000))`, + `set(log.severity_text, log.cache["level"])`, + `set(log.time_unix_nano, Int(log.cache["time"]*1000000000))`, // Map pgAdmin "logging levels" to OpenTelemetry severity levels. 
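The mapping the comment above announces is a straight lookup from pgAdmin's Python level names to OpenTelemetry severity numbers. The same table in Go; the numeric values follow the OpenTelemetry logs data model, and unknown levels fall through to 0 (UNSPECIFIED):

```go
package main

import "fmt"

// OpenTelemetry severity numbers from the logs data model:
// DEBUG=5, INFO=9, WARN=13, ERROR=17, FATAL=21.
const (
	severityDebug = 5
	severityInfo  = 9
	severityWarn  = 13
	severityError = 17
	severityFatal = 21
)

// pgAdmin logs with Python logging level names; the chain of
// `set(...) where` statements above amounts to this lookup table.
var pgAdminSeverity = map[string]int{
	"DEBUG":    severityDebug,
	"INFO":     severityInfo,
	"WARNING":  severityWarn,
	"ERROR":    severityError,
	"CRITICAL": severityFatal,
}

func main() {
	for _, level := range []string{"WARNING", "CRITICAL", "TRACE"} {
		n, known := pgAdminSeverity[level]
		fmt.Println(level, n, known) // unknown levels keep severity 0 (UNSPECIFIED)
	}
}
```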
// // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber // https://opentelemetry.io/docs/specs/otel/logs/data-model-appendix/#appendix-b-severitynumber-example-mappings // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums - `set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG"`, - `set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"`, - `set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"`, - `set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"`, - `set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL"`, + `set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "DEBUG"`, + `set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "INFO"`, + `set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARNING"`, + `set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR"`, + `set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == "CRITICAL"`, }, }, }, @@ -125,5 +130,6 @@ func EnablePgAdminLogging(ctx context.Context, spec *v1beta1.InstrumentationSpec if err == nil { configmap.Data["collector.yaml"] = otelYAML } + return err } diff --git a/internal/collector/pgadmin_test.go b/internal/collector/pgadmin_test.go index c4d5acfab6..c6c86b4b37 100644 --- a/internal/collector/pgadmin_test.go +++ b/internal/collector/pgadmin_test.go @@ -12,7 +12,6 @@ import ( corev1 "k8s.io/api/core/v1" "github.com/crunchydata/postgres-operator/internal/collector" - pgadmin "github.com/crunchydata/postgres-operator/internal/controller/standalone_pgadmin" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/testing/cmp" @@ -21,7 +20,7 @@ import ( ) func TestEnablePgAdminLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -31,7 +30,9 @@ func TestEnablePgAdminLogging(t *testing.T) { configmap := new(corev1.ConfigMap) initialize.Map(&configmap.Data) - err := collector.EnablePgAdminLogging(ctx, nil, configmap) + var instrumentation *v1beta1.InstrumentationSpec + require.UnmarshalInto(t, &instrumentation, `{}`) + err := collector.EnablePgAdminLogging(ctx, instrumentation, configmap) assert.NilError(t, err) assert.Assert(t, cmp.MarshalMatches(configmap.Data, ` @@ -44,7 +45,7 @@ collector.yaml: | extensions: file_storage/pgadmin_data_logs: create_directory: false - directory: `+pgadmin.LogDirectoryAbsolutePath+`/receiver + directory: /var/lib/pgadmin/logs/receiver fsync: true processors: batch/1s: @@ -66,35 +67,42 @@ collector.yaml: | - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgadmin resourcedetection: detectors: [] override: false timeout: 30s transform/pgadmin_log: log_statements: - - context: log - statements: - - set(attributes["log.record.original"], body) - - set(cache, ParseJSON(body)) - - merge_maps(attributes, ExtractPatterns(cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), + - statements: + - set(log.attributes["log.record.original"], log.body) + - set(log.cache, ParseJSON(log.body)) + - merge_maps(log.attributes, 
ExtractPatterns(log.cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), "insert") - - set(body, cache["message"]) - - set(instrumentation_scope.name, cache["name"]) - - set(severity_text, cache["level"]) - - set(time_unix_nano, Int(cache["time"]*1000000000)) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" + - set(log.body, log.cache["message"]) + - set(instrumentation_scope.name, log.cache["name"]) + - set(log.severity_text, log.cache["level"]) + - set(log.time_unix_nano, Int(log.cache["time"]*1000000000)) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "CRITICAL" receivers: filelog/gunicorn: include: - - `+pgadmin.GunicornLogFileAbsolutePath+` + - /var/lib/pgadmin/logs/gunicorn.log storage: file_storage/pgadmin_data_logs filelog/pgadmin: include: - - `+pgadmin.LogFileAbsolutePath+` + - /var/lib/pgadmin/logs/pgadmin.log storage: file_storage/pgadmin_data_logs service: extensions: @@ -165,7 +173,7 @@ collector.yaml: | extensions: file_storage/pgadmin_data_logs: create_directory: false - directory: `+pgadmin.LogDirectoryAbsolutePath+`/receiver + directory: /var/lib/pgadmin/logs/receiver fsync: true processors: batch/1s: @@ -187,35 +195,42 @@ collector.yaml: | - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgadmin resourcedetection: detectors: [] override: false timeout: 30s transform/pgadmin_log: log_statements: - - context: log - statements: - - set(attributes["log.record.original"], body) - - set(cache, ParseJSON(body)) - - merge_maps(attributes, ExtractPatterns(cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), + - statements: + - set(log.attributes["log.record.original"], log.body) + - set(log.cache, ParseJSON(log.body)) + - merge_maps(log.attributes, ExtractPatterns(log.cache["message"], "(?P[A-Z]{3}.*?[\\d]{3})"), "insert") - - set(body, cache["message"]) - - set(instrumentation_scope.name, cache["name"]) - - set(severity_text, cache["level"]) - - set(time_unix_nano, Int(cache["time"]*1000000000)) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "CRITICAL" + - set(log.body, log.cache["message"]) + - set(instrumentation_scope.name, log.cache["name"]) + - set(log.severity_text, log.cache["level"]) + - set(log.time_unix_nano, Int(log.cache["time"]*1000000000)) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, 
SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "CRITICAL" receivers: filelog/gunicorn: include: - - `+pgadmin.GunicornLogFileAbsolutePath+` + - /var/lib/pgadmin/logs/gunicorn.log storage: file_storage/pgadmin_data_logs filelog/pgadmin: include: - - `+pgadmin.LogFileAbsolutePath+` + - /var/lib/pgadmin/logs/pgadmin.log storage: file_storage/pgadmin_data_logs service: extensions: diff --git a/internal/collector/pgbackrest.go b/internal/collector/pgbackrest.go index 569748ed9c..75cc9a55c1 100644 --- a/internal/collector/pgbackrest.go +++ b/internal/collector/pgbackrest.go @@ -11,7 +11,6 @@ import ( "fmt" "slices" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -29,7 +28,7 @@ func NewConfigForPgBackrestRepoHostPod( ) *Config { config := NewConfig(spec) - if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if OpenTelemetryLogsEnabled(ctx, spec) { var directory string for _, repo := range repos { @@ -56,8 +55,13 @@ func NewConfigForPgBackrestRepoHostPod( // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme config.Receivers["filelog/pgbackrest_log"] = map[string]any{ // Read the files and keep track of what has been processed. + // We use logrotate to rotate the pgbackrest logs, which renames the + // old .log file to .log.1. We want the collector to ingest logs from + // both files as it is possible that pgbackrest will continue to write + // a log record or two to the old file while rotation is occurring. + // The collector knows not to create duplicate logs. "include": []string{ - directory + "/*.log", + directory + "/*.log", directory + "/*.log.1", }, "storage": "file_storage/pgbackrest_logs", // pgBackRest prints logs with a log prefix, which includes a timestamp @@ -83,6 +87,7 @@ func NewConfigForPgBackrestRepoHostPod( {"action": "insert", "key": "k8s.container.name", "value": naming.PGBackRestRepoContainerName}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "pgbackrest"}, }, } diff --git a/internal/collector/pgbackrest_logs_transforms.yaml b/internal/collector/pgbackrest_logs_transforms.yaml index 31f4a48f94..389f9d0a2c 100644 --- a/internal/collector/pgbackrest_logs_transforms.yaml +++ b/internal/collector/pgbackrest_logs_transforms.yaml @@ -3,8 +3,7 @@ # # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme -- context: log - statements: +- statements: - set(instrumentation_scope.name, "pgbackrest") - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") @@ -14,30 +13,30 @@ # 3) the log level (INFO, WARN, etc.)
# 4) the message (anything else, including newline -- we can do this because we have a multiline block on the receiver) - >- - merge_maps(cache, - ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): (?(?s).*)$"), + merge_maps(log.cache, + ExtractPatterns(log.body, "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): (?(?s).*)$"), "insert") - where Len(body) > 0 + where Len(log.body) > 0 # The log severity is the "error_severity" field. # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext # https://pgbackrest.org/configuration.html#section-log/option-log-level-file - - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" + - set(log.severity_text, log.cache["error_severity"]) where IsString(log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == "TRACE" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == "DETAIL" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARN" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR" # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-timestamp - - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) + - set(log.time, Time(log.cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(log.cache["timestamp"]) # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/process.md - - set(attributes["process.pid"], cache["process_id"]) + - set(log.attributes["process.pid"], log.cache["process_id"]) # Keep the unparsed log record in a standard attribute, # and replace the log record body with the message field.
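A note on the ExtractPatterns call above: the named capture groups lost their angle-bracket names when this diff was flattened, but the cache keys used later (timestamp, process_id, error_severity, message) tell us what they were. A Go reconstruction of the prefix regex against a representative pgBackRest line; the exact original groups may differ slightly:

```go
package main

import (
	"fmt"
	"regexp"
)

// A plausible reconstruction of the pattern above, in Go regexp syntax.
// The (?s) flag lets the message group span newlines, matching the
// multiline block configured on the receiver.
var pgBackRestPrefix = regexp.MustCompile(
	`^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) ` +
		`(?P<process_id>P\d{2,3})\s*(?P<error_severity>\S+): (?P<message>(?s).*)$`)

func main() {
	line := "2024-05-06 07:08:09.123 P00   INFO: archive-push command begin"

	match := pgBackRestPrefix.FindStringSubmatch(line)
	if match == nil {
		fmt.Println("no match")
		return
	}

	// Like merge_maps(log.cache, ExtractPatterns(...), "insert"): one map
	// entry per named capture group.
	cache := map[string]string{}
	for i, name := range pgBackRestPrefix.SubexpNames() {
		if name != "" {
			cache[name] = match[i]
		}
	}
	fmt.Println(cache["timestamp"], cache["error_severity"], cache["message"])
}
```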
# https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md - - set(attributes["log.record.original"], body) - - set(body, cache["message"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.body, log.cache["message"]) diff --git a/internal/collector/pgbackrest_test.go b/internal/collector/pgbackrest_test.go index f1ebf14e4f..2b26d40531 100644 --- a/internal/collector/pgbackrest_test.go +++ b/internal/collector/pgbackrest_test.go @@ -11,11 +11,12 @@ import ( "gotest.tools/v3/assert" "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) func TestNewConfigForPgBackrestRepoHostPod(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -27,8 +28,10 @@ func TestNewConfigForPgBackrestRepoHostPod(t *testing.T) { Volume: new(v1beta1.RepoPVC), }, } + var instrumentation *v1beta1.InstrumentationSpec + require.UnmarshalInto(t, &instrumentation, `{}`) - config := NewConfigForPgBackrestRepoHostPod(ctx, nil, repos) + config := NewConfigForPgBackrestRepoHostPod(ctx, instrumentation, repos) result, err := config.ToYAML() assert.NilError(t, err) @@ -62,34 +65,44 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgbackrest resourcedetection: detectors: [] override: false timeout: 30s transform/pgbackrest_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbackrest") - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") - - 'merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + - 'merge_maps(log.cache, ExtractPatterns(log.body, "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): - (?(?s).*)$"), "insert") where Len(body) > 0' - - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) - - set(attributes["process.pid"], cache["process_id"]) - - set(attributes["log.record.original"], body) - - set(body, cache["message"]) + (?(?s).*)$"), "insert") where Len(log.body) > 0' + - set(log.severity_text, log.cache["error_severity"]) where IsString(log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == + "TRACE" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == + "DETAIL" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARN" + - 
set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.time, Time(log.cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where + IsString(log.cache["timestamp"]) + - set(log.attributes["process.pid"], log.cache["process_id"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.body, log.cache["message"]) receivers: filelog/pgbackrest_log: include: - /pgbackrest/repo1/log/*.log + - /pgbackrest/repo1/log/*.log.1 multiline: line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} storage: file_storage/pgbackrest_logs @@ -162,34 +175,44 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgbackrest resourcedetection: detectors: [] override: false timeout: 30s transform/pgbackrest_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbackrest") - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") - - 'merge_maps(cache, ExtractPatterns(body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} + - 'merge_maps(log.cache, ExtractPatterns(log.body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?<process_id>P\\d{2,3})\\s*(?<error_severity>\\S*): - (?<message>(?s).*)$"), "insert") where Len(body) > 0' - - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) - - set(attributes["process.pid"], cache["process_id"]) - - set(attributes["log.record.original"], body) - - set(body, cache["message"]) + (?<message>(?s).*)$"), "insert") where Len(log.body) > 0' + - set(log.severity_text, log.cache["error_severity"]) where IsString(log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == + "TRACE" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == + "DETAIL" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARN" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.time, Time(log.cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where + IsString(log.cache["timestamp"]) + - set(log.attributes["process.pid"], log.cache["process_id"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.body, log.cache["message"]) receivers: filelog/pgbackrest_log: include: - /pgbackrest/repo1/log/*.log + - /pgbackrest/repo1/log/*.log.1 multiline: line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} storage: file_storage/pgbackrest_logs diff --git a/internal/collector/pgbouncer.go b/internal/collector/pgbouncer.go index 9133bd6813..785b2b187e 100644 --- a/internal/collector/pgbouncer.go +++ b/internal/collector/pgbouncer.go @@ -12,7 +12,6 @@ import ( "slices" "strconv" - 
"github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -40,7 +39,7 @@ func NewConfigForPgBouncerPod( config := NewConfig(cluster.Spec.Instrumentation) EnablePgBouncerLogging(ctx, cluster, config) - EnablePgBouncerMetrics(ctx, config, sqlQueryUsername) + EnablePgBouncerMetrics(ctx, cluster, config, sqlQueryUsername) return config } @@ -56,7 +55,7 @@ func EnablePgBouncerLogging(ctx context.Context, spec = inCluster.Spec.Instrumentation.Logs } - if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if OpenTelemetryLogsEnabled(ctx, inCluster) { directory := naming.PGBouncerLogPath // Keep track of what log records and files have been processed. @@ -91,24 +90,24 @@ {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerPGBouncer}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "pgbouncer"}, }, } // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/processor/transformprocessor#readme outConfig.Processors["transform/pgbouncer_logs"] = map[string]any{ "log_statements": []map[string]any{{ - "context": "log", "statements": []string{ // Set instrumentation scope `set(instrumentation_scope.name, "pgbouncer")`, // Extract timestamp, pid, log level, and message and store in cache. - `merge_maps(cache, ExtractPatterns(body, ` + + `merge_maps(log.cache, ExtractPatterns(log.body, ` + `"^(?<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3} [A-Z]{3}) ` + `\\[(?<pid>\\d+)\\] (?<log_level>[A-Z]+) (?<msg>.*$)"), "insert")`, // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext - `set(severity_text, cache["log_level"])`, + `set(log.severity_text, log.cache["log_level"])`, // Map pgBouncer (libusual) "logging levels" to OpenTelemetry severity levels. // @@ -116,11 +115,11 @@ func EnablePgBouncerLogging(ctx context.Context, // https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber // https://opentelemetry.io/docs/specs/otel/logs/data-model-appendix/#appendix-b-severitynumber-example-mappings // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums - `set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "NOISE" or severity_text == "DEBUG"`, - `set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "LOG"`, - `set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"`, - `set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"`, - `set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL"`, + `set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "NOISE" or log.severity_text == "DEBUG"`, + `set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "LOG"`, + `set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARNING"`, + `set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR"`, + `set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == "FATAL"`, // Parse the timestamp. // The format is neither RFC 3339 nor ISO 8601: @@ -130,19 +129,19 @@ func EnablePgBouncerLogging(ctx context.Context, // then a timezone abbreviation. 
// // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/stanza/docs/types/timestamp.md - `set(time, Time(cache["timestamp"], "%F %T.%L %Z"))`, + `set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"])`, // Keep the unparsed log record in a standard attribute, and replace // the log record body with the message field. // // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md - `set(attributes["log.record.original"], body)`, + `set(log.attributes["log.record.original"], log.body)`, // Set pid as attribute - `set(attributes["process.pid"], cache["pid"])`, + `set(log.attributes["process.pid"], log.cache["pid"])`, // Set the log message to body. - `set(body, cache["msg"])`, + `set(log.body, log.cache["msg"])`, }, }}, } @@ -171,8 +170,10 @@ func EnablePgBouncerLogging(ctx context.Context, // EnablePgBouncerMetrics adds necessary configuration to the collector config to scrape // metrics from pgBouncer when the OpenTelemetryMetrics feature flag is enabled. -func EnablePgBouncerMetrics(ctx context.Context, config *Config, sqlQueryUsername string) { - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { +func EnablePgBouncerMetrics(ctx context.Context, inCluster *v1beta1.PostgresCluster, + config *Config, sqlQueryUsername string) { + + if OpenTelemetryMetricsEnabled(ctx, inCluster) { // Add Prometheus exporter config.Exporters[Prometheus] = map[string]any{ "endpoint": "0.0.0.0:" + strconv.Itoa(PrometheusPort), @@ -187,6 +188,14 @@ func EnablePgBouncerMetrics(ctx context.Context, config *Config, sqlQueryUsernam "queries": slices.Clone(pgBouncerMetricsQueries), } + // If the instrumentation spec defines additional exporters for the + // metrics pipelines, append them to this pipeline. + exporters := []ComponentID{Prometheus} + if inCluster.Spec.Instrumentation.Metrics != nil && + inCluster.Spec.Instrumentation.Metrics.Exporters != nil { + exporters = append(exporters, inCluster.Spec.Instrumentation.Metrics.Exporters...) + } + // Add Metrics Pipeline config.Pipelines[PGBouncerMetrics] = Pipeline{ Receivers: []ComponentID{SqlQuery}, @@ -194,7 +203,7 @@ func EnablePgBouncerMetrics(ctx context.Context, config *Config, sqlQueryUsernam SubSecondBatchProcessor, CompactingProcessor, }, - Exporters: []ComponentID{Prometheus}, + Exporters: exporters, } } } diff --git a/internal/collector/pgbouncer_metrics_queries.yaml b/internal/collector/pgbouncer_metrics_queries.yaml index 228fef1cc0..21cd0ae6ee 100644 --- a/internal/collector/pgbouncer_metrics_queries.yaml +++ b/internal/collector/pgbouncer_metrics_queries.yaml @@ -4,62 +4,64 @@ # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries # https://github.com/CrunchyData/pgmonitor/blob/v5.1.1/sql_exporter/common/crunchy_pgbouncer_121_collector.yml - - sql: "SHOW CLIENTS" + - sql: "SHOW CLIENTS;" metrics: - metric_name: ccp_pgbouncer_clients_wait_seconds value_column: wait attribute_columns: ["database", "user", "state", "application_name", "link"] description: "Current waiting time in seconds" - # NOTE: Avoid collecting "host" column because it can be null; the collector will warn against null. + # NOTE: Avoid collecting or using the "host", "force_user", and "pool_mode" columns because they + # can be NULL; the collector warns about NULL values even when a column is unused, and it emits + # an error log when a NULL column is actually used.
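The exporter-selection change above is easy to misread in diff form. Here is a minimal, self-contained Go sketch of the same idea; the struct names are simplified stand-ins for the operator's v1beta1 spec types, not the real API:

```go
package main

import "fmt"

// Simplified stand-ins for the operator's spec types; illustrative only.
type MetricsSpec struct{ Exporters []string }
type InstrumentationSpec struct{ Metrics *MetricsSpec }

// metricsExporters mirrors the diff above: the Prometheus exporter is
// always present, and any exporters named in the instrumentation spec
// are appended to the metrics pipeline.
func metricsExporters(spec *InstrumentationSpec) []string {
	exporters := []string{"prometheus/cpk-monitoring"}
	if spec != nil && spec.Metrics != nil && spec.Metrics.Exporters != nil {
		exporters = append(exporters, spec.Metrics.Exporters...)
	}
	return exporters
}

func main() {
	fmt.Println(metricsExporters(nil))
	fmt.Println(metricsExporters(&InstrumentationSpec{
		Metrics: &MetricsSpec{Exporters: []string{"googlecloud"}},
	}))
	// Output:
	// [prometheus/cpk-monitoring]
	// [prometheus/cpk-monitoring googlecloud]
}
```

This matches the expected pipelines in the tests later in this patch, where `prometheus/cpk-monitoring` is listed first and a spec-defined `googlecloud` exporter follows it.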
# The host column should always point either to pgBouncer's virtual database (the null case) or to the primary. - - sql: "SHOW DATABASES" + - sql: "SHOW DATABASES;" metrics: - metric_name: ccp_pgbouncer_databases_pool_size value_column: pool_size - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: "Maximum number of server connections" - metric_name: ccp_pgbouncer_databases_min_pool_size value_column: min_pool_size - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: "Minimum number of server connections" - - metric_name: ccp_pgbouncer_databases_reserve_pool - value_column: reserve_pool - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + - metric_name: ccp_pgbouncer_databases_reserve_pool_size + value_column: reserve_pool_size + attribute_columns: ["name", "port", "database"] description: "Maximum number of additional connections for this database" - metric_name: ccp_pgbouncer_databases_max_connections value_column: max_connections - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: >- Maximum number of allowed connections for this database, as set by max_db_connections, either globally or per database - metric_name: ccp_pgbouncer_databases_current_connections value_column: current_connections - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: "Current number of connections for this database" - metric_name: ccp_pgbouncer_databases_paused value_column: paused - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: "1 if this database is currently paused, else 0" - metric_name: ccp_pgbouncer_databases_disabled value_column: disabled - attribute_columns: ["name", "port", "database", "force_user", "pool_mode"] + attribute_columns: ["name", "port", "database"] description: "1 if this database is currently disabled, else 0" - - sql: "SHOW LISTS" + - sql: "SHOW LISTS;" metrics: - metric_name: ccp_pgbouncer_lists_item_count value_column: items attribute_columns: ["list"] description: "Count of items registered with pgBouncer" - - sql: "SHOW POOLS" + - sql: "SHOW POOLS;" metrics: - metric_name: ccp_pgbouncer_pools_client_active value_column: cl_active @@ -90,7 +92,7 @@ Server connections that have been idle for more than server_check_delay, so they need server_check_query to run on them before they can be used again - - sql: "SHOW SERVERS" + - sql: "SHOW SERVERS;" metrics: - metric_name: ccp_pgbouncer_servers_close_needed value_column: close_needed diff --git a/internal/collector/pgbouncer_test.go b/internal/collector/pgbouncer_test.go index df8427fbbd..34f2ccf328 100644 --- a/internal/collector/pgbouncer_test.go +++ b/internal/collector/pgbouncer_test.go @@ -11,11 +11,12 @@ import ( "gotest.tools/v3/assert" "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) func TestEnablePgBouncerLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, 
gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -23,8 +24,11 @@ func TestEnablePgBouncerLogging(t *testing.T) { ctx := feature.NewContext(context.Background(), gate) config := NewConfig(nil) - - EnablePgBouncerLogging(ctx, new(v1beta1.PostgresCluster), config) + cluster := new(v1beta1.PostgresCluster) + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) + EnablePgBouncerLogging(ctx, cluster, config) result, err := config.ToYAML() assert.NilError(t, err) @@ -58,29 +62,35 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgbouncer resourcedetection: detectors: [] override: false timeout: 30s transform/pgbouncer_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbouncer") - - merge_maps(cache, ExtractPatterns(body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} + - merge_maps(log.cache, ExtractPatterns(log.body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3} [A-Z]{3}) \\[(?<pid>\\d+)\\] (?<log_level>[A-Z]+) (?<msg>.*$)"), "insert") - - set(severity_text, cache["log_level"]) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "NOISE" - or severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "LOG" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" - - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) - - set(attributes["log.record.original"], body) - - set(attributes["process.pid"], cache["pid"]) - - set(body, cache["msg"]) + - set(log.severity_text, log.cache["log_level"]) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "NOISE" or log.severity_text == "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "LOG" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "FATAL" + - set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.attributes["process.pid"], log.cache["pid"]) + - set(log.body, log.cache["msg"]) receivers: filelog/pgbouncer_log: include: @@ -155,29 +165,35 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgbouncer resourcedetection: detectors: [] override: false timeout: 30s transform/pgbouncer_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbouncer") - - merge_maps(cache, ExtractPatterns(body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} + - merge_maps(log.cache, ExtractPatterns(log.body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3} [A-Z]{3}) \\[(?<pid>\\d+)\\] (?<log_level>[A-Z]+) (?<msg>.*$)"), "insert") - - set(severity_text, cache["log_level"]) - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "NOISE" - or severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "LOG" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - 
set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" - - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) - - set(attributes["log.record.original"], body) - - set(attributes["process.pid"], cache["pid"]) - - set(body, cache["msg"]) + - set(log.severity_text, log.cache["log_level"]) + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "NOISE" or log.severity_text == "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "LOG" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "FATAL" + - set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.attributes["process.pid"], log.cache["pid"]) + - set(log.body, log.cache["msg"]) receivers: filelog/pgbouncer_log: include: @@ -202,3 +218,361 @@ service: `) }) } + +func TestEnablePgBouncerMetrics(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(nil) + cluster := new(v1beta1.PostgresCluster) + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) + EnablePgBouncerMetrics(ctx, cluster, config, "test_user") + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + sqlquery: + datasource: host=localhost dbname=pgbouncer port=5432 user=test_user password=${env:PGPASSWORD} + driver: postgres + queries: + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: Current waiting time in seconds + metric_name: ccp_pgbouncer_clients_wait_seconds + value_column: wait + sql: SHOW CLIENTS; + - metrics: + - attribute_columns: + - name + - port + - database + description: Maximum number of server connections + metric_name: ccp_pgbouncer_databases_pool_size + value_column: pool_size + - attribute_columns: + - name + - port + - database + description: Minimum number of server connections + metric_name: ccp_pgbouncer_databases_min_pool_size + value_column: min_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of additional connections for this database + metric_name: ccp_pgbouncer_databases_reserve_pool_size + value_column: reserve_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of allowed connections for this database, as set + by max_db_connections, either globally or per database + metric_name: ccp_pgbouncer_databases_max_connections + value_column: max_connections + - attribute_columns: + - name + - port + - database + description: Current number of connections for this database + metric_name: ccp_pgbouncer_databases_current_connections + value_column: current_connections + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently paused, else 0 + metric_name: ccp_pgbouncer_databases_paused + value_column: paused + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently disabled, else 0 + metric_name: ccp_pgbouncer_databases_disabled + value_column: disabled + sql: SHOW DATABASES; + - metrics: + - attribute_columns: + - list + description: Count of items registered with pgBouncer + metric_name: ccp_pgbouncer_lists_item_count + value_column: items + sql: SHOW LISTS; + - metrics: + - attribute_columns: + - database + - user + description: Client connections that are either linked to server connections + or are idle with no queries waiting to be processed + metric_name: ccp_pgbouncer_pools_client_active + value_column: cl_active + - attribute_columns: + - database + - user + description: Client connections that have sent queries but have not yet got + a server connection + metric_name: ccp_pgbouncer_pools_client_waiting + value_column: cl_waiting + - attribute_columns: + - database + - user + description: Server connections that are linked to a client + metric_name: ccp_pgbouncer_pools_server_active + value_column: sv_active + - attribute_columns: + - database + - user + description: Server connections that are unused and immediately usable for + client queries + metric_name: ccp_pgbouncer_pools_server_idle + value_column: sv_idle + - attribute_columns: + - database + - user + description: Server connections that have been idle for more than server_check_delay, + so they need server_check_query to run on them before they can be used again + metric_name: ccp_pgbouncer_pools_server_used + 
value_column: sv_used + sql: SHOW POOLS; + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: 1 if the connection will be closed as soon as possible, because + a configuration file reload or DNS update changed the connection information + or RECONNECT was issued + metric_name: ccp_pgbouncer_servers_close_needed + value_column: close_needed + sql: SHOW SERVERS; +service: + extensions: [] + pipelines: + metrics/pgbouncer: + exporters: + - prometheus/cpk-monitoring + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - sqlquery +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(testInstrumentationSpec()) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.Instrumentation = testInstrumentationSpec() + + EnablePgBouncerMetrics(ctx, cluster, config, "test_user") + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + sqlquery: + datasource: host=localhost dbname=pgbouncer port=5432 user=test_user password=${env:PGPASSWORD} + driver: postgres + queries: + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: Current waiting time in seconds + metric_name: ccp_pgbouncer_clients_wait_seconds + value_column: wait + sql: SHOW CLIENTS; + - metrics: + - attribute_columns: + - name + - port + - database + description: Maximum number of server connections + metric_name: ccp_pgbouncer_databases_pool_size + value_column: pool_size + - attribute_columns: + - name + - port + - database + description: Minimum number of server connections + metric_name: ccp_pgbouncer_databases_min_pool_size + value_column: min_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of additional connections for this database + metric_name: ccp_pgbouncer_databases_reserve_pool_size + value_column: reserve_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of allowed connections for this database, as set + by max_db_connections, either globally or per database + metric_name: ccp_pgbouncer_databases_max_connections + value_column: max_connections + - attribute_columns: + - name + - port + - database + description: Current number of connections for this database + metric_name: ccp_pgbouncer_databases_current_connections + value_column: current_connections + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently paused, else 0 + metric_name: ccp_pgbouncer_databases_paused + value_column: paused + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently disabled, else 0 + metric_name: ccp_pgbouncer_databases_disabled + value_column: disabled 
+ sql: SHOW DATABASES; + - metrics: + - attribute_columns: + - list + description: Count of items registered with pgBouncer + metric_name: ccp_pgbouncer_lists_item_count + value_column: items + sql: SHOW LISTS; + - metrics: + - attribute_columns: + - database + - user + description: Client connections that are either linked to server connections + or are idle with no queries waiting to be processed + metric_name: ccp_pgbouncer_pools_client_active + value_column: cl_active + - attribute_columns: + - database + - user + description: Client connections that have sent queries but have not yet got + a server connection + metric_name: ccp_pgbouncer_pools_client_waiting + value_column: cl_waiting + - attribute_columns: + - database + - user + description: Server connections that are linked to a client + metric_name: ccp_pgbouncer_pools_server_active + value_column: sv_active + - attribute_columns: + - database + - user + description: Server connections that are unused and immediately usable for + client queries + metric_name: ccp_pgbouncer_pools_server_idle + value_column: sv_idle + - attribute_columns: + - database + - user + description: Server connections that have been idle for more than server_check_delay, + so they need server_check_query to run on them before they can be used again + metric_name: ccp_pgbouncer_pools_server_used + value_column: sv_used + sql: SHOW POOLS; + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: 1 if the connection will be closed as soon as possible, because + a configuration file reload or DNS update changed the connection information + or RECONNECT was issued + metric_name: ccp_pgbouncer_servers_close_needed + value_column: close_needed + sql: SHOW SERVERS; +service: + extensions: [] + pipelines: + metrics/pgbouncer: + exporters: + - prometheus/cpk-monitoring + - googlecloud + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - sqlquery +`) + + }) +} diff --git a/internal/collector/postgres.go b/internal/collector/postgres.go index cfc0b88245..a926639097 100644 --- a/internal/collector/postgres.go +++ b/internal/collector/postgres.go @@ -15,7 +15,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/postgres" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -86,7 +85,7 @@ func EnablePostgresLogging( spec = inCluster.Spec.Instrumentation.Logs } - if inCluster != nil && feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if OpenTelemetryLogsEnabled(ctx, inCluster) { directory := postgres.LogDirectory() version := inCluster.Spec.PostgresVersion @@ -143,6 +142,7 @@ func EnablePostgresLogging( // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme outConfig.Receivers["filelog/postgres_csvlog"] = map[string]any{ // Read the CSV files and keep track of what has been processed. + // The wildcard covers all potential log file names. "include": []string{directory + "/*.csv"}, "storage": "file_storage/postgres_logs", @@ -174,6 +174,7 @@ func EnablePostgresLogging( // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme outConfig.Receivers["filelog/postgres_jsonlog"] = map[string]any{ // Read the JSON files and keep track of what has been processed. 
+ // The wildcard covers all potential log file names. "include": []string{directory + "/*.json"}, "storage": "file_storage/postgres_logs", @@ -196,6 +197,7 @@ func EnablePostgresLogging( {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerDatabase}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "postgres"}, // https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/database#readme {"action": "insert", "key": "db.system", "value": "postgresql"}, @@ -239,8 +241,17 @@ func EnablePostgresLogging( "fsync": true, } + // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/filelogreceiver#readme outConfig.Receivers["filelog/pgbackrest_log"] = map[string]any{ - "include": []string{naming.PGBackRestPGDataLogPath + "/*.log"}, + // We use logrotate to rotate the pgbackrest logs which renames the + // old .log file to .log.1. We want the collector to ingest logs from + // both files as it is possible that pgbackrest will continue to write + // a log record or two to the old file while rotation is occurring. + // The collector knows not to create duplicate logs. + "include": []string{ + naming.PGBackRestPGDataLogPath + "/*.log", + naming.PGBackRestPGDataLogPath + "/*.log.1", + }, "storage": "file_storage/pgbackrest_logs", // pgBackRest prints logs with a log prefix, which includes a timestamp @@ -266,6 +277,7 @@ func EnablePostgresLogging( {"action": "insert", "key": "k8s.container.name", "value": naming.ContainerDatabase}, {"action": "insert", "key": "k8s.namespace.name", "value": "${env:K8S_POD_NAMESPACE}"}, {"action": "insert", "key": "k8s.pod.name", "value": "${env:K8S_POD_NAME}"}, + {"action": "insert", "key": "process.executable.name", "value": "pgbackrest"}, }, } diff --git a/internal/collector/postgres_5m_metrics.yaml b/internal/collector/postgres_5m_metrics.yaml index 9f5c3212dc..b554ed8dae 100644 --- a/internal/collector/postgres_5m_metrics.yaml +++ b/internal/collector/postgres_5m_metrics.yaml @@ -5,7 +5,7 @@ # https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml - sql: > SELECT datname as dbname - , pg_database_size(datname) as bytes + , pg_catalog.pg_database_size(datname) as bytes FROM pg_catalog.pg_database WHERE datistemplate = false; metrics: @@ -36,108 +36,14 @@ static_attributes: server: "localhost:5432" - - sql: > - SELECT s.datname AS dbname - , s.xact_commit - , s.xact_rollback - , s.blks_read - , s.blks_hit - , s.tup_returned - , s.tup_fetched - , s.tup_inserted - , s.tup_updated - , s.tup_deleted - , s.conflicts - , s.temp_files - , s.temp_bytes - , s.deadlocks - FROM pg_catalog.pg_stat_database s - JOIN pg_catalog.pg_database d ON d.datname = s.datname - WHERE d.datistemplate = false; + - sql: SELECT monitor.pg_hba_checksum() AS status; metrics: - - metric_name: ccp_stat_database_blks_hit - value_column: blks_hit - description: Number of times disk blocks were found already in the buffer cache, so that a read was not necessary - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_database_blks_read - value_column: blks_read - description: Number of disk blocks read in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_database_conflicts - value_column: 
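As a quick, hedged illustration of what the two pgBackRest include globs above do and do not pick up during a logrotate cycle, here is a small Go sketch using filepath.Match; the file names are hypothetical, and the real directory prefix comes from naming.PGBackRestPGDataLogPath:

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Hypothetical names around a rotation; logrotate renames the live
	// file to *.log.1 before older copies are compressed.
	files := []string{
		"db-backup.log",      // file pgBackRest writes now
		"db-backup.log.1",    // just-rotated file, may still receive a record
		"db-backup.log.2.gz", // older, compressed rotations are ignored
	}
	globs := []string{"*.log", "*.log.1"}
	for _, name := range files {
		for _, glob := range globs {
			if ok, _ := filepath.Match(glob, name); ok {
				fmt.Println("watched:", name)
				break
			}
		}
	}
	// Output:
	// watched: db-backup.log
	// watched: db-backup.log.1
}
```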
conflicts - description: Number of queries canceled due to conflicts with recovery in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_database_deadlocks - value_column: deadlocks - description: Number of deadlocks detected in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_database_temp_bytes - value_column: temp_bytes - description: Total amount of data written to temporary files by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - metric_name: ccp_stat_database_temp_files - value_column: temp_files - description: Number of rows deleted by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_tup_deleted - value_column: tup_deleted - description: Number of rows deleted by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_tup_fetched - value_column: tup_fetched - description: Number of rows fetched by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_tup_inserted - value_column: tup_inserted - description: Number of rows inserted by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_tup_returned - value_column: tup_returned - description: Number of rows returned by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_tup_updated - value_column: tup_updated - description: Number of rows updated by queries in this database - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_xact_commit - value_column: xact_commit - description: Number of transactions in this database that have been committed - attribute_columns: ["dbname"] - static_attributes: - server: "localhost:5432" - - - metric_name: ccp_stat_database_xact_rollback - value_column: xact_rollback - description: Number of transactions in this database that have been rolled back - attribute_columns: ["dbname"] + - metric_name: ccp_pg_hba_checksum_status + value_column: status + description: | + Value of checksum monitoring status for pg_catalog.pg_hba_file_rules (pg_hba.conf). + 0 = valid config. 1 = settings changed. + Settings history is available for review in the table `monitor.pg_hba_checksum`. + To reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid(). Note this will clear the history table. static_attributes: server: "localhost:5432" - diff --git a/internal/collector/postgres_5m_per_db_metrics.yaml b/internal/collector/postgres_5m_per_db_metrics.yaml new file mode 100644 index 0000000000..6fcefcf9d0 --- /dev/null +++ b/internal/collector/postgres_5m_per_db_metrics.yaml @@ -0,0 +1,161 @@ +# This list of queries configures an OTel SQL Query Receiver to read pgMonitor +# metrics from Postgres. 
+# +# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries +# https://github.com/CrunchyData/pgmonitor/blob/v5.2.1/sql_exporter/common/crunchy_per_db_collector.yml +# +# Note: Several metrics in the `crunchy_per_db_collector` track the materialized views and +# pgMonitor-extension version -- metrics that aren't meaningful in the CPK environment. +# The list of metrics that fall into this category includes +# * ccp_metric_matview_refresh_last_run_fail_count +# * ccp_metric_matview_refresh_longest_runtime_seconds +# * ccp_metric_matview_refresh_longest_runtime +# * ccp_metric_table_refresh_longest_runtime +# * ccp_pgmonitor_extension_per_db + + - sql: > + SELECT current_database() as dbname + , n.nspname as schemaname + , c.relname + , pg_catalog.pg_total_relation_size(c.oid) as bytes + FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid + WHERE NOT pg_is_other_temp_schema(n.oid) + AND relkind IN ('r', 'm', 'f'); + metrics: + - metric_name: ccp_table_size_bytes + value_type: double + value_column: bytes + description: "Table size in bytes including indexes" + attribute_columns: ["dbname", "schemaname", "relname"] + static_attributes: + server: "localhost:5432" + + - sql: > + SELECT current_database() as dbname + , p.schemaname + , p.relname + , p.seq_scan + , p.seq_tup_read + , COALESCE(p.idx_scan, 0) AS idx_scan + , COALESCE(p.idx_tup_fetch, 0) as idx_tup_fetch + , p.n_tup_ins + , p.n_tup_upd + , p.n_tup_del + , p.n_tup_hot_upd + , CASE + WHEN current_setting('server_version_num')::int >= 160000 + THEN p.n_tup_newpage_upd + ELSE 0::bigint + END AS n_tup_newpage_upd + , p.n_live_tup + , p.n_dead_tup + , p.vacuum_count + , p.autovacuum_count + , p.analyze_count + , p.autoanalyze_count + FROM pg_catalog.pg_stat_user_tables p; + metrics: + - metric_name: ccp_stat_user_tables_seq_scan + data_type: sum + value_column: seq_scan + description: "Number of sequential scans initiated on this table" + attribute_columns: ["dbname", "schemaname", "relname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_seq_tup_read + data_type: sum + value_column: seq_tup_read + description: "Number of live rows fetched by sequential scans" + attribute_columns: ["dbname", "schemaname", "relname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_user_tables_idx_scan + data_type: sum + description: "Number of index scans initiated on this table" + value_column: idx_scan + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_idx_tup_fetch + data_type: sum + description: "Number of live rows fetched by index scans" + value_column: idx_tup_fetch + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_tup_ins + data_type: sum + description: "Number of rows inserted" + value_column: n_tup_ins + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_tup_upd + data_type: sum + description: "Number of rows updated" + value_column: n_tup_upd + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_tup_del + data_type: sum + description: "Number of rows deleted" + value_column: n_tup_del + static_attributes: + server:
"localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_tup_hot_upd + data_type: sum + description: "Number of rows HOT updated (i.e., with no separate index update required)" + value_column: n_tup_hot_upd + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_tup_newpage_upd + data_type: sum + description: "Number of rows updated where the successor version goes onto a new heap page, leaving behind an original version with a t_ctid field that points to a different heap page. These are always non-HOT updates." + value_column: n_tup_newpage_upd + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_live_tup + description: "Estimated number of live rows" + value_column: n_live_tup + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_n_dead_tup + description: "Estimated number of dead rows" + value_column: n_dead_tup + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_vacuum_count + data_type: sum + description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)" + value_column: vacuum_count + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_autovacuum_count + data_type: sum + description: "Number of times this table has been vacuumed by the autovacuum daemon" + value_column: autovacuum_count + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_analyze_count + data_type: sum + description: "Number of times this table has been manually analyzed" + value_column: analyze_count + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] + - metric_name: ccp_stat_user_tables_autoanalyze_count + data_type: sum + description: "Number of times this table has been analyzed by the autovacuum daemon" + value_column: autoanalyze_count + static_attributes: + server: "localhost:5432" + attribute_columns: ["dbname", "schemaname", "relname"] diff --git a/internal/collector/postgres_5s_metrics.yaml b/internal/collector/postgres_5s_metrics.yaml index 82ab10ef3c..d424dcb014 100644 --- a/internal/collector/postgres_5s_metrics.yaml +++ b/internal/collector/postgres_5s_metrics.yaml @@ -4,7 +4,7 @@ # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries # https://github.com/CrunchyData/pgmonitor/blob/development/sql_exporter/common/crunchy_global_collector.yml # - # TODO ccp_pg_stat_activity can be removed after metrics are fully aligned with the latest pgMonitor + # TODO ccp_pg_stat_activity can be removed/replaced once an equivalent metric is added to pgMonitor - sql: > SELECT pg_database.datname, @@ -43,9 +43,15 @@ - sql: > SELECT - COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive + COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive, + archived_count, + failed_count, + CASE + WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0 + WHEN EXTRACT(epoch from 
(last_failed_time - last_archived_time)) < 0 THEN 0 + ELSE EXTRACT(epoch from (last_failed_time - last_archived_time)) + END AS seconds_since_last_fail FROM pg_catalog.pg_stat_archiver; - metrics: - metric_name: ccp_archive_command_status_seconds_since_last_archive value_column: seconds_since_last_archive @@ -53,36 +59,16 @@ description: Seconds since the last successful archive operation static_attributes: server: "localhost:5432" - - - sql: > - SELECT archived_count - FROM pg_catalog.pg_stat_archiver - metrics: - metric_name: ccp_archive_command_status_archived_count value_column: archived_count description: Number of WAL files that have been successfully archived static_attributes: server: "localhost:5432" - - - sql: > - SELECT failed_count - FROM pg_catalog.pg_stat_archiver - metrics: - metric_name: ccp_archive_command_status_failed_count value_column: failed_count description: Number of failed attempts for archiving WAL files static_attributes: server: "localhost:5432" - - - sql: > - SELECT CASE - WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0 - WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) < 0 THEN 0 - ELSE EXTRACT(epoch from (last_failed_time - last_archived_time)) - END AS seconds_since_last_fail - FROM pg_catalog.pg_stat_archiver - - metrics: - metric_name: ccp_archive_command_status_seconds_since_last_fail value_column: seconds_since_last_fail description: Seconds since the last recorded failure of the archive_command @@ -103,7 +89,6 @@ , COALESCE(SUM(CASE WHEN state = 'idle' THEN 1 ELSE 0 END),0) AS idle , COALESCE(SUM(CASE WHEN state = 'idle in transaction' THEN 1 ELSE 0 END),0) AS idle_in_txn FROM pg_catalog.pg_stat_activity) x JOIN (SELECT setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true); - metrics: - metric_name: ccp_connection_stats_active value_column: active @@ -201,7 +186,7 @@ - sql: > SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request - , monitor.kdapi_scalar_bigint('cpu_limit') AS limit + , monitor.kdapi_scalar_bigint('cpu_limit') AS limit; metrics: - metric_name: ccp_nodemx_cpu_limit value_column: limit @@ -300,7 +285,7 @@ FROM monitor.proc_mountinfo() m JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number) WHERE m.mount_point IN ('/pgdata', '/pgwal') OR - m.mount_point like '/tablespaces/%' + m.mount_point like '/tablespaces/%'; metrics: - metric_name: ccp_nodemx_data_disk_available_bytes value_column: available_bytes @@ -472,7 +457,7 @@ ,tx_bytes ,tx_packets ,rx_bytes - ,rx_packets from monitor.proc_network_stats() + ,rx_packets from monitor.proc_network_stats(); metrics: - metric_name: ccp_nodemx_network_rx_bytes value_column: rx_bytes @@ -634,7 +619,8 @@ - metric_name: ccp_replication_lag_size_bytes value_column: bytes value_type: double - description: Time interval in seconds since PostgreSQL database was last restarted. + description: Replication lag in bytes. 
+ attribute_columns: ['replica'] static_attributes: server: "localhost:5432" @@ -944,16 +930,114 @@ static_attributes: server: "localhost:5432" stanza: "db" - - metric_name: ccp_backrest_last_info_repo_total_size_bytes - description: Total size of this backup in the pgbackrest repository, including all required previous backups and WAL - value_column: repo_total_size_bytes - attribute_columns: ["backup_type", "repo"] - static_attributes: - server: "localhost:5432" - stanza: "db" - metric_name: ccp_backrest_oldest_full_backup_time_seconds description: Seconds since the oldest completed full backup value_column: oldest_full_backup attribute_columns: ["repo"] static_attributes: server: "localhost:5432" + + - sql: > + SELECT s.datname AS dbname + , s.xact_commit + , s.xact_rollback + , s.blks_read + , s.blks_hit + , s.tup_returned + , s.tup_fetched + , s.tup_inserted + , s.tup_updated + , s.tup_deleted + , s.conflicts + , s.temp_files + , s.temp_bytes + , s.deadlocks + FROM pg_catalog.pg_stat_database s + JOIN pg_catalog.pg_database d ON d.datname = s.datname + WHERE d.datistemplate = false; + metrics: + - metric_name: ccp_stat_database_blks_hit + value_column: blks_hit + description: Number of times disk blocks were found already in the buffer cache, so that a read was not necessary + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_blks_read + value_column: blks_read + description: Number of disk blocks read in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_conflicts + value_column: conflicts + description: Number of queries canceled due to conflicts with recovery in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_deadlocks + value_column: deadlocks + description: Number of deadlocks detected in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_temp_bytes + value_column: temp_bytes + description: Total amount of data written to temporary files by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + - metric_name: ccp_stat_database_temp_files + value_column: temp_files + description: Number of temporary files created by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_deleted + value_column: tup_deleted + description: Number of rows deleted by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_fetched + value_column: tup_fetched + description: Number of rows fetched by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_inserted + value_column: tup_inserted + description: Number of rows inserted by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_returned + value_column: tup_returned + description: Number of rows returned by queries in this database + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_tup_updated + value_column: tup_updated + description: Number of rows updated by queries in this database + 
attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_xact_commit + value_column: xact_commit + description: Number of transactions in this database that have been committed + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" + + - metric_name: ccp_stat_database_xact_rollback + value_column: xact_rollback + description: Number of transactions in this database that have been rolled back + attribute_columns: ["dbname"] + static_attributes: + server: "localhost:5432" diff --git a/internal/collector/postgres_logs_transforms.yaml b/internal/collector/postgres_logs_transforms.yaml index f397b996e8..c58f1a1a7b 100644 --- a/internal/collector/postgres_logs_transforms.yaml +++ b/internal/collector/postgres_logs_transforms.yaml @@ -7,12 +7,11 @@ # TODO(postgres-14): We can stop parsing CSV logs when 14 is EOL. # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#readme -- context: log - conditions: - - body["format"] == "csv" +- conditions: - log.body["format"] == "csv" statements: # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsecsv - - set(cache, ParseCSV(body["original"], body["headers"], delimiter=",", mode="strict")) + - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",", mode="strict")) # Extract the optional "remote_port" value from the "connection_from" field. It is either: # 1. a Unix socket starting with "[local]:" or # 2. a host and port separated by the last colon. # # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/backend/utils/error/csvlog.c#l108 # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/common/ip.c#l224 - >- - merge_maps(cache, - ExtractPatterns(cache["connection_from"], "(?:^[[]local[]]:(?<remote_port>.+)|:(?<remote_port>[^:]+))$"), + merge_maps(log.cache, + ExtractPatterns(log.cache["connection_from"], "(?:^[[]local[]]:(?<remote_port>.+)|:(?<remote_port>[^:]+))$"), "insert") - where Len(cache["connection_from"]) > 0 + where Len(log.cache["connection_from"]) > 0 # When there is a "remote_port" value, everything before it is the "remote_host" value. - >- - set(cache["remote_host"], - Substring(cache["connection_from"], 0, Len(cache["connection_from"]) - Len(cache["remote_port"]) - 1)) - where Len(cache["connection_from"]) > 0 and IsString(cache["remote_port"]) + set(log.cache["remote_host"], + Substring(log.cache["connection_from"], 0, Len(log.cache["connection_from"]) - Len(log.cache["remote_port"]) - 1)) + where Len(log.cache["connection_from"]) > 0 and IsString(log.cache["remote_port"]) # When there is still no "remote_host" value, copy the "connection_from" value, if any. - >- - set(cache["remote_host"], cache["connection_from"]) - where Len(cache["connection_from"]) > 0 and not IsString(cache["remote_host"]) + set(log.cache["remote_host"], log.cache["connection_from"]) + where Len(log.cache["connection_from"]) > 0 and not IsString(log.cache["remote_host"]) # Extract the values encoded in the "location" field. 
# # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_10_0;f=src/backend/utils/error/elog.c#l2805 # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/backend/utils/error/csvlog.c#l207 - >- - merge_maps(cache, - ExtractPatterns(cache["location"], "^(?:(?<func_name>[^,]+), )?(?<file_name>[^:]+):(?<file_line_num>\\d+)$"), + merge_maps(log.cache, + ExtractPatterns(log.cache["location"], "^(?:(?<func_name>[^,]+), )?(?<file_name>[^:]+):(?<file_line_num>\\d+)$"), "insert") - where Len(cache["location"]) > 0 + where Len(log.cache["location"]) > 0 # These values are numeric in JSON logs. - >- - set(cache["cursor_position"], Double(cache["cursor_position"])) - where IsMatch(cache["cursor_position"], "^[0-9.]+$") + set(log.cache["cursor_position"], Double(log.cache["cursor_position"])) + where IsMatch(log.cache["cursor_position"], "^[0-9.]+$") - >- - set(cache["file_line_num"], Double(cache["file_line_num"])) - where IsMatch(cache["file_line_num"], "^[0-9.]+$") + set(log.cache["file_line_num"], Double(log.cache["file_line_num"])) + where IsMatch(log.cache["file_line_num"], "^[0-9.]+$") - >- - set(cache["internal_position"], Double(cache["internal_position"])) - where IsMatch(cache["internal_position"], "^[0-9.]+$") + set(log.cache["internal_position"], Double(log.cache["internal_position"])) + where IsMatch(log.cache["internal_position"], "^[0-9.]+$") - >- - set(cache["leader_pid"], Double(cache["leader_pid"])) - where IsMatch(cache["leader_pid"], "^[0-9.]+$") + set(log.cache["leader_pid"], Double(log.cache["leader_pid"])) + where IsMatch(log.cache["leader_pid"], "^[0-9.]+$") - >- - set(cache["line_num"], Double(cache["line_num"])) - where IsMatch(cache["line_num"], "^[0-9.]+$") + set(log.cache["line_num"], Double(log.cache["line_num"])) + where IsMatch(log.cache["line_num"], "^[0-9.]+$") - >- - set(cache["pid"], Double(cache["pid"])) - where IsMatch(cache["pid"], "^[0-9.]+$") + set(log.cache["pid"], Double(log.cache["pid"])) + where IsMatch(log.cache["pid"], "^[0-9.]+$") - >- - set(cache["query_id"], Double(cache["query_id"])) - where IsMatch(cache["query_id"], "^[0-9.]+$") + set(log.cache["query_id"], Double(log.cache["query_id"])) + where IsMatch(log.cache["query_id"], "^[0-9.]+$") - >- - set(cache["remote_port"], Double(cache["remote_port"])) - where IsMatch(cache["remote_port"], "^[0-9.]+$") + set(log.cache["remote_port"], Double(log.cache["remote_port"])) + where IsMatch(log.cache["remote_port"], "^[0-9.]+$") # Pass the results to the next set of statements. - - set(body["parsed"], cache) + - set(log.body["parsed"], log.cache) # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#readme -- context: log - statements: +- statements: - set(instrumentation_scope.name, "postgres") - set(instrumentation_scope.version, resource.attributes["db.version"]) # TODO(postgres-14): We can stop parsing CSV logs when 14 is EOL. - - set(cache, body["parsed"]) where body["format"] == "csv" + - set(log.cache, log.body["parsed"]) where log.body["format"] == "csv" # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsejson - - set(cache, ParseJSON(body["original"])) where body["format"] == "json" + - set(log.cache, ParseJSON(log.body["original"])) where log.body["format"] == "json" # The log severity is in the "error_severity" field. # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitytext - - set(severity_text, cache["error_severity"]) + - set(log.severity_text, log.cache["error_severity"]) # Map severity text to OpenTelemetry severity levels. 
 
   # Map severity text to OpenTelemetry severity levels.
   # Postgres has levels beyond the typical ones:
@@ -106,17 +104,17 @@
   # https://opentelemetry.io/docs/specs/otel/logs/data-model/#field-severitynumber
   # https://opentelemetry.io/docs/specs/otel/logs/data-model-appendix/#appendix-b-severitynumber-example-mappings
   # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#enums
-  - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "DEBUG5"
-  - set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == "DEBUG4"
-  - set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == "DEBUG3"
-  - set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == "DEBUG2"
-  - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG1"
-  - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" or severity_text == "LOG"
-  - set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == "NOTICE"
-  - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"
-  - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"
-  - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL"
-  - set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == "PANIC"
+  - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == "DEBUG5"
+  - set(log.severity_number, SEVERITY_NUMBER_TRACE2) where log.severity_text == "DEBUG4"
+  - set(log.severity_number, SEVERITY_NUMBER_TRACE3) where log.severity_text == "DEBUG3"
+  - set(log.severity_number, SEVERITY_NUMBER_TRACE4) where log.severity_text == "DEBUG2"
+  - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == "DEBUG1"
+  - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == "INFO" or log.severity_text == "LOG"
+  - set(log.severity_number, SEVERITY_NUMBER_INFO2) where log.severity_text == "NOTICE"
+  - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == "WARNING"
+  - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == "ERROR"
+  - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == "FATAL"
+  - set(log.severity_number, SEVERITY_NUMBER_FATAL2) where log.severity_text == "PANIC"
 
   # Parse the "timestamp" field into the record timestamp.
   # The format is neither RFC 3339 nor ISO 8601:
@@ -128,7 +126,7 @@
   # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#time
   # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_10_0;f=src/backend/utils/error/elog.c#l2246
   # https://git.postgresql.org/gitweb/?p=postgresql.git;hb=REL_17_0;f=src/backend/utils/error/elog.c#l2671
-  - set(time, Time(cache["timestamp"], "%F %T.%L %Z"))
+  - set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"])
 
   # Rename fields emitted by Postgres to align with OpenTelemetry semantic conventions.
   #
@@ -140,27 +138,27 @@
   - set(resource.attributes["db.system"], "postgresql")
 
   # Keep the unparsed log record in a standard attribute,
   # and replace the log record body with the parsed fields.
# # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/general/logs.md - - set(attributes["log.record.original"], body["original"]) - - set(body, cache) + - set(log.attributes["log.record.original"], log.body["original"]) + - set(log.body, log.cache) # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/client.md - - set(attributes["client.address"], body["remote_host"]) where IsString(body["remote_host"]) - - set(attributes["client.port"], Int(body["remote_port"])) where IsDouble(body["remote_port"]) + - set(log.attributes["client.address"], log.body["remote_host"]) where IsString(log.body["remote_host"]) + - set(log.attributes["client.port"], Int(log.body["remote_port"])) where IsDouble(log.body["remote_port"]) # These values are populated when the "log_error_verbosity" parameter is VERBOSE. # # https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-LOG-ERROR-VERBOSITY # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/code.md - - set(attributes["code.filepath"], body["file_name"]) where IsString(body["file_name"]) - - set(attributes["code.function"], body["func_name"]) where IsString(body["func_name"]) - - set(attributes["code.lineno"], Int(body["file_line_num"])) where IsDouble(body["file_line_num"]) + - set(log.attributes["code.filepath"], log.body["file_name"]) where IsString(log.body["file_name"]) + - set(log.attributes["code.function"], log.body["func_name"]) where IsString(log.body["func_name"]) + - set(log.attributes["code.lineno"], Int(log.body["file_line_num"])) where IsDouble(log.body["file_line_num"]) # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/db.md - - set(attributes["db.namespace"], body["dbname"]) where IsString(body["dbname"]) - - set(attributes["db.response.status_code"], body["state_code"]) where IsString(body["state_code"]) + - set(log.attributes["db.namespace"], log.body["dbname"]) where IsString(log.body["dbname"]) + - set(log.attributes["db.response.status_code"], log.body["state_code"]) where IsString(log.body["state_code"]) # Postgres is multiprocess so some client/backend details align here. 
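One of those process details is session_start, which Postgres prints as "2006-01-02 15:04:05 UTC"; the Concat and Substring statements below splice it into an RFC 3339 process.creation.time. The same reshaping in Go, as a sketch with a hypothetical helper, assuming the value already satisfies the IsMatch guard used below:

    package main

    import (
        "fmt"
        "regexp"
    )

    // sessionStart matches the guard below: ten non-space bytes (the date),
    // a space, eight non-space bytes (the time), then " UTC".
    var sessionStart = regexp.MustCompile(`^[^ ]{10} [^ ]{8} UTC$`)

    // creationTime splices "2006-01-02 15:04:05 UTC" into RFC 3339, exactly
    // as the Concat/Substring statements below do.
    func creationTime(s string) (string, bool) {
        if !sessionStart.MatchString(s) {
            return "", false
        }
        return s[:10] + "T" + s[11:19] + "Z", true
    }

    func main() {
        fmt.Println(creationTime("2024-01-02 03:04:05 UTC")) // 2024-01-02T03:04:05Z true
    }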
# @@ -170,42 +168,41 @@ # https://git.postgresql.org/gitweb/?p=postgresql.git;f=src/backend/utils/error/elog.c;hb=REL_17_0#l2697 # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/process.md - >- - set(attributes["process.creation.time"], Concat([ - Substring(body["session_start"], 0, 10), "T", - Substring(body["session_start"], 11, 8), "Z"], "")) - where IsMatch(body["session_start"], "^[^ ]{10} [^ ]{8} UTC$") + set(log.attributes["process.creation.time"], Concat([ + Substring(log.body["session_start"], 0, 10), "T", + Substring(log.body["session_start"], 11, 8), "Z"], "")) + where IsMatch(log.body["session_start"], "^[^ ]{10} [^ ]{8} UTC$") - >- - set(attributes["process.pid"], Int(body["pid"])) - where IsDouble(body["pid"]) + set(log.attributes["process.pid"], Int(log.body["pid"])) + where IsDouble(log.body["pid"]) - >- - set(attributes["process.title"], body["ps"]) - where IsString(body["ps"]) + set(log.attributes["process.title"], log.body["ps"]) + where IsString(log.body["ps"]) # https://github.com/open-telemetry/semantic-conventions/blob/v1.29.0/docs/attributes-registry/user.md - >- - set(attributes["user.name"], body["user"]) - where IsString(body["user"]) + set(log.attributes["user.name"], log.body["user"]) + where IsString(log.body["user"]) # Look for and parse the CSV of a pgAudit message. # # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/contexts/ottllog#readme # https://github.com/pgaudit/pgaudit#format -- context: log - conditions: +- conditions: # Messages from pgAudit have always been prefixed with "AUDIT:", but that # could change in the future. # # https://github.com/pgaudit/pgaudit/blame/17.0/pgaudit.c#L876 # TODO(postgres-18): Check this prefix and update the URL above. 
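Everything after the "AUDIT: " prefix is a single CSV record whose columns pgAudit fixes by position, which is why the statements below can hand ParseCSV a hard-coded nine-column header. A Go sketch of the same split using encoding/csv; this is illustrative only, not the collector's implementation:

    package main

    import (
        "encoding/csv"
        "fmt"
        "strings"
    )

    // pgAuditColumns is the same fixed header the transform below passes to ParseCSV.
    var pgAuditColumns = []string{
        "audit_type", "statement_id", "substatement_id", "class",
        "command", "object_type", "object_name", "statement", "parameter",
    }

    // parsePGAudit accepts only messages prefixed "AUDIT: ", then zips the
    // CSV fields with the header, mirroring the condition and statements below.
    func parsePGAudit(message string) (map[string]string, bool) {
        const prefix = "AUDIT: "
        if !strings.HasPrefix(message, prefix) {
            return nil, false
        }
        fields, err := csv.NewReader(strings.NewReader(message[len(prefix):])).Read()
        if err != nil || len(fields) != len(pgAuditColumns) {
            return nil, false
        }
        record := make(map[string]string, len(fields))
        for i, name := range pgAuditColumns {
            record[name] = fields[i]
        }
        return record, true
    }

    func main() {
        r, _ := parsePGAudit(`AUDIT: SESSION,1,1,READ,SELECT,,,SELECT 1,<none>`)
        fmt.Println(r["class"], r["statement"]) // READ SELECT 1
    }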
- >- - Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT: " + Len(log.body["message"]) > 7 and Substring(log.body["message"], 0, 7) == "AUDIT: " statements: # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/pkg/ottl/ottlfuncs#parsecsv - >- - set(body["pgaudit"], ParseCSV(Substring(body["message"], 7, Len(body["message"]) - 7), + set(log.body["pgaudit"], ParseCSV(Substring(log.body["message"], 7, Len(log.body["message"]) - 7), "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter", delimiter=",", mode="strict")) - >- set(instrumentation_scope.name, "pgaudit") - where Len(body["pgaudit"]) > 0 + where Len(log.body["pgaudit"]) > 0 diff --git a/internal/collector/postgres_metrics.go b/internal/collector/postgres_metrics.go index b6bd39cd87..f3d5371cc6 100644 --- a/internal/collector/postgres_metrics.go +++ b/internal/collector/postgres_metrics.go @@ -12,9 +12,7 @@ import ( "slices" "strconv" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/internal/pgmonitor" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -23,20 +21,23 @@ import ( //go:embed "generated/postgres_5s_metrics.json" var fiveSecondMetrics json.RawMessage +//go:embed "generated/postgres_5m_per_db_metrics.json" +var fiveMinutePerDBMetrics json.RawMessage + //go:embed "generated/postgres_5m_metrics.json" var fiveMinuteMetrics json.RawMessage -//go:embed "generated/gte_pg17_metrics.json" -var gtePG17 json.RawMessage +//go:embed "generated/gte_pg17_fast_metrics.json" +var gtePG17Fast json.RawMessage -//go:embed "generated/lt_pg17_metrics.json" -var ltPG17 json.RawMessage +//go:embed "generated/lt_pg17_fast_metrics.json" +var ltPG17Fast json.RawMessage -//go:embed "generated/gte_pg16_metrics.json" -var gtePG16 json.RawMessage +//go:embed "generated/eq_pg16_fast_metrics.json" +var eqPG16Fast json.RawMessage -//go:embed "generated/lt_pg16_metrics.json" -var ltPG16 json.RawMessage +//go:embed "generated/lt_pg16_fast_metrics.json" +var ltPG16Fast json.RawMessage type queryMetrics struct { Metrics []*metric `json:"metrics"` @@ -59,7 +60,7 @@ type metric struct { } func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresCluster, config *Config) { - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if OpenTelemetryMetricsEnabled(ctx, inCluster) { log := logging.FromContext(ctx) var err error @@ -67,23 +68,32 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust // will continually append to it and blow up our ConfigMap fiveSecondMetricsClone := slices.Clone(fiveSecondMetrics) fiveMinuteMetricsClone := slices.Clone(fiveMinuteMetrics) + fiveMinutePerDBMetricsClone := slices.Clone(fiveMinutePerDBMetrics) if inCluster.Spec.PostgresVersion >= 17 { - fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, gtePG17) + fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, gtePG17Fast) + if err != nil { + log.Error(err, "error compiling metrics for postgres 17 and greater") + } } else { - fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, ltPG17) - } - if err != nil { - log.Error(err, "error compiling postgres metrics") + fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, ltPG17Fast) + if err != nil { + log.Error(err, "error compiling metrics for postgres versions less 
than 17") + } } - if inCluster.Spec.PostgresVersion >= 16 { - fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, gtePG16) - } else { - fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, ltPG16) + if inCluster.Spec.PostgresVersion == 16 { + fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, eqPG16Fast) } if err != nil { - log.Error(err, "error compiling postgres metrics") + log.Error(err, "error compiling metrics for postgres 16") + } + + if inCluster.Spec.PostgresVersion < 16 { + fiveSecondMetricsClone, err = appendToJSONArray(fiveSecondMetricsClone, ltPG16Fast) + if err != nil { + log.Error(err, "error compiling fast metrics for postgres versions less than 16") + } } // Remove any queries that user has specified in the spec @@ -96,7 +106,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust var fiveSecondMetricsArr []queryMetrics err := json.Unmarshal(fiveSecondMetricsClone, &fiveSecondMetricsArr) if err != nil { - log.Error(err, "error compiling postgres metrics") + log.Error(err, "error compiling five second postgres metrics") } // Remove any specified metrics from the five second metrics @@ -107,19 +117,31 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust var fiveMinuteMetricsArr []queryMetrics err = json.Unmarshal(fiveMinuteMetricsClone, &fiveMinuteMetricsArr) if err != nil { - log.Error(err, "error compiling postgres metrics") + log.Error(err, "error compiling five minute postgres metrics") } // Remove any specified metrics from the five minute metrics fiveMinuteMetricsArr = removeMetricsFromQueries( inCluster.Spec.Instrumentation.Metrics.CustomQueries.Remove, fiveMinuteMetricsArr) + // Convert json to array of queryMetrics objects + var fiveMinutePerDBMetricsArr []queryMetrics + err = json.Unmarshal(fiveMinutePerDBMetricsClone, &fiveMinutePerDBMetricsArr) + if err != nil { + log.Error(err, "error compiling per-db postgres metrics") + } + + // Remove any specified metrics from the five minute per-db metrics + fiveMinutePerDBMetricsArr = removeMetricsFromQueries( + inCluster.Spec.Instrumentation.Metrics.CustomQueries.Remove, fiveMinutePerDBMetricsArr) + // Convert back to json data // The error return value can be ignored as the errchkjson linter // deems the []queryMetrics to be a safe argument: // https://github.com/breml/errchkjson fiveSecondMetricsClone, _ = json.Marshal(fiveSecondMetricsArr) fiveMinuteMetricsClone, _ = json.Marshal(fiveMinuteMetricsArr) + fiveMinutePerDBMetricsClone, _ = json.Marshal(fiveMinutePerDBMetricsArr) } // Add Prometheus exporter @@ -131,10 +153,10 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust "driver": "postgres", "datasource": fmt.Sprintf( `host=localhost dbname=postgres port=5432 user=%s password=${env:PGPASSWORD}`, - pgmonitor.MonitoringUser), + MonitoringUser), "collection_interval": "5s", // Give Postgres time to finish setup. - "initial_delay": "10s", + "initial_delay": "15s", "queries": slices.Clone(fiveSecondMetricsClone), } @@ -142,13 +164,21 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust "driver": "postgres", "datasource": fmt.Sprintf( `host=localhost dbname=postgres port=5432 user=%s password=${env:PGPASSWORD}`, - pgmonitor.MonitoringUser), + MonitoringUser), "collection_interval": "300s", // Give Postgres time to finish setup. 
- "initial_delay": "10s", + "initial_delay": "15s", "queries": slices.Clone(fiveMinuteMetricsClone), } + // If there are exporters to be added to the metrics pipelines defined + // in the spec, add them to the pipeline. + exporters := []ComponentID{Prometheus} + if inCluster.Spec.Instrumentation.Metrics != nil && + inCluster.Spec.Instrumentation.Metrics.Exporters != nil { + exporters = append(exporters, inCluster.Spec.Instrumentation.Metrics.Exporters...) + } + // Add Metrics Pipeline config.Pipelines[PostgresMetrics] = Pipeline{ Receivers: []ComponentID{FiveSecondSqlQuery, FiveMinuteSqlQuery}, @@ -156,34 +186,68 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust SubSecondBatchProcessor, CompactingProcessor, }, - Exporters: []ComponentID{Prometheus}, + Exporters: exporters, } - // Add custom queries if they are defined in the spec + // Add custom queries and per-db metrics if they are defined in the spec if inCluster.Spec.Instrumentation != nil && - inCluster.Spec.Instrumentation.Metrics != nil && - inCluster.Spec.Instrumentation.Metrics.CustomQueries != nil && - inCluster.Spec.Instrumentation.Metrics.CustomQueries.Add != nil { - - for _, querySet := range inCluster.Spec.Instrumentation.Metrics.CustomQueries.Add { - // Create a receiver for the query set - receiverName := "sqlquery/" + querySet.Name - config.Receivers[receiverName] = map[string]any{ - "driver": "postgres", - "datasource": fmt.Sprintf( - `host=localhost dbname=postgres port=5432 user=%s password=${env:PGPASSWORD}`, - pgmonitor.MonitoringUser), - "collection_interval": querySet.CollectionInterval, - // Give Postgres time to finish setup. - "initial_delay": "10s", - "queries": "${file:/etc/otel-collector/" + - querySet.Name + "/" + querySet.Queries.Key + "}", + inCluster.Spec.Instrumentation.Metrics != nil { + + if inCluster.Spec.Instrumentation.Metrics.CustomQueries != nil && + inCluster.Spec.Instrumentation.Metrics.CustomQueries.Add != nil { + + for _, querySet := range inCluster.Spec.Instrumentation.Metrics.CustomQueries.Add { + // Create a receiver for the query set + + dbs := []string{"postgres"} + if len(querySet.Databases) != 0 { + dbs = querySet.Databases + } + for _, db := range dbs { + receiverName := fmt.Sprintf( + "sqlquery/%s-%s", querySet.Name, db) + config.Receivers[receiverName] = map[string]any{ + "driver": "postgres", + "datasource": fmt.Sprintf( + `host=localhost dbname=%s port=5432 user=%s password=${env:PGPASSWORD}`, + db, + MonitoringUser), + "collection_interval": querySet.CollectionInterval, + // Give Postgres time to finish setup. + "initial_delay": "15s", + "queries": "${file:/etc/otel-collector/" + + querySet.Name + "/" + querySet.Queries.Key + "}", + } + + // Add the receiver to the pipeline + pipeline := config.Pipelines[PostgresMetrics] + pipeline.Receivers = append(pipeline.Receivers, receiverName) + config.Pipelines[PostgresMetrics] = pipeline + } } + } + if inCluster.Spec.Instrumentation.Metrics.PerDBMetricTargets != nil { + + for _, db := range inCluster.Spec.Instrumentation.Metrics.PerDBMetricTargets { + // Create a receiver for the query set for the db + receiverName := "sqlquery/" + db + config.Receivers[receiverName] = map[string]any{ + "driver": "postgres", + "datasource": fmt.Sprintf( + `host=localhost dbname=%s port=5432 user=%s password=${env:PGPASSWORD}`, + db, + MonitoringUser), + "collection_interval": "5m", + // Give Postgres time to finish setup. 
+ "initial_delay": "15s", + "queries": slices.Clone(fiveMinutePerDBMetricsClone), + } - // Add the receiver to the pipeline - pipeline := config.Pipelines[PostgresMetrics] - pipeline.Receivers = append(pipeline.Receivers, receiverName) - config.Pipelines[PostgresMetrics] = pipeline + // Add the receiver to the pipeline + pipeline := config.Pipelines[PostgresMetrics] + pipeline.Receivers = append(pipeline.Receivers, receiverName) + config.Pipelines[PostgresMetrics] = pipeline + } } } } diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go index a6736d66cc..89f5f52255 100644 --- a/internal/collector/postgres_test.go +++ b/internal/collector/postgres_test.go @@ -12,11 +12,12 @@ import ( "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/postgres" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) func TestEnablePostgresLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -25,6 +26,9 @@ func TestEnablePostgresLogging(t *testing.T) { cluster := new(v1beta1.PostgresCluster) cluster.Spec.PostgresVersion = 99 + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) config := NewConfig(nil) params := postgres.NewParameterSet() @@ -67,6 +71,9 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgbackrest resource/postgres: attributes: - action: insert @@ -78,6 +85,9 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: postgres - action: insert key: db.system value: postgresql @@ -90,107 +100,126 @@ processors: timeout: 30s transform/pgbackrest_logs: log_statements: - - context: log - statements: + - statements: - set(instrumentation_scope.name, "pgbackrest") - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") - - 'merge_maps(cache, ExtractPatterns(body, "^(?\\d{4}-\\d{2}-\\d{2} + - 'merge_maps(log.cache, ExtractPatterns(log.body, "^(?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P\\d{2,3})\\s*(?\\S*): - (?(?s).*)$"), "insert") where Len(body) > 0' - - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG" - - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"]) - - set(attributes["process.pid"], cache["process_id"]) - - set(attributes["log.record.original"], body) - - set(body, cache["message"]) + (?(?s).*)$"), "insert") where Len(log.body) > 0' + - set(log.severity_text, log.cache["error_severity"]) where IsString(log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == + "TRACE" + - 
set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text == + "DETAIL" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARN" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.time, Time(log.cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where + IsString(log.cache["timestamp"]) + - set(log.attributes["process.pid"], log.cache["process_id"]) + - set(log.attributes["log.record.original"], log.body) + - set(log.body, log.cache["message"]) transform/postgres_logs: log_statements: - conditions: - - body["format"] == "csv" - context: log + - log.body["format"] == "csv" statements: - - set(cache, ParseCSV(body["original"], body["headers"], delimiter=",", mode="strict")) - - merge_maps(cache, ExtractPatterns(cache["connection_from"], "(?:^[[]local[]]:(?.+)|:(?[^:]+))$"), - "insert") where Len(cache["connection_from"]) > 0 - - set(cache["remote_host"], Substring(cache["connection_from"], 0, Len(cache["connection_from"]) - - Len(cache["remote_port"]) - 1)) where Len(cache["connection_from"]) > 0 - and IsString(cache["remote_port"]) - - set(cache["remote_host"], cache["connection_from"]) where Len(cache["connection_from"]) - > 0 and not IsString(cache["remote_host"]) - - merge_maps(cache, ExtractPatterns(cache["location"], "^(?:(?[^,]+), - )?(?[^:]+):(?\\d+)$"), "insert") where Len(cache["location"]) + - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",", + mode="strict")) + - merge_maps(log.cache, ExtractPatterns(log.cache["connection_from"], "(?:^[[]local[]]:(?.+)|:(?[^:]+))$"), + "insert") where Len(log.cache["connection_from"]) > 0 + - set(log.cache["remote_host"], Substring(log.cache["connection_from"], 0, Len(log.cache["connection_from"]) + - Len(log.cache["remote_port"]) - 1)) where Len(log.cache["connection_from"]) + > 0 and IsString(log.cache["remote_port"]) + - set(log.cache["remote_host"], log.cache["connection_from"]) where Len(log.cache["connection_from"]) + > 0 and not IsString(log.cache["remote_host"]) + - merge_maps(log.cache, ExtractPatterns(log.cache["location"], "^(?:(?[^,]+), + )?(?[^:]+):(?\\d+)$"), "insert") where Len(log.cache["location"]) > 0 - - set(cache["cursor_position"], Double(cache["cursor_position"])) where IsMatch(cache["cursor_position"], - "^[0-9.]+$") - - set(cache["file_line_num"], Double(cache["file_line_num"])) where IsMatch(cache["file_line_num"], + - set(log.cache["cursor_position"], Double(log.cache["cursor_position"])) where + IsMatch(log.cache["cursor_position"], "^[0-9.]+$") + - set(log.cache["file_line_num"], Double(log.cache["file_line_num"])) where + IsMatch(log.cache["file_line_num"], "^[0-9.]+$") + - set(log.cache["internal_position"], Double(log.cache["internal_position"])) + where IsMatch(log.cache["internal_position"], "^[0-9.]+$") + - set(log.cache["leader_pid"], Double(log.cache["leader_pid"])) where IsMatch(log.cache["leader_pid"], "^[0-9.]+$") - - set(cache["internal_position"], Double(cache["internal_position"])) where - IsMatch(cache["internal_position"], "^[0-9.]+$") - - set(cache["leader_pid"], Double(cache["leader_pid"])) where IsMatch(cache["leader_pid"], + - set(log.cache["line_num"], Double(log.cache["line_num"])) where IsMatch(log.cache["line_num"], "^[0-9.]+$") - - set(cache["line_num"], Double(cache["line_num"])) where 
IsMatch(cache["line_num"], + - set(log.cache["pid"], Double(log.cache["pid"])) where IsMatch(log.cache["pid"], "^[0-9.]+$") - - set(cache["pid"], Double(cache["pid"])) where IsMatch(cache["pid"], "^[0-9.]+$") - - set(cache["query_id"], Double(cache["query_id"])) where IsMatch(cache["query_id"], + - set(log.cache["query_id"], Double(log.cache["query_id"])) where IsMatch(log.cache["query_id"], "^[0-9.]+$") - - set(cache["remote_port"], Double(cache["remote_port"])) where IsMatch(cache["remote_port"], + - set(log.cache["remote_port"], Double(log.cache["remote_port"])) where IsMatch(log.cache["remote_port"], "^[0-9.]+$") - - set(body["parsed"], cache) - - context: log - statements: + - set(log.body["parsed"], log.cache) + - statements: - set(instrumentation_scope.name, "postgres") - set(instrumentation_scope.version, resource.attributes["db.version"]) - - set(cache, body["parsed"]) where body["format"] == "csv" - - set(cache, ParseJSON(body["original"])) where body["format"] == "json" - - set(severity_text, cache["error_severity"]) - - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "DEBUG5" - - set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == "DEBUG4" - - set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == "DEBUG3" - - set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == "DEBUG2" - - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG1" - - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO" - or severity_text == "LOG" - - set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == "NOTICE" - - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING" - - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR" - - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL" - - set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == "PANIC" - - set(time, Time(cache["timestamp"], "%F %T.%L %Z")) + - set(log.cache, log.body["parsed"]) where log.body["format"] == "csv" + - set(log.cache, ParseJSON(log.body["original"])) where log.body["format"] == + "json" + - set(log.severity_text, log.cache["error_severity"]) + - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text == + "DEBUG5" + - set(log.severity_number, SEVERITY_NUMBER_TRACE2) where log.severity_text == + "DEBUG4" + - set(log.severity_number, SEVERITY_NUMBER_TRACE3) where log.severity_text == + "DEBUG3" + - set(log.severity_number, SEVERITY_NUMBER_TRACE4) where log.severity_text == + "DEBUG2" + - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text == + "DEBUG1" + - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text == + "INFO" or log.severity_text == "LOG" + - set(log.severity_number, SEVERITY_NUMBER_INFO2) where log.severity_text == + "NOTICE" + - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text == + "WARNING" + - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text == + "ERROR" + - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text == + "FATAL" + - set(log.severity_number, SEVERITY_NUMBER_FATAL2) where log.severity_text == + "PANIC" + - set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"]) - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0") - set(resource.attributes["db.system"], "postgresql") - - set(attributes["log.record.original"], body["original"]) - - set(body, 
cache) - - set(attributes["client.address"], body["remote_host"]) where IsString(body["remote_host"]) - - set(attributes["client.port"], Int(body["remote_port"])) where IsDouble(body["remote_port"]) - - set(attributes["code.filepath"], body["file_name"]) where IsString(body["file_name"]) - - set(attributes["code.function"], body["func_name"]) where IsString(body["func_name"]) - - set(attributes["code.lineno"], Int(body["file_line_num"])) where IsDouble(body["file_line_num"]) - - set(attributes["db.namespace"], body["dbname"]) where IsString(body["dbname"]) - - set(attributes["db.response.status_code"], body["state_code"]) where IsString(body["state_code"]) - - set(attributes["process.creation.time"], Concat([ Substring(body["session_start"], - 0, 10), "T", Substring(body["session_start"], 11, 8), "Z"], "")) where IsMatch(body["session_start"], - "^[^ ]{10} [^ ]{8} UTC$") - - set(attributes["process.pid"], Int(body["pid"])) where IsDouble(body["pid"]) - - set(attributes["process.title"], body["ps"]) where IsString(body["ps"]) - - set(attributes["user.name"], body["user"]) where IsString(body["user"]) + - set(log.attributes["log.record.original"], log.body["original"]) + - set(log.body, log.cache) + - set(log.attributes["client.address"], log.body["remote_host"]) where IsString(log.body["remote_host"]) + - set(log.attributes["client.port"], Int(log.body["remote_port"])) where IsDouble(log.body["remote_port"]) + - set(log.attributes["code.filepath"], log.body["file_name"]) where IsString(log.body["file_name"]) + - set(log.attributes["code.function"], log.body["func_name"]) where IsString(log.body["func_name"]) + - set(log.attributes["code.lineno"], Int(log.body["file_line_num"])) where IsDouble(log.body["file_line_num"]) + - set(log.attributes["db.namespace"], log.body["dbname"]) where IsString(log.body["dbname"]) + - set(log.attributes["db.response.status_code"], log.body["state_code"]) where + IsString(log.body["state_code"]) + - set(log.attributes["process.creation.time"], Concat([ Substring(log.body["session_start"], + 0, 10), "T", Substring(log.body["session_start"], 11, 8), "Z"], "")) where + IsMatch(log.body["session_start"], "^[^ ]{10} [^ ]{8} UTC$") + - set(log.attributes["process.pid"], Int(log.body["pid"])) where IsDouble(log.body["pid"]) + - set(log.attributes["process.title"], log.body["ps"]) where IsString(log.body["ps"]) + - set(log.attributes["user.name"], log.body["user"]) where IsString(log.body["user"]) - conditions: - - 'Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT: - "' - context: log + - 'Len(log.body["message"]) > 7 and Substring(log.body["message"], 0, 7) == + "AUDIT: "' statements: - - set(body["pgaudit"], ParseCSV(Substring(body["message"], 7, Len(body["message"]) + - set(log.body["pgaudit"], ParseCSV(Substring(log.body["message"], 7, Len(log.body["message"]) - 7), "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter", delimiter=",", mode="strict")) - - set(instrumentation_scope.name, "pgaudit") where Len(body["pgaudit"]) > 0 + - set(instrumentation_scope.name, "pgaudit") where Len(log.body["pgaudit"]) + > 0 receivers: filelog/pgbackrest_log: include: - /pgdata/pgbackrest/log/*.log + - /pgdata/pgbackrest/log/*.log.1 multiline: line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19} storage: file_storage/pgbackrest_logs @@ -308,6 +337,9 @@ processors: - action: insert key: k8s.pod.name value: ${env:K8S_POD_NAME} + - action: insert + key: process.executable.name + value: pgbackrest 
  resource/postgres:
    attributes:
      - action: insert
@@ -319,6 +351,9 @@
       - action: insert
         key: k8s.pod.name
         value: ${env:K8S_POD_NAME}
+      - action: insert
+        key: process.executable.name
+        value: postgres
       - action: insert
         key: db.system
         value: postgresql
@@ -331,107 +366,126 @@ processors:
     timeout: 30s
   transform/pgbackrest_logs:
     log_statements:
-    - context: log
-      statements:
+    - statements:
       - set(instrumentation_scope.name, "pgbackrest")
       - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0")
-      - 'merge_maps(cache, ExtractPatterns(body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2}
+      - 'merge_maps(log.cache, ExtractPatterns(log.body, "^(?<timestamp>\\d{4}-\\d{2}-\\d{2}
         \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (?P<process_id>\\d{2,3})\\s*(?<error_severity>\\S*):
-        (?<message>(?s).*)$"), "insert") where Len(body) > 0'
-      - set(severity_text, cache["error_severity"]) where IsString(cache["error_severity"])
-      - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "TRACE"
-      - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG"
-      - set(severity_number, SEVERITY_NUMBER_DEBUG2) where severity_text == "DETAIL"
-      - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"
-      - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARN"
-      - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"
-      - set(time, Time(cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where IsString(cache["timestamp"])
-      - set(attributes["process.pid"], cache["process_id"])
-      - set(attributes["log.record.original"], body)
-      - set(body, cache["message"])
+        (?<message>(?s).*)$"), "insert") where Len(log.body) > 0'
+      - set(log.severity_text, log.cache["error_severity"]) where IsString(log.cache["error_severity"])
+      - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text ==
+        "TRACE"
+      - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text ==
+        "DEBUG"
+      - set(log.severity_number, SEVERITY_NUMBER_DEBUG2) where log.severity_text ==
+        "DETAIL"
+      - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text ==
+        "INFO"
+      - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text ==
+        "WARN"
+      - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text ==
+        "ERROR"
+      - set(log.time, Time(log.cache["timestamp"], "%Y-%m-%d %H:%M:%S.%L")) where
+        IsString(log.cache["timestamp"])
+      - set(log.attributes["process.pid"], log.cache["process_id"])
+      - set(log.attributes["log.record.original"], log.body)
+      - set(log.body, log.cache["message"])
   transform/postgres_logs:
     log_statements:
     - conditions:
-      - body["format"] == "csv"
-      context: log
+      - log.body["format"] == "csv"
       statements:
-      - set(cache, ParseCSV(body["original"], body["headers"], delimiter=",", mode="strict"))
-      - merge_maps(cache, ExtractPatterns(cache["connection_from"], "(?:^[[]local[]]:(?<remote_port>.+)|:(?<remote_port>[^:]+))$"),
-        "insert") where Len(cache["connection_from"]) > 0
-      - set(cache["remote_host"], Substring(cache["connection_from"], 0, Len(cache["connection_from"])
-        - Len(cache["remote_port"]) - 1)) where Len(cache["connection_from"]) > 0
-        and IsString(cache["remote_port"])
-      - set(cache["remote_host"], cache["connection_from"]) where Len(cache["connection_from"])
-        > 0 and not IsString(cache["remote_host"])
-      - merge_maps(cache, ExtractPatterns(cache["location"], "^(?:(?<func_name>[^,]+),
-        )?(?<file_name>[^:]+):(?<file_line_num>\\d+)$"), "insert") where Len(cache["location"])
+      - set(log.cache, ParseCSV(log.body["original"], log.body["headers"], delimiter=",",
+        mode="strict"))
+      - merge_maps(log.cache, ExtractPatterns(log.cache["connection_from"], "(?:^[[]local[]]:(?<remote_port>.+)|:(?<remote_port>[^:]+))$"),
+        "insert") where Len(log.cache["connection_from"]) > 0
+      - set(log.cache["remote_host"], Substring(log.cache["connection_from"], 0, Len(log.cache["connection_from"])
+        - Len(log.cache["remote_port"]) - 1)) where Len(log.cache["connection_from"])
+        > 0 and IsString(log.cache["remote_port"])
+      - set(log.cache["remote_host"], log.cache["connection_from"]) where Len(log.cache["connection_from"])
+        > 0 and not IsString(log.cache["remote_host"])
+      - merge_maps(log.cache, ExtractPatterns(log.cache["location"], "^(?:(?<func_name>[^,]+),
+        )?(?<file_name>[^:]+):(?<file_line_num>\\d+)$"), "insert") where Len(log.cache["location"])
         > 0
-      - set(cache["cursor_position"], Double(cache["cursor_position"])) where IsMatch(cache["cursor_position"],
+      - set(log.cache["cursor_position"], Double(log.cache["cursor_position"])) where
+        IsMatch(log.cache["cursor_position"], "^[0-9.]+$")
+      - set(log.cache["file_line_num"], Double(log.cache["file_line_num"])) where
+        IsMatch(log.cache["file_line_num"], "^[0-9.]+$")
+      - set(log.cache["internal_position"], Double(log.cache["internal_position"]))
+        where IsMatch(log.cache["internal_position"], "^[0-9.]+$")
+      - set(log.cache["leader_pid"], Double(log.cache["leader_pid"])) where IsMatch(log.cache["leader_pid"],
         "^[0-9.]+$")
-      - set(cache["file_line_num"], Double(cache["file_line_num"])) where IsMatch(cache["file_line_num"],
+      - set(log.cache["line_num"], Double(log.cache["line_num"])) where IsMatch(log.cache["line_num"],
         "^[0-9.]+$")
-      - set(cache["internal_position"], Double(cache["internal_position"])) where
-        IsMatch(cache["internal_position"], "^[0-9.]+$")
-      - set(cache["leader_pid"], Double(cache["leader_pid"])) where IsMatch(cache["leader_pid"],
+      - set(log.cache["pid"], Double(log.cache["pid"])) where IsMatch(log.cache["pid"],
         "^[0-9.]+$")
-      - set(cache["line_num"], Double(cache["line_num"])) where IsMatch(cache["line_num"],
+      - set(log.cache["query_id"], Double(log.cache["query_id"])) where IsMatch(log.cache["query_id"],
         "^[0-9.]+$")
-      - set(cache["pid"], Double(cache["pid"])) where IsMatch(cache["pid"], "^[0-9.]+$")
-      - set(cache["query_id"], Double(cache["query_id"])) where IsMatch(cache["query_id"],
+      - set(log.cache["remote_port"], Double(log.cache["remote_port"])) where IsMatch(log.cache["remote_port"],
         "^[0-9.]+$")
-      - set(cache["remote_port"], Double(cache["remote_port"])) where IsMatch(cache["remote_port"],
-        "^[0-9.]+$")
-      - set(body["parsed"], cache)
-    - context: log
-      statements:
+      - set(log.body["parsed"], log.cache)
+    - statements:
       - set(instrumentation_scope.name, "postgres")
       - set(instrumentation_scope.version, resource.attributes["db.version"])
-      - set(cache, body["parsed"]) where body["format"] == "csv"
-      - set(cache, ParseJSON(body["original"])) where body["format"] == "json"
-      - set(severity_text, cache["error_severity"])
-      - set(severity_number, SEVERITY_NUMBER_TRACE) where severity_text == "DEBUG5"
-      - set(severity_number, SEVERITY_NUMBER_TRACE2) where severity_text == "DEBUG4"
-      - set(severity_number, SEVERITY_NUMBER_TRACE3) where severity_text == "DEBUG3"
-      - set(severity_number, SEVERITY_NUMBER_TRACE4) where severity_text == "DEBUG2"
-      - set(severity_number, SEVERITY_NUMBER_DEBUG) where severity_text == "DEBUG1"
-      - set(severity_number, SEVERITY_NUMBER_INFO) where severity_text == "INFO"
-        or severity_text == "LOG"
-      - set(severity_number, SEVERITY_NUMBER_INFO2) where severity_text == "NOTICE"
-      - set(severity_number, SEVERITY_NUMBER_WARN) where severity_text == "WARNING"
-      - set(severity_number, SEVERITY_NUMBER_ERROR) where severity_text == "ERROR"
-      - set(severity_number, SEVERITY_NUMBER_FATAL) where severity_text == "FATAL"
-      - set(severity_number, SEVERITY_NUMBER_FATAL2) where severity_text == "PANIC"
-      - set(time, Time(cache["timestamp"], "%F %T.%L %Z"))
+      - set(log.cache, log.body["parsed"]) where log.body["format"] == "csv"
+      - set(log.cache, ParseJSON(log.body["original"])) where log.body["format"] ==
+        "json"
+      - set(log.severity_text, log.cache["error_severity"])
+      - set(log.severity_number, SEVERITY_NUMBER_TRACE) where log.severity_text ==
+        "DEBUG5"
+      - set(log.severity_number, SEVERITY_NUMBER_TRACE2) where log.severity_text ==
+        "DEBUG4"
+      - set(log.severity_number, SEVERITY_NUMBER_TRACE3) where log.severity_text ==
+        "DEBUG3"
+      - set(log.severity_number, SEVERITY_NUMBER_TRACE4) where log.severity_text ==
+        "DEBUG2"
+      - set(log.severity_number, SEVERITY_NUMBER_DEBUG) where log.severity_text ==
+        "DEBUG1"
+      - set(log.severity_number, SEVERITY_NUMBER_INFO) where log.severity_text ==
+        "INFO" or log.severity_text == "LOG"
+      - set(log.severity_number, SEVERITY_NUMBER_INFO2) where log.severity_text ==
+        "NOTICE"
+      - set(log.severity_number, SEVERITY_NUMBER_WARN) where log.severity_text ==
+        "WARNING"
+      - set(log.severity_number, SEVERITY_NUMBER_ERROR) where log.severity_text ==
+        "ERROR"
+      - set(log.severity_number, SEVERITY_NUMBER_FATAL) where log.severity_text ==
+        "FATAL"
+      - set(log.severity_number, SEVERITY_NUMBER_FATAL2) where log.severity_text ==
+        "PANIC"
+      - set(log.time, Time(log.cache["timestamp"], "%F %T.%L %Z")) where IsString(log.cache["timestamp"])
       - set(instrumentation_scope.schema_url, "https://opentelemetry.io/schemas/1.29.0")
       - set(resource.attributes["db.system"], "postgresql")
-      - set(attributes["log.record.original"], body["original"])
-      - set(body, cache)
-      - set(attributes["client.address"], body["remote_host"]) where IsString(body["remote_host"])
-      - set(attributes["client.port"], Int(body["remote_port"])) where IsDouble(body["remote_port"])
-      - set(attributes["code.filepath"], body["file_name"]) where IsString(body["file_name"])
-      - set(attributes["code.function"], body["func_name"]) where IsString(body["func_name"])
-      - set(attributes["code.lineno"], Int(body["file_line_num"])) where IsDouble(body["file_line_num"])
-      - set(attributes["db.namespace"], body["dbname"]) where IsString(body["dbname"])
-      - set(attributes["db.response.status_code"], body["state_code"]) where IsString(body["state_code"])
-      - set(attributes["process.creation.time"], Concat([ Substring(body["session_start"],
-        0, 10), "T", Substring(body["session_start"], 11, 8), "Z"], "")) where IsMatch(body["session_start"],
-        "^[^ ]{10} [^ ]{8} UTC$")
-      - set(attributes["process.pid"], Int(body["pid"])) where IsDouble(body["pid"])
-      - set(attributes["process.title"], body["ps"]) where IsString(body["ps"])
-      - set(attributes["user.name"], body["user"]) where IsString(body["user"])
+      - set(log.attributes["log.record.original"], log.body["original"])
+      - set(log.body, log.cache)
+      - set(log.attributes["client.address"], log.body["remote_host"]) where IsString(log.body["remote_host"])
+      - set(log.attributes["client.port"], Int(log.body["remote_port"])) where IsDouble(log.body["remote_port"])
+      - set(log.attributes["code.filepath"], log.body["file_name"]) where IsString(log.body["file_name"])
+      - set(log.attributes["code.function"], log.body["func_name"]) where IsString(log.body["func_name"])
+      - set(log.attributes["code.lineno"], Int(log.body["file_line_num"])) where
+        IsDouble(log.body["file_line_num"])
+      - set(log.attributes["db.namespace"], log.body["dbname"]) where IsString(log.body["dbname"])
+      - set(log.attributes["db.response.status_code"], log.body["state_code"]) where
+        IsString(log.body["state_code"])
+      - set(log.attributes["process.creation.time"], Concat([ Substring(log.body["session_start"],
+        0, 10), "T", Substring(log.body["session_start"], 11, 8), "Z"], "")) where
+        IsMatch(log.body["session_start"], "^[^ ]{10} [^ ]{8} UTC$")
+      - set(log.attributes["process.pid"], Int(log.body["pid"])) where IsDouble(log.body["pid"])
+      - set(log.attributes["process.title"], log.body["ps"]) where IsString(log.body["ps"])
+      - set(log.attributes["user.name"], log.body["user"]) where IsString(log.body["user"])
     - conditions:
-      - 'Len(body["message"]) > 7 and Substring(body["message"], 0, 7) == "AUDIT:
-        "'
-      context: log
+      - 'Len(log.body["message"]) > 7 and Substring(log.body["message"], 0, 7) ==
+        "AUDIT: "'
       statements:
-      - set(body["pgaudit"], ParseCSV(Substring(body["message"], 7, Len(body["message"])
+      - set(log.body["pgaudit"], ParseCSV(Substring(log.body["message"], 7, Len(log.body["message"])
         - 7), "audit_type,statement_id,substatement_id,class,command,object_type,object_name,statement,parameter",
         delimiter=",", mode="strict"))
-      - set(instrumentation_scope.name, "pgaudit") where Len(body["pgaudit"]) > 0
+      - set(instrumentation_scope.name, "pgaudit") where Len(log.body["pgaudit"])
+        > 0
 receivers:
   filelog/pgbackrest_log:
     include:
     - /pgdata/pgbackrest/log/*.log
+    - /pgdata/pgbackrest/log/*.log.1
     multiline:
       line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}|^-{19}
     storage: file_storage/pgbackrest_logs
@@ -493,3 +547,137 @@ service:
 `)
 	})
 }
+
+func TestEnablePostgresMetrics(t *testing.T) {
+	t.Run("EmptyInstrumentationSpec", func(t *testing.T) {
+		gate := feature.NewGate()
+		assert.NilError(t, gate.SetFromMap(map[string]bool{
+			feature.OpenTelemetryMetrics: true,
+		}))
+		ctx := feature.NewContext(context.Background(), gate)
+
+		cluster := new(v1beta1.PostgresCluster)
+		cluster.Spec.PostgresVersion = 99
+		require.UnmarshalInto(t, &cluster.Spec, `{
+			instrumentation: {}
+		}`)
+
+		config := NewConfig(nil)
+
+		EnablePostgresMetrics(ctx, cluster, config)
+
+		// The queries aren't really needed for this test and the sheer number of queries
+		// would make this file excessively long (and string formatting presented its
+		// own formatting headaches), so I am removing them
+		config.Receivers["sqlquery/5s"] = nil
+		config.Receivers["sqlquery/300s"] = nil
+
+		result, err := config.ToYAML()
+		assert.NilError(t, err)
+		assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT.
+# Your changes will not be saved.
+exporters:
+  debug:
+    verbosity: detailed
+  prometheus/cpk-monitoring:
+    endpoint: 0.0.0.0:9187
+extensions: {}
+processors:
+  batch/1s:
+    timeout: 1s
+  batch/200ms:
+    timeout: 200ms
+  batch/logs:
+    send_batch_size: 8192
+    timeout: 200ms
+  groupbyattrs/compact: {}
+  resourcedetection:
+    detectors: []
+    override: false
+    timeout: 30s
+receivers:
+  sqlquery/5s: null
+  sqlquery/300s: null
+service:
+  extensions: []
+  pipelines:
+    metrics/postgres:
+      exporters:
+      - prometheus/cpk-monitoring
+      processors:
+      - batch/200ms
+      - groupbyattrs/compact
+      receivers:
+      - sqlquery/5s
+      - sqlquery/300s
+`)
+	})
+
+	t.Run("InstrumentationSpecDefined", func(t *testing.T) {
+		gate := feature.NewGate()
+		assert.NilError(t, gate.SetFromMap(map[string]bool{
+			feature.OpenTelemetryMetrics: true,
+		}))
+		ctx := feature.NewContext(context.Background(), gate)
+
+		cluster := new(v1beta1.PostgresCluster)
+		cluster.Spec.PostgresVersion = 99
+		cluster.Spec.Instrumentation = testInstrumentationSpec()
+
+		config := NewConfig(cluster.Spec.Instrumentation)
+
+		EnablePostgresMetrics(ctx, cluster, config)
+
+		// The queries aren't really needed for this test and the sheer number of queries
+		// would make this file excessively long (and string formatting presented its
+		// own formatting headaches), so I am removing them
+		config.Receivers["sqlquery/5s"] = nil
+		config.Receivers["sqlquery/300s"] = nil
+
+		result, err := config.ToYAML()
+		assert.NilError(t, err)
+		assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT.
+# Your changes will not be saved.
+exporters:
+  debug:
+    verbosity: detailed
+  googlecloud:
+    log:
+      default_log_name: opentelemetry.io/collector-exported-log
+    project: google-project-name
+  prometheus/cpk-monitoring:
+    endpoint: 0.0.0.0:9187
+extensions: {}
+processors:
+  batch/1s:
+    timeout: 1s
+  batch/200ms:
+    timeout: 200ms
+  batch/logs:
+    send_batch_size: 8192
+    timeout: 200ms
+  groupbyattrs/compact: {}
+  resourcedetection:
+    detectors: []
+    override: false
+    timeout: 30s
+receivers:
+  sqlquery/5s: null
+  sqlquery/300s: null
+service:
+  extensions: []
+  pipelines:
+    metrics/postgres:
+      exporters:
+      - prometheus/cpk-monitoring
+      - googlecloud
+      processors:
+      - batch/200ms
+      - groupbyattrs/compact
+      receivers:
+      - sqlquery/5s
+      - sqlquery/300s
+`)
+
+	})
+}
diff --git a/internal/collector/util.go b/internal/collector/util.go
new file mode 100644
index 0000000000..72cf8641ef
--- /dev/null
+++ b/internal/collector/util.go
@@ -0,0 +1,56 @@
+// Copyright 2025 Crunchy Data Solutions, Inc.
+// +// SPDX-License-Identifier: Apache-2.0 + +package collector + +import ( + "context" + + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +type CrunchyCRD interface { + *v1beta1.PostgresCluster | *v1beta1.PGAdmin | *v1beta1.InstrumentationSpec +} + +func OpenTelemetrySpecPresent[T CrunchyCRD](object T) bool { + + switch v := any(object).(type) { + case *v1beta1.InstrumentationSpec: + return v != nil + case *v1beta1.PostgresCluster: + return v.Spec.Instrumentation != nil + case *v1beta1.PGAdmin: + return v.Spec.Instrumentation != nil + default: + return false + } + +} + +func OpenTelemetryLogsOrMetricsEnabled[T CrunchyCRD]( + ctx context.Context, + object T, +) bool { + return OpenTelemetrySpecPresent(object) && + (feature.Enabled(ctx, feature.OpenTelemetryLogs) || + feature.Enabled(ctx, feature.OpenTelemetryMetrics)) +} + +func OpenTelemetryLogsEnabled[T CrunchyCRD]( + ctx context.Context, + object T, +) bool { + return OpenTelemetrySpecPresent(object) && + feature.Enabled(ctx, feature.OpenTelemetryLogs) +} + +func OpenTelemetryMetricsEnabled[T CrunchyCRD]( + ctx context.Context, + object T, +) bool { + return OpenTelemetrySpecPresent(object) && + feature.Enabled(ctx, feature.OpenTelemetryMetrics) +} diff --git a/internal/config/config.go b/internal/config/config.go index cc72b921ed..ed8d87c5d0 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -173,7 +173,7 @@ func VerifyImageValues(cluster *v1beta1.PostgresCluster) error { } if len(images) > 0 { - return fmt.Errorf("Missing image(s): %s", images) + return fmt.Errorf("missing image(s): %s", images) } return nil diff --git a/internal/controller/pgupgrade/jobs.go b/internal/controller/pgupgrade/jobs.go index 4879209734..c7b6e4e010 100644 --- a/internal/controller/pgupgrade/jobs.go +++ b/internal/controller/pgupgrade/jobs.go @@ -192,6 +192,7 @@ func (r *PGUpgradeReconciler) generateUpgradeJob( settings := upgrade.Spec.PGUpgradeSettings.DeepCopy() // When jobs is undefined, use one less than the number of CPUs. + //nolint:gosec // The CPU count is clamped to MaxInt32. if settings.Jobs == 0 && feature.Enabled(ctx, feature.PGUpgradeCPUConcurrency) { wholeCPUs := int32(min(math.MaxInt32, largestWholeCPU(upgrade.Spec.Resources))) settings.Jobs = wholeCPUs - 1 @@ -354,7 +355,7 @@ func pgUpgradeContainerImage(upgrade *v1beta1.PGUpgrade) string { // spec is defined. If it is undefined, an error is returned. 
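The new OpenTelemetry*Enabled helpers above use a generic union constraint (CrunchyCRD) purely to restrict which pointer types callers may pass; since Go cannot access fields through a type-parameter union, OpenTelemetrySpecPresent converts the value back to any and switches on the concrete type. The same pattern in miniature, with hypothetical types standing in for the CRDs:

    package main

    import "fmt"

    type Cat struct{ Indoor bool }
    type Dog struct{ Fenced bool }

    // Pet constrains callers to the two pointer types we know how to inspect,
    // just as CrunchyCRD does for the operator's CRD structs.
    type Pet interface{ *Cat | *Dog }

    // Contained reports whether the animal is enclosed. Field access through
    // a union is not allowed, so the function switches on the concrete type,
    // exactly like OpenTelemetrySpecPresent above.
    func Contained[T Pet](pet T) bool {
        switch v := any(pet).(type) {
        case *Cat:
            return v != nil && v.Indoor
        case *Dog:
            return v != nil && v.Fenced
        default:
            return false
        }
    }

    func main() {
        fmt.Println(Contained(&Cat{Indoor: true})) // true
        fmt.Println(Contained(&Dog{}))             // false
    }

The nil checks inside the switch matter: a typed nil pointer still selects its case, which is how the real helpers treat a missing InstrumentationSpec as "not present" rather than panicking.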
func verifyUpgradeImageValue(upgrade *v1beta1.PGUpgrade) error { if pgUpgradeContainerImage(upgrade) == "" { - return fmt.Errorf("Missing crunchy-upgrade image") + return fmt.Errorf("missing crunchy-upgrade image") } return nil } diff --git a/internal/controller/pgupgrade/pgupgrade_controller.go b/internal/controller/pgupgrade/pgupgrade_controller.go index 06a36574f0..5fbd8262fc 100644 --- a/internal/controller/pgupgrade/pgupgrade_controller.go +++ b/internal/controller/pgupgrade/pgupgrade_controller.go @@ -21,7 +21,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/internal/registration" "github.com/crunchydata/postgres-operator/internal/tracing" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -35,8 +34,7 @@ type PGUpgradeReconciler struct { Client client.Client Owner client.FieldOwner - Recorder record.EventRecorder - Registration registration.Registration + Recorder record.EventRecorder } //+kubebuilder:rbac:groups="batch",resources="jobs",verbs={list,watch} @@ -145,10 +143,6 @@ func (r *PGUpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return } - if !r.UpgradeAuthorized(upgrade) { - return ctrl.Result{}, nil - } - // Set progressing condition to true if it doesn't exist already setStatusToProgressingIfReasonWas("", upgrade) diff --git a/internal/controller/pgupgrade/registration.go b/internal/controller/pgupgrade/registration.go deleted file mode 100644 index 4fbf7a7ce1..0000000000 --- a/internal/controller/pgupgrade/registration.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package pgupgrade - -import ( - "k8s.io/apimachinery/pkg/api/meta" - - "github.com/crunchydata/postgres-operator/internal/registration" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -func (r *PGUpgradeReconciler) UpgradeAuthorized(upgrade *v1beta1.PGUpgrade) bool { - // Allow an upgrade in progress to complete, when the registration requirement is introduced. - // But don't allow new upgrades to be started until a valid token is applied. - progressing := meta.FindStatusCondition(upgrade.Status.Conditions, ConditionPGUpgradeProgressing) != nil - required := r.Registration.Required(r.Recorder, upgrade, &upgrade.Status.Conditions) - - // If a valid token has not been applied, warn the user. - if required && !progressing { - registration.SetRequiredWarning(r.Recorder, upgrade, &upgrade.Status.Conditions) - return false - } - - return true -} diff --git a/internal/controller/pgupgrade/registration_test.go b/internal/controller/pgupgrade/registration_test.go deleted file mode 100644 index 22903d8cdb..0000000000 --- a/internal/controller/pgupgrade/registration_test.go +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package pgupgrade - -import ( - "testing" - - "gotest.tools/v3/assert" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/crunchydata/postgres-operator/internal/controller/runtime" - "github.com/crunchydata/postgres-operator/internal/registration" - "github.com/crunchydata/postgres-operator/internal/testing/cmp" - "github.com/crunchydata/postgres-operator/internal/testing/events" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -func TestUpgradeAuthorized(t *testing.T) { - t.Run("UpgradeAlreadyInProgress", func(t *testing.T) { - reconciler := new(PGUpgradeReconciler) - upgrade := new(v1beta1.PGUpgrade) - - for _, required := range []bool{false, true} { - reconciler.Registration = registration.RegistrationFunc( - func(record.EventRecorder, client.Object, *[]metav1.Condition) bool { - return required - }) - - meta.SetStatusCondition(&upgrade.Status.Conditions, metav1.Condition{ - Type: ConditionPGUpgradeProgressing, - Status: metav1.ConditionTrue, - }) - - result := reconciler.UpgradeAuthorized(upgrade) - assert.Assert(t, result, "expected signal to proceed") - - progressing := meta.FindStatusCondition(upgrade.Status.Conditions, ConditionPGUpgradeProgressing) - assert.Equal(t, progressing.Status, metav1.ConditionTrue) - } - }) - - t.Run("RegistrationRequired", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - upgrade := new(v1beta1.PGUpgrade) - upgrade.Name = "some-upgrade" - - reconciler := PGUpgradeReconciler{ - Recorder: recorder, - Registration: registration.RegistrationFunc( - func(record.EventRecorder, client.Object, *[]metav1.Condition) bool { - return true - }), - } - - meta.RemoveStatusCondition(&upgrade.Status.Conditions, ConditionPGUpgradeProgressing) - - result := reconciler.UpgradeAuthorized(upgrade) - assert.Assert(t, !result, "expected signal to not proceed") - - condition := meta.FindStatusCondition(upgrade.Status.Conditions, v1beta1.Registered) - if assert.Check(t, condition != nil) { - assert.Equal(t, condition.Status, metav1.ConditionFalse) - } - - if assert.Check(t, len(recorder.Events) > 0) { - assert.Equal(t, recorder.Events[0].Type, "Warning") - assert.Equal(t, recorder.Events[0].Regarding.Kind, "PGUpgrade") - assert.Equal(t, recorder.Events[0].Regarding.Name, "some-upgrade") - assert.Assert(t, cmp.Contains(recorder.Events[0].Note, "requires")) - } - }) - - t.Run("RegistrationCompleted", func(t *testing.T) { - reconciler := new(PGUpgradeReconciler) - upgrade := new(v1beta1.PGUpgrade) - - called := false - reconciler.Registration = registration.RegistrationFunc( - func(record.EventRecorder, client.Object, *[]metav1.Condition) bool { - called = true - return false - }) - - meta.RemoveStatusCondition(&upgrade.Status.Conditions, ConditionPGUpgradeProgressing) - - result := reconciler.UpgradeAuthorized(upgrade) - assert.Assert(t, result, "expected signal to proceed") - assert.Assert(t, called, "expected registration package to clear conditions") - }) -} diff --git a/internal/controller/postgrescluster/apply_test.go b/internal/controller/postgrescluster/apply_test.go index 85dbca995d..d2c77ceb27 100644 --- a/internal/controller/postgrescluster/apply_test.go +++ b/internal/controller/postgrescluster/apply_test.go @@ -151,6 +151,14 @@ func TestServerSideApply(t *testing.T) { MatchLabels: map[string]string{"select": name}, } 
sts.Spec.Template.Labels = map[string]string{"select": name} + sts.Spec.Template.Spec = corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "some-container", + Image: "some-image", + }, + }, + } return &sts } diff --git a/internal/controller/postgrescluster/cluster.go b/internal/controller/postgrescluster/cluster.go index ead4881b1e..2ceb30453a 100644 --- a/internal/controller/postgrescluster/cluster.go +++ b/internal/controller/postgrescluster/cluster.go @@ -15,7 +15,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" - "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/patroni" @@ -75,7 +75,7 @@ func (r *Reconciler) patroniLogSize(ctx context.Context, cluster *v1beta1.Postgr sizeInBytes = 25000000 } return sizeInBytes - } else if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + } else if collector.OpenTelemetryLogsEnabled(ctx, cluster) { return 25000000 } return 0 diff --git a/internal/controller/postgrescluster/cluster_test.go b/internal/controller/postgrescluster/cluster_test.go index 6882cfa27b..5fa92d32cf 100644 --- a/internal/controller/postgrescluster/cluster_test.go +++ b/internal/controller/postgrescluster/cluster_test.go @@ -137,8 +137,8 @@ func TestCustomLabels(t *testing.T) { t.Run("Cluster", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "global-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "global-cluster" + cluster.Namespace = ns.Name cluster.Spec.InstanceSets = []v1beta1.PostgresInstanceSetSpec{{ Name: "daisy-instance1", Replicas: initialize.Int32(1), @@ -185,8 +185,8 @@ func TestCustomLabels(t *testing.T) { t.Run("Instance", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "instance-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "instance-cluster" + cluster.Namespace = ns.Name cluster.Spec.InstanceSets = []v1beta1.PostgresInstanceSetSpec{{ Name: "max-instance", Replicas: initialize.Int32(1), @@ -236,8 +236,8 @@ func TestCustomLabels(t *testing.T) { t.Run("PGBackRest", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "pgbackrest-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "pgbackrest-cluster" + cluster.Namespace = ns.Name cluster.Spec.Backups.PGBackRest.Metadata = &v1beta1.Metadata{ Labels: map[string]string{"my.pgbackrest.label": "lucy"}, } @@ -280,8 +280,8 @@ func TestCustomLabels(t *testing.T) { t.Run("PGBouncer", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "pgbouncer-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "pgbouncer-cluster" + cluster.Namespace = ns.Name cluster.Spec.Proxy.PGBouncer.Metadata = &v1beta1.Metadata{ Labels: map[string]string{"my.pgbouncer.label": "lucy"}, } @@ -375,8 +375,8 @@ func TestCustomAnnotations(t *testing.T) { t.Run("Cluster", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "global-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "global-cluster" + cluster.Namespace = ns.Name cluster.Spec.InstanceSets = []v1beta1.PostgresInstanceSetSpec{{ Name: "daisy-instance1", Replicas: initialize.Int32(1), @@ -424,8 +424,8 @@ func TestCustomAnnotations(t *testing.T) { t.Run("Instance", func(t *testing.T) { cluster := 
testCluster() - cluster.ObjectMeta.Name = "instance-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "instance-cluster" + cluster.Namespace = ns.Name cluster.Spec.InstanceSets = []v1beta1.PostgresInstanceSetSpec{{ Name: "max-instance", Replicas: initialize.Int32(1), @@ -475,8 +475,8 @@ func TestCustomAnnotations(t *testing.T) { t.Run("PGBackRest", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "pgbackrest-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "pgbackrest-cluster" + cluster.Namespace = ns.Name cluster.Spec.Backups.PGBackRest.Metadata = &v1beta1.Metadata{ Annotations: map[string]string{"my.pgbackrest.annotation": "lucy"}, } @@ -519,8 +519,8 @@ func TestCustomAnnotations(t *testing.T) { t.Run("PGBouncer", func(t *testing.T) { cluster := testCluster() - cluster.ObjectMeta.Name = "pgbouncer-cluster" - cluster.ObjectMeta.Namespace = ns.Name + cluster.Name = "pgbouncer-cluster" + cluster.Namespace = ns.Name cluster.Spec.Proxy.PGBouncer.Metadata = &v1beta1.Metadata{ Annotations: map[string]string{"my.pgbouncer.annotation": "lucy"}, } @@ -768,12 +768,12 @@ type: ClusterIP assert.NilError(t, err) // Annotations present in the metadata. - assert.Assert(t, cmp.MarshalMatches(service.ObjectMeta.Annotations, ` + assert.Assert(t, cmp.MarshalMatches(service.Annotations, ` some: note `)) // Labels present in the metadata. - assert.Assert(t, cmp.MarshalMatches(service.ObjectMeta.Labels, ` + assert.Assert(t, cmp.MarshalMatches(service.Labels, ` happy: label postgres-operator.crunchydata.com/cluster: pg2 postgres-operator.crunchydata.com/role: replica @@ -870,6 +870,11 @@ func TestPatroniLogSize(t *testing.T) { reconciler := &Reconciler{Recorder: recorder} cluster.Spec.Patroni = nil + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: { + logs: { retentionPeriod: 5h }, + }, + }`) size := reconciler.patroniLogSize(ctx, &cluster) diff --git a/internal/controller/postgrescluster/controller.go b/internal/controller/postgrescluster/controller.go index bbe141c0b4..b8ede195f5 100644 --- a/internal/controller/postgrescluster/controller.go +++ b/internal/controller/postgrescluster/controller.go @@ -35,7 +35,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/pki" "github.com/crunchydata/postgres-operator/internal/postgres" - "github.com/crunchydata/postgres-operator/internal/registration" "github.com/crunchydata/postgres-operator/internal/tracing" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -53,8 +52,7 @@ type Reconciler struct { ctx context.Context, namespace, pod, container string, stdin io.Reader, stdout, stderr io.Writer, command ...string, ) error - Recorder record.EventRecorder - Registration registration.Registration + Recorder record.EventRecorder } // +kubebuilder:rbac:groups="",resources="events",verbs={create,patch} @@ -187,12 +185,6 @@ func (r *Reconciler) Reconcile( return nil } - if r.Registration != nil && r.Registration.Required(r.Recorder, cluster, &cluster.Status.Conditions) { - registration.SetAdvanceWarning(r.Recorder, cluster, &cluster.Status.Conditions) - } - cluster.Status.RegistrationRequired = nil - cluster.Status.TokenRequired = "" - // if the cluster is paused, set a condition and return if cluster.Spec.Paused != nil && *cluster.Spec.Paused { meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{ diff --git 
a/internal/controller/postgrescluster/controller_ref_manager.go b/internal/controller/postgrescluster/controller_ref_manager.go index 36f3b67d6d..d229728b12 100644 --- a/internal/controller/postgrescluster/controller_ref_manager.go +++ b/internal/controller/postgrescluster/controller_ref_manager.go @@ -88,7 +88,7 @@ func (r *Reconciler) claimObject(ctx context.Context, postgresCluster *v1beta1.P // At this point the resource has no controller ref and is therefore an orphan. Ignore if // either the PostgresCluster resource or the orphaned resource is being deleted, or if the selector - // for the orphaned resource doesn't doesn't include the proper PostgresCluster label + // for the orphaned resource doesn't include the proper PostgresCluster label _, hasPGClusterLabel := obj.GetLabels()[naming.LabelCluster] if postgresCluster.GetDeletionTimestamp() != nil || !hasPGClusterLabel { return nil diff --git a/internal/controller/postgrescluster/controller_ref_manager_test.go b/internal/controller/postgrescluster/controller_ref_manager_test.go index fa8450c5d9..758daf2ef3 100644 --- a/internal/controller/postgrescluster/controller_ref_manager_test.go +++ b/internal/controller/postgrescluster/controller_ref_manager_test.go @@ -46,6 +46,14 @@ func TestManageControllerRefs(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{"label1": "val1"}, }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "some-container", + Image: "some-image", + }, + }, + }, }, }, } diff --git a/internal/controller/postgrescluster/controller_test.go b/internal/controller/postgrescluster/controller_test.go index 9e36d0c2d0..4d561d4c7f 100644 --- a/internal/controller/postgrescluster/controller_test.go +++ b/internal/controller/postgrescluster/controller_test.go @@ -14,12 +14,10 @@ import ( . "github.com/onsi/gomega" . "github.com/onsi/gomega/gstruct" "github.com/pkg/errors" //nolint:depguard // This legacy test covers so much code, it logs the origin of unexpected errors. 
- "gotest.tools/v3/assert" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/rand" "k8s.io/apimachinery/pkg/util/version" @@ -29,7 +27,6 @@ import ( "sigs.k8s.io/yaml" "github.com/crunchydata/postgres-operator/internal/naming" - "github.com/crunchydata/postgres-operator/internal/registration" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -88,34 +85,6 @@ func TestDeleteControlled(t *testing.T) { }) } -var olmClusterYAML = ` -metadata: - name: olm -spec: - postgresVersion: 13 - image: postgres - instances: - - name: register-now - dataVolumeClaimSpec: - accessModes: - - "ReadWriteMany" - resources: - requests: - storage: 1Gi - backups: - pgbackrest: - image: pgbackrest - repos: - - name: repo1 - volume: - volumeClaimSpec: - accessModes: - - "ReadWriteOnce" - resources: - requests: - storage: 1Gi -` - var _ = Describe("PostgresCluster Reconciler", func() { var test struct { Namespace *corev1.Namespace @@ -136,7 +105,6 @@ var _ = Describe("PostgresCluster Reconciler", func() { test.Reconciler.Client = suite.Client test.Reconciler.Owner = "asdf" test.Reconciler.Recorder = test.Recorder - test.Reconciler.Registration = nil }) AfterEach(func() { @@ -176,49 +144,6 @@ var _ = Describe("PostgresCluster Reconciler", func() { return result } - Context("Cluster with Registration Requirement, no token", func() { - var cluster *v1beta1.PostgresCluster - - BeforeEach(func() { - test.Reconciler.Registration = registration.RegistrationFunc( - func(record.EventRecorder, client.Object, *[]metav1.Condition) bool { - return true - }) - - cluster = create(olmClusterYAML) - Expect(reconcile(cluster)).To(BeZero()) - }) - - AfterEach(func() { - ctx := context.Background() - - if cluster != nil { - Expect(client.IgnoreNotFound( - suite.Client.Delete(ctx, cluster), - )).To(Succeed()) - - // Remove finalizers, if any, so the namespace can terminate. - Expect(client.IgnoreNotFound( - suite.Client.Patch(ctx, cluster, client.RawPatch( - client.Merge.Type(), []byte(`{"metadata":{"finalizers":[]}}`))), - )).To(Succeed()) - } - }) - - Specify("Cluster RegistrationRequired Status", func() { - existing := &v1beta1.PostgresCluster{} - Expect(suite.Client.Get( - context.Background(), client.ObjectKeyFromObject(cluster), existing, - )).To(Succeed()) - - Expect(meta.IsStatusConditionFalse(existing.Status.Conditions, v1beta1.Registered)).To(BeTrue()) - - event, ok := <-test.Recorder.Events - Expect(ok).To(BeTrue()) - Expect(event).To(ContainSubstring("Register Soon")) - }) - }) - Context("Cluster", func() { var cluster *v1beta1.PostgresCluster diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index d6fc6158e8..b3bf0b6f75 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -335,7 +335,8 @@ func (r *Reconciler) observeInstances( status.DesiredPGDataVolume = make(map[string]string) for _, instance := range observed.bySet[name] { - status.Replicas += int32(len(instance.Pods)) //nolint:gosec + //nolint:gosec // This slice is always small. 
+ status.Replicas += int32(len(instance.Pods)) if ready, known := instance.IsReady(); known && ready { status.ReadyReplicas++ @@ -752,7 +753,7 @@ func findAvailableInstanceNames(set v1beta1.PostgresInstanceSetSpec, } // Determine whether or not the PVC is associated with an existing instance within the same - // instance set. If not, then the instance name associated with that PVC can be be reused. + // instance set. If not, then the instance name associated with that PVC can be reused. for _, pvc := range setVolumes { pvcInstanceName := pvc.GetLabels()[naming.LabelInstance] instance := observedInstances.byName[pvcInstanceName] @@ -1168,11 +1169,11 @@ func (r *Reconciler) reconcileInstance( ) if err == nil { - instanceConfigMap, err = r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig) + instanceConfigMap, err = r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, backupsSpecFound) } if err == nil { instanceCertificates, err = r.reconcileInstanceCertificates( - ctx, cluster, spec, instance, rootCA) + ctx, cluster, spec, instance, rootCA, backupsSpecFound) } if err == nil { postgresDataVolume, err = r.reconcilePostgresDataVolume(ctx, cluster, spec, instance, clusterVolumes, nil) @@ -1188,7 +1189,7 @@ func (r *Reconciler) reconcileInstance( ctx, cluster, spec, primaryCertificate, replicationCertSecretProjection(clusterReplicationSecret), postgresDataVolume, postgresWALVolume, tablespaceVolumes, - &instance.Spec.Template.Spec) + &instance.Spec.Template) if backupsSpecFound { addPGBackRestToInstancePodSpec( @@ -1202,7 +1203,7 @@ func (r *Reconciler) reconcileInstance( // If either OpenTelemetry feature is enabled, we want to add the collector config to the pod if err == nil && - (feature.Enabled(ctx, feature.OpenTelemetryLogs) || feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + collector.OpenTelemetryLogsOrMetricsEnabled(ctx, cluster) { // If the OpenTelemetryMetrics feature is enabled, we need to get the pgpassword from the // monitoring user secret @@ -1218,11 +1219,14 @@ func (r *Reconciler) reconcileInstance( } } - // For now, we are not using logrotate to rotate postgres or patroni logs - // but we are using it for pgbackrest logs in the postgres pod + // For now, we are not using logrotate to rotate postgres or patroni logs, + // but we are using it for pgbackrest logs in the postgres pod, so we will + // set includeLogrotate to true, but only if backups are enabled + // and local volumes are available. + includeLogrotate := backupsSpecFound && pgbackrest.RepoHostVolumeDefined(cluster) collector.AddToPod(ctx, cluster.Spec.Instrumentation, cluster.Spec.ImagePullPolicy, instanceConfigMap, &instance.Spec.Template, []corev1.VolumeMount{postgres.DataVolumeMount()}, pgPassword, - []string{naming.PGBackRestPGDataLogPath}, true, true) + []string{naming.PGBackRestPGDataLogPath}, includeLogrotate, true) } // Add postgres-exporter to the instance Pod spec @@ -1406,7 +1410,7 @@ func addPGBackRestToInstancePodSpec( // files (etc) that apply to instance of cluster. 
func (r *Reconciler) reconcileInstanceConfigMap( ctx context.Context, cluster *v1beta1.PostgresCluster, spec *v1beta1.PostgresInstanceSetSpec, - instance *appsv1.StatefulSet, otelConfig *collector.Config, + instance *appsv1.StatefulSet, otelConfig *collector.Config, backupsSpecFound bool, ) (*corev1.ConfigMap, error) { instanceConfigMap := &corev1.ConfigMap{ObjectMeta: naming.InstanceConfigMap(instance)} instanceConfigMap.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) @@ -1428,16 +1432,14 @@ func (r *Reconciler) reconcileInstanceConfigMap( // If OTel logging or metrics is enabled, add collector config if err == nil && - (feature.Enabled(ctx, feature.OpenTelemetryLogs) || - feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + collector.OpenTelemetryLogsOrMetricsEnabled(ctx, cluster) { + err = collector.AddToConfigMap(ctx, otelConfig, instanceConfigMap) // Add pgbackrest logrotate if OpenTelemetryLogs is enabled and - // local volumes are available + // backups are enabled if err == nil && - feature.Enabled(ctx, feature.OpenTelemetryLogs) && - pgbackrest.RepoHostVolumeDefined(cluster) && - cluster.Spec.Instrumentation != nil { + collector.OpenTelemetryLogsEnabled(ctx, cluster) && backupsSpecFound { collector.AddLogrotateConfigs(ctx, cluster.Spec.Instrumentation, instanceConfigMap, @@ -1464,7 +1466,7 @@ func (r *Reconciler) reconcileInstanceConfigMap( func (r *Reconciler) reconcileInstanceCertificates( ctx context.Context, cluster *v1beta1.PostgresCluster, spec *v1beta1.PostgresInstanceSetSpec, instance *appsv1.StatefulSet, - root *pki.RootCertificateAuthority, + root *pki.RootCertificateAuthority, backupsSpecFound bool, ) (*corev1.Secret, error) { existing := &corev1.Secret{ObjectMeta: naming.InstanceCertificates(instance)} err := errors.WithStack(client.IgnoreNotFound( @@ -1507,7 +1509,7 @@ func (r *Reconciler) reconcileInstanceCertificates( root.Certificate, leafCert.Certificate, leafCert.PrivateKey, instanceCerts) } - if err == nil { + if err == nil && backupsSpecFound { err = pgbackrest.InstanceCertificates(ctx, cluster, root.Certificate, leafCert.Certificate, leafCert.PrivateKey, instanceCerts) diff --git a/internal/controller/postgrescluster/instance_test.go b/internal/controller/postgrescluster/instance_test.go index 2381b4cb5b..83afc6d20f 100644 --- a/internal/controller/postgrescluster/instance_test.go +++ b/internal/controller/postgrescluster/instance_test.go @@ -32,7 +32,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/controller/runtime" + "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" @@ -544,151 +546,7 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { }, } - t.Run("NoVolumeRepo", func(t *testing.T) { - cluster := cluster.DeepCopy() - cluster.Spec.Backups.PGBackRest.Repos = nil - - out := pod.DeepCopy() - addPGBackRestToInstancePodSpec(ctx, cluster, &certificates, out) - - // Only Containers and Volumes fields have changed. - assert.DeepEqual(t, pod, *out, cmpopts.IgnoreFields(pod, "Containers", "Volumes")) - - // Only database container has mounts. - // Other containers are ignored. 
- assert.Assert(t, cmp.MarshalMatches(out.Containers, ` -- name: database - resources: {} - volumeMounts: - - mountPath: /etc/pgbackrest/conf.d - name: pgbackrest-config - readOnly: true -- name: other - resources: {} -- command: - - pgbackrest - - server - livenessProbe: - exec: - command: - - pgbackrest - - server-ping - name: pgbackrest - resources: {} - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - privileged: false - readOnlyRootFilesystem: true - runAsNonRoot: true - seccompProfile: - type: RuntimeDefault - volumeMounts: - - mountPath: /etc/pgbackrest/server - name: pgbackrest-server - readOnly: true - - mountPath: /pgdata - name: postgres-data - - mountPath: /pgwal - name: postgres-wal - - mountPath: /etc/pgbackrest/conf.d - name: pgbackrest-config - readOnly: true -- command: - - bash - - -ceu - - -- - - |- - monitor() { - exec {fd}<> <(:||:) - until read -r -t 5 -u "${fd}"; do - if - [[ "${filename}" -nt "/proc/self/fd/${fd}" ]] && - pkill -HUP --exact --parent=0 pgbackrest - then - exec {fd}>&- && exec {fd}<> <(:||:) - stat --dereference --format='Loaded configuration dated %y' "${filename}" - elif - { [[ "${directory}" -nt "/proc/self/fd/${fd}" ]] || - [[ "${authority}" -nt "/proc/self/fd/${fd}" ]] - } && - pkill -HUP --exact --parent=0 pgbackrest - then - exec {fd}>&- && exec {fd}<> <(:||:) - stat --format='Loaded certificates dated %y' "${directory}" - fi - done - }; export directory="$1" authority="$2" filename="$3"; export -f monitor; exec -a "$0" bash -ceu monitor - - pgbackrest-config - - /etc/pgbackrest/server - - /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt - - /etc/pgbackrest/conf.d/~postgres-operator_server.conf - name: pgbackrest-config - resources: {} - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - privileged: false - readOnlyRootFilesystem: true - runAsNonRoot: true - seccompProfile: - type: RuntimeDefault - volumeMounts: - - mountPath: /etc/pgbackrest/server - name: pgbackrest-server - readOnly: true - - mountPath: /etc/pgbackrest/conf.d - name: pgbackrest-config - readOnly: true - `)) - - // Instance configuration files with certificates. - // Other volumes are ignored. - assert.Assert(t, cmp.MarshalMatches(out.Volumes, ` -- name: other -- name: postgres-data -- name: postgres-wal -- name: pgbackrest-server - projected: - sources: - - secret: - items: - - key: pgbackrest-server.crt - path: server-tls.crt - - key: pgbackrest-server.key - mode: 384 - path: server-tls.key - name: some-secret -- name: pgbackrest-config - projected: - sources: - - configMap: - items: - - key: pgbackrest_instance.conf - path: pgbackrest_instance.conf - - key: config-hash - path: config-hash - - key: pgbackrest-server.conf - path: ~postgres-operator_server.conf - name: hippo-pgbackrest-config - - secret: - items: - - key: pgbackrest.ca-roots - path: ~postgres-operator/tls-ca.crt - - key: pgbackrest-client.crt - path: ~postgres-operator/client-tls.crt - - key: pgbackrest-client.key - mode: 384 - path: ~postgres-operator/client-tls.key - name: hippo-pgbackrest - `)) - }) - - t.Run("OneVolumeRepo", func(t *testing.T) { + t.Run("CloudOrVolumeSameBehavior", func(t *testing.T) { alwaysExpect := func(t testing.TB, result *corev1.PodSpec) { // Only Containers and Volumes fields have changed. 
assert.DeepEqual(t, pod, *result, cmpopts.IgnoreFields(pod, "Containers", "Volumes")) @@ -733,24 +591,35 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { mode: 384 path: ~postgres-operator/client-tls.key name: hippo-pgbackrest + optional: true `)) } - cluster := cluster.DeepCopy() - cluster.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{ + clusterWithVolume := cluster.DeepCopy() + clusterWithVolume.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{ { Name: "repo1", Volume: new(v1beta1.RepoPVC), }, } - out := pod.DeepCopy() - addPGBackRestToInstancePodSpec(ctx, cluster, &certificates, out) - alwaysExpect(t, out) + clusterWithCloudRepo := cluster.DeepCopy() + clusterWithCloudRepo.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{ + { + Name: "repo1", + GCS: new(v1beta1.RepoGCS), + }, + } - // The TLS server is added and configuration mounted. - // It has PostgreSQL volumes mounted while other volumes are ignored. - assert.Assert(t, cmp.MarshalMatches(out.Containers, ` + outWithVolume := pod.DeepCopy() + addPGBackRestToInstancePodSpec(ctx, clusterWithVolume, &certificates, outWithVolume) + alwaysExpect(t, outWithVolume) + + outWithCloudRepo := pod.DeepCopy() + addPGBackRestToInstancePodSpec(ctx, clusterWithCloudRepo, &certificates, outWithCloudRepo) + alwaysExpect(t, outWithCloudRepo) + + outContainers := ` - name: database resources: {} volumeMounts: @@ -838,7 +707,12 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { - mountPath: /etc/pgbackrest/conf.d name: pgbackrest-config readOnly: true - `)) + ` + + // The TLS server is added and configuration mounted. + // It has PostgreSQL volumes mounted while other volumes are ignored. + assert.Assert(t, cmp.MarshalMatches(outWithVolume.Containers, outContainers)) + assert.Assert(t, cmp.MarshalMatches(outWithCloudRepo.Containers, outContainers)) t.Run("CustomResources", func(t *testing.T) { cluster := cluster.DeepCopy() @@ -855,7 +729,7 @@ func TestAddPGBackRestToInstancePodSpec(t *testing.T) { }, } - before := out.DeepCopy() + before := outWithVolume.DeepCopy() out := pod.DeepCopy() addPGBackRestToInstancePodSpec(ctx, cluster, &certificates, out) alwaysExpect(t, out) @@ -1707,7 +1581,6 @@ func TestGenerateInstanceStatefulSetIntent(t *testing.T) { `)) }, }} { - test := test t.Run(test.name, func(t *testing.T) { cluster := test.ip.cluster @@ -2147,3 +2020,286 @@ func TestCleanupDisruptionBudgets(t *testing.T) { }) }) } + +func TestReconcileInstanceConfigMap(t *testing.T) { + ctx := context.Background() + _, cc := setupKubernetes(t) + require.ParallelCapacity(t, 1) + + r := &Reconciler{ + Client: cc, + Owner: client.FieldOwner(t.Name()), + } + + t.Run("LocalVolumeOtelDisabled", func(t *testing.T) { + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-1" + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-1-instance-config") + assert.Equal(t, cm.Data["collector.yaml"], "") + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("CloudRepoOtelDisabled", func(t *testing.T) 
{ + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-2" + cluster.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{{ + Name: "repo1", + GCS: &v1beta1.RepoGCS{ + Bucket: "test-bucket", + }, + }} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-2-instance-config") + assert.Equal(t, cm.Data["collector.yaml"], "") + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("LocalVolumeOtelMetricsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-3" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-3-instance-config") + // We test the contents of the collector yaml elsewhere, I just want to + // make sure that it isn't empty here + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("LocalVolumeOtelLogsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-4" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-4-instance-config") + // We test the contents of the collector and logrotate configs elsewhere, + // I just want to test that they aren't empty here + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Assert(t, len(cm.Data["logrotate.conf"]) > 0) + }) + + t.Run("CloudRepoOtelMetricsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := 
feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-5" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-5-instance-config") + // We test the contents of the collector yaml elsewhere, I just want to + // make sure that it isn't empty here + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("CloudRepoOtelLogsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-6" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, true) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, true) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-6-instance-config") + // We test the contents of the collector and logrotate configs elsewhere, + // I just want to test that they aren't empty here + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Assert(t, len(cm.Data["logrotate.conf"]) > 0) + }) + + t.Run("BackupsDisabledOtelDisabled", func(t *testing.T) { + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-7" + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, false) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, false) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-7-instance-config") + assert.Equal(t, cm.Data["collector.yaml"], "") + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("BackupsDisabledOtelMetricsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-8" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := 
&v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, false) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, false) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-8-instance-config") + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) + + t.Run("BackupsDisabledOtelLogsEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryLogs: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + ns := setupNamespace(t, cc) + cluster := testCluster() + cluster.Namespace = ns.Name + cluster.Name = "test-hippo-9" + cluster.Spec.Instrumentation = &v1beta1.InstrumentationSpec{} + assert.NilError(t, cc.Create(ctx, cluster)) + + spec := &v1beta1.PostgresInstanceSetSpec{} + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + "-instance", + Namespace: ns.Name, + }, + } + pgParameters := r.generatePostgresParameters(ctx, cluster, false) + otelConfig := collector.NewConfigForPostgresPod(ctx, cluster, pgParameters) + + cm, err := r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig, false) + assert.NilError(t, err) + assert.Equal(t, cm.Name, "test-hippo-9-instance-config") + assert.Assert(t, len(cm.Data["collector.yaml"]) > 0) + assert.Equal(t, cm.Data["logrotate.conf"], "") + }) +} diff --git a/internal/controller/postgrescluster/metrics_setup.sql b/internal/controller/postgrescluster/metrics_setup.sql index 728de80c3e..dbaee4f030 100644 --- a/internal/controller/postgrescluster/metrics_setup.sql +++ b/internal/controller/postgrescluster/metrics_setup.sql @@ -71,23 +71,25 @@ $function$; GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA monitor TO ccp_monitoring; GRANT ALL ON ALL TABLES IN SCHEMA monitor TO ccp_monitoring; ---- get_pgbackrest_info is used by the OTel collector. +DROP FUNCTION IF EXISTS get_replication_lag(); +--- get_replication_lag is used by the OTel collector. --- get_replication_lag is created as function, so that we can query without warning on a replica. -CREATE OR REPLACE FUNCTION get_replication_lag() RETURNS TABLE(bytes NUMERIC) AS $$ +CREATE FUNCTION get_replication_lag() RETURNS TABLE(replica text, bytes NUMERIC) AS $$ BEGIN IF pg_is_in_recovery() THEN - RETURN QUERY SELECT 0::NUMERIC AS bytes; + RETURN QUERY SELECT ''::text as replica, 0::NUMERIC AS bytes; ELSE - RETURN QUERY SELECT pg_wal_lsn_diff(sent_lsn, replay_lsn) AS bytes + RETURN QUERY SELECT application_name AS replica, pg_wal_lsn_diff(sent_lsn, replay_lsn) AS bytes FROM pg_catalog.pg_stat_replication; END IF; END; $$ LANGUAGE plpgsql; +DROP FUNCTION IF EXISTS get_pgbackrest_info(); --- get_pgbackrest_info is used by the OTel collector. --- get_pgbackrest_info is created as a function so that no ddl runs on a replica. --- In the query, the --stanza argument matches DefaultStanzaName, defined in internal/pgbackrest/config.go. 
-CREATE OR REPLACE FUNCTION get_pgbackrest_info() +CREATE FUNCTION get_pgbackrest_info() RETURNS TABLE ( last_diff_backup BIGINT, last_full_backup BIGINT, @@ -97,7 +99,6 @@ RETURNS TABLE ( backup_type TEXT, backup_runtime_seconds BIGINT, repo_backup_size_bytes TEXT, - repo_total_size_bytes TEXT, oldest_full_backup BIGINT, repo TEXT ) AS $$ @@ -113,7 +114,6 @@ BEGIN 'n/a'::text AS backup_type, 0::bigint AS backup_runtime_seconds, '0'::text AS repo_backup_size_bytes, - '0'::text AS repo_total_size_bytes, 0::bigint AS oldest_full_backup, 'n/a' AS repo; ELSE @@ -151,7 +151,6 @@ BEGIN backup->'database'->>'repo-key' AS repo, backup->>'type' AS backup_type, backup->'info'->'repository'->>'delta' AS repo_backup_size_bytes, - backup->'info'->'repository'->>'size' AS repo_total_size_bytes, (backup->'timestamp'->>'stop')::bigint - (backup->'timestamp'->>'start')::bigint AS backup_runtime_seconds, CASE WHEN backup->>'error' = 'true' THEN 1 ELSE 0 END AS backup_error FROM ordered_backups @@ -207,7 +206,6 @@ BEGIN ccp_backrest_last_info.backup_type, ccp_backrest_last_info.backup_runtime_seconds, ccp_backrest_last_info.repo_backup_size_bytes, - ccp_backrest_last_info.repo_total_size_bytes, ccp_backrest_oldest_full_backup.time_seconds, ccp_backrest_last_incr_backup.repo FROM @@ -220,3 +218,164 @@ BEGIN END; $$ LANGUAGE plpgsql; +/* +* The `pg_hba_checksum` table, functions, and view are taken from +* https://github.com/CrunchyData/pgmonitor/blob/development/postgres_exporter/common +* +* The goal of this table, these functions, and this view is to monitor changes +* to the pg_hba_file_rules system catalog. +* +* This material is used in the metric `ccp_pg_hba_checksum`. +*/ + +/* +* The `monitor.pg_hba_checksum` table is used to store +* - the pg_hba settings as a string (for reference) +* - the pg_hba settings as a hash (for quick comparison) +* - the `hba_hash_known_provided` (for an override hash manually given to the `monitor.pg_hba_checksum` function) +* - the `valid` field to signal whether the pg_hba settings have changed since they were last accepted as valid +* +* We create an index on `created_at` in order to pull the most recent entry for +* comparison in the `monitor.pg_hba_checksum` function +*/ +DROP TABLE IF EXISTS monitor.pg_hba_checksum; +CREATE TABLE monitor.pg_hba_checksum ( + hba_hash_generated text NOT NULL + , hba_hash_known_provided text + , hba_string text NOT NULL + , created_at timestamptz DEFAULT now() NOT NULL + , valid smallint NOT NULL ); +COMMENT ON COLUMN monitor.pg_hba_checksum.valid IS 'Set this column to zero if this group of settings is a valid change'; +CREATE INDEX ON monitor.pg_hba_checksum (created_at); + +/* + * `monitor.pg_hba_checksum(text)` is used to compare the previous pg_hba hash + * with a hash of the current pg_hba settings, derived from the `monitor.pg_hba_hash` view below. + * + * This function returns + * - 0, indicating NO settings have changed + * - 1, indicating something has changed since the last known valid state + * + * `monitor.pg_hba_checksum` can take a hash to be used as an override. + * This may be useful when you have a standby with different pg_hba rules; + * since it will have different rules (and therefore a different hash), you + * could alter the metric function to pass the actual hash, which would be + * used in lieu of this table's value (derived from the primary cluster's rules).
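+ *
+ * A hedged usage sketch; the hash literal below is a placeholder, not a real value:
+ *
+ *   SELECT monitor.pg_hba_checksum();                            -- compare against the stored hash
+ *   SELECT monitor.pg_hba_checksum('<sha256-of-standby-rules>'); -- compare against a manual override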
+ */ +DROP FUNCTION IF EXISTS monitor.pg_hba_checksum(text); +CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) + RETURNS smallint + LANGUAGE plpgsql SECURITY DEFINER + SET search_path TO pg_catalog, pg_temp +AS $function$ +DECLARE + +v_hba_hash text; +v_hba_hash_old text; +v_hba_string text; +v_is_in_recovery boolean; +v_valid smallint; + +BEGIN + +-- Retrieve the current settings from the `monitor.pg_hba_hash` view below +IF current_setting('server_version_num')::int >= 100000 THEN + SELECT sha256_hash, hba_string + INTO v_hba_hash, v_hba_string + FROM monitor.pg_hba_hash; +ELSE + RAISE EXCEPTION 'pg_hba change monitoring unsupported in versions older than PostgreSQL 10'; +END IF; + +-- Retrieve the last previous hash from the table +SELECT hba_hash_generated, valid +INTO v_hba_hash_old, v_valid +FROM monitor.pg_hba_checksum +ORDER BY created_at DESC LIMIT 1; + +-- If a manual/override hash has been given, we will use that: +-- Do not base validity on the stored value if a manual hash is given. +IF p_known_hba_hash IS NOT NULL THEN + v_hba_hash_old := p_known_hba_hash; + v_valid := 0; +END IF; + +/* If the table is not empty or a manual hash was given, + * then we want to compare the old hash (from the table) + * with the new hash: if those differ, then we set the validity to 1; + * if they are the same, then we honor what the validity was + * in the table (which may be 0 or 1). + */ +IF (v_hba_hash_old IS NOT NULL) THEN + IF (v_hba_hash != v_hba_hash_old) THEN + v_valid := 1; + END IF; +ELSE + v_valid := 0; +END IF; + +/* + * We only want to insert into the table if we're on a primary and + * - the table/manually entered hash is empty, e.g., we've just started the cluster; or + * - the hashes don't match + * + * There's no value added by inserting into the table when no change was detected. + */ +IF (v_hba_hash_old IS NULL) OR (v_hba_hash != v_hba_hash_old) THEN + SELECT pg_is_in_recovery() INTO v_is_in_recovery; + IF v_is_in_recovery = false THEN + INSERT INTO monitor.pg_hba_checksum ( + hba_hash_generated + , hba_hash_known_provided + , hba_string + , valid) + VALUES ( + v_hba_hash + , p_known_hba_hash + , v_hba_string + , v_valid); + END IF; +END IF; + +RETURN v_valid; + +END +$function$; + +/* + * The `monitor.pg_hba_hash` view returns both a hash and a string aggregate of the + * pg_catalog.pg_hba_file_rules. + * Note: We use `sha256` for hashing so that this can run in FIPS environments. + */ +DROP VIEW IF EXISTS monitor.pg_hba_hash; +CREATE VIEW monitor.pg_hba_hash AS + -- Order by line number so that a change in the order of entries is caught even when no content is changed + WITH hba_ordered_list AS ( + SELECT COALESCE(type, '<>') AS type + , array_to_string(COALESCE(database, ARRAY['<>']), ',') AS database + , array_to_string(COALESCE(user_name, ARRAY['<>']), ',') AS user_name + , COALESCE(address, '<>') AS address + , COALESCE(netmask, '<>') AS netmask + , COALESCE(auth_method, '<>') AS auth_method + , array_to_string(COALESCE(options, ARRAY['<>']), ',') AS options + FROM pg_catalog.pg_hba_file_rules + ORDER BY line_number) + SELECT sha256((string_agg(type||database||user_name||address||netmask||auth_method||options, ','))::bytea) AS sha256_hash + , string_agg(type||database||user_name||address||netmask||auth_method||options, ',') AS hba_string + FROM hba_ordered_list; + +/* + * The `monitor.pg_hba_checksum_set_valid` function provides an interface for resetting the + * checksum monitor. + * Note: configuration history will be cleared.
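+ *
+ * A minimal example of accepting the current rules, assuming the objects above are installed:
+ *
+ *   SELECT monitor.pg_hba_checksum_set_valid(); -- truncates the history table and returns 0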
+ */ +DROP FUNCTION IF EXISTS monitor.pg_hba_checksum_set_valid(); +CREATE FUNCTION monitor.pg_hba_checksum_set_valid() RETURNS smallint + LANGUAGE sql +AS $function$ + +TRUNCATE monitor.pg_hba_checksum; + +SELECT monitor.pg_hba_checksum(); + +$function$; diff --git a/internal/controller/postgrescluster/patroni_test.go b/internal/controller/postgrescluster/patroni_test.go index 85cd2dddb7..728b75aee3 100644 --- a/internal/controller/postgrescluster/patroni_test.go +++ b/internal/controller/postgrescluster/patroni_test.go @@ -97,12 +97,12 @@ ownerReferences: assert.NilError(t, err) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", }) // Labels present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "postgres-operator.crunchydata.com/cluster": "pg2", "postgres-operator.crunchydata.com/patroni": "pg2-ha", @@ -125,13 +125,13 @@ ownerReferences: assert.NilError(t, err) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", "c": "v3", }) // Labels present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "d": "v4", "postgres-operator.crunchydata.com/cluster": "pg2", @@ -472,15 +472,15 @@ func TestReconcilePatroniStatus(t *testing.T) { ObjectMeta: naming.PatroniDistributedConfiguration(postgresCluster), } if writeAnnotation { - endpoints.ObjectMeta.Annotations = make(map[string]string) - endpoints.ObjectMeta.Annotations["initialize"] = systemIdentifier + endpoints.Annotations = make(map[string]string) + endpoints.Annotations["initialize"] = systemIdentifier } assert.NilError(t, tClient.Create(ctx, endpoints, &client.CreateOptions{})) instance := &Instance{ Name: instanceName, Runner: runner, } - for i := 0; i < readyReplicas; i++ { + for range readyReplicas { instance.Pods = append(instance.Pods, &corev1.Pod{ Status: corev1.PodStatus{ Conditions: []corev1.PodCondition{{ diff --git a/internal/controller/postgrescluster/pgadmin_test.go b/internal/controller/postgrescluster/pgadmin_test.go index f4be61a8bb..1d0a305b2a 100644 --- a/internal/controller/postgrescluster/pgadmin_test.go +++ b/internal/controller/postgrescluster/pgadmin_test.go @@ -104,12 +104,12 @@ ownerReferences: assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, configmap.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, configmap.Annotations, map[string]string{ "a": "v5", "b": "v2", "e": "v6", }) // Labels present in the metadata. - assert.DeepEqual(t, configmap.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, configmap.Labels, map[string]string{ "c": "v7", "d": "v4", "f": "v8", "postgres-operator.crunchydata.com/cluster": "pg1", "postgres-operator.crunchydata.com/role": "pgadmin", @@ -194,12 +194,12 @@ ownerReferences: assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", }) // Labels present in the metadata. 
- assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "postgres-operator.crunchydata.com/cluster": "my-cluster", "postgres-operator.crunchydata.com/role": "pgadmin", @@ -225,13 +225,13 @@ ownerReferences: assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", "c": "v3", }) // Labels present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "d": "v4", "postgres-operator.crunchydata.com/cluster": "my-cluster", diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index 41d1b942a1..5c84f2a22a 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -38,6 +38,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/pgbackrest" "github.com/crunchydata/postgres-operator/internal/pki" "github.com/crunchydata/postgres-operator/internal/postgres" + "github.com/crunchydata/postgres-operator/internal/util" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -323,10 +324,12 @@ func (r *Reconciler) cleanupRepoResources(ctx context.Context, // TODO(tjmoore4): This can be removed once 5.0 is EOL. if owned.GetName() != naming.PGBackRestSSHConfig(postgresCluster).Name && owned.GetName() != naming.PGBackRestSSHSecret(postgresCluster).Name { - // If a dedicated repo host resource and a dedicated repo host is enabled, then - // add to the slice and do not delete. - ownedNoDelete = append(ownedNoDelete, owned) - delete = false + // If it is a dedicated repo host resource and a dedicated repo + // host is enabled, then add to the slice and do not delete. + if pgbackrest.RepoHostVolumeDefined(postgresCluster) { + ownedNoDelete = append(ownedNoDelete, owned) + delete = false + } } case hasLabel(naming.LabelPGBackRestRepoVolume): if !backupsSpecFound { @@ -686,30 +689,29 @@ func (r *Reconciler) generateRepoHostIntent(ctx context.Context, postgresCluster pgbackrest.AddServerToRepoPod(ctx, postgresCluster, &repo.Spec.Template.Spec) - if pgbackrest.RepoHostVolumeDefined(postgresCluster) { - // add the init container to make the pgBackRest repo volume log directory - pgBackRestLogPath := pgbackrest.MakePGBackrestLogDir(&repo.Spec.Template, postgresCluster) + // add the init container to make the pgBackRest repo volume log directory + pgBackRestLogPath := pgbackrest.MakePGBackrestLogDir(&repo.Spec.Template, postgresCluster) - containersToAdd := []string{naming.PGBackRestRepoContainerName} + containersToAdd := []string{naming.PGBackRestRepoContainerName} - // If OpenTelemetryLogs is enabled, we want to add the collector to the pod - // and also add the RepoVolumes to the container. - if postgresCluster.Spec.Instrumentation != nil && feature.Enabled(ctx, feature.OpenTelemetryLogs) { - collector.AddToPod(ctx, postgresCluster.Spec.Instrumentation, postgresCluster.Spec.ImagePullPolicy, - &corev1.ConfigMap{ObjectMeta: naming.PGBackRestConfig(postgresCluster)}, - &repo.Spec.Template, []corev1.VolumeMount{}, "", - []string{pgBackRestLogPath}, true, false) + // If OpenTelemetryLogs is enabled, we want to add the collector to the pod + // and also add the RepoVolumes to the container. 
+ if collector.OpenTelemetryLogsEnabled(ctx, postgresCluster) { + collector.AddToPod(ctx, postgresCluster.Spec.Instrumentation, postgresCluster.Spec.ImagePullPolicy, + &corev1.ConfigMap{ObjectMeta: naming.PGBackRestConfig(postgresCluster)}, + &repo.Spec.Template, []corev1.VolumeMount{}, "", + []string{pgBackRestLogPath}, true, false) - containersToAdd = append(containersToAdd, naming.ContainerCollector) - } + containersToAdd = append(containersToAdd, naming.ContainerCollector) + } - // add pgBackRest repo volumes to pod and to containers - if err := pgbackrest.AddRepoVolumesToPod(postgresCluster, &repo.Spec.Template, - getRepoPVCNames(postgresCluster, repoResources.pvcs), - containersToAdd...); err != nil { - return nil, errors.WithStack(err) - } + // add pgBackRest repo volumes to pod and to containers + if err := pgbackrest.AddRepoVolumesToPod(postgresCluster, &repo.Spec.Template, + getRepoPVCNames(postgresCluster, repoResources.pvcs), + containersToAdd...); err != nil { + return nil, errors.WithStack(err) } + // add configs to pod pgbackrest.AddConfigToRepoPod(postgresCluster, &repo.Spec.Template.Spec) @@ -776,7 +778,7 @@ func (r *Reconciler) generateRepoVolumeIntent(postgresCluster *v1beta1.PostgresC } // generateBackupJobSpecIntent generates a JobSpec for a pgBackRest backup job -func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, +func (r *Reconciler) generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, repo v1beta1.PGBackRestRepo, serviceAccountName string, labels, annotations map[string]string, opts ...string) *batchv1.JobSpec { @@ -793,19 +795,29 @@ func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.P cmdOpts = append(cmdOpts, opts...) container := corev1.Container{ - Command: []string{"/opt/crunchy/bin/pgbackrest"}, - Env: []corev1.EnvVar{ + Image: config.PGBackRestContainerImage(postgresCluster), + ImagePullPolicy: postgresCluster.Spec.ImagePullPolicy, + Name: naming.PGBackRestRepoContainerName, + SecurityContext: initialize.RestrictedSecurityContext(), + } + + // If the repo that we are backing up to is a local volume, we will configure + // the job to use the pgbackrest go binary to exec into the repo host and run + // the backup. If the repo is a cloud-based repo, we will run the pgbackrest + // backup command directly in the job pod. + if repo.Volume != nil { + container.Command = []string{"/opt/crunchy/bin/pgbackrest"} + container.Env = []corev1.EnvVar{ {Name: "COMMAND", Value: "backup"}, {Name: "COMMAND_OPTS", Value: strings.Join(cmdOpts, " ")}, {Name: "COMPARE_HASH", Value: "true"}, {Name: "CONTAINER", Value: naming.PGBackRestRepoContainerName}, {Name: "NAMESPACE", Value: postgresCluster.GetNamespace()}, {Name: "SELECTOR", Value: naming.PGBackRestDedicatedSelector(postgresCluster.GetName()).String()}, - }, - Image: config.PGBackRestContainerImage(postgresCluster), - ImagePullPolicy: postgresCluster.Spec.ImagePullPolicy, - Name: naming.PGBackRestRepoContainerName, - SecurityContext: initialize.RestrictedSecurityContext(), + } + } else { + container.Command = []string{"/bin/pgbackrest", "backup"} + container.Command = append(container.Command, cmdOpts...) 
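+ // The resulting invocation looks like the following; the option values here
+ // are illustrative only, since cmdOpts is built from the cluster spec:
+ //   /bin/pgbackrest backup --stanza=db --repo=1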
} if postgresCluster.Spec.Backups.PGBackRest.Jobs != nil { @@ -861,7 +873,35 @@ func generateBackupJobSpecIntent(ctx context.Context, postgresCluster *v1beta1.P jobSpec.Template.Spec.ImagePullSecrets = postgresCluster.Spec.ImagePullSecrets // add pgBackRest configs to template - pgbackrest.AddConfigToRepoPod(postgresCluster, &jobSpec.Template.Spec) + if repo.Volume != nil { + pgbackrest.AddConfigToRepoPod(postgresCluster, &jobSpec.Template.Spec) + } else { + // If we are doing a cloud repo backup, we need to give pgbackrest proper permissions + // to read certificate files + jobSpec.Template.Spec.SecurityContext = postgres.PodSecurityContext(postgresCluster) + pgbackrest.AddConfigToCloudBackupJob(postgresCluster, &jobSpec.Template) + + // If the user has specified a PVC to use as a log volume via the PGBackRestCloudLogVolume + // annotation, check for the PVC. If we find it, mount it to the backup job. + // Otherwise, create a warning event. + if logVolumeName := postgresCluster.Annotations[naming.PGBackRestCloudLogVolume]; logVolumeName != "" { + logVolume := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: logVolumeName, + Namespace: postgresCluster.GetNamespace(), + }, + } + err := errors.WithStack(r.Client.Get(ctx, + client.ObjectKeyFromObject(logVolume), logVolume)) + if err != nil { + // PVC not retrieved, create warning event + r.Recorder.Event(postgresCluster, corev1.EventTypeWarning, "PGBackRestCloudLogVolumeNotFound", err.Error()) + } else { + // We successfully found the specified PVC, so we will add it to the backup job + util.AddVolumeAndMountsToPod(&jobSpec.Template.Spec, logVolume) + } + } + } return jobSpec } @@ -1166,10 +1206,16 @@ func (r *Reconciler) reconcileRestoreJob(ctx context.Context, "--pg1-path=" + pgdata, "--repo=" + regexRepoIndex.FindString(repoName)}...) + // Look specifically for the "--target" flag, NOT flags that contain + // "--target" (e.g. 
"--target-timeline") + targetRegex, err := regexp.Compile("--target[ =]") + if err != nil { + return err + } var deltaOptFound, foundTarget bool for _, opt := range opts { switch { - case strings.Contains(opt, "--target"): + case targetRegex.MatchString(opt): foundTarget = true case strings.Contains(opt, "--delta"): deltaOptFound = true @@ -1406,14 +1452,19 @@ func (r *Reconciler) reconcilePGBackRest(ctx context.Context, var repoHost *appsv1.StatefulSet var repoHostName string - // reconcile the pgbackrest repository host - repoHost, err = r.reconcileDedicatedRepoHost(ctx, postgresCluster, repoResources, instances, repoHostSA.GetName()) - if err != nil { - log.Error(err, "unable to reconcile pgBackRest repo host") - result.Requeue = true - return result, nil + if pgbackrest.RepoHostVolumeDefined(postgresCluster) { + // reconcile the pgbackrest repository host + repoHost, err = r.reconcileDedicatedRepoHost(ctx, postgresCluster, repoResources, instances, repoHostSA.GetName()) + if err != nil { + log.Error(err, "unable to reconcile pgBackRest repo host") + result.Requeue = true + return result, nil + } + repoHostName = repoHost.GetName() + } else { + // remove the dedicated repo host status if a dedicated host is not enabled + meta.RemoveStatusCondition(&postgresCluster.Status.Conditions, ConditionRepoHostReady) } - repoHostName = repoHost.GetName() if err := r.reconcilePGBackRestSecret(ctx, postgresCluster, repoHost, rootCA); err != nil { log.Error(err, "unable to reconcile pgBackRest secret") @@ -2011,14 +2062,37 @@ func (r *Reconciler) copyConfigurationResources(ctx context.Context, cluster, return nil } -// reconcilePGBackRestConfig is responsible for reconciling the pgBackRest ConfigMaps and Secrets. +// reconcilePGBackRestConfig is responsible for reconciling the pgBackRest ConfigMaps. func (r *Reconciler) reconcilePGBackRestConfig(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, repoHostName, configHash, serviceName, serviceNamespace string, instanceNames []string) error { + // If the user has specified a PVC to use as a log volume for cloud backups via the + // PGBackRestCloudLogVolume annotation, check for the PVC. If we find it, set the cloud + // log path. If the user has specified a PVC, but we can't find it, create a warning event. 
+ cloudLogPath := "" + if logVolumeName := postgresCluster.Annotations[naming.PGBackRestCloudLogVolume]; logVolumeName != "" { + logVolume := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: logVolumeName, + Namespace: postgresCluster.GetNamespace(), + }, + } + err := errors.WithStack(r.Client.Get(ctx, + client.ObjectKeyFromObject(logVolume), logVolume)) + if err != nil { + // PVC not retrieved, create warning event + r.Recorder.Event(postgresCluster, corev1.EventTypeWarning, + "PGBackRestCloudLogVolumeNotFound", err.Error()) + } else { + // We successfully found the specified PVC, so we will set the log path + cloudLogPath = "/volumes/" + logVolumeName + } + } + backrestConfig, err := pgbackrest.CreatePGBackRestConfigMapIntent(ctx, postgresCluster, repoHostName, - configHash, serviceName, serviceNamespace, instanceNames) + configHash, serviceName, serviceNamespace, cloudLogPath, instanceNames) if err != nil { return err } @@ -2229,7 +2303,7 @@ func (r *Reconciler) reconcileDedicatedRepoHost(ctx context.Context, if isCreate { r.Recorder.Eventf(postgresCluster, corev1.EventTypeNormal, EventRepoHostCreated, - "created pgBackRest repository host %s/%s", repoHost.TypeMeta.Kind, repoHostName) + "created pgBackRest repository host %s/%s", repoHost.Kind, repoHostName) } return repoHost, nil @@ -2343,11 +2417,13 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context, return nil } - // determine if the dedicated repository host is ready using the repo host ready + // determine if the dedicated repository host is ready (if enabled) using the repo host ready // condition, and return if not - repoCondition := meta.FindStatusCondition(postgresCluster.Status.Conditions, ConditionRepoHostReady) - if repoCondition == nil || repoCondition.Status != metav1.ConditionTrue { - return nil + if pgbackrest.RepoHostVolumeDefined(postgresCluster) { + repoCondition := meta.FindStatusCondition(postgresCluster.Status.Conditions, ConditionRepoHostReady) + if repoCondition == nil || repoCondition.Status != metav1.ConditionTrue { + return nil + } } // Determine if the replica create backup is complete and return if not. This allows for proper @@ -2413,7 +2489,7 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context, backupJob := &batchv1.Job{} backupJob.ObjectMeta = naming.PGBackRestBackupJob(postgresCluster) if currentBackupJob != nil { - backupJob.ObjectMeta.Name = currentBackupJob.ObjectMeta.Name + backupJob.Name = currentBackupJob.Name } var labels, annotations map[string]string @@ -2426,10 +2502,10 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context, map[string]string{ naming.PGBackRestBackup: manualAnnotation, }) - backupJob.ObjectMeta.Labels = labels - backupJob.ObjectMeta.Annotations = annotations + backupJob.Labels = labels + backupJob.Annotations = annotations - spec := generateBackupJobSpecIntent(ctx, postgresCluster, repo, + spec := r.generateBackupJobSpecIntent(ctx, postgresCluster, repo, serviceAccount.GetName(), labels, annotations, backupOpts...) 
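// --- Illustrative aside, not part of the diff: the gating above keys off
// pgbackrest.RepoHostVolumeDefined. A plausible shape for that helper,
// offered as a sketch under the assumption that a PVC-backed repo is the
// deciding signal, not as the verbatim implementation (assumes the v1beta1
// API package is imported):
func RepoHostVolumeDefined(postgresCluster *v1beta1.PostgresCluster) bool {
	for _, repo := range postgresCluster.Spec.Backups.PGBackRest.Repos {
		if repo.Volume != nil {
			return true // at least one repo stores backups on a local volume
		}
	}
	return false // every repo is cloud-based; no dedicated repo host is needed
}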
backupJob.Spec = *spec @@ -2518,6 +2594,17 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, replicaRepoReady = (condition.Status == metav1.ConditionTrue) } + // TODO: Since we now only exec into the repo host when backing up to a local volume and + // run the backup in the job pod when backing up to a cloud-based repo, we should consider + // using a different value than the container name for the "pgbackrest-config" annotation + // that we attach to these backups + var containerName string + if replicaCreateRepo.Volume != nil { + containerName = naming.PGBackRestRepoContainerName + } else { + containerName = naming.ContainerDatabase + } + // determine if the dedicated repository host is ready using the repo host ready status var dedicatedRepoReady bool condition = meta.FindStatusCondition(postgresCluster.Status.Conditions, ConditionRepoHostReady) @@ -2544,10 +2631,14 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, // - The job has failed. The Job will be deleted and recreated to try again. // - The replica creation repo has changed since the Job was created. Delete and recreate // with the Job with the proper repo configured. + // - The "config" annotation has changed, indicating there is a new primary. Delete and + // recreate the Job with the proper config mounted (applicable when a dedicated repo + // host is not enabled). // - The "config hash" annotation has changed, indicating a configuration change has been // made in the spec (specifically a change to the config for an external repo). Delete // and recreate the Job with proper hash per the current config. if failed || replicaCreateRepoChanged || + (job.GetAnnotations()[naming.PGBackRestCurrentConfig] != containerName) || (job.GetAnnotations()[naming.PGBackRestConfigHash] != configHash) { if err := r.Client.Delete(ctx, job, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil { @@ -2565,7 +2656,8 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, // return if no job has been created and the replica repo or the dedicated // repo host is not ready - if job == nil && (!dedicatedRepoReady || !replicaRepoReady) { + if job == nil && ((pgbackrest.RepoHostVolumeDefined(postgresCluster) && !dedicatedRepoReady) || + !replicaRepoReady) { return nil } @@ -2573,7 +2665,7 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, backupJob := &batchv1.Job{} backupJob.ObjectMeta = naming.PGBackRestBackupJob(postgresCluster) if job != nil { - backupJob.ObjectMeta.Name = job.ObjectMeta.Name + backupJob.Name = job.Name } var labels, annotations map[string]string @@ -2584,12 +2676,13 @@ func (r *Reconciler) reconcileReplicaCreateBackup(ctx context.Context, annotations = naming.Merge(postgresCluster.Spec.Metadata.GetAnnotationsOrNil(), postgresCluster.Spec.Backups.PGBackRest.Metadata.GetAnnotationsOrNil(), map[string]string{ - naming.PGBackRestConfigHash: configHash, + naming.PGBackRestCurrentConfig: containerName, + naming.PGBackRestConfigHash: configHash, }) - backupJob.ObjectMeta.Labels = labels - backupJob.ObjectMeta.Annotations = annotations + backupJob.Labels = labels + backupJob.Annotations = annotations - spec := generateBackupJobSpecIntent(ctx, postgresCluster, replicaCreateRepo, + spec := r.generateBackupJobSpecIntent(ctx, postgresCluster, replicaCreateRepo, serviceAccount.GetName(), labels, annotations) backupJob.Spec = *spec @@ -2754,7 +2847,7 @@ func (r *Reconciler) reconcileStanzaCreate(ctx context.Context, } // Don't record 
event or return an error if configHashMismatch is true, since this just means // configuration changes in ConfigMaps/Secrets have not yet propagated to the container. - // Therefore, just log an an info message and return an error to requeue and try again. + // Therefore, just log an info message and return an error to requeue and try again. if configHashMismatch { return true, nil @@ -3016,7 +3109,7 @@ func (r *Reconciler) reconcilePGBackRestCronJob( // set backup type (i.e. "full", "diff", "incr") backupOpts := []string{"--type=" + backupType} - jobSpec := generateBackupJobSpecIntent(ctx, cluster, repo, + jobSpec := r.generateBackupJobSpecIntent(ctx, cluster, repo, serviceAccount.GetName(), labels, annotations, backupOpts...) // Suspend cronjobs when shutdown or read-only. Any jobs that have already diff --git a/internal/controller/postgrescluster/pgbackrest_test.go b/internal/controller/postgrescluster/pgbackrest_test.go index b63120b719..5e5e43737c 100644 --- a/internal/controller/postgrescluster/pgbackrest_test.go +++ b/internal/controller/postgrescluster/pgbackrest_test.go @@ -40,6 +40,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/pgbackrest" "github.com/crunchydata/postgres-operator/internal/pki" "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/events" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -463,9 +464,10 @@ topologySpreadConstraints: var instanceConfFound, dedicatedRepoConfFound bool for k, v := range config.Data { if v != "" { - if k == pgbackrest.CMInstanceKey { + switch k { + case pgbackrest.CMInstanceKey: instanceConfFound = true - } else if k == pgbackrest.CMRepoKey { + case pgbackrest.CMRepoKey: dedicatedRepoConfFound = true } } @@ -962,7 +964,7 @@ func TestReconcileReplicaCreateBackup(t *testing.T) { var foundOwnershipRef bool // verify ownership refs - for _, ref := range backupJob.ObjectMeta.GetOwnerReferences() { + for _, ref := range backupJob.GetOwnerReferences() { if ref.Name == clusterName { foundOwnershipRef = true break @@ -970,13 +972,17 @@ func TestReconcileReplicaCreateBackup(t *testing.T) { } assert.Assert(t, foundOwnershipRef) - var foundHashAnnotation bool + var foundConfigAnnotation, foundHashAnnotation bool // verify annotations for k, v := range backupJob.GetAnnotations() { + if k == naming.PGBackRestCurrentConfig && v == naming.PGBackRestRepoContainerName { + foundConfigAnnotation = true + } if k == naming.PGBackRestConfigHash && v == configHash { foundHashAnnotation = true } } + assert.Assert(t, foundConfigAnnotation) assert.Assert(t, foundHashAnnotation) // verify container & env vars @@ -1674,7 +1680,14 @@ func TestGetPGBackRestResources(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: naming.PGBackRestDedicatedLabels(clusterName), }, - Spec: corev1.PodSpec{}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "some-container", + Image: "some-image", + }, + }, + }, }, }, }, @@ -1697,11 +1710,11 @@ func TestGetPGBackRestResources(t *testing.T) { jobCount: 0, pvcCount: 0, hostCount: 1, }, }, { - desc: "no dedicated repo host defined, dedicated sts not deleted", + desc: "no dedicated repo host defined delete dedicated sts", createResources: []client.Object{ &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ - Name: "keep-dedicated-two", + Name: "delete-dedicated", Namespace: namespace, Labels: 
naming.PGBackRestDedicatedLabels(clusterName), }, @@ -1712,7 +1725,14 @@ func TestGetPGBackRestResources(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: naming.PGBackRestDedicatedLabels(clusterName), }, - Spec: corev1.PodSpec{}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "some-container", + Image: "some-image", + }, + }, + }, }, }, }, @@ -1730,8 +1750,50 @@ func TestGetPGBackRestResources(t *testing.T) { }, }, result: testResult{ - // Host count is 2 due to previous repo host sts not being deleted. - jobCount: 0, pvcCount: 0, hostCount: 2, + jobCount: 0, pvcCount: 0, hostCount: 0, + }, + }, { + desc: "no repo host defined delete dedicated sts", + createResources: []client.Object{ + &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "delete-dedicated-no-repo-host", + Namespace: namespace, + Labels: naming.PGBackRestDedicatedLabels(clusterName), + }, + Spec: appsv1.StatefulSetSpec{ + Selector: metav1.SetAsLabelSelector( + naming.PGBackRestDedicatedLabels(clusterName)), + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: naming.PGBackRestDedicatedLabels(clusterName), + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "some-container", + Image: "some-image", + }, + }, + }, + }, + }, + }, + }, + cluster: &v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: namespace, + UID: types.UID(clusterUID), + }, + Spec: v1beta1.PostgresClusterSpec{ + Backups: v1beta1.Backups{ + PGBackRest: v1beta1.PGBackRestArchive{}, + }, + }, + }, + result: testResult{ + jobCount: 0, pvcCount: 0, hostCount: 0, }, }} @@ -1777,6 +1839,9 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount, jobCount, pvcCount int invalidSourceRepo, invalidSourceCluster, invalidOptions bool expectedClusterCondition *metav1.Condition + expectedEventMessage string + expectedCommandPieces []string + missingCommandPieces []string } for _, dedicated := range []bool{true, false} { @@ -1799,6 +1864,8 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 1, pvcCount: 1, invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: false, expectedClusterCondition: nil, + expectedCommandPieces: []string{"--stanza=", "--pg1-path=", "--repo=", "--delta"}, + missingCommandPieces: []string{"--target-action"}, }, }, { desc: "invalid source cluster", @@ -1812,6 +1879,7 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 0, jobCount: 0, pvcCount: 0, invalidSourceRepo: false, invalidSourceCluster: true, invalidOptions: false, expectedClusterCondition: nil, + expectedEventMessage: "does not exist", }, }, { desc: "invalid source repo", @@ -1825,6 +1893,7 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 0, pvcCount: 0, invalidSourceRepo: true, invalidSourceCluster: false, invalidOptions: false, expectedClusterCondition: nil, + expectedEventMessage: "does not have a repo named", }, }, { desc: "invalid option: --repo=", @@ -1839,6 +1908,7 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 0, pvcCount: 1, invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, expectedClusterCondition: nil, + expectedEventMessage: "Option '--repo' is not allowed: please use the 'repoName' field instead.", }, }, { desc: "invalid option: --repo ", @@ -1853,6 +1923,7 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 0, 
pvcCount: 1, invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, expectedClusterCondition: nil, + expectedEventMessage: "Option '--repo' is not allowed: please use the 'repoName' field instead.", }, }, { desc: "invalid option: stanza", @@ -1867,6 +1938,7 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 0, pvcCount: 1, invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, expectedClusterCondition: nil, + expectedEventMessage: "Option '--stanza' is not allowed: the operator will automatically set this option", }, }, { desc: "invalid option: pg1-path", @@ -1881,6 +1953,68 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { configCount: 1, jobCount: 0, pvcCount: 1, invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, expectedClusterCondition: nil, + expectedEventMessage: "Option '--pg1-path' is not allowed: the operator will automatically set this option", + }, + }, { + desc: "invalid option: target-action", + dataSource: &v1beta1.DataSource{PostgresCluster: &v1beta1.PostgresClusterDataSource{ + ClusterName: "invalid-target-action-option", RepoName: "repo1", + Options: []string{"--target-action"}, + }}, + clusterBootstrapped: false, + sourceClusterName: "invalid-target-action-option", + sourceClusterRepos: []v1beta1.PGBackRestRepo{{Name: "repo1"}}, + result: testResult{ + configCount: 1, jobCount: 0, pvcCount: 1, + invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, + expectedClusterCondition: nil, + expectedEventMessage: "Option '--target-action' is not allowed: the operator will automatically set this option", + }, + }, { + desc: "invalid option: link-map", + dataSource: &v1beta1.DataSource{PostgresCluster: &v1beta1.PostgresClusterDataSource{ + ClusterName: "invalid-link-map-option", RepoName: "repo1", + Options: []string{"--link-map"}, + }}, + clusterBootstrapped: false, + sourceClusterName: "invalid-link-map-option", + sourceClusterRepos: []v1beta1.PGBackRestRepo{{Name: "repo1"}}, + result: testResult{ + configCount: 1, jobCount: 0, pvcCount: 1, + invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: true, + expectedClusterCondition: nil, + expectedEventMessage: "Option '--link-map' is not allowed: the operator will automatically set this option", + }, + }, { + desc: "valid option: target-timeline", + dataSource: &v1beta1.DataSource{PostgresCluster: &v1beta1.PostgresClusterDataSource{ + ClusterName: "valid-target-timeline-option", RepoName: "repo1", + Options: []string{"--target-timeline=1"}, + }}, + clusterBootstrapped: false, + sourceClusterName: "valid-target-timeline-option", + sourceClusterRepos: []v1beta1.PGBackRestRepo{{Name: "repo1"}}, + result: testResult{ + configCount: 1, jobCount: 1, pvcCount: 1, + invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: false, + expectedClusterCondition: nil, + expectedCommandPieces: []string{"--stanza=", "--pg1-path=", "--repo=", "--delta", "--target-timeline=1"}, + missingCommandPieces: []string{"--target=", "--target-action=promote"}, + }, + }, { + desc: "valid option: target", + dataSource: &v1beta1.DataSource{PostgresCluster: &v1beta1.PostgresClusterDataSource{ + ClusterName: "valid-target-option", RepoName: "repo1", + Options: []string{"--target=some-date"}, + }}, + clusterBootstrapped: false, + sourceClusterName: "valid-target-option", + sourceClusterRepos: []v1beta1.PGBackRestRepo{{Name: "repo1"}}, + result: testResult{ + configCount: 1, jobCount: 1, pvcCount: 
1, + invalidSourceRepo: false, invalidSourceCluster: false, invalidOptions: false, + expectedClusterCondition: nil, + expectedCommandPieces: []string{"--stanza=", "--pg1-path=", "--repo=", "--delta", "--target=some-date", "--target-action=promote"}, }, }, { desc: "cluster bootstrapped init condition missing", @@ -2003,6 +2137,16 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { if len(restoreJobs.Items) == 1 { assert.Assert(t, restoreJobs.Items[0].Labels[naming.LabelStartupInstance] != "") assert.Assert(t, restoreJobs.Items[0].Annotations[naming.PGBackRestConfigHash] != "") + for _, cmd := range tc.result.expectedCommandPieces { + assert.Assert(t, cmp.Contains( + strings.Join(restoreJobs.Items[0].Spec.Template.Spec.Containers[0].Command, " "), + cmd)) + } + for _, cmd := range tc.result.missingCommandPieces { + assert.Assert(t, !strings.Contains( + strings.Join(restoreJobs.Items[0].Spec.Template.Spec.Containers[0].Command, " "), + cmd)) + } } dataPVCs := &corev1.PersistentVolumeClaimList{} @@ -2040,7 +2184,11 @@ func TestReconcilePostgresClusterDataSource(t *testing.T) { "involvedObject.namespace": namespace, "reason": "InvalidDataSource", }) - return len(events.Items) == 1, err + eventExists := len(events.Items) > 0 + if eventExists { + assert.Assert(t, cmp.Contains(events.Items[0].Message, tc.result.expectedEventMessage)) + } + return eventExists, err })) } }) @@ -2475,10 +2623,90 @@ func TestCopyConfigurationResources(t *testing.T) { } func TestGenerateBackupJobIntent(t *testing.T) { + _, cc := setupKubernetes(t) + require.ParallelCapacity(t, 0) + ns := setupNamespace(t, cc) + + r := &Reconciler{ + Client: cc, + Owner: ControllerName, + } + ctx := context.Background() + cluster := v1beta1.PostgresCluster{} + cluster.Name = "hippo-test" + cluster.Default() + + // If repo.Volume is nil, the code interprets this as a cloud repo backup, + // therefore, an "empty" input results in a job spec for a cloud repo backup t.Run("empty", func(t *testing.T) { - spec := generateBackupJobSpecIntent(ctx, - &v1beta1.PostgresCluster{}, v1beta1.PGBackRestRepo{}, + spec := r.generateBackupJobSpecIntent(ctx, + &cluster, v1beta1.PGBackRestRepo{}, + "", + nil, nil, + ) + assert.Assert(t, cmp.MarshalMatches(spec.Template.Spec, ` +containers: +- command: + - /bin/pgbackrest + - backup + - --stanza=db + - --repo= + name: pgbackrest + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /etc/pgbackrest/conf.d + name: pgbackrest-config + readOnly: true + - mountPath: /tmp + name: tmp +enableServiceLinks: false +restartPolicy: Never +securityContext: + fsGroup: 26 + fsGroupChangePolicy: OnRootMismatch +volumes: +- name: pgbackrest-config + projected: + sources: + - configMap: + items: + - key: pgbackrest_cloud.conf + path: pgbackrest_cloud.conf + name: hippo-test-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-test-pgbackrest +- emptyDir: + sizeLimit: 16Mi + name: tmp + `)) + }) + + t.Run("volumeRepo", func(t *testing.T) { + spec := r.generateBackupJobSpecIntent(ctx, + &cluster, v1beta1.PGBackRestRepo{ + Volume: &v1beta1.RepoPVC{ + VolumeClaimSpec: v1beta1.VolumeClaimSpec{}, + }, + }, "", 
nil, nil, ) @@ -2497,7 +2725,7 @@ containers: value: pgbackrest - name: NAMESPACE - name: SELECTOR - value: postgres-operator.crunchydata.com/cluster=,postgres-operator.crunchydata.com/pgbackrest=,postgres-operator.crunchydata.com/pgbackrest-dedicated= + value: postgres-operator.crunchydata.com/cluster=hippo-test,postgres-operator.crunchydata.com/pgbackrest=,postgres-operator.crunchydata.com/pgbackrest-dedicated= name: pgbackrest resources: {} securityContext: @@ -2530,7 +2758,7 @@ volumes: path: config-hash - key: pgbackrest-server.conf path: ~postgres-operator_server.conf - name: -pgbackrest-config + name: hippo-test-pgbackrest-config - secret: items: - key: pgbackrest.ca-roots @@ -2540,7 +2768,7 @@ volumes: - key: pgbackrest-client.key mode: 384 path: ~postgres-operator/client-tls.key - name: -pgbackrest + name: hippo-test-pgbackrest `)) }) @@ -2550,7 +2778,7 @@ volumes: ImagePullPolicy: corev1.PullAlways, }, } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2565,7 +2793,7 @@ volumes: cluster.Spec.Backups = v1beta1.Backups{ PGBackRest: v1beta1.PGBackRestArchive{}, } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2582,7 +2810,7 @@ volumes: }, }, } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2621,7 +2849,7 @@ volumes: }, }, } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2634,7 +2862,7 @@ volumes: cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{ PriorityClassName: initialize.String("some-priority-class"), } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2652,7 +2880,7 @@ volumes: cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{ Tolerations: tolerations, } - job := generateBackupJobSpecIntent(ctx, + job := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, @@ -2666,14 +2894,14 @@ volumes: t.Run("Undefined", func(t *testing.T) { cluster.Spec.Backups.PGBackRest.Jobs = nil - spec := generateBackupJobSpecIntent(ctx, + spec := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) assert.Assert(t, spec.TTLSecondsAfterFinished == nil) cluster.Spec.Backups.PGBackRest.Jobs = &v1beta1.BackupJobs{} - spec = generateBackupJobSpecIntent(ctx, + spec = r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) assert.Assert(t, spec.TTLSecondsAfterFinished == nil) @@ -2684,7 +2912,7 @@ volumes: TTLSecondsAfterFinished: initialize.Int32(0), } - spec := generateBackupJobSpecIntent(ctx, + spec := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) if assert.Check(t, spec.TTLSecondsAfterFinished != nil) { @@ -2697,7 +2925,7 @@ volumes: TTLSecondsAfterFinished: initialize.Int32(100), } - spec := generateBackupJobSpecIntent(ctx, + spec := r.generateBackupJobSpecIntent(ctx, cluster, v1beta1.PGBackRestRepo{}, "", nil, nil, ) if assert.Check(t, spec.TTLSecondsAfterFinished != nil) { @@ -2705,6 +2933,164 @@ volumes: } }) }) + + t.Run("CloudLogVolumeAnnotationNoPvc", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + r.Recorder = recorder + + cluster.Namespace = ns.Name + cluster.Annotations = 
map[string]string{} + cluster.Annotations[naming.PGBackRestCloudLogVolume] = "some-pvc" + spec := r.generateBackupJobSpecIntent(ctx, + &cluster, v1beta1.PGBackRestRepo{}, + "", + nil, nil, + ) + assert.Assert(t, cmp.MarshalMatches(spec.Template.Spec, ` +containers: +- command: + - /bin/pgbackrest + - backup + - --stanza=db + - --repo= + name: pgbackrest + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /etc/pgbackrest/conf.d + name: pgbackrest-config + readOnly: true + - mountPath: /tmp + name: tmp +enableServiceLinks: false +restartPolicy: Never +securityContext: + fsGroup: 26 + fsGroupChangePolicy: OnRootMismatch +volumes: +- name: pgbackrest-config + projected: + sources: + - configMap: + items: + - key: pgbackrest_cloud.conf + path: pgbackrest_cloud.conf + name: hippo-test-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-test-pgbackrest +- emptyDir: + sizeLimit: 16Mi + name: tmp + `)) + + assert.Equal(t, len(recorder.Events), 1) + assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) + assert.Equal(t, recorder.Events[0].Reason, "PGBackRestCloudLogVolumeNotFound") + assert.Equal(t, recorder.Events[0].Note, "persistentvolumeclaims \"some-pvc\" not found") + }) + + t.Run("CloudLogVolumeAnnotationPvcInPlace", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + r.Recorder = recorder + + cluster.Namespace = ns.Name + cluster.Annotations = map[string]string{} + cluster.Annotations[naming.PGBackRestCloudLogVolume] = "another-pvc" + + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "another-pvc", + Namespace: ns.Name, + }, + Spec: corev1.PersistentVolumeClaimSpec(testVolumeClaimSpec()), + } + err := r.Client.Create(ctx, pvc) + assert.NilError(t, err) + + spec := r.generateBackupJobSpecIntent(ctx, + &cluster, v1beta1.PGBackRestRepo{}, + "", + nil, nil, + ) + assert.Assert(t, cmp.MarshalMatches(spec.Template.Spec, ` +containers: +- command: + - /bin/pgbackrest + - backup + - --stanza=db + - --repo= + name: pgbackrest + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + volumeMounts: + - mountPath: /etc/pgbackrest/conf.d + name: pgbackrest-config + readOnly: true + - mountPath: /tmp + name: tmp + - mountPath: /volumes/another-pvc + name: another-pvc +enableServiceLinks: false +restartPolicy: Never +securityContext: + fsGroup: 26 + fsGroupChangePolicy: OnRootMismatch +volumes: +- name: pgbackrest-config + projected: + sources: + - configMap: + items: + - key: pgbackrest_cloud.conf + path: pgbackrest_cloud.conf + name: hippo-test-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-test-pgbackrest +- emptyDir: + sizeLimit: 16Mi + name: tmp +- name: another-pvc + persistentVolumeClaim: + claimName: another-pvc + `)) + + // No events created 
+ assert.Equal(t, len(recorder.Events), 0) + }) } func TestGenerateRepoHostIntent(t *testing.T) { @@ -2842,11 +3228,11 @@ func TestGenerateRestoreJobIntent(t *testing.T) { t.Run(fmt.Sprintf("openshift-%v", openshift), func(t *testing.T) { t.Run("ObjectMeta", func(t *testing.T) { t.Run("Name", func(t *testing.T) { - assert.Equal(t, job.ObjectMeta.Name, + assert.Equal(t, job.Name, naming.PGBackRestRestoreJob(cluster).Name) }) t.Run("Namespace", func(t *testing.T) { - assert.Equal(t, job.ObjectMeta.Namespace, + assert.Equal(t, job.Namespace, naming.PGBackRestRestoreJob(cluster).Namespace) }) t.Run("Annotations", func(t *testing.T) { @@ -3069,15 +3455,15 @@ func TestObserveRestoreEnv(t *testing.T) { createResources: func(t *testing.T, cluster *v1beta1.PostgresCluster) { fakeLeaderEP := &corev1.Endpoints{} fakeLeaderEP.ObjectMeta = naming.PatroniLeaderEndpoints(cluster) - fakeLeaderEP.ObjectMeta.Namespace = namespace + fakeLeaderEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeLeaderEP)) fakeDCSEP := &corev1.Endpoints{} fakeDCSEP.ObjectMeta = naming.PatroniDistributedConfiguration(cluster) - fakeDCSEP.ObjectMeta.Namespace = namespace + fakeDCSEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeDCSEP)) fakeFailoverEP := &corev1.Endpoints{} fakeFailoverEP.ObjectMeta = naming.PatroniTrigger(cluster) - fakeFailoverEP.ObjectMeta.Namespace = namespace + fakeFailoverEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeFailoverEP)) job := generateJob(cluster.Name, initialize.Bool(false), initialize.Bool(false)) @@ -3093,15 +3479,15 @@ func TestObserveRestoreEnv(t *testing.T) { createResources: func(t *testing.T, cluster *v1beta1.PostgresCluster) { fakeLeaderEP := &corev1.Endpoints{} fakeLeaderEP.ObjectMeta = naming.PatroniLeaderEndpoints(cluster) - fakeLeaderEP.ObjectMeta.Namespace = namespace + fakeLeaderEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeLeaderEP)) fakeDCSEP := &corev1.Endpoints{} fakeDCSEP.ObjectMeta = naming.PatroniDistributedConfiguration(cluster) - fakeDCSEP.ObjectMeta.Namespace = namespace + fakeDCSEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeDCSEP)) fakeFailoverEP := &corev1.Endpoints{} fakeFailoverEP.ObjectMeta = naming.PatroniTrigger(cluster) - fakeFailoverEP.ObjectMeta.Namespace = namespace + fakeFailoverEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, fakeFailoverEP)) }, result: testResult{ @@ -3271,15 +3657,15 @@ func TestPrepareForRestore(t *testing.T) { cluster *v1beta1.PostgresCluster) (*batchv1.Job, []corev1.Endpoints) { fakeLeaderEP := corev1.Endpoints{} fakeLeaderEP.ObjectMeta = naming.PatroniLeaderEndpoints(cluster) - fakeLeaderEP.ObjectMeta.Namespace = namespace + fakeLeaderEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, &fakeLeaderEP)) fakeDCSEP := corev1.Endpoints{} fakeDCSEP.ObjectMeta = naming.PatroniDistributedConfiguration(cluster) - fakeDCSEP.ObjectMeta.Namespace = namespace + fakeDCSEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, &fakeDCSEP)) fakeFailoverEP := corev1.Endpoints{} fakeFailoverEP.ObjectMeta = naming.PatroniTrigger(cluster) - fakeFailoverEP.ObjectMeta.Namespace = namespace + fakeFailoverEP.Namespace = namespace assert.NilError(t, r.Client.Create(ctx, &fakeFailoverEP)) return nil, []corev1.Endpoints{fakeLeaderEP, fakeDCSEP, fakeFailoverEP} }, diff --git a/internal/controller/postgrescluster/pgbouncer.go b/internal/controller/postgrescluster/pgbouncer.go index 660572005a..671b284299 100644 --- 
a/internal/controller/postgrescluster/pgbouncer.go +++ b/internal/controller/postgrescluster/pgbouncer.go @@ -19,7 +19,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "github.com/crunchydata/postgres-operator/internal/collector" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" @@ -99,13 +98,11 @@ func (r *Reconciler) reconcilePGBouncerConfigMap( pgbouncer.ConfigMap(ctx, cluster, configmap) } // If OTel logging or metrics is enabled, add collector config - if otelConfig != nil && - (feature.Enabled(ctx, feature.OpenTelemetryLogs) || - feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + if collector.OpenTelemetryLogsOrMetricsEnabled(ctx, cluster) { err = collector.AddToConfigMap(ctx, otelConfig, configmap) } // If OTel logging is enabled, add logrotate config - if err == nil && otelConfig != nil && feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if err == nil && collector.OpenTelemetryLogsEnabled(ctx, cluster) { logrotateConfig := collector.LogrotateConfig{ LogFiles: []string{naming.PGBouncerFullLogPath}, PostrotateScript: collector.PGBouncerPostRotateScript, diff --git a/internal/controller/postgrescluster/pgbouncer_test.go b/internal/controller/postgrescluster/pgbouncer_test.go index 3785a50695..6d389c3bad 100644 --- a/internal/controller/postgrescluster/pgbouncer_test.go +++ b/internal/controller/postgrescluster/pgbouncer_test.go @@ -105,12 +105,12 @@ ownerReferences: assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", }) // Labels present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "postgres-operator.crunchydata.com/cluster": "pg7", "postgres-operator.crunchydata.com/role": "pgbouncer", @@ -136,13 +136,13 @@ ownerReferences: assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, service.Annotations, map[string]string{ "a": "v1", "c": "v3", }) // Labels present in the metadata. - assert.DeepEqual(t, service.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, service.Labels, map[string]string{ "b": "v2", "d": "v4", "postgres-operator.crunchydata.com/cluster": "pg7", @@ -420,12 +420,12 @@ namespace: ns3 assert.Assert(t, specified) // Annotations present in the metadata. - assert.DeepEqual(t, deploy.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, deploy.Annotations, map[string]string{ "a": "v1", }) // Labels present in the metadata. 
- assert.DeepEqual(t, deploy.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, deploy.Labels, map[string]string{ "b": "v2", "postgres-operator.crunchydata.com/cluster": "test-cluster", "postgres-operator.crunchydata.com/role": "pgbouncer", diff --git a/internal/controller/postgrescluster/pgmonitor.go b/internal/controller/postgrescluster/pgmonitor.go index 84b955559a..37ded3ff66 100644 --- a/internal/controller/postgrescluster/pgmonitor.go +++ b/internal/controller/postgrescluster/pgmonitor.go @@ -16,6 +16,7 @@ import ( corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" @@ -42,6 +43,7 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, monitoringSecret *corev1.Secret) error { var ( + err error writableInstance *Instance writablePod *corev1.Pod setup string @@ -62,24 +64,12 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, // the `EnableExporterInPostgreSQL` funcs; that way we are always running // that function against an updated and running pod. - if pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { - sql, err := os.ReadFile(fmt.Sprintf("%s/pg%d/setup.sql", pgmonitor.GetQueriesConfigDir(ctx), cluster.Spec.PostgresVersion)) + if pgmonitor.ExporterEnabled(ctx, cluster) || collector.OpenTelemetryMetricsEnabled(ctx, cluster) { + setup, err = r.reconcileExporterSqlSetup(ctx, cluster) if err != nil { return err } - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { - setup = metricsSetupForOTelCollector - } else { - // TODO: Revisit how pgbackrest_info.sh is used with pgMonitor. - // pgMonitor queries expect a path to a script that runs pgBackRest - // info and provides json output. In the queries yaml for pgBackRest - // the default path is `/usr/bin/pgbackrest-info.sh`. We update - // the path to point to the script in our database image. - setup = strings.ReplaceAll(string(sql), "/usr/bin/pgbackrest-info.sh", - "/opt/crunchy/bin/postgres/pgbackrest_info.sh") - } - for _, containerStatus := range writablePod.Status.ContainerStatuses { if containerStatus.Name == naming.ContainerDatabase { pgImageSHA = containerStatus.ImageID @@ -99,7 +89,7 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, return pgmonitor.EnableExporterInPostgreSQL(ctx, exec, monitoringSecret, pgmonitor.ExporterDB, setup) } - if !pgmonitor.ExporterEnabled(ctx, cluster) && !feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if !pgmonitor.ExporterEnabled(ctx, cluster) && !collector.OpenTelemetryMetricsEnabled(ctx, cluster) { action = func(ctx context.Context, exec postgres.Executor) error { return pgmonitor.DisableMonitoringUserInPostgres(ctx, exec) } @@ -144,6 +134,47 @@ func (r *Reconciler) reconcilePGMonitorExporter(ctx context.Context, return err } +// reconcileExporterSqlSetup generates the setup.sql string based on +// whether the OTel metrics feature is enabled and on the postgres +// version in use. This function assumes that at least one of +// OTel metrics or postgres_exporter is enabled. +func (r *Reconciler) reconcileExporterSqlSetup(ctx context.Context, + cluster *v1beta1.PostgresCluster) (string, error) { + + // If OTel Metrics is enabled, we always want to use it. 
Otherwise, + // we can assume that postgres_exporter is enabled and we should + // use that + if collector.OpenTelemetryMetricsEnabled(ctx, cluster) { + return metricsSetupForOTelCollector, nil + } + + // pgMonitor will not be adding support for postgres_exporter for postgres + // versions past 17. If using postgres 18 or later with the postgres_exporter, + // create a warning event and set the sql setup to an empty string + pgVersion := cluster.Spec.PostgresVersion + if pgVersion > 17 { + r.Recorder.Eventf(cluster, corev1.EventTypeWarning, "ExporterNotSupportedForPostgresVersion", + "postgres_exporter not supported for pg%d; use OTel for postgres 18 and later", + pgVersion) + return "", nil + } + + // OTel Metrics is not enabled and postgres is version 17 or less, + // go ahead and read the appropriate sql file, format the string, + // and return it + sql, err := os.ReadFile(fmt.Sprintf("%s/pg%d/setup.sql", pgmonitor.GetQueriesConfigDir(ctx), pgVersion)) + if err != nil { + return "", err + } + // TODO: Revisit how pgbackrest_info.sh is used with pgMonitor. + // pgMonitor queries expect a path to a script that runs pgBackRest + // info and provides json output. In the queries yaml for pgBackRest + // the default path is `/usr/bin/pgbackrest-info.sh`. We update + // the path to point to the script in our database image. + return strings.ReplaceAll(string(sql), "/usr/bin/pgbackrest-info.sh", + "/opt/crunchy/bin/postgres/pgbackrest_info.sh"), nil +} + // reconcileMonitoringSecret reconciles the secret containing authentication // for monitoring tools func (r *Reconciler) reconcileMonitoringSecret( @@ -161,7 +192,7 @@ func (r *Reconciler) reconcileMonitoringSecret( // is enabled to determine when monitoring secret should be created, // since our implementation of the SqlQuery receiver in the OTel Collector // uses the monitoring user as well. 
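// --- Illustrative aside, not part of the diff: condensed, the setup
// selection in reconcileExporterSqlSetup above is a three-way decision.
// readSetupSQL is a hypothetical stand-in for the os.ReadFile plus
// strings.ReplaceAll steps shown in the function body.
switch {
case collector.OpenTelemetryMetricsEnabled(ctx, cluster):
	setup = metricsSetupForOTelCollector // OTel always wins when enabled
case cluster.Spec.PostgresVersion > 17:
	setup = "" // postgres_exporter is unsupported; a warning event is recorded
default:
	setup, err = readSetupSQL(ctx, cluster.Spec.PostgresVersion) // pg%d/setup.sql with the script path rewritten
}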
- if !pgmonitor.ExporterEnabled(ctx, cluster) && !feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if !pgmonitor.ExporterEnabled(ctx, cluster) && !collector.OpenTelemetryMetricsEnabled(ctx, cluster) { if err == nil { err = errors.WithStack(r.deleteControlled(ctx, cluster, existing)) } @@ -234,7 +265,7 @@ func addPGMonitorExporterToInstancePodSpec( template *corev1.PodTemplateSpec, exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap) { - if !pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if !pgmonitor.ExporterEnabled(ctx, cluster) || collector.OpenTelemetryMetricsEnabled(ctx, cluster) { return } @@ -329,7 +360,7 @@ func addPGMonitorExporterToInstancePodSpec( }, }, } - configVolume.VolumeSource.Projected.Sources = append(configVolume.VolumeSource.Projected.Sources, + configVolume.Projected.Sources = append(configVolume.Projected.Sources, defaultConfigVolumeProjection) } @@ -374,7 +405,7 @@ func addPGMonitorExporterToInstancePodSpec( func (r *Reconciler) reconcileExporterWebConfig(ctx context.Context, cluster *v1beta1.PostgresCluster) (*corev1.ConfigMap, error) { - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if collector.OpenTelemetryMetricsEnabled(ctx, cluster) { return nil, nil } @@ -384,7 +415,9 @@ func (r *Reconciler) reconcileExporterWebConfig(ctx context.Context, return nil, err } - if !pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) || cluster.Spec.Monitoring.PGMonitor.Exporter.CustomTLSSecret == nil { + if !pgmonitor.ExporterEnabled(ctx, cluster) || + collector.OpenTelemetryMetricsEnabled(ctx, cluster) || + cluster.Spec.Monitoring.PGMonitor.Exporter.CustomTLSSecret == nil { // We could still have a NotFound error here so check the err. // If no error that means the configmap is found and needs to be deleted if err == nil { @@ -441,7 +474,7 @@ func (r *Reconciler) reconcileExporterQueriesConfig(ctx context.Context, return nil, err } - if !pgmonitor.ExporterEnabled(ctx, cluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if !pgmonitor.ExporterEnabled(ctx, cluster) || collector.OpenTelemetryMetricsEnabled(ctx, cluster) { // We could still have a NotFound error here so check the err. 
// If no error that means the configmap is found and needs to be deleted if err == nil { diff --git a/internal/controller/postgrescluster/pgmonitor_test.go b/internal/controller/postgrescluster/pgmonitor_test.go index bf46dd204b..084ed01755 100644 --- a/internal/controller/postgrescluster/pgmonitor_test.go +++ b/internal/controller/postgrescluster/pgmonitor_test.go @@ -20,10 +20,12 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/events" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -551,8 +553,7 @@ func TestReconcilePGMonitorExporter(t *testing.T) { observed := &observedInstances{forCluster: instances} called = false - assert.NilError(t, reconciler.reconcilePGMonitorExporter(ctx, - cluster, observed, nil)) + assert.NilError(t, reconciler.reconcilePGMonitorExporter(ctx, cluster, observed, nil)) assert.Assert(t, called, "PodExec was not called.") assert.Assert(t, cluster.Status.Monitoring.ExporterConfiguration != "", "ExporterConfiguration was empty.") }) @@ -598,7 +599,7 @@ func TestReconcilePGMonitorExporterStatus(t *testing.T) { podExecCalled: false, // Status was generated manually for this test case // TODO (jmckulk): add code to generate status - status: v1beta1.MonitoringStatus{ExporterConfiguration: "5c5f955485"}, + status: v1beta1.MonitoringStatus{ExporterConfiguration: "86cdc4f778"}, statusChangedAfterReconcile: false, }} { t.Run(test.name, func(t *testing.T) { @@ -830,6 +831,147 @@ func TestReconcileExporterQueriesConfig(t *testing.T) { actual, err = reconciler.reconcileExporterQueriesConfig(ctx, cluster) assert.NilError(t, err) assert.Assert(t, actual.Data["defaultQueries.yml"] == existing.Data["defaultQueries.yml"], "Data does not align.") + assert.Assert(t, actual.Data["defaultQueries.yml"] != "", "Data should not be empty.") + }) + + t.Run("Pg>17", func(t *testing.T) { + cluster.Spec.PostgresVersion = 18 + actual, err = reconciler.reconcileExporterQueriesConfig(ctx, cluster) + assert.NilError(t, err) + assert.Assert(t, actual.Data["defaultQueries.yml"] == "", "Data should be empty") }) }) } + +// TestReconcileExporterSqlSetup checks that the setup script returned +// by reconcileExporterSqlSetup is either empty or not depending on +// which exporter is enabled and what the postgres version is. 
+func TestReconcileExporterSqlSetup(t *testing.T) { + ctx := context.Background() + + monitoringSpec := &v1beta1.MonitoringSpec{ + PGMonitor: &v1beta1.PGMonitorSpec{ + Exporter: &v1beta1.ExporterSpec{ + Image: "image", + }, + }, + } + + instrumentationSpec := &v1beta1.InstrumentationSpec{ + Image: "image", + } + + testCases := []struct { + tcName string + postgresVersion int + exporterEnabled bool + otelMetricsEnabled bool + errorPresent bool + setupEmpty bool + expectedNumEvents int + expectedEvent string + }{{ + tcName: "ExporterEnabledOtelDisabled", + postgresVersion: 17, + exporterEnabled: true, + otelMetricsEnabled: false, + errorPresent: false, + setupEmpty: false, + expectedNumEvents: 0, + expectedEvent: "", + }, { + tcName: "ExporterDisabledOtelEnabled", + postgresVersion: 17, + exporterEnabled: false, + otelMetricsEnabled: true, + errorPresent: false, + setupEmpty: false, + expectedNumEvents: 0, + expectedEvent: "", + }, { + tcName: "BothEnabled", + postgresVersion: 17, + exporterEnabled: true, + otelMetricsEnabled: true, + errorPresent: false, + setupEmpty: false, + expectedNumEvents: 0, + expectedEvent: "", + }, { + tcName: "ExporterEnabledOtelDisabledPostgres18", + postgresVersion: 18, + exporterEnabled: true, + otelMetricsEnabled: false, + errorPresent: false, + setupEmpty: true, + expectedNumEvents: 1, + expectedEvent: "postgres_exporter not supported for pg18; use OTel for postgres 18 and later", + }, { + tcName: "ExporterDisabledOtelEnabledPostgres18", + postgresVersion: 18, + exporterEnabled: false, + otelMetricsEnabled: true, + errorPresent: false, + setupEmpty: false, + expectedNumEvents: 0, + expectedEvent: "", + }, { + tcName: "BothEnabledPostgres18", + postgresVersion: 18, + exporterEnabled: true, + otelMetricsEnabled: true, + errorPresent: false, + setupEmpty: false, + expectedNumEvents: 0, + expectedEvent: "", + }, { + tcName: "ExporterEnabledOtelDisabledBadPostgresVersion", + postgresVersion: 1, + exporterEnabled: true, + otelMetricsEnabled: false, + errorPresent: true, + setupEmpty: true, + expectedNumEvents: 0, + expectedEvent: "", + }} + + for _, tc := range testCases { + t.Run(tc.tcName, func(t *testing.T) { + cluster := testCluster() + cluster.Spec.PostgresVersion = tc.postgresVersion + + recorder := events.NewRecorder(t, runtime.Scheme) + r := &Reconciler{Recorder: recorder} + + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: tc.otelMetricsEnabled, + })) + ctx := feature.NewContext(ctx, gate) + + if tc.otelMetricsEnabled { + cluster.Spec.Instrumentation = instrumentationSpec + } + + if tc.exporterEnabled { + cluster.Spec.Monitoring = monitoringSpec + } + + setup, err := r.reconcileExporterSqlSetup(ctx, cluster) + if tc.errorPresent { + assert.Assert(t, err != nil) + } else { + assert.NilError(t, err) + } + assert.Equal(t, setup == "", tc.setupEmpty) + + assert.Equal(t, len(recorder.Events), tc.expectedNumEvents) + if tc.expectedNumEvents == 1 { + assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) + assert.Equal(t, recorder.Events[0].Reason, "ExporterNotSupportedForPostgresVersion") + assert.Equal(t, recorder.Events[0].Note, tc.expectedEvent) + assert.Equal(t, recorder.Events[0].Type, corev1.EventTypeWarning) + } + }) + } +} diff --git a/internal/controller/postgrescluster/pki.go b/internal/controller/postgrescluster/pki.go index 787daef212..d52d6a75da 100644 --- a/internal/controller/postgrescluster/pki.go +++ b/internal/controller/postgrescluster/pki.go @@ -63,7 +63,7 @@ func (r 
*Reconciler) reconcileRootCertificate( intent.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Secret")) intent.Namespace, intent.Name = cluster.Namespace, naming.RootCertSecret intent.Data = make(map[string][]byte) - intent.ObjectMeta.OwnerReferences = existing.ObjectMeta.OwnerReferences + intent.OwnerReferences = existing.OwnerReferences // A root secret is scoped to the namespace where postgrescluster(s) // are deployed. For operator deployments with postgresclusters in more than @@ -140,7 +140,7 @@ func (r *Reconciler) reconcileClusterCertificate( intent := &corev1.Secret{ObjectMeta: naming.PostgresTLSSecret(cluster)} intent.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Secret")) intent.Data = make(map[string][]byte) - intent.ObjectMeta.OwnerReferences = existing.ObjectMeta.OwnerReferences + intent.OwnerReferences = existing.OwnerReferences intent.Annotations = naming.Merge(cluster.Spec.Metadata.GetAnnotationsOrNil()) intent.Labels = naming.Merge( diff --git a/internal/controller/postgrescluster/pki_test.go b/internal/controller/postgrescluster/pki_test.go index a234292eb8..0cb5f15a99 100644 --- a/internal/controller/postgrescluster/pki_test.go +++ b/internal/controller/postgrescluster/pki_test.go @@ -89,7 +89,7 @@ func TestReconcileCerts(t *testing.T) { err := tClient.Get(ctx, client.ObjectKeyFromObject(rootSecret), rootSecret) assert.NilError(t, err) - assert.Check(t, len(rootSecret.ObjectMeta.OwnerReferences) == 1, "first owner reference not set") + assert.Check(t, len(rootSecret.OwnerReferences) == 1, "first owner reference not set") expectedOR := metav1.OwnerReference{ APIVersion: "postgres-operator.crunchydata.com/v1beta1", @@ -98,8 +98,8 @@ func TestReconcileCerts(t *testing.T) { UID: cluster1.UID, } - if len(rootSecret.ObjectMeta.OwnerReferences) > 0 { - assert.Equal(t, rootSecret.ObjectMeta.OwnerReferences[0], expectedOR) + if len(rootSecret.OwnerReferences) > 0 { + assert.Equal(t, rootSecret.OwnerReferences[0], expectedOR) } }) @@ -114,7 +114,7 @@ func TestReconcileCerts(t *testing.T) { clist := &v1beta1.PostgresClusterList{} assert.NilError(t, tClient.List(ctx, clist)) - assert.Check(t, len(rootSecret.ObjectMeta.OwnerReferences) == 2, "second owner reference not set") + assert.Check(t, len(rootSecret.OwnerReferences) == 2, "second owner reference not set") expectedOR := metav1.OwnerReference{ APIVersion: "postgres-operator.crunchydata.com/v1beta1", @@ -123,8 +123,8 @@ func TestReconcileCerts(t *testing.T) { UID: cluster2.UID, } - if len(rootSecret.ObjectMeta.OwnerReferences) > 1 { - assert.Equal(t, rootSecret.ObjectMeta.OwnerReferences[1], expectedOR) + if len(rootSecret.OwnerReferences) > 1 { + assert.Equal(t, rootSecret.OwnerReferences[1], expectedOR) } }) @@ -301,7 +301,7 @@ func TestReconcileCerts(t *testing.T) { testSecret := &corev1.Secret{} testSecret.Namespace, testSecret.Name = namespace, "newcustomsecret" // simulate cluster spec update - cluster2.Spec.CustomTLSSecret.LocalObjectReference.Name = "newcustomsecret" + cluster2.Spec.CustomTLSSecret.Name = "newcustomsecret" // get the expected secret projection testSecretProjection := clusterCertSecretProjection(testSecret) diff --git a/internal/controller/postgrescluster/postgres.go b/internal/controller/postgrescluster/postgres.go index 6351e18f84..10901e10dd 100644 --- a/internal/controller/postgrescluster/postgres.go +++ b/internal/controller/postgrescluster/postgres.go @@ -571,7 +571,7 @@ func (r *Reconciler) reconcilePostgresUserSecrets( // If both secrets have "pguser" or neither have "pguser", 
// sort by creation timestamp - return secrets.Items[i].CreationTimestamp.Time.After(secrets.Items[j].CreationTimestamp.Time) + return secrets.Items[i].CreationTimestamp.After(secrets.Items[j].CreationTimestamp.Time) }) // Index secrets by PostgreSQL user name and delete any that are not in the @@ -644,11 +644,11 @@ func (r *Reconciler) reconcilePostgresUsersInPostgreSQL( running, known := instance.IsRunning(container) if running && known && len(instance.Pods) > 0 { pod := instance.Pods[0] - ctx = logging.NewContext(ctx, logging.FromContext(ctx).WithValues("pod", pod.Name)) podExecutor = func( ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string, ) error { + ctx = logging.NewContext(ctx, logging.FromContext(ctx).WithValues("pod", pod.Name)) return r.PodExec(ctx, pod.Namespace, pod.Name, container, stdin, stdout, stderr, command...) } break diff --git a/internal/controller/postgrescluster/postgres_test.go b/internal/controller/postgrescluster/postgres_test.go index db33e7f074..e1a1a5da0f 100644 --- a/internal/controller/postgrescluster/postgres_test.go +++ b/internal/controller/postgrescluster/postgres_test.go @@ -9,6 +9,8 @@ import ( "errors" "fmt" "io" + "os" + "strings" "testing" "github.com/go-logr/logr/funcr" @@ -526,6 +528,9 @@ volumeMode: Filesystem }) t.Run("DataVolumeSourceClusterWithGoodSnapshot", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } cluster := testCluster() ns := setupNamespace(t, tClient) cluster.Namespace = ns.Name diff --git a/internal/controller/postgrescluster/snapshots.go b/internal/controller/postgrescluster/snapshots.go index 8f36cefdfc..ff00928d6b 100644 --- a/internal/controller/postgrescluster/snapshots.go +++ b/internal/controller/postgrescluster/snapshots.go @@ -10,6 +10,7 @@ import ( "strings" "time" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "github.com/pkg/errors" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -17,8 +18,6 @@ import ( "k8s.io/apimachinery/pkg/labels" "sigs.k8s.io/controller-runtime/pkg/client" - volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" - "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" diff --git a/internal/controller/postgrescluster/snapshots_test.go b/internal/controller/postgrescluster/snapshots_test.go index 4c0ea36761..8c9dd7ece4 100644 --- a/internal/controller/postgrescluster/snapshots_test.go +++ b/internal/controller/postgrescluster/snapshots_test.go @@ -6,9 +6,12 @@ package postgrescluster import ( "context" + "os" + "strings" "testing" "time" + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "gotest.tools/v3/assert" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" @@ -27,8 +30,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/testing/events" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" - - volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" ) func TestReconcileVolumeSnapshots(t *testing.T) { @@ -58,7 +59,7 @@ func TestReconcileVolumeSnapshots(t *testing.T) { // Create cluster (without snapshots spec) cluster := 
testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" assert.NilError(t, r.Client.Create(ctx, cluster)) t.Cleanup(func() { assert.Check(t, r.Client.Delete(ctx, cluster)) }) @@ -181,6 +182,9 @@ func TestReconcileVolumeSnapshots(t *testing.T) { }) t.Run("SnapshotsEnabledReadySnapshotsExist", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } // Create a volume snapshot class volumeSnapshotClassName := "my-snapshotclass" volumeSnapshotClass := &volumesnapshotv1.VolumeSnapshotClass{ @@ -195,7 +199,7 @@ func TestReconcileVolumeSnapshots(t *testing.T) { // Create a cluster with snapshots enabled cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: volumeSnapshotClassName, } @@ -310,7 +314,7 @@ func TestReconcileVolumeSnapshots(t *testing.T) { // Create a cluster with snapshots enabled cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: volumeSnapshotClassName, } @@ -369,7 +373,7 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" assert.NilError(t, r.Client.Create(ctx, cluster)) t.Cleanup(func() { assert.Check(t, r.Client.Delete(ctx, cluster)) }) @@ -427,7 +431,7 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: "my-snapshotclass", } @@ -455,11 +459,14 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { }) t.Run("SnapshotsEnabledBackupExistsCreateRestore", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } // Create cluster with snapshots enabled ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: "my-snapshotclass", } @@ -500,11 +507,14 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { }) t.Run("SnapshotsEnabledSuccessfulRestoreExists", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } // Create cluster with snapshots enabled ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: "my-snapshotclass", } @@ -562,11 +572,14 @@ func TestReconcileDedicatedSnapshotVolume(t *testing.T) { }) t.Run("SnapshotsEnabledFailedRestoreExists", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } // Create cluster with snapshots enabled ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - 
cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" cluster.Spec.Backups.Snapshots = &v1beta1.VolumeSnapshots{ VolumeSnapshotClassName: "my-snapshotclass", } @@ -633,7 +646,7 @@ func TestCreateDedicatedSnapshotVolume(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" labelMap := map[string]string{ naming.LabelCluster: cluster.Name, @@ -661,7 +674,7 @@ func TestDedicatedSnapshotVolumeRestore(t *testing.T) { ns := setupNamespace(t, cc) cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" pvc := &corev1.PersistentVolumeClaim{ ObjectMeta: metav1.ObjectMeta{ @@ -747,7 +760,7 @@ func TestGenerateVolumeSnapshot(t *testing.T) { assert.Equal(t, *snapshot.Spec.VolumeSnapshotClassName, "my-snapshot") assert.Equal(t, *snapshot.Spec.Source.PersistentVolumeClaimName, "dedicated-snapshot-volume") assert.Equal(t, snapshot.Labels[naming.LabelCluster], "hippo") - assert.Equal(t, snapshot.ObjectMeta.OwnerReferences[0].Name, "hippo") + assert.Equal(t, snapshot.OwnerReferences[0].Name, "hippo") } func TestGetDedicatedSnapshotVolumeRestoreJob(t *testing.T) { @@ -838,6 +851,9 @@ func TestGetLatestCompleteBackupJob(t *testing.T) { }) t.Run("OneCompleteBackupJob", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } currentTime := metav1.Now() currentStartTime := metav1.NewTime(currentTime.AddDate(0, 0, -1)) @@ -865,6 +881,9 @@ func TestGetLatestCompleteBackupJob(t *testing.T) { }) t.Run("TwoCompleteBackupJobs", func(t *testing.T) { + if strings.EqualFold(os.Getenv("USE_EXISTING_CLUSTER"), "true") { + t.Skip("requires mocking of Job conditions") + } currentTime := metav1.Now() currentStartTime := metav1.NewTime(currentTime.AddDate(0, 0, -1)) earlierTime := metav1.NewTime(currentTime.AddDate(-1, 0, 0)) @@ -962,7 +981,7 @@ func TestGetSnapshotWithLatestError(t *testing.T) { }, } snapshotWithLatestError := getSnapshotWithLatestError(snapshots) - assert.Equal(t, snapshotWithLatestError.ObjectMeta.Name, "bad-snapshot") + assert.Equal(t, snapshotWithLatestError.Name, "bad-snapshot") }) t.Run("TwoSnapshotsWithErrors", func(t *testing.T) { @@ -995,7 +1014,7 @@ func TestGetSnapshotWithLatestError(t *testing.T) { }, } snapshotWithLatestError := getSnapshotWithLatestError(snapshots) - assert.Equal(t, snapshotWithLatestError.ObjectMeta.Name, "second-bad-snapshot") + assert.Equal(t, snapshotWithLatestError.Name, "second-bad-snapshot") }) } @@ -1185,7 +1204,7 @@ func TestGetLatestReadySnapshot(t *testing.T) { }, } latestReadySnapshot := getLatestReadySnapshot(snapshots) - assert.Equal(t, latestReadySnapshot.ObjectMeta.Name, "good-snapshot") + assert.Equal(t, latestReadySnapshot.Name, "good-snapshot") }) t.Run("TwoReadySnapshots", func(t *testing.T) { @@ -1214,7 +1233,7 @@ func TestGetLatestReadySnapshot(t *testing.T) { }, } latestReadySnapshot := getLatestReadySnapshot(snapshots) - assert.Equal(t, latestReadySnapshot.ObjectMeta.Name, "second-good-snapshot") + assert.Equal(t, latestReadySnapshot.Name, "second-good-snapshot") }) } @@ -1230,13 +1249,13 @@ func TestDeleteSnapshots(t *testing.T) { cluster := testCluster() cluster.Namespace = ns.Name - cluster.ObjectMeta.UID = "the-uid-123" + cluster.UID = "the-uid-123" assert.NilError(t, r.Client.Create(ctx, cluster)) rhinoCluster := testCluster() rhinoCluster.Name = "rhino" 
rhinoCluster.Namespace = ns.Name - rhinoCluster.ObjectMeta.UID = "the-uid-456" + rhinoCluster.UID = "the-uid-456" assert.NilError(t, r.Client.Create(ctx, rhinoCluster)) t.Cleanup(func() { diff --git a/internal/controller/postgrescluster/suite_test.go b/internal/controller/postgrescluster/suite_test.go index b9f80df2f9..7918523d0a 100644 --- a/internal/controller/postgrescluster/suite_test.go +++ b/internal/controller/postgrescluster/suite_test.go @@ -14,9 +14,7 @@ import ( . "github.com/onsi/gomega" "k8s.io/apimachinery/pkg/util/version" "k8s.io/client-go/discovery" - - // Google Kubernetes Engine / Google Cloud Platform authentication provider - _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" + _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" // Google Kubernetes Engine / Google Cloud Platform authentication provider "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" diff --git a/internal/controller/postgrescluster/util_test.go b/internal/controller/postgrescluster/util_test.go index c7332eea4e..8e7d5c434f 100644 --- a/internal/controller/postgrescluster/util_test.go +++ b/internal/controller/postgrescluster/util_test.go @@ -79,7 +79,7 @@ func TestAddDevSHM(t *testing.T) { // check there is an empty dir mounted under the dshm volume for _, v := range template.Spec.Volumes { - if v.Name == "dshm" && v.VolumeSource.EmptyDir != nil && v.VolumeSource.EmptyDir.Medium == corev1.StorageMediumMemory { + if v.Name == "dshm" && v.EmptyDir != nil && v.EmptyDir.Medium == corev1.StorageMediumMemory { found = true break } @@ -221,15 +221,14 @@ func TestAddNSSWrapper(t *testing.T) { // Each container that requires the nss_wrapper envs should be updated var actualUpdatedContainerCount int for i, c := range template.Spec.Containers { - if c.Name == naming.ContainerDatabase || - c.Name == naming.PGBackRestRepoContainerName || - c.Name == naming.PGBackRestRestoreContainerName { + switch c.Name { + case naming.ContainerDatabase, naming.PGBackRestRepoContainerName, naming.PGBackRestRestoreContainerName: assert.DeepEqual(t, expectedEnv, c.Env) actualUpdatedContainerCount++ - } else if c.Name == "pgadmin" { + case "pgadmin": assert.DeepEqual(t, expectedPGAdminEnv, c.Env) actualUpdatedContainerCount++ - } else { + default: assert.DeepEqual(t, beforeAddNSS[i], c) } } diff --git a/internal/controller/postgrescluster/volumes.go b/internal/controller/postgrescluster/volumes.go index 809b2fe8e1..a26fa05e78 100644 --- a/internal/controller/postgrescluster/volumes.go +++ b/internal/controller/postgrescluster/volumes.go @@ -257,7 +257,7 @@ func (r *Reconciler) configureExistingPGVolumes( Spec: cluster.Spec.InstanceSets[0].DataVolumeClaimSpec.AsPersistentVolumeClaimSpec(), } - volume.ObjectMeta.Labels = map[string]string{ + volume.Labels = map[string]string{ naming.LabelCluster: cluster.Name, naming.LabelInstanceSet: cluster.Spec.InstanceSets[0].Name, naming.LabelInstance: instanceName, @@ -310,7 +310,7 @@ func (r *Reconciler) configureExistingPGWALVolume( Spec: cluster.Spec.InstanceSets[0].DataVolumeClaimSpec.AsPersistentVolumeClaimSpec(), } - volume.ObjectMeta.Labels = map[string]string{ + volume.Labels = map[string]string{ naming.LabelCluster: cluster.Name, naming.LabelInstanceSet: cluster.Spec.InstanceSets[0].Name, naming.LabelInstance: instanceName, @@ -465,14 +465,14 @@ func (r *Reconciler) reconcileMovePGDataDir(ctx context.Context, // at this point, the Job either wasn't found or it has failed, so the it // should be created - 
moveDirJob.ObjectMeta.Annotations = naming.Merge(cluster.Spec.Metadata. + moveDirJob.Annotations = naming.Merge(cluster.Spec.Metadata. GetAnnotationsOrNil()) labels := naming.Merge(cluster.Spec.Metadata.GetLabelsOrNil(), naming.DirectoryMoveJobLabels(cluster.Name), map[string]string{ naming.LabelMovePGDataDir: "", }) - moveDirJob.ObjectMeta.Labels = labels + moveDirJob.Labels = labels // `patroni.dynamic.json` holds the previous state of the DCS. Since we are // migrating the volumes, we want to clear out any obsolete configuration info. @@ -588,14 +588,14 @@ func (r *Reconciler) reconcileMoveWALDir(ctx context.Context, } } - moveDirJob.ObjectMeta.Annotations = naming.Merge(cluster.Spec.Metadata. + moveDirJob.Annotations = naming.Merge(cluster.Spec.Metadata. GetAnnotationsOrNil()) labels := naming.Merge(cluster.Spec.Metadata.GetLabelsOrNil(), naming.DirectoryMoveJobLabels(cluster.Name), map[string]string{ naming.LabelMovePGWalDir: "", }) - moveDirJob.ObjectMeta.Labels = labels + moveDirJob.Labels = labels script := fmt.Sprintf(`echo "Preparing cluster %s volumes for PGO v5.x" echo "pg_wal_pvc=%s" @@ -610,7 +610,7 @@ func (r *Reconciler) reconcileMoveWALDir(ctx context.Context, cluster.Spec.DataSource.Volumes.PGWALVolume.PVCName, cluster.Spec.DataSource.Volumes.PGWALVolume.Directory, cluster.Spec.DataSource.Volumes.PGWALVolume.Directory, - cluster.ObjectMeta.Name) + cluster.Name) container := corev1.Container{ Command: []string{"bash", "-ceu", script}, @@ -707,14 +707,14 @@ func (r *Reconciler) reconcileMoveRepoDir(ctx context.Context, } } - moveDirJob.ObjectMeta.Annotations = naming.Merge( + moveDirJob.Annotations = naming.Merge( cluster.Spec.Metadata.GetAnnotationsOrNil()) labels := naming.Merge(cluster.Spec.Metadata.GetLabelsOrNil(), naming.DirectoryMoveJobLabels(cluster.Name), map[string]string{ naming.LabelMovePGBackRestRepoDir: "", }) - moveDirJob.ObjectMeta.Labels = labels + moveDirJob.Labels = labels script := fmt.Sprintf(`echo "Preparing cluster %s pgBackRest repo volume for PGO v5.x" echo "repo_pvc=%s" diff --git a/internal/controller/runtime/runtime.go b/internal/controller/runtime/runtime.go index 152f490035..e3b0aca230 100644 --- a/internal/controller/runtime/runtime.go +++ b/internal/controller/runtime/runtime.go @@ -5,6 +5,7 @@ package runtime import ( + volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" @@ -15,8 +16,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" - - volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" ) type ( diff --git a/internal/controller/standalone_pgadmin/apply.go b/internal/controller/standalone_pgadmin/apply.go index 1108853e7f..0cc3191967 100644 --- a/internal/controller/standalone_pgadmin/apply.go +++ b/internal/controller/standalone_pgadmin/apply.go @@ -22,7 +22,7 @@ func (r *PGAdminReconciler) patch( patch client.Patch, options ...client.PatchOption, ) error { options = append([]client.PatchOption{r.Owner}, options...) - return r.Client.Patch(ctx, object, patch, options...) + return r.Patch(ctx, object, patch, options...) 
} // apply sends an apply patch to object's endpoint in the Kubernetes API and diff --git a/internal/controller/standalone_pgadmin/configmap.go b/internal/controller/standalone_pgadmin/configmap.go index 72a95b14db..ad0da80dfa 100644 --- a/internal/controller/standalone_pgadmin/configmap.go +++ b/internal/controller/standalone_pgadmin/configmap.go @@ -14,12 +14,10 @@ import ( "strconv" "strings" - corev1 "k8s.io/api/core/v1" - "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" "github.com/crunchydata/postgres-operator/internal/collector" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -73,7 +71,7 @@ func configmap(ctx context.Context, pgadmin *v1beta1.PGAdmin, gunicornRetentionPeriod = "D" ) // If OTel logs feature gate is enabled, we want to change the pgAdmin/gunicorn logging - if feature.Enabled(ctx, feature.OpenTelemetryLogs) && pgadmin.Spec.Instrumentation != nil { + if collector.OpenTelemetryLogsEnabled(ctx, pgadmin) { logRetention = true // If the user has set a retention period, we will use those values for log rotation, diff --git a/internal/controller/standalone_pgadmin/configmap_test.go b/internal/controller/standalone_pgadmin/configmap_test.go index 267dd77325..3a9bab7b28 100644 --- a/internal/controller/standalone_pgadmin/configmap_test.go +++ b/internal/controller/standalone_pgadmin/configmap_test.go @@ -235,12 +235,12 @@ namespace: some-ns assert.NilError(t, err) // Annotations present in the metadata. - assert.DeepEqual(t, configmap.ObjectMeta.Annotations, map[string]string{ + assert.DeepEqual(t, configmap.Annotations, map[string]string{ "a": "v1", "b": "v2", }) // Labels present in the metadata. - assert.DeepEqual(t, configmap.ObjectMeta.Labels, map[string]string{ + assert.DeepEqual(t, configmap.Labels, map[string]string{ "c": "v3", "d": "v4", "postgres-operator.crunchydata.com/pgadmin": "pg1", "postgres-operator.crunchydata.com/role": "pgadmin", diff --git a/internal/controller/standalone_pgadmin/controller.go b/internal/controller/standalone_pgadmin/controller.go index 23ba7b6793..a8b95b0053 100644 --- a/internal/controller/standalone_pgadmin/controller.go +++ b/internal/controller/standalone_pgadmin/controller.go @@ -166,7 +166,7 @@ func (r *PGAdminReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct func (r *PGAdminReconciler) setControllerReference( owner *v1beta1.PGAdmin, controlled client.Object, ) error { - return controllerutil.SetControllerReference(owner, controlled, r.Client.Scheme()) + return controllerutil.SetControllerReference(owner, controlled, r.Scheme()) } // deleteControlled safely deletes object when it is controlled by pgAdmin. 
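Two mechanical rewrites recur throughout these hunks: `r.Client.Get` becomes `r.Get`, and `object.ObjectMeta.Labels` becomes `object.Labels`. Both compile for the same reason: Go promotes the methods and fields of an embedded field to the outer type, so the explicit selector is redundant. A minimal sketch of the mechanism, using a hypothetical `exampleReconciler` rather than any type in this repository:

	package example

	import (
		"context"

		corev1 "k8s.io/api/core/v1"
		"sigs.k8s.io/controller-runtime/pkg/client"
	)

	type exampleReconciler struct {
		client.Client // embedded, so Get, List, Patch, and Delete are promoted
	}

	func (r *exampleReconciler) demo(ctx context.Context, key client.ObjectKey) error {
		pod := &corev1.Pod{}

		// Reaches the same method as r.Client.Get through the embedded field.
		if err := r.Get(ctx, key, pod); err != nil {
			return err
		}

		// corev1.Pod embeds metav1.ObjectMeta, so pod.Labels is shorthand for
		// pod.ObjectMeta.Labels; the same applies to UID, Name, and Annotations.
		_ = pod.Labels
		return nil
	}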
@@ -178,7 +178,7 @@ func (r *PGAdminReconciler) deleteControlled( version := object.GetResourceVersion() exactly := client.Preconditions{UID: &uid, ResourceVersion: &version} - return r.Client.Delete(ctx, object, exactly) + return r.Delete(ctx, object, exactly) } return nil diff --git a/internal/controller/standalone_pgadmin/pod.go b/internal/controller/standalone_pgadmin/pod.go index ab6f8679f4..71f785c15e 100644 --- a/internal/controller/standalone_pgadmin/pod.go +++ b/internal/controller/standalone_pgadmin/pod.go @@ -28,6 +28,8 @@ const ( configDatabaseURIPath = "~postgres-operator/config-database-uri" ldapFilePath = "~postgres-operator/ldap-bind-password" gunicornConfigFilePath = "~postgres-operator/" + gunicornConfigKey + oauthConfigDir = "~postgres-operator/oauth-config" + oauthAbsolutePath = configMountPath + "/" + oauthConfigDir // scriptMountPath is where to mount a temporary directory that is only // writable during Pod initialization. @@ -148,7 +150,7 @@ func pod( // Check the configmap to see if we think TLS is enabled // If so, update the readiness check scheme to HTTPS if strings.Contains(gunicornData, "certfile") && strings.Contains(gunicornData, "keyfile") { - readinessProbe.ProbeHandler.HTTPGet.Scheme = corev1.URISchemeHTTPS + readinessProbe.HTTPGet.Scheme = corev1.URISchemeHTTPS } container.ReadinessProbe = readinessProbe @@ -212,6 +214,17 @@ func podConfigFiles(configmap *corev1.ConfigMap, pgadmin v1beta1.PGAdmin) []core }, }...) + for i, oauth := range pgadmin.Spec.Config.OAuthConfigurations { + // Safely encode the OAUTH2_NAME in the file name. Prepend the index so + // the files can be loaded in the order they are defined in the spec. + mountPath := fmt.Sprintf( + "%s/%02d-%s.json", oauthConfigDir, i, shell.CleanFileName(oauth.Name), + ) + config = append(config, corev1.VolumeProjection{ + Secret: initialize.Pointer(oauth.Secret.AsProjection(mountPath)), + }) + } + if pgadmin.Spec.Config.ConfigDatabaseURI != nil { config = append(config, corev1.VolumeProjection{ Secret: initialize.Pointer( @@ -311,15 +324,17 @@ loadServerCommand // descriptor and uses the timeout of the builtin `read` to wait. That same // descriptor gets closed and reopened to use the builtin `[ -nt` to check mtimes. // - https://unix.stackexchange.com/a/407383 - // In order to get gunicorn to reload the logging config - // we need to send a KILL rather than a HUP signal. + // + // Gunicorn needs a SIGTERM rather than SIGHUP to reload its logging config. + // This also causes pgAdmin to restart when its configuration changes. // - https://github.com/benoitc/gunicorn/issues/3353 + // // Right now the config file is on the same configMap as the cluster file // so if the mtime changes for any of those files, it will change for all. 
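	// The descriptor idiom is easier to see in isolation. A minimal sketch,
	// where `watched` and `reload` are hypothetical placeholders for the real
	// file and work:
	//
	//	exec {fd}<> <(:||:)                   # read-write keeps a writer open, so no EOF
	//	while read -r -t 5 -u "${fd}" ||:; do # nothing ever arrives; read times out at 5s
	//	  if [[ "${watched}" -nt "/proc/self/fd/${fd}" ]] && reload; then
	//	    exec {fd}>&- && exec {fd}<> <(:||:) # reopen to reset the mtime baseline
	//	  fi
	//	done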
var reloadScript = ` exec {fd}<> <(:||:) while read -r -t 5 -u "${fd}" ||:; do - if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -KILL $(head -1 ${PGADMIN4_PIDFILE?}); + if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -TERM $(head -1 ${PGADMIN4_PIDFILE?}); then exec {fd}>&- && exec {fd}<> <(:||:) stat --format='Loaded shared servers dated %y' "${cluster_file}" @@ -375,12 +390,31 @@ with open('` + configMountPath + `/` + configFilePath + `') as _f: _conf, _data = re.compile(r'[A-Z_0-9]+'), json.load(_f) if type(_data) is dict: globals().update({k: v for k, v in _data.items() if _conf.fullmatch(k)}) +if 'OAUTH2_CONFIG' in globals() and type(OAUTH2_CONFIG) is list: + OAUTH2_CONFIG = [_conf for _conf in OAUTH2_CONFIG if type(_conf) is dict and 'OAUTH2_NAME' in _conf] +for _f in reversed(glob.glob('` + oauthAbsolutePath + `/[0-9][0-9]-*.json')): + if 'OAUTH2_CONFIG' not in globals() or type(OAUTH2_CONFIG) is not list: + OAUTH2_CONFIG = [] + try: + with open(_f) as _f: + _data, _name = json.load(_f), os.path.basename(_f.name)[3:-5] + _data, _next = { 'OAUTH2_NAME': _name } | _data, [] + for _conf in OAUTH2_CONFIG: + if _data['OAUTH2_NAME'] == _conf.get('OAUTH2_NAME'): + _data = _conf | _data + else: + _next.append(_conf) + OAUTH2_CONFIG = [_data] + _next + del _next + except: + pass if os.path.isfile('` + ldapPasswordAbsolutePath + `'): with open('` + ldapPasswordAbsolutePath + `') as _f: LDAP_BIND_PASSWORD = _f.read() if os.path.isfile('` + configDatabaseURIPathAbsolutePath + `'): with open('` + configDatabaseURIPathAbsolutePath + `') as _f: CONFIG_DATABASE_URI = _f.read() +del _conf, _data, _f ` // Gunicorn reads from the `/etc/pgadmin/gunicorn_config.py` file during startup @@ -408,10 +442,10 @@ with open('` + configMountPath + `/` + gunicornConfigFilePath + `') as _f: script := strings.Join([]string{ // Create the config directory so Kubernetes can mount it later. // - https://issue.k8s.io/121294 - shell.MakeDirectories(0o775, scriptMountPath, configMountPath), + shell.MakeDirectories(scriptMountPath, configMountPath), - // Create the logs directory with g+rwx to ensure pgAdmin can write to it as well. - shell.MakeDirectories(0o775, dataMountPath, LogDirectoryAbsolutePath), + // Create the logs directory and ensure pgAdmin can write to it as well. + shell.MakeDirectories(dataMountPath, LogDirectoryAbsolutePath), // Write the system and server configurations. 
`echo "$1" > ` + scriptMountPath + `/config_system.py`, diff --git a/internal/controller/standalone_pgadmin/pod_test.go b/internal/controller/standalone_pgadmin/pod_test.go index b414a7bab0..b30b35bc65 100644 --- a/internal/controller/standalone_pgadmin/pod_test.go +++ b/internal/controller/standalone_pgadmin/pod_test.go @@ -16,6 +16,7 @@ import ( "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/kubernetes" "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -74,7 +75,7 @@ containers: exec {fd}<> <(:||:) while read -r -t 5 -u "${fd}" ||:; do - if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -KILL $(head -1 ${PGADMIN4_PIDFILE?}); + if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -TERM $(head -1 ${PGADMIN4_PIDFILE?}); then exec {fd}>&- && exec {fd}<> <(:||:) stat --format='Loaded shared servers dated %y' "${cluster_file}" @@ -136,8 +137,8 @@ initContainers: - -ceu - -- - |- - mkdir -p '/etc/pgadmin/conf.d' && chmod 0775 '/etc/pgadmin/conf.d' - mkdir -p '/var/lib/pgadmin/logs' && chmod 0775 '/var/lib/pgadmin/logs' + mkdir -p '/etc/pgadmin/conf.d' && { chmod 0775 '/etc/pgadmin/conf.d' || :; } + mkdir -p '/var/lib/pgadmin/logs' && { chmod 0775 '/var/lib/pgadmin/logs' || :; } echo "$1" > /etc/pgadmin/config_system.py echo "$2" > /etc/pgadmin/gunicorn_config.py - startup @@ -148,12 +149,31 @@ initContainers: _conf, _data = re.compile(r'[A-Z_0-9]+'), json.load(_f) if type(_data) is dict: globals().update({k: v for k, v in _data.items() if _conf.fullmatch(k)}) + if 'OAUTH2_CONFIG' in globals() and type(OAUTH2_CONFIG) is list: + OAUTH2_CONFIG = [_conf for _conf in OAUTH2_CONFIG if type(_conf) is dict and 'OAUTH2_NAME' in _conf] + for _f in reversed(glob.glob('/etc/pgadmin/conf.d/~postgres-operator/oauth-config/[0-9][0-9]-*.json')): + if 'OAUTH2_CONFIG' not in globals() or type(OAUTH2_CONFIG) is not list: + OAUTH2_CONFIG = [] + try: + with open(_f) as _f: + _data, _name = json.load(_f), os.path.basename(_f.name)[3:-5] + _data, _next = { 'OAUTH2_NAME': _name } | _data, [] + for _conf in OAUTH2_CONFIG: + if _data['OAUTH2_NAME'] == _conf.get('OAUTH2_NAME'): + _data = _conf | _data + else: + _next.append(_conf) + OAUTH2_CONFIG = [_data] + _next + del _next + except: + pass if os.path.isfile('/etc/pgadmin/conf.d/~postgres-operator/ldap-bind-password'): with open('/etc/pgadmin/conf.d/~postgres-operator/ldap-bind-password') as _f: LDAP_BIND_PASSWORD = _f.read() if os.path.isfile('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri'): with open('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri') as _f: CONFIG_DATABASE_URI = _f.read() + del _conf, _data, _f - | import json, re, gunicorn gunicorn.SERVER_SOFTWARE = 'Python' @@ -211,13 +231,9 @@ volumes: pgadmin.Spec.Resources.Requests = corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("100m"), } - retentionPeriod, err := v1beta1.NewDuration("12 hours") - assert.NilError(t, err) - pgadmin.Spec.Instrumentation = &v1beta1.InstrumentationSpec{ - Logs: &v1beta1.InstrumentationLogsSpec{ - RetentionPeriod: retentionPeriod, - }, - } + require.UnmarshalInto(t, &pgadmin.Spec.Instrumentation, `{ + logs: { retentionPeriod: 12h }, + }`) call() @@ -260,7 +276,7 @@ containers: exec {fd}<> <(:||:) while read -r -t 5 -u "${fd}" ||:; do - if 
[[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -KILL $(head -1 ${PGADMIN4_PIDFILE?}); + if [[ "${cluster_file}" -nt "/proc/self/fd/${fd}" ]] && loadServerCommand && kill -TERM $(head -1 ${PGADMIN4_PIDFILE?}); then exec {fd}>&- && exec {fd}<> <(:||:) stat --format='Loaded shared servers dated %y' "${cluster_file}" @@ -326,8 +342,8 @@ initContainers: - -ceu - -- - |- - mkdir -p '/etc/pgadmin/conf.d' && chmod 0775 '/etc/pgadmin/conf.d' - mkdir -p '/var/lib/pgadmin/logs' && chmod 0775 '/var/lib/pgadmin/logs' + mkdir -p '/etc/pgadmin/conf.d' && { chmod 0775 '/etc/pgadmin/conf.d' || :; } + mkdir -p '/var/lib/pgadmin/logs' && { chmod 0775 '/var/lib/pgadmin/logs' || :; } echo "$1" > /etc/pgadmin/config_system.py echo "$2" > /etc/pgadmin/gunicorn_config.py - startup @@ -338,12 +354,31 @@ initContainers: _conf, _data = re.compile(r'[A-Z_0-9]+'), json.load(_f) if type(_data) is dict: globals().update({k: v for k, v in _data.items() if _conf.fullmatch(k)}) + if 'OAUTH2_CONFIG' in globals() and type(OAUTH2_CONFIG) is list: + OAUTH2_CONFIG = [_conf for _conf in OAUTH2_CONFIG if type(_conf) is dict and 'OAUTH2_NAME' in _conf] + for _f in reversed(glob.glob('/etc/pgadmin/conf.d/~postgres-operator/oauth-config/[0-9][0-9]-*.json')): + if 'OAUTH2_CONFIG' not in globals() or type(OAUTH2_CONFIG) is not list: + OAUTH2_CONFIG = [] + try: + with open(_f) as _f: + _data, _name = json.load(_f), os.path.basename(_f.name)[3:-5] + _data, _next = { 'OAUTH2_NAME': _name } | _data, [] + for _conf in OAUTH2_CONFIG: + if _data['OAUTH2_NAME'] == _conf.get('OAUTH2_NAME'): + _data = _conf | _data + else: + _next.append(_conf) + OAUTH2_CONFIG = [_data] + _next + del _next + except: + pass if os.path.isfile('/etc/pgadmin/conf.d/~postgres-operator/ldap-bind-password'): with open('/etc/pgadmin/conf.d/~postgres-operator/ldap-bind-password') as _f: LDAP_BIND_PASSWORD = _f.read() if os.path.isfile('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri'): with open('/etc/pgadmin/conf.d/~postgres-operator/config-database-uri') as _f: CONFIG_DATABASE_URI = _f.read() + del _conf, _data, _f - | import json, re, gunicorn gunicorn.SERVER_SOFTWARE = 'Python' diff --git a/internal/controller/standalone_pgadmin/related.go b/internal/controller/standalone_pgadmin/related.go index 50d5a68b09..c7fcb119bc 100644 --- a/internal/controller/standalone_pgadmin/related.go +++ b/internal/controller/standalone_pgadmin/related.go @@ -7,12 +7,12 @@ package standalone_pgadmin import ( "context" + "k8s.io/apimachinery/pkg/labels" + "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" - - "k8s.io/apimachinery/pkg/labels" - "sigs.k8s.io/controller-runtime/pkg/client" ) //+kubebuilder:rbac:groups="postgres-operator.crunchydata.com",resources="pgadmins",verbs={list} @@ -30,7 +30,7 @@ func (r *PGAdminReconciler) findPGAdminsForPostgresCluster( // namespace, we can configure the [manager.Manager] field indexer and pass a // [fields.Selector] here. 
// - https://book.kubebuilder.io/reference/watching-resources/externally-managed.html - if r.Client.List(ctx, &pgadmins, &client.ListOptions{ + if r.List(ctx, &pgadmins, &client.ListOptions{ Namespace: cluster.GetNamespace(), }) == nil { for i := range pgadmins.Items { @@ -64,7 +64,7 @@ func (r *PGAdminReconciler) findPGAdminsForSecret( // namespace, we can configure the [manager.Manager] field indexer and pass a // [fields.Selector] here. // - https://book.kubebuilder.io/reference/watching-resources/externally-managed.html - if err := r.Client.List(ctx, &pgadmins, &client.ListOptions{ + if err := r.List(ctx, &pgadmins, &client.ListOptions{ Namespace: secret.Namespace, }); err == nil { for i := range pgadmins.Items { @@ -93,7 +93,7 @@ func (r *PGAdminReconciler) getClustersForPGAdmin( for _, serverGroup := range pgAdmin.Spec.ServerGroups { var cluster v1beta1.PostgresCluster if serverGroup.PostgresClusterName != "" { - err = r.Client.Get(ctx, client.ObjectKey{ + err = r.Get(ctx, client.ObjectKey{ Name: serverGroup.PostgresClusterName, Namespace: pgAdmin.GetNamespace(), }, &cluster) @@ -104,7 +104,7 @@ func (r *PGAdminReconciler) getClustersForPGAdmin( } if selector, err = naming.AsSelector(serverGroup.PostgresClusterSelector); err == nil { var list v1beta1.PostgresClusterList - err = r.Client.List(ctx, &list, + err = r.List(ctx, &list, client.InNamespace(pgAdmin.Namespace), client.MatchingLabelsSelector{Selector: selector}, ) diff --git a/internal/controller/standalone_pgadmin/service.go b/internal/controller/standalone_pgadmin/service.go index b465dadb97..bfdc04c6ec 100644 --- a/internal/controller/standalone_pgadmin/service.go +++ b/internal/controller/standalone_pgadmin/service.go @@ -7,16 +7,14 @@ package standalone_pgadmin import ( "context" + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "sigs.k8s.io/controller-runtime/pkg/client" - apierrors "k8s.io/apimachinery/pkg/api/errors" - - "github.com/pkg/errors" - "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -38,7 +36,7 @@ func (r *PGAdminReconciler) reconcilePGAdminService( // need to delete any existing service(s). At the start of every reconcile // get all services that match the current pgAdmin labels. 
services := corev1.ServiceList{} - if err := r.Client.List(ctx, &services, + if err := r.List(ctx, &services, client.InNamespace(pgadmin.Namespace), client.MatchingLabels{ naming.LabelStandalonePGAdmin: pgadmin.Name, @@ -64,7 +62,7 @@ func (r *PGAdminReconciler) reconcilePGAdminService( if pgadmin.Spec.ServiceName != "" { // Look for an existing service with name ServiceName in the namespace existingService := &corev1.Service{} - err := r.Client.Get(ctx, types.NamespacedName{ + err := r.Get(ctx, types.NamespacedName{ Name: pgadmin.Spec.ServiceName, Namespace: pgadmin.GetNamespace(), }, existingService) diff --git a/internal/controller/standalone_pgadmin/statefulset.go b/internal/controller/standalone_pgadmin/statefulset.go index 6e606b0867..b8730b7112 100644 --- a/internal/controller/standalone_pgadmin/statefulset.go +++ b/internal/controller/standalone_pgadmin/statefulset.go @@ -7,17 +7,15 @@ package standalone_pgadmin import ( "context" + "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/pkg/errors" - "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/controller/postgrescluster" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -36,7 +34,7 @@ func (r *PGAdminReconciler) reconcilePGAdminStatefulSet( // When we delete the StatefulSet, we will leave its Pods in place. They will be claimed by // the StatefulSet that gets created in the next reconcile. existing := &appsv1.StatefulSet{} - if err := errors.WithStack(r.Client.Get(ctx, client.ObjectKeyFromObject(sts), existing)); err != nil { + if err := errors.WithStack(r.Get(ctx, client.ObjectKeyFromObject(sts), existing)); err != nil { if !apierrors.IsNotFound(err) { return err } @@ -49,7 +47,7 @@ func (r *PGAdminReconciler) reconcilePGAdminStatefulSet( exactly := client.Preconditions{UID: &uid, ResourceVersion: &version} propagate := client.PropagationPolicy(metav1.DeletePropagationOrphan) - return errors.WithStack(client.IgnoreNotFound(r.Client.Delete(ctx, existing, exactly, propagate))) + return errors.WithStack(client.IgnoreNotFound(r.Delete(ctx, existing, exactly, propagate))) } } @@ -122,9 +120,9 @@ func statefulset( pod(pgadmin, configmap, &sts.Spec.Template.Spec, dataVolume) - if pgadmin.Spec.Instrumentation != nil && feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if collector.OpenTelemetryLogsEnabled(ctx, pgadmin) { // Logs for gunicorn and pgadmin write to /var/lib/pgadmin/logs - // so the collector needs access to that that path. + // so the collector needs access to that path. 
dataVolumeMount := corev1.VolumeMount{ Name: "pgadmin-data", MountPath: "/var/lib/pgadmin", diff --git a/internal/controller/standalone_pgadmin/statefulset_test.go b/internal/controller/standalone_pgadmin/statefulset_test.go index 9d6b804476..e7cccf15dc 100644 --- a/internal/controller/standalone_pgadmin/statefulset_test.go +++ b/internal/controller/standalone_pgadmin/statefulset_test.go @@ -41,6 +41,7 @@ func TestReconcilePGAdminStatefulSet(t *testing.T) { resources: { requests: { storage: 1Gi } }, }, }`) + pgadmin.Spec.Image = initialize.String("some-image") assert.NilError(t, cc.Create(ctx, pgadmin)) t.Cleanup(func() { assert.Check(t, cc.Delete(ctx, pgadmin)) }) @@ -117,6 +118,7 @@ terminationGracePeriodSeconds: 30 resources: { requests: { storage: 1Gi } }, }, }`) + custompgadmin.Spec.Image = initialize.String("some-image") // annotation and label custompgadmin.Spec.Metadata = &v1beta1.Metadata{ diff --git a/internal/controller/standalone_pgadmin/users.go b/internal/controller/standalone_pgadmin/users.go index 34a9ba8661..027960e90c 100644 --- a/internal/controller/standalone_pgadmin/users.go +++ b/internal/controller/standalone_pgadmin/users.go @@ -53,7 +53,7 @@ func (r *PGAdminReconciler) reconcilePGAdminUsers(ctx context.Context, pgadmin * pod := &corev1.Pod{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} pod.Name += "-0" - err := errors.WithStack(r.Client.Get(ctx, client.ObjectKeyFromObject(pod), pod)) + err := errors.WithStack(r.Get(ctx, client.ObjectKeyFromObject(pod), pod)) if err != nil { return client.IgnoreNotFound(err) } @@ -79,28 +79,53 @@ func (r *PGAdminReconciler) reconcilePGAdminUsers(ctx context.Context, pgadmin * return nil } - // If the pgAdmin version is not in the status or the image SHA has changed, get - // the pgAdmin version and store it in the status. - var pgadminVersion int - if pgadmin.Status.MajorVersion == 0 || pgadmin.Status.ImageSHA != pgAdminImageSha { - pgadminVersion, err = r.reconcilePGAdminMajorVersion(ctx, podExecutor) + // If the pgAdmin major or minor version is not in the status or the image + // SHA has changed, get the pgAdmin version and store it in the status. + var pgadminMajorVersion int + if pgadmin.Status.MajorVersion == 0 || pgadmin.Status.MinorVersion == "" || + pgadmin.Status.ImageSHA != pgAdminImageSha { + + // exec into the pgAdmin pod and retrieve the pgAdmin minor version + script := fmt.Sprintf(` +PGADMIN_DIR=%s +cd $PGADMIN_DIR && python3 -c "import config; print(config.APP_VERSION)" +`, pgAdminDir) + + var stdin, stdout, stderr bytes.Buffer + + if err := podExecutor(ctx, &stdin, &stdout, &stderr, + []string{"bash", "-ceu", "--", script}...); err != nil { + return err + } + + pgadminMinorVersion := strings.TrimSpace(stdout.String()) + + // ensure minor version is valid before storing in status + parsedMinorVersion, err := strconv.ParseFloat(pgadminMinorVersion, 64) if err != nil { return err } - pgadmin.Status.MajorVersion = pgadminVersion + + // Note: "When converting a floating-point number to an integer, the + // fraction is discarded (truncation towards zero)." 
+ // - https://go.dev/ref/spec#Conversions + pgadminMajorVersion = int(parsedMinorVersion) + + pgadmin.Status.MinorVersion = pgadminMinorVersion + pgadmin.Status.MajorVersion = pgadminMajorVersion pgadmin.Status.ImageSHA = pgAdminImageSha } else { - pgadminVersion = pgadmin.Status.MajorVersion + pgadminMajorVersion = pgadmin.Status.MajorVersion } // If the pgAdmin version is not v8 or higher, return early as user management is // only supported for pgAdmin v8 and higher. - if pgadminVersion < 8 { + if pgadminMajorVersion < 8 { // If pgAdmin version is less than v8 and user management is being attempted, // log a message clarifying that it is only supported for pgAdmin v8 and higher. if len(pgadmin.Spec.Users) > 0 { log.Info("User management is only supported for pgAdmin v8 and higher.", - "pgadminVersion", pgadminVersion) + "pgadminVersion", pgadminMajorVersion) } return err } @@ -108,25 +133,6 @@ func (r *PGAdminReconciler) reconcilePGAdminUsers(ctx context.Context, pgadmin * return r.writePGAdminUsers(ctx, pgadmin, podExecutor) } -// reconcilePGAdminMajorVersion execs into the pgAdmin pod and retrieves the pgAdmin major version -func (r *PGAdminReconciler) reconcilePGAdminMajorVersion(ctx context.Context, exec Executor) (int, error) { - script := fmt.Sprintf(` -PGADMIN_DIR=%s -cd $PGADMIN_DIR && python3 -c "import config; print(config.APP_RELEASE)" -`, pgAdminDir) - - var stdin, stdout, stderr bytes.Buffer - - err := exec(ctx, &stdin, &stdout, &stderr, - []string{"bash", "-ceu", "--", script}...) - - if err != nil { - return 0, err - } - - return strconv.Atoi(strings.TrimSpace(stdout.String())) -} - // writePGAdminUsers takes the users in the pgAdmin spec and writes (adds or updates) their data // to both pgAdmin and the users.json file that is stored in the pgAdmin secret. If a user is // removed from the spec, its data is removed from users.json, but it is not deleted from pgAdmin. 
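The version handling in the hunk above leans on two details: pgAdmin's `APP_VERSION` is a dotted string such as "9.3", and converting the parsed float to `int` truncates toward zero, which yields the major version. A minimal standalone sketch of that flow, using a hypothetical `majorFromAppVersion` helper (the reconciler inlines this logic rather than calling such a function):

	package example

	import (
		"fmt"
		"strconv"
		"strings"
	)

	// majorFromAppVersion derives the major and minor versions from pod output like "9.3\n".
	func majorFromAppVersion(stdout string) (int, string, error) {
		minor := strings.TrimSpace(stdout)
		parsed, err := strconv.ParseFloat(minor, 64)
		if err != nil {
			return 0, "", fmt.Errorf("unexpected APP_VERSION %q: %w", minor, err)
		}
		// int(9.3) == 9: the fractional part is discarded, truncating toward zero.
		return int(parsed), minor, nil
	}

Like the code above, this sketch assumes a two-part version; `strconv.ParseFloat` would reject a string such as "9.3.1".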
@@ -136,7 +142,7 @@ func (r *PGAdminReconciler) writePGAdminUsers(ctx context.Context, pgadmin *v1be existingUserSecret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} err := errors.WithStack( - r.Client.Get(ctx, client.ObjectKeyFromObject(existingUserSecret), existingUserSecret)) + r.Get(ctx, client.ObjectKeyFromObject(existingUserSecret), existingUserSecret)) if client.IgnoreNotFound(err) != nil { return err } @@ -170,10 +176,25 @@ cd $PGADMIN_DIR for _, user := range existingUsersArr { existingUsersMap[user.Username] = user } + + var olderThan9_3 bool + versionFloat, err := strconv.ParseFloat(pgadmin.Status.MinorVersion, 64) + if err != nil { + return err + } + if versionFloat < 9.3 { + olderThan9_3 = true + } + intentUsers := []pgAdminUserForJson{} for _, user := range pgadmin.Spec.Users { var stdin, stdout, stderr bytes.Buffer - typeFlag := "--nonadmin" + // starting in pgAdmin 9.3, custom roles are supported and a new flag is used + // - https://github.com/pgadmin-org/pgadmin4/pull/8631 + typeFlag := "--role User" + if olderThan9_3 { + typeFlag = "--nonadmin" + } isAdmin := false if user.Role == "Administrator" { typeFlag = "--admin" @@ -183,10 +204,10 @@ cd $PGADMIN_DIR // Get password from secret userPasswordSecret := &corev1.Secret{ObjectMeta: metav1.ObjectMeta{ Namespace: pgadmin.Namespace, - Name: user.PasswordRef.LocalObjectReference.Name, + Name: user.PasswordRef.Name, }} err := errors.WithStack( - r.Client.Get(ctx, client.ObjectKeyFromObject(userPasswordSecret), userPasswordSecret)) + r.Get(ctx, client.ObjectKeyFromObject(userPasswordSecret), userPasswordSecret)) if err != nil { log.Error(err, "Could not get user password secret") continue @@ -229,8 +250,13 @@ cd $PGADMIN_DIR log.Error(err, "PodExec failed: ") intentUsers = append(intentUsers, existingUser) continue + + } else if strings.Contains(strings.TrimSpace(stderr.String()), "UserWarning: pkg_resources is deprecated as an API") { + // Started seeing this error with pgAdmin 9.7 when using Python 3.11. + // Issue appears to resolve with Python 3.13. + log.Info(stderr.String()) } else if strings.TrimSpace(stderr.String()) != "" { - log.Error(errors.New(stderr.String()), fmt.Sprintf("pgAdmin setup.py error for %s: ", + log.Error(errors.New(stderr.String()), fmt.Sprintf("pgAdmin setup.py update-user error for %s: ", intentUser.Username)) intentUsers = append(intentUsers, existingUser) continue @@ -263,8 +289,12 @@ cd $PGADMIN_DIR log.Error(err, "PodExec failed: ") continue } - if strings.TrimSpace(stderr.String()) != "" { - log.Error(errors.New(stderr.String()), fmt.Sprintf("pgAdmin setup.py error for %s: ", + if strings.Contains(strings.TrimSpace(stderr.String()), "UserWarning: pkg_resources is deprecated as an API") { + // Started seeing this error with pgAdmin 9.7 when using Python 3.11. + // Issue appears to resolve with Python 3.13. 
+ log.Info(stderr.String()) + } else if strings.TrimSpace(stderr.String()) != "" { + log.Error(errors.New(stderr.String()), fmt.Sprintf("pgAdmin setup.py add-user error for %s: ", intentUser.Username)) continue } diff --git a/internal/controller/standalone_pgadmin/users_test.go b/internal/controller/standalone_pgadmin/users_test.go index fb861e17a7..35c599930d 100644 --- a/internal/controller/standalone_pgadmin/users_test.go +++ b/internal/controller/standalone_pgadmin/users_test.go @@ -110,15 +110,16 @@ func TestReconcilePGAdminUsers(t *testing.T) { assert.Equal(t, namespace, pgadmin.Namespace) assert.Equal(t, container, naming.ContainerPGAdmin) - // Simulate a v7 version of pgAdmin by setting stdout to "7" for - // podexec call in reconcilePGAdminMajorVersion - _, _ = stdout.Write([]byte("7")) + // Simulate a v7.1 version of pgAdmin by setting stdout to "7.1" + // for the podexec call in reconcilePGAdminUsers + _, _ = stdout.Write([]byte("7.1")) return nil } assert.NilError(t, r.reconcilePGAdminUsers(ctx, pgadmin)) assert.Equal(t, calls, 1, "PodExec should be called once") assert.Equal(t, pgadmin.Status.MajorVersion, 7) + assert.Equal(t, pgadmin.Status.MinorVersion, "7.1") assert.Equal(t, pgadmin.Status.ImageSHA, "fakeSHA") }) @@ -145,78 +146,89 @@ func TestReconcilePGAdminUsers(t *testing.T) { ) error { calls++ - // Simulate a v7 version of pgAdmin by setting stdout to "7" for - // podexec call in reconcilePGAdminMajorVersion - _, _ = stdout.Write([]byte("7")) + // Simulate a v7.1 version of pgAdmin by setting stdout to "7.1" + // for the podexec call in reconcilePGAdminUsers + _, _ = stdout.Write([]byte("7.1")) return nil } assert.NilError(t, r.reconcilePGAdminUsers(ctx, pgadmin)) assert.Equal(t, calls, 1, "PodExec should be called once") assert.Equal(t, pgadmin.Status.MajorVersion, 7) + assert.Equal(t, pgadmin.Status.MinorVersion, "7.1") assert.Equal(t, pgadmin.Status.ImageSHA, "newFakeSHA") }) -} -func TestReconcilePGAdminMajorVersion(t *testing.T) { - ctx := context.Background() - pod := corev1.Pod{} - pod.Namespace = "test-namespace" - pod.Name = "pgadmin-123-0" - reconciler := &PGAdminReconciler{} + t.Run("PodHealthyBadVersion", func(t *testing.T) { + pgadmin := pgadmin.DeepCopy() + pod := pod.DeepCopy() - podExecutor := func( - ctx context.Context, stdin io.Reader, stdout, stderr io.Writer, command ...string, - ) error { - return reconciler.PodExec(ctx, pod.Namespace, pod.Name, "pgadmin", stdin, stdout, stderr, command...)
- } + pod.DeletionTimestamp = nil + pod.Status.ContainerStatuses = + []corev1.ContainerStatus{{Name: naming.ContainerPGAdmin}} + pod.Status.ContainerStatuses[0].State.Running = + new(corev1.ContainerStateRunning) + pod.Status.ContainerStatuses[0].ImageID = "fakeSHA" - t.Run("SuccessfulRetrieval", func(t *testing.T) { - reconciler.PodExec = func( + r := new(PGAdminReconciler) + r.Client = fake.NewClientBuilder().WithObjects(pod).Build() + + calls := 0 + r.PodExec = func( ctx context.Context, namespace, pod, container string, stdin io.Reader, stdout, stderr io.Writer, command ...string, ) error { + calls++ + assert.Equal(t, pod, "pgadmin-123-0") - assert.Equal(t, namespace, "test-namespace") + assert.Equal(t, namespace, pgadmin.Namespace) assert.Equal(t, container, naming.ContainerPGAdmin) - // Simulate a v7 version of pgAdmin by setting stdout to "7" for - // podexec call in reconcilePGAdminMajorVersion - _, _ = stdout.Write([]byte("7")) + // set expected version to something completely wrong + _, _ = stdout.Write([]byte("woot")) return nil } - version, err := reconciler.reconcilePGAdminMajorVersion(ctx, podExecutor) - assert.NilError(t, err) - assert.Equal(t, version, 7) + assert.ErrorContains(t, r.reconcilePGAdminUsers(ctx, pgadmin), "strconv.ParseFloat: parsing \"woot\": invalid syntax") + assert.Equal(t, calls, 1, "PodExec should be called once") + assert.Equal(t, pgadmin.Status.MajorVersion, 0) + assert.Equal(t, pgadmin.Status.MinorVersion, "") + assert.Equal(t, pgadmin.Status.ImageSHA, "") }) - t.Run("FailedRetrieval", func(t *testing.T) { - reconciler.PodExec = func( - ctx context.Context, namespace, pod, container string, - stdin io.Reader, stdout, stderr io.Writer, command ...string, - ) error { - // Simulate the python call giving bad data (not a version int) - _, _ = stdout.Write([]byte("asdfjkl;")) - return nil - } + t.Run("PodExecError", func(t *testing.T) { + pgadmin := pgadmin.DeepCopy() + pod := pod.DeepCopy() - version, err := reconciler.reconcilePGAdminMajorVersion(ctx, podExecutor) - assert.Check(t, err != nil) - assert.Equal(t, version, 0) - }) + pod.DeletionTimestamp = nil + pod.Status.ContainerStatuses = + []corev1.ContainerStatus{{Name: naming.ContainerPGAdmin}} + pod.Status.ContainerStatuses[0].State.Running = + new(corev1.ContainerStateRunning) + pod.Status.ContainerStatuses[0].ImageID = "fakeSHA" - t.Run("PodExecError", func(t *testing.T) { - reconciler.PodExec = func( + r := new(PGAdminReconciler) + r.Client = fake.NewClientBuilder().WithObjects(pod).Build() + + calls := 0 + r.PodExec = func( ctx context.Context, namespace, pod, container string, stdin io.Reader, stdout, stderr io.Writer, command ...string, ) error { + calls++ + + assert.Equal(t, pod, "pgadmin-123-0") + assert.Equal(t, namespace, pgadmin.Namespace) + assert.Equal(t, container, naming.ContainerPGAdmin) + return errors.New("PodExecError") } - version, err := reconciler.reconcilePGAdminMajorVersion(ctx, podExecutor) - assert.Check(t, err != nil) - assert.Equal(t, version, 0) + assert.Error(t, r.reconcilePGAdminUsers(ctx, pgadmin), "PodExecError") + assert.Equal(t, calls, 1, "PodExec should be called once") + assert.Equal(t, pgadmin.Status.MajorVersion, 0) + assert.Equal(t, pgadmin.Status.MinorVersion, "") + assert.Equal(t, pgadmin.Status.ImageSHA, "") }) } @@ -244,6 +256,14 @@ func TestWritePGAdminUsers(t *testing.T) { }`) assert.NilError(t, cc.Create(ctx, pgadmin)) + // fake the status so that the correct commands will be used when creating + // users. 
+ pgadmin.Status = v1beta1.PGAdminStatus{ + ImageSHA: "fakesha", + MajorVersion: 9, + MinorVersion: "9.3", + } + userPasswordSecret1 := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: "user-password-secret1", @@ -317,7 +337,7 @@ func TestWritePGAdminUsers(t *testing.T) { secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -377,7 +397,7 @@ func TestWritePGAdminUsers(t *testing.T) { secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -449,7 +469,7 @@ func TestWritePGAdminUsers(t *testing.T) { secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -494,7 +514,7 @@ func TestWritePGAdminUsers(t *testing.T) { secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -536,7 +556,7 @@ func TestWritePGAdminUsers(t *testing.T) { // User in users.json should be unchanged secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -563,7 +583,7 @@ func TestWritePGAdminUsers(t *testing.T) { // User in users.json should be unchanged assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -616,7 +636,7 @@ func TestWritePGAdminUsers(t *testing.T) { // have succeeded secret := &corev1.Secret{ObjectMeta: naming.StandalonePGAdmin(pgadmin)} assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -644,7 +664,7 @@ func TestWritePGAdminUsers(t *testing.T) { // User in users.json should be unchanged and 
attempt to add user should not // have succeeded assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -672,7 +692,7 @@ func TestWritePGAdminUsers(t *testing.T) { // User in users.json should be unchanged and attempt to add user should not // have succeeded assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) @@ -701,7 +721,7 @@ func TestWritePGAdminUsers(t *testing.T) { // User in users.json should be unchanged and attempt to add user should not // have succeeded assert.NilError(t, - reconciler.Client.Get(ctx, client.ObjectKeyFromObject(secret), secret)) + reconciler.Get(ctx, client.ObjectKeyFromObject(secret), secret)) if assert.Check(t, secret.Data["users.json"] != nil) { var usersArr []pgAdminUserForJson assert.NilError(t, json.Unmarshal(secret.Data["users.json"], &usersArr)) diff --git a/internal/controller/standalone_pgadmin/volume.go b/internal/controller/standalone_pgadmin/volume.go index dbdfaee649..a3e26682ef 100644 --- a/internal/controller/standalone_pgadmin/volume.go +++ b/internal/controller/standalone_pgadmin/volume.go @@ -7,14 +7,13 @@ package standalone_pgadmin import ( "context" + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/validation/field" - "github.com/pkg/errors" - "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) diff --git a/internal/crd/validation/postgrescluster/postgres_config_test.go b/internal/crd/validation/postgrescluster/postgres_config_test.go new file mode 100644 index 0000000000..e9315eff2a --- /dev/null +++ b/internal/crd/validation/postgrescluster/postgres_config_test.go @@ -0,0 +1,353 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package validation + +import ( + "context" + "fmt" + "testing" + + "gotest.tools/v3/assert" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" + + "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/require" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestPostgresConfigParametersV1beta1(t *testing.T) { + ctx := context.Background() + cc := require.Kubernetes(t) + t.Parallel() + + namespace := require.Namespace(t, cc) + base := v1beta1.NewPostgresCluster() + + // required fields + require.UnmarshalInto(t, &base.Spec, `{ + postgresVersion: 16, + instances: [{ + dataVolumeClaimSpec: { + accessModes: [ReadWriteOnce], + resources: { requests: { storage: 1Mi } }, + }, + }], + }`) + + base.Spec.Backups = v1beta1.Backups{ + PGBackRest: v1beta1.PGBackRestArchive{ + Repos: []v1beta1.PGBackRestRepo{{Name: "repo1"}}, + }, + } + base.Namespace = namespace.Name + base.Name = "postgres-config-parameters" + + assert.NilError(t, cc.Create(ctx, base.DeepCopy(), client.DryRunAll), + "expected this base cluster to be valid") + + var u unstructured.Unstructured + require.UnmarshalInto(t, &u, require.Value(yaml.Marshal(base))) + assert.Equal(t, u.GetAPIVersion(), "postgres-operator.crunchydata.com/v1beta1") + + testPostgresConfigParametersCommon(t, cc, u) + + t.Run("Logging", func(t *testing.T) { + t.Run("Allowed", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "log_directory", value: "anything"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := u.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + } + }) + }) + + t.Run("SSL Settings", func(t *testing.T) { + t.Run("Allowed", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + postgresVersion int + }{ + // ssl_ecdh_curve is allowed for all supported Postgres versions + {key: "ssl_ecdh_curve", value: "anything", postgresVersion: 17}, + {key: "ssl_ecdh_curve", value: "anything", postgresVersion: 18}, + + // ssl_groups is only supported for Postgres 18 and greater + {key: "ssl_groups", value: "anything", postgresVersion: 18}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := u.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.postgresVersion)), + "spec", "postgresVersion") + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + } + }) + + t.Run("Not Allowed", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + postgresVersion int + }{ + // setting "ssl" is not allowed for any Postgres version + {key: "ssl", value: "anything", postgresVersion: 17}, + {key: "ssl", value: "anything", postgresVersion: 18}, + + // setting any parameter with an "ssl_" prefix that is not + // "ssl_ecdh_curve" or "ssl_groups" is not allowed for any version + {key: "ssl_anything", value: "anything", postgresVersion: 17}, + {key: "ssl_anything", value: "anything", postgresVersion: 18}, + + // setting "ssl_ecdh_curve" with any additional suffix is not + // 
allowed for any version + {key: "ssl_ecdh_curve_bad", value: "anything", postgresVersion: 17}, + {key: "ssl_ecdh_curve_bad", value: "anything", postgresVersion: 18}, + + // setting "ssl_groups" is not allowed for Postgres versions 17 + // or earlier + {key: "ssl_groups", value: "anything", postgresVersion: 17}, + + // setting "ssl_groups" with any additional suffix is not + // allowed for any version + {key: "ssl_groups_bad", value: "anything", postgresVersion: 17}, + {key: "ssl_groups_bad", value: "anything", postgresVersion: 18}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := u.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.postgresVersion)), + "spec", "postgresVersion") + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + details := require.StatusErrorDetails(t, err) + assert.Assert(t, cmp.Len(details.Causes, 1)) + }) + } + }) + }) +} + +func testPostgresConfigParametersCommon(t *testing.T, cc client.Client, base unstructured.Unstructured) { + ctx := context.Background() + + t.Run("Allowed", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {"archive_timeout", 100}, + {"archive_timeout", "20s"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + } + }) + + t.Run("Disallowed", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "cluster_name", value: "asdf"}, + {key: "config_file", value: "asdf"}, + {key: "data_directory", value: ""}, + {key: "external_pid_file", value: ""}, + {key: "hba_file", value: "one"}, + {key: "hot_standby", value: "off"}, + {key: "ident_file", value: "two"}, + {key: "listen_addresses", value: ""}, + {key: "port", value: 5}, + {key: "wal_log_hints", value: "off"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + details := require.StatusErrorDetails(t, err) + assert.Assert(t, cmp.Len(details.Causes, 1)) + + // TODO(k8s-1.30) TODO(validation): Move the parameter name from the message to the field path. 
+ assert.Equal(t, details.Causes[0].Field, "spec.config.parameters") + assert.Assert(t, cmp.Contains(details.Causes[0].Message, tt.key)) + }) + } + }) + + t.Run("Logging", func(t *testing.T) { + for _, tt := range []struct { + valid bool + key string + value any + message string + }{ + {valid: false, key: "log_file_mode", value: "", message: "cannot be changed"}, + {valid: false, key: "log_file_mode", value: "any", message: "cannot be changed"}, + {valid: false, key: "logging_collector", value: "", message: "unsafe"}, + {valid: false, key: "logging_collector", value: "off", message: "unsafe"}, + {valid: false, key: "logging_collector", value: "on", message: "unsafe"}, + + {valid: true, key: "log_destination", value: "anything"}, + {valid: true, key: "log_filename", value: "anything"}, + {valid: true, key: "log_filename", value: "percent-%s-too"}, + {valid: true, key: "log_rotation_age", value: "7d"}, + {valid: true, key: "log_rotation_age", value: 5}, + {valid: true, key: "log_rotation_size", value: "100MB"}, + {valid: true, key: "log_rotation_size", value: 13}, + {valid: true, key: "log_timezone", value: ""}, + {valid: true, key: "log_timezone", value: "nonsense"}, + } { + t.Run(fmt.Sprint(tt), func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + + if tt.valid { + assert.NilError(t, err) + assert.Equal(t, "", tt.message, "BUG IN TEST: no message expected when valid") + } else { + assert.Assert(t, apierrors.IsInvalid(err)) + + details := require.StatusErrorDetails(t, err) + assert.Assert(t, cmp.Len(details.Causes, 1)) + + // TODO(k8s-1.30) TODO(validation): Move the parameter name from the message to the field path. 
+ assert.Equal(t, details.Causes[0].Field, "spec.config.parameters") + assert.Assert(t, cmp.Contains(details.Causes[0].Message, tt.key)) + assert.Assert(t, cmp.Contains(details.Causes[0].Message, tt.message)) + } + }) + } + }) + + t.Run("NoConnections", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "ssl", value: "off"}, + {key: "ssl_ca_file", value: ""}, + {key: "unix_socket_directories", value: "one"}, + {key: "unix_socket_group", value: "two"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + }) + } + }) + + t.Run("NoWriteAheadLog", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "archive_mode", value: "off"}, + {key: "archive_command", value: "true"}, + {key: "restore_command", value: "true"}, + {key: "recovery_target", value: "immediate"}, + {key: "recovery_target_name", value: "doot"}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + }) + } + }) + + t.Run("wal_level", func(t *testing.T) { + t.Run("Valid", func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + `logical`, "spec", "config", "parameters", "wal_level") + + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + + t.Run("Invalid", func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + `minimal`, "spec", "config", "parameters", "wal_level") + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + assert.ErrorContains(t, err, `"replica" or higher`) + + details := require.StatusErrorDetails(t, err) + assert.Assert(t, cmp.Len(details.Causes, 1)) + assert.Equal(t, details.Causes[0].Field, "spec.config.parameters") + assert.Assert(t, cmp.Contains(details.Causes[0].Message, "wal_level")) + }) + }) + + t.Run("NoReplication", func(t *testing.T) { + for _, tt := range []struct { + key string + value any + }{ + {key: "synchronous_standby_names", value: ""}, + {key: "primary_conninfo", value: ""}, + {key: "primary_slot_name", value: ""}, + {key: "recovery_min_apply_delay", value: ""}, + } { + t.Run(tt.key, func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalIntoField(t, cluster, + require.Value(yaml.Marshal(tt.value)), + "spec", "config", "parameters", tt.key) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + }) + } + }) +} diff --git a/internal/kubernetes/discovery.go b/internal/kubernetes/discovery.go index 0a96398e90..62e14fe496 100644 --- a/internal/kubernetes/discovery.go +++ b/internal/kubernetes/discovery.go @@ -165,7 +165,7 @@ func (r *DiscoveryRunner) readAPIs(ctx context.Context) error { r.have.RLock() defer r.have.RUnlock() - logging.FromContext(ctx).V(1).Info("Found APIs", "index_size", r.have.APISet.Len()) + logging.FromContext(ctx).V(1).Info("Found APIs", "index_size", r.have.Len()) return nil } diff --git a/internal/naming/annotations.go b/internal/naming/annotations.go index a2fedb5747..61a5438908 100644 --- a/internal/naming/annotations.go +++ 
b/internal/naming/annotations.go @@ -32,6 +32,14 @@ const ( // (and therefore must be recreated) PGBackRestConfigHash = annotationPrefix + "pgbackrest-hash" + // PGBackRestCurrentConfig is an annotation used to indicate the name of the pgBackRest + // configuration associated with a specific Job as determined by either the current primary + // (if no dedicated repository host is enabled) or the dedicated repository host. This helps + // detect pgBackRest backup Jobs that no longer mount the proper pgBackRest + // configuration, e.g. because a failover has occurred, or because a dedicated repository host + // has been enabled or disabled. + PGBackRestCurrentConfig = annotationPrefix + "pgbackrest-config" + // PGBackRestRestore is the annotation that is added to a PostgresCluster to initiate an in-place // restore. The value of the annotation will be a unique identifier for a restore Job (e.g. a // timestamp), which will be stored in the PostgresCluster status to properly track completion @@ -46,6 +54,10 @@ const ( // bind all addresses does not work in certain IPv6 environments. PGBackRestIPVersion = annotationPrefix + "pgbackrest-ip-version" + // PGBackRestCloudLogVolume is an annotation used to indicate which persistent volume claim + // should be mounted to cloud repo backup jobs so that the backup logs can be persisted. + PGBackRestCloudLogVolume = annotationPrefix + "pgbackrest-cloud-log-volume" + // PostgresExporterCollectorsAnnotation is an annotation used to allow users to control whether or // not postgres_exporter default metrics, settings, and collectors are enabled. The value "None" // disables all postgres_exporter defaults. Disabling the defaults may cause errors in dashboards. diff --git a/internal/naming/annotations_test.go b/internal/naming/annotations_test.go index f64004557f..9553e5e72a 100644 --- a/internal/naming/annotations_test.go +++ b/internal/naming/annotations_test.go @@ -20,7 +20,9 @@ func TestAnnotationsValid(t *testing.T) { assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestBackup)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestBackupJobCompletion)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestConfigHash)) + assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestCurrentConfig)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestIPVersion)) + assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestCloudLogVolume)) assert.Assert(t, nil == validation.IsQualifiedName(PGBackRestRestore)) assert.Assert(t, nil == validation.IsQualifiedName(PostgresExporterCollectorsAnnotation)) } diff --git a/internal/naming/names.go b/internal/naming/names.go index 04923730fb..345967d1a7 100644 --- a/internal/naming/names.go +++ b/internal/naming/names.go @@ -604,11 +604,3 @@ func StandalonePGAdmin(pgadmin *v1beta1.PGAdmin) metav1.ObjectMeta { Name: fmt.Sprintf("pgadmin-%s", pgadmin.UID), } } - -// UpgradeCheckConfigMap returns the ObjectMeta for the PGO ConfigMap -func UpgradeCheckConfigMap() metav1.ObjectMeta { - return metav1.ObjectMeta{ - Namespace: config.PGONamespace(), - Name: "pgo-upgrade-check", - } -} diff --git a/internal/naming/selectors.go b/internal/naming/selectors.go index a7b105de4b..c51f2d0262 100644 --- a/internal/naming/selectors.go +++ b/internal/naming/selectors.go @@ -152,6 +152,13 @@ func ClusterPostgresUsers(cluster string) metav1.LabelSelector { } } +// ClusterPrimary selects things for the primary PostgreSQL instance.
+func ClusterPrimary(cluster string) metav1.LabelSelector { + s := ClusterInstances(cluster) + s.MatchLabels[LabelRole] = RolePatroniLeader + return s +} + // CrunchyBridgeClusterPostgresRoles selects things labeled for CrunchyBridgeCluster // PostgreSQL roles in cluster. func CrunchyBridgeClusterPostgresRoles(clusterName string) metav1.LabelSelector { diff --git a/internal/naming/selectors_test.go b/internal/naming/selectors_test.go index a9d2ce987d..c8617bcb78 100644 --- a/internal/naming/selectors_test.go +++ b/internal/naming/selectors_test.go @@ -148,6 +148,16 @@ func TestClusterPostgresUsers(t *testing.T) { assert.ErrorContains(t, err, "Invalid") } +func TestClusterPrimary(t *testing.T) { + s, err := AsSelector(ClusterPrimary("something")) + assert.NilError(t, err) + assert.DeepEqual(t, s.String(), strings.Join([]string{ + "postgres-operator.crunchydata.com/cluster=something", + "postgres-operator.crunchydata.com/instance", + "postgres-operator.crunchydata.com/role=master", + }, ",")) +} + func TestCrunchyBridgeClusterPostgresRoles(t *testing.T) { s, err := AsSelector(CrunchyBridgeClusterPostgresRoles("something")) assert.NilError(t, err) diff --git a/internal/patroni/config.go b/internal/patroni/config.go index 72202fbd78..3e6f7b6c83 100644 --- a/internal/patroni/config.go +++ b/internal/patroni/config.go @@ -465,7 +465,7 @@ func instanceYAML( // created. That value should be injected using the downward API and the // PATRONI_KUBERNETES_POD_IP environment variable. - // Missing here is "ports" which is is connascent with "postgresql.connect_address". + // Missing here is "ports" which is connascent with "postgresql.connect_address". // See the PATRONI_KUBERNETES_PORTS env variable. }, diff --git a/internal/patroni/config_test.go b/internal/patroni/config_test.go index 222c174f40..f1d2a4c5d9 100644 --- a/internal/patroni/config_test.go +++ b/internal/patroni/config_test.go @@ -794,7 +794,7 @@ func TestPGBackRestCreateReplicaCommand(t *testing.T) { file := filepath.Join(dir, "command.sh") assert.NilError(t, os.WriteFile(file, []byte(command), 0o600)) - cmd := exec.Command(shellcheck, "--enable=all", "--shell=sh", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", "--shell=sh", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } @@ -816,7 +816,7 @@ func TestPGBackRestCreateReplicaCommand(t *testing.T) { file := filepath.Join(dir, "script.bash") assert.NilError(t, os.WriteFile(file, []byte(script), 0o600)) - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } @@ -895,7 +895,6 @@ func TestProbeTiming(t *testing.T) { FailureThreshold: 1, }}, } { - tt := tt actual := probeTiming(&v1beta1.PatroniSpec{ LeaderLeaseDurationSeconds: &tt.lease, SyncPeriodSeconds: &tt.sync, diff --git a/internal/pgadmin/config_test.go b/internal/pgadmin/config_test.go index e634aee361..0e659c7070 100644 --- a/internal/pgadmin/config_test.go +++ b/internal/pgadmin/config_test.go @@ -77,7 +77,7 @@ func TestStartupCommand(t *testing.T) { assert.NilError(t, os.WriteFile(file, []byte(command[3]), 0o600)) // Expect shellcheck to be happy. 
- cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) }) @@ -94,7 +94,7 @@ func TestStartupCommand(t *testing.T) { // Expect flake8 to be happy. Ignore "E401 multiple imports on one line" // in addition to the defaults. The file contents appear in PodSpec, so // allow lines longer than the default to save some vertical space. - cmd := exec.Command(flake8, "--extend-ignore=E401", "--max-line-length=99", file) + cmd := exec.CommandContext(t.Context(), flake8, "--extend-ignore=E401", "--max-line-length=99", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) }) diff --git a/internal/pgadmin/users_test.go b/internal/pgadmin/users_test.go index 4dba70f81a..673a2c4b02 100644 --- a/internal/pgadmin/users_test.go +++ b/internal/pgadmin/users_test.go @@ -180,7 +180,7 @@ with create_app().app_context(): // Expect flake8 to be happy. Ignore "E402 module level import not // at top of file" in addition to the defaults. - cmd := exec.Command(flake8, "--extend-ignore=E402", file) + cmd := exec.CommandContext(ctx, flake8, "--extend-ignore=E402", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) diff --git a/internal/pgbackrest/config.go b/internal/pgbackrest/config.go index c14a264ce3..3899c33339 100644 --- a/internal/pgbackrest/config.go +++ b/internal/pgbackrest/config.go @@ -17,7 +17,6 @@ import ( "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/config" - "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/postgres" @@ -39,6 +38,10 @@ const ( // repository host CMRepoKey = "pgbackrest_repo.conf" + // CMCloudRepoKey is the name of the pgBackRest configuration file used by backup jobs + // for cloud repos + CMCloudRepoKey = "pgbackrest_cloud.conf" + // configDirectory is the pgBackRest configuration directory. configDirectory = "/etc/pgbackrest/conf.d" @@ -70,8 +73,9 @@ const ( // pgbackrest_job.conf is used by certain jobs, such as stanza create and backup // pgbackrest_primary.conf is used by the primary database pod // pgbackrest_repo.conf is used by the pgBackRest repository pod +// pgbackrest_cloud.conf is used by cloud repo backup jobs func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1beta1.PostgresCluster, - repoHostName, configHash, serviceName, serviceNamespace string, + repoHostName, configHash, serviceName, serviceNamespace, cloudLogPath string, instanceNames []string) (*corev1.ConfigMap, error) { var err error @@ -109,17 +113,14 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet postgresCluster.Spec.Backups.PGBackRest.Global, ).String() + // As the cluster transitions from having a repository host to having none, // PostgreSQL instances that have not rolled out expect to mount a server // config file. Always populate that file so those volumes stay valid and - // Kubernetes propagates their contents to those pods. The repo host name - // given below should always be set, but this guards for cases when it might - // not be. 
- cm.Data[serverConfigMapKey] = "" - - if repoHostName != "" { - cm.Data[serverConfigMapKey] = iniGeneratedWarning + - serverConfig(postgresCluster).String() + // Kubernetes propagates their contents to those pods. + cm.Data[serverConfigMapKey] = iniGeneratedWarning + + serverConfig(postgresCluster).String() + if RepoHostVolumeDefined(postgresCluster) && repoHostName != "" { cm.Data[CMRepoKey] = iniGeneratedWarning + populateRepoHostConfigurationMap( serviceName, serviceNamespace, @@ -130,9 +131,8 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet postgresCluster.Spec.Backups.PGBackRest.Global, ).String() - if RepoHostVolumeDefined(postgresCluster) && - (feature.Enabled(ctx, feature.OpenTelemetryLogs) || - feature.Enabled(ctx, feature.OpenTelemetryMetrics)) { + if collector.OpenTelemetryLogsOrMetricsEnabled(ctx, postgresCluster) { + err = collector.AddToConfigMap(ctx, collector.NewConfigForPgBackrestRepoHostPod( ctx, postgresCluster.Spec.Instrumentation, @@ -141,8 +141,7 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet // If OTel logging is enabled, add logrotate config for the RepoHost if err == nil && - postgresCluster.Spec.Instrumentation != nil && - feature.Enabled(ctx, feature.OpenTelemetryLogs) { + collector.OpenTelemetryLogsEnabled(ctx, postgresCluster) { var pgBackRestLogPath string for _, repo := range postgresCluster.Spec.Backups.PGBackRest.Repos { if repo.Volume != nil { @@ -158,6 +157,18 @@ func CreatePGBackRestConfigMapIntent(ctx context.Context, postgresCluster *v1bet } } + if CloudRepoDefined(postgresCluster) { + cm.Data[CMCloudRepoKey] = iniGeneratedWarning + + populateCloudRepoConfigurationMap( + serviceName, serviceNamespace, pgdataDir, + config.FetchKeyCommand(&postgresCluster.Spec), + strconv.Itoa(postgresCluster.Spec.PostgresVersion), + cloudLogPath, pgPort, instanceNames, + postgresCluster.Spec.Backups.PGBackRest.Repos, + postgresCluster.Spec.Backups.PGBackRest.Global, + ).String() + } + cm.Data[ConfigHashKey] = configHash return cm, err @@ -179,7 +190,7 @@ func MakePGBackrestLogDir(template *corev1.PodTemplateSpec, container := corev1.Container{ // TODO(log-rotation): The second argument here should be the path // of the volume mount. Find a way to calculate that consistently. - Command: []string{"bash", "-c", shell.MakeDirectories(0o775, path.Dir(pgBackRestLogPath), pgBackRestLogPath)}, + Command: []string{"bash", "-c", shell.MakeDirectories(path.Dir(pgBackRestLogPath), pgBackRestLogPath)}, Image: config.PGBackRestContainerImage(cluster), ImagePullPolicy: cluster.Spec.ImagePullPolicy, Name: naming.ContainerPGBackRestLogDirInit, @@ -240,7 +251,7 @@ func RestoreCommand(pgdata, hugePagesSetting, fetchKeyCommand string, _ []*corev `read -r max_ptxn <<< "${control##*max_prepared_xacts setting:}"`, `read -r max_work <<< "${control##*max_worker_processes setting:}"`, - // During recovery, only allow connections over the the domain socket. + // During recovery, only allow connections over the domain socket. `echo > /tmp/pg_hba.restore.conf 'local all "postgres" peer'`, // Combine parameters from Go with those detected in Bash. 
@@ -506,6 +517,69 @@ func populateRepoHostConfigurationMap( } } +func populateCloudRepoConfigurationMap( + serviceName, serviceNamespace, pgdataDir, + fetchKeyCommand, postgresVersion, logPath string, + pgPort int32, pgHosts []string, repos []v1beta1.PGBackRestRepo, + globalConfig map[string]string, +) iniSectionSet { + + global := iniMultiSet{} + stanza := iniMultiSet{} + + for _, repo := range repos { + if repo.Volume != nil { + continue + } + + global.Set(repo.Name+"-path", defaultRepo1Path+repo.Name) + + for option, val := range getExternalRepoConfigs(repo) { + global.Set(option, val) + } + } + + // If we are given a log path, set it in the config. Otherwise, turn off logging to file. + if logPath != "" { + global.Set("log-path", logPath) + } else { + global.Set("log-level-file", "off") + } + + for option, val := range globalConfig { + global.Set(option, val) + } + + // set the configs for all PG hosts + for i, pgHost := range pgHosts { + // TODO(cbandy): pass a FQDN in already. + pgHostFQDN := pgHost + "-0." + + serviceName + "." + serviceNamespace + ".svc." + + naming.KubernetesClusterDomain(context.Background()) + + stanza.Set(fmt.Sprintf("pg%d-host", i+1), pgHostFQDN) + stanza.Set(fmt.Sprintf("pg%d-host-type", i+1), "tls") + stanza.Set(fmt.Sprintf("pg%d-host-ca-file", i+1), certAuthorityAbsolutePath) + stanza.Set(fmt.Sprintf("pg%d-host-cert-file", i+1), certClientAbsolutePath) + stanza.Set(fmt.Sprintf("pg%d-host-key-file", i+1), certClientPrivateKeyAbsolutePath) + + stanza.Set(fmt.Sprintf("pg%d-path", i+1), pgdataDir) + stanza.Set(fmt.Sprintf("pg%d-port", i+1), fmt.Sprint(pgPort)) + stanza.Set(fmt.Sprintf("pg%d-socket-path", i+1), postgres.SocketDirectory) + + if fetchKeyCommand != "" { + stanza.Set("archive-header-check", "n") + stanza.Set("page-header-check", "n") + stanza.Set("pg-version-force", postgresVersion) + } + } + + return iniSectionSet{ + "global": global, + DefaultStanzaName: stanza, + } +} + // getExternalRepoConfigs returns a map containing the configuration settings for an external // pgBackRest repository as defined in the PostgresCluster spec func getExternalRepoConfigs(repo v1beta1.PGBackRestRepo) map[string]string { diff --git a/internal/pgbackrest/config.md b/internal/pgbackrest/config.md index dd1127643a..f19c9ac1e4 100644 --- a/internal/pgbackrest/config.md +++ b/internal/pgbackrest/config.md @@ -6,17 +6,17 @@ # pgBackRest Configuration Overview -The initial pgBackRest configuration for the Postgres Clusters is designed to stand up a +The initial pgBackRest configuration for the Postgres Clusters is designed to stand up a minimal configuration for use by the various pgBackRest functions needed by the Postgres cluster. These settings are meant to be the minimally required settings, with other settings supported through the use of custom configurations. -During initial cluster creation, four pgBackRest use cases are involved. +During initial cluster creation, four pgBackRest use cases are involved. -These settings are configured in either the [global] or [stanza] sections of the +These settings are configured in either the [global] or [stanza] sections of the pgBackRest configuration based on their designation in the pgBackRest code. 
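For orientation, here is a minimal sketch of how those two sections fit together in a generated file, using the stanza name `db` and default values that appear in the tests elsewhere in this change:

```text
[global]
log-path = /pgdata/pgbackrest/log
repo1-path = /pgbackrest/repo1

[db]
pg1-path = /pgdata/pg12
pg1-port = 2345
pg1-socket-path = /tmp/postgres
```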
For more information on the above, and other settings, please see -https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c +<https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c> As shown, the settings with the `cfgSectionGlobal` designation are @@ -24,18 +24,17 @@ As shown, the settings with the `cfgSectionGlobal` designation are `log-level-file`: Level for file logging. Set to 'off' when the repo host has no volume. -`repo-path`: Path where backups and archive are stored. +`repo-path`: Path where backups and archive are stored. The repository is where pgBackRest stores backups and archives WAL segments. `repo-host`: Repository host when operating remotely via TLS. - The settings with the `cfgSectionStanza` designation are `pg-host`: PostgreSQL host for operating remotely via TLS. `pg-path`: The path of the PostgreSQL data directory. - This should be the same as the data_directory setting in postgresql.conf. + This should be the same as the data_directory setting in postgresql.conf. `pg-port`: The port that PostgreSQL is running on. @@ -44,14 +43,13 @@ The settings with the `cfgSectionStanza` designation are For more information on these and other configuration settings, please see `https://pgbackrest.org/configuration.html`. -# Configuration Per Function +## Configuration Per Function -Below, each of the four configuration sets is outlined by use case. Please note that certain -settings have acceptable defaults for the cluster's usage (such as for `repo1-type` which +Below, each of the four configuration sets is outlined by use case. Please note that certain +settings have acceptable defaults for the cluster's usage (such as for `repo1-type` which defaults to `posix`), so those settings are not included. - -1. Primary Database Pod +1. Primary Database Pod [global] log-path @@ -86,28 +84,26 @@ log-path [global] log-path - -# Initial pgBackRest Configuration +## Initial pgBackRest Configuration In order to be used by the Postgres cluster, these default configurations are stored in -a configmap. This configmap is named with the following convention `<clusterName>-pgbackrest-config`, +a configmap. This configmap is named with the following convention `<clusterName>-pgbackrest-config`, such that a cluster named 'mycluster' would have a configuration configmap named `mycluster-pgbackrest-config`. -As noted above, there are three distinct default configurations, each of which is referenced +As noted above, there are three distinct default configurations, each of which is referenced by a key value in the configmap's data section. For the primary database pod, the key is `pgbackrest_primary.conf`. For the pgBackRest repo pod, the key is `pgbackrest_repo.conf`. Finally, for the pgBackRest stanza job pod and the initial pgBackRest backup job pod, the key is `pgbackrest_job.conf`. - -For each pod, the relevant configuration file is mounted as a projected volume named + +For each pod, the relevant configuration file is mounted as a projected volume named `pgbackrest-config-vol`. The configuration file will be found in the `/etc/pgbackrest` directory -of the relevant container and is named `pgbackrest.conf`, matching the default pgBackRest location.
+For more information, please see `https://pgbackrest.org/configuration.html#introduction` - -# Custom Configuration Support +## Custom Configuration Support TODO(tjmoore4): Document custom configuration solution once implemented @@ -116,7 +112,7 @@ flag with the desired pgBackRest command. This should point to the directory pat where the `*.conf` file with the custom configuration is located. This file will be added as a projected volume and must be formatted in the standard -pgBackRest INI convention. Please note that any of the configuration settings listed +pgBackRest INI convention. Please note that any of the configuration settings listed above MUST BE CONFIGURED VIA THE POSTGRESCLUSTER SPEC so as to avoid errors. For more information, please see @@ -140,7 +136,7 @@ command-line or top-to-bottom in INI files. The remaining options must be set exactly once. `pgbackrest` exits non-zero when the option occurs twice on the command-line or twice in a file: -``` +```text ERROR: [031]: option 'io-timeout' cannot be set multiple times ``` diff --git a/internal/pgbackrest/config_test.go b/internal/pgbackrest/config_test.go index 08aaaf8d94..c1b4e0b155 100644 --- a/internal/pgbackrest/config_test.go +++ b/internal/pgbackrest/config_test.go @@ -33,21 +33,272 @@ func TestCreatePGBackRestConfigMapIntent(t *testing.T) { domain := naming.KubernetesClusterDomain(context.Background()) - t.Run("NoVolumeRepo", func(t *testing.T) { + t.Run("NoRepos", func(t *testing.T) { + // We always create the config for the pgbackrest instance and server cluster := cluster.DeepCopy() cluster.Spec.Backups.PGBackRest.Repos = nil + cluster.UID = "piano" configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "", "number", "pod-service-name", "test-ns", + "", "number", "pod-service-name", "test-ns", "", []string{"some-instance"}) assert.NilError(t, err) assert.Equal(t, configmap.Data["config-hash"], "number") - assert.Equal(t, configmap.Data["pgbackrest-server.conf"], "") + assert.Equal(t, configmap.Data["pgbackrest-server.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +tls-server-address = 0.0.0.0 +tls-server-auth = pgbackrest@piano=* +tls-server-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +tls-server-cert-file = /etc/pgbackrest/server/server-tls.crt +tls-server-key-file = /etc/pgbackrest/server/server-tls.key + +[global:server] +log-level-console = detail +log-level-file = off +log-level-stderr = error +log-timestamp = n + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_instance.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+ +[global] +archive-async = y +log-path = /pgdata/pgbackrest/log +spool-path = /pgdata/pgbackrest-spool + +[db] +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], "") + assert.Equal(t, configmap.Data["pgbackrest_cloud.conf"], "") + }) + + t.Run("CloudRepoPresentNoVolumeRepo", func(t *testing.T) { + cluster := cluster.DeepCopy() + cluster.UID = "ukulele" + cluster.Spec.Backups.PGBackRest.Global = map[string]string{ + "repo1-test": "something", + } + cluster.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{ + { + Name: "repo1", + GCS: &v1beta1.RepoGCS{Bucket: "g-bucket"}, + }, + } + + configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, + "", "anumber", "pod-service-name", "test-ns", "", + []string{"some-instance"}) + assert.NilError(t, err) + + configmapWithCloudLogging, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, + "", "anumber", "pod-service-name", "test-ns", "/a/log/path", + []string{"some-instance"}) + + assert.NilError(t, err) + assert.DeepEqual(t, configmap.Annotations, map[string]string{}) + assert.DeepEqual(t, configmapWithCloudLogging.Annotations, map[string]string{}) + + assert.DeepEqual(t, configmap.Labels, map[string]string{ + "postgres-operator.crunchydata.com/cluster": "hippo-dance", + "postgres-operator.crunchydata.com/pgbackrest": "", + "postgres-operator.crunchydata.com/pgbackrest-config": "", + }) + assert.DeepEqual(t, configmapWithCloudLogging.Labels, map[string]string{ + "postgres-operator.crunchydata.com/cluster": "hippo-dance", + "postgres-operator.crunchydata.com/pgbackrest": "", + "postgres-operator.crunchydata.com/pgbackrest-config": "", + }) + + assert.Equal(t, configmap.Data["config-hash"], "anumber") + assert.Equal(t, configmapWithCloudLogging.Data["config-hash"], "anumber") + + serverConfigExpectation := strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +tls-server-address = 0.0.0.0 +tls-server-auth = pgbackrest@ukulele=* +tls-server-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +tls-server-cert-file = /etc/pgbackrest/server/server-tls.crt +tls-server-key-file = /etc/pgbackrest/server/server-tls.key + +[global:server] +log-level-console = detail +log-level-file = off +log-level-stderr = error +log-timestamp = n + `, "\t\n") + assert.Equal(t, configmap.Data["pgbackrest-server.conf"], serverConfigExpectation+"\n") + assert.Equal(t, configmapWithCloudLogging.Data["pgbackrest-server.conf"], serverConfigExpectation+"\n") + + instanceConfigExpectation := strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +archive-async = y +log-path = /pgdata/pgbackrest/log +repo1-gcs-bucket = g-bucket +repo1-path = /pgbackrest/repo1 +repo1-test = something +repo1-type = gcs +spool-path = /pgdata/pgbackrest-spool + +[db] +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n") + assert.Equal(t, configmap.Data["pgbackrest_instance.conf"], instanceConfigExpectation+"\n") + assert.Equal(t, configmapWithCloudLogging.Data["pgbackrest_instance.conf"], instanceConfigExpectation+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_cloud.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+ +[global] +log-level-file = off +repo1-gcs-bucket = g-bucket +repo1-path = /pgbackrest/repo1 +repo1-test = something +repo1-type = gcs + +[db] +pg1-host = some-instance-0.pod-service-name.test-ns.svc.`+domain+` +pg1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +pg1-host-cert-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.crt +pg1-host-key-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.key +pg1-host-type = tls +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmapWithCloudLogging.Data["pgbackrest_cloud.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +log-path = /a/log/path +repo1-gcs-bucket = g-bucket +repo1-path = /pgbackrest/repo1 +repo1-test = something +repo1-type = gcs + +[db] +pg1-host = some-instance-0.pod-service-name.test-ns.svc.`+domain+` +pg1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +pg1-host-cert-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.crt +pg1-host-key-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.key +pg1-host-type = tls +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], "") + assert.Equal(t, configmapWithCloudLogging.Data["pgbackrest_repo.conf"], "") }) - t.Run("DedicatedRepoHost", func(t *testing.T) { + t.Run("VolumeRepoPresentNoCloudRepo", func(t *testing.T) { cluster := cluster.DeepCopy() + cluster.UID = "guitar" + cluster.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{ + { + Name: "repo1", + Volume: &v1beta1.RepoPVC{}, + }, + } + + configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, + "repo-hostname", "anumber", "pod-service-name", "test-ns", "", + []string{"some-instance"}) + + assert.NilError(t, err) + assert.DeepEqual(t, configmap.Annotations, map[string]string{}) + assert.DeepEqual(t, configmap.Labels, map[string]string{ + "postgres-operator.crunchydata.com/cluster": "hippo-dance", + "postgres-operator.crunchydata.com/pgbackrest": "", + "postgres-operator.crunchydata.com/pgbackrest-config": "", + }) + + assert.Equal(t, configmap.Data["config-hash"], "anumber") + assert.Equal(t, configmap.Data["pgbackrest-server.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +tls-server-address = 0.0.0.0 +tls-server-auth = pgbackrest@guitar=* +tls-server-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +tls-server-cert-file = /etc/pgbackrest/server/server-tls.crt +tls-server-key-file = /etc/pgbackrest/server/server-tls.key + +[global:server] +log-level-console = detail +log-level-file = off +log-level-stderr = error +log-timestamp = n + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_instance.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+ +[global] +archive-async = y +log-path = /pgdata/pgbackrest/log +repo1-host = repo-hostname-0.pod-service-name.test-ns.svc.`+domain+` +repo1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +repo1-host-cert-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.crt +repo1-host-key-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.key +repo1-host-type = tls +repo1-host-user = postgres +repo1-path = /pgbackrest/repo1 +spool-path = /pgdata/pgbackrest-spool + +[db] +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +log-path = /pgbackrest/repo1/log +repo1-path = /pgbackrest/repo1 + +[db] +pg1-host = some-instance-0.pod-service-name.test-ns.svc.`+domain+` +pg1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +pg1-host-cert-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.crt +pg1-host-key-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.key +pg1-host-type = tls +pg1-path = /pgdata/pg12 +pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_cloud.conf"], "") + }) + + t.Run("DedicatedRepoHostAndCloudRepos", func(t *testing.T) { + cluster := cluster.DeepCopy() + cluster.UID = "bass" cluster.Spec.Backups.PGBackRest.Global = map[string]string{ "repo3-test": "something", } @@ -73,7 +324,7 @@ func TestCreatePGBackRestConfigMapIntent(t *testing.T) { } configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "repo-hostname", "abcde12345", "pod-service-name", "test-ns", + "repo-hostname", "abcde12345", "pod-service-name", "test-ns", "", []string{"some-instance"}) assert.NilError(t, err) @@ -85,6 +336,25 @@ func TestCreatePGBackRestConfigMapIntent(t *testing.T) { }) assert.Equal(t, configmap.Data["config-hash"], "abcde12345") + + assert.Equal(t, configmap.Data["pgbackrest-server.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. + +[global] +tls-server-address = 0.0.0.0 +tls-server-auth = pgbackrest@bass=* +tls-server-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +tls-server-cert-file = /etc/pgbackrest/server/server-tls.crt +tls-server-key-file = /etc/pgbackrest/server/server-tls.key + +[global:server] +log-level-console = detail +log-level-file = off +log-level-stderr = error +log-timestamp = n + `, "\t\n")+"\n") + assert.Equal(t, configmap.Data["pgbackrest_repo.conf"], strings.Trim(` # Generated by postgres-operator. DO NOT EDIT. # Your changes will not be saved. @@ -147,6 +417,36 @@ spool-path = /pgdata/pgbackrest-spool [db] pg1-path = /pgdata/pg12 pg1-port = 2345 +pg1-socket-path = /tmp/postgres + `, "\t\n")+"\n") + + assert.Equal(t, configmap.Data["pgbackrest_cloud.conf"], strings.Trim(` +# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+ +[global] +log-level-file = off +repo2-azure-container = a-container +repo2-path = /pgbackrest/repo2 +repo2-type = azure +repo3-gcs-bucket = g-bucket +repo3-path = /pgbackrest/repo3 +repo3-test = something +repo3-type = gcs +repo4-path = /pgbackrest/repo4 +repo4-s3-bucket = s-bucket +repo4-s3-endpoint = endpoint-s +repo4-s3-region = earth +repo4-type = s3 + +[db] +pg1-host = some-instance-0.pod-service-name.test-ns.svc.`+domain+` +pg1-host-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt +pg1-host-cert-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.crt +pg1-host-key-file = /etc/pgbackrest/conf.d/~postgres-operator/client-tls.key +pg1-host-type = tls +pg1-path = /pgdata/pg12 +pg1-port = 2345 pg1-socket-path = /tmp/postgres `, "\t\n")+"\n") }) @@ -179,7 +479,7 @@ pg1-socket-path = /tmp/postgres } configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "any", "any", "any", "any", nil) + "any", "any", "any", "any", "any", nil) assert.NilError(t, err) assert.DeepEqual(t, configmap.Annotations, map[string]string{ @@ -211,7 +511,7 @@ pg1-socket-path = /tmp/postgres } configmap, err := CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "", "number", "pod-service-name", "test-ns", + "", "number", "pod-service-name", "test-ns", "", []string{"some-instance"}) assert.NilError(t, err) @@ -233,7 +533,7 @@ pg1-socket-path = /tmp/postgres } configmap, err = CreatePGBackRestConfigMapIntent(context.Background(), cluster, - "repo1", "number", "pod-service-name", "test-ns", + "repo1", "number", "pod-service-name", "test-ns", "", []string{"some-instance"}) assert.NilError(t, err) @@ -292,7 +592,7 @@ func TestMakePGBackrestLogDir(t *testing.T) { for _, c := range podTemplate.Spec.InitContainers { if c.Name == naming.ContainerPGBackRestLogDirInit { // ignore "bash -c", should skip repo with no volume - assert.Equal(t, `mkdir -p '/pgbackrest/repo2/log' && chmod 0775 '/pgbackrest/repo2/log'`, c.Command[2]) + assert.Equal(t, `mkdir -p '/pgbackrest/repo2/log' && { chmod 0775 '/pgbackrest/repo2/log' || :; }`, c.Command[2]) assert.Equal(t, c.Image, "test-image") assert.Equal(t, c.ImagePullPolicy, corev1.PullAlways) assert.Assert(t, !cmp.DeepEqual(c.SecurityContext, @@ -321,7 +621,7 @@ func TestReloadCommand(t *testing.T) { assert.NilError(t, os.WriteFile(file, []byte(command[3]), 0o600)) // Expect shellcheck to be happy. 
- cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } @@ -347,7 +647,7 @@ func TestRestoreCommand(t *testing.T) { file := filepath.Join(dir, "script.bash") assert.NilError(t, os.WriteFile(file, []byte(command[3]), 0o600)) - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } @@ -386,7 +686,7 @@ func TestDedicatedSnapshotVolumeRestoreCommand(t *testing.T) { file := filepath.Join(dir, "script.bash") assert.NilError(t, os.WriteFile(file, []byte(command[3]), 0o600)) - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } diff --git a/internal/pgbackrest/pgbackrest_test.go b/internal/pgbackrest/pgbackrest_test.go index 07ff3d127a..0930b72f45 100644 --- a/internal/pgbackrest/pgbackrest_test.go +++ b/internal/pgbackrest/pgbackrest_test.go @@ -13,12 +13,10 @@ import ( "testing" "gotest.tools/v3/assert" - "k8s.io/apimachinery/pkg/api/resource" - corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" "github.com/crunchydata/postgres-operator/internal/testing/require" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -94,7 +92,7 @@ fi assert.NilError(t, os.WriteFile(file, []byte(shellCheckScript), 0o600)) // Expect shellcheck to be happy. - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(ctx, shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } diff --git a/internal/pgbackrest/reconcile.go b/internal/pgbackrest/reconcile.go index 4e789d137e..426e1312f6 100644 --- a/internal/pgbackrest/reconcile.go +++ b/internal/pgbackrest/reconcile.go @@ -103,17 +103,17 @@ func AddConfigToInstancePod( configmap.ConfigMap.Items = []corev1.KeyToPath{ {Key: CMInstanceKey, Path: CMInstanceKey}, {Key: ConfigHashKey, Path: ConfigHashKey}, + {Key: serverConfigMapKey, Path: serverConfigProjectionPath}, } + // As the cluster transitions from having a repository host to having none, + // PostgreSQL instances that have not rolled out expect to mount client + // certificates. Specify those files are optional so the configuration + // volumes stay valid and Kubernetes propagates their contents to those pods. secret := corev1.VolumeProjection{Secret: &corev1.SecretProjection{}} secret.Secret.Name = naming.PGBackRestSecret(cluster).Name - - configmap.ConfigMap.Items = append( - configmap.ConfigMap.Items, corev1.KeyToPath{ - Key: serverConfigMapKey, - Path: serverConfigProjectionPath, - }) secret.Secret.Items = append(secret.Secret.Items, clientCertificates()...) + secret.Secret.Optional = initialize.Bool(true) // Start with a copy of projections specified in the cluster. Items later in // the list take precedence over earlier items (that is, last write wins). @@ -130,7 +130,7 @@ func AddConfigToInstancePod( addConfigVolumeAndMounts(pod, sources) } -// AddConfigToRepoPod adds and mounts the pgBackRest configuration volume for +// AddConfigToRepoPod adds and mounts the pgBackRest configuration volumes for // the dedicated repository host of cluster to pod. 
The pgBackRest containers // must already be in pod. func AddConfigToRepoPod( @@ -157,6 +157,33 @@ func AddConfigToRepoPod( addConfigVolumeAndMounts(pod, append(sources, configmap, secret)) } +// AddConfigToCloudBackupJob adds and mounts the pgBackRest configuration volumes +// to the backup job for creating a backup to a cloud repo. +func AddConfigToCloudBackupJob( + cluster *v1beta1.PostgresCluster, podTemplateSpec *corev1.PodTemplateSpec, +) { + configmap := corev1.VolumeProjection{ConfigMap: &corev1.ConfigMapProjection{}} + configmap.ConfigMap.Name = naming.PGBackRestConfig(cluster).Name + configmap.ConfigMap.Items = []corev1.KeyToPath{ + {Key: CMCloudRepoKey, Path: CMCloudRepoKey}, + } + + secret := corev1.VolumeProjection{Secret: &corev1.SecretProjection{}} + secret.Secret.Name = naming.PGBackRestSecret(cluster).Name + secret.Secret.Items = append(secret.Secret.Items, clientCertificates()...) + + // Start with a copy of projections specified in the cluster. Items later in + // the list take precedence over earlier items (that is, last write wins). + // - https://kubernetes.io/docs/concepts/storage/volumes/#projected + sources := append([]corev1.VolumeProjection{}, + cluster.Spec.Backups.PGBackRest.Configuration...) + + addConfigVolumeAndMounts(&podTemplateSpec.Spec, append(sources, configmap, secret)) + + // Add tmp directory for pgbackrest lock files + AddTMPEmptyDir(podTemplateSpec) +} + // AddConfigToRestorePod adds and mounts the pgBackRest configuration volume // for the restore job of cluster to pod. The pgBackRest containers must // already be in pod. @@ -508,38 +535,36 @@ func Secret(ctx context.Context, var err error // Save the CA and generate a TLS client certificate for the entire cluster. - if inRepoHost != nil { - initialize.Map(&outSecret.Data) - - // The server verifies its "tls-server-auth" option contains the common - // name (CN) of the certificate presented by a client. The entire - // cluster uses a single client certificate so the "tls-server-auth" - // option can stay the same when PostgreSQL instances and repository - // hosts are added or removed. - leaf := &pki.LeafCertificate{} - commonName := clientCommonName(inCluster) - dnsNames := []string{commonName} + initialize.Map(&outSecret.Data) - if err == nil { - // Unmarshal and validate the stored leaf. These first errors can - // be ignored because they result in an invalid leaf which is then - // correctly regenerated. - _ = leaf.Certificate.UnmarshalText(inSecret.Data[certClientSecretKey]) - _ = leaf.PrivateKey.UnmarshalText(inSecret.Data[certClientPrivateKeySecretKey]) + // The server verifies its "tls-server-auth" option contains the common + // name (CN) of the certificate presented by a client. The entire + // cluster uses a single client certificate so the "tls-server-auth" + // option can stay the same when PostgreSQL instances and repository + // hosts are added or removed. + leaf := &pki.LeafCertificate{} + commonName := clientCommonName(inCluster) + dnsNames := []string{commonName} - leaf, err = inRoot.RegenerateLeafWhenNecessary(leaf, commonName, dnsNames) - err = errors.WithStack(err) - } + if err == nil { + // Unmarshal and validate the stored leaf. These first errors can + // be ignored because they result in an invalid leaf which is then + // correctly regenerated. 
+ _ = leaf.Certificate.UnmarshalText(inSecret.Data[certClientSecretKey]) + _ = leaf.PrivateKey.UnmarshalText(inSecret.Data[certClientPrivateKeySecretKey]) + + leaf, err = inRoot.RegenerateLeafWhenNecessary(leaf, commonName, dnsNames) + err = errors.WithStack(err) + } - if err == nil { - outSecret.Data[certAuthoritySecretKey], err = certFile(inRoot.Certificate) - } - if err == nil { - outSecret.Data[certClientPrivateKeySecretKey], err = certFile(leaf.PrivateKey) - } - if err == nil { - outSecret.Data[certClientSecretKey], err = certFile(leaf.Certificate) - } + if err == nil { + outSecret.Data[certAuthoritySecretKey], err = certFile(inRoot.Certificate) + } + if err == nil { + outSecret.Data[certClientPrivateKeySecretKey], err = certFile(leaf.PrivateKey) + } + if err == nil { + outSecret.Data[certClientSecretKey], err = certFile(leaf.Certificate) } // Generate a TLS server certificate for each repository host. diff --git a/internal/pgbackrest/reconcile_test.go b/internal/pgbackrest/reconcile_test.go index 0c9aece2b1..fbd146475c 100644 --- a/internal/pgbackrest/reconcile_test.go +++ b/internal/pgbackrest/reconcile_test.go @@ -128,7 +128,7 @@ func TestAddRepoVolumesToPod(t *testing.T) { for _, r := range tc.repos { var foundVolume bool for _, v := range template.Spec.Volumes { - if v.Name == r.Name && v.VolumeSource.PersistentVolumeClaim.ClaimName == + if v.Name == r.Name && v.PersistentVolumeClaim.ClaimName == naming.PGBackRestRepoVolume(postgresCluster, r.Name).Name { foundVolume = true break @@ -244,6 +244,7 @@ func TestAddConfigToInstancePod(t *testing.T) { mode: 384 path: ~postgres-operator/client-tls.key name: hippo-pgbackrest + optional: true `)) }) @@ -279,6 +280,7 @@ func TestAddConfigToInstancePod(t *testing.T) { mode: 384 path: ~postgres-operator/client-tls.key name: hippo-pgbackrest + optional: true `)) }) @@ -319,6 +321,7 @@ func TestAddConfigToInstancePod(t *testing.T) { mode: 384 path: ~postgres-operator/client-tls.key name: hippo-pgbackrest + optional: true `)) }) } @@ -396,6 +399,84 @@ func TestAddConfigToRepoPod(t *testing.T) { }) } +func TestAddConfigToCloudBackupJob(t *testing.T) { + cluster := v1beta1.PostgresCluster{} + cluster.Name = "hippo" + cluster.Default() + + podTemplate := corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "other"}, + {Name: "pgbackrest"}, + }, + }, + } + + alwaysExpect := func(t testing.TB, result *corev1.PodSpec) { + // Only Containers and Volumes fields have changed. + assert.DeepEqual(t, podTemplate.Spec, *result, cmpopts.IgnoreFields(podTemplate.Spec, "Containers", "Volumes")) + + // Only pgBackRest container has config mount, but tmp dir is mounted to all containers + assert.Assert(t, cmp.MarshalMatches(result.Containers, ` +- name: other + resources: {} + volumeMounts: + - mountPath: /tmp + name: tmp +- name: pgbackrest + resources: {} + volumeMounts: + - mountPath: /etc/pgbackrest/conf.d + name: pgbackrest-config + readOnly: true + - mountPath: /tmp + name: tmp + `)) + } + + t.Run("CustomProjections", func(t *testing.T) { + custom := corev1.ConfigMapProjection{} + custom.Name = "custom-configmap" + + cluster := cluster.DeepCopy() + cluster.Spec.Backups.PGBackRest.Configuration = []corev1.VolumeProjection{ + {ConfigMap: &custom}, + } + + out := podTemplate.DeepCopy() + AddConfigToCloudBackupJob(cluster, out) + alwaysExpect(t, &out.Spec) + + // Cloud backup configuration files and client certificates + // after custom projections. 
+ assert.Assert(t, cmp.MarshalMatches(out.Spec.Volumes, ` +- name: pgbackrest-config + projected: + sources: + - configMap: + name: custom-configmap + - configMap: + items: + - key: pgbackrest_cloud.conf + path: pgbackrest_cloud.conf + name: hippo-pgbackrest-config + - secret: + items: + - key: pgbackrest.ca-roots + path: ~postgres-operator/tls-ca.crt + - key: pgbackrest-client.crt + path: ~postgres-operator/client-tls.crt + - key: pgbackrest-client.key + mode: 384 + path: ~postgres-operator/client-tls.key + name: hippo-pgbackrest +- emptyDir: + sizeLimit: 16Mi + name: tmp`)) + }) +} + func TestAddConfigToRestorePod(t *testing.T) { cluster := v1beta1.PostgresCluster{} cluster.Name = "source" @@ -522,7 +603,7 @@ func TestAddConfigToRestorePod(t *testing.T) { custom.Name = "custom-configmap-files" cluster := cluster.DeepCopy() - cluster.Spec.Config = &v1beta1.PostgresConfig{ + cluster.Spec.Config = &v1beta1.PostgresConfigSpec{ Files: []corev1.VolumeProjection{ {ConfigMap: &custom}, }, @@ -1027,10 +1108,13 @@ func TestSecret(t *testing.T) { assert.NilError(t, err) t.Run("NoRepoHost", func(t *testing.T) { - // Nothing happens when there is no repository host. - constant := intent.DeepCopy() + // We always add the pgbackrest server certs assert.NilError(t, Secret(ctx, cluster, nil, root, existing, intent)) - assert.DeepEqual(t, constant, intent) + assert.Assert(t, len(intent.Data["pgbackrest-client.crt"]) > 0) + assert.Assert(t, len(intent.Data["pgbackrest-client.key"]) > 0) + assert.Assert(t, len(intent.Data["pgbackrest.ca-roots"]) > 0) + assert.Assert(t, len(intent.Data["pgbackrest-repo-host.crt"]) == 0) + assert.Assert(t, len(intent.Data["pgbackrest-repo-host.key"]) == 0) }) host := new(appsv1.StatefulSet) diff --git a/internal/pgbackrest/tls-server.md b/internal/pgbackrest/tls-server.md index 7c8f191c35..56af386d5b 100644 --- a/internal/pgbackrest/tls-server.md +++ b/internal/pgbackrest/tls-server.md @@ -12,10 +12,8 @@ on different pods: - [dedicated repository host](https://pgbackrest.org/user-guide.html#repo-host) - [backup from standby](https://pgbackrest.org/user-guide.html#standby-backup) -When a PostgresCluster is configured to store backups on a PVC, the dedicated -repository host is used to make that PVC available to all PostgreSQL instances -in the cluster. Regardless of whether the repo host has a defined PVC, it -functions as the server for the pgBackRest clients that run on the Instances. +When a PostgresCluster is configured to store backups on a PVC, we start a dedicated +repository host to make that PVC available to all PostgreSQL instances in the cluster. The repository host runs a `pgbackrest` server that is secured through TLS and [certificates][]. When performing backups, it connects to `pgbackrest` servers @@ -26,32 +24,30 @@ to the repository host to [send and receive WAL files][archiving]. [archiving]: https://www.postgresql.org/docs/current/continuous-archiving.html [certificates]: certificates.md - The `pgbackrest` command acts as a TLS client and connects to a pgBackRest TLS server when `pg-host-type=tls` and/or `repo-host-type=tls`. The default for these is `ssh`: -- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c#L3771 -- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c#L6137 - +- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c#L3771> +- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c#L6137> The pgBackRest TLS server is configured through the `tls-server-*` [options](config.md).
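For a concrete picture of those options, the server configuration generated in the tests above looks roughly like the following sketch, where `<cluster-uid>` stands in for the PostgresCluster UID:

```text
[global]
tls-server-address = 0.0.0.0
tls-server-auth = pgbackrest@<cluster-uid>=*
tls-server-ca-file = /etc/pgbackrest/conf.d/~postgres-operator/tls-ca.crt
tls-server-cert-file = /etc/pgbackrest/server/server-tls.crt
tls-server-key-file = /etc/pgbackrest/server/server-tls.key
```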
In pgBackRest 2.38, changing any of these options or changing certificate contents requires a reload of the server, as shown in the "Setup TLS Server" section of the documentation, with the command configured as -``` +```text ExecReload=kill -HUP $MAINPID ``` -- https://pgbackrest.org/user-guide-rhel.html#repo-host/setup-tls +- <https://pgbackrest.org/user-guide-rhel.html#repo-host/setup-tls> - `tls-server-address`, `tls-server-port`
The network address and port on which to listen. pgBackRest 2.38 listens on the *first* address returned by `getaddrinfo()`. There is no way to listen on all interfaces. - - https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/io/socket/server.c#L172 - - https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/io/socket/common.c#L87 + - <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/io/socket/server.c#L172> + - <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/io/socket/common.c#L87> - `tls-server-cert-file`, `tls-server-key-file`
The [certificate chain][certificates] and private key pair used to encrypt connections. @@ -65,12 +61,11 @@ ExecReload=kill -HUP $MAINPID to interact with. [Required](https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/config/parse.auto.c#L8751). - In pgBackRest 2.38, as mentioned above, sending SIGHUP causes a configuration reload. -- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/command/server/server.c#L178 +- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/command/server/server.c#L178> -``` +```text P00 DETAIL: configuration reload begin P00 INFO: server command begin 2.38... P00 DETAIL: configuration reload end @@ -78,20 +73,18 @@ P00 DETAIL: configuration reload end Sending SIGINT to the TLS server causes it to exit with code 63, TermError. -- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/exit.c#L73-L75 -- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/exit.c#L62 -- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/error.auto.c#L48 +- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/exit.c#L73-L75> +- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/exit.c#L62> +- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/common/error.auto.c#L48> - -``` +```text P00 INFO: server command end: terminated on signal [SIGINT] ``` Sending SIGTERM exits the signal loop and leads to the command termination. -- https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/command/server/server.c#L194 +- <https://github.com/pgbackrest/pgbackrest/blob/release/2.38/src/command/server/server.c#L194> - -``` +```text P00 INFO: server command end: completed successfully ``` diff --git a/internal/pgbackrest/util.go b/internal/pgbackrest/util.go index a3b515ec5d..cd5fd11261 100644 --- a/internal/pgbackrest/util.go +++ b/internal/pgbackrest/util.go @@ -10,16 +10,21 @@ import ( "io" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/rand" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) +// TODO: Provide explanation for this specific size. Should a tmp dir ever be smaller or larger? +var tmpDirSizeLimit = resource.MustParse("16Mi") + // maxPGBackrestRepos is the maximum number of repositories that can be configured according to the // multi-repository solution implemented within pgBackRest const maxPGBackrestRepos = 4 -// RepoHostVolumeDefined determines whether not at least one pgBackRest dedicated +// RepoHostVolumeDefined determines whether or not at least one pgBackRest dedicated // repository host volume has been defined in the PostgresCluster manifest. func RepoHostVolumeDefined(postgresCluster *v1beta1.PostgresCluster) bool { for _, repo := range postgresCluster.Spec.Backups.PGBackRest.Repos { @@ -30,6 +35,17 @@ func RepoHostVolumeDefined(postgresCluster *v1beta1.PostgresCluster) bool { return false } +// CloudRepoDefined determines whether or not at least one pgBackRest cloud-based +// repository has been defined in the PostgresCluster manifest. +func CloudRepoDefined(postgresCluster *v1beta1.PostgresCluster) bool { + for _, repo := range postgresCluster.Spec.Backups.PGBackRest.Repos { + if repo.Volume == nil { + return true + } + } + return false +} + // CalculateConfigHashes calculates hashes for any external pgBackRest repository configuration // present in the PostgresCluster spec (e.g. configuration for Azure, GCS and/or S3 repositories). // Additionally it returns a hash of the hashes for each external repository.
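A quick sketch of how the two predicates above complement each other; `exampleRepoPredicates` is a hypothetical helper, and the repo literals mirror the ones used in the tests in this change:

```go
package pgbackrest

import (
	"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
)

// exampleRepoPredicates is a hypothetical illustration, not part of this change.
// A cluster with one PVC-backed repo and one cloud repo satisfies both predicates.
func exampleRepoPredicates() (hasRepoHost, hasCloudRepo bool) {
	cluster := &v1beta1.PostgresCluster{}
	cluster.Spec.Backups.PGBackRest.Repos = []v1beta1.PGBackRestRepo{
		{Name: "repo1", Volume: &v1beta1.RepoPVC{}},                // dedicated repo host volume
		{Name: "repo2", GCS: &v1beta1.RepoGCS{Bucket: "g-bucket"}}, // cloud (GCS) repo
	}

	// RepoHostVolumeDefined is true because repo1 has a Volume;
	// CloudRepoDefined is true because repo2 does not.
	return RepoHostVolumeDefined(cluster), CloudRepoDefined(cluster)
}
```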
@@ -100,3 +116,39 @@ func safeHash32(content func(w io.Writer) error) (string, error) { } return rand.SafeEncodeString(fmt.Sprint(hash.Sum32())), nil } + +// AddTMPEmptyDir adds a "tmp" EmptyDir volume to the provided Pod template and adds a +// volume mount at /tmp to every container defined within the Pod template. +// The '/tmp' directory is currently utilized for the following: +// - As the pgBackRest lock directory (this is the default lock location for pgBackRest) +// - The location where the replication client certificates can be loaded with the proper +// permissions set +// +// This function was copied from the postgrescluster package. +func AddTMPEmptyDir(template *corev1.PodTemplateSpec) { + + template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{ + Name: "tmp", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{ + SizeLimit: &tmpDirSizeLimit, + }, + }, + }) + + for i := range template.Spec.Containers { + template.Spec.Containers[i].VolumeMounts = append(template.Spec.Containers[i].VolumeMounts, + corev1.VolumeMount{ + Name: "tmp", + MountPath: "/tmp", + }) + } + + for i := range template.Spec.InitContainers { + template.Spec.InitContainers[i].VolumeMounts = append(template.Spec.InitContainers[i].VolumeMounts, + corev1.VolumeMount{ + Name: "tmp", + MountPath: "/tmp", + }) + } +} diff --git a/internal/pgbackrest/util_test.go b/internal/pgbackrest/util_test.go index e3c98e0dd7..d2fd93455c 100644 --- a/internal/pgbackrest/util_test.go +++ b/internal/pgbackrest/util_test.go @@ -6,7 +6,7 @@ package pgbackrest import ( "io" - "math/rand" + "math/rand/v2" "strconv" "testing" @@ -80,7 +80,7 @@ func TestCalculateConfigHashes(t *testing.T) { assert.Equal(t, preCalculatedRepo3S3Hash, configHashMap["repo3"]) // call CalculateConfigHashes multiple times to ensure consistent results - for i := 0; i < 10; i++ { + for range 10 { hashMap, hash, err := CalculateConfigHashes(postgresCluster) assert.NilError(t, err) assert.Equal(t, configHash, hash) @@ -92,7 +92,7 @@ func TestCalculateConfigHashes(t *testing.T) { // shuffle the repo slice in order to ensure the same result is returned regardless of the // order of the repos slice shuffleCluster := postgresCluster.DeepCopy() - for i := 0; i < 10; i++ { + for range 10 { repos := shuffleCluster.Spec.Backups.PGBackRest.Repos rand.Shuffle(len(repos), func(i, j int) { repos[i], repos[j] = repos[j], repos[i] @@ -103,7 +103,7 @@ func TestCalculateConfigHashes(t *testing.T) { } // now modify some values in each repo and confirm we see a different result - for i := 0; i < 3; i++ { + for i := range 3 { modCluster := postgresCluster.DeepCopy() switch i { case 0: diff --git a/internal/pgbouncer/config.go b/internal/pgbouncer/config.go index 257dc63dbd..99bcac0399 100644 --- a/internal/pgbouncer/config.go +++ b/internal/pgbouncer/config.go @@ -12,7 +12,7 @@ import ( corev1 "k8s.io/api/core/v1" - "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -127,13 +127,13 @@ func clusterINI(ctx context.Context, cluster *v1beta1.PostgresCluster) string { } // If OpenTelemetryLogs feature is enabled, enable logging to file - if feature.Enabled(ctx, feature.OpenTelemetryLogs) { + if collector.OpenTelemetryLogsEnabled(ctx, cluster) { global["logfile"] = naming.PGBouncerLogPath + "/pgbouncer.log"
} // When OTel metrics are enabled, allow pgBouncer's postgres user // to run read-only console queries on pgBouncer's virtual db - if feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if collector.OpenTelemetryMetricsEnabled(ctx, cluster) { global["stats_users"] = PostgresqlUser } diff --git a/internal/pgbouncer/config_test.go b/internal/pgbouncer/config_test.go index 43c6b77a92..97ba017ef4 100644 --- a/internal/pgbouncer/config_test.go +++ b/internal/pgbouncer/config_test.go @@ -216,7 +216,7 @@ func TestReloadCommand(t *testing.T) { assert.NilError(t, os.WriteFile(file, []byte(command[3]), 0o600)) // Expect shellcheck to be happy. - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) } diff --git a/internal/pgbouncer/postgres.go b/internal/pgbouncer/postgres.go index 202c6bd9be..2d0b675067 100644 --- a/internal/pgbouncer/postgres.go +++ b/internal/pgbouncer/postgres.go @@ -181,7 +181,7 @@ REVOKE ALL PRIVILEGES // - https://www.postgresql.org/docs/current/perm-functions.html `ALTER ROLE :"username" SET search_path TO :'namespace';`, - // Allow the PgBouncer user to to login. + // Allow the PgBouncer user to login. `ALTER ROLE :"username" LOGIN PASSWORD :'verifier';`, // Commit (finish) the transaction. diff --git a/internal/pgbouncer/reconcile.go b/internal/pgbouncer/reconcile.go index b663596ed7..8eed54a3b6 100644 --- a/internal/pgbouncer/reconcile.go +++ b/internal/pgbouncer/reconcile.go @@ -207,7 +207,7 @@ func Pod( template.Spec.Volumes = []corev1.Volume{configVolume} - if feature.Enabled(ctx, feature.OpenTelemetryLogs) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if collector.OpenTelemetryLogsOrMetricsEnabled(ctx, inCluster) { collector.AddToPod(ctx, inCluster.Spec.Instrumentation, inCluster.Spec.ImagePullPolicy, inConfigMap, template, []corev1.VolumeMount{configVolumeMount}, string(inSecret.Data["pgbouncer-password"]), []string{naming.PGBouncerLogPath}, true, true) diff --git a/internal/pgmonitor/exporter.go b/internal/pgmonitor/exporter.go index c8422fcc2c..824674349b 100644 --- a/internal/pgmonitor/exporter.go +++ b/internal/pgmonitor/exporter.go @@ -66,6 +66,12 @@ func GenerateDefaultExporterQueries(ctx context.Context, cluster *v1beta1.Postgr queries += string(queriesContents) + "\n" } + // pgMonitor will not be adding support for postgres_exporter for postgres + // versions past 17. If pg version is greater than 17, return an empty string. 
+ if cluster.Spec.PostgresVersion > 17 { + return "" + } + // Add general queries for specific postgres version queriesGeneral, err := os.ReadFile(fmt.Sprintf("%s/pg%d/queries_general.yml", queriesConfigDir, cluster.Spec.PostgresVersion)) if err != nil { diff --git a/internal/pgmonitor/exporter_test.go b/internal/pgmonitor/exporter_test.go index 486b658dab..f9c8321821 100644 --- a/internal/pgmonitor/exporter_test.go +++ b/internal/pgmonitor/exporter_test.go @@ -38,6 +38,12 @@ func TestGenerateDefaultExporterQueries(t *testing.T) { assert.Assert(t, strings.Contains(queries, "ccp_pg_stat_statements_reset"), "Queries do not contain 'ccp_pg_stat_statements_reset' query when they should.") }) + + t.Run("PG>17", func(t *testing.T) { + cluster.Spec.PostgresVersion = 18 + queries := GenerateDefaultExporterQueries(ctx, cluster) + assert.Equal(t, queries, "") + }) } func TestExporterStartCommand(t *testing.T) { diff --git a/internal/pgmonitor/postgres.go b/internal/pgmonitor/postgres.go index 1d7817c9a3..3ef83cd2e0 100644 --- a/internal/pgmonitor/postgres.go +++ b/internal/pgmonitor/postgres.go @@ -10,7 +10,7 @@ import ( corev1 "k8s.io/api/core/v1" - "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/postgres" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" @@ -24,7 +24,8 @@ const ( // PostgreSQLHBAs provides the Postgres HBA rules for allowing the monitoring // exporter to be accessible func PostgreSQLHBAs(ctx context.Context, inCluster *v1beta1.PostgresCluster, outHBAs *postgres.HBAs) { - if ExporterEnabled(ctx, inCluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if ExporterEnabled(ctx, inCluster) || + collector.OpenTelemetryMetricsEnabled(ctx, inCluster) { // Limit the monitoring user to local connections using SCRAM. 
outHBAs.Mandatory = append(outHBAs.Mandatory, postgres.NewHBA().TCP().Users(MonitoringUser).Method("scram-sha-256").Network("127.0.0.0/8"), @@ -34,9 +35,11 @@ func PostgreSQLHBAs(ctx context.Context, inCluster *v1beta1.PostgresCluster, out } // PostgreSQLParameters provides additional required configuration parameters -// that Postgres needs to support monitoring +// that Postgres needs to support monitoring for both pgMonitor and OTel func PostgreSQLParameters(ctx context.Context, inCluster *v1beta1.PostgresCluster, outParameters *postgres.Parameters) { - if ExporterEnabled(ctx, inCluster) || feature.Enabled(ctx, feature.OpenTelemetryMetrics) { + if ExporterEnabled(ctx, inCluster) || + collector.OpenTelemetryMetricsEnabled(ctx, inCluster) { + // Exporter expects that shared_preload_libraries are installed // pg_stat_statements: https://access.crunchydata.com/documentation/pgmonitor/latest/exporter/ // pgnodemx: https://github.com/CrunchyData/pgnodemx diff --git a/internal/pgmonitor/util.go b/internal/pgmonitor/util.go index 32cf222448..76a8a6adae 100644 --- a/internal/pgmonitor/util.go +++ b/internal/pgmonitor/util.go @@ -8,7 +8,7 @@ import ( "context" "os" - "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/collector" "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -28,6 +28,11 @@ func GetQueriesConfigDir(ctx context.Context) string { // ExporterEnabled returns true if the monitoring exporter is enabled func ExporterEnabled(ctx context.Context, cluster *v1beta1.PostgresCluster) bool { + // If OpenTelemetry metrics are enabled for this cluster, that takes precedence + // over the postgres_exporter metrics. 
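For context on the PostgreSQLParameters hunk above: the appended settings preload the two libraries named in its comments. Rendered into postgresql.conf, the effect is roughly the line below; the exact merging with any other shared_preload_libraries entries happens in the parameters machinery, which this diff does not touch.

```
shared_preload_libraries = 'pg_stat_statements,pgnodemx'
```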
+	if collector.OpenTelemetryMetricsEnabled(ctx, cluster) {
+		return false
+	}
 	if cluster.Spec.Monitoring == nil {
 		return false
 	}
@@ -37,8 +42,5 @@ func ExporterEnabled(ctx context.Context, cluster *v1beta1.PostgresCluster) bool
 	if cluster.Spec.Monitoring.PGMonitor.Exporter == nil {
 		return false
 	}
-	if feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
-		return false
-	}
 	return true
 }
diff --git a/internal/pgmonitor/util_test.go b/internal/pgmonitor/util_test.go
index e83bbb3730..e862e87a67 100644
--- a/internal/pgmonitor/util_test.go
+++ b/internal/pgmonitor/util_test.go
@@ -11,6 +11,7 @@ import (
 	"gotest.tools/v3/assert"
 
 	"github.com/crunchydata/postgres-operator/internal/feature"
+	"github.com/crunchydata/postgres-operator/internal/testing/require"
 	"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
 )
 
@@ -28,11 +29,18 @@ func TestExporterEnabled(t *testing.T) {
 	cluster.Spec.Monitoring.PGMonitor.Exporter = &v1beta1.ExporterSpec{}
 	assert.Assert(t, ExporterEnabled(ctx, cluster))
 
+	// Enabling the OpenTelemetryMetrics feature gate is not sufficient to disable the exporter
 	gate := feature.NewGate()
 	assert.NilError(t, gate.SetFromMap(map[string]bool{
 		feature.OpenTelemetryMetrics: true,
 	}))
 	ctx = feature.NewContext(ctx, gate)
-	cluster.Spec.Monitoring.PGMonitor.Exporter = &v1beta1.ExporterSpec{}
+	assert.Assert(t, ExporterEnabled(ctx, cluster))
+
+	require.UnmarshalInto(t, &cluster.Spec, `{
+		instrumentation: {
+			logs: { retentionPeriod: 5h },
+		},
+	}`)
 	assert.Assert(t, !ExporterEnabled(ctx, cluster))
 }
diff --git a/internal/pki/encoding_test.go b/internal/pki/encoding_test.go
index 2c63099ca4..eb2b1365b3 100644
--- a/internal/pki/encoding_test.go
+++ b/internal/pki/encoding_test.go
@@ -81,7 +81,7 @@ func TestCertificateTextMarshaling(t *testing.T) {
 		assert.NilError(t, os.WriteFile(certFile, certBytes, 0o600))
 
 		// The "openssl x509" command parses X.509 certificates.
-		cmd := exec.Command(openssl, "x509",
+		cmd := exec.CommandContext(t.Context(), openssl, "x509",
 			"-in", certFile, "-inform", "PEM", "-noout", "-text")
 
 		output, err := cmd.CombinedOutput()
@@ -153,7 +153,7 @@ func TestPrivateKeyTextMarshaling(t *testing.T) {
 		assert.NilError(t, os.WriteFile(keyFile, keyBytes, 0o600))
 
 		// The "openssl pkey" command processes public and private keys.
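Many hunks in this change replace exec.Command with exec.CommandContext bound to t.Context() (new in Go 1.24), so a child process is killed if its test finishes or times out first. A standalone sketch of the pattern; the echo command is only illustrative:

```go
package example

import (
	"os/exec"
	"testing"
)

func TestCommandContextSketch(t *testing.T) {
	// t.Context() is canceled shortly before the test ends, and
	// exec.CommandContext kills the child process when that happens.
	cmd := exec.CommandContext(t.Context(), "echo", "hello")

	output, err := cmd.CombinedOutput()
	if err != nil {
		t.Fatalf("%q failed: %v\n%s", cmd.Args, err, output)
	}
}
```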
- cmd := exec.Command(openssl, "pkey", + cmd := exec.CommandContext(t.Context(), openssl, "pkey", "-in", keyFile, "-inform", "PEM", "-noout", "-text") output, err := cmd.CombinedOutput() @@ -164,12 +164,12 @@ func TestPrivateKeyTextMarshaling(t *testing.T) { "expected valid private key, got:\n%s", output) t.Run("Check", func(t *testing.T) { - output, _ := exec.Command(openssl, "pkey", "-help").CombinedOutput() + output, _ := exec.CommandContext(t.Context(), openssl, "pkey", "-help").CombinedOutput() if !strings.Contains(string(output), "-check") { t.Skip(`requires "-check" flag`) } - cmd := exec.Command(openssl, "pkey", + cmd := exec.CommandContext(t.Context(), openssl, "pkey", "-check", "-in", keyFile, "-inform", "PEM", "-noout", "-text") output, err := cmd.CombinedOutput() diff --git a/internal/pki/pki_test.go b/internal/pki/pki_test.go index 000f1a5042..fa8f290475 100644 --- a/internal/pki/pki_test.go +++ b/internal/pki/pki_test.go @@ -194,7 +194,7 @@ func TestRootIsInvalid(t *testing.T) { t.Cleanup(func() { currentTime = original }) currentTime = func() time.Time { - return time.Date(2010, time.January, 1, 0, 0, 0, 0, time.Local) + return time.Date(2010, time.January, 1, 0, 0, 0, 0, time.UTC) } root, err := NewRootCertificateAuthority() @@ -395,7 +395,7 @@ func TestLeafIsInvalid(t *testing.T) { t.Cleanup(func() { currentTime = original }) currentTime = func() time.Time { - return time.Date(2010, time.January, 1, 0, 0, 0, 0, time.Local) + return time.Date(2010, time.January, 1, 0, 0, 0, 0, time.UTC) } leaf, err := root.GenerateLeafCertificate("", nil) @@ -439,7 +439,7 @@ func basicOpenSSLVerify(t *testing.T, openssl string, root, leaf Certificate) { verify := func(t testing.TB, args ...string) { t.Helper() // #nosec G204 -- args from this test - cmd := exec.Command(openssl, append([]string{"verify"}, args...)...) + cmd := exec.CommandContext(t.Context(), openssl, append([]string{"verify"}, args...)...) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) @@ -476,7 +476,7 @@ func basicOpenSSLVerify(t *testing.T, openssl string, root, leaf Certificate) { } func strictOpenSSLVerify(t *testing.T, openssl string, root, leaf Certificate) { - output, _ := exec.Command(openssl, "verify", "-help").CombinedOutput() + output, _ := exec.CommandContext(t.Context(), openssl, "verify", "-help").CombinedOutput() if !strings.Contains(string(output), "-x509_strict") { t.Skip(`requires "-x509_strict" flag`) } @@ -487,7 +487,7 @@ func strictOpenSSLVerify(t *testing.T, openssl string, root, leaf Certificate) { verify := func(t testing.TB, args ...string) { t.Helper() // #nosec G204 -- args from this test - cmd := exec.Command(openssl, append([]string{"verify", + cmd := exec.CommandContext(t.Context(), openssl, append([]string{"verify", // Do not use the default trusted CAs. "-no-CAfile", "-no-CApath", // Disable "non-compliant workarounds for broken certificates". diff --git a/internal/postgres/config.go b/internal/postgres/config.go index b3102b74dc..9270472163 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -58,6 +58,9 @@ safelink() ( // dataMountPath is where to mount the main data volume. tablespaceMountPath = "/tablespaces" + // tmpMountPath is where to mount the optional ephemeral volume. + tmpMountPath = "/pgtmp" + // walMountPath is where to mount the optional WAL volume. walMountPath = "/pgwal" @@ -372,11 +375,11 @@ chmod +x /tmp/pg_rewind_tde.sh `halt "$(permissions "${postgres_data_directory}" ||:)"`, // Create log directories. 
- `(` + shell.MakeDirectories(0o775, dataMountPath, naming.PGBackRestPGDataLogPath) + `) ||`, + `(` + shell.MakeDirectories(dataMountPath, naming.PGBackRestPGDataLogPath) + `) ||`, `halt "$(permissions ` + naming.PGBackRestPGDataLogPath + ` ||:)"`, - `(` + shell.MakeDirectories(0o775, dataMountPath, naming.PatroniPGDataLogPath) + `) ||`, + `(` + shell.MakeDirectories(dataMountPath, naming.PatroniPGDataLogPath) + `) ||`, `halt "$(permissions ` + naming.PatroniPGDataLogPath + ` ||:)"`, - `(` + shell.MakeDirectories(0o775, dataMountPath, LogDirectory()) + `) ||`, + `(` + shell.MakeDirectories(dataMountPath, LogDirectory()) + `) ||`, `halt "$(permissions ` + LogDirectory() + ` ||:)"`, // Copy replication client certificate files diff --git a/internal/postgres/config_test.go b/internal/postgres/config_test.go index 1a7378a50c..59aca4b21a 100644 --- a/internal/postgres/config_test.go +++ b/internal/postgres/config_test.go @@ -52,7 +52,7 @@ func TestWALDirectory(t *testing.T) { func TestBashHalt(t *testing.T) { t.Run("NoPipeline", func(t *testing.T) { - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-c", "--", bashHalt+`; halt ab cd e`) var exit *exec.ExitError @@ -64,7 +64,7 @@ func TestBashHalt(t *testing.T) { }) t.Run("PipelineZeroStatus", func(t *testing.T) { - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-c", "--", bashHalt+`; true && halt message`) var exit *exec.ExitError @@ -76,7 +76,7 @@ func TestBashHalt(t *testing.T) { }) t.Run("PipelineNonZeroStatus", func(t *testing.T) { - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-c", "--", bashHalt+`; (exit 99) || halt $'multi\nline'`) var exit *exec.ExitError @@ -88,7 +88,7 @@ func TestBashHalt(t *testing.T) { }) t.Run("Subshell", func(t *testing.T) { - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-c", "--", bashHalt+`; (halt 'err') || echo 'after'`) stderr := new(bytes.Buffer) @@ -104,7 +104,7 @@ func TestBashHalt(t *testing.T) { func TestBashPermissions(t *testing.T) { // macOS `stat` takes different arguments than BusyBox and GNU coreutils. - if output, err := exec.Command("stat", "--help").CombinedOutput(); err != nil { + if output, err := exec.CommandContext(t.Context(), "stat", "--help").CombinedOutput(); err != nil { t.Skip(`requires "stat" executable`) } else if !strings.Contains(string(output), "%A") { t.Skip(`requires "stat" with access format sequence`) @@ -116,7 +116,7 @@ func TestBashPermissions(t *testing.T) { assert.NilError(t, os.WriteFile(filepath.Join(dir, "sub", "fn"), nil, 0o624)) // #nosec G306 OK permissions for a temp dir in a test assert.NilError(t, os.Chmod(filepath.Join(dir, "sub", "fn"), 0o624)) - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-c", "--", bashPermissions+`; permissions "$@"`, "-", filepath.Join(dir, "sub", "fn")) @@ -131,7 +131,7 @@ func TestBashPermissions(t *testing.T) { func TestBashRecreateDirectory(t *testing.T) { // macOS `stat` takes different arguments than BusyBox and GNU coreutils. 
- if output, err := exec.Command("stat", "--help").CombinedOutput(); err != nil { + if output, err := exec.CommandContext(t.Context(), "stat", "--help").CombinedOutput(); err != nil { t.Skip(`requires "stat" executable`) } else if !strings.Contains(string(output), "%a") { t.Skip(`requires "stat" with access format sequence`) @@ -143,7 +143,7 @@ func TestBashRecreateDirectory(t *testing.T) { assert.NilError(t, os.WriteFile(filepath.Join(dir, "d", "file"), nil, 0o644)) // #nosec G306 OK permissions for a temp dir in a test stat := func(args ...string) string { - cmd := exec.Command("stat", "-c", "%i %#a %N") + cmd := exec.CommandContext(t.Context(), "stat", "-c", "%i %#a %N") cmd.Args = append(cmd.Args, args...) out, err := cmd.CombinedOutput() @@ -160,7 +160,7 @@ func TestBashRecreateDirectory(t *testing.T) { filepath.Join(dir, "d", "file"), ) - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-ceu", "--", bashRecreateDirectory+` recreate "$@"`, "-", filepath.Join(dir, "d"), "0740") @@ -199,7 +199,7 @@ func TestBashRecreateDirectory(t *testing.T) { func TestBashSafeLink(t *testing.T) { // macOS `mv` takes different arguments than GNU coreutils. - if output, err := exec.Command("mv", "--help").CombinedOutput(); err != nil { + if output, err := exec.CommandContext(t.Context(), "mv", "--help").CombinedOutput(); err != nil { t.Skip(`requires "mv" executable`) } else if !strings.Contains(string(output), "no-target-directory") { t.Skip(`requires "mv" that overwrites a directory symlink`) @@ -207,7 +207,7 @@ func TestBashSafeLink(t *testing.T) { // execute calls the bash function with args. execute := func(args ...string) (string, error) { - cmd := exec.Command("bash") + cmd := exec.CommandContext(t.Context(), "bash") cmd.Args = append(cmd.Args, "-ceu", "--", bashSafeLink+`safelink "$@"`, "-") cmd.Args = append(cmd.Args, args...) output, err := cmd.CombinedOutput() @@ -474,7 +474,7 @@ func TestStartupCommand(t *testing.T) { assert.NilError(t, os.WriteFile(file, []byte(script), 0o600)) // Expect shellcheck to be happy. - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(ctx, shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) diff --git a/internal/postgres/exec_test.go b/internal/postgres/exec_test.go index b8f5693bef..3ec94717d5 100644 --- a/internal/postgres/exec_test.go +++ b/internal/postgres/exec_test.go @@ -184,7 +184,7 @@ done <<< "${databases}" assert.NilError(t, os.WriteFile(file, []byte(script), 0o600)) // Expect shellcheck to be happy. - cmd := exec.Command(shellcheck, "--enable=all", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) diff --git a/internal/postgres/password/md5.go b/internal/postgres/password/md5.go index c99b2c0e30..55cc43f5cb 100644 --- a/internal/postgres/password/md5.go +++ b/internal/postgres/password/md5.go @@ -5,7 +5,6 @@ package password import ( - // #nosec G501 "crypto/md5" "errors" diff --git a/internal/postgres/password/scram.go b/internal/postgres/password/scram.go index bbf8dbcbe6..90eb2a54ad 100644 --- a/internal/postgres/password/scram.go +++ b/internal/postgres/password/scram.go @@ -138,7 +138,7 @@ func (s *SCRAMPassword) isASCII() bool { // iterate through each character of the plaintext password and determine if // it is ASCII. 
if it is not ASCII, exit early // per research, this loop is optimized to be fast for searching - for i := 0; i < len(s.password); i++ { + for i := range len(s.password) { if s.password[i] > unicode.MaxASCII { return false } diff --git a/internal/postgres/reconcile.go b/internal/postgres/reconcile.go index fda5229792..81c6cc31fa 100644 --- a/internal/postgres/reconcile.go +++ b/internal/postgres/reconcile.go @@ -8,7 +8,6 @@ import ( "context" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" "github.com/crunchydata/postgres-operator/internal/config" "github.com/crunchydata/postgres-operator/internal/feature" @@ -17,11 +16,6 @@ import ( "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) -var ( - oneMillicore = resource.MustParse("1m") - oneMebibyte = resource.MustParse("1Mi") -) - // DataVolumeMount returns the name and mount path of the PostgreSQL data volume. func DataVolumeMount() corev1.VolumeMount { return corev1.VolumeMount{Name: "postgres-data", MountPath: dataMountPath} @@ -32,6 +26,11 @@ func TablespaceVolumeMount(tablespaceName string) corev1.VolumeMount { return corev1.VolumeMount{Name: "tablespace-" + tablespaceName, MountPath: tablespaceMountPath + "/" + tablespaceName} } +// TempVolumeMount returns the name and mount path of the ephemeral volume. +func TempVolumeMount() corev1.VolumeMount { + return corev1.VolumeMount{Name: "postgres-temp", MountPath: tmpMountPath} +} + // WALVolumeMount returns the name and mount path of the PostgreSQL WAL volume. func WALVolumeMount() corev1.VolumeMount { return corev1.VolumeMount{Name: "postgres-wal", MountPath: walMountPath} @@ -63,7 +62,7 @@ func InstancePod(ctx context.Context, inClusterCertificates, inClientCertificates *corev1.SecretProjection, inDataVolume, inWALVolume *corev1.PersistentVolumeClaim, inTablespaceVolumes []*corev1.PersistentVolumeClaim, - outInstancePod *corev1.PodSpec, + outInstancePod *corev1.PodTemplateSpec, ) { certVolumeMount := corev1.VolumeMount{ Name: naming.CertVolume, @@ -111,28 +110,24 @@ func InstancePod(ctx context.Context, ResourceFieldRef: &corev1.ResourceFieldSelector{ ContainerName: naming.ContainerDatabase, Resource: "limits.cpu", - Divisor: oneMillicore, }, }, { Path: "cpu_request", ResourceFieldRef: &corev1.ResourceFieldSelector{ ContainerName: naming.ContainerDatabase, Resource: "requests.cpu", - Divisor: oneMillicore, }, }, { Path: "mem_limit", ResourceFieldRef: &corev1.ResourceFieldSelector{ ContainerName: naming.ContainerDatabase, Resource: "limits.memory", - Divisor: oneMebibyte, }, }, { Path: "mem_request", ResourceFieldRef: &corev1.ResourceFieldSelector{ ContainerName: naming.ContainerDatabase, Resource: "requests.memory", - Divisor: oneMebibyte, }, }, { Path: "labels", @@ -207,7 +202,7 @@ func InstancePod(ctx context.Context, VolumeMounts: []corev1.VolumeMount{certVolumeMount, dataVolumeMount}, } - outInstancePod.Volumes = []corev1.Volume{ + outInstancePod.Spec.Volumes = []corev1.Volume{ certVolume, dataVolume, downwardAPIVolume, @@ -227,7 +222,7 @@ func InstancePod(ctx context.Context, }, }, } - outInstancePod.Volumes = append(outInstancePod.Volumes, tablespaceVolume) + outInstancePod.Spec.Volumes = append(outInstancePod.Spec.Volumes, tablespaceVolume) container.VolumeMounts = append(container.VolumeMounts, tablespaceVolumeMount) startup.VolumeMounts = append(startup.VolumeMounts, tablespaceVolumeMount) } @@ -239,7 +234,7 @@ func InstancePod(ctx context.Context, Sources: append([]corev1.VolumeProjection{}, 
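The loop rewrites here and in the earlier test hunks use Go 1.22's range-over-int forms. A standalone sketch of the equivalences; the password value is made up:

```go
package main

import (
	"fmt"
	"unicode"
)

func main() {
	password := "trust-but-verify"

	// `for i := range n` iterates i = 0, 1, ..., n-1; this is the form the
	// scram.go hunk adopts in place of a three-clause loop.
	for i := range len(password) {
		if password[i] > unicode.MaxASCII {
			fmt.Println("non-ASCII byte at index", i)
		}
	}

	// `for range n` is the same loop when the index itself is unused, as in
	// the pgbackrest test hunks.
	for range 3 {
		fmt.Println("repeat")
	}
}
```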
inCluster.Spec.Config.Files...), } container.VolumeMounts = append(container.VolumeMounts, additionalConfigVolumeMount) - outInstancePod.Volumes = append(outInstancePod.Volumes, additionalConfigVolume) + outInstancePod.Spec.Volumes = append(outInstancePod.Spec.Volumes, additionalConfigVolume) } // Mount the WAL PVC whenever it exists. The startup command will move WAL @@ -258,19 +253,37 @@ func InstancePod(ctx context.Context, container.VolumeMounts = append(container.VolumeMounts, walVolumeMount) startup.VolumeMounts = append(startup.VolumeMounts, walVolumeMount) - outInstancePod.Volumes = append(outInstancePod.Volumes, walVolume) + outInstancePod.Spec.Volumes = append(outInstancePod.Spec.Volumes, walVolume) + } + + // Mount an ephemeral volume, if specified. + if inInstanceSpec.Volumes != nil && inInstanceSpec.Volumes.Temp != nil { + tmpVolumeMount := TempVolumeMount() + tmpVolume := corev1.Volume{Name: tmpVolumeMount.Name} + tmpVolume.Ephemeral = &corev1.EphemeralVolumeSource{ + VolumeClaimTemplate: &corev1.PersistentVolumeClaimTemplate{ + Spec: inInstanceSpec.Volumes.Temp.AsPersistentVolumeClaimSpec(), + }, + } + + // Create the PVC with the same labels and annotations as the pod. + tmpVolume.Ephemeral.VolumeClaimTemplate.Annotations = outInstancePod.Annotations + tmpVolume.Ephemeral.VolumeClaimTemplate.Labels = outInstancePod.Labels + + container.VolumeMounts = append(container.VolumeMounts, tmpVolumeMount) + outInstancePod.Spec.Volumes = append(outInstancePod.Spec.Volumes, tmpVolume) } - outInstancePod.Containers = []corev1.Container{container, reloader} + outInstancePod.Spec.Containers = []corev1.Container{container, reloader} // If the InstanceSidecars feature gate is enabled and instance sidecars are // defined, add the defined container to the Pod. if feature.Enabled(ctx, feature.InstanceSidecars) && inInstanceSpec.Containers != nil { - outInstancePod.Containers = append(outInstancePod.Containers, inInstanceSpec.Containers...) + outInstancePod.Spec.Containers = append(outInstancePod.Spec.Containers, inInstanceSpec.Containers...) 
} - outInstancePod.InitContainers = []corev1.Container{startup} + outInstancePod.Spec.InitContainers = []corev1.Container{startup} } // PodSecurityContext returns a v1.PodSecurityContext for cluster that can write diff --git a/internal/postgres/reconcile_test.go b/internal/postgres/reconcile_test.go index a36e3c5368..aebc5d3121 100644 --- a/internal/postgres/reconcile_test.go +++ b/internal/postgres/reconcile_test.go @@ -115,11 +115,11 @@ func TestInstancePod(t *testing.T) { } // without WAL volume nor WAL volume spec - pod := new(corev1.PodSpec) + pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) - assert.Assert(t, cmp.MarshalMatches(pod, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec, ` containers: - env: - name: PGDATA @@ -268,11 +268,11 @@ initContainers: recreate "${postgres_data_directory}" '0700' else (halt Permissions!); fi || halt "$(permissions "${postgres_data_directory}" ||:)" - (mkdir -p '/pgdata/pgbackrest/log' && chmod 0775 '/pgdata/pgbackrest/log' '/pgdata/pgbackrest') || + (mkdir -p '/pgdata/pgbackrest/log' && { chmod 0775 '/pgdata/pgbackrest/log' '/pgdata/pgbackrest' || :; }) || halt "$(permissions /pgdata/pgbackrest/log ||:)" - (mkdir -p '/pgdata/patroni/log' && chmod 0775 '/pgdata/patroni/log' '/pgdata/patroni') || + (mkdir -p '/pgdata/patroni/log' && { chmod 0775 '/pgdata/patroni/log' '/pgdata/patroni' || :; }) || halt "$(permissions /pgdata/patroni/log ||:)" - (mkdir -p '/pgdata/logs/postgres' && chmod 0775 '/pgdata/logs/postgres' '/pgdata/logs') || + (mkdir -p '/pgdata/logs/postgres' && { chmod 0775 '/pgdata/logs/postgres' '/pgdata/logs' || :; }) || halt "$(permissions /pgdata/logs/postgres ||:)" install -D --mode=0600 -t "/tmp/replication" "/pgconf/tls/replication"/{tls.crt,tls.key,ca.crt} @@ -352,22 +352,22 @@ volumes: - path: cpu_limit resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: limits.cpu - path: cpu_request resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: requests.cpu - path: mem_limit resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: limits.memory - path: mem_request resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: requests.memory - fieldRef: apiVersion: v1 @@ -384,15 +384,15 @@ volumes: walVolume := new(corev1.PersistentVolumeClaim) walVolume.Name = "walvol" - pod := new(corev1.PodSpec) + pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, pod) - assert.Assert(t, len(pod.Containers) > 0) - assert.Assert(t, len(pod.InitContainers) > 0) + assert.Assert(t, len(pod.Spec.Containers) > 0) + assert.Assert(t, len(pod.Spec.InitContainers) > 0) // Container has all mountPaths, including downwardAPI - assert.Assert(t, cmp.MarshalMatches(pod.Containers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Containers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true @@ -402,19 +402,19 @@ volumes: name: database-containerinfo readOnly: true - mountPath: /pgwal - name: postgres-wal`), "expected WAL and downwardAPI mounts in %q container", pod.Containers[0].Name) + name: postgres-wal`), "expected WAL and downwardAPI mounts in %q container", pod.Spec.Containers[0].Name) // InitContainer has all mountPaths, except downwardAPI - assert.Assert(t, cmp.MarshalMatches(pod.InitContainers[0].VolumeMounts, ` + assert.Assert(t, 
cmp.MarshalMatches(pod.Spec.InitContainers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true - mountPath: /pgdata name: postgres-data - mountPath: /pgwal - name: postgres-wal`), "expected WAL mount, no downwardAPI mount in %q container", pod.InitContainers[0].Name) + name: postgres-wal`), "expected WAL mount, no downwardAPI mount in %q container", pod.Spec.InitContainers[0].Name) - assert.Assert(t, cmp.MarshalMatches(pod.Volumes, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Volumes, ` - name: cert-volume projected: defaultMode: 384 @@ -443,22 +443,22 @@ volumes: - path: cpu_limit resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: limits.cpu - path: cpu_request resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: requests.cpu - path: mem_limit resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: limits.memory - path: mem_request resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: requests.memory - fieldRef: apiVersion: v1 @@ -475,7 +475,7 @@ volumes: `), "expected WAL volume") // Startup moves WAL files to data volume. - assert.DeepEqual(t, pod.InitContainers[0].Command[4:], + assert.DeepEqual(t, pod.Spec.InitContainers[0].Command[4:], []string{"startup", "11", "/pgdata/pg11_wal"}) }) @@ -485,16 +485,16 @@ volumes: files: [{ secret: { name: keytab } }], }`) - pod := new(corev1.PodSpec) + pod := new(corev1.PodTemplateSpec) InstancePod(ctx, clusterWithConfig, instance, serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) - assert.Assert(t, len(pod.Containers) > 0) - assert.Assert(t, len(pod.InitContainers) > 0) + assert.Assert(t, len(pod.Spec.Containers) > 0) + assert.Assert(t, len(pod.Spec.InitContainers) > 0) // Container has all mountPaths, including downwardAPI, // and the postgres-config - assert.Assert(t, cmp.MarshalMatches(pod.Containers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Containers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true @@ -505,15 +505,15 @@ volumes: readOnly: true - mountPath: /etc/postgres name: postgres-config - readOnly: true`), "expected WAL and downwardAPI mounts in %q container", pod.Containers[0].Name) + readOnly: true`), "expected WAL and downwardAPI mounts in %q container", pod.Spec.Containers[0].Name) // InitContainer has all mountPaths, except downwardAPI and additionalConfig - assert.Assert(t, cmp.MarshalMatches(pod.InitContainers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.InitContainers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true - mountPath: /pgdata - name: postgres-data`), "expected WAL mount, no downwardAPI mount in %q container", pod.InitContainers[0].Name) + name: postgres-data`), "expected WAL mount, no downwardAPI mount in %q container", pod.Spec.InitContainers[0].Name) }) t.Run("WithCustomSidecarContainer", func(t *testing.T) { @@ -526,7 +526,7 @@ volumes: InstancePod(ctx, cluster, sidecarInstance, serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) - assert.Equal(t, len(pod.Containers), 2, "expected 2 containers in Pod, got %d", len(pod.Containers)) + assert.Equal(t, len(pod.Spec.Containers), 2, "expected 2 containers in Pod") }) t.Run("SidecarEnabled", func(t *testing.T) { @@ -539,11 +539,11 @@ volumes: InstancePod(ctx, cluster, sidecarInstance, serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) - assert.Equal(t, 
len(pod.Containers), 3, "expected 3 containers in Pod, got %d", len(pod.Containers)) + assert.Equal(t, len(pod.Spec.Containers), 3, "expected 3 containers in Pod") var found bool - for i := range pod.Containers { - if pod.Containers[i].Name == "customsidecar1" { + for i := range pod.Spec.Containers { + if pod.Spec.Containers[i].Name == "customsidecar1" { found = true break } @@ -576,7 +576,7 @@ volumes: InstancePod(ctx, cluster, instance, serverSecretProjection, clientSecretProjection, dataVolume, nil, tablespaceVolumes, pod) - assert.Assert(t, cmp.MarshalMatches(pod.Containers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Containers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true @@ -588,10 +588,10 @@ volumes: - mountPath: /tablespaces/castle name: tablespace-castle - mountPath: /tablespaces/trial - name: tablespace-trial`), "expected tablespace mount(s) in %q container", pod.Containers[0].Name) + name: tablespace-trial`), "expected tablespace mount(s) in %q container", pod.Spec.Containers[0].Name) // InitContainer has all mountPaths, except downwardAPI and additionalConfig - assert.Assert(t, cmp.MarshalMatches(pod.InitContainers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.InitContainers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true @@ -600,7 +600,7 @@ volumes: - mountPath: /tablespaces/castle name: tablespace-castle - mountPath: /tablespaces/trial - name: tablespace-trial`), "expected tablespace mount(s) in %q container", pod.InitContainers[0].Name) + name: tablespace-trial`), "expected tablespace mount(s) in %q container", pod.Spec.InitContainers[0].Name) }) t.Run("WithWALVolumeWithWALVolumeSpec", func(t *testing.T) { @@ -610,14 +610,14 @@ volumes: instance := new(v1beta1.PostgresInstanceSetSpec) instance.WALVolumeClaimSpec = new(v1beta1.VolumeClaimSpec) - pod := new(corev1.PodSpec) + pod := new(corev1.PodTemplateSpec) InstancePod(ctx, cluster, instance, serverSecretProjection, clientSecretProjection, dataVolume, walVolume, nil, pod) - assert.Assert(t, len(pod.Containers) > 0) - assert.Assert(t, len(pod.InitContainers) > 0) + assert.Assert(t, len(pod.Spec.Containers) > 0) + assert.Assert(t, len(pod.Spec.InitContainers) > 0) - assert.Assert(t, cmp.MarshalMatches(pod.Containers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Containers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true @@ -627,18 +627,18 @@ volumes: name: database-containerinfo readOnly: true - mountPath: /pgwal - name: postgres-wal`), "expected WAL and downwardAPI mounts in %q container", pod.Containers[0].Name) + name: postgres-wal`), "expected WAL and downwardAPI mounts in %q container", pod.Spec.Containers[0].Name) - assert.Assert(t, cmp.MarshalMatches(pod.InitContainers[0].VolumeMounts, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.InitContainers[0].VolumeMounts, ` - mountPath: /pgconf/tls name: cert-volume readOnly: true - mountPath: /pgdata name: postgres-data - mountPath: /pgwal - name: postgres-wal`), "expected WAL mount, no downwardAPI mount in %q container", pod.InitContainers[0].Name) + name: postgres-wal`), "expected WAL mount, no downwardAPI mount in %q container", pod.Spec.InitContainers[0].Name) - assert.Assert(t, cmp.MarshalMatches(pod.Volumes, ` + assert.Assert(t, cmp.MarshalMatches(pod.Spec.Volumes, ` - name: cert-volume projected: defaultMode: 384 @@ -667,22 +667,22 @@ volumes: - path: cpu_limit resourceFieldRef: containerName: database - divisor: 1m + 
divisor: "0" resource: limits.cpu - path: cpu_request resourceFieldRef: containerName: database - divisor: 1m + divisor: "0" resource: requests.cpu - path: mem_limit resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: limits.memory - path: mem_request resourceFieldRef: containerName: database - divisor: 1Mi + divisor: "0" resource: requests.memory - fieldRef: apiVersion: v1 @@ -699,9 +699,71 @@ volumes: `), "expected WAL volume") // Startup moves WAL files to WAL volume. - assert.DeepEqual(t, pod.InitContainers[0].Command[4:], + assert.DeepEqual(t, pod.Spec.InitContainers[0].Command[4:], []string{"startup", "11", "/pgwal/pg11_wal"}) }) + + t.Run("TempVolume", func(t *testing.T) { + instance := new(v1beta1.PostgresInstanceSetSpec) + require.UnmarshalInto(t, &instance, `{ + volumes: { temp: { + resources: { requests: { storage: 99Mi } }, + storageClassName: somesuch, + } }, + }`) + + pod := new(corev1.PodTemplateSpec) + InstancePod(ctx, cluster, instance, + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, pod) + + assert.Assert(t, len(pod.Spec.Containers) > 0) + assert.Assert(t, cmp.MarshalContains(pod.Spec.Containers[0].VolumeMounts, ` +- mountPath: /pgtmp + name: postgres-temp +`), "expected temp mount in %q container", pod.Spec.Containers[0].Name) + + // NOTE: `creationTimestamp: null` appears in the resulting pod, + // but it does not affect the PVC or reconciliation events; + // possibly https://pr.k8s.io/100032 + assert.Assert(t, cmp.MarshalContains(pod.Spec.Volumes, ` +- ephemeral: + volumeClaimTemplate: + metadata: + creationTimestamp: null + spec: + resources: + requests: + storage: 99Mi + storageClassName: somesuch + name: postgres-temp +`), "expected definition in the pod") + + t.Run("Metadata", func(t *testing.T) { + annotated := pod.DeepCopy() + annotated.Annotations = map[string]string{"n1": "etc"} + annotated.Labels = map[string]string{"gg": "asdf"} + + InstancePod(ctx, cluster, instance, + serverSecretProjection, clientSecretProjection, dataVolume, nil, nil, annotated) + + assert.Assert(t, cmp.MarshalContains(annotated.Spec.Volumes, ` +- ephemeral: + volumeClaimTemplate: + metadata: + annotations: + n1: etc + creationTimestamp: null + labels: + gg: asdf + spec: + resources: + requests: + storage: 99Mi + storageClassName: somesuch + name: postgres-temp +`), "expected definition in the pod") + }) + }) } func TestPodSecurityContext(t *testing.T) { diff --git a/internal/registration/interface.go b/internal/registration/interface.go deleted file mode 100644 index c0d4e390ad..0000000000 --- a/internal/registration/interface.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2023 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package registration - -import ( - "fmt" - "os" - - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -type Registration interface { - // Required returns true when registration is required but the token is missing or invalid. 
- Required(record.EventRecorder, client.Object, *[]metav1.Condition) bool -} - -var URL = os.Getenv("REGISTRATION_URL") - -func SetAdvanceWarning(recorder record.EventRecorder, object client.Object, conditions *[]metav1.Condition) { - recorder.Eventf(object, corev1.EventTypeWarning, "Register Soon", - "Crunchy Postgres for Kubernetes requires registration for upgrades."+ - " Register now to be ready for your next upgrade. See %s for details.", URL) - - meta.SetStatusCondition(conditions, metav1.Condition{ - Type: v1beta1.Registered, - Status: metav1.ConditionFalse, - Reason: "TokenRequired", - Message: fmt.Sprintf( - "Crunchy Postgres for Kubernetes requires registration for upgrades."+ - " Register now to be ready for your next upgrade. See %s for details.", URL), - ObservedGeneration: object.GetGeneration(), - }) -} - -func SetRequiredWarning(recorder record.EventRecorder, object client.Object, conditions *[]metav1.Condition) { - recorder.Eventf(object, corev1.EventTypeWarning, "Registration Required", - "Crunchy Postgres for Kubernetes requires registration for upgrades."+ - " Register now to be ready for your next upgrade. See %s for details.", URL) - - meta.SetStatusCondition(conditions, metav1.Condition{ - Type: v1beta1.Registered, - Status: metav1.ConditionFalse, - Reason: "TokenRequired", - Message: fmt.Sprintf( - "Crunchy Postgres for Kubernetes requires registration for upgrades."+ - " Upgrade suspended. See %s for details.", URL), - ObservedGeneration: object.GetGeneration(), - }) -} - -func emitFailedWarning(recorder record.EventRecorder, object client.Object) { - recorder.Eventf(object, corev1.EventTypeWarning, "Token Authentication Failed", - "See %s for details.", URL) -} - -func emitVerifiedEvent(recorder record.EventRecorder, object client.Object) { - recorder.Event(object, corev1.EventTypeNormal, "Token Verified", - "Thank you for registering your installation of Crunchy Postgres for Kubernetes.") -} diff --git a/internal/registration/runner.go b/internal/registration/runner.go deleted file mode 100644 index b50ceeb4ed..0000000000 --- a/internal/registration/runner.go +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright 2023 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package registration - -import ( - "context" - "crypto/rsa" - "errors" - "os" - "strings" - "sync" - "time" - - "github.com/golang-jwt/jwt/v5" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/manager" - - "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -// Runner implements [Registration] by loading and validating the token at a -// fixed path. Its methods are safe to call concurrently. -type Runner struct { - changed func() - enabled bool - publicKey *rsa.PublicKey - refresh time.Duration - tokenPath string - - token struct { - sync.RWMutex - Exists bool `json:"-"` - - jwt.RegisteredClaims - Iteration int `json:"itr"` - } -} - -// Runner implements [Registration] and [manager.Runnable]. -var _ Registration = (*Runner)(nil) -var _ manager.Runnable = (*Runner)(nil) - -// NewRunner creates a [Runner] that periodically checks the validity of the -// token at tokenPath. It calls changed when the validity of the token changes. 
-func NewRunner(publicKey, tokenPath string, changed func()) (*Runner, error) { - runner := &Runner{ - changed: changed, - refresh: time.Minute, - tokenPath: tokenPath, - } - - var err error - switch { - case publicKey != "" && tokenPath != "": - if !strings.HasPrefix(strings.TrimSpace(publicKey), "-") { - publicKey = "-----BEGIN -----\n" + publicKey + "\n-----END -----" - } - - runner.enabled = true - runner.publicKey, err = jwt.ParseRSAPublicKeyFromPEM([]byte(publicKey)) - - case publicKey == "" && tokenPath != "": - err = errors.New("registration: missing public key") - - case publicKey != "" && tokenPath == "": - err = errors.New("registration: missing token path") - } - - return runner, err -} - -// CheckToken loads and verifies the configured token, returning an error when -// the file exists but cannot be verified, and -// returning the token if it can be verified. -// NOTE(upgradecheck): return the token/nil so that we can use the token -// in upgradecheck; currently a refresh of the token will cause a restart of the pod -// meaning that the token used in upgradecheck is always the current token. -// But if the restart behavior changes, we might drop the token return in main.go -// and change upgradecheck to retrieve the token itself -func (r *Runner) CheckToken() (*jwt.Token, error) { - data, errFile := os.ReadFile(r.tokenPath) - key := func(*jwt.Token) (any, error) { return r.publicKey, nil } - - // Assume [jwt] and [os] functions could do something unexpected; use defer - // to safely write to the token. - r.token.Lock() - defer r.token.Unlock() - - token, errToken := jwt.ParseWithClaims(string(data), &r.token, key, - jwt.WithExpirationRequired(), - jwt.WithValidMethods([]string{"RS256"}), - ) - - // The error from [os.ReadFile] indicates whether a token file exists. - r.token.Exists = !os.IsNotExist(errFile) - - // Reset most claims if there is any problem loading, parsing, validating, or - // verifying the token file. - if errFile != nil || errToken != nil { - r.token.RegisteredClaims = jwt.RegisteredClaims{} - } - - switch { - case !r.enabled || !r.token.Exists: - return nil, nil - case errFile != nil: - return nil, errFile - default: - return token, errToken - } -} - -func (r *Runner) state() (failed, required bool) { - // Assume [time] functions could do something unexpected; use defer to safely - // read the token. - r.token.RLock() - defer r.token.RUnlock() - - failed = r.token.Exists && r.token.ExpiresAt == nil - required = r.enabled && - (!r.token.Exists || failed || r.token.ExpiresAt.Before(time.Now())) - return -} - -// Required returns true when registration is required but the token is missing or invalid. -func (r *Runner) Required( - recorder record.EventRecorder, object client.Object, conditions *[]metav1.Condition, -) bool { - failed, required := r.state() - - if r.enabled && failed { - emitFailedWarning(recorder, object) - } - - if !required && conditions != nil { - before := len(*conditions) - meta.RemoveStatusCondition(conditions, v1beta1.Registered) - meta.RemoveStatusCondition(conditions, "RegistrationRequired") - meta.RemoveStatusCondition(conditions, "TokenRequired") - found := len(*conditions) != before - - if r.enabled && found { - emitVerifiedEvent(recorder, object) - } - } - - return required -} - -// NeedLeaderElection returns true so that r runs only on the single -// [manager.Manager] that is elected leader in the Kubernetes namespace. 
-func (r *Runner) NeedLeaderElection() bool { return true } - -// Start watches for a mounted registration token when enabled. It blocks -// until ctx is cancelled. -func (r *Runner) Start(ctx context.Context) error { - var ticks <-chan time.Time - - if r.enabled { - ticker := time.NewTicker(r.refresh) - defer ticker.Stop() - ticks = ticker.C - } - - log := logging.FromContext(ctx).WithValues("controller", "registration") - - for { - select { - case <-ticks: - _, before := r.state() - if _, err := r.CheckToken(); err != nil { - log.Error(err, "Unable to validate token") - } - if _, after := r.state(); before != after && r.changed != nil { - r.changed() - } - case <-ctx.Done(): - return ctx.Err() - } - } -} diff --git a/internal/registration/runner_test.go b/internal/registration/runner_test.go deleted file mode 100644 index c70c07c6b9..0000000000 --- a/internal/registration/runner_test.go +++ /dev/null @@ -1,574 +0,0 @@ -// Copyright 2023 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package registration - -import ( - "context" - "crypto/rand" - "crypto/rsa" - "crypto/x509" - "encoding/pem" - "os" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/golang-jwt/jwt/v5" - "gotest.tools/v3/assert" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/manager" - - "github.com/crunchydata/postgres-operator/internal/testing/events" -) - -func TestNewRunner(t *testing.T) { - t.Parallel() - - key, err := rsa.GenerateKey(rand.Reader, 2048) - assert.NilError(t, err) - - der, err := x509.MarshalPKIXPublicKey(&key.PublicKey) - assert.NilError(t, err) - - public := pem.EncodeToMemory(&pem.Block{Bytes: der}) - assert.Assert(t, len(public) != 0) - - t.Run("Disabled", func(t *testing.T) { - runner, err := NewRunner("", "", nil) - assert.NilError(t, err) - assert.Assert(t, runner != nil) - assert.Assert(t, !runner.enabled) - }) - - t.Run("ConfiguredCorrectly", func(t *testing.T) { - runner, err := NewRunner(string(public), "any", nil) - assert.NilError(t, err) - assert.Assert(t, runner != nil) - assert.Assert(t, runner.enabled) - - t.Run("ExtraLines", func(t *testing.T) { - input := "\n\n" + strings.ReplaceAll(string(public), "\n", "\n\n") + "\n\n" - - runner, err := NewRunner(input, "any", nil) - assert.NilError(t, err) - assert.Assert(t, runner != nil) - assert.Assert(t, runner.enabled) - }) - - t.Run("WithoutPEMBoundaries", func(t *testing.T) { - lines := strings.Split(strings.TrimSpace(string(public)), "\n") - lines = lines[1 : len(lines)-1] - - for _, input := range []string{ - strings.Join(lines, ""), // single line - strings.Join(lines, "\n"), // multi-line - "\n\n" + strings.Join(lines, "\n\n") + "\n\n", // extra lines - } { - runner, err := NewRunner(input, "any", nil) - assert.NilError(t, err) - assert.Assert(t, runner != nil) - assert.Assert(t, runner.enabled) - } - }) - }) - - t.Run("ConfiguredIncorrectly", func(t *testing.T) { - for _, tt := range []struct { - key, path, msg string - }{ - {msg: "public key", key: "", path: "any"}, - {msg: "token path", key: "bad", path: ""}, - {msg: "invalid key", key: "bad", path: "any"}, - {msg: "token path", key: string(public), path: ""}, - } { - _, err := NewRunner(tt.key, tt.path, nil) - assert.ErrorContains(t, err, tt.msg, "(key=%q, path=%q)", tt.key, tt.path) - } - }) -} - -func TestRunnerCheckToken(t *testing.T) { - t.Parallel() - - dir := t.TempDir() - key, err := rsa.GenerateKey(rand.Reader, 2048) - 
assert.NilError(t, err) - - t.Run("SafeToCallDisabled", func(t *testing.T) { - r := Runner{enabled: false} - _, err := r.CheckToken() - assert.NilError(t, err) - }) - - t.Run("FileMissing", func(t *testing.T) { - r := Runner{enabled: true, tokenPath: filepath.Join(dir, "nope")} - _, err := r.CheckToken() - assert.NilError(t, err) - }) - - t.Run("FileUnreadable", func(t *testing.T) { - r := Runner{enabled: true, tokenPath: filepath.Join(dir, "nope")} - assert.NilError(t, os.WriteFile(r.tokenPath, nil, 0o200)) // Writeable - - _, err := r.CheckToken() - assert.ErrorContains(t, err, "permission") - assert.Assert(t, r.token.ExpiresAt == nil) - }) - - t.Run("FileEmpty", func(t *testing.T) { - r := Runner{enabled: true, tokenPath: filepath.Join(dir, "empty")} - assert.NilError(t, os.WriteFile(r.tokenPath, nil, 0o400)) // Readable - - _, err := r.CheckToken() - assert.ErrorContains(t, err, "malformed") - assert.Assert(t, r.token.ExpiresAt == nil) - }) - - t.Run("WrongAlgorithm", func(t *testing.T) { - r := Runner{ - enabled: true, - publicKey: &key.PublicKey, - tokenPath: filepath.Join(dir, "hs256"), - } - - // Maliciously treating an RSA public key as an HMAC secret. - // - https://auth0.com/blog/critical-vulnerabilities-in-json-web-token-libraries/ - public, err := x509.MarshalPKIXPublicKey(r.publicKey) - assert.NilError(t, err) - data, err := jwt.New(jwt.SigningMethodHS256).SignedString(public) - assert.NilError(t, err) - assert.NilError(t, os.WriteFile(r.tokenPath, []byte(data), 0o400)) // Readable - - _, err = r.CheckToken() - assert.Assert(t, err != nil, "HMAC algorithm should be rejected") - assert.Assert(t, r.token.ExpiresAt == nil) - }) - - t.Run("MissingExpiration", func(t *testing.T) { - r := Runner{ - enabled: true, - publicKey: &key.PublicKey, - tokenPath: filepath.Join(dir, "no-claims"), - } - - data, err := jwt.New(jwt.SigningMethodRS256).SignedString(key) - assert.NilError(t, err) - assert.NilError(t, os.WriteFile(r.tokenPath, []byte(data), 0o400)) // Readable - - _, err = r.CheckToken() - assert.ErrorContains(t, err, "exp claim is required") - assert.Assert(t, r.token.ExpiresAt == nil) - }) - - t.Run("ExpiredToken", func(t *testing.T) { - r := Runner{ - enabled: true, - publicKey: &key.PublicKey, - tokenPath: filepath.Join(dir, "expired"), - } - - data, err := jwt.NewWithClaims(jwt.SigningMethodRS256, jwt.MapClaims{ - "exp": jwt.NewNumericDate(time.Date(2020, 1, 1, 1, 1, 1, 1, time.UTC)), - }).SignedString(key) - assert.NilError(t, err) - assert.NilError(t, os.WriteFile(r.tokenPath, []byte(data), 0o400)) // Readable - - _, err = r.CheckToken() - assert.ErrorContains(t, err, "is expired") - assert.Assert(t, r.token.ExpiresAt == nil) - }) - - t.Run("ValidToken", func(t *testing.T) { - r := Runner{ - enabled: true, - publicKey: &key.PublicKey, - tokenPath: filepath.Join(dir, "valid"), - } - - expiration := jwt.NewNumericDate(time.Now().Add(time.Hour)) - data, err := jwt.NewWithClaims(jwt.SigningMethodRS256, jwt.MapClaims{ - "exp": expiration, - }).SignedString(key) - assert.NilError(t, err) - assert.NilError(t, os.WriteFile(r.tokenPath, []byte(data), 0o400)) // Readable - - token, err := r.CheckToken() - assert.NilError(t, err) - assert.Assert(t, r.token.ExpiresAt != nil) - assert.Assert(t, token.Valid) - exp, err := token.Claims.GetExpirationTime() - assert.NilError(t, err) - assert.Equal(t, exp.Time, expiration.Time) - }) -} - -func TestRunnerLeaderElectionRunnable(t *testing.T) { - var runner manager.LeaderElectionRunnable = &Runner{} - - assert.Assert(t, 
runner.NeedLeaderElection()) -} - -func TestRunnerRequiredConditions(t *testing.T) { - t.Parallel() - - t.Run("RegistrationDisabled", func(t *testing.T) { - r := Runner{enabled: false} - - for _, tt := range []struct { - before, after []metav1.Condition - }{ - { - before: []metav1.Condition{}, - after: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{ - {Type: "Registered"}, - {Type: "ExistingOther"}, - {Type: "RegistrationRequired"}, - }, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "TokenRequired"}}, - after: []metav1.Condition{}, - }, - } { - for _, exists := range []bool{false, true} { - for _, expires := range []time.Time{ - time.Now().Add(time.Hour), - time.Now().Add(-time.Hour), - } { - r.token.Exists = exists - r.token.ExpiresAt = jwt.NewNumericDate(expires) - - conditions := append([]metav1.Condition{}, tt.before...) - discard := new(events.Recorder) - object := &corev1.ConfigMap{} - - result := r.Required(discard, object, &conditions) - - assert.Equal(t, result, false, "expected registration not required") - assert.DeepEqual(t, conditions, tt.after) - } - } - } - }) - - t.Run("RegistrationRequired", func(t *testing.T) { - r := Runner{enabled: true} - - for _, tt := range []struct { - exists bool - expires time.Time - before []metav1.Condition - }{ - { - exists: false, expires: time.Now().Add(time.Hour), - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - { - exists: false, expires: time.Now().Add(-time.Hour), - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - { - exists: true, expires: time.Now().Add(-time.Hour), - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - } { - r.token.Exists = tt.exists - r.token.ExpiresAt = jwt.NewNumericDate(tt.expires) - - conditions := append([]metav1.Condition{}, tt.before...) - discard := new(events.Recorder) - object := &corev1.ConfigMap{} - - result := r.Required(discard, object, &conditions) - - assert.Equal(t, result, true, "expected registration required") - assert.DeepEqual(t, conditions, tt.before) - } - }) - - t.Run("Registered", func(t *testing.T) { - r := Runner{} - r.token.Exists = true - r.token.ExpiresAt = jwt.NewNumericDate(time.Now().Add(time.Hour)) - - for _, tt := range []struct { - before, after []metav1.Condition - }{ - { - before: []metav1.Condition{}, - after: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{ - {Type: "Registered"}, - {Type: "ExistingOther"}, - {Type: "RegistrationRequired"}, - }, - after: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "TokenRequired"}}, - after: []metav1.Condition{}, - }, - } { - for _, enabled := range []bool{false, true} { - r.enabled = enabled - - conditions := append([]metav1.Condition{}, tt.before...) 
- discard := new(events.Recorder) - object := &corev1.ConfigMap{} - - result := r.Required(discard, object, &conditions) - - assert.Equal(t, result, false, "expected registration not required") - assert.DeepEqual(t, conditions, tt.after) - } - } - }) -} - -func TestRunnerRequiredEvents(t *testing.T) { - t.Parallel() - - t.Run("RegistrationDisabled", func(t *testing.T) { - r := Runner{enabled: false} - - for _, tt := range []struct { - before []metav1.Condition - }{ - { - before: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - } { - for _, exists := range []bool{false, true} { - for _, expires := range []time.Time{ - time.Now().Add(time.Hour), - time.Now().Add(-time.Hour), - } { - r.token.Exists = exists - r.token.ExpiresAt = jwt.NewNumericDate(expires) - - conditions := append([]metav1.Condition{}, tt.before...) - object := &corev1.ConfigMap{} - recorder := events.NewRecorder(t, scheme.Scheme) - - result := r.Required(recorder, object, &conditions) - - assert.Equal(t, result, false, "expected registration not required") - assert.Equal(t, len(recorder.Events), 0, "expected no events") - } - } - } - }) - - t.Run("RegistrationRequired", func(t *testing.T) { - r := Runner{enabled: true} - - t.Run("MissingToken", func(t *testing.T) { - r.token.Exists = false - - for _, tt := range []struct { - before []metav1.Condition - }{ - { - before: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - } { - conditions := append([]metav1.Condition{}, tt.before...) - object := &corev1.ConfigMap{} - recorder := events.NewRecorder(t, scheme.Scheme) - - result := r.Required(recorder, object, &conditions) - - assert.Equal(t, result, true, "expected registration required") - assert.Equal(t, len(recorder.Events), 0, "expected no events") - } - }) - - t.Run("InvalidToken", func(t *testing.T) { - r.token.Exists = true - r.token.ExpiresAt = nil - - for _, tt := range []struct { - before []metav1.Condition - }{ - { - before: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - } { - conditions := append([]metav1.Condition{}, tt.before...) - object := &corev1.ConfigMap{} - recorder := events.NewRecorder(t, scheme.Scheme) - - result := r.Required(recorder, object, &conditions) - - assert.Equal(t, result, true, "expected registration required") - assert.Equal(t, len(recorder.Events), 1, "expected one event") - assert.Equal(t, recorder.Events[0].Type, "Warning") - assert.Equal(t, recorder.Events[0].Reason, "Token Authentication Failed") - } - }) - }) - - t.Run("Registered", func(t *testing.T) { - r := Runner{} - r.token.Exists = true - r.token.ExpiresAt = jwt.NewNumericDate(time.Now().Add(time.Hour)) - - t.Run("AlwaysRegistered", func(t *testing.T) { - // No prior registration conditions - for _, tt := range []struct { - before []metav1.Condition - }{ - { - before: []metav1.Condition{}, - }, - { - before: []metav1.Condition{{Type: "ExistingOther"}}, - }, - } { - for _, enabled := range []bool{false, true} { - r.enabled = enabled - - conditions := append([]metav1.Condition{}, tt.before...) 
- object := &corev1.ConfigMap{} - recorder := events.NewRecorder(t, scheme.Scheme) - - result := r.Required(recorder, object, &conditions) - - assert.Equal(t, result, false, "expected registration not required") - assert.Equal(t, len(recorder.Events), 0, "expected no events") - } - } - }) - - t.Run("PreviouslyUnregistered", func(t *testing.T) { - r.enabled = true - - // One or more prior registration conditions - for _, tt := range []struct { - before []metav1.Condition - }{ - { - before: []metav1.Condition{{Type: "Registered"}, {Type: "ExistingOther"}}, - }, - { - before: []metav1.Condition{ - {Type: "Registered"}, - {Type: "ExistingOther"}, - {Type: "RegistrationRequired"}, - }, - }, - { - before: []metav1.Condition{{Type: "TokenRequired"}}, - }, - } { - conditions := append([]metav1.Condition{}, tt.before...) - object := &corev1.ConfigMap{} - recorder := events.NewRecorder(t, scheme.Scheme) - - result := r.Required(recorder, object, &conditions) - - assert.Equal(t, result, false, "expected registration not required") - assert.Equal(t, len(recorder.Events), 1, "expected one event") - assert.Equal(t, recorder.Events[0].Type, "Normal") - assert.Equal(t, recorder.Events[0].Reason, "Token Verified") - } - }) - }) -} - -func TestRunnerStart(t *testing.T) { - t.Parallel() - - dir := t.TempDir() - key, err := rsa.GenerateKey(rand.Reader, 2048) - assert.NilError(t, err) - - token, err := jwt.NewWithClaims(jwt.SigningMethodRS256, jwt.MapClaims{ - "exp": jwt.NewNumericDate(time.Now().Add(time.Hour)), - }).SignedString(key) - assert.NilError(t, err) - - t.Run("DisabledDoesNothing", func(t *testing.T) { - runner := &Runner{ - enabled: false, - refresh: time.Nanosecond, - } - - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) - defer cancel() - - assert.ErrorIs(t, runner.Start(ctx), context.DeadlineExceeded, - "expected it to block until context is canceled") - }) - - t.Run("WithCallback", func(t *testing.T) { - called := false - runner := &Runner{ - changed: func() { called = true }, - enabled: true, - publicKey: &key.PublicKey, - refresh: time.Second, - tokenPath: filepath.Join(dir, "token"), - } - - // Begin with an invalid token. - assert.NilError(t, os.WriteFile(runner.tokenPath, nil, 0o600)) - _, err = runner.CheckToken() - assert.Assert(t, err != nil) - - // Replace it with a valid token. - assert.NilError(t, os.WriteFile(runner.tokenPath, []byte(token), 0o600)) - - // Run with a timeout that exceeds the refresh interval. - ctx, cancel := context.WithTimeout(context.Background(), runner.refresh*3/2) - defer cancel() - - assert.ErrorIs(t, runner.Start(ctx), context.DeadlineExceeded) - assert.Assert(t, called, "expected a call back") - }) -} diff --git a/internal/registration/testing.go b/internal/registration/testing.go deleted file mode 100644 index 7ea0032b31..0000000000 --- a/internal/registration/testing.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2023 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package registration - -import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/record" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// NOTE: This type can go away following https://go.dev/issue/47487. 
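For readers who don't know the idiom the NOTE above refers to: a named function type is given the interface's one method, so a bare function or closure can satisfy the interface — the same trick as net/http.HandlerFunc. A minimal, self-contained sketch of the pattern; the Checker and CheckerFunc names are illustrative only, not part of this repository:

package main

import "fmt"

// Checker stands in for any one-method interface, like Registration below.
type Checker interface {
	Required(name string) bool
}

// CheckerFunc adapts an ordinary function to the Checker interface.
type CheckerFunc func(string) bool

// Required implements Checker by calling the adapted function.
func (fn CheckerFunc) Required(name string) bool { return fn(name) }

// Compile-time proof that the adapter satisfies the interface.
var _ Checker = CheckerFunc(nil)

func main() {
	var c Checker = CheckerFunc(func(s string) bool { return s != "" })
	fmt.Println(c.Required("registration")) // true
}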
- -type RegistrationFunc func(record.EventRecorder, client.Object, *[]metav1.Condition) bool - -func (fn RegistrationFunc) Required(rec record.EventRecorder, obj client.Object, conds *[]metav1.Condition) bool { - return fn(rec, obj, conds) -} - -var _ Registration = RegistrationFunc(nil) diff --git a/internal/shell/paths.go b/internal/shell/paths.go index 3455ff8fe4..94c997f7b4 100644 --- a/internal/shell/paths.go +++ b/internal/shell/paths.go @@ -14,16 +14,33 @@ import ( "strings" ) +// CleanFileName returns the suffix of path after its last slash U+002F. +// This is similar to "basename" except this returns empty string when: +// - The final character of path is slash U+002F, or +// - The result would be "." or ".." +// +// See: +// - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/basename.html +func CleanFileName(path string) string { + if i := strings.LastIndexByte(path, '/'); i >= 0 { + path = path[i+1:] + } + if path != "." && path != ".." { + return path + } + return "" +} + // MakeDirectories returns a list of POSIX shell commands that ensure each path // exists. It creates every directory leading to path from (but not including) -// base and sets their permissions to exactly perms, regardless of umask. +// base and sets their permissions for Kubernetes, regardless of umask. // // See: // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/chmod.html // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/mkdir.html // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/test.html // - https://pubs.opengroup.org/onlinepubs/9799919799/utilities/umask.html -func MakeDirectories(perms fs.FileMode, base string, paths ...string) string { +func MakeDirectories(base string, paths ...string) string { // Without any paths, return a command that succeeds when the base path // exists. if len(paths) == 0 { @@ -44,14 +61,22 @@ func MakeDirectories(perms fs.FileMode, base string, paths ...string) string { } } + const perms fs.FileMode = 0 | + // S_IRWXU: enable owner read, write, and execute permissions. + 0o0700 | + // S_IRWXG: enable group read, write, and execute permissions. + 0o0070 | + // S_IXOTH, S_IROTH: enable other read and execute permissions. + 0o0001 | 0o0004 + return `` + // Create all the paths and any missing parents. `mkdir -p ` + strings.Join(QuoteWords(paths...), " ") + - // Set the permissions of every path and each parent. - // NOTE: FileMode bits other than file permissions are ignored. - fmt.Sprintf(` && chmod %#o %s`, - perms&fs.ModePerm, - strings.Join(QuoteWords(allPaths...), " "), + // Try to set the permissions of every path and each parent. + // This swallows the exit status of `chmod` because not all filesystems + // tolerate the operation; CIFS and NFS are notable examples. 
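+	// For example, `{ chmod 0775 /does/not/exist || :; }` exits zero even
+	// though chmod fails: ":" is the shell's always-successful no-op, and
+	// the braces group the pair so the surrounding "&&" list carries on.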
+ fmt.Sprintf(` && { chmod %#o %s || :; }`, + perms, strings.Join(QuoteWords(allPaths...), " "), ) } diff --git a/internal/shell/paths_test.go b/internal/shell/paths_test.go index 273f672b79..e723e40064 100644 --- a/internal/shell/paths_test.go +++ b/internal/shell/paths_test.go @@ -17,25 +17,55 @@ import ( "github.com/crunchydata/postgres-operator/internal/testing/require" ) +func TestCleanFileName(t *testing.T) { + t.Parallel() + + t.Run("Empty", func(t *testing.T) { + assert.Equal(t, CleanFileName(""), "") + }) + + t.Run("Dots", func(t *testing.T) { + assert.Equal(t, CleanFileName("."), "") + assert.Equal(t, CleanFileName(".."), "") + assert.Equal(t, CleanFileName("..."), "...") + assert.Equal(t, CleanFileName("././/.././../."), "") + assert.Equal(t, CleanFileName("././/.././../.."), "") + assert.Equal(t, CleanFileName("././/.././../../x.j"), "x.j") + }) + + t.Run("Directories", func(t *testing.T) { + assert.Equal(t, CleanFileName("/"), "") + assert.Equal(t, CleanFileName("//"), "") + assert.Equal(t, CleanFileName("asdf/"), "") + assert.Equal(t, CleanFileName("asdf//12.3"), "12.3") + assert.Equal(t, CleanFileName("//////"), "") + assert.Equal(t, CleanFileName("//////gg"), "gg") + }) + + t.Run("NoSeparators", func(t *testing.T) { + assert.Equal(t, CleanFileName("asdf12.3.ssgg"), "asdf12.3.ssgg") + }) +} + func TestMakeDirectories(t *testing.T) { t.Parallel() t.Run("NoPaths", func(t *testing.T) { assert.Equal(t, - MakeDirectories(0o755, "/asdf/jklm"), + MakeDirectories("/asdf/jklm"), `test -d '/asdf/jklm'`) }) t.Run("Children", func(t *testing.T) { assert.DeepEqual(t, - MakeDirectories(0o775, "/asdf", "/asdf/jklm", "/asdf/qwerty"), - `mkdir -p '/asdf/jklm' '/asdf/qwerty' && chmod 0775 '/asdf/jklm' '/asdf/qwerty'`) + MakeDirectories("/asdf", "/asdf/jklm", "/asdf/qwerty"), + `mkdir -p '/asdf/jklm' '/asdf/qwerty' && { chmod 0775 '/asdf/jklm' '/asdf/qwerty' || :; }`) }) t.Run("Grandchild", func(t *testing.T) { - script := MakeDirectories(0o775, "/asdf", "/asdf/qwerty/boots") + script := MakeDirectories("/asdf", "/asdf/qwerty/boots") assert.DeepEqual(t, script, - `mkdir -p '/asdf/qwerty/boots' && chmod 0775 '/asdf/qwerty/boots' '/asdf/qwerty'`) + `mkdir -p '/asdf/qwerty/boots' && { chmod 0775 '/asdf/qwerty/boots' '/asdf/qwerty' || :; }`) t.Run("ShellCheckPOSIX", func(t *testing.T) { shellcheck := require.ShellCheck(t) @@ -46,14 +76,14 @@ func TestMakeDirectories(t *testing.T) { // Expect ShellCheck for "sh" to be happy. 
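			// (SC2148 flags scripts whose shell dialect is unknown because they
			// lack a shebang; passing --shell=sh below declares the dialect.)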
// - https://www.shellcheck.net/wiki/SC2148 - cmd := exec.Command(shellcheck, "--enable=all", "--shell=sh", file) + cmd := exec.CommandContext(t.Context(), shellcheck, "--enable=all", "--shell=sh", file) output, err := cmd.CombinedOutput() assert.NilError(t, err, "%q\n%s", cmd.Args, output) }) }) t.Run("Long", func(t *testing.T) { - script := MakeDirectories(0o700, "/", strings.Repeat("/asdf", 20)) + script := MakeDirectories("/", strings.Repeat("/asdf", 20)) t.Run("PrettyYAML", func(t *testing.T) { b, err := yaml.Marshal(script) diff --git a/internal/testing/events/recorder.go b/internal/testing/events/recorder.go index e76ef21eb3..dad5dccf83 100644 --- a/internal/testing/events/recorder.go +++ b/internal/testing/events/recorder.go @@ -89,7 +89,7 @@ func (*Recorder) AnnotatedEventf(object runtime.Object, annotations map[string]s } func (r *Recorder) Event(object runtime.Object, eventtype, reason, message string) { if r.eventf != nil { - r.eventf(object, nil, eventtype, reason, "", message) + r.eventf(object, nil, eventtype, reason, "", "%v", message) } } func (r *Recorder) Eventf(object runtime.Object, eventtype, reason, messageFmt string, args ...any) { diff --git a/internal/testing/require/encoding.go b/internal/testing/require/encoding.go index a99f7a42f1..8016c1921a 100644 --- a/internal/testing/require/encoding.go +++ b/internal/testing/require/encoding.go @@ -9,6 +9,7 @@ import ( "testing" "gotest.tools/v3/assert" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "sigs.k8s.io/json" "sigs.k8s.io/yaml" ) @@ -37,3 +38,24 @@ func UnmarshalInto[Data ~string | ~[]byte, Destination *T, T any]( assert.NilError(t, err) assert.NilError(t, errors.Join(strict...)) } + +// UnmarshalIntoField parses input as YAML (or JSON) the same way as the Kubernetes API Server. +// The result goes into a (nested) field of output. It calls t.Fatal when something fails. +func UnmarshalIntoField[Data ~string | ~[]byte]( + t testing.TB, output *unstructured.Unstructured, input Data, fields ...string, +) { + t.Helper() + + if len(fields) == 0 { + t.Fatal("BUG: called without a destination") + } + + if output.Object == nil { + output.Object = map[string]any{} + } + + var value any + UnmarshalInto(t, &value, []byte(input)) + + assert.NilError(t, unstructured.SetNestedField(output.Object, value, fields...)) +} diff --git a/internal/testing/require/errors.go b/internal/testing/require/errors.go index 128a0397b0..039f8e2879 100644 --- a/internal/testing/require/errors.go +++ b/internal/testing/require/errors.go @@ -16,14 +16,25 @@ import ( // StatusError returns the [metav1.Status] within err's tree. // It calls t.Fatal when err is nil or there is no status. func StatusError(t testing.TB, err error) metav1.Status { - status, ok := err.(apierrors.APIStatus) + t.Helper() + status, ok := err.(apierrors.APIStatus) assert.Assert(t, ok || errors.As(err, &status), "%T does not implement %T", err, status) return status.Status() } +// StatusErrorDetails returns the details of [metav1.Status] within err's tree. +// It calls t.Fatal when err is nil, there is no status, or its Details field is nil. +func StatusErrorDetails(t testing.TB, err error) metav1.StatusDetails { + t.Helper() + + status := StatusError(t, err) + assert.Assert(t, status.Details != nil) + return *status.Details +} + // Value returns v or panics when err is not nil. 
func Value[T any](v T, err error) T { if err != nil { diff --git a/internal/testing/require/exec.go b/internal/testing/require/exec.go index 338abef584..a9e028c55e 100644 --- a/internal/testing/require/exec.go +++ b/internal/testing/require/exec.go @@ -38,7 +38,7 @@ func executable(name string, args ...string) func(testing.TB) string { t.Helper() once.Do(func() { path, err := exec.LookPath(name) - cmd := exec.Command(path, args...) // #nosec G204 -- args from init() + cmd := exec.CommandContext(t.Context(), path, args...) // #nosec G204 -- args from init() if err != nil { result = func(t testing.TB) string { diff --git a/internal/testing/validation/postgrescluster_test.go b/internal/testing/validation/postgrescluster_test.go index 18a17de069..ca4160b520 100644 --- a/internal/testing/validation/postgrescluster_test.go +++ b/internal/testing/validation/postgrescluster_test.go @@ -118,6 +118,121 @@ func TestPostgresAuthenticationRules(t *testing.T) { assert.Assert(t, cmp.Contains(cause.Message, "unsafe")) } }) + + t.Run("LDAP", func(t *testing.T) { + t.Run("Required", func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: ldap }, + { connection: hostssl, method: ldap, options: {} }, + { connection: hostssl, method: ldap, options: { ldapbinddn: any } }, + ], + }`) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + status := require.StatusError(t, err) + assert.Assert(t, status.Details != nil) + assert.Assert(t, cmp.Len(status.Details.Causes, 3)) + + for i, cause := range status.Details.Causes { + assert.Equal(t, cause.Field, fmt.Sprintf("spec.authentication.rules[%d]", i), "%#v", cause) + assert.Assert(t, cmp.Contains(cause.Message, `"ldap" method requires`)) + } + + // These are valid. + + cluster.Spec.Authentication = nil + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: ldap, options: { ldapbasedn: any } }, + { connection: hostssl, method: ldap, options: { ldapprefix: any } }, + { connection: hostssl, method: ldap, options: { ldapsuffix: any } }, + ], + }`) + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + + t.Run("Mixed", func(t *testing.T) { + // Some options cannot be combined with others. + + cluster := base.DeepCopy() + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: ldap, options: { ldapbinddn: any, ldapprefix: other } }, + { connection: hostssl, method: ldap, options: { ldapbasedn: any, ldapsuffix: other } }, + ], + }`) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + status := require.StatusError(t, err) + assert.Assert(t, status.Details != nil) + assert.Assert(t, cmp.Len(status.Details.Causes, 2)) + + for i, cause := range status.Details.Causes { + assert.Equal(t, cause.Field, fmt.Sprintf("spec.authentication.rules[%d]", i), "%#v", cause) + assert.Assert(t, cmp.Regexp(`cannot use .+? options with .+? options`, cause.Message)) + } + + // These combinations are allowed. 
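+			// PostgreSQL's LDAP options fall into two modes: ldapprefix and
+			// ldapsuffix belong to "simple bind", while ldapbasedn, ldapbinddn,
+			// ldapsearchattribute, and ldapsearchfilter belong to "search+bind".
+			// Mixing options across the two modes is what PostgreSQL rejects.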
+ + cluster.Spec.Authentication = nil + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: ldap, options: { ldapprefix: one, ldapsuffix: two } }, + { connection: hostssl, method: ldap, options: { ldapbasedn: one, ldapbinddn: two } }, + { connection: hostssl, method: ldap, options: { + ldapbasedn: one, ldapsearchattribute: two, ldapsearchfilter: three, + } }, + ], + }`) + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + }) + + t.Run("RADIUS", func(t *testing.T) { + t.Run("Required", func(t *testing.T) { + cluster := base.DeepCopy() + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: radius }, + { connection: hostssl, method: radius, options: {} }, + { connection: hostssl, method: radius, options: { radiusidentifiers: any } }, + { connection: hostssl, method: radius, options: { radiusservers: any } }, + { connection: hostssl, method: radius, options: { radiussecrets: any } }, + ], + }`) + + err := cc.Create(ctx, cluster, client.DryRunAll) + assert.Assert(t, apierrors.IsInvalid(err)) + + status := require.StatusError(t, err) + assert.Assert(t, status.Details != nil) + assert.Assert(t, cmp.Len(status.Details.Causes, 5)) + + for i, cause := range status.Details.Causes { + assert.Equal(t, cause.Field, fmt.Sprintf("spec.authentication.rules[%d]", i), "%#v", cause) + assert.Assert(t, cmp.Contains(cause.Message, `"radius" method requires`)) + } + + // These are valid. + + cluster.Spec.Authentication = nil + require.UnmarshalInto(t, &cluster.Spec.Authentication, `{ + rules: [ + { connection: hostssl, method: radius, options: { radiusservers: one, radiussecrets: two } }, + { connection: hostssl, method: radius, options: { + radiusservers: one, radiussecrets: two, radiusports: three, + } }, + ], + }`) + assert.NilError(t, cc.Create(ctx, cluster, client.DryRunAll)) + }) + }) } func TestPostgresConfigParameters(t *testing.T) { @@ -252,7 +367,7 @@ func TestPostgresConfigParameters(t *testing.T) { t.Run("Valid", func(t *testing.T) { cluster := base.DeepCopy() - cluster.Spec.Config = &v1beta1.PostgresConfig{ + cluster.Spec.Config = &v1beta1.PostgresConfigSpec{ Parameters: map[string]intstr.IntOrString{ "wal_level": intstr.FromString("logical"), }, @@ -263,7 +378,7 @@ func TestPostgresConfigParameters(t *testing.T) { t.Run("Invalid", func(t *testing.T) { cluster := base.DeepCopy() - cluster.Spec.Config = &v1beta1.PostgresConfig{ + cluster.Spec.Config = &v1beta1.PostgresConfigSpec{ Parameters: map[string]intstr.IntOrString{ "wal_level": intstr.FromString("minimal"), }, diff --git a/internal/upgradecheck/header.go b/internal/upgradecheck/header.go deleted file mode 100644 index f2449f909b..0000000000 --- a/internal/upgradecheck/header.go +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2017 - 2025 Crunchy Data Solutions, Inc. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package upgradecheck - -import ( - "context" - "encoding/json" - "net/http" - "os" - - googleuuid "github.com/google/uuid" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/util/uuid" - crclient "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/crunchydata/postgres-operator/internal/controller/postgrescluster" - "github.com/crunchydata/postgres-operator/internal/feature" - "github.com/crunchydata/postgres-operator/internal/kubernetes" - "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/internal/naming" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -const ( - clientHeader = "X-Crunchy-Client-Metadata" -) - -var ( - // Using apimachinery's UUID package, so our deployment UUID will be a string - deploymentID string -) - -// Extensible struct for client upgrade data -type clientUpgradeData struct { - BridgeClustersTotal int `json:"bridge_clusters_total"` - BuildSource string `json:"build_source"` - DeploymentID string `json:"deployment_id"` - FeatureGatesEnabled string `json:"feature_gates_enabled"` - IsOpenShift bool `json:"is_open_shift"` - KubernetesEnv string `json:"kubernetes_env"` - PGOClustersTotal int `json:"pgo_clusters_total"` - PGOInstaller string `json:"pgo_installer"` - PGOInstallerOrigin string `json:"pgo_installer_origin"` - PGOVersion string `json:"pgo_version"` - RegistrationToken string `json:"registration_token"` -} - -// generateHeader aggregates data and returns a struct of that data -// If any errors are encountered, it logs those errors and uses the default values -func generateHeader(ctx context.Context, crClient crclient.Client, - pgoVersion string, registrationToken string) *clientUpgradeData { - - return &clientUpgradeData{ - BridgeClustersTotal: getBridgeClusters(ctx, crClient), - BuildSource: os.Getenv("BUILD_SOURCE"), - DeploymentID: ensureDeploymentID(ctx, crClient), - FeatureGatesEnabled: feature.ShowEnabled(ctx), - IsOpenShift: kubernetes.IsOpenShift(ctx), - KubernetesEnv: kubernetes.VersionString(ctx), - PGOClustersTotal: getManagedClusters(ctx, crClient), - PGOInstaller: os.Getenv("PGO_INSTALLER"), - PGOInstallerOrigin: os.Getenv("PGO_INSTALLER_ORIGIN"), - PGOVersion: pgoVersion, - RegistrationToken: registrationToken, - } -} - -// ensureDeploymentID checks if the UUID exists in memory or in a ConfigMap -// If no UUID exists, ensureDeploymentID creates one and saves it in memory/as a ConfigMap -// Any errors encountered will be logged and the ID result will be what is in memory -func ensureDeploymentID(ctx context.Context, crClient crclient.Client) string { - // If there is no deploymentID in memory, generate one for possible use - if deploymentID == "" { - deploymentID = string(uuid.NewUUID()) - } - - cm := manageUpgradeCheckConfigMap(ctx, crClient, deploymentID) - - if cm != nil && cm.Data["deployment_id"] != "" { - deploymentID = cm.Data["deployment_id"] - } - - return deploymentID -} - -// manageUpgradeCheckConfigMap ensures a ConfigMap exists with a UUID -// If it doesn't exist, this creates it with the in-memory ID -// If it exists and it has a valid UUID, use that to replace the in-memory ID -// If it exists but the field is blank or mangled, we update the ConfigMap with the in-memory ID -func manageUpgradeCheckConfigMap(ctx context.Context, crClient crclient.Client, - currentID string) *corev1.ConfigMap { - - log := 
logging.FromContext(ctx) - upgradeCheckConfigMapMetadata := naming.UpgradeCheckConfigMap() - - cm := &corev1.ConfigMap{ - ObjectMeta: upgradeCheckConfigMapMetadata, - Data: map[string]string{"deployment_id": currentID}, - } - cm.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) - - // If no namespace is set, then log this and skip trying to set the UUID in the ConfigMap - if upgradeCheckConfigMapMetadata.GetNamespace() == "" { - log.V(1).Info("upgrade check issue: namespace not set") - return cm - } - - retrievedCM := &corev1.ConfigMap{} - err := crClient.Get(ctx, naming.AsObjectKey(upgradeCheckConfigMapMetadata), retrievedCM) - - // If we get any error besides IsNotFound, log it, skip any ConfigMap steps, - // and use the in-memory deploymentID - if err != nil && !apierrors.IsNotFound(err) { - log.V(1).Info("upgrade check issue: error retrieving configmap", - "response", err.Error()) - return cm - } - - // If we get a ConfigMap with a "deployment_id", check if that UUID is valid - if retrievedCM.Data["deployment_id"] != "" { - _, parseErr := googleuuid.Parse(retrievedCM.Data["deployment_id"]) - // No error -- the ConfigMap has a valid deploymentID, so use that - if parseErr == nil { - cm.Data["deployment_id"] = retrievedCM.Data["deployment_id"] - } - } - - err = applyConfigMap(ctx, crClient, cm, postgrescluster.ControllerName) - if err != nil { - log.V(1).Info("upgrade check issue: could not apply configmap", - "response", err.Error()) - } - return cm -} - -// applyConfigMap is a focused version of the Reconciler.apply method, -// meant only to work with this ConfigMap -// It sends an apply patch to the Kubernetes API, with the fieldManager set to the deployment_id -// and the force parameter set to true. -// - https://docs.k8s.io/reference/using-api/server-side-apply/#managers -// - https://docs.k8s.io/reference/using-api/server-side-apply/#conflicts -func applyConfigMap(ctx context.Context, crClient crclient.Client, - object crclient.Object, owner string) error { - // Generate an apply-patch by comparing the object to its zero value. - zero := &corev1.ConfigMap{} - data, err := crclient.MergeFrom(zero).Data(object) - - if err == nil { - apply := crclient.RawPatch(crclient.Apply.Type(), data) - err = crClient.Patch(ctx, object, apply, - []crclient.PatchOption{crclient.ForceOwnership, crclient.FieldOwner(owner)}...) 
- } - return err -} - -// getManagedClusters returns a count of postgres clusters managed by this PGO instance -// Any errors encountered will be logged and the count result will be 0 -func getManagedClusters(ctx context.Context, crClient crclient.Client) int { - var count int - clusters := &v1beta1.PostgresClusterList{} - err := crClient.List(ctx, clusters) - if err != nil { - log := logging.FromContext(ctx) - log.V(1).Info("upgrade check issue: could not count postgres clusters", - "response", err.Error()) - } else { - count = len(clusters.Items) - } - return count -} - -// getBridgeClusters returns a count of Bridge clusters managed by this PGO instance -// Any errors encountered will be logged and the count result will be 0 -func getBridgeClusters(ctx context.Context, crClient crclient.Client) int { - var count int - clusters := &v1beta1.CrunchyBridgeClusterList{} - err := crClient.List(ctx, clusters) - if err != nil { - log := logging.FromContext(ctx) - log.V(1).Info("upgrade check issue: could not count bridge clusters", - "response", err.Error()) - } else { - count = len(clusters.Items) - } - return count -} - -func addHeader(req *http.Request, upgradeInfo *clientUpgradeData) *http.Request { - marshaled, _ := json.Marshal(upgradeInfo) - req.Header.Add(clientHeader, string(marshaled)) - return req -} diff --git a/internal/upgradecheck/header_test.go b/internal/upgradecheck/header_test.go deleted file mode 100644 index ac162f5cce..0000000000 --- a/internal/upgradecheck/header_test.go +++ /dev/null @@ -1,560 +0,0 @@ -// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package upgradecheck - -import ( - "context" - "encoding/json" - "net/http" - "strings" - "testing" - - "gotest.tools/v3/assert" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/util/uuid" - - // Google Kubernetes Engine / Google Cloud Platform authentication provider - _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" - - "github.com/crunchydata/postgres-operator/internal/feature" - "github.com/crunchydata/postgres-operator/internal/kubernetes" - "github.com/crunchydata/postgres-operator/internal/naming" - "github.com/crunchydata/postgres-operator/internal/testing/cmp" - "github.com/crunchydata/postgres-operator/internal/testing/require" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -func TestGenerateHeader(t *testing.T) { - setupDeploymentID(t) - ctx := context.Background() - cfg, cc := require.Kubernetes2(t) - - discovery, err := kubernetes.NewDiscoveryRunner(cfg) - assert.NilError(t, err) - assert.NilError(t, discovery.Read(ctx)) - ctx = kubernetes.NewAPIContext(ctx, discovery) - - t.Setenv("PGO_INSTALLER", "test") - t.Setenv("PGO_INSTALLER_ORIGIN", "test-origin") - t.Setenv("PGO_NAMESPACE", require.Namespace(t, cc).Name) - t.Setenv("BUILD_SOURCE", "developer") - - t.Run("error ensuring ID", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "patch error", - } - ctx, calls := setupLogCapture(ctx) - - res := generateHeader(ctx, fakeClientWithOptionalError, "1.2.3", "") - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: could not apply configmap`)) - assert.Equal(t, discovery.IsOpenShift(), res.IsOpenShift) - assert.Equal(t, deploymentID, res.DeploymentID) - pgoList := v1beta1.PostgresClusterList{} - err := cc.List(ctx, &pgoList) - assert.NilError(t, err) - assert.Equal(t, len(pgoList.Items), res.PGOClustersTotal) - bridgeList := 
v1beta1.CrunchyBridgeClusterList{} - err = cc.List(ctx, &bridgeList) - assert.NilError(t, err) - assert.Equal(t, len(bridgeList.Items), res.BridgeClustersTotal) - assert.Equal(t, "1.2.3", res.PGOVersion) - assert.Equal(t, discovery.Version().String(), res.KubernetesEnv) - assert.Equal(t, "test", res.PGOInstaller) - assert.Equal(t, "test-origin", res.PGOInstallerOrigin) - assert.Equal(t, "developer", res.BuildSource) - }) - - t.Run("error getting cluster count", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "list error", - } - ctx, calls := setupLogCapture(ctx) - - res := generateHeader(ctx, fakeClientWithOptionalError, "1.2.3", "") - assert.Equal(t, len(*calls), 2) - // Aggregating the logs since we cannot determine which call will be first - callsAggregate := strings.Join(*calls, " ") - assert.Assert(t, cmp.Contains(callsAggregate, `upgrade check issue: could not count postgres clusters`)) - assert.Assert(t, cmp.Contains(callsAggregate, `upgrade check issue: could not count bridge clusters`)) - assert.Equal(t, discovery.IsOpenShift(), res.IsOpenShift) - assert.Equal(t, deploymentID, res.DeploymentID) - assert.Equal(t, 0, res.PGOClustersTotal) - assert.Equal(t, 0, res.BridgeClustersTotal) - assert.Equal(t, "1.2.3", res.PGOVersion) - assert.Equal(t, discovery.Version().String(), res.KubernetesEnv) - assert.Equal(t, "test", res.PGOInstaller) - assert.Equal(t, "test-origin", res.PGOInstallerOrigin) - assert.Equal(t, "developer", res.BuildSource) - }) - - t.Run("success", func(t *testing.T) { - ctx, calls := setupLogCapture(ctx) - gate := feature.NewGate() - assert.NilError(t, gate.SetFromMap(map[string]bool{ - feature.TablespaceVolumes: true, - })) - ctx = feature.NewContext(ctx, gate) - - res := generateHeader(ctx, cc, "1.2.3", "") - assert.Equal(t, len(*calls), 0) - assert.Equal(t, discovery.IsOpenShift(), res.IsOpenShift) - assert.Equal(t, deploymentID, res.DeploymentID) - pgoList := v1beta1.PostgresClusterList{} - err := cc.List(ctx, &pgoList) - assert.NilError(t, err) - assert.Equal(t, len(pgoList.Items), res.PGOClustersTotal) - assert.Equal(t, "1.2.3", res.PGOVersion) - assert.Equal(t, discovery.Version().String(), res.KubernetesEnv) - assert.Check(t, strings.Contains( - res.FeatureGatesEnabled, - "TablespaceVolumes=true", - )) - assert.Equal(t, "test", res.PGOInstaller) - assert.Equal(t, "test-origin", res.PGOInstallerOrigin) - assert.Equal(t, "developer", res.BuildSource) - }) -} - -func TestEnsureID(t *testing.T) { - ctx := context.Background() - cc := require.Kubernetes(t) - t.Setenv("PGO_NAMESPACE", require.Namespace(t, cc).Name) - - t.Run("success, no id set in mem or configmap", func(t *testing.T) { - deploymentID = "" - oldID := deploymentID - ctx, calls := setupLogCapture(ctx) - - newID := ensureDeploymentID(ctx, cc) - assert.Equal(t, len(*calls), 0) - assert.Assert(t, newID != oldID) - assert.Assert(t, newID == deploymentID) - - cm := &corev1.ConfigMap{} - err := cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cm) - assert.NilError(t, err) - assert.Equal(t, newID, cm.Data["deployment_id"]) - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("success, id set in mem, configmap created", func(t *testing.T) { - oldID := setupDeploymentID(t) - - cm := &corev1.ConfigMap{} - err := cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cm) - assert.Error(t, err, `configmaps "pgo-upgrade-check" not found`) - ctx, calls := setupLogCapture(ctx) - - newID := ensureDeploymentID(ctx, cc) - assert.Equal(t, 
len(*calls), 0) - assert.Assert(t, newID == oldID) - assert.Assert(t, newID == deploymentID) - - err = cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cm) - assert.NilError(t, err) - assert.Assert(t, deploymentID == cm.Data["deployment_id"]) - - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("success, id set in configmap, mem overwritten", func(t *testing.T) { - cm := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "deployment_id": string(uuid.NewUUID()), - }, - } - err := cc.Create(ctx, cm) - assert.NilError(t, err) - - cmRetrieved := &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - - oldID := setupDeploymentID(t) - ctx, calls := setupLogCapture(ctx) - newID := ensureDeploymentID(ctx, cc) - assert.Equal(t, len(*calls), 0) - assert.Assert(t, newID != oldID) - assert.Assert(t, newID == deploymentID) - assert.Assert(t, deploymentID == cmRetrieved.Data["deployment_id"]) - - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("configmap failed, no namespace given", func(t *testing.T) { - cm := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "deployment_id": string(uuid.NewUUID()), - }, - } - err := cc.Create(ctx, cm) - assert.NilError(t, err) - - cmRetrieved := &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - - oldID := setupDeploymentID(t) - ctx, calls := setupLogCapture(ctx) - t.Setenv("PGO_NAMESPACE", "") - - newID := ensureDeploymentID(ctx, cc) - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: namespace not set`)) - assert.Assert(t, newID == oldID) - assert.Assert(t, newID == deploymentID) - assert.Assert(t, deploymentID != cmRetrieved.Data["deployment_id"]) - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("configmap failed with not NotFound error, using preexisting ID", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "get error", - } - oldID := setupDeploymentID(t) - ctx, calls := setupLogCapture(ctx) - - newID := ensureDeploymentID(ctx, fakeClientWithOptionalError) - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: error retrieving configmap`)) - assert.Assert(t, newID == oldID) - assert.Assert(t, newID == deploymentID) - - cmRetrieved := &corev1.ConfigMap{} - err := cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.Error(t, err, `configmaps "pgo-upgrade-check" not found`) - }) - - t.Run("configmap failed to create, using preexisting ID", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "patch error", - } - oldID := setupDeploymentID(t) - - ctx, calls := setupLogCapture(ctx) - newID := ensureDeploymentID(ctx, fakeClientWithOptionalError) - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: could not apply configmap`)) - assert.Assert(t, newID == oldID) - assert.Assert(t, newID == deploymentID) - }) -} - -func TestManageUpgradeCheckConfigMap(t *testing.T) { - ctx := context.Background() - cc := require.Kubernetes(t) - t.Setenv("PGO_NAMESPACE", require.Namespace(t, cc).Name) - - t.Run("no namespace given", func(t *testing.T) { - ctx, calls := setupLogCapture(ctx) - t.Setenv("PGO_NAMESPACE", "") - - returnedCM 
:= manageUpgradeCheckConfigMap(ctx, cc, "current-id") - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: namespace not set`)) - assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id") - }) - - t.Run("configmap not found, created", func(t *testing.T) { - cmRetrieved := &corev1.ConfigMap{} - err := cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.Error(t, err, `configmaps "pgo-upgrade-check" not found`) - - ctx, calls := setupLogCapture(ctx) - returnedCM := manageUpgradeCheckConfigMap(ctx, cc, "current-id") - - assert.Equal(t, len(*calls), 0) - assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id") - err = cc.Delete(ctx, returnedCM) - assert.NilError(t, err) - }) - - t.Run("configmap failed with not NotFound error", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "get error", - } - ctx, calls := setupLogCapture(ctx) - - returnedCM := manageUpgradeCheckConfigMap(ctx, fakeClientWithOptionalError, - "current-id") - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: error retrieving configmap`)) - assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id") - }) - - t.Run("no deployment id in configmap", func(t *testing.T) { - cm := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "wrong_field": string(uuid.NewUUID()), - }, - } - err := cc.Create(ctx, cm) - assert.NilError(t, err) - - cmRetrieved := &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - - ctx, calls := setupLogCapture(ctx) - returnedCM := manageUpgradeCheckConfigMap(ctx, cc, "current-id") - assert.Equal(t, len(*calls), 0) - assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id") - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("mangled deployment id", func(t *testing.T) { - cm := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "deploymentid": string(uuid.NewUUID())[1:], - }, - } - err := cc.Create(ctx, cm) - assert.NilError(t, err) - - cmRetrieved := &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - - ctx, calls := setupLogCapture(ctx) - returnedCM := manageUpgradeCheckConfigMap(ctx, cc, "current-id") - assert.Equal(t, len(*calls), 0) - assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id") - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("good configmap with good id", func(t *testing.T) { - cm := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "deployment_id": string(uuid.NewUUID()), - }, - } - err := cc.Create(ctx, cm) - assert.NilError(t, err) - - cmRetrieved := &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey( - naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - - ctx, calls := setupLogCapture(ctx) - returnedCM := manageUpgradeCheckConfigMap(ctx, cc, "current-id") - assert.Equal(t, len(*calls), 0) - assert.Assert(t, returnedCM.Data["deployment-id"] != "current-id") - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("configmap failed to create", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "patch error", - } - - ctx, calls := setupLogCapture(ctx) - returnedCM := 
manageUpgradeCheckConfigMap(ctx, fakeClientWithOptionalError, - "current-id") - assert.Equal(t, len(*calls), 1) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: could not apply configmap`)) - assert.Assert(t, returnedCM.Data["deployment_id"] == "current-id") - }) -} - -func TestApplyConfigMap(t *testing.T) { - ctx := context.Background() - cc := require.Kubernetes(t) - t.Setenv("PGO_NAMESPACE", require.Namespace(t, cc).Name) - - t.Run("successful create", func(t *testing.T) { - cmRetrieved := &corev1.ConfigMap{} - err := cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.Error(t, err, `configmaps "pgo-upgrade-check" not found`) - - cm := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "new_field": "new_value", - }, - } - cm.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) - err = applyConfigMap(ctx, cc, cm, "test") - assert.NilError(t, err) - cmRetrieved = &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - assert.Equal(t, cm.Data["new_value"], cmRetrieved.Data["new_value"]) - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("successful update", func(t *testing.T) { - cm := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "new_field": "old_value", - }, - } - cm.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) - err := cc.Create(ctx, cm) - assert.NilError(t, err) - cmRetrieved := &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - - cm2 := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "new_field": "new_value", - }, - } - cm2.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) - err = applyConfigMap(ctx, cc, cm2, "test") - assert.NilError(t, err) - cmRetrieved = &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - assert.Equal(t, cm.Data["new_value"], cmRetrieved.Data["new_value"]) - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("successful nothing changed", func(t *testing.T) { - cm := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "new_field": "new_value", - }, - } - cm.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) - err := cc.Create(ctx, cm) - assert.NilError(t, err) - cmRetrieved := &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - - cm2 := &corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "new_field": "new_value", - }, - } - cm2.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) - err = applyConfigMap(ctx, cc, cm2, "test") - assert.NilError(t, err) - cmRetrieved = &corev1.ConfigMap{} - err = cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.NilError(t, err) - assert.Equal(t, cm.Data["new_value"], cmRetrieved.Data["new_value"]) - err = cc.Delete(ctx, cm) - assert.NilError(t, err) - }) - - t.Run("failure", func(t *testing.T) { - cmRetrieved := &corev1.ConfigMap{} - err := cc.Get(ctx, naming.AsObjectKey(naming.UpgradeCheckConfigMap()), cmRetrieved) - assert.Error(t, err, `configmaps "pgo-upgrade-check" not found`) - - cm := 
&corev1.ConfigMap{ - ObjectMeta: naming.UpgradeCheckConfigMap(), - Data: map[string]string{ - "new_field": "new_value", - }, - } - cm.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap")) - fakeClientWithOptionalError := &fakeClientWithError{ - cc, "patch error", - } - - err = applyConfigMap(ctx, fakeClientWithOptionalError, cm, "test") - assert.Error(t, err, "patch error") - }) -} - -func TestGetManagedClusters(t *testing.T) { - ctx := context.Background() - - t.Run("success", func(t *testing.T) { - fakeClient := setupFakeClientWithPGOScheme(t, true) - ctx, calls := setupLogCapture(ctx) - count := getManagedClusters(ctx, fakeClient) - assert.Equal(t, len(*calls), 0) - assert.Assert(t, count == 2) - }) - - t.Run("list throw error", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - setupFakeClientWithPGOScheme(t, true), "list error", - } - ctx, calls := setupLogCapture(ctx) - count := getManagedClusters(ctx, fakeClientWithOptionalError) - assert.Assert(t, len(*calls) > 0) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: could not count postgres clusters`)) - assert.Assert(t, count == 0) - }) -} - -func TestGetBridgeClusters(t *testing.T) { - ctx := context.Background() - - t.Run("success", func(t *testing.T) { - fakeClient := setupFakeClientWithPGOScheme(t, true) - ctx, calls := setupLogCapture(ctx) - count := getBridgeClusters(ctx, fakeClient) - assert.Equal(t, len(*calls), 0) - assert.Assert(t, count == 2) - }) - - t.Run("list throw error", func(t *testing.T) { - fakeClientWithOptionalError := &fakeClientWithError{ - setupFakeClientWithPGOScheme(t, true), "list error", - } - ctx, calls := setupLogCapture(ctx) - count := getBridgeClusters(ctx, fakeClientWithOptionalError) - assert.Assert(t, len(*calls) > 0) - assert.Assert(t, cmp.Contains((*calls)[0], `upgrade check issue: could not count bridge clusters`)) - assert.Assert(t, count == 0) - }) -} - -func TestAddHeader(t *testing.T) { - t.Run("successful", func(t *testing.T) { - req := &http.Request{ - Header: http.Header{}, - } - versionString := "1.2.3" - upgradeInfo := &clientUpgradeData{ - PGOVersion: versionString, - } - - result := addHeader(req, upgradeInfo) - header := result.Header[clientHeader] - - passedThroughData := &clientUpgradeData{} - err := json.Unmarshal([]byte(header[0]), passedThroughData) - assert.NilError(t, err) - - assert.Equal(t, passedThroughData.PGOVersion, "1.2.3") - // Failure to list clusters results in 0 returned - assert.Equal(t, passedThroughData.PGOClustersTotal, 0) - }) -} diff --git a/internal/upgradecheck/helpers_test.go b/internal/upgradecheck/helpers_test.go deleted file mode 100644 index 3d1c678ec5..0000000000 --- a/internal/upgradecheck/helpers_test.go +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package upgradecheck - -import ( - "context" - "fmt" - "testing" - - "github.com/go-logr/logr/funcr" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/uuid" - crclient "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" - - "github.com/crunchydata/postgres-operator/internal/controller/runtime" - "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" -) - -// fakeClientWithError is a controller runtime client and an error type to force -type fakeClientWithError struct { - crclient.Client - errorType string -} - -// Get returns the client.get OR an Error (`get error`) if the fakeClientWithError is set to error that way -func (f *fakeClientWithError) Get(ctx context.Context, key types.NamespacedName, obj crclient.Object, opts ...crclient.GetOption) error { - switch f.errorType { - case "get error": - return fmt.Errorf("get error") - default: - return f.Client.Get(ctx, key, obj, opts...) - } -} - -// Patch returns the client.get OR an Error (`patch error`) if the fakeClientWithError is set to error that way -// TODO: PatchType is not supported currently by fake -// - https://github.com/kubernetes/client-go/issues/970 -// Once that gets fixed, we can test without envtest -func (f *fakeClientWithError) Patch(ctx context.Context, obj crclient.Object, - patch crclient.Patch, opts ...crclient.PatchOption) error { - switch { - case f.errorType == "patch error": - return fmt.Errorf("patch error") - default: - return f.Client.Patch(ctx, obj, patch, opts...) - } -} - -// List returns the client.get OR an Error (`list error`) if the fakeClientWithError is set to error that way -func (f *fakeClientWithError) List(ctx context.Context, objList crclient.ObjectList, - opts ...crclient.ListOption) error { - switch f.errorType { - case "list error": - return fmt.Errorf("list error") - default: - return f.Client.List(ctx, objList, opts...) - } -} - -// setupDeploymentID returns a UUID -func setupDeploymentID(t *testing.T) string { - t.Helper() - deploymentID = string(uuid.NewUUID()) - return deploymentID -} - -// setupFakeClientWithPGOScheme returns a fake client with the PGO scheme added; -// if `includeCluster` is true, also adds some empty PostgresCluster and CrunchyBridgeCluster -// items to the client -func setupFakeClientWithPGOScheme(t *testing.T, includeCluster bool) crclient.Client { - t.Helper() - if includeCluster { - pc := &v1beta1.PostgresClusterList{ - Items: []v1beta1.PostgresCluster{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "hippo", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "elephant", - }, - }, - }, - } - - bcl := &v1beta1.CrunchyBridgeClusterList{ - Items: []v1beta1.CrunchyBridgeCluster{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "hippo", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "elephant", - }, - }, - }, - } - - return fake.NewClientBuilder(). - WithScheme(runtime.Scheme). - WithLists(pc, bcl). 
- Build() - } - return fake.NewClientBuilder().WithScheme(runtime.Scheme).Build() -} - -// setupLogCapture captures the logs and keeps count of the logs captured -func setupLogCapture(ctx context.Context) (context.Context, *[]string) { - calls := []string{} - testlog := funcr.NewJSON(func(object string) { - calls = append(calls, object) - }, funcr.Options{ - Verbosity: 1, - }) - return logging.NewContext(ctx, testlog), &calls -} diff --git a/internal/upgradecheck/http.go b/internal/upgradecheck/http.go deleted file mode 100644 index fe8585d42d..0000000000 --- a/internal/upgradecheck/http.go +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright 2017 - 2025 Crunchy Data Solutions, Inc. -// -// SPDX-License-Identifier: Apache-2.0 - -package upgradecheck - -import ( - "context" - "fmt" - "io" - "net/http" - "time" - - "github.com/golang-jwt/jwt/v5" - "k8s.io/apimachinery/pkg/util/wait" - crclient "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/manager" - - "github.com/crunchydata/postgres-operator/internal/logging" -) - -var ( - client HTTPClient - - // With these Backoff settings, wait.ExponentialBackoff will - // * use one second as the base time; - // * increase delays between calls by a power of 2 (1, 2, 4, etc.); - // * and retry four times. - // Note that there is no indeterminacy here since there is no Jitter set). - // With these parameters, the calls will occur at 0, 1, 3, and 7 seconds - // (i.e., at 1, 2, and 4 second delays for the retries). - backoff = wait.Backoff{ - Duration: 1 * time.Second, - Factor: float64(2), - Steps: 4, - } -) - -const ( - // upgradeCheckURL can be set using the CHECK_FOR_UPGRADES_URL env var - upgradeCheckURL = "https://operator-maestro.crunchydata.com/pgo-versions" -) - -type HTTPClient interface { - Do(req *http.Request) (*http.Response, error) -} - -// Creating an interface for cache with WaitForCacheSync to allow easier mocking -type CacheWithWait interface { - WaitForCacheSync(ctx context.Context) bool -} - -func init() { - // Since we create this client once during startup, - // we want each connection to be fresh, hence the non-default transport - // with DisableKeepAlives set to true - // See https://github.com/golang/go/issues/43905 and https://github.com/golang/go/issues/23427 - // for discussion of problems with long-lived connections - client = &http.Client{ - Timeout: 5 * time.Second, - Transport: &http.Transport{ - DisableKeepAlives: true, - }, - } -} - -func checkForUpgrades(ctx context.Context, url, versionString string, backoff wait.Backoff, - crclient crclient.Client, registrationToken string, -) (message string, header string, err error) { - var headerPayloadStruct *clientUpgradeData - - // Prep request - req, err := http.NewRequest("GET", url, nil) - if err == nil { - // generateHeader always returns some sort of struct, using defaults/nil values - // in case some of the checks return errors - headerPayloadStruct = generateHeader(ctx, crclient, - versionString, registrationToken) - req = addHeader(req, headerPayloadStruct) - } - - // wait.ExponentialBackoff will retry the func according to the backoff object until - // (a) func returns done as true or - // (b) the backoff settings are exhausted, - // i.e., the process hits the cap for time or the number of steps - // The anonymous function here sets certain preexisting variables (bodyBytes, err, status) - // which are then used by the surrounding `checkForUpgrades` function as part of the return - var bodyBytes []byte - var status int - - if err == nil { - _ = 
wait.ExponentialBackoff( - backoff, - func() (done bool, backoffErr error) { - var res *http.Response - res, err = client.Do(req) - - if err == nil { - defer res.Body.Close() - status = res.StatusCode - - // This is a very basic check, ignoring nuances around - // certain StatusCodes that should either prevent or impact retries - if status == http.StatusOK { - bodyBytes, err = io.ReadAll(res.Body) - return true, nil - } - } - - // Return false, nil to continue checking - return false, nil - }) - } - - // We received responses, but none of them were 200 OK. - if err == nil && status != http.StatusOK { - err = fmt.Errorf("received StatusCode %d", status) - } - - // TODO: Parse response and log info for user on potential upgrades - return string(bodyBytes), req.Header.Get(clientHeader), err -} - -type CheckForUpgradesScheduler struct { - Client crclient.Client - - Refresh time.Duration - RegistrationToken string - URL, Version string -} - -// ManagedScheduler creates a [CheckForUpgradesScheduler] and adds it to m. -// NOTE(registration): This takes a token/nil parameter when the operator is started. -// Currently the operator restarts when the token is updated, -// so this token is always current; but if that restart behavior is changed, -// we will want the upgrade mechanism to instantiate its own registration runner -// or otherwise get the most recent token. -func ManagedScheduler(m manager.Manager, - url, version string, registrationToken *jwt.Token) error { - if url == "" { - url = upgradeCheckURL - } - - var token string - if registrationToken != nil { - token = registrationToken.Raw - } - - return m.Add(&CheckForUpgradesScheduler{ - Client: m.GetClient(), - Refresh: 24 * time.Hour, - RegistrationToken: token, - URL: url, - Version: version, - }) -} - -// NeedLeaderElection returns true so that s runs only on the single -// [manager.Manager] that is elected leader in the Kubernetes cluster. -func (s *CheckForUpgradesScheduler) NeedLeaderElection() bool { return true } - -// Start checks for upgrades periodically. It blocks until ctx is cancelled. -func (s *CheckForUpgradesScheduler) Start(ctx context.Context) error { - s.check(ctx) - - ticker := time.NewTicker(s.Refresh) - defer ticker.Stop() - - for { - select { - case <-ticker.C: - s.check(ctx) - case <-ctx.Done(): - return ctx.Err() - } - } -} - -func (s *CheckForUpgradesScheduler) check(ctx context.Context) { - log := logging.FromContext(ctx) - - defer func() { - if v := recover(); v != nil { - log.V(1).Info("encountered panic in upgrade check", "response", v) - } - }() - - info, header, err := checkForUpgrades(ctx, - s.URL, s.Version, backoff, s.Client, s.RegistrationToken) - - if err != nil { - log.V(1).Info("could not complete upgrade check", "response", err.Error()) - } else { - log.Info(info, clientHeader, header) - } -} diff --git a/internal/upgradecheck/http_test.go b/internal/upgradecheck/http_test.go deleted file mode 100644 index 6393c305c8..0000000000 --- a/internal/upgradecheck/http_test.go +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
-// -// SPDX-License-Identifier: Apache-2.0 - -package upgradecheck - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "io" - "net/http" - "strings" - "testing" - "time" - - "github.com/go-logr/logr/funcr" - "gotest.tools/v3/assert" - "k8s.io/apimachinery/pkg/util/wait" - "sigs.k8s.io/controller-runtime/pkg/manager" - - "github.com/crunchydata/postgres-operator/internal/feature" - "github.com/crunchydata/postgres-operator/internal/logging" - "github.com/crunchydata/postgres-operator/internal/testing/cmp" -) - -func init() { - client = &MockClient{Timeout: 1} - // set backoff to two steps, 1 second apart for testing - backoff = wait.Backoff{ - Duration: 1 * time.Second, - Factor: float64(1), - Steps: 2, - } -} - -type MockClient struct { - Timeout time.Duration -} - -var funcFoo func() (*http.Response, error) - -// Do is the mock request that will return a mock success -func (m *MockClient) Do(req *http.Request) (*http.Response, error) { - return funcFoo() -} - -func TestCheckForUpgrades(t *testing.T) { - fakeClient := setupFakeClientWithPGOScheme(t, true) - - ctx := logging.NewContext(context.Background(), logging.Discard()) - gate := feature.NewGate() - assert.NilError(t, gate.SetFromMap(map[string]bool{ - feature.TablespaceVolumes: true, - })) - ctx = feature.NewContext(ctx, gate) - - // Pass *testing.T to allows the correct messages from the assert package - // in the event of certain failures. - checkData := func(t *testing.T, header string) { - data := clientUpgradeData{} - err := json.Unmarshal([]byte(header), &data) - assert.NilError(t, err) - assert.Assert(t, data.DeploymentID != "") - assert.Equal(t, data.PGOVersion, "4.7.3") - assert.Equal(t, data.RegistrationToken, "speakFriend") - assert.Equal(t, data.BridgeClustersTotal, 2) - assert.Equal(t, data.PGOClustersTotal, 2) - assert.Equal(t, data.FeatureGatesEnabled, - "AutoCreateUserSchema=true,InstanceSidecars=true,PGUpgradeCPUConcurrency=true,TablespaceVolumes=true") - } - - t.Run("success", func(t *testing.T) { - // A successful call - funcFoo = func() (*http.Response, error) { - json := `{"pgo_versions":[{"tag":"v5.0.4"},{"tag":"v5.0.3"},{"tag":"v5.0.2"},{"tag":"v5.0.1"},{"tag":"v5.0.0"}]}` - return &http.Response{ - Body: io.NopCloser(strings.NewReader(json)), - StatusCode: http.StatusOK, - }, nil - } - - res, header, err := checkForUpgrades(ctx, "", "4.7.3", backoff, - fakeClient, "speakFriend") - assert.NilError(t, err) - assert.Equal(t, res, `{"pgo_versions":[{"tag":"v5.0.4"},{"tag":"v5.0.3"},{"tag":"v5.0.2"},{"tag":"v5.0.1"},{"tag":"v5.0.0"}]}`) - checkData(t, header) - }) - - t.Run("total failure, err sending", func(t *testing.T) { - var counter int - // A call returning errors - funcFoo = func() (*http.Response, error) { - counter++ - return &http.Response{}, errors.New("whoops") - } - - res, header, err := checkForUpgrades(ctx, "", "4.7.3", backoff, - fakeClient, "speakFriend") - // Two failed calls because of env var - assert.Equal(t, counter, 2) - assert.Equal(t, res, "") - assert.Equal(t, err.Error(), `whoops`) - checkData(t, header) - }) - - t.Run("total failure, bad StatusCode", func(t *testing.T) { - var counter int - // A call returning bad StatusCode - funcFoo = func() (*http.Response, error) { - counter++ - return &http.Response{ - Body: io.NopCloser(strings.NewReader("")), - StatusCode: http.StatusBadRequest, - }, nil - } - - res, header, err := checkForUpgrades(ctx, "", "4.7.3", backoff, - fakeClient, "speakFriend") - assert.Equal(t, res, "") - // Two failed calls because of env var - 
assert.Equal(t, counter, 2) - assert.Equal(t, err.Error(), `received StatusCode 400`) - checkData(t, header) - }) - - t.Run("one failure, then success", func(t *testing.T) { - var counter int - // A call returning bad StatusCode the first time - // and a successful response the second time - funcFoo = func() (*http.Response, error) { - if counter < 1 { - counter++ - return &http.Response{ - Body: io.NopCloser(strings.NewReader("")), - StatusCode: http.StatusBadRequest, - }, nil - } - counter++ - json := `{"pgo_versions":[{"tag":"v5.0.4"},{"tag":"v5.0.3"},{"tag":"v5.0.2"},{"tag":"v5.0.1"},{"tag":"v5.0.0"}]}` - return &http.Response{ - Body: io.NopCloser(strings.NewReader(json)), - StatusCode: http.StatusOK, - }, nil - } - - res, header, err := checkForUpgrades(ctx, "", "4.7.3", backoff, - fakeClient, "speakFriend") - assert.Equal(t, counter, 2) - assert.NilError(t, err) - assert.Equal(t, res, `{"pgo_versions":[{"tag":"v5.0.4"},{"tag":"v5.0.3"},{"tag":"v5.0.2"},{"tag":"v5.0.1"},{"tag":"v5.0.0"}]}`) - checkData(t, header) - }) -} - -// TODO(benjaminjb): Replace `fake` with envtest -func TestCheckForUpgradesScheduler(t *testing.T) { - fakeClient := setupFakeClientWithPGOScheme(t, false) - - t.Run("panic from checkForUpgrades doesn't bubble up", func(t *testing.T) { - ctx := context.Background() - - // capture logs - var calls []string - ctx = logging.NewContext(ctx, funcr.NewJSON(func(object string) { - calls = append(calls, object) - }, funcr.Options{ - Verbosity: 1, - })) - - // A panicking call - funcFoo = func() (*http.Response, error) { - panic(fmt.Errorf("oh no!")) - } - - s := CheckForUpgradesScheduler{ - Client: fakeClient, - } - s.check(ctx) - - assert.Equal(t, len(calls), 2) - assert.Assert(t, cmp.Contains(calls[1], `encountered panic in upgrade check`)) - }) - - t.Run("successful log each loop, ticker works", func(t *testing.T) { - ctx := context.Background() - - // capture logs - var calls []string - ctx = logging.NewContext(ctx, funcr.NewJSON(func(object string) { - calls = append(calls, object) - }, funcr.Options{ - Verbosity: 1, - })) - - // A successful call - funcFoo = func() (*http.Response, error) { - json := `{"pgo_versions":[{"tag":"v5.0.4"},{"tag":"v5.0.3"},{"tag":"v5.0.2"},{"tag":"v5.0.1"},{"tag":"v5.0.0"}]}` - return &http.Response{ - Body: io.NopCloser(strings.NewReader(json)), - StatusCode: http.StatusOK, - }, nil - } - - // Set loop time to 1s and sleep for 2s before sending the done signal - ctx, cancel := context.WithTimeout(ctx, 2*time.Second) - defer cancel() - s := CheckForUpgradesScheduler{ - Client: fakeClient, - Refresh: 1 * time.Second, - } - assert.ErrorIs(t, context.DeadlineExceeded, s.Start(ctx)) - - // Sleeping leads to some non-deterministic results, but we expect at least 2 executions - // plus one log for the failure to apply the configmap - assert.Assert(t, len(calls) >= 4) - - assert.Assert(t, cmp.Contains(calls[1], `{\"pgo_versions\":[{\"tag\":\"v5.0.4\"},{\"tag\":\"v5.0.3\"},{\"tag\":\"v5.0.2\"},{\"tag\":\"v5.0.1\"},{\"tag\":\"v5.0.0\"}]}`)) - assert.Assert(t, cmp.Contains(calls[3], `{\"pgo_versions\":[{\"tag\":\"v5.0.4\"},{\"tag\":\"v5.0.3\"},{\"tag\":\"v5.0.2\"},{\"tag\":\"v5.0.1\"},{\"tag\":\"v5.0.0\"}]}`)) - }) -} - -func TestCheckForUpgradesSchedulerLeaderOnly(t *testing.T) { - // CheckForUpgradesScheduler should implement this interface. 
- var s manager.LeaderElectionRunnable = new(CheckForUpgradesScheduler) - - assert.Assert(t, s.NeedLeaderElection(), - "expected to only run on the leader") -} diff --git a/internal/util/secrets_test.go b/internal/util/secrets_test.go index e07a430718..ae5f7f5b05 100644 --- a/internal/util/secrets_test.go +++ b/internal/util/secrets_test.go @@ -55,7 +55,7 @@ func TestGenerateAlphaNumericPassword(t *testing.T) { } previous := sets.Set[string]{} - for i := 0; i < 10; i++ { + for range 10 { password, err := GenerateAlphaNumericPassword(5) assert.NilError(t, err) @@ -80,7 +80,7 @@ func TestGenerateASCIIPassword(t *testing.T) { } previous := sets.Set[string]{} - for i := 0; i < 10; i++ { + for range 10 { password, err := GenerateASCIIPassword(5) assert.NilError(t, err) diff --git a/internal/util/volumes.go b/internal/util/volumes.go new file mode 100644 index 0000000000..34e2699b54 --- /dev/null +++ b/internal/util/volumes.go @@ -0,0 +1,42 @@ +// Copyright 2017 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package util + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" +) + +// AddVolumeAndMountsToPod takes a Pod spec and a PVC and adds a Volume to the Pod spec with +// the PVC as the VolumeSource and mounts the volume to all containers and init containers +// in the Pod spec. +func AddVolumeAndMountsToPod(podSpec *corev1.PodSpec, volume *corev1.PersistentVolumeClaim) { + + podSpec.Volumes = append(podSpec.Volumes, corev1.Volume{ + Name: volume.Name, + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: volume.Name, + }, + }, + }) + + for i := range podSpec.Containers { + podSpec.Containers[i].VolumeMounts = append(podSpec.Containers[i].VolumeMounts, + corev1.VolumeMount{ + Name: volume.Name, + MountPath: fmt.Sprintf("/volumes/%s", volume.Name), + }) + } + + for i := range podSpec.InitContainers { + podSpec.InitContainers[i].VolumeMounts = append(podSpec.InitContainers[i].VolumeMounts, + corev1.VolumeMount{ + Name: volume.Name, + MountPath: fmt.Sprintf("/volumes/%s", volume.Name), + }) + } +} diff --git a/internal/util/volumes_test.go b/internal/util/volumes_test.go new file mode 100644 index 0000000000..b438943e3a --- /dev/null +++ b/internal/util/volumes_test.go @@ -0,0 +1,78 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. +// +// SPDX-License-Identifier: Apache-2.0 + +package util + +import ( + "testing" + + "github.com/google/go-cmp/cmp/cmpopts" + "gotest.tools/v3/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/crunchydata/postgres-operator/internal/testing/cmp" +) + +func TestAddVolumeAndMountsToPod(t *testing.T) { + pod := &corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "database"}, + {Name: "other"}, + {Name: "pgbackrest"}, + }, + InitContainers: []corev1.Container{ + {Name: "initializer"}, + {Name: "another"}, + }, + } + + volume := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "volume-name", + }, + } + + alwaysExpect := func(t testing.TB, result *corev1.PodSpec) { + // Only Containers, InitContainers, and Volumes fields have changed. 
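For context on the `AddVolumeAndMountsToPod` helper added above: given a PVC, it appends one Volume to the Pod spec and an identical mount to every container and init container. A minimal sketch of the resulting Pod spec fragment, read directly from the function body (the mount path is always `/volumes/<claim name>`):

```yaml
volumes:
- name: volume-name
  persistentVolumeClaim:
    claimName: volume-name            # the PVC name doubles as the volume name
containers:
- name: database                      # every container and init container gets the same mount
  volumeMounts:
  - name: volume-name
    mountPath: /volumes/volume-name   # fmt.Sprintf("/volumes/%s", volume.Name)
```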
+ assert.DeepEqual(t, *pod, *result, cmpopts.IgnoreFields(*pod, "Containers", "InitContainers", "Volumes")) + + // Volume is mounted to all containers + assert.Assert(t, cmp.MarshalMatches(result.Containers, ` +- name: database + resources: {} + volumeMounts: + - mountPath: /volumes/volume-name + name: volume-name +- name: other + resources: {} + volumeMounts: + - mountPath: /volumes/volume-name + name: volume-name +- name: pgbackrest + resources: {} + volumeMounts: + - mountPath: /volumes/volume-name + name: volume-name + `)) + + // Volume is mounted to all init containers + assert.Assert(t, cmp.MarshalMatches(result.InitContainers, ` +- name: initializer + resources: {} + volumeMounts: + - mountPath: /volumes/volume-name + name: volume-name +- name: another + resources: {} + volumeMounts: + - mountPath: /volumes/volume-name + name: volume-name + `)) + } + + out := pod.DeepCopy() + AddVolumeAndMountsToPod(out, volume) + alwaysExpect(t, out) +} diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go index d3f6882271..7c90b6f65e 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go @@ -65,6 +65,15 @@ type InstrumentationConfigSpec struct { // +listType=atomic // +optional Files []corev1.VolumeProjection `json:"files,omitempty"` + + // EnvironmentVariables allows the user to add environment variables to the + // collector container. + // --- + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:items:XValidation:rule=`self.name != 'K8S_POD_NAMESPACE' && self.name != 'K8S_POD_NAME' && self.name != 'PGPASSWORD'`,message="Cannot overwrite environment variables set by operator" + // +listType=atomic + // +optional + EnvironmentVariables []corev1.EnvVar `json:"environmentVariables,omitempty"` } // InstrumentationLogsSpec defines the configuration for collecting logs via @@ -107,6 +116,18 @@ type InstrumentationMetricsSpec struct { // --- // +optional CustomQueries *InstrumentationCustomQueriesSpec `json:"customQueries,omitempty"` + + // The names of exporters that should send metrics. + // --- + // +kubebuilder:validation:MinItems=1 + // +listType=set + // +optional + Exporters []string `json:"exporters,omitempty"` + + // User defined databases to target for default per-db metrics + // --- + // +optional + PerDBMetricTargets []string `json:"perDBMetricTargets,omitempty"` } type InstrumentationCustomQueriesSpec struct { @@ -159,6 +180,12 @@ type InstrumentationCustomQueries struct { // +default="5s" // +optional CollectionInterval *Duration `json:"collectionInterval,omitempty"` + + // The databases to target with added custom queries. + // Default behavior is to target `postgres`. + // --- + // +optional + Databases []string `json:"databases,omitempty"` } // --- diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go index e0bfe86d5d..6e0267f0bc 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/pgupgrade_types.go @@ -84,7 +84,7 @@ type PGUpgradeSettings struct { // The major version of PostgreSQL before the upgrade. 
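Before moving on to the PGUpgrade version bounds: the instrumentation fields added above surface under `spec.instrumentation` in the PostgresCluster CRD. A sketch of how they might be set, assuming the conventional field placement; the exporter and database names here are hypothetical:

```yaml
spec:
  instrumentation:
    config:
      environmentVariables:
      - name: HTTPS_PROXY               # any name except K8S_POD_NAMESPACE, K8S_POD_NAME,
        value: http://proxy.example.com # or PGPASSWORD, which the CEL rule rejects
    metrics:
      exporters: [otlp]                 # hypothetical exporter defined elsewhere in config
      perDBMetricTargets:
      - app_db                          # hypothetical database for default per-db metrics
```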
// --- // +kubebuilder:validation:Minimum=11 - // +kubebuilder:validation:Maximum=17 + // +kubebuilder:validation:Maximum=18 // +required FromPostgresVersion int32 `json:"fromPostgresVersion"` @@ -98,7 +98,7 @@ type PGUpgradeSettings struct { // The major version of PostgreSQL to be upgraded to. // --- // +kubebuilder:validation:Minimum=11 - // +kubebuilder:validation:Maximum=17 + // +kubebuilder:validation:Maximum=18 // +required ToPostgresVersion int32 `json:"toPostgresVersion"` diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go index 8f950dbfa9..e45c29b8bd 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgres_types.go @@ -10,6 +10,14 @@ import ( ) type PostgresAuthenticationSpec struct { + // Postgres compares every new connection to these rules in the order they are + // defined. The first rule that matches determines if and how the connection + // must then authenticate. Connections that match no rules are disconnected. + // + // When this is omitted or empty, Postgres accepts encrypted connections to any + // database from users that have a password. To refuse all network connections, + // set this to one rule that matches "host" connections to the "reject" method. + // // More info: https://www.postgresql.org/docs/current/auth-pg-hba-conf.html // --- // +kubebuilder:validation:MaxItems=10 @@ -18,7 +26,7 @@ type PostgresAuthenticationSpec struct { Rules []PostgresHBARuleSpec `json:"rules,omitempty"` } -type PostgresConfig struct { +type PostgresConfigSpec struct { // Files to mount under "/etc/postgres". // --- // +optional @@ -46,7 +54,7 @@ type PostgresConfig struct { // // +kubebuilder:validation:XValidation:rule=`!has(self.listen_addresses)`,message=`network connectivity is always enabled: listen_addresses` // +kubebuilder:validation:XValidation:rule=`!has(self.port)`,message=`change port using .spec.port instead` - // +kubebuilder:validation:XValidation:rule=`!has(self.ssl) && !self.exists(k, k.startsWith("ssl_"))`,message=`TLS is always enabled` + // +kubebuilder:validation:XValidation:rule=`!has(self.ssl) && !self.exists(k, k.startsWith("ssl_") && !(k == 'ssl_groups' || k == 'ssl_ecdh_curve'))`,message=`TLS is always enabled` // +kubebuilder:validation:XValidation:rule=`!self.exists(k, k.startsWith("unix_socket_"))`,message=`domain socket paths cannot be changed` // // # Write Ahead Log @@ -99,6 +107,7 @@ type PostgresHBARule struct { // The authentication method to use when a connection matches this rule. // The special value "reject" refuses connections that match this rule. + // // More info: https://www.postgresql.org/docs/current/auth-methods.html // --- // +kubebuilder:validation:MinLength=1 @@ -108,6 +117,7 @@ type PostgresHBARule struct { // +optional Method string `json:"method,omitempty"` + // Additional settings for this rule or its authentication method. // --- // +kubebuilder:validation:MaxProperties=20 // +mapType=atomic @@ -124,8 +134,19 @@ type PostgresHBARule struct { // --- // Emulate OpenAPI "anyOf" aka Kubernetes union. -// +kubebuilder:validation:XValidation:rule=`has(self.hba) ? !has(self.connection) && !has(self.databases) && !has(self.method) && !has(self.options) && !has(self.users) : true`,message=`"hba" cannot be combined with other fields` -// +kubebuilder:validation:XValidation:rule=`has(self.hba) ? 
true : has(self.connection) && has(self.method)`,message=`"connection" and "method" are required` +// +kubebuilder:validation:XValidation:rule=`[has(self.hba), has(self.connection) || has(self.databases) || has(self.method) || has(self.options) || has(self.users)].exists_one(b,b)`,message=`"hba" cannot be combined with other fields` +// +kubebuilder:validation:XValidation:rule=`has(self.hba) || (has(self.connection) && has(self.method))`,message=`"connection" and "method" are required` +// +// Some authentication methods *must* be further configured via options. +// +// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/backend/libpq/hba.c#l1501 +// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_17_0;f=src/backend/libpq/hba.c#l1886 +// +kubebuilder:validation:XValidation:rule=`has(self.hba) || self.method != "ldap" || (has(self.options) && ["ldapbasedn","ldapprefix","ldapsuffix"].exists(k, k in self.options))`,message=`the "ldap" method requires an "ldapbasedn", "ldapprefix", or "ldapsuffix" option` +// +kubebuilder:validation:XValidation:rule=`has(self.hba) || self.method != "ldap" || !has(self.options) || [["ldapprefix","ldapsuffix"], ["ldapbasedn","ldapbinddn","ldapbindpasswd","ldapsearchattribute","ldapsearchfilter"]].exists_one(a, a.exists(k, k in self.options))`,message=`cannot use "ldapbasedn", "ldapbinddn", "ldapbindpasswd", "ldapsearchattribute", or "ldapsearchfilter" options with "ldapprefix" or "ldapsuffix" options` +// +// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/backend/libpq/hba.c#l1539 +// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_17_0;f=src/backend/libpq/hba.c#l1945 +// +kubebuilder:validation:XValidation:rule=`has(self.hba) || self.method != "radius" || (has(self.options) && ["radiusservers","radiussecrets"].all(k, k in self.options))`,message=`the "radius" method requires "radiusservers" and "radiussecrets" options` // // +structType=atomic type PostgresHBARuleSpec struct { diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go index 7ee966d211..46b7d0be43 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/postgrescluster_types.go @@ -13,6 +13,11 @@ import ( ) // PostgresClusterSpec defines the desired state of PostgresCluster +// --- +// +// # Postgres 18 +// +// +kubebuilder:validation:XValidation:rule=`!has(self.config) || !has(self.config.parameters) || !has(self.config.parameters.ssl_groups) || self.postgresVersion > 17`,message=`The ssl_groups parameter is only available in pg18 and greater` type PostgresClusterSpec struct { // +optional Metadata *Metadata `json:"metadata,omitempty"` @@ -21,6 +26,7 @@ type PostgresClusterSpec struct { // +optional DataSource *DataSource `json:"dataSource,omitempty"` + // Authentication settings for the PostgreSQL server // +optional Authentication *PostgresAuthenticationSpec `json:"authentication,omitempty"` @@ -28,8 +34,9 @@ type PostgresClusterSpec struct { // +optional Backups Backups `json:"backups,omitempty"` + // General configuration of the PostgreSQL server // +optional - Config *PostgresConfig `json:"config,omitempty"` + Config *PostgresConfigSpec `json:"config,omitempty"` // The secret containing the Certificates and Keys to encrypt PostgreSQL // traffic will need to contain the server TLS certificate, 
TLS key and the @@ -129,7 +136,7 @@ type PostgresClusterSpec struct { // The major version of PostgreSQL installed in the PostgreSQL image // +kubebuilder:validation:Required // +kubebuilder:validation:Minimum=11 - // +kubebuilder:validation:Maximum=17 + // +kubebuilder:validation:Maximum=18 // +operator-sdk:csv:customresourcedefinitions:type=spec,order=1 PostgresVersion int `json:"postgresVersion"` @@ -202,8 +209,9 @@ type DataSource struct { // PostgreSQL data directory for a new PostgreSQL cluster using a pgBackRest restore. // The PGBackRest field is incompatible with the PostgresCluster field: only one // data source can be used for pre-populating a new PostgreSQL cluster + // TODO(k8s-1.28): fieldPath=`.repo` // +optional - // +kubebuilder:validation:XValidation:rule="!has(self.repo.volume)", message="Only S3, GCS or Azure repos can be used as a pgBackRest data source.", fieldPath=".repo" + // +kubebuilder:validation:XValidation:rule="!has(self.repo.volume)", message="Only S3, GCS or Azure repos can be used as a pgBackRest data source." PGBackRest *PGBackRestDataSource `json:"pgbackrest,omitempty"` // Defines a pgBackRest data source that can be used to pre-populate the PostgreSQL data @@ -364,12 +372,6 @@ type PostgresClusterStatus struct { // +optional PGBackRest *PGBackRestStatus `json:"pgbackrest,omitempty"` - // +optional - RegistrationRequired *RegistrationRequirementStatus `json:"registrationRequired,omitempty"` - - // +optional - TokenRequired string `json:"tokenRequired,omitempty"` - // Stores the current PostgreSQL major version following a successful // major PostgreSQL upgrade. // +optional @@ -424,7 +426,6 @@ const ( PersistentVolumeResizeError = "PersistentVolumeResizeError" PostgresClusterProgressing = "Progressing" ProxyAvailable = "ProxyAvailable" - Registered = "Registered" ) type PostgresInstanceSetSpec struct { @@ -519,6 +520,16 @@ type PostgresInstanceSetSpec struct { // +listMapKey=name // +optional TablespaceVolumes []TablespaceVolume `json:"tablespaceVolumes,omitempty"` + + Volumes *PostgresVolumesSpec `json:"volumes,omitempty"` +} + +type PostgresVolumesSpec struct { + // An ephemeral volume for temporary files. + // More info: https://kubernetes.io/docs/concepts/storage/ephemeral-volumes + // --- + // +optional + Temp *VolumeClaimSpec `json:"temp,omitempty"` } type TablespaceVolume struct { @@ -597,10 +608,6 @@ func (s *PostgresProxySpec) Default() { } } -type RegistrationRequirementStatus struct { - PGOVersion string `json:"pgoVersion,omitempty"` -} - type PostgresProxyStatus struct { PGBouncer PGBouncerPodStatus `json:"pgBouncer,omitempty"` } diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go index 9042245b2f..4467cce28c 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/standalone_pgadmin_types.go @@ -21,7 +21,7 @@ type StandalonePGAdminConfiguration struct { // +optional ConfigDatabaseURI *OptionalSecretKeyRef `json:"configDatabaseURI,omitempty"` - // Settings for the gunicorn server. + // Settings for the Gunicorn server. // More info: https://docs.gunicorn.org/en/latest/settings.html // +optional // +kubebuilder:pruning:PreserveUnknownFields @@ -37,11 +37,46 @@ type StandalonePGAdminConfiguration struct { // Settings for the pgAdmin server process. Keys should be uppercase and // values must be constants. 
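Stepping back to the HBA validations earlier in this diff: the new CEL rules mirror checks PostgreSQL itself performs in hba.c, so invalid rules are rejected at admission instead of failing at reload. A sketch of rules that pass, with hypothetical hosts and users:

```yaml
spec:
  authentication:
    rules:
    - connection: hostssl
      users: [app]                          # hypothetical user
      method: ldap
      options:
        ldapbasedn: dc=example,dc=com       # ldap requires ldapbasedn, ldapprefix, or ldapsuffix
    - connection: host
      method: radius
      options:
        radiusservers: radius.example.com   # radius requires both of these options
        radiussecrets: shared-secret
    - hba: host all all 10.0.0.0/8 reject   # raw rule; cannot be combined with other fields
```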
// More info: https://www.pgadmin.org/docs/pgadmin4/latest/config_py.html - // +optional + // --- // +kubebuilder:pruning:PreserveUnknownFields // +kubebuilder:validation:Schemaless // +kubebuilder:validation:Type=object + // + // +mapType=granular + // +optional Settings SchemalessObject `json:"settings,omitempty"` + + // Secrets for the `OAUTH2_CONFIG` setting. If there are `OAUTH2_CONFIG` values + // in the settings field, they will be combined with the values loaded here. + // More info: https://www.pgadmin.org/docs/pgadmin4/latest/oauth2.html + // --- + // The controller expects this number to be no more than two digits. + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=10 + // + // +listType=map + // +listMapKey=name + // +optional + OAuthConfigurations []PGAdminOAuthConfig `json:"oauthConfigurations,omitempty"` +} + +// +structType=atomic +type PGAdminOAuthConfig struct { + // The OAUTH2_NAME of this configuration. + // --- + // This goes into a filename, so let's keep it short and simple. + // The Secret is allowed to contain OAUTH2_NAME and deviate from this. + // +kubebuilder:validation:Pattern=`^[A-Za-z0-9]+$` + // + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=20 + // +required + Name string `json:"name"` + + // A Secret containing the settings of one OAuth2 provider as a JSON object. + // --- + // +required + Secret SecretKeyRef `json:"secret"` } // PGAdminSpec defines the desired state of PGAdmin @@ -192,6 +227,10 @@ type PGAdminStatus struct { // +optional MajorVersion int `json:"majorVersion,omitempty"` + // MinorVersion represents the minor version of the running pgAdmin. + // +optional + MinorVersion string `json:"minorVersion,omitempty"` + // observedGeneration represents the .metadata.generation on which the status was based. // +optional // +kubebuilder:validation:Minimum=0 diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go index 189eebdd23..ec31e27b3b 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go @@ -460,6 +460,13 @@ func (in *InstrumentationConfigSpec) DeepCopyInto(out *InstrumentationConfigSpec (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.EnvironmentVariables != nil { + in, out := &in.EnvironmentVariables, &out.EnvironmentVariables + *out = make([]corev1.EnvVar, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstrumentationConfigSpec. @@ -481,6 +488,11 @@ func (in *InstrumentationCustomQueries) DeepCopyInto(out *InstrumentationCustomQ *out = new(Duration) **out = **in } + if in.Databases != nil { + in, out := &in.Databases, &out.Databases + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstrumentationCustomQueries. 
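For the `oauthConfigurations` list added above: each entry pairs a short OAUTH2_NAME (alphanumeric, at most 20 characters, since it becomes part of a filename) with a Secret holding one provider's settings as a JSON object. A sketch, assuming a SecretKeyRef with `name` and `key` fields and a hypothetical Secret:

```yaml
spec:
  config:
    oauthConfigurations:
    - name: google                  # must match ^[A-Za-z0-9]+$
      secret:
        name: pgadmin-oauth-google  # hypothetical Secret
        key: settings.json          # key whose value is one provider's JSON object
```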
@@ -558,6 +570,16 @@ func (in *InstrumentationMetricsSpec) DeepCopyInto(out *InstrumentationMetricsSp *out = new(InstrumentationCustomQueriesSpec) (*in).DeepCopyInto(*out) } + if in.Exporters != nil { + in, out := &in.Exporters, &out.Exporters + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.PerDBMetricTargets != nil { + in, out := &in.PerDBMetricTargets, &out.PerDBMetricTargets + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstrumentationMetricsSpec. @@ -846,6 +868,22 @@ func (in *PGAdminList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PGAdminOAuthConfig) DeepCopyInto(out *PGAdminOAuthConfig) { + *out = *in + in.Secret.DeepCopyInto(&out.Secret) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PGAdminOAuthConfig. +func (in *PGAdminOAuthConfig) DeepCopy() *PGAdminOAuthConfig { + if in == nil { + return nil + } + out := new(PGAdminOAuthConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PGAdminPodSpec) DeepCopyInto(out *PGAdminPodSpec) { *out = *in @@ -2003,7 +2041,7 @@ func (in *PostgresClusterSpec) DeepCopyInto(out *PostgresClusterSpec) { in.Backups.DeepCopyInto(&out.Backups) if in.Config != nil { in, out := &in.Config, &out.Config - *out = new(PostgresConfig) + *out = new(PostgresConfigSpec) (*in).DeepCopyInto(*out) } if in.CustomTLSSecret != nil { @@ -2138,11 +2176,6 @@ func (in *PostgresClusterStatus) DeepCopyInto(out *PostgresClusterStatus) { *out = new(PGBackRestStatus) (*in).DeepCopyInto(*out) } - if in.RegistrationRequired != nil { - in, out := &in.RegistrationRequired, &out.RegistrationRequired - *out = new(RegistrationRequirementStatus) - **out = **in - } out.Proxy = in.Proxy if in.UserInterface != nil { in, out := &in.UserInterface, &out.UserInterface @@ -2175,7 +2208,7 @@ func (in *PostgresClusterStatus) DeepCopy() *PostgresClusterStatus { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PostgresConfig) DeepCopyInto(out *PostgresConfig) { +func (in *PostgresConfigSpec) DeepCopyInto(out *PostgresConfigSpec) { *out = *in if in.Files != nil { in, out := &in.Files, &out.Files @@ -2193,12 +2226,12 @@ func (in *PostgresConfig) DeepCopyInto(out *PostgresConfig) { } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresConfig. -func (in *PostgresConfig) DeepCopy() *PostgresConfig { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresConfigSpec. +func (in *PostgresConfigSpec) DeepCopy() *PostgresConfigSpec { if in == nil { return nil } - out := new(PostgresConfig) + out := new(PostgresConfigSpec) in.DeepCopyInto(out) return out } @@ -2318,6 +2351,11 @@ func (in *PostgresInstanceSetSpec) DeepCopyInto(out *PostgresInstanceSetSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.Volumes != nil { + in, out := &in.Volumes, &out.Volumes + *out = new(PostgresVolumesSpec) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresInstanceSetSpec. 
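The `PostgresVolumesSpec` handled in the deepcopy additions below (and defined on `PostgresInstanceSetSpec` earlier in this diff) adds an optional ephemeral volume for temporary files per instance set. A sketch, assuming `VolumeClaimSpec` carries the usual PVC fields:

```yaml
spec:
  instances:
  - name: instance1              # hypothetical instance set
    volumes:
      temp:
        accessModes: [ReadWriteOnce]
        resources:
          requests:
            storage: 1Gi         # hypothetical size for temporary files
```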
@@ -2465,16 +2503,20 @@ func (in *PostgresUserSpec) DeepCopy() *PostgresUserSpec { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *RegistrationRequirementStatus) DeepCopyInto(out *RegistrationRequirementStatus) { +func (in *PostgresVolumesSpec) DeepCopyInto(out *PostgresVolumesSpec) { *out = *in + if in.Temp != nil { + in, out := &in.Temp, &out.Temp + *out = (*in).DeepCopy() + } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RegistrationRequirementStatus. -func (in *RegistrationRequirementStatus) DeepCopy() *RegistrationRequirementStatus { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresVolumesSpec. +func (in *PostgresVolumesSpec) DeepCopy() *PostgresVolumesSpec { if in == nil { return nil } - out := new(RegistrationRequirementStatus) + out := new(PostgresVolumesSpec) in.DeepCopyInto(out) return out } @@ -2697,6 +2739,13 @@ func (in *StandalonePGAdminConfiguration) DeepCopyInto(out *StandalonePGAdminCon (*in).DeepCopyInto(*out) } out.Settings = in.Settings.DeepCopy() + if in.OAuthConfigurations != nil { + in, out := &in.OAuthConfigurations, &out.OAuthConfigurations + *out = make([]PGAdminOAuthConfig, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StandalonePGAdminConfiguration. diff --git a/testing/kuttl/README.md b/testing/kuttl/README.md index 41fbf46e19..a67ff35808 100644 --- a/testing/kuttl/README.md +++ b/testing/kuttl/README.md @@ -44,20 +44,6 @@ There are two ways to run a single test in isolation: - using an env var with the make target: `KUTTL_TEST='kuttl test --test ' make check-kuttl` - using `kubectl kuttl --test` flag: `kubectl kuttl test testing/kuttl/e2e-generated --test ` -### Writing additional tests - -To make it easier to read tests, we want to put our `assert.yaml`/`errors.yaml` files after the -files that create/update the objects for a step. To achieve this, infix an extra `-` between the -step number and the object/step name. 
- -For example, if the `00` test step wants to create a cluster and then assert that the cluster is ready, -the files would be named - -```yaml -00--cluster.yaml # note the extra `-` to ensure that it sorts above the following file -00-assert.yaml -``` - ### Generating tests KUTTL is good at setting up K8s objects for testing, but does not have a native way to dynamically diff --git a/testing/kuttl/e2e/cluster-pause/00--cluster.yaml b/testing/kuttl/e2e/cluster-pause/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/cluster-pause/00--cluster.yaml rename to testing/kuttl/e2e/cluster-pause/00-cluster.yaml diff --git a/testing/kuttl/e2e/cluster-pause/01--cluster-paused.yaml b/testing/kuttl/e2e/cluster-pause/01-cluster-paused.yaml similarity index 100% rename from testing/kuttl/e2e/cluster-pause/01--cluster-paused.yaml rename to testing/kuttl/e2e/cluster-pause/01-cluster-paused.yaml diff --git a/testing/kuttl/e2e/cluster-pause/02--cluster-resume.yaml b/testing/kuttl/e2e/cluster-pause/02-cluster-resume.yaml similarity index 100% rename from testing/kuttl/e2e/cluster-pause/02--cluster-resume.yaml rename to testing/kuttl/e2e/cluster-pause/02-cluster-resume.yaml diff --git a/testing/kuttl/e2e/cluster-start/00--cluster.yaml b/testing/kuttl/e2e/cluster-start/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/cluster-start/00--cluster.yaml rename to testing/kuttl/e2e/cluster-start/00-cluster.yaml diff --git a/testing/kuttl/e2e/cluster-start/01--connect.yaml b/testing/kuttl/e2e/cluster-start/01-connect.yaml similarity index 100% rename from testing/kuttl/e2e/cluster-start/01--connect.yaml rename to testing/kuttl/e2e/cluster-start/01-connect.yaml diff --git a/testing/kuttl/e2e/delete-namespace/README.md b/testing/kuttl/e2e/delete-namespace/README.md index 697e2ae915..4b0f951fef 100644 --- a/testing/kuttl/e2e/delete-namespace/README.md +++ b/testing/kuttl/e2e/delete-namespace/README.md @@ -6,6 +6,6 @@ * Check that nothing remains. Note: KUTTL provides a `$NAMESPACE` var that can be used in scripts/commands, -but which cannot be used in object definition yamls (like `01--cluster.yaml`). +but which cannot be used in object definition yamls (like `01-cluster.yaml`). Therefore, we use a given, non-random namespace that is defined in the makefile and generated with `generate-kuttl`. diff --git a/testing/kuttl/e2e/exporter-custom-queries/00-assert.yaml b/testing/kuttl/e2e/exporter-custom-queries/00-assert.yaml index bbf5c051fd..405969c18c 100644 --- a/testing/kuttl/e2e/exporter-custom-queries/00-assert.yaml +++ b/testing/kuttl/e2e/exporter-custom-queries/00-assert.yaml @@ -31,7 +31,7 @@ commands: contains "${queries_files}" "queries.yml" && !(contains "${queries_files}" "defaultQueries.yml") } || { - echo >&2 'The /conf directory should contain the queries.yml file. Instead it has:' + echo >&2 'The /conf directory should only contain the queries.yml file. 
Instead it has:' echo "${queries_files}" exit 1 } diff --git a/testing/kuttl/e2e/exporter-custom-queries/00--create-cluster.yaml b/testing/kuttl/e2e/exporter-custom-queries/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-custom-queries/00--create-cluster.yaml rename to testing/kuttl/e2e/exporter-custom-queries/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/exporter-custom-queries/01--change-custom-queries.yaml b/testing/kuttl/e2e/exporter-custom-queries/01-change-custom-queries.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-custom-queries/01--change-custom-queries.yaml rename to testing/kuttl/e2e/exporter-custom-queries/01-change-custom-queries.yaml diff --git a/testing/kuttl/e2e/exporter-no-tls/00--create-cluster.yaml b/testing/kuttl/e2e/exporter-no-tls/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-no-tls/00--create-cluster.yaml rename to testing/kuttl/e2e/exporter-no-tls/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/exporter-password-change/00--create-cluster.yaml b/testing/kuttl/e2e/exporter-password-change/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-password-change/00--create-cluster.yaml rename to testing/kuttl/e2e/exporter-password-change/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/exporter-password-change/02--change-password.yaml b/testing/kuttl/e2e/exporter-password-change/02-change-password.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-password-change/02--change-password.yaml rename to testing/kuttl/e2e/exporter-password-change/02-change-password.yaml diff --git a/testing/kuttl/e2e/exporter-password-change/README.md b/testing/kuttl/e2e/exporter-password-change/README.md index 2a5b596309..d3d11f263c 100644 --- a/testing/kuttl/e2e/exporter-password-change/README.md +++ b/testing/kuttl/e2e/exporter-password-change/README.md @@ -1,6 +1,6 @@ # Exporter Password Change -## 00--create-cluster: +## 00-create-cluster: The TestStep will: 1) Apply the `files/inital-postgrescluster.yaml` file to create a cluster with monitoring enabled @@ -13,7 +13,7 @@ The TestStep will: This TestAssert will loop through a script until: 1) the instance pod has the `ContainersReady` condition with status `true` -2) the asserts from `00--create-cluster` are met. +2) the asserts from `00-create-cluster` are met. 
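The renames throughout this section drop the extra `-` because KUTTL already treats `XX-assert.yaml` (and `XX-errors.yaml`) as the assertions for step `XX` regardless of alphabetical order, so the old sorting trick was unnecessary. Newer tests, such as otel-logging-and-metrics below, go further and use explicit TestStep objects that name their apply and assert files, as in this sketch:

```yaml
# 00-cluster.yaml: a TestStep that applies manifests and names its assert file
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply:
- files/00--create-cluster.yaml   # manifests applied for this step
assert:
- files/00-cluster-created.yaml   # state that must be reached before continuing
```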
## 01-assert: diff --git a/testing/kuttl/e2e/exporter-tls/00--create-cluster.yaml b/testing/kuttl/e2e/exporter-tls/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/exporter-tls/00--create-cluster.yaml rename to testing/kuttl/e2e/exporter-tls/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/01--valid-upgrade.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/01-valid-upgrade.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/01--valid-upgrade.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/01-valid-upgrade.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/10--cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/10-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/10--cluster.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/10-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/11--shutdown-cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/11-shutdown-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/11--shutdown-cluster.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/11-shutdown-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/12--start-and-update-version.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/12-start-and-update-version.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/12--start-and-update-version.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/12-start-and-update-version.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/13--shutdown-cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/13-shutdown-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/13--shutdown-cluster.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/13-shutdown-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/14--annotate-cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/14-annotate-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/14--annotate-cluster.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/14-annotate-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/15--start-cluster.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/15-start-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/15--start-cluster.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/15-start-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/17--check-version.yaml b/testing/kuttl/e2e/major-upgrade-missing-image/17-check-version.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade-missing-image/17--check-version.yaml rename to testing/kuttl/e2e/major-upgrade-missing-image/17-check-version.yaml diff --git a/testing/kuttl/e2e/major-upgrade-missing-image/README.md b/testing/kuttl/e2e/major-upgrade-missing-image/README.md index 1053da29ed..ec3d4493b5 100644 --- a/testing/kuttl/e2e/major-upgrade-missing-image/README.md +++ b/testing/kuttl/e2e/major-upgrade-missing-image/README.md @@ -6,31 +6,31 @@ PostgresCluster spec or via the RELATED_IMAGES environment variables. 
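For readers unfamiliar with the CRD driving these steps, here is a minimal PGUpgrade sketch using the version bounds from pgupgrade_types.go above. The metadata and cluster names are hypothetical, and the `postgresClusterName` field name is an assumption here:

```yaml
apiVersion: postgres-operator.crunchydata.com/v1beta1
kind: PGUpgrade
metadata:
  name: major-upgrade                 # hypothetical
spec:
  postgresClusterName: major-upgrade  # assumed field linking to the PostgresCluster
  fromPostgresVersion: 17             # Minimum=11, Maximum=18 after this change
  toPostgresVersion: 18
```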
### Basic PGUpgrade controller and CRD instance validation -* 01--valid-upgrade: create a valid PGUpgrade instance +* 01-valid-upgrade: create a valid PGUpgrade instance * 01-assert: check that the PGUpgrade instance exists and has the expected status ### Verify new statuses for missing required container images -* 10--cluster: create the cluster with an unavailable image (i.e. Postgres 11) +* 10-cluster: create the cluster with an unavailable image (i.e. Postgres 11) * 10-assert: check that the PGUpgrade instance has the expected reason: "PGClusterNotShutdown" * 11-shutdown-cluster: set the spec.shutdown value to 'true' as required for upgrade * 11-assert: check that the new reason is set, "PGClusterPrimaryNotIdentified" ### Update to an available Postgres version, start and upgrade PostgresCluster -* 12--start-and-update-version: update the Postgres version on both CRD instances and set 'shutdown' to false +* 12-start-and-update-version: update the Postgres version on both CRD instances and set 'shutdown' to false * 12-assert: verify that the cluster is running and the PGUpgrade instance now has the new status info with reason: "PGClusterNotShutdown" -* 13--shutdown-cluster: set spec.shutdown to 'true' +* 13-shutdown-cluster: set spec.shutdown to 'true' * 13-assert: check that the PGUpgrade instance has the expected reason: "PGClusterMissingRequiredAnnotation" -* 14--annotate-cluster: set the required annotation +* 14-annotate-cluster: set the required annotation * 14-assert: verify that the upgrade succeeded and the new Postgres version shows in the cluster's status -* 15--start-cluster: set the new Postgres version and spec.shutdown to 'false' +* 15-start-cluster: set the new Postgres version and spec.shutdown to 'false' ### Verify upgraded PostgresCluster * 15-assert: verify that the cluster is running * 16-check-pgbackrest: check that the pgbackrest setup has successfully completed -* 17--check-version: check the version reported by PostgreSQL +* 17-check-version: check the version reported by PostgreSQL * 17-assert: assert the Job from the previous step succeeded diff --git a/testing/kuttl/e2e/major-upgrade/02--valid-upgrade.yaml b/testing/kuttl/e2e/major-upgrade/02-valid-upgrade.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/02--valid-upgrade.yaml rename to testing/kuttl/e2e/major-upgrade/02-valid-upgrade.yaml diff --git a/testing/kuttl/e2e/major-upgrade/10--already-updated-cluster.yaml b/testing/kuttl/e2e/major-upgrade/10-already-updated-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/10--already-updated-cluster.yaml rename to testing/kuttl/e2e/major-upgrade/10-already-updated-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade/30--cluster.yaml b/testing/kuttl/e2e/major-upgrade/30-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/30--cluster.yaml rename to testing/kuttl/e2e/major-upgrade/30-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade/31--create-data.yaml b/testing/kuttl/e2e/major-upgrade/31-create-data.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/31--create-data.yaml rename to testing/kuttl/e2e/major-upgrade/31-create-data.yaml diff --git a/testing/kuttl/e2e/major-upgrade/32--shutdown-cluster.yaml b/testing/kuttl/e2e/major-upgrade/32-shutdown-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/32--shutdown-cluster.yaml rename to testing/kuttl/e2e/major-upgrade/32-shutdown-cluster.yaml diff --git 
a/testing/kuttl/e2e/major-upgrade/33--annotate-cluster.yaml b/testing/kuttl/e2e/major-upgrade/33-annotate-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/33--annotate-cluster.yaml rename to testing/kuttl/e2e/major-upgrade/33-annotate-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade/34--restart-cluster.yaml b/testing/kuttl/e2e/major-upgrade/34-restart-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/34--restart-cluster.yaml rename to testing/kuttl/e2e/major-upgrade/34-restart-cluster.yaml diff --git a/testing/kuttl/e2e/major-upgrade/36--check-data-and-version.yaml b/testing/kuttl/e2e/major-upgrade/36-check-data-and-version.yaml similarity index 100% rename from testing/kuttl/e2e/major-upgrade/36--check-data-and-version.yaml rename to testing/kuttl/e2e/major-upgrade/36-check-data-and-version.yaml diff --git a/testing/kuttl/e2e/optional-backups/00--cluster.yaml b/testing/kuttl/e2e/optional-backups/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/optional-backups/00--cluster.yaml rename to testing/kuttl/e2e/optional-backups/00-cluster.yaml diff --git a/testing/kuttl/e2e/optional-backups/04--cluster.yaml b/testing/kuttl/e2e/optional-backups/04-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/optional-backups/04--cluster.yaml rename to testing/kuttl/e2e/optional-backups/04-cluster.yaml diff --git a/testing/kuttl/e2e/optional-backups/10--cluster.yaml b/testing/kuttl/e2e/optional-backups/10-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/optional-backups/10--cluster.yaml rename to testing/kuttl/e2e/optional-backups/10-cluster.yaml diff --git a/testing/kuttl/e2e/optional-backups/20--cluster.yaml b/testing/kuttl/e2e/optional-backups/20-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/optional-backups/20--cluster.yaml rename to testing/kuttl/e2e/optional-backups/20-cluster.yaml diff --git a/testing/kuttl/e2e/optional-backups/22--cluster.yaml b/testing/kuttl/e2e/optional-backups/22-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/optional-backups/22--cluster.yaml rename to testing/kuttl/e2e/optional-backups/22-cluster.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/00-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/00-cluster.yaml new file mode 100644 index 0000000000..5957e0fed6 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/00-cluster.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/00--create-cluster.yaml +assert: +- files/00-cluster-created.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/01-add-instrumentation-to-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/01-add-instrumentation-to-postgrescluster.yaml new file mode 100644 index 0000000000..ddf7a754b4 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/01-add-instrumentation-to-postgrescluster.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/01--add-instrumentation.yaml +assert: +- files/01-instrumentation-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/02-assert-repo-host-does-not-logs.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/02-assert-repo-host-does-not-logs.yaml new file mode 100644 index 0000000000..31c077d540 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/02-assert-repo-host-does-not-logs.yaml @@ -0,0 +1,28 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all 
containers in the repo host pod are ready. +# Then, ensure that the collector logs for the repo-host do not contain any +# pgbackrest logs: the backup completed before the collector started, and the +# collector is configured to ingest only new log records on startup. +- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=pgbackrest) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { !(contains "${logs}" 'InstrumentationScope pgbackrest') } || { + retry "pgbackrest logs were found when we did not expect any" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/03-backup.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/03-backup.yaml new file mode 100644 index 0000000000..95daf31a6a --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/03-backup.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/03--annotate-cluster.yaml +assert: +- files/03-backup-completed.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/04-assert-repo-host-contains-logs.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/04-assert-repo-host-contains-logs.yaml new file mode 100644 index 0000000000..a6cb86fb22 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/04-assert-repo-host-contains-logs.yaml @@ -0,0 +1,26 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the repo host pod are ready. +# Then, ensure that the repo-host collector logs contain pgbackrest logs.
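The `check_containers_ready` helper in these scripts passes the Pod's `.status.conditions` through `jq`; the fragment it looks for is the standard Kubernetes Pod condition:

```yaml
status:
  conditions:
  - type: ContainersReady
    status: "True"
```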
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=pgbackrest) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope pgbackrest'; } || { + retry "pgbackrest logs were not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-pgbouncer.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-pgbouncer.yaml new file mode 100644 index 0000000000..2aecbc2f61 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/05-assert-pgbouncer.yaml @@ -0,0 +1,34 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the pgbouncer pod are ready. +# Then, scrape the collector metrics and check that pgbouncer metrics are present. +# Then, check the collector logs for pgbouncer logs. +- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/role=pgbouncer) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics | grep 'ccp_pgbouncer_clients_wait_seconds') + { contains "${scrape_metrics}" 'ccp_pgbouncer_clients_wait_seconds'; } || { + retry "pgbouncer metric not found" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope pgbouncer'; } || { + retry "pgbouncer logs not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-instance.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-instance.yaml new file mode 100644 index 0000000000..67221cf8f5 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/06-assert-instance.yaml @@ -0,0 +1,67 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that metrics from both the 5m +# and 5s queries are present, as well as patroni metrics. +# Then, check the collector logs for patroni, pgbackrest, and postgres logs. +# Finally, ensure the monitoring user exists and is configured.
+- timeout: 400 + script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope patroni'; } || { + retry "patroni logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope pgbackrest'; } || { + retry "pgbackrest logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope postgres'; } || { + retry "postgres logs not found" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics | grep \ + -e 'ccp_connection_stats_active' \ + -e 'patroni_postgres_running' \ + -e 'ccp_database_size_bytes') + { contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || { + retry "5 second metric not found" + exit 1 + } + { contains "${scrape_metrics}" 'patroni_postgres_running'; } || { + retry "patroni metric not found" + exit 1 + } + { contains "${scrape_metrics}" 'ccp_database_size_bytes'; } || { + retry "5 minute metric not found" + exit 1 + } + + kubectl exec --stdin "${pod}" --namespace "${NAMESPACE}" -c database \ + -- psql -qb --set ON_ERROR_STOP=1 --file=- <<'SQL' + DO $$ + DECLARE + result record; + BEGIN + SELECT * INTO result FROM pg_catalog.pg_roles WHERE rolname = 'ccp_monitoring'; + ASSERT FOUND, 'user not found'; + END $$ + SQL diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/07-add-instrumentation-to-pgadmin.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/07-add-instrumentation-to-pgadmin.yaml new file mode 100644 index 0000000000..55f2179939 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/07-add-instrumentation-to-pgadmin.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/07--add-instrumentation.yaml +assert: +- files/07-instrumentation-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/08-assert-pgadmin.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/08-assert-pgadmin.yaml new file mode 100644 index 0000000000..71434397e1 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/08-assert-pgadmin.yaml @@ -0,0 +1,30 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the pgadmin pod are ready. +# Then, check the collector logs for pgadmin and gunicorn logs. 
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/pgadmin=otel-pgadmin) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope pgadmin'; } || { + retry "pgadmin logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope gunicorn.access'; } || { + retry "gunicorn logs not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/09-add-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/09-add-custom-queries.yaml new file mode 100644 index 0000000000..223b1d71a8 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/09-add-custom-queries.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/09--add-custom-queries.yaml +assert: +- files/09-custom-queries-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/10-assert-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/10-assert-custom-queries.yaml new file mode 100644 index 0000000000..9476bb564e --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/10-assert-custom-queries.yaml @@ -0,0 +1,41 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that the two metrics that we +# checked for earlier are no longer there. +# Then, check that the two custom metrics that we added are present. 
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics) + { !(contains "${scrape_metrics}" 'ccp_connection_stats_active') } || { + retry "5 second metric still present" + exit 1 + } + { !(contains "${scrape_metrics}" 'ccp_database_size_bytes') } || { + retry "5 minute metric still present" + exit 1 + } + { contains "${scrape_metrics}" 'custom_table_count'; } || { + retry "fast custom metric not found" + exit 1 + } + { contains "${scrape_metrics}" 'custom_pg_stat_statements_row_count'; } || { + retry "slow custom metric not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/11-add-per-db-metrics-to-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/11-add-per-db-metrics-to-postgrescluster.yaml new file mode 100644 index 0000000000..8e73e1874e --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/11-add-per-db-metrics-to-postgrescluster.yaml @@ -0,0 +1,4 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/11--add-per-db-metrics.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml new file mode 100644 index 0000000000..9f1f00d40e --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/12-assert-per-db-queries.yaml @@ -0,0 +1,32 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that the per-db metrics +# are present for the single added target. +- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics | grep 'ccp_table_size_bytes') + { contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="pikachu"'; } || { + retry "ccp_table_size_bytes not found for pikachu" + exit 1 + } + { ! 
contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="onix"'; } || { + retry "ccp_table_size_bytes found for onix" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/13-add-second-per-db-metrics-to-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/13-add-second-per-db-metrics-to-postgrescluster.yaml new file mode 100644 index 0000000000..12791e5066 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/13-add-second-per-db-metrics-to-postgrescluster.yaml @@ -0,0 +1,4 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/13--add-per-db-metrics.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml new file mode 100644 index 0000000000..234f33ae1b --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/14-assert-per-db-queries-for-multiple-targets.yaml @@ -0,0 +1,32 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that the per-db metrics +# are present for both added targets. +- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics | grep 'ccp_table_size_bytes') + { contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="pikachu"'; } || { + retry "ccp_table_size_bytes not found for pikachu" + exit 1 + } + { contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="onix"'; } || { + retry "ccp_table_size_bytes not found for onix" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/15-remove-per-db-metrics-from-postgrescluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/15-remove-per-db-metrics-from-postgrescluster.yaml new file mode 100644 index 0000000000..549f21d55e --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/15-remove-per-db-metrics-from-postgrescluster.yaml @@ -0,0 +1,4 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/15--remove-per-db-metrics.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/16-assert-per-db-query-removed.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/16-assert-per-db-query-removed.yaml new file mode 100644 index 0000000000..d75c06827d --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/16-assert-per-db-query-removed.yaml @@ -0,0 +1,32 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that the per-db metrics +# are absent from the targets since they've been removed. 
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics) + { ! contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="pikachu"'; } || { + retry "ccp_table_size_bytes found for pikachu" + exit 1 + } + { ! contains "${scrape_metrics}" 'ccp_table_size_bytes{dbname="onix"'; } || { + retry "ccp_table_size_bytes found for onix" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/17-add-custom-queries-per-db.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/17-add-custom-queries-per-db.yaml new file mode 100644 index 0000000000..e1b2ebfeb3 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/17-add-custom-queries-per-db.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/17--add-custom-queries-per-db.yaml +assert: +- files/17-custom-queries-per-db-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml new file mode 100644 index 0000000000..e6b1365803 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/18-assert-custom-queries-per-db.yaml @@ -0,0 +1,42 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that the two metrics that we +# checked for earlier are no longer there. +# Then, check that the two custom metrics that we added are present +# only for the targets that were specified. 
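+# For reference, the per-db custom queries attach dbname, schemaname, and
+# relname as attribute columns, so a matching exposition line looks roughly
+# like this (label values other than dbname are illustrative):
+#   ccp_table_size_bytes_1{dbname="pikachu",schemaname="public",relname="example",server="localhost:5432"} 8192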
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics | grep 'ccp_table_size_bytes') + { contains "${scrape_metrics}" 'ccp_table_size_bytes_1{dbname="pikachu"'; } || { + retry "ccp_table_size_bytes_1 not found for pikachu db" + exit 1 + } + { contains "${scrape_metrics}" 'ccp_table_size_bytes_1{dbname="onix"'; } || { + retry "ccp_table_size_bytes_1 not found for onix db" + exit 1 + } + { contains "${scrape_metrics}" 'ccp_table_size_bytes_2{dbname="onix"'; } || { + retry "ccp_table_size_bytes_2 not found for onix db" + exit 1 + } + { ! contains "${scrape_metrics}" 'ccp_table_size_bytes_2{dbname="pikachu"'; } || { + retry "ccp_table_size_bytes_2 found for pikachu db" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/19-add-logs-metrics-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/19-add-logs-metrics-exporter.yaml new file mode 100644 index 0000000000..7b21e0ef50 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/19-add-logs-metrics-exporter.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/19--add-logs-metrics-exporter.yaml +assert: +- files/19-logs-metrics-exporter-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-metrics-exported.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-metrics-exported.yaml new file mode 100644 index 0000000000..2022397ce9 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-metrics-exported.yaml @@ -0,0 +1,52 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that the standalone otel-collector container is ready. +# Then, check the standalone collector's logs for entries from all six potential +# sources: patroni, pgbackrest, postgres, pgbouncer, pgadmin, and gunicorn.
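+# For reference, the standalone collector's debug exporter runs with detailed
+# verbosity, so its container log prints each batch's scope as a line such as
+# 'InstrumentationScope patroni' along with any ccp_* metric names; the greps
+# below key off those strings.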
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" -l app=opentelemetry) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c otel-collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope patroni'; } || { + retry "patroni logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope pgbackrest'; } || { + retry "pgbackrest logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope postgres'; } || { + retry "postgres logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope pgbouncer'; } || { + retry "pgbouncer logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope pgadmin'; } || { + retry "pgadmin logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope gunicorn.access'; } || { + retry "gunicorn logs not found" + exit 1 + } + + metrics=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c otel-collector | grep ccp) + { contains "${metrics}" 'ccp_stat'; } || { + retry "metrics not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/21-cluster-no-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/21-cluster-no-backups.yaml new file mode 100644 index 0000000000..a24e1c8f2d --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/21-cluster-no-backups.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/21--create-cluster.yaml +assert: +- files/21-cluster-created.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/22-assert-instance.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/22-assert-instance.yaml new file mode 100644 index 0000000000..226bcce3cd --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/22-assert-instance.yaml @@ -0,0 +1,57 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +commands: +# First, check that all containers in the instance pod are ready. +# Then, grab the collector metrics output and check that a postgres metric +# and a patroni metric are present. +# Then, check the collector logs for patroni and postgres logs. +# Finally, ensure the monitoring user exists and is configured.
+- script: | + retry() { bash -ceu 'printf "$1\nSleeping...\n" && sleep 5' - "$@"; } + check_containers_ready() { bash -ceu 'echo "$1" | jq -e ".[] | select(.type==\"ContainersReady\") | .status==\"True\""' - "$@"; } + contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } + + pod=$(kubectl get pods -o name -n "${NAMESPACE}" \ + -l postgres-operator.crunchydata.com/cluster=otel-cluster-no-backups,postgres-operator.crunchydata.com/data=postgres) + [ "$pod" = "" ] && retry "Pod not found" && exit 1 + + condition_json=$(kubectl get "${pod}" -n "${NAMESPACE}" -o jsonpath="{.status.conditions}") + [ "$condition_json" = "" ] && retry "conditions not found" && exit 1 + { check_containers_ready "$condition_json"; } || { + retry "containers not ready" + exit 1 + } + + scrape_metrics=$(kubectl exec "${pod}" -c collector -n "${NAMESPACE}" -- \ + curl --insecure --silent http://localhost:9187/metrics | grep \ + -e 'ccp_connection_stats_active' \ + -e 'patroni_postgres_running') + { contains "${scrape_metrics}" 'ccp_connection_stats_active'; } || { + retry "5 second metric not found" + exit 1 + } + { contains "${scrape_metrics}" 'patroni_postgres_running'; } || { + retry "patroni metric not found" + exit 1 + } + + logs=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c collector | grep InstrumentationScope) + { contains "${logs}" 'InstrumentationScope patroni'; } || { + retry "patroni logs not found" + exit 1 + } + { contains "${logs}" 'InstrumentationScope postgres'; } || { + retry "postgres logs not found" + exit 1 + } + + kubectl exec --stdin "${pod}" --namespace "${NAMESPACE}" -c database \ + -- psql -qb --set ON_ERROR_STOP=1 --file=- <<'SQL' + DO $$ + DECLARE + result record; + BEGIN + SELECT * INTO result FROM pg_catalog.pg_roles WHERE rolname = 'ccp_monitoring'; + ASSERT FOUND, 'user not found'; + END $$ + SQL diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/23-cluster-add-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/23-cluster-add-backups.yaml new file mode 100644 index 0000000000..52990e4372 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/23-cluster-add-backups.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/23--add-backups.yaml +assert: +- files/23-backups-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/24-remove-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/24-remove-backups.yaml new file mode 100644 index 0000000000..abd64d40a9 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/24-remove-backups.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: +- command: |- + kubectl patch postgrescluster otel-cluster-no-backups --type 'merge' -p '{"spec":{"backups": null}}' + namespaced: true diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/25-annotate-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/25-annotate-cluster.yaml new file mode 100644 index 0000000000..d017479ca3 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/25-annotate-cluster.yaml @@ -0,0 +1,7 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: +- command: kubectl annotate postgrescluster otel-cluster-no-backups postgres-operator.crunchydata.com/authorizeBackupRemoval="true" + namespaced: true +assert: +- files/25-backups-removed.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/README.md b/testing/kuttl/e2e/otel-logging-and-metrics/README.md new file mode 100644 index 0000000000..46e3169e71 --- /dev/null +++ 
b/testing/kuttl/e2e/otel-logging-and-metrics/README.md @@ -0,0 +1,40 @@ +# Test OTel Logging and Metrics + +## Assumptions + +This test assumes that the operator has both the OpenTelemetryLogs and OpenTelemetryMetrics feature gates turned on and that the operator version is 5.8 or greater. + +## Process + +1. Create a basic cluster with pgbouncer and pgadmin in place. (00) + 1. Ensure the cluster comes up, that all containers are running and ready, and that the initial backup is complete. +2. Add the `instrumentation` spec to both the PostgresCluster and PGAdmin manifests. (01-08) + 1. Ensure that OTel collector containers and `crunchy-otel-collector` labels are added to the four pods (postgres instance, repo-host, pgbouncer, & pgadmin) and that the collector containers are running and ready. + 2. Assert that the instance pod collector is getting postgres and patroni metrics and postgres, patroni, and pgbackrest logs. + 3. Assert that the pgbouncer pod collector is getting pgbouncer metrics and logs. + 4. Assert that the pgAdmin pod collector is getting pgAdmin and gunicorn logs. + 5. Assert that the repo-host pod collector is NOT getting pgbackrest logs. We do not expect logs yet: the initial backup completed and created a log file, but we configure the collector to ingest only logs written after it starts up. + 6. Create a manual backup and ensure that it completes successfully. + 7. Ensure that the repo-host pod collector is now getting pgbackrest logs. +3. Add both "add" and "remove" custom queries to the PostgresCluster `instrumentation` spec (see the sketch after this list) and create a ConfigMap that holds the custom queries to add. (09-10) + 1. Ensure that the ConfigMap is created. + 2. Assert that the metrics that were removed (which we checked for earlier) are in fact no longer present in the collector metrics. + 3. Assert that the custom metrics that were added are present in the collector metrics. +4. Exercise the per-db metrics functionality: add users and per-db targets, remove metrics from the per-db defaults, and add custom metrics with database targets. (11-18) + 1. Add users and a per-db target; assert that the per-db default metric is available for the named target. + 2. Add a second per-db target; assert that the per-db default metric is available for all named targets. + 3. Remove the per-db metric; assert that the per-db default metric is absent for all targets. + 4. Add custom metrics with specified databases; assert that each metric is reported only for its specified targets. +5. Add an `otlp` exporter to both the PostgresCluster and PGAdmin `instrumentation` specs and create a standalone OTel collector to receive data from our sidecar collectors. (19-20) + 1. Ensure that the ConfigMap, Service, and Deployment for the standalone OTel collector come up and that the collector container is running and ready. + 2. Assert that the standalone collector is receiving logs from all of our components (i.e. the standalone collector is getting logs for postgres, patroni, pgbackrest, pgbouncer, pgadmin, and gunicorn). +6. Create a new cluster with the `instrumentation` spec in place, but no `backups` spec, to test the OTel features with optional backups. (21-25) + 1. Ensure that the cluster comes up and the database and collector containers are running and ready. + 2. Add a backups spec to the new cluster and ensure that pgbackrest is added to the instance pod, a repo-host pod is created, and the collector runs on both pods. + 3. Remove the backups spec from the new cluster. + 4. Annotate the cluster to allow backups to be removed. + 5. Ensure that the repo-host pod is destroyed, pgbackrest is removed from the instance pod, and the collector continues to run on the instance pod.
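+
+For reference, the `instrumentation` stanza these steps exercise has roughly the
+following shape, abbreviated from the manifests under `files/` (the ConfigMap
+name and key are the ones this test creates; the other values are this test's
+examples, not defaults):
+
+```yaml
+spec:
+  instrumentation:
+    metrics:
+      customQueries:
+        add:
+        - name: 2fast2furious
+          queries:
+            name: my-custom-queries            # ConfigMap with query definitions
+            key: my-fast-custom-queries.yaml   # key within that ConfigMap
+        remove:
+        - ccp_connection_stats_active
+      perDBMetricTargets:
+      - pikachu
+      exporters: ['otlp']
+    logs:
+      exporters: ['otlp']
+```
+
+Steps 11-18 drive the `perDBMetricTargets` list, and step 19 adds the `otlp`
+exporters together with the standalone collector that receives their data.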
+ +### NOTES + +This test could flake if, for some reason, a component is not producing any logs. If we start to see that happen, we could either add test steps that perform actions known to trigger logs, or turn up the log levels (although the latter could create more problems, as we have seen issues with the collector when the stream of logs is too voluminous). diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/00--create-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/00--create-cluster.yaml new file mode 100644 index 0000000000..3345bef5f9 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/00--create-cluster.yaml @@ -0,0 +1,60 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + backups: + pgbackrest: + manual: + repoName: repo1 + options: + - --type=diff + repos: + - name: repo1 + volume: + volumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + config: + parameters: + log_min_messages: INFO + proxy: + pgBouncer: {} +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PGAdmin +metadata: + name: otel-pgadmin +spec: + users: + - username: otel@example.com + role: Administrator + passwordRef: + name: pgadmin-password-secret + key: otel-password + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + serverGroups: + - name: supply + # An empty selector selects all postgresclusters in the Namespace + postgresClusterSelector: {} + config: + settings: + AUTHENTICATION_SOURCES: ['internal'] diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/00-cluster-created.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/00-cluster-created.yaml new file mode 100644 index 0000000000..97bd3e2b97 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/00-cluster-created.yaml @@ -0,0 +1,112 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster +status: + containerStatuses: + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster +status: + containerStatuses: + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/pgbackrest-backup: replica-create +status: +
succeeded: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster +status: + containerStatuses: + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +status: + containerStatuses: + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml new file mode 100644 index 0000000000..ebde9f7caa --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/01--add-instrumentation.yaml @@ -0,0 +1,36 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + backups: + pgbackrest: + manual: + repoName: repo1 + options: + - --type=diff + repos: + - name: repo1 + volume: + volumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + config: + parameters: + log_min_messages: INFO + proxy: + pgBouncer: {} + instrumentation: {} diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml new file mode 100644 index 0000000000..672bdd2d1d --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/01-instrumentation-added.yaml @@ -0,0 +1,116 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - 
name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +status: + containerStatuses: + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque +--- diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/03--annotate-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/03--annotate-cluster.yaml new file mode 100644 index 0000000000..1133b7fe15 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/03--annotate-cluster.yaml @@ -0,0 +1,8 @@ +--- +# Annotate the cluster to trigger a backup. +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster + annotations: + postgres-operator.crunchydata.com/pgbackrest-backup: do-it diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/03-backup-completed.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/03-backup-completed.yaml new file mode 100644 index 0000000000..fed1f745b9 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/03-backup-completed.yaml @@ -0,0 +1,8 @@ +apiVersion: batch/v1 +kind: Job +metadata: + labels: + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/pgbackrest-backup: manual +status: + succeeded: 1 diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/07--add-instrumentation.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/07--add-instrumentation.yaml new file mode 100644 index 0000000000..166f0d3347 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/07--add-instrumentation.yaml @@ -0,0 +1,26 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PGAdmin +metadata: + name: otel-pgadmin +spec: + users: + - username: otel@example.com + role: Administrator + passwordRef: + name: pgadmin-password-secret + key: otel-password + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + serverGroups: + - name: supply + # An empty selector selects all postgresclusters in the Namespace + postgresClusterSelector: {} + config: + settings: + AUTHENTICATION_SOURCES: ['internal'] + instrumentation: {} diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/07-instrumentation-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/07-instrumentation-added.yaml new file mode 100644 index 0000000000..858b78ff83 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/07-instrumentation-added.yaml @@ -0,0 +1,120 @@ 
+apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque +--- diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/09--add-custom-queries.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/09--add-custom-queries.yaml new file mode 100644 index 0000000000..ed133fc26a --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/09--add-custom-queries.yaml @@ -0,0 +1,75 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + backups: + pgbackrest: + manual: + repoName: repo1 + options: + - --type=diff + repos: + - name: repo1 + volume: + volumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + config: + parameters: + log_min_messages: INFO + proxy: + pgBouncer: {} + instrumentation: + metrics: + customQueries: + add: + - name: 
slow-custom-queries + queries: + name: my-custom-queries + key: my-slow-custom-queries.yaml + collectionInterval: 300s + - name: 2fast2furious + queries: + name: my-custom-queries + key: my-fast-custom-queries.yaml + remove: + - ccp_connection_stats_active + - ccp_database_size_bytes +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-custom-queries +data: + my-fast-custom-queries.yaml: | + - sql: > + SELECT count(*) FROM information_schema.tables; + metrics: + - metric_name: custom_table_count + value_column: count + description: Number of tables in the database + static_attributes: + server: "localhost:5432" + my-slow-custom-queries.yaml: | + - sql: > + SELECT count(*) FROM pg_stat_statements; + metrics: + - metric_name: custom_pg_stat_statements_row_count + value_column: count + description: Number of rows in pg_stat_statements + static_attributes: + server: "localhost:5432" diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/09-custom-queries-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/09-custom-queries-added.yaml new file mode 100644 index 0000000000..1a756b7a73 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/09-custom-queries-added.yaml @@ -0,0 +1,124 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + 
started: true + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-custom-queries diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-per-db-metrics.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-per-db-metrics.yaml new file mode 100644 index 0000000000..1cf4c28a83 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/11--add-per-db-metrics.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + users: + - name: ash + databases: + - pikachu + - name: brock + databases: + - onix + instrumentation: + metrics: + perDBMetricTargets: + - pikachu diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/13--add-per-db-metrics.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/13--add-per-db-metrics.yaml new file mode 100644 index 0000000000..c383238be9 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/13--add-per-db-metrics.yaml @@ -0,0 +1,11 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + instrumentation: + metrics: + perDBMetricTargets: + - pikachu + - onix diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/15--remove-per-db-metrics.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/15--remove-per-db-metrics.yaml new file mode 100644 index 0000000000..4421de8482 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/15--remove-per-db-metrics.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + instrumentation: + metrics: + customQueries: + remove: + - ccp_connection_stats_active + - ccp_database_size_bytes + - ccp_table_size_bytes diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/17--add-custom-queries-per-db.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/17--add-custom-queries-per-db.yaml new file mode 100644 index 0000000000..92360a4a9a --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/17--add-custom-queries-per-db.yaml @@ -0,0 +1,62 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + instrumentation: + metrics: + customQueries: + add: + - name: custom1 + databases: [pikachu, onix] + queries: + name: my-custom-queries2 + key: custom1.yaml + - name: custom2 + databases: [onix] + queries: + name: my-custom-queries2 + key: custom2.yaml +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-custom-queries2 +data: + custom1.yaml: | + - sql: > + SELECT current_database() as dbname + , n.nspname as schemaname + , c.relname + , pg_catalog.pg_total_relation_size(c.oid) as bytes + FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid + WHERE NOT pg_is_other_temp_schema(n.oid) + AND relkind IN ('r', 'm', 'f'); + metrics: + - metric_name: ccp_table_size_bytes_1 + value_type: double + value_column: bytes + description: "Table size in bytes including indexes" + attribute_columns: ["dbname", "schemaname", "relname"] + static_attributes: + server: "localhost:5432" + custom2.yaml: | + - sql: > + SELECT current_database() as dbname + , n.nspname as 
schemaname + , c.relname + , pg_catalog.pg_total_relation_size(c.oid) as bytes + FROM pg_catalog.pg_class c + JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid + WHERE NOT pg_is_other_temp_schema(n.oid) + AND relkind IN ('r', 'm', 'f'); + metrics: + - metric_name: ccp_table_size_bytes_2 + value_type: double + value_column: bytes + description: "Table size in bytes including indexes" + attribute_columns: ["dbname", "schemaname", "relname"] + static_attributes: + server: "localhost:5432" \ No newline at end of file diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/17-custom-queries-per-db-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/17-custom-queries-per-db-added.yaml new file mode 100644 index 0000000000..5bd9cec286 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/17-custom-queries-per-db-added.yaml @@ -0,0 +1,124 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-custom-queries2 diff --git 
a/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-metrics-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-metrics-exporter.yaml new file mode 100644 index 0000000000..67926505c0 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-metrics-exporter.yaml @@ -0,0 +1,209 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + backups: + pgbackrest: + manual: + repoName: repo1 + options: + - --type=diff + repos: + - name: repo1 + volume: + volumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + config: + parameters: + log_min_messages: INFO + proxy: + pgBouncer: {} + instrumentation: + metrics: + exporters: ['otlp'] + customQueries: + add: + - name: slow-custom-queries + queries: + name: my-custom-queries + key: my-slow-custom-queries.yaml + collectionInterval: 300s + - name: 2fast2furious + queries: + name: my-custom-queries + key: my-fast-custom-queries.yaml + remove: + - ccp_connection_stats_active + - ccp_database_size_bytes + config: + exporters: + otlp: + endpoint: otel-collector:4317 + tls: + insecure: true + logs: + exporters: ['otlp'] + retentionPeriod: 1h +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PGAdmin +metadata: + name: otel-pgadmin +spec: + users: + - username: otel@example.com + role: Administrator + passwordRef: + name: pgadmin-password-secret + key: otel-password + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + serverGroups: + - name: supply + # An empty selector selects all postgresclusters in the Namespace + postgresClusterSelector: {} + config: + settings: + AUTHENTICATION_SOURCES: ['internal'] + instrumentation: + config: + exporters: + otlp: + endpoint: otel-collector:4317 + tls: + insecure: true + logs: + exporters: ['otlp'] + retentionPeriod: 1h +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-conf + labels: + app: opentelemetry + component: otel-collector-conf +data: + otel-collector-config: | + receivers: + otlp: + protocols: + grpc: + endpoint: ${env:MY_POD_IP}:4317 + http: + endpoint: ${env:MY_POD_IP}:4318 + extensions: + zpages: {} + exporters: + debug: + verbosity: detailed + service: + extensions: [zpages] + pipelines: + logs/1: + receivers: [otlp] + exporters: [debug] + metrics/1: + receivers: [otlp] + exporters: [debug] +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-collector + labels: + app: opentelemetry + component: otel-collector +spec: + ports: + - name: otlp-grpc # Default endpoint for OpenTelemetry gRPC receiver. + port: 4317 + protocol: TCP + targetPort: 4317 + - name: otlp-http # Default endpoint for OpenTelemetry HTTP receiver. + port: 4318 + protocol: TCP + targetPort: 4318 + - name: metrics # Default endpoint for querying metrics. 
+ port: 8888 + selector: + component: otel-collector +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector + labels: + app: opentelemetry + component: otel-collector +spec: + selector: + matchLabels: + app: opentelemetry + component: otel-collector + minReadySeconds: 5 + progressDeadlineSeconds: 120 + replicas: 1 #TODO - adjust this to your own requirements + template: + metadata: + labels: + app: opentelemetry + component: otel-collector + spec: + containers: + - command: + - "/otelcol" + - "--config=/conf/otel-collector-config.yaml" + image: otel/opentelemetry-collector:latest + name: otel-collector + resources: + limits: + cpu: 1000m + memory: 2Gi + requests: + cpu: 200m + memory: 400Mi + ports: + - containerPort: 55679 # Default endpoint for ZPages. + - containerPort: 4317 # Default endpoint for OpenTelemetry receiver. + - containerPort: 14250 # Default endpoint for Jaeger gRPC receiver. + - containerPort: 14268 # Default endpoint for Jaeger HTTP receiver. + - containerPort: 9411 # Default endpoint for Zipkin receiver. + - containerPort: 8888 # Default endpoint for querying metrics. + env: + - name: MY_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: GOMEMLIMIT + value: 1600MiB + volumeMounts: + - name: otel-collector-config-vol + mountPath: /conf + volumes: + - configMap: + name: otel-collector-conf + items: + - key: otel-collector-config + path: otel-collector-config.yaml + name: otel-collector-config-vol diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-metrics-exporter-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-metrics-exporter-added.yaml new file mode 100644 index 0000000000..f730898692 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-metrics-exporter-added.yaml @@ -0,0 +1,155 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 + proxy: + pgBouncer: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/role: pgbouncer + postgres-operator.crunchydata.com/cluster: otel-cluster + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbouncer + ready: true + started: true + - name: pgbouncer-config + ready: true + 
started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-primary +--- +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgadmin + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgadmin + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + postgres-operator.crunchydata.com/role: pgadmin + postgres-operator.crunchydata.com/pgadmin: otel-pgadmin +type: Opaque +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-custom-queries +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-conf +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-collector +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector +status: + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + app: opentelemetry +status: + containerStatuses: + - name: otel-collector + ready: true + started: true + phase: Running diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/21--create-cluster.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/21--create-cluster.yaml new file mode 100644 index 0000000000..3983405b34 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/21--create-cluster.yaml @@ -0,0 +1,16 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster-no-backups +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + instrumentation: {} diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/21-cluster-created.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/21-cluster-created.yaml new file mode 100644 index 0000000000..c9aad7ec25 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/21-cluster-created.yaml @@ -0,0 +1,36 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster-no-backups +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-no-backups-primary diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/23--add-backups.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/23--add-backups.yaml new file mode 100644 index 0000000000..bb7c70ea37 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/23--add-backups.yaml @@ -0,0 +1,31 @@ +--- +apiVersion: postgres-operator.crunchydata.com/v1beta1 
+kind: PostgresCluster +metadata: + name: otel-cluster-no-backups +spec: + postgresVersion: ${KUTTL_PG_VERSION} + instances: + - name: instance1 + dataVolumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + backups: + pgbackrest: + manual: + repoName: repo1 + options: + - --type=diff + repos: + - name: repo1 + volume: + volumeClaimSpec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + instrumentation: {} diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/23-backups-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/23-backups-added.yaml new file mode 100644 index 0000000000..52221d2349 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/23-backups-added.yaml @@ -0,0 +1,71 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster-no-backups +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: pgbackrest + postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: pgbackrest + ready: true + started: true + - name: pgbackrest-config + ready: true + started: true + phase: Running +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups + postgres-operator.crunchydata.com/pgbackrest-backup: replica-create +status: + succeeded: 1 +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-no-backups-primary diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/25-backups-removed.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/25-backups-removed.yaml new file mode 100644 index 0000000000..c9aad7ec25 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/25-backups-removed.yaml @@ -0,0 +1,36 @@ +apiVersion: postgres-operator.crunchydata.com/v1beta1 +kind: PostgresCluster +metadata: + name: otel-cluster-no-backups +status: + instances: + - name: instance1 + readyReplicas: 1 + replicas: 1 + updatedReplicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + labels: + postgres-operator.crunchydata.com/data: postgres + postgres-operator.crunchydata.com/role: master + postgres-operator.crunchydata.com/cluster: otel-cluster-no-backups + postgres-operator.crunchydata.com/crunchy-otel-collector: "true" +status: + containerStatuses: + - name: collector + ready: true + started: true + - name: database + ready: true + started: true + - name: replication-cert-copy + ready: true + started: true + phase: Running +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-cluster-no-backups-primary diff --git a/testing/kuttl/e2e/password-change/00--cluster.yaml 
b/testing/kuttl/e2e/password-change/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/00--cluster.yaml rename to testing/kuttl/e2e/password-change/00-cluster.yaml diff --git a/testing/kuttl/e2e/password-change/01--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/01-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/01--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/01-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/01--psql-connect.yaml b/testing/kuttl/e2e/password-change/01-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/01--psql-connect.yaml rename to testing/kuttl/e2e/password-change/01-psql-connect.yaml diff --git a/testing/kuttl/e2e/password-change/02--secret.yaml b/testing/kuttl/e2e/password-change/02-secret.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/02--secret.yaml rename to testing/kuttl/e2e/password-change/02-secret.yaml diff --git a/testing/kuttl/e2e/password-change/03--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/03-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/03--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/03-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/03--psql-connect.yaml b/testing/kuttl/e2e/password-change/03-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/03--psql-connect.yaml rename to testing/kuttl/e2e/password-change/03-psql-connect.yaml diff --git a/testing/kuttl/e2e/password-change/04--secret.yaml b/testing/kuttl/e2e/password-change/04-secret.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/04--secret.yaml rename to testing/kuttl/e2e/password-change/04-secret.yaml diff --git a/testing/kuttl/e2e/password-change/05--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/05-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/05--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/05-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/05--psql-connect.yaml b/testing/kuttl/e2e/password-change/05-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/05--psql-connect.yaml rename to testing/kuttl/e2e/password-change/05-psql-connect.yaml diff --git a/testing/kuttl/e2e/password-change/06--cluster.yaml b/testing/kuttl/e2e/password-change/06-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/06--cluster.yaml rename to testing/kuttl/e2e/password-change/06-cluster.yaml diff --git a/testing/kuttl/e2e/password-change/07--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/07-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/07--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/07-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/07--psql-connect.yaml b/testing/kuttl/e2e/password-change/07-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/07--psql-connect.yaml rename to testing/kuttl/e2e/password-change/07-psql-connect.yaml diff --git a/testing/kuttl/e2e/password-change/08--secret.yaml b/testing/kuttl/e2e/password-change/08-secret.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/08--secret.yaml rename to testing/kuttl/e2e/password-change/08-secret.yaml diff --git 
a/testing/kuttl/e2e/password-change/09--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/09-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/09--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/09-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/09--psql-connect.yaml b/testing/kuttl/e2e/password-change/09-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/09--psql-connect.yaml rename to testing/kuttl/e2e/password-change/09-psql-connect.yaml diff --git a/testing/kuttl/e2e/password-change/10--secret.yaml b/testing/kuttl/e2e/password-change/10-secret.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/10--secret.yaml rename to testing/kuttl/e2e/password-change/10-secret.yaml diff --git a/testing/kuttl/e2e/password-change/11--psql-connect-uri.yaml b/testing/kuttl/e2e/password-change/11-psql-connect-uri.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/11--psql-connect-uri.yaml rename to testing/kuttl/e2e/password-change/11-psql-connect-uri.yaml diff --git a/testing/kuttl/e2e/password-change/11--psql-connect.yaml b/testing/kuttl/e2e/password-change/11-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/password-change/11--psql-connect.yaml rename to testing/kuttl/e2e/password-change/11-psql-connect.yaml diff --git a/testing/kuttl/e2e/pgadmin/01--cluster.yaml b/testing/kuttl/e2e/pgadmin/01--cluster.yaml deleted file mode 100644 index d1afb7be04..0000000000 --- a/testing/kuttl/e2e/pgadmin/01--cluster.yaml +++ /dev/null @@ -1,40 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: test-cm -data: - configMap: config ---- -apiVersion: v1 -kind: Secret -metadata: - name: test-secret -type: Opaque -stringData: - password: myPassword ---- -# Create a cluster with a configured pgAdmin UI. 
-apiVersion: postgres-operator.crunchydata.com/v1beta1 -kind: PostgresCluster -metadata: - name: interfaced - labels: { postgres-operator-test: kuttl } -spec: - postgresVersion: ${KUTTL_PG_VERSION} - instances: - - name: instance1 - replicas: 1 - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } } - userInterface: - pgAdmin: - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } } - config: - files: - - secret: - name: test-secret - - configMap: - name: test-cm - settings: - SHOW_GRAVATAR_IMAGE: False - LOGIN_BANNER: | - Custom KUTTL Login Banner diff --git a/testing/kuttl/e2e/pgadmin/01-assert.yaml b/testing/kuttl/e2e/pgadmin/01-assert.yaml deleted file mode 100644 index e4192a1217..0000000000 --- a/testing/kuttl/e2e/pgadmin/01-assert.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -apiVersion: postgres-operator.crunchydata.com/v1beta1 -kind: PostgresCluster -metadata: - name: interfaced -status: - instances: - - name: instance1 - replicas: 1 - readyReplicas: 1 - updatedReplicas: 1 - ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: interfaced-pgadmin -status: - replicas: 1 - readyReplicas: 1 - updatedReplicas: 1 - ---- -apiVersion: v1 -kind: Secret -metadata: - name: test-secret ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: test-cm diff --git a/testing/kuttl/e2e/pgadmin/02--check-settings.yaml b/testing/kuttl/e2e/pgadmin/02--check-settings.yaml deleted file mode 100644 index c68d032d1e..0000000000 --- a/testing/kuttl/e2e/pgadmin/02--check-settings.yaml +++ /dev/null @@ -1,56 +0,0 @@ ---- -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -commands: - # Log the amount of space on the startup volume. Assert that 4KiB are used. - - script: | - kubectl exec --namespace "${NAMESPACE}" statefulset.apps/interfaced-pgadmin \ - -- df --block-size=1K /etc/pgadmin | - awk '{ print } END { exit ($3 != "4") }' - - # Assert that current settings contain values from the spec. - - script: | - SETTINGS=$( - kubectl exec --namespace "${NAMESPACE}" statefulset.apps/interfaced-pgadmin \ - -- cat /etc/pgadmin/conf.d/~postgres-operator/pgadmin.json - ) - - contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } - { - contains "${SETTINGS}" '"LOGIN_BANNER": "Custom KUTTL Login Banner\n"' && - contains "${SETTINGS}" '"SHOW_GRAVATAR_IMAGE": false' - } || { - echo >&2 'Wrong settings!' - echo "${SETTINGS}" - exit 1 - } - - - script: | - CONTENTS=$( - kubectl exec --namespace "${NAMESPACE}" statefulset.apps/interfaced-pgadmin \ - -- cat /etc/pgadmin/conf.d/configMap - ) - - contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } - { - contains "${CONTENTS}" 'config' - } || { - echo >&2 'Wrong settings!' - echo "${CONTENTS}" - exit 1 - } - - - script: | - CONTENTS=$( - kubectl exec --namespace "${NAMESPACE}" statefulset.apps/interfaced-pgadmin \ - -- cat /etc/pgadmin/conf.d/password - ) - - contains() { bash -ceu '[[ "$1" == *"$2"* ]]' - "$@"; } - { - contains "${CONTENTS}" 'myPassword' - } || { - echo >&2 'Wrong settings!' 
- echo "${CONTENTS}" - exit 1 - } diff --git a/testing/kuttl/e2e/pgbackrest-backup-standby/00--cluster.yaml b/testing/kuttl/e2e/pgbackrest-backup-standby/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-backup-standby/00--cluster.yaml rename to testing/kuttl/e2e/pgbackrest-backup-standby/00-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-backup-standby/01--check-backup-logs.yaml b/testing/kuttl/e2e/pgbackrest-backup-standby/01-check-backup-logs.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-backup-standby/01--check-backup-logs.yaml rename to testing/kuttl/e2e/pgbackrest-backup-standby/01-check-backup-logs.yaml diff --git a/testing/kuttl/e2e/pgbackrest-backup-standby/02--cluster.yaml b/testing/kuttl/e2e/pgbackrest-backup-standby/02-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-backup-standby/02--cluster.yaml rename to testing/kuttl/e2e/pgbackrest-backup-standby/02-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-init/00--cluster.yaml b/testing/kuttl/e2e/pgbackrest-init/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-init/00--cluster.yaml rename to testing/kuttl/e2e/pgbackrest-init/00-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-init/02--cluster.yaml b/testing/kuttl/e2e/pgbackrest-init/02-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-init/02--cluster.yaml rename to testing/kuttl/e2e/pgbackrest-init/02-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-init/04--cluster.yaml b/testing/kuttl/e2e/pgbackrest-init/04-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-init/04--cluster.yaml rename to testing/kuttl/e2e/pgbackrest-init/04-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-init/06--check-spool-path.yaml b/testing/kuttl/e2e/pgbackrest-init/06-check-spool-path.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-init/06--check-spool-path.yaml rename to testing/kuttl/e2e/pgbackrest-init/06-check-spool-path.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/01--create-cluster.yaml b/testing/kuttl/e2e/pgbackrest-restore/01-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/01--create-cluster.yaml rename to testing/kuttl/e2e/pgbackrest-restore/01-create-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/02--create-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/02-create-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/02--create-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/02-create-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/03--backup.yaml b/testing/kuttl/e2e/pgbackrest-restore/03-backup.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/03--backup.yaml rename to testing/kuttl/e2e/pgbackrest-restore/03-backup.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/04--clone-cluster.yaml b/testing/kuttl/e2e/pgbackrest-restore/04-clone-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/04--clone-cluster.yaml rename to testing/kuttl/e2e/pgbackrest-restore/04-clone-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/05--check-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/05-check-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/05--check-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/05-check-data.yaml diff --git 
a/testing/kuttl/e2e/pgbackrest-restore/06--delete-clone.yaml b/testing/kuttl/e2e/pgbackrest-restore/06-delete-clone.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/06--delete-clone.yaml rename to testing/kuttl/e2e/pgbackrest-restore/06-delete-clone.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/07--annotate.yaml b/testing/kuttl/e2e/pgbackrest-restore/07-annotate.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/07--annotate.yaml rename to testing/kuttl/e2e/pgbackrest-restore/07-annotate.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/07--update-cluster.yaml b/testing/kuttl/e2e/pgbackrest-restore/07-update-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/07--update-cluster.yaml rename to testing/kuttl/e2e/pgbackrest-restore/07-update-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/08--wait-restart.yaml b/testing/kuttl/e2e/pgbackrest-restore/08-wait-restart.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/08--wait-restart.yaml rename to testing/kuttl/e2e/pgbackrest-restore/08-wait-restart.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/09--add-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/09-add-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/09--add-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/09-add-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/10--wait-archived.yaml b/testing/kuttl/e2e/pgbackrest-restore/10-wait-archived.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/10--wait-archived.yaml rename to testing/kuttl/e2e/pgbackrest-restore/10-wait-archived.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/11--clone-cluster.yaml b/testing/kuttl/e2e/pgbackrest-restore/11-clone-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/11--clone-cluster.yaml rename to testing/kuttl/e2e/pgbackrest-restore/11-clone-cluster.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/12--check-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/12-check-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/12--check-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/12-check-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/13--delete-clone.yaml b/testing/kuttl/e2e/pgbackrest-restore/13-delete-clone.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/13--delete-clone.yaml rename to testing/kuttl/e2e/pgbackrest-restore/13-delete-clone.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/14--lose-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/14-lose-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/14--lose-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/14-lose-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/15--in-place-pitr.yaml b/testing/kuttl/e2e/pgbackrest-restore/15-in-place-pitr.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/15--in-place-pitr.yaml rename to testing/kuttl/e2e/pgbackrest-restore/15-in-place-pitr.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/16--check-data.yaml b/testing/kuttl/e2e/pgbackrest-restore/16-check-data.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/16--check-data.yaml rename to testing/kuttl/e2e/pgbackrest-restore/16-check-data.yaml diff --git a/testing/kuttl/e2e/pgbackrest-restore/17--check-replication.yaml 
b/testing/kuttl/e2e/pgbackrest-restore/17-check-replication.yaml similarity index 100% rename from testing/kuttl/e2e/pgbackrest-restore/17--check-replication.yaml rename to testing/kuttl/e2e/pgbackrest-restore/17-check-replication.yaml diff --git a/testing/kuttl/e2e/pgbouncer/00--cluster.yaml b/testing/kuttl/e2e/pgbouncer/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/00--cluster.yaml rename to testing/kuttl/e2e/pgbouncer/00-cluster.yaml diff --git a/testing/kuttl/e2e/pgbouncer/01--psql-connect.yaml b/testing/kuttl/e2e/pgbouncer/01-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/01--psql-connect.yaml rename to testing/kuttl/e2e/pgbouncer/01-psql-connect.yaml diff --git a/testing/kuttl/e2e/pgbouncer/10--read-certificate.yaml b/testing/kuttl/e2e/pgbouncer/10-read-certificate.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/10--read-certificate.yaml rename to testing/kuttl/e2e/pgbouncer/10-read-certificate.yaml diff --git a/testing/kuttl/e2e/pgbouncer/11--open-connection.yaml b/testing/kuttl/e2e/pgbouncer/11-open-connection.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/11--open-connection.yaml rename to testing/kuttl/e2e/pgbouncer/11-open-connection.yaml diff --git a/testing/kuttl/e2e/pgbouncer/12--rotate-certificate.yaml b/testing/kuttl/e2e/pgbouncer/12-rotate-certificate.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/12--rotate-certificate.yaml rename to testing/kuttl/e2e/pgbouncer/12-rotate-certificate.yaml diff --git a/testing/kuttl/e2e/pgbouncer/13--read-certificate.yaml b/testing/kuttl/e2e/pgbouncer/13-read-certificate.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/13--read-certificate.yaml rename to testing/kuttl/e2e/pgbouncer/13-read-certificate.yaml diff --git a/testing/kuttl/e2e/pgbouncer/14--compare-certificate.yaml b/testing/kuttl/e2e/pgbouncer/14-compare-certificate.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/14--compare-certificate.yaml rename to testing/kuttl/e2e/pgbouncer/14-compare-certificate.yaml diff --git a/testing/kuttl/e2e/pgbouncer/15--check-connection.yaml b/testing/kuttl/e2e/pgbouncer/15-check-connection.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/15--check-connection.yaml rename to testing/kuttl/e2e/pgbouncer/15-check-connection.yaml diff --git a/testing/kuttl/e2e/pgbouncer/16--reconnect.yaml b/testing/kuttl/e2e/pgbouncer/16-reconnect.yaml similarity index 100% rename from testing/kuttl/e2e/pgbouncer/16--reconnect.yaml rename to testing/kuttl/e2e/pgbouncer/16-reconnect.yaml diff --git a/testing/kuttl/e2e/replica-read/00--cluster.yaml b/testing/kuttl/e2e/replica-read/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/replica-read/00--cluster.yaml rename to testing/kuttl/e2e/replica-read/00-cluster.yaml diff --git a/testing/kuttl/e2e/replica-read/01--psql-replica-read.yaml b/testing/kuttl/e2e/replica-read/01-psql-replica-read.yaml similarity index 100% rename from testing/kuttl/e2e/replica-read/01--psql-replica-read.yaml rename to testing/kuttl/e2e/replica-read/01-psql-replica-read.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/00--cluster.yaml b/testing/kuttl/e2e/root-cert-ownership/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/00--cluster.yaml rename to testing/kuttl/e2e/root-cert-ownership/00-cluster.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/01--check-owners.yaml 
b/testing/kuttl/e2e/root-cert-ownership/01-check-owners.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/01--check-owners.yaml rename to testing/kuttl/e2e/root-cert-ownership/01-check-owners.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/02--delete-owner1.yaml b/testing/kuttl/e2e/root-cert-ownership/02-delete-owner1.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/02--delete-owner1.yaml rename to testing/kuttl/e2e/root-cert-ownership/02-delete-owner1.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/03--check-owners.yaml b/testing/kuttl/e2e/root-cert-ownership/03-check-owners.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/03--check-owners.yaml rename to testing/kuttl/e2e/root-cert-ownership/03-check-owners.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/04--delete-owner2.yaml b/testing/kuttl/e2e/root-cert-ownership/04-delete-owner2.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/04--delete-owner2.yaml rename to testing/kuttl/e2e/root-cert-ownership/04-delete-owner2.yaml diff --git a/testing/kuttl/e2e/root-cert-ownership/05--check-secret.yaml b/testing/kuttl/e2e/root-cert-ownership/05-check-secret.yaml similarity index 100% rename from testing/kuttl/e2e/root-cert-ownership/05--check-secret.yaml rename to testing/kuttl/e2e/root-cert-ownership/05-check-secret.yaml diff --git a/testing/kuttl/e2e/scaledown/00--create-cluster.yaml b/testing/kuttl/e2e/scaledown/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/00--create-cluster.yaml rename to testing/kuttl/e2e/scaledown/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/01--update-cluster.yaml b/testing/kuttl/e2e/scaledown/01-update-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/01--update-cluster.yaml rename to testing/kuttl/e2e/scaledown/01-update-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/02--delete-cluster.yaml b/testing/kuttl/e2e/scaledown/02-delete-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/02--delete-cluster.yaml rename to testing/kuttl/e2e/scaledown/02-delete-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/10--create-cluster.yaml b/testing/kuttl/e2e/scaledown/10-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/10--create-cluster.yaml rename to testing/kuttl/e2e/scaledown/10-create-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/12--update-cluster.yaml b/testing/kuttl/e2e/scaledown/12-update-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/12--update-cluster.yaml rename to testing/kuttl/e2e/scaledown/12-update-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/13--delete-cluster.yaml b/testing/kuttl/e2e/scaledown/13-delete-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/13--delete-cluster.yaml rename to testing/kuttl/e2e/scaledown/13-delete-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/20--create-cluster.yaml b/testing/kuttl/e2e/scaledown/20-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/20--create-cluster.yaml rename to testing/kuttl/e2e/scaledown/20-create-cluster.yaml diff --git a/testing/kuttl/e2e/scaledown/21--update-cluster.yaml b/testing/kuttl/e2e/scaledown/21-update-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/scaledown/21--update-cluster.yaml rename to testing/kuttl/e2e/scaledown/21-update-cluster.yaml diff --git 
a/testing/kuttl/e2e/scaledown/readme.MD b/testing/kuttl/e2e/scaledown/readme.MD index 44fd880ed1..dd7f8fed7e 100644 --- a/testing/kuttl/e2e/scaledown/readme.MD +++ b/testing/kuttl/e2e/scaledown/readme.MD @@ -8,24 +8,24 @@ have the expected number of pods. ### From two sets to one set -* 00--create-cluster: create the cluster with two instance sets, one replica each +* 00-create-cluster: create the cluster with two instance sets, one replica each * 00-assert: check that the cluster exists with the expected status -* 01--update-cluster: update the cluster to remove one instance set +* 01-update-cluster: update the cluster to remove one instance set * 01-assert: check that the cluster exists with the expected status -* 02--delete-cluster +* 02-delete-cluster ### From one set with multiple replicas to one set with one replica -* 10--create-cluster: create the cluster with one instance set with two replicas +* 10-create-cluster: create the cluster with one instance set with two replicas * 10-assert: check that the cluster exists with the expected status * 11-annotate: set the roles as labels on the pods -* 12--update-cluster: update the cluster to remove one replica +* 12-update-cluster: update the cluster to remove one replica * 12-assert: check that the cluster exists with the expected status; and that the `master` pod that exists was the `master` before the scaledown -* 13--delete-cluster: delete the cluster +* 13-delete-cluster: delete the cluster ### From two sets with variable replicas to two sets with one replica each -* 20--create-cluster: create the cluster with two instance sets, with two and one replica +* 20-create-cluster: create the cluster with two instance sets, with two and one replica * 20-assert: check that the cluster exists with the expected status -* 21--update-cluster: update the cluster to reduce the two-replica instance to one-replica +* 21-update-cluster: update the cluster to reduce the two-replica instance set to one replica * 21-assert: check that the cluster exists with the expected status diff --git a/testing/kuttl/e2e/security-context/00-assert.yaml b/testing/kuttl/e2e/security-context/00-assert.yaml index a6a5f48b6a..6df19c6608 100644 --- a/testing/kuttl/e2e/security-context/00-assert.yaml +++ b/testing/kuttl/e2e/security-context/00-assert.yaml @@ -92,38 +92,6 @@ spec: readOnlyRootFilesystem: true runAsNonRoot: true --- -# pgAdmin -apiVersion: v1 -kind: Pod -metadata: - labels: - postgres-operator.crunchydata.com/cluster: security-context - postgres-operator.crunchydata.com/data: pgadmin - postgres-operator.crunchydata.com/role: pgadmin - statefulset.kubernetes.io/pod-name: security-context-pgadmin-0 - name: security-context-pgadmin-0 -spec: - containers: - - name: pgadmin - securityContext: - allowPrivilegeEscalation: false - privileged: false - readOnlyRootFilesystem: true - runAsNonRoot: true - initContainers: - - name: pgadmin-startup - securityContext: - allowPrivilegeEscalation: false - privileged: false - readOnlyRootFilesystem: true - runAsNonRoot: true - - name: nss-wrapper-init - securityContext: - allowPrivilegeEscalation: false - privileged: false - readOnlyRootFilesystem: true - runAsNonRoot: true ---- # pgBouncer apiVersion: v1 kind: Pod diff --git a/testing/kuttl/e2e/security-context/00--cluster.yaml b/testing/kuttl/e2e/security-context/00-cluster.yaml similarity index 82% rename from testing/kuttl/e2e/security-context/00--cluster.yaml rename to testing/kuttl/e2e/security-context/00-cluster.yaml index 5155eb4fc6..d754eedec6 100644 ---
a/testing/kuttl/e2e/security-context/00--cluster.yaml +++ b/testing/kuttl/e2e/security-context/00-cluster.yaml @@ -18,9 +18,6 @@ spec: proxy: pgBouncer: replicas: 1 - userInterface: - pgAdmin: - dataVolumeClaimSpec: { accessModes: [ReadWriteOnce], resources: { requests: { storage: 1Gi } } } monitoring: pgmonitor: exporter: {} diff --git a/testing/kuttl/e2e/security-context/01--security-context.yaml b/testing/kuttl/e2e/security-context/01-security-context.yaml similarity index 100% rename from testing/kuttl/e2e/security-context/01--security-context.yaml rename to testing/kuttl/e2e/security-context/01-security-context.yaml diff --git a/testing/kuttl/e2e/security-context/10--kyverno.yaml b/testing/kuttl/e2e/security-context/10-kyverno.yaml similarity index 100% rename from testing/kuttl/e2e/security-context/10--kyverno.yaml rename to testing/kuttl/e2e/security-context/10-kyverno.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-db-uri/00--create-cluster.yaml b/testing/kuttl/e2e/standalone-pgadmin-db-uri/00-create-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-db-uri/00--create-cluster.yaml rename to testing/kuttl/e2e/standalone-pgadmin-db-uri/00-create-cluster.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-db-uri/01--user-schema.yaml b/testing/kuttl/e2e/standalone-pgadmin-db-uri/01-user-schema.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-db-uri/01--user-schema.yaml rename to testing/kuttl/e2e/standalone-pgadmin-db-uri/01-user-schema.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-db-uri/02--create-pgadmin.yaml b/testing/kuttl/e2e/standalone-pgadmin-db-uri/02-create-pgadmin.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-db-uri/02--create-pgadmin.yaml rename to testing/kuttl/e2e/standalone-pgadmin-db-uri/02-create-pgadmin.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-db-uri/04--update-pgadmin.yaml b/testing/kuttl/e2e/standalone-pgadmin-db-uri/04-update-pgadmin.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-db-uri/04--update-pgadmin.yaml rename to testing/kuttl/e2e/standalone-pgadmin-db-uri/04-update-pgadmin.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/00--pgadmin.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/00-pgadmin.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/00--pgadmin.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/00-pgadmin.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/01--update-service.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/01-update-service.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/01--update-service.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/01-update-service.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/02--remove-service.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/02-remove-service.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/02--remove-service.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/02-remove-service.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/10--manual-service.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/10-manual-service.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/10--manual-service.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/10-manual-service.yaml diff --git 
a/testing/kuttl/e2e/standalone-pgadmin-service/20--owned-service.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/20-owned-service.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/20--owned-service.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/20-owned-service.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-service/21--service-takeover-fails.yaml b/testing/kuttl/e2e/standalone-pgadmin-service/21-service-takeover-fails.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-service/21--service-takeover-fails.yaml rename to testing/kuttl/e2e/standalone-pgadmin-service/21-service-takeover-fails.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/00--create-pgadmin.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/00-create-pgadmin.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-user-management/00--create-pgadmin.yaml rename to testing/kuttl/e2e/standalone-pgadmin-user-management/00-create-pgadmin.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/01-assert.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/01-assert.yaml index 244533b7ee..0290339143 100644 --- a/testing/kuttl/e2e/standalone-pgadmin-user-management/01-assert.yaml +++ b/testing/kuttl/e2e/standalone-pgadmin-user-management/01-assert.yaml @@ -6,12 +6,14 @@ commands: pod_name=$(kubectl get pod -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) secret_name=$(kubectl get secret -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) + # The pgadmin4 setup.py path embeds the image's Python version (python3.11 here); update it if the image ships a different Python users_in_pgadmin=$(kubectl exec -n "${NAMESPACE}" "${pod_name}" -- bash -c "python3 /usr/local/lib/python3.11/site-packages/pgadmin4/setup.py get-users --json") - bob_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="bob@example.com") | .role') - dave_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="dave@example.com") | .role') + bob_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="bob@example.com") | .role') + dave_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="dave@example.com") | .role') - [ $bob_role = 1 ] && [ $dave_role = 2 ] || exit 1 + # Prior to pgAdmin 9.3, the role values were integers rather than strings. This supports both variations.
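+ # jq -r prints the raw value (e.g. 1 or Administrator), so the comparison below works for either representation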
+ ( [ $bob_role = 1 ] && [ $dave_role = 2 ] ) || ( [ $bob_role = "Administrator" ] && [ $dave_role = "User" ] ) || exit 1 users_in_secret=$(kubectl get "${secret_name}" -n "${NAMESPACE}" -o 'go-template={{index .data "users.json" }}' | base64 -d) diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/02--edit-pgadmin-users.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/02-edit-pgadmin-users.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-user-management/02--edit-pgadmin-users.yaml rename to testing/kuttl/e2e/standalone-pgadmin-user-management/02-edit-pgadmin-users.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/03-assert.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/03-assert.yaml index 01aff25b3b..00c3d819fd 100644 --- a/testing/kuttl/e2e/standalone-pgadmin-user-management/03-assert.yaml +++ b/testing/kuttl/e2e/standalone-pgadmin-user-management/03-assert.yaml @@ -6,13 +6,15 @@ commands: pod_name=$(kubectl get pod -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) secret_name=$(kubectl get secret -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) + # The pgadmin4 setup.py path embeds the image's Python version (python3.11 here); update it if the image ships a different Python users_in_pgadmin=$(kubectl exec -n "${NAMESPACE}" "${pod_name}" -- bash -c "python3 /usr/local/lib/python3.11/site-packages/pgadmin4/setup.py get-users --json") - bob_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="bob@example.com") | .role') - dave_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="dave@example.com") | .role') - jimi_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="jimi@example.com") | .role') + bob_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="bob@example.com") | .role') + dave_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="dave@example.com") | .role') + jimi_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="jimi@example.com") | .role') - [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] || exit 1 + # Prior to pgAdmin 9.3, the role values were integers rather than strings. This supports both variations.
+ ( [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] ) || ( [ $bob_role = "Administrator" ] && [ $dave_role = "Administrator" ] && [ $jimi_role = "User" ] ) || exit 1 users_in_secret=$(kubectl get "${secret_name}" -n "${NAMESPACE}" -o 'go-template={{index .data "users.json" }}' | base64 -d) diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/04--change-pgadmin-user-passwords.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/04-change-pgadmin-user-passwords.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-user-management/04--change-pgadmin-user-passwords.yaml rename to testing/kuttl/e2e/standalone-pgadmin-user-management/04-change-pgadmin-user-passwords.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/05-assert.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/05-assert.yaml index 1dca13a7b7..f6eb83b2d9 100644 --- a/testing/kuttl/e2e/standalone-pgadmin-user-management/05-assert.yaml +++ b/testing/kuttl/e2e/standalone-pgadmin-user-management/05-assert.yaml @@ -6,13 +6,15 @@ commands: pod_name=$(kubectl get pod -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) secret_name=$(kubectl get secret -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) + # The pgadmin4 setup.py path embeds the image's Python version (python3.11 here); update it if the image ships a different Python users_in_pgadmin=$(kubectl exec -n "${NAMESPACE}" "${pod_name}" -- bash -c "python3 /usr/local/lib/python3.11/site-packages/pgadmin4/setup.py get-users --json") - bob_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="bob@example.com") | .role') - dave_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="dave@example.com") | .role') - jimi_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="jimi@example.com") | .role') + bob_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="bob@example.com") | .role') + dave_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="dave@example.com") | .role') + jimi_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="jimi@example.com") | .role') - [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] || exit 1 + # Prior to pgAdmin 9.3, the role values were integers rather than strings. This supports both variations.
+ ( [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] ) || ( [ $bob_role = "Administrator" ] && [ $dave_role = "Administrator" ] && [ $jimi_role = "User" ] ) || exit 1 users_in_secret=$(kubectl get "${secret_name}" -n "${NAMESPACE}" -o 'go-template={{index .data "users.json" }}' | base64 -d) diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/06--delete-pgadmin-users.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/06-delete-pgadmin-users.yaml similarity index 100% rename from testing/kuttl/e2e/standalone-pgadmin-user-management/06--delete-pgadmin-users.yaml rename to testing/kuttl/e2e/standalone-pgadmin-user-management/06-delete-pgadmin-users.yaml diff --git a/testing/kuttl/e2e/standalone-pgadmin-user-management/07-assert.yaml b/testing/kuttl/e2e/standalone-pgadmin-user-management/07-assert.yaml index 5c0e7267e6..3e3d8396b3 100644 --- a/testing/kuttl/e2e/standalone-pgadmin-user-management/07-assert.yaml +++ b/testing/kuttl/e2e/standalone-pgadmin-user-management/07-assert.yaml @@ -6,13 +6,15 @@ commands: pod_name=$(kubectl get pod -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) secret_name=$(kubectl get secret -n "${NAMESPACE}" -l postgres-operator.crunchydata.com/pgadmin=pgadmin -o name) + # The pgadmin4 setup.py path embeds the image's Python version (python3.11 here); update it if the image ships a different Python users_in_pgadmin=$(kubectl exec -n "${NAMESPACE}" "${pod_name}" -- bash -c "python3 /usr/local/lib/python3.11/site-packages/pgadmin4/setup.py get-users --json") - bob_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="bob@example.com") | .role') - dave_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="dave@example.com") | .role') - jimi_role=$(printf '%s\n' $users_in_pgadmin | jq '.[] | select(.username=="jimi@example.com") | .role') + bob_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="bob@example.com") | .role') + dave_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="dave@example.com") | .role') + jimi_role=$(printf '%s\n' $users_in_pgadmin | jq -r '.[] | select(.username=="jimi@example.com") | .role') - [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] || exit 1 + # Prior to pgAdmin 9.3, the role values were integers rather than strings. This supports both variations.
+ ( [ $bob_role = 1 ] && [ $dave_role = 1 ] && [ $jimi_role = 2 ] ) || ( [ $bob_role = "Administrator" ] && [ $dave_role = "Administrator" ] && [ $jimi_role = "User" ] ) || exit 1 users_in_secret=$(kubectl get "${secret_name}" -n "${NAMESPACE}" -o 'go-template={{index .data "users.json" }}' | base64 -d) diff --git a/testing/kuttl/e2e/streaming-standby/00--secrets.yaml b/testing/kuttl/e2e/streaming-standby/00-secrets.yaml similarity index 100% rename from testing/kuttl/e2e/streaming-standby/00--secrets.yaml rename to testing/kuttl/e2e/streaming-standby/00-secrets.yaml diff --git a/testing/kuttl/e2e/streaming-standby/01--primary-cluster.yaml b/testing/kuttl/e2e/streaming-standby/01-primary-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/streaming-standby/01--primary-cluster.yaml rename to testing/kuttl/e2e/streaming-standby/01-primary-cluster.yaml diff --git a/testing/kuttl/e2e/streaming-standby/02--create-data.yaml b/testing/kuttl/e2e/streaming-standby/02-create-data.yaml similarity index 100% rename from testing/kuttl/e2e/streaming-standby/02--create-data.yaml rename to testing/kuttl/e2e/streaming-standby/02-create-data.yaml diff --git a/testing/kuttl/e2e/streaming-standby/03--standby-cluster.yaml b/testing/kuttl/e2e/streaming-standby/03-standby-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/streaming-standby/03--standby-cluster.yaml rename to testing/kuttl/e2e/streaming-standby/03-standby-cluster.yaml diff --git a/testing/kuttl/e2e/streaming-standby/04--check-data.yaml b/testing/kuttl/e2e/streaming-standby/04-check-data.yaml similarity index 100% rename from testing/kuttl/e2e/streaming-standby/04--check-data.yaml rename to testing/kuttl/e2e/streaming-standby/04-check-data.yaml diff --git a/testing/kuttl/e2e/switchover/01--cluster.yaml b/testing/kuttl/e2e/switchover/01-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/switchover/01--cluster.yaml rename to testing/kuttl/e2e/switchover/01-cluster.yaml diff --git a/testing/kuttl/e2e/tablespace-enabled/00--cluster.yaml b/testing/kuttl/e2e/tablespace-enabled/00-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/tablespace-enabled/00--cluster.yaml rename to testing/kuttl/e2e/tablespace-enabled/00-cluster.yaml diff --git a/testing/kuttl/e2e/tablespace-enabled/01--psql-connect.yaml b/testing/kuttl/e2e/tablespace-enabled/01-psql-connect.yaml similarity index 100% rename from testing/kuttl/e2e/tablespace-enabled/01--psql-connect.yaml rename to testing/kuttl/e2e/tablespace-enabled/01-psql-connect.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/00--create-resources.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/00-create-resources.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/00--create-resources.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/00-create-resources.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/01--create-data.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/01-create-data.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/01--create-data.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/01-create-data.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/02--shutdown-cluster.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/02-shutdown-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/02--shutdown-cluster.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/02-shutdown-cluster.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/03--annotate-cluster.yaml 
b/testing/kuttl/e2e/wal-pvc-pgupgrade/03-annotate-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/03--annotate-cluster.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/03-annotate-cluster.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/04--restart-cluster.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/04-restart-cluster.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/04--restart-cluster.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/04-restart-cluster.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/06--check-data-and-version.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/06-check-data-and-version.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/06--check-data-and-version.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/06-check-data-and-version.yaml diff --git a/testing/kuttl/e2e/wal-pvc-pgupgrade/06--check-spool-path.yaml b/testing/kuttl/e2e/wal-pvc-pgupgrade/06-check-spool-path.yaml similarity index 100% rename from testing/kuttl/e2e/wal-pvc-pgupgrade/06--check-spool-path.yaml rename to testing/kuttl/e2e/wal-pvc-pgupgrade/06-check-spool-path.yaml diff --git a/testing/kuttl/scripts/pgbackrest-initialization.sh b/testing/kuttl/scripts/pgbackrest-initialization.sh index ba6cd4a7e5..9d60a4cd9d 100755 --- a/testing/kuttl/scripts/pgbackrest-initialization.sh +++ b/testing/kuttl/scripts/pgbackrest-initialization.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash EXPECTED_STATUS=$1 EXPECTED_NUM_BACKUPS=$2
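The kuttl step files above were renamed from the `NN--name.yaml` pattern to `NN-name.yaml`. For illustration only, a minimal bash sketch of how a bulk rename of that shape could be produced (this loop is not part of the change set; the glob and paths are assumptions based on the renames shown):

    # Hypothetical sketch: collapse the double dash in each kuttl step file name.
    # "${f/--/-}" is bash parameter expansion replacing the first "--" in the path.
    for f in testing/kuttl/e2e/*/*--*.yaml; do
      git mv "$f" "${f/--/-}"
    done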